Skip to content
2 changes: 1 addition & 1 deletion plotly_resampler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

__docformat__ = "numpy"
__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
__version__ = "0.5.0"
__version__ = "0.6.3"

__all__ = [
"__version__",
Expand Down
21 changes: 12 additions & 9 deletions plotly_resampler/aggregation/aggregation_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ def _supports_dtype(self, s: pd.Series):
@staticmethod
def _calc_med_diff(s: pd.Series) -> Tuple[float, np.ndarray]:
# ----- divide and conquer heuristic to calculate the median diff ------
s_idx_diff = np.diff(s.index.values) # remark: s_idx_diff.shape === len(s) -1
# remark: thanks to the prepend -> s_idx_diff.shape === len(s)
siv = s.index.values
s_idx_diff = np.diff(s.index.values, prepend=siv[0])

# To do so - use a quantile-based (median) approach where we reshape the data
# into `n_blocks` blocks and calculate the min
Expand All @@ -66,9 +68,11 @@ def _calc_med_diff(s: pd.Series) -> Tuple[float, np.ndarray]:
sid_v: np.ndarray = s_idx_diff[: blck_size * n_blcks].reshape(n_blcks, -1)

# calculate the min and max and calculate the median on that
med_diff = np.median(np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))))
med_diff = np.quantile(
np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))), q=0.55
)
else:
med_diff = np.median(s_idx_diff)
med_diff = np.quantile(s_idx_diff, q=0.55)

return med_diff, s_idx_diff

Expand All @@ -77,17 +81,17 @@ def _insert_gap_none(self, s: pd.Series) -> pd.Series:
med_diff, s_idx_diff = self._calc_med_diff(s)
# add None data-points in-between the gaps
if med_diff is not None:
df_gap_idx = s.index.values[1:][s_idx_diff > 3 * med_diff]
df_gap_idx = s.index.values[s_idx_diff > 3 * med_diff]
if len(df_gap_idx):
df_res_gap = pd.Series(
index=df_gap_idx, data=None, name=s.name, copy=False
)

if isinstance(df_res_gap.index, pd.DatetimeIndex):
# Due to the s.index`.values` cast, df_res_gap has lost
# Due to the s.index`.values` cast, df_res_gap has lost
# time-information, so now we restore it
df_res_gap.index = (
df_res_gap.index.tz_localize('UTC').tz_convert(s.index.tz)
df_res_gap.index = df_res_gap.index.tz_localize("UTC").tz_convert(
s.index.tz
)

# Note:
Expand All @@ -104,8 +108,7 @@ def _replace_gap_end_none(self, s: pd.Series) -> pd.Series:
med_diff, s_idx_diff = self._calc_med_diff(s)
if med_diff is not None:
# Replace data-points with None where the gaps occur
s.iloc[1:].loc[s_idx_diff > 3 * med_diff] = None

s.loc[s_idx_diff > 3 * med_diff] = None
return s

def aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
Expand Down
58 changes: 40 additions & 18 deletions plotly_resampler/aggregation/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ class LTTB(AbstractSeriesAggregator):

"""

def __init__(self, interleave_gaps: bool = True, ):
def __init__(
self,
interleave_gaps: bool = True,
):
"""
Parameters
----------
Expand All @@ -56,7 +59,7 @@ def __init__(self, interleave_gaps: bool = True, ):
super().__init__(
interleave_gaps,
dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]]
+ ["category", "bool"],
+ ["category", "bool"],
)

def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
Expand All @@ -66,11 +69,11 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
s_i = s.index.values

if s_i.dtype.type == np.datetime64:
# lttbc does not support this datatype -> convert to int
# lttbc does not support this datatype -> convert to int
# (where the time is represented in ns)
s_i = s_i.astype(int)
idx, data = lttbc.downsample(s_i, s_v, n_out)
idx = pd.to_datetime(idx, unit='ns', utc=True).tz_convert(s.index.tz)
idx = pd.to_datetime(idx, unit="ns", utc=True).tz_convert(s.index.tz)
else:
idx, data = lttbc.downsample(s_i, s_v, n_out)
idx = idx.astype(s_i.dtype)
Expand Down Expand Up @@ -129,12 +132,14 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
# add the corresponding offset
argmin = (
s[: block_size * offset.shape[0]]
.values.reshape(-1, block_size).argmin(axis=1)
.values.reshape(-1, block_size)
.argmin(axis=1)
+ offset
)
argmax = (
s[argmax_offset: block_size * offset.shape[0] + argmax_offset]
.values.reshape(-1, block_size).argmax(axis=1)
s[argmax_offset : block_size * offset.shape[0] + argmax_offset]
.values.reshape(-1, block_size)
.argmax(axis=1)
+ offset
+ argmax_offset
)
Expand All @@ -150,7 +155,7 @@ class MinMaxAggregator(AbstractSeriesAggregator):
.. note::
This method is rather efficient when scaling to large data sizes and can be used
as a data-reduction step before feeding it to the :class:`LTTB <LTTB>`
algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
:class:`MinMaxOverlapAggregator <MinMaxOverlapAggregator>`.

"""
Expand All @@ -173,22 +178,35 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
block_size = math.ceil(s.shape[0] / n_out * 2)

# Calculate the offset range which will be added to the argmin and argmax pos
offset = np.arange(
0, stop=s.shape[0] - block_size, step=block_size
)
offset = np.arange(0, stop=s.shape[0] - block_size, step=block_size)

# Calculate the argmin & argmax on the reshaped view of `s` &
# add the corresponding offset
argmin = (
s[: block_size * offset.shape[0]]
.values.reshape(-1, block_size).argmin(axis=1)
.values.reshape(-1, block_size)
.argmin(axis=1)
+ offset
)
argmax = (
s[: block_size * offset.shape[0]]
.values.reshape(-1, block_size).argmax(axis=1)
.values.reshape(-1, block_size)
.argmax(axis=1)
+ offset
)

            # Note: the implementation below flips the array to search from
            # right-to-left (as min or max will always use the first occurrence of
            # the minimum, i.e. the leftmost item)
            # This, however, creates a large computational overhead -> we do not use
            # this implementation and suggest using the MinMaxAggregator instead.
# argmax = (
# (block_size - 1)
# - np.fliplr(
# s[: block_size * offset.shape[0]].values.reshape(-1, block_size)
# ).argmax(axis=1)
# ) + offset

# Sort the argmin & argmax (where we append the first and last index item)
# and then slice the original series on these indexes.
return s.iloc[np.unique(np.concatenate((argmin, argmax, [0, s.shape[0] - 1])))]
Expand All @@ -209,14 +227,18 @@ def __init__(self, interleave_gaps: bool = True):
sampled data. A quantile-based approach is used to determine the gaps /
irregularly sampled data. By default, True.
"""
self.lttb = LTTB(interleave_gaps=interleave_gaps)
self.minmax = MinMaxOverlapAggregator(interleave_gaps=interleave_gaps)
super().__init__(interleave_gaps, dtype_regex_list=None)
self.lttb = LTTB(interleave_gaps=False)
self.minmax = MinMaxOverlapAggregator(interleave_gaps=False)
super().__init__(
interleave_gaps,
dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]]
+ ["category", "bool"],
)

def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
if s.shape[0] > n_out * 1_000:
s = self.minmax._aggregate(s, n_out * 50)
return self.lttb.aggregate(s, n_out)
return self.lttb._aggregate(s, n_out)


class EveryNthPoint(AbstractSeriesAggregator):
Expand Down Expand Up @@ -249,7 +271,7 @@ class FuncAggregator(AbstractSeriesAggregator):
"""

def __init__(
self, aggregation_func, interleave_gaps: bool = True, dtype_regex_list=None
self, aggregation_func, interleave_gaps: bool = True, dtype_regex_list=None
):
"""
Parameters
Expand Down
17 changes: 6 additions & 11 deletions plotly_resampler/figure_resampler/figure_resampler_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,20 +602,15 @@ def add_trace(
trace.uid = uuid

hf_x = (
trace["x"]
if hasattr(trace, "x") and hf_x is None
else hf_x.values
if isinstance(hf_x, pd.Series)
else hf_x
trace["x"] if hasattr(trace, "x") and hf_x is None
else hf_x.values if isinstance(hf_x, pd.Series)
else hf_x if isinstance(hf_x, pd.Index)
else np.asarray(hf_x)
)
if isinstance(hf_x, tuple):
hf_x = list(hf_x)

hf_y = (
trace["y"]
if hasattr(trace, "y") and hf_y is None
else hf_y.values
if isinstance(hf_y, pd.Series)
trace["y"] if hasattr(trace, "y") and hf_y is None
else hf_y.values if isinstance(hf_y, (pd.Series, pd.Index))
else hf_y
)
hf_y = np.asarray(hf_y)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "plotly-resampler" # Do not forget to update the __init__.py __version__ variable
version = "0.5.0"
version = "0.6.3"
description = "Visualizing large time series with plotly"
authors = ["Jonas Van Der Donckt", "Jeroen Van Der Donckt", "Emiel Deprost"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def cat_series() -> pd.Series:

@pytest.fixture
def bool_series() -> pd.Series:
bool_list = [True, False, True, True, True, True]
bool_list = [True, False, True, True, True, True] + [True] * 50
return pd.Series(bool_list * (_nb_samples // len(bool_list)), dtype="bool")


Expand Down