From 52ef67d9a43297465f1243f5b647f93db959c5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 16:35:43 +0200 Subject: [PATCH 01/10] add test for sample_interval --- tests/test_traces.py | 94 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/tests/test_traces.py b/tests/test_traces.py index 67b7285..2abb590 100644 --- a/tests/test_traces.py +++ b/tests/test_traces.py @@ -1,7 +1,8 @@ from datetime import datetime, timedelta import nose -from traces import Histogram, TimeSeries + +from traces import TimeSeries, Domain def test_init_data(): @@ -59,7 +60,7 @@ def test_merge(): assert True in ts_merge[0] assert None in ts_merge[0] - + def test_set_interval(): ts = TimeSeries() nose.tools.assert_raises(KeyError, ts.get, 0) @@ -99,6 +100,7 @@ def test_set_interval_datetime(): (datetime(2012, 1, 8, 0, 0), 100), (datetime(2012, 1, 10, 0, 0), 10)] + def test_remove_points_from_interval(): ts = TimeSeries(default=0) ts[0] = 0 @@ -111,7 +113,7 @@ def test_remove_points_from_interval(): del ts[3.5:4.5] assert ts[5] == 1 - + ts[4] = 0 del ts[3:4.5] @@ -124,3 +126,89 @@ def test_remove_points_from_interval(): del ts[3.5:4] assert ts[5] == 0 + + +def test_sample_interval_days(): + import pandas as pd + ts = Domain([(datetime(2012, 1, 1), 400), + (datetime(2012, 3, 1), 400)]) + + ts[datetime(2012, 1, 4):datetime(2012, 1, 20)] = 10 + ts[datetime(2012, 1, 25):datetime(2012, 2, 7)] = 50 + ts[datetime(2012, 1, 19):datetime(2012, 1, 27)] = 0 + + sr = ts.sample_interval(sampling_period=timedelta(days=1), end=datetime(2012, 2, 1)) + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 10.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 10.0), + (pd.Timestamp('2012-01-07 00:00:00'), 10.0), + (pd.Timestamp('2012-01-08 00:00:00'), 10.0), + (pd.Timestamp('2012-01-09 00:00:00'), 10.0), + (pd.Timestamp('2012-01-10 00:00:00'), 10.0), + (pd.Timestamp('2012-01-11 00:00:00'), 10.0), + (pd.Timestamp('2012-01-12 00:00:00'), 10.0), + (pd.Timestamp('2012-01-13 00:00:00'), 10.0), + (pd.Timestamp('2012-01-14 00:00:00'), 10.0), + (pd.Timestamp('2012-01-15 00:00:00'), 10.0), + (pd.Timestamp('2012-01-16 00:00:00'), 10.0), + (pd.Timestamp('2012-01-17 00:00:00'), 10.0), + (pd.Timestamp('2012-01-18 00:00:00'), 10.0), + (pd.Timestamp('2012-01-19 00:00:00'), 0.0), + (pd.Timestamp('2012-01-20 00:00:00'), 0.0), + (pd.Timestamp('2012-01-21 00:00:00'), 0.0), + (pd.Timestamp('2012-01-22 00:00:00'), 0.0), + (pd.Timestamp('2012-01-23 00:00:00'), 0.0), + (pd.Timestamp('2012-01-24 00:00:00'), 0.0), + (pd.Timestamp('2012-01-25 00:00:00'), 0.0), + (pd.Timestamp('2012-01-26 00:00:00'), 0.0), + (pd.Timestamp('2012-01-27 00:00:00'), 50.0), + (pd.Timestamp('2012-01-28 00:00:00'), 50.0), + (pd.Timestamp('2012-01-29 00:00:00'), 50.0), + (pd.Timestamp('2012-01-30 00:00:00'), 50.0), + (pd.Timestamp('2012-01-31 00:00:00'), 50.0)] + + +def test_sample_interval_hours(): + import pandas as pd + + ts = Domain([(datetime(2012, 1, 1), 400), + (datetime(2012, 1, 10), 400)]) + + ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] = 10 + ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50 + + sr = ts.sample_interval(sampling_period=timedelta(days=1)) + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), + 
(pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 205.0), + (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 75.0), + (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 181.25), + (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + + sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="max") + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + + sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="min") + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] From ba8044a69d83453e30fd8dab0f0fcbb5d348dc7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 16:44:02 +0200 Subject: [PATCH 02/10] clean test --- tests/test_traces.py | 54 ++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/test_traces.py b/tests/test_traces.py index 2abb590..134c947 100644 --- a/tests/test_traces.py +++ b/tests/test_traces.py @@ -181,34 +181,34 @@ def test_sample_interval_hours(): ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50 sr = ts.sample_interval(sampling_period=timedelta(days=1)) - assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 205.0), - (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 75.0), - (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 181.25), - (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 205.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 75.0), + (pd.Timestamp('2012-01-07 00:00:00'), 181.25), + (pd.Timestamp('2012-01-08 00:00:00'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="max") - assert list(sr.iteritems()) == 
[(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 400.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 400.0), + (pd.Timestamp('2012-01-07 00:00:00'), 400.0), + (pd.Timestamp('2012-01-08 00:00:00'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="min") - assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 10.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 10.0), + (pd.Timestamp('2012-01-07 00:00:00'), 50.0), + (pd.Timestamp('2012-01-08 00:00:00'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] From ef2dedc186117fb66a7eb844dc73bf5c4950767c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 16:44:14 +0200 Subject: [PATCH 03/10] add sample_interval function --- traces/timeseries.py | 105 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/traces/timeseries.py b/traces/timeseries.py index d6d91c8..8e3103c 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -392,6 +392,111 @@ def sample(self, sampling_period, start=None, end=None, current_time += sampling_period return result + def sample_interval(self, sampling_period, + start=None, end=None, + operation="mean"): + + start, end, mask = self._check_boundaries(start, end) + sampling_period = self._check_regularization(start, end, + sampling_period) + + try: + import pandas as pd + except ImportError: + msg = "sample_interval need pandas to be installed" + raise ImportError(msg) + + # create index on [start, end) + idx = pd.date_range(start, end, freq=sampling_period, closed=None) + idx_list = idx.values # list(idx) + + # create all inflexion points + def items_in_horizon(): + # yields all items between start and end as well as start and end + yield (start, self[start]) + for t, v in self.items(): + if t < start: + continue + if t > end: + break + yield t, v + yield (end, self[end]) + inflexion_times, inflexion_values = zip(*items_in_horizon()) + inflexion_times = 
pd.DatetimeIndex(inflexion_times) + + # identify all inflexion intervals + # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] + inflexion_intervals = inflexion_times.floor(sampling_period)\ + .map(idx.get_loc) + + # convert DatetimeIndex to numpy array for faster indexation + inflexion_times = inflexion_times.values + + Np1 = len(idx_list) - 1 + + # convert to timestamp + # (to make interval arithmetic faster, no need for total_seconds) + inflexion_times = (inflexion_times.astype("int64")) + idx_times = (idx.astype("int64")) + + # initialise init, update and finish functions depending + # on the aggregation operator + init, update, finish = { + "mean": ( + lambda t, v: 0.0, + lambda agg, t0, t1, v: agg + (t1 - t0) * v, + lambda agg, t_start, t_end: agg / (t_end - t_start), + ), + "max": ( + lambda t, v: v, + lambda agg, t0, t1, v: max(agg, v), + lambda agg, t_start, t_end: agg, + ), + "min": ( + lambda t, v: v, + lambda agg, t0, t1, v: min(agg, v), + lambda agg, t_start, t_end: agg, + ), + }[operation] + + # initialise first interval + t_start, t_end = idx_times[0:2] + i0, t0, v0 = 0, t_start, self[start] + agg = init(t0, v0) + + result = [] + for i1, t1, v1 in zip(inflexion_intervals, + inflexion_times, + inflexion_values): + if i0 != i1: + # change of interval + + # finish previous interval + agg = update(agg, t0, t_end, v0) + agg = finish(agg, t_start, t_end) + result.append((idx_list[i0], agg)) + + # handle all intervals between t_end and t1 + if i1 != i0 + 1: + result.append((idx_list[i0 + 1], v0)) + + # if last_point, break + if i1 == Np1: + break + + # set up new interval + t_start, t_end = idx_times[i1:i1 + 2] + i0, t0 = i1, t_start + agg = init(t0, v0) + + agg = update(agg, t0, t1, v0) + + i0, t0, v0 = i1, t1, v1 + + df = pd.DataFrame.from_records(result) + return df.set_index(0).iloc[:, 0].reindex(idx[:-1]).ffill() + + def moving_average(self, sampling_period, window_size=None, start=None, end=None, From 2ae977788a3f7adcbb1419d1c561b479256658d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 17:01:18 +0200 Subject: [PATCH 04/10] fix pep8 error --- traces/timeseries.py | 1 - 1 file changed, 1 deletion(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 8e3103c..051ba8c 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -496,7 +496,6 @@ def items_in_horizon(): df = pd.DataFrame.from_records(result) return df.set_index(0).iloc[:, 0].reindex(idx[:-1]).ffill() - def moving_average(self, sampling_period, window_size=None, start=None, end=None, From ff4e42fb221587d77c02765318764d7204e20c25 Mon Sep 17 00:00:00 2001 From: sdementen Date: Wed, 26 Apr 2017 05:30:20 +0200 Subject: [PATCH 05/10] clean: do not repeat start,end in items_in_horizon --- traces/timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 051ba8c..2ac81f9 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -415,9 +415,9 @@ def items_in_horizon(): # yields all items between start and end as well as start and end yield (start, self[start]) for t, v in self.items(): - if t < start: + if t <= start: continue - if t > end: + if t >= end: break yield t, v yield (end, self[end]) From b366902624a462bcbaa390b178877906fc281447 Mon Sep 17 00:00:00 2001 From: sdementen Date: Wed, 26 Apr 2017 06:20:44 +0200 Subject: [PATCH 06/10] fix: more robust detection on inflexion_intervals previous solution with floor did not work for non simple sample_interval 
(like 15 days) --- traces/timeseries.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 2ac81f9..1f5b9ca 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -426,8 +426,9 @@ def items_in_horizon(): # identify all inflexion intervals # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] - inflexion_intervals = inflexion_times.floor(sampling_period)\ - .map(idx.get_loc) + inflexion_intervals = inflexion_times.map( + lambda t: idx.get_loc(t, method="ffill")) + # convert DatetimeIndex to numpy array for faster indexation inflexion_times = inflexion_times.values From b509e4c74eae28974432c2bde16d83a294c0dc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:21:51 +0200 Subject: [PATCH 07/10] improve documentation + support passing a DateTimeIndex to sample_interval --- traces/timeseries.py | 45 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 1f5b9ca..a1b62cd 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -26,7 +26,6 @@ except ImportError: pass - EXTEND_BACK = object() @@ -301,6 +300,7 @@ def iterperiods(self, start=None, end=None, value=None): value_function = self._value_function(value) # get start index and value + start_index = self._d.bisect_right(start) if start_index: start_value = self._d[self._d.iloc[start_index - 1]] @@ -392,13 +392,24 @@ def sample(self, sampling_period, start=None, end=None, current_time += sampling_period return result - def sample_interval(self, sampling_period, + def sample_interval(self, sampling_period=None, start=None, end=None, + idx=None, operation="mean"): - - start, end, mask = self._check_boundaries(start, end) - sampling_period = self._check_regularization(start, end, - sampling_period) + """ + Sampling on intervals by using some operation (mean,max,min). + + It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. + + The returing pandas.Series will be indexed either on pandas.date_range(start,end,sampling_period) or on idx. 
+ + :param sampling_period: the sampling period + :param start: the start time of the sampling + :param end: the end time of the sampling + :param idx: a DateTimeIndex with the start times of the intervals + :param operation: "mean", "max" or "min" + :return: a pandas Series with the Trace sampled + """ try: import pandas as pd @@ -406,8 +417,15 @@ def sample_interval(self, sampling_period, msg = "sample_interval need pandas to be installed" raise ImportError(msg) - # create index on [start, end) - idx = pd.date_range(start, end, freq=sampling_period, closed=None) + if idx is None: + start, end, mask = self._check_boundaries(start, end) + sampling_period = self._check_regularization(start, end, + sampling_period) + # create index on [start, end) + idx = pd.date_range(start, end, freq=sampling_period, closed=None) + else: + start, end, mask = self._check_boundaries(idx[0], idx[-1]) + idx_list = idx.values # list(idx) # create all inflexion points @@ -421,14 +439,14 @@ def items_in_horizon(): break yield t, v yield (end, self[end]) + inflexion_times, inflexion_values = zip(*items_in_horizon()) inflexion_times = pd.DatetimeIndex(inflexion_times) # identify all inflexion intervals # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] - inflexion_intervals = inflexion_times.map( - lambda t: idx.get_loc(t, method="ffill")) - + # TODO: look to use searchsorted as it operates more efficienly (but offset of 1 in most cases) + inflexion_intervals = inflexion_times.map(lambda t: idx.get_loc(t, method="ffill")) # convert DatetimeIndex to numpy array for faster indexation inflexion_times = inflexion_times.values @@ -597,7 +615,6 @@ def bin(self, unit, n_units=1, start=None, end=None, mask=None, result = sortedcontainers.SortedDict() for bin_start, bin_end in mask.spans_between(start, end, unit, n_units=n_units): - result[bin_start] = function(bin_start, bin_end, mask=mask, normalized=False) @@ -1133,7 +1150,7 @@ def __init__(self, data=None): def __repr__(self): return '\n%s\n' % \ - pprint.pformat(self._d) + pprint.pformat(self._d) def start(self): try: @@ -1203,7 +1220,6 @@ def spans_between(self, start, end, unit, n_units=1): def hour_of_day(start, end, hour): - # start should be date, or if datetime, will use date of datetime floored = utils.datetime_floor(start) @@ -1221,7 +1237,6 @@ def hour_of_day(start, end, hour): def day_of_week(start, end, weekday): - # allow weekday name or number number = utils.weekday_number(weekday) From fb71dac82d92d8549b337666a8b1f8d0b284571b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:28:30 +0200 Subject: [PATCH 08/10] add test for new idx parameter in sample_interval --- tests/test_traces.py | 20 ++++++++++++++++++++ traces/timeseries.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_traces.py b/tests/test_traces.py index 134c947..693486d 100644 --- a/tests/test_traces.py +++ b/tests/test_traces.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta import nose +from pandas.util.testing import assert_series_equal from traces import TimeSeries, Domain @@ -212,3 +213,22 @@ def test_sample_interval_hours(): (pd.Timestamp('2012-01-07 00:00:00'), 50.0), (pd.Timestamp('2012-01-08 00:00:00'), 50.0), (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] + + +def test_sample_interval_index(): + import pandas as pd + + start = datetime(2012, 1, 1) + end = datetime(2012, 1, 10) + + ts = Domain([(start, 400), + (end, 400)]) + + ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] 
= 10 + ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50 + + idx = pd.date_range(start, end, freq="D") + sr = ts.sample_interval(sampling_period=timedelta(days=1)) + sr2 = ts.sample_interval(idx=idx) + + assert_series_equal(sr, sr2) diff --git a/traces/timeseries.py b/traces/timeseries.py index a1b62cd..53b074e 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -401,7 +401,7 @@ def sample_interval(self, sampling_period=None, It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. - The returing pandas.Series will be indexed either on pandas.date_range(start,end,sampling_period) or on idx. + The returing pandas.Series will be indexed either on pandas.date_range(start,end,freq=sampling_period) or on idx. :param sampling_period: the sampling period :param start: the start time of the sampling From 09f7d038c251243ac58431503e47a5434eb7c239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:45:48 +0200 Subject: [PATCH 09/10] fix pep8 --- traces/timeseries.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 53b074e..c8d0835 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -396,12 +396,13 @@ def sample_interval(self, sampling_period=None, start=None, end=None, idx=None, operation="mean"): - """ - Sampling on intervals by using some operation (mean,max,min). + """Sampling on intervals by using some operation (mean,max,min). - It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. + It can be called either with sampling_period, [start], [end] + or with a idx as a DateTimeIndex. - The returing pandas.Series will be indexed either on pandas.date_range(start,end,freq=sampling_period) or on idx. + The returing pandas.Series will be indexed either on + pandas.date_range(start,end,freq=sampling_period) or on idx. 
:param sampling_period: the sampling period :param start: the start time of the sampling @@ -420,7 +421,7 @@ def sample_interval(self, sampling_period=None, if idx is None: start, end, mask = self._check_boundaries(start, end) sampling_period = self._check_regularization(start, end, - sampling_period) + sampling_period) # create index on [start, end) idx = pd.date_range(start, end, freq=sampling_period, closed=None) else: @@ -445,8 +446,10 @@ def items_in_horizon(): # identify all inflexion intervals # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] - # TODO: look to use searchsorted as it operates more efficienly (but offset of 1 in most cases) - inflexion_intervals = inflexion_times.map(lambda t: idx.get_loc(t, method="ffill")) + # TODO: look to use searchsorted as it operates more + # TODO: efficienly (but offset of 1 in most cases) + inflexion_intervals = inflexion_times.map( + lambda t: idx.get_loc(t, method="ffill")) # convert DatetimeIndex to numpy array for faster indexation inflexion_times = inflexion_times.values From 31a1ab886c3863292ad34ac5901d2ad0c4d9f4e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:54:23 +0200 Subject: [PATCH 10/10] fix pep8 --- traces/timeseries.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index c8d0835..10140f2 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -397,17 +397,17 @@ def sample_interval(self, sampling_period=None, idx=None, operation="mean"): """Sampling on intervals by using some operation (mean,max,min). - - It can be called either with sampling_period, [start], [end] + + It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. - - The returing pandas.Series will be indexed either on + + The returing pandas.Series will be indexed either on pandas.date_range(start,end,freq=sampling_period) or on idx. - + :param sampling_period: the sampling period :param start: the start time of the sampling :param end: the end time of the sampling - :param idx: a DateTimeIndex with the start times of the intervals + :param idx: a DateTimeIndex with the start times of the intervals :param operation: "mean", "max" or "min" :return: a pandas Series with the Trace sampled """
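
A minimal usage sketch of the sample_interval API added in this series, condensed from the tests in tests/test_traces.py above. It assumes pandas is installed and the traces package is at the state of patch 10/10; Domain is the same class the tests import from traces, and the variable names below are illustrative only.

from datetime import datetime, timedelta

import pandas as pd

from traces import Domain

# Piecewise-constant trace: value 400 outside the two windows set below.
ts = Domain([(datetime(2012, 1, 1), 400),
             (datetime(2012, 1, 10), 400)])
ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] = 10
ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50

# Time-weighted mean over daily intervals on [start, end); the result is a
# pandas Series indexed on the interval start times.
daily_mean = ts.sample_interval(sampling_period=timedelta(days=1))

# Same intervals, keeping the maximum (or minimum) value seen in each one.
daily_max = ts.sample_interval(sampling_period=timedelta(days=1),
                               operation="max")
daily_min = ts.sample_interval(sampling_period=timedelta(days=1),
                               operation="min")

# Equivalent call passing an explicit index of interval start times
# (the idx parameter introduced in patch 07).
idx = pd.date_range(datetime(2012, 1, 1), datetime(2012, 1, 10), freq="D")
daily_mean_idx = ts.sample_interval(idx=idx)

# Expected True, mirroring test_sample_interval_index in patch 08.
print(daily_mean.equals(daily_mean_idx))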