From 52ef67d9a43297465f1243f5b647f93db959c5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 16:35:43 +0200 Subject: [PATCH 01/10] add test for sample_interval --- tests/test_traces.py | 94 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/tests/test_traces.py b/tests/test_traces.py index 67b7285..2abb590 100644 --- a/tests/test_traces.py +++ b/tests/test_traces.py @@ -1,7 +1,8 @@ from datetime import datetime, timedelta import nose -from traces import Histogram, TimeSeries + +from traces import TimeSeries, Domain def test_init_data(): @@ -59,7 +60,7 @@ def test_merge(): assert True in ts_merge[0] assert None in ts_merge[0] - + def test_set_interval(): ts = TimeSeries() nose.tools.assert_raises(KeyError, ts.get, 0) @@ -99,6 +100,7 @@ def test_set_interval_datetime(): (datetime(2012, 1, 8, 0, 0), 100), (datetime(2012, 1, 10, 0, 0), 10)] + def test_remove_points_from_interval(): ts = TimeSeries(default=0) ts[0] = 0 @@ -111,7 +113,7 @@ def test_remove_points_from_interval(): del ts[3.5:4.5] assert ts[5] == 1 - + ts[4] = 0 del ts[3:4.5] @@ -124,3 +126,89 @@ def test_remove_points_from_interval(): del ts[3.5:4] assert ts[5] == 0 + + +def test_sample_interval_days(): + import pandas as pd + ts = Domain([(datetime(2012, 1, 1), 400), + (datetime(2012, 3, 1), 400)]) + + ts[datetime(2012, 1, 4):datetime(2012, 1, 20)] = 10 + ts[datetime(2012, 1, 25):datetime(2012, 2, 7)] = 50 + ts[datetime(2012, 1, 19):datetime(2012, 1, 27)] = 0 + + sr = ts.sample_interval(sampling_period=timedelta(days=1), end=datetime(2012, 2, 1)) + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 10.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 10.0), + (pd.Timestamp('2012-01-07 00:00:00'), 10.0), + (pd.Timestamp('2012-01-08 00:00:00'), 10.0), + (pd.Timestamp('2012-01-09 00:00:00'), 10.0), + (pd.Timestamp('2012-01-10 00:00:00'), 10.0), + (pd.Timestamp('2012-01-11 00:00:00'), 10.0), + (pd.Timestamp('2012-01-12 00:00:00'), 10.0), + (pd.Timestamp('2012-01-13 00:00:00'), 10.0), + (pd.Timestamp('2012-01-14 00:00:00'), 10.0), + (pd.Timestamp('2012-01-15 00:00:00'), 10.0), + (pd.Timestamp('2012-01-16 00:00:00'), 10.0), + (pd.Timestamp('2012-01-17 00:00:00'), 10.0), + (pd.Timestamp('2012-01-18 00:00:00'), 10.0), + (pd.Timestamp('2012-01-19 00:00:00'), 0.0), + (pd.Timestamp('2012-01-20 00:00:00'), 0.0), + (pd.Timestamp('2012-01-21 00:00:00'), 0.0), + (pd.Timestamp('2012-01-22 00:00:00'), 0.0), + (pd.Timestamp('2012-01-23 00:00:00'), 0.0), + (pd.Timestamp('2012-01-24 00:00:00'), 0.0), + (pd.Timestamp('2012-01-25 00:00:00'), 0.0), + (pd.Timestamp('2012-01-26 00:00:00'), 0.0), + (pd.Timestamp('2012-01-27 00:00:00'), 50.0), + (pd.Timestamp('2012-01-28 00:00:00'), 50.0), + (pd.Timestamp('2012-01-29 00:00:00'), 50.0), + (pd.Timestamp('2012-01-30 00:00:00'), 50.0), + (pd.Timestamp('2012-01-31 00:00:00'), 50.0)] + + +def test_sample_interval_hours(): + import pandas as pd + + ts = Domain([(datetime(2012, 1, 1), 400), + (datetime(2012, 1, 10), 400)]) + + ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] = 10 + ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50 + + sr = ts.sample_interval(sampling_period=timedelta(days=1)) + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), + 
(pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 205.0), + (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 75.0), + (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 181.25), + (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + + sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="max") + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + + sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="min") + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 10.0), + (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] From ba8044a69d83453e30fd8dab0f0fcbb5d348dc7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 16:44:02 +0200 Subject: [PATCH 02/10] clean test --- tests/test_traces.py | 54 ++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/test_traces.py b/tests/test_traces.py index 2abb590..134c947 100644 --- a/tests/test_traces.py +++ b/tests/test_traces.py @@ -181,34 +181,34 @@ def test_sample_interval_hours(): ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50 sr = ts.sample_interval(sampling_period=timedelta(days=1)) - assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 205.0), - (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 75.0), - (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 181.25), - (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 205.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 75.0), + (pd.Timestamp('2012-01-07 00:00:00'), 181.25), + (pd.Timestamp('2012-01-08 00:00:00'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="max") - assert list(sr.iteritems()) == 
[(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 400.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 400.0), + (pd.Timestamp('2012-01-07 00:00:00'), 400.0), + (pd.Timestamp('2012-01-08 00:00:00'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="min") - assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-02 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-03 00:00:00', offset='D'), 400.0), - (pd.Timestamp('2012-01-04 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-05 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-06 00:00:00', offset='D'), 10.0), - (pd.Timestamp('2012-01-07 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-08 00:00:00', offset='D'), 50.0), - (pd.Timestamp('2012-01-09 00:00:00', offset='D'), 50.0)] + assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0), + (pd.Timestamp('2012-01-02 00:00:00'), 400.0), + (pd.Timestamp('2012-01-03 00:00:00'), 400.0), + (pd.Timestamp('2012-01-04 00:00:00'), 10.0), + (pd.Timestamp('2012-01-05 00:00:00'), 10.0), + (pd.Timestamp('2012-01-06 00:00:00'), 10.0), + (pd.Timestamp('2012-01-07 00:00:00'), 50.0), + (pd.Timestamp('2012-01-08 00:00:00'), 50.0), + (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] From ef2dedc186117fb66a7eb844dc73bf5c4950767c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 16:44:14 +0200 Subject: [PATCH 03/10] add sample_interval function --- traces/timeseries.py | 105 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/traces/timeseries.py b/traces/timeseries.py index d6d91c8..8e3103c 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -392,6 +392,111 @@ def sample(self, sampling_period, start=None, end=None, current_time += sampling_period return result + def sample_interval(self, sampling_period, + start=None, end=None, + operation="mean"): + + start, end, mask = self._check_boundaries(start, end) + sampling_period = self._check_regularization(start, end, + sampling_period) + + try: + import pandas as pd + except ImportError: + msg = "sample_interval need pandas to be installed" + raise ImportError(msg) + + # create index on [start, end) + idx = pd.date_range(start, end, freq=sampling_period, closed=None) + idx_list = idx.values # list(idx) + + # create all inflexion points + def items_in_horizon(): + # yields all items between start and end as well as start and end + yield (start, self[start]) + for t, v in self.items(): + if t < start: + continue + if t > end: + break + yield t, v + yield (end, self[end]) + inflexion_times, inflexion_values = zip(*items_in_horizon()) + inflexion_times = 
pd.DatetimeIndex(inflexion_times) + + # identify all inflexion intervals + # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] + inflexion_intervals = inflexion_times.floor(sampling_period)\ + .map(idx.get_loc) + + # convert DatetimeIndex to numpy array for faster indexation + inflexion_times = inflexion_times.values + + Np1 = len(idx_list) - 1 + + # convert to timestamp + # (to make interval arithmetic faster, no need for total_seconds) + inflexion_times = (inflexion_times.astype("int64")) + idx_times = (idx.astype("int64")) + + # initialise init, update and finish functions depending + # on the aggregation operator + init, update, finish = { + "mean": ( + lambda t, v: 0.0, + lambda agg, t0, t1, v: agg + (t1 - t0) * v, + lambda agg, t_start, t_end: agg / (t_end - t_start), + ), + "max": ( + lambda t, v: v, + lambda agg, t0, t1, v: max(agg, v), + lambda agg, t_start, t_end: agg, + ), + "min": ( + lambda t, v: v, + lambda agg, t0, t1, v: min(agg, v), + lambda agg, t_start, t_end: agg, + ), + }[operation] + + # initialise first interval + t_start, t_end = idx_times[0:2] + i0, t0, v0 = 0, t_start, self[start] + agg = init(t0, v0) + + result = [] + for i1, t1, v1 in zip(inflexion_intervals, + inflexion_times, + inflexion_values): + if i0 != i1: + # change of interval + + # finish previous interval + agg = update(agg, t0, t_end, v0) + agg = finish(agg, t_start, t_end) + result.append((idx_list[i0], agg)) + + # handle all intervals between t_end and t1 + if i1 != i0 + 1: + result.append((idx_list[i0 + 1], v0)) + + # if last_point, break + if i1 == Np1: + break + + # set up new interval + t_start, t_end = idx_times[i1:i1 + 2] + i0, t0 = i1, t_start + agg = init(t0, v0) + + agg = update(agg, t0, t1, v0) + + i0, t0, v0 = i1, t1, v1 + + df = pd.DataFrame.from_records(result) + return df.set_index(0).iloc[:, 0].reindex(idx[:-1]).ffill() + + def moving_average(self, sampling_period, window_size=None, start=None, end=None, From 2ae977788a3f7adcbb1419d1c561b479256658d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Sat, 15 Apr 2017 17:01:18 +0200 Subject: [PATCH 04/10] fix pep8 error --- traces/timeseries.py | 1 - 1 file changed, 1 deletion(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 8e3103c..051ba8c 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -496,7 +496,6 @@ def items_in_horizon(): df = pd.DataFrame.from_records(result) return df.set_index(0).iloc[:, 0].reindex(idx[:-1]).ffill() - def moving_average(self, sampling_period, window_size=None, start=None, end=None, From ff4e42fb221587d77c02765318764d7204e20c25 Mon Sep 17 00:00:00 2001 From: sdementen Date: Wed, 26 Apr 2017 05:30:20 +0200 Subject: [PATCH 05/10] clean: do not repeat start,end in items_in_horizon --- traces/timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 051ba8c..2ac81f9 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -415,9 +415,9 @@ def items_in_horizon(): # yields all items between start and end as well as start and end yield (start, self[start]) for t, v in self.items(): - if t < start: + if t <= start: continue - if t > end: + if t >= end: break yield t, v yield (end, self[end]) From b366902624a462bcbaa390b178877906fc281447 Mon Sep 17 00:00:00 2001 From: sdementen Date: Wed, 26 Apr 2017 06:20:44 +0200 Subject: [PATCH 06/10] fix: more robust detection on inflexion_intervals previous solution with floor did not work for non simple sample_interval 
(like 15 days) --- traces/timeseries.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 2ac81f9..1f5b9ca 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -426,8 +426,9 @@ def items_in_horizon(): # identify all inflexion intervals # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] - inflexion_intervals = inflexion_times.floor(sampling_period)\ - .map(idx.get_loc) + inflexion_intervals = inflexion_times.map( + lambda t: idx.get_loc(t, method="ffill")) + # convert DatetimeIndex to numpy array for faster indexation inflexion_times = inflexion_times.values From b509e4c74eae28974432c2bde16d83a294c0dc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:21:51 +0200 Subject: [PATCH 07/10] improve documentation + support passing a DateTimeIndex to sample_interval --- traces/timeseries.py | 45 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 1f5b9ca..a1b62cd 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -26,7 +26,6 @@ except ImportError: pass - EXTEND_BACK = object() @@ -301,6 +300,7 @@ def iterperiods(self, start=None, end=None, value=None): value_function = self._value_function(value) # get start index and value + start_index = self._d.bisect_right(start) if start_index: start_value = self._d[self._d.iloc[start_index - 1]] @@ -392,13 +392,24 @@ def sample(self, sampling_period, start=None, end=None, current_time += sampling_period return result - def sample_interval(self, sampling_period, + def sample_interval(self, sampling_period=None, start=None, end=None, + idx=None, operation="mean"): - - start, end, mask = self._check_boundaries(start, end) - sampling_period = self._check_regularization(start, end, - sampling_period) + """ + Sampling on intervals by using some operation (mean,max,min). + + It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. + + The returing pandas.Series will be indexed either on pandas.date_range(start,end,sampling_period) or on idx. 
+ + :param sampling_period: the sampling period + :param start: the start time of the sampling + :param end: the end time of the sampling + :param idx: a DateTimeIndex with the start times of the intervals + :param operation: "mean", "max" or "min" + :return: a pandas Series with the Trace sampled + """ try: import pandas as pd @@ -406,8 +417,15 @@ def sample_interval(self, sampling_period, msg = "sample_interval need pandas to be installed" raise ImportError(msg) - # create index on [start, end) - idx = pd.date_range(start, end, freq=sampling_period, closed=None) + if idx is None: + start, end, mask = self._check_boundaries(start, end) + sampling_period = self._check_regularization(start, end, + sampling_period) + # create index on [start, end) + idx = pd.date_range(start, end, freq=sampling_period, closed=None) + else: + start, end, mask = self._check_boundaries(idx[0], idx[-1]) + idx_list = idx.values # list(idx) # create all inflexion points @@ -421,14 +439,14 @@ def items_in_horizon(): break yield t, v yield (end, self[end]) + inflexion_times, inflexion_values = zip(*items_in_horizon()) inflexion_times = pd.DatetimeIndex(inflexion_times) # identify all inflexion intervals # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] - inflexion_intervals = inflexion_times.map( - lambda t: idx.get_loc(t, method="ffill")) - + # TODO: look to use searchsorted as it operates more efficienly (but offset of 1 in most cases) + inflexion_intervals = inflexion_times.map(lambda t: idx.get_loc(t, method="ffill")) # convert DatetimeIndex to numpy array for faster indexation inflexion_times = inflexion_times.values @@ -597,7 +615,6 @@ def bin(self, unit, n_units=1, start=None, end=None, mask=None, result = sortedcontainers.SortedDict() for bin_start, bin_end in mask.spans_between(start, end, unit, n_units=n_units): - result[bin_start] = function(bin_start, bin_end, mask=mask, normalized=False) @@ -1133,7 +1150,7 @@ def __init__(self, data=None): def __repr__(self): return '\n%s\n' % \ - pprint.pformat(self._d) + pprint.pformat(self._d) def start(self): try: @@ -1203,7 +1220,6 @@ def spans_between(self, start, end, unit, n_units=1): def hour_of_day(start, end, hour): - # start should be date, or if datetime, will use date of datetime floored = utils.datetime_floor(start) @@ -1221,7 +1237,6 @@ def hour_of_day(start, end, hour): def day_of_week(start, end, weekday): - # allow weekday name or number number = utils.weekday_number(weekday) From fb71dac82d92d8549b337666a8b1f8d0b284571b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:28:30 +0200 Subject: [PATCH 08/10] add test for new idx parameter in sample_interval --- tests/test_traces.py | 20 ++++++++++++++++++++ traces/timeseries.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_traces.py b/tests/test_traces.py index 134c947..693486d 100644 --- a/tests/test_traces.py +++ b/tests/test_traces.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta import nose +from pandas.util.testing import assert_series_equal from traces import TimeSeries, Domain @@ -212,3 +213,22 @@ def test_sample_interval_hours(): (pd.Timestamp('2012-01-07 00:00:00'), 50.0), (pd.Timestamp('2012-01-08 00:00:00'), 50.0), (pd.Timestamp('2012-01-09 00:00:00'), 50.0)] + + +def test_sample_interval_index(): + import pandas as pd + + start = datetime(2012, 1, 1) + end = datetime(2012, 1, 10) + + ts = Domain([(start, 400), + (end, 400)]) + + ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] 
= 10 + ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50 + + idx = pd.date_range(start, end, freq="D") + sr = ts.sample_interval(sampling_period=timedelta(days=1)) + sr2 = ts.sample_interval(idx=idx) + + assert_series_equal(sr, sr2) diff --git a/traces/timeseries.py b/traces/timeseries.py index a1b62cd..53b074e 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -401,7 +401,7 @@ def sample_interval(self, sampling_period=None, It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. - The returing pandas.Series will be indexed either on pandas.date_range(start,end,sampling_period) or on idx. + The returing pandas.Series will be indexed either on pandas.date_range(start,end,freq=sampling_period) or on idx. :param sampling_period: the sampling period :param start: the start time of the sampling From 09f7d038c251243ac58431503e47a5434eb7c239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:45:48 +0200 Subject: [PATCH 09/10] fix pep8 --- traces/timeseries.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index 53b074e..c8d0835 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -396,12 +396,13 @@ def sample_interval(self, sampling_period=None, start=None, end=None, idx=None, operation="mean"): - """ - Sampling on intervals by using some operation (mean,max,min). + """Sampling on intervals by using some operation (mean,max,min). - It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. + It can be called either with sampling_period, [start], [end] + or with a idx as a DateTimeIndex. - The returing pandas.Series will be indexed either on pandas.date_range(start,end,freq=sampling_period) or on idx. + The returing pandas.Series will be indexed either on + pandas.date_range(start,end,freq=sampling_period) or on idx. 
:param sampling_period: the sampling period :param start: the start time of the sampling @@ -420,7 +421,7 @@ def sample_interval(self, sampling_period=None, if idx is None: start, end, mask = self._check_boundaries(start, end) sampling_period = self._check_regularization(start, end, - sampling_period) + sampling_period) # create index on [start, end) idx = pd.date_range(start, end, freq=sampling_period, closed=None) else: @@ -445,8 +446,10 @@ def items_in_horizon(): # identify all inflexion intervals # by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1] - # TODO: look to use searchsorted as it operates more efficienly (but offset of 1 in most cases) - inflexion_intervals = inflexion_times.map(lambda t: idx.get_loc(t, method="ffill")) + # TODO: look to use searchsorted as it operates more + # TODO: efficienly (but offset of 1 in most cases) + inflexion_intervals = inflexion_times.map( + lambda t: idx.get_loc(t, method="ffill")) # convert DatetimeIndex to numpy array for faster indexation inflexion_times = inflexion_times.values From 31a1ab886c3863292ad34ac5901d2ad0c4d9f4e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20de=20Menten?= Date: Wed, 26 Apr 2017 06:54:23 +0200 Subject: [PATCH 10/10] fix pep8 --- traces/timeseries.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/traces/timeseries.py b/traces/timeseries.py index c8d0835..10140f2 100644 --- a/traces/timeseries.py +++ b/traces/timeseries.py @@ -397,17 +397,17 @@ def sample_interval(self, sampling_period=None, idx=None, operation="mean"): """Sampling on intervals by using some operation (mean,max,min). - - It can be called either with sampling_period, [start], [end] + + It can be called either with sampling_period, [start], [end] or with a idx as a DateTimeIndex. - - The returing pandas.Series will be indexed either on + + The returing pandas.Series will be indexed either on pandas.date_range(start,end,freq=sampling_period) or on idx. - + :param sampling_period: the sampling period :param start: the start time of the sampling :param end: the end time of the sampling - :param idx: a DateTimeIndex with the start times of the intervals + :param idx: a DateTimeIndex with the start times of the intervals :param operation: "mean", "max" or "min" :return: a pandas Series with the Trace sampled """
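
A minimal usage sketch of the sample_interval API added in this series, condensed from the tests in tests/test_traces.py above. It assumes pandas is installed and the traces package is at the state of patch 10/10; Domain is the same class the tests import from traces, and the variable names below are illustrative only.

from datetime import datetime, timedelta

import pandas as pd

from traces import Domain

# Piecewise-constant trace: value 400 outside the two windows set below.
ts = Domain([(datetime(2012, 1, 1), 400),
             (datetime(2012, 1, 10), 400)])
ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] = 10
ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50

# Time-weighted mean over daily intervals on [start, end); the result is a
# pandas Series indexed on the interval start times.
daily_mean = ts.sample_interval(sampling_period=timedelta(days=1))

# Same intervals, keeping the maximum (or minimum) value seen in each one.
daily_max = ts.sample_interval(sampling_period=timedelta(days=1),
                               operation="max")
daily_min = ts.sample_interval(sampling_period=timedelta(days=1),
                               operation="min")

# Equivalent call passing an explicit index of interval start times
# (the idx parameter introduced in patch 07).
idx = pd.date_range(datetime(2012, 1, 1), datetime(2012, 1, 10), freq="D")
daily_mean_idx = ts.sample_interval(idx=idx)

# Expected True, mirroring test_sample_interval_index in patch 08.
print(daily_mean.equals(daily_mean_idx))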