From f327269686e80ef3cbcb8be1f9419967632c3040 Mon Sep 17 00:00:00 2001
From: stringertheory <mike.stringer.internet@gmail.com>
Date: Sun, 4 Feb 2024 15:54:45 -0600
Subject: [PATCH] removing use of numpy from eventseries

---
 tests/test_eventseries.py |  6 +++--
 traces/eventseries.py     | 50 ++++++++++++++++++++-------------------
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/tests/test_eventseries.py b/tests/test_eventseries.py
index 20f20c3..b24d3d7 100644
--- a/tests/test_eventseries.py
+++ b/tests/test_eventseries.py
@@ -147,12 +147,14 @@ def test_time_lag():
         "2019-02-16",
         "2019-02-26",
         "2019-02-16",
+        "2019-02-16",
     ]
     es = EventSeries(pd.to_datetime(data))
 
-    time_lag = es.time_lag()
+    time_lag = list(es.iter_interevent_times())
     assert time_lag[0] == pd.Timedelta(days=15)
     assert time_lag[1] == pd.Timedelta(days=0)
 
     # Make sure we got the right shape
-    assert time_lag.shape[0] == len(data) - 1
+    assert len(time_lag) == len(data) - 1
+    print(time_lag)
diff --git a/traces/eventseries.py b/traces/eventseries.py
index 9efaaeb..7255dfc 100644
--- a/traces/eventseries.py
+++ b/traces/eventseries.py
@@ -1,47 +1,49 @@
-import collections
+import itertools
 
-import numpy as np
 import sortedcontainers
 
+from . import utils
+
 
 class EventSeries(sortedcontainers.SortedList):
     def __init__(self, data=None):
         super().__init__(data)
 
     def cumsum(self):
-        """
-        Returns a TimeSeries with each unique time in the EventSeries as an
-        index point and with the cumulative number of events that have occured
-        since the earliest time in the EventSeries as the value
+        """Alias for cumulative_sum"""
+        return self.cumulative_sum()
+
+    def cumulative_sum(self):
+        """Returns a TimeSeries with each unique time in the
+        EventSeries as an index point and with the cumulative number
+        of events that have occurred since the earliest time in the
+        EventSeries as the value
+
         """
         from traces import TimeSeries
 
-        # Multiple events can happen at the same time so we need to hash them
-        # as counts
-        c = collections.Counter(self)
-        # Then we want to sort them and calculate the cumsum
-        c = sortedcontainers.SortedDict(c)
-        keys = c.keys()
-        values = np.cumsum(list(c.values()))
+        ts = TimeSeries(default=0)
+        running_total = 0
+        for t, event_group in itertools.groupby(self):
+            running_total += len(list(event_group))
+            ts[t] = running_total
 
-        return TimeSeries(zip(keys, values), default=0)
+        return ts
 
     def events_between(self, start, end):
-        """
-        Returns the number of events that occured between `start and `end.
-        Calculates on a closed interval, so start and end are included in the
-        range
+        """Returns the number of events that occurred between `start`
+        and `end`.  Calculates on a closed interval, so start and end
+        are included in the range
+
         """
         start_idx = self.bisect_left(start)
         end_idx = self.bisect_right(end)
         return end_idx - start_idx
 
-    def time_lag(self):
-        """
-        Returns a `np.array` of inter event arrival times. This will only work
-        for EventSeries of a type that have a minus operation implemented.
-        """
-        return np.array(self[1:]) - np.array(self[0:-1])
+    def iter_interevent_times(self):
+        """Yields inter-event arrival times one at a time (a generator)."""
+        for t0, t1 in utils.pairwise(self):
+            yield t1 - t0
 
     @staticmethod
     def count_active(es_open, es_closed):