From f327269686e80ef3cbcb8be1f9419967632c3040 Mon Sep 17 00:00:00 2001 From: stringertheory <mike.stringer.internet@gmail.com> Date: Sun, 4 Feb 2024 15:54:45 -0600 Subject: [PATCH] removing use of numpy from eventseries --- tests/test_eventseries.py | 6 +++-- traces/eventseries.py | 50 ++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/tests/test_eventseries.py b/tests/test_eventseries.py index 20f20c3..b24d3d7 100644 --- a/tests/test_eventseries.py +++ b/tests/test_eventseries.py @@ -147,12 +147,14 @@ def test_time_lag(): "2019-02-16", "2019-02-26", "2019-02-16", + "2019-02-16", ] es = EventSeries(pd.to_datetime(data)) - time_lag = es.time_lag() + time_lag = list(es.iter_interevent_times()) assert time_lag[0] == pd.Timedelta(days=15) assert time_lag[1] == pd.Timedelta(days=0) # Make sure we got the right shape - assert time_lag.shape[0] == len(data) - 1 + assert len(time_lag) == len(data) - 1 + print(time_lag) diff --git a/traces/eventseries.py b/traces/eventseries.py index 9efaaeb..7255dfc 100644 --- a/traces/eventseries.py +++ b/traces/eventseries.py @@ -1,47 +1,49 @@ -import collections +import itertools -import numpy as np import sortedcontainers +from . 
import utils + class EventSeries(sortedcontainers.SortedList): def __init__(self, data=None): super().__init__(data) def cumsum(self): - """ - Returns a TimeSeries with each unique time in the EventSeries as an - index point and with the cumulative number of events that have occured - since the earliest time in the EventSeries as the value + """Alias for cumulative_sum""" + return self.cumulative_sum() + + def cumulative_sum(self): + """Returns a TimeSeries with each unique time in the + EventSeries as an index point and with the cumulative number + of events that have occurred since the earliest time in the + EventSeries as the value + """ from traces import TimeSeries - # Multiple events can happen at the same time so we need to hash them - # as counts - c = collections.Counter(self) - # Then we want to sort them and calculate the cumsum - c = sortedcontainers.SortedDict(c) - keys = c.keys() - values = np.cumsum(list(c.values())) + ts = TimeSeries(default=0) + running_total = 0 + for t, event_group in itertools.groupby(self): + running_total += len(list(event_group)) + ts[t] = running_total - return TimeSeries(zip(keys, values), default=0) + return ts def events_between(self, start, end): - """ - Returns the number of events that occured between `start and `end. - Calculates on a closed interval, so start and end are included in the - range + """Returns the number of events that occurred between `start` + and `end`. Calculates on a closed interval, so start and end + are included in the range + """ start_idx = self.bisect_left(start) end_idx = self.bisect_right(end) return end_idx - start_idx - def time_lag(self): - """ - Returns a `np.array` of inter event arrival times. This will only work - for EventSeries of a type that have a minus operation implemented. 
- """ - return np.array(self[1:]) - np.array(self[0:-1]) + def iter_interevent_times(self): + """Yields inter-event arrival times one at a time (a generator, not a list).""" + for t0, t1 in utils.pairwise(self): + yield t1 - t0 @staticmethod def count_active(es_open, es_closed):