Skip to content

Commit

Permalink
removing use of numpy from eventseries
Browse files Browse the repository at this point in the history
  • Loading branch information
stringertheory committed Feb 4, 2024
1 parent 47f7016 commit f327269
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 26 deletions.
6 changes: 4 additions & 2 deletions tests/test_eventseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,14 @@ def test_time_lag():
"2019-02-16",
"2019-02-26",
"2019-02-16",
"2019-02-16",
]
es = EventSeries(pd.to_datetime(data))

time_lag = es.time_lag()
time_lag = list(es.iter_interevent_times())
assert time_lag[0] == pd.Timedelta(days=15)
assert time_lag[1] == pd.Timedelta(days=0)

# Make sure we got the right shape
assert time_lag.shape[0] == len(data) - 1
assert len(time_lag) == len(data) - 1
print(time_lag)
50 changes: 26 additions & 24 deletions traces/eventseries.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,49 @@
import collections
import itertools

import numpy as np
import sortedcontainers

from . import utils


class EventSeries(sortedcontainers.SortedList):
def __init__(self, data=None):
    """Create the series by handing ``data`` to the parent constructor.

    ``data`` is forwarded unchanged to ``sortedcontainers.SortedList``;
    with the default ``None`` the series presumably starts out empty
    (confirm against the sortedcontainers documentation).
    """
    super().__init__(data)

def cumsum(self):
"""
Returns a TimeSeries with each unique time in the EventSeries as an
index point and with the cumulative number of events that have occurred
since the earliest time in the EventSeries as the value
"""Alias for cumulative_sum"""
return self.cumulative_sum()

def cumulative_sum(self):
"""Returns a TimeSeries with each unique time in the
EventSeries as an index point and with the cumulative number
of events that have occurred since the earliest time in the
EventSeries as the value
"""
from traces import TimeSeries

# Multiple events can happen at the same time so we need to hash them
# as counts
c = collections.Counter(self)
# Then we want to sort them and calculate the cumsum
c = sortedcontainers.SortedDict(c)
keys = c.keys()
values = np.cumsum(list(c.values()))
ts = TimeSeries(default=0)
running_total = 0
for t, event_group in itertools.groupby(self):
running_total += len(list(event_group))
ts[t] = running_total

return TimeSeries(zip(keys, values), default=0)
return ts

def events_between(self, start, end):
"""
Returns the number of events that occurred between `start` and `end`.
Calculates on a closed interval, so start and end are included in the
range
"""Returns the number of events that occurred between `start`
and `end`. Calculates on a closed interval, so start and end
are included in the range
"""
start_idx = self.bisect_left(start)
end_idx = self.bisect_right(end)
return end_idx - start_idx

def time_lag(self):
    """Compute the gaps between neighbouring events as a ``np.array``.

    Every entry is an event minus the event directly before it, so the
    event type has to support subtraction. A series holding ``n >= 1``
    events produces ``n - 1`` entries; an empty series produces an
    empty array.
    """
    later = np.array(self[1:])
    earlier = np.array(self[:-1])
    return later - earlier
def iter_interevent_times(self):
    """Yield inter-event arrival times one at a time.

    For every pair that ``utils.pairwise`` produces from this series
    (presumably each event together with its successor), the value
    yielded is the second element minus the first.

    This is a generator, not a list: wrap the call in ``list(...)``
    when indexing or ``len()`` is needed.
    """
    yield from (later - earlier for earlier, later in utils.pairwise(self))

@staticmethod
def count_active(es_open, es_closed):
Expand Down

0 comments on commit f327269

Please sign in to comment.