Skip to content

Commit

Permalink
Make date coordinate interval check less stringent (fix #548)
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 733832024
  • Loading branch information
santoso-wijaya authored and The Meridian Authors committed Mar 5, 2025
1 parent 248a362 commit 5937bb1
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 36 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ To release a new version (e.g. from `1.0.0` -> `2.0.0`):

* Align `NaNs` in `spend_grid` and `incremental_outcome_grid` in the optimizer.
* Fix the stopping criteria of target total ROI in flexible budget optimization.
* Fix issue #548: Make time coordinate regularity check less stringent.

## [1.0.4] - 2025-02-28

Expand Down
6 changes: 4 additions & 2 deletions meridian/data/input_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,11 +542,13 @@ def _validate_times(self):
try:
_ = self.time_coordinates.interval_days
except ValueError as exc:
raise ValueError("Time coordinates must be evenly spaced.") from exc
raise ValueError("Time coordinates must be regularly spaced.") from exc
try:
_ = self.media_time_coordinates.interval_days
except ValueError as exc:
raise ValueError("Media time coordinates must be evenly spaced.") from exc
raise ValueError(
"Media time coordinates must be regularly spaced."
) from exc

def _validate_time(self, array: xr.DataArray | None):
"""Validates the `time` dimension of the given `DataArray`.
Expand Down
33 changes: 21 additions & 12 deletions meridian/data/input_data_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,15 +956,20 @@ def test_time_interval_irregular(self):
# that it is not regularly spaced with other coordinate values.
old_time_coords = kpi[constants.TIME].values
new_time_coords = old_time_coords.copy()
new_time_coords[-1] = (
datetime.datetime.strptime(old_time_coords[-1], constants.DATE_FORMAT)
+ datetime.timedelta(days=2)
).strftime(constants.DATE_FORMAT)
# There are 150 time coordinates; here we disturb ~10 of them by 2 days each
# to make them fail the time index's regularity test.
for disturb_index in (3, 10, 23, 30, 55, 99, 101, 120, 133, 140):
new_time_coords[disturb_index] = (
datetime.datetime.strptime(
old_time_coords[disturb_index], constants.DATE_FORMAT
)
+ datetime.timedelta(days=2)
).strftime(constants.DATE_FORMAT)
kpi = kpi.assign_coords({constants.TIME: new_time_coords})

with self.assertRaisesWithLiteralMatch(
ValueError,
"Time coordinates must be evenly spaced.",
"Time coordinates must be regularly spaced.",
):
input_data.InputData(
controls=self.not_lagged_controls,
Expand All @@ -982,17 +987,21 @@ def test_media_time_interval_irregular(self):
# value so that it is not regularly spaced with other coordinate values.
old_media_time_coords = media[constants.MEDIA_TIME].values
new_media_time_coords = old_media_time_coords.copy()
new_media_time_coords[-1] = (
datetime.datetime.strptime(
old_media_time_coords[-1], constants.DATE_FORMAT
)
+ datetime.timedelta(days=2)
).strftime(constants.DATE_FORMAT)
# There are 150 time coordinates; here we disturb ~10 of them by 2 days each
# to make them fail the time index's regularity test.
print(f"old_media_time_coords: {old_media_time_coords}")
for disturb_index in (3, 10, 23, 30, 55, 99, 101, 120, 133, 140):
new_media_time_coords[disturb_index] = (
datetime.datetime.strptime(
old_media_time_coords[disturb_index], constants.DATE_FORMAT
)
+ datetime.timedelta(days=2)
).strftime(constants.DATE_FORMAT)
media = media.assign_coords({constants.MEDIA_TIME: new_media_time_coords})

with self.assertRaisesWithLiteralMatch(
ValueError,
"Media time coordinates must be evenly spaced.",
"Media time coordinates must be regularly spaced.",
):
input_data.InputData(
controls=self.not_lagged_controls,
Expand Down
54 changes: 39 additions & 15 deletions meridian/data/time_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ def from_dates(
return cls(datetime_index=_to_pandas_datetime_index(dates))

def __post_init__(self):
if len(self.datetime_index) <= 1:
raise ValueError(
"There must be more than one date index in the time coordinates."
)
if not self.datetime_index.is_monotonic_increasing:
raise ValueError(
"Time coordinates must be strictly monotonically increasing."
Expand All @@ -162,28 +166,48 @@ def all_dates_str(self) -> list[str]:

@functools.cached_property
def interval_days(self) -> int:
"""Returns the interval between two neighboring dates in `all_dates`.
"""Returns the *mean* interval between two neighboring dates in `all_dates`.
Raises:
ValueError if the date index is not regularly spaced.
ValueError if the date index is not "regularly spaced".
"""
if not self._is_regular_time_index():
# TODO: Add debugging info here.
raise ValueError("Time coordinates are not regularly spaced!")

# Calculate the difference between consecutive dates, in days.
diff = self.datetime_index.to_series().diff().dt.days.dropna()
diffs = self._interval_days
# Return the rounded mean interval.
return int(np.round(np.mean(diffs)))

if diff.nunique() == 0:
# This edge case happens when there is only one date in the index.
# This is unlikely to happen in practice, but we handle it just in case.
warnings.warn(
"The time coordinates only have one date. Returning an interval of 0."
)
return 0
@property
def _timedelta_index(self) -> pd.TimedeltaIndex:
"""Returns the timedeltas between consecutive dates in `datetime_index`."""
return self.datetime_index.diff().dropna()

@property
def _interval_days(self) -> Sequence[int]:
"""Converts `_timedelta_index` to a sequence of days for easier compute."""
return self._timedelta_index.days.to_numpy()

def _is_regular_time_index(self, cv_threshold: float = 0.1) -> bool:
"""Returns True if the time index is "regularly spaced".
Here, "regularly spaced" means that the coefficient of variation (CV) of the
deltas between consecutive dates in the time index (STD/MEAN) is less than
`cv_threshold`.
"""
diffs = self._interval_days
mean = np.mean(diffs)
std = np.std(diffs)
cv = std / mean

# Check for regularity.
if diff.nunique() != 1:
raise ValueError("`datetime_index` coordinates are not evenly spaced!")
is_regular = cv < cv_threshold
if not is_regular:
# TODO: Emit offending date intervals.
warnings.warn(f"Time coordinates CV value: {cv}")

# Finally, return the mode interval.
return diff.mode()[0]
return is_regular

def get_selected_dates(
self,
Expand Down
139 changes: 132 additions & 7 deletions meridian/data/time_coordinates_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ def setUp(self):
self.all_dates
)

def test_constructor_must_have_more_than_one_date(self):
with self.assertRaisesRegex(
ValueError,
"There must be more than one date index",
):
time_coordinates.TimeCoordinates.from_dates(
pd.DatetimeIndex([np.datetime64("2024-01-01")])
)

def test_property_all_dates(self):
expected_dates = [
dt.datetime.strptime(date, constants.DATE_FORMAT).date()
Expand Down Expand Up @@ -113,22 +122,138 @@ def test_property_interval_days_daily(self):
)
self.assertEqual(coordinates.interval_days, 1)

def test_property_nonregular_interval_days(self):
@parameterized.named_parameters(
dict(
testcase_name="weekly_interval_days",
dates=[
dt.datetime(2024, 1, 1) + dt.timedelta(days=7 * i)
for i in range(14)
],
expected_interval_days=7,
),
dict(
testcase_name="thirty_day_interval_days",
dates=[
dt.datetime(2024, 1, 1) + dt.timedelta(days=30 * i)
for i in range(24)
],
expected_interval_days=30,
),
)
def test_property_interval_days_regular(
self, dates: list[dt.date], expected_interval_days
):
coordinates = time_coordinates.TimeCoordinates.from_dates(
pd.DatetimeIndex(dates)
)
self.assertEqual(coordinates.interval_days, expected_interval_days)

@parameterized.named_parameters(
dict(
testcase_name="weekly_skips_one_day_on_last_week",
dates=[
dt.datetime(2024, 1, 1)
+ dt.timedelta(days=7 * i + (-1 if i == 13 else 0))
for i in range(20)
],
expected_interval_days=7,
),
dict(
testcase_name="weekly_skips_one_day_every_fourth_week",
dates=[
dt.datetime(2024, 1, 1)
+ dt.timedelta(days=7 * i + (1 if i % 4 == 0 else 0))
for i in range(20)
],
expected_interval_days=7,
),
dict(
testcase_name="monthly_on_the_first_day_of_each_month_one_year",
dates=[dt.datetime(2024, m, 1) for m in range(1, 13)],
expected_interval_days=30,
),
dict(
testcase_name="monthly_on_the_last_day_of_each_month_two_years",
dates=[
dt.datetime(2024 + y, m, 1) - dt.timedelta(days=1)
for y in range(2)
for m in range(1, 13)
],
expected_interval_days=30,
),
dict(
testcase_name="monthly_on_the_fifteenth_each_month_three_years",
dates=[
dt.datetime(2024 + y, m, 15)
for y in range(3)
for m in range(1, 13)
],
expected_interval_days=30,
),
)
def test_property_interval_days_fuzzy_regular(
self,
dates: list[dt.date],
expected_interval_days: int,
):
coordinates = time_coordinates.TimeCoordinates.from_dates(
pd.DatetimeIndex(dates)
)
self.assertEqual(coordinates.interval_days, expected_interval_days)

@parameterized.named_parameters(
dict( # CV value: 0.212
testcase_name="weekly_misses_one_entire_week",
dates=[
dt.datetime(2024, 1, 1)
+ dt.timedelta(days=7 * (i + 1 if i >= 4 else i))
for i in range(20)
],
),
dict( # CV value: 0.165
testcase_name="weekly_one_week_is_half_week",
dates=[
dt.datetime(2024, 1, 1)
+ dt.timedelta(days=7 * i + (4 if i == 10 else 0))
for i in range(25)
],
),
dict( # CV value: 0.127
testcase_name="monthly_one_month_is_off_by_ten_days",
dates=[
# February's date is 2024-02-05 (10 days early); the 15th for every other month.
dt.date(2024, m, 15) - dt.timedelta(days=(10 if m == 2 else 0))
for m in range(1, 13)
],
),
)
def test_property_interval_days_fuzzy_irregular(
self,
dates: list[dt.date],
):
coordinates = time_coordinates.TimeCoordinates.from_dates(
pd.DatetimeIndex(dates)
)
with self.assertRaisesRegex(
ValueError,
"Time coordinates are not regularly spaced!",
):
_ = coordinates.interval_days

def test_property_irregular_interval_days(self):
dates = ["2024-01-01", "2024-01-03", "2024-01-10"]
all_dates = xr.DataArray(
data=np.array(["2024-01-01", "2024-01-08", "2024-01-16"]),
data=np.array(dates),
dims=[constants.TIME],
coords={
constants.TIME: (
[constants.TIME],
["2024-01-01", "2024-01-08", "2024-01-16"],
),
constants.TIME: ([constants.TIME], dates),
},
)
coordinates = time_coordinates.TimeCoordinates.from_dates(all_dates)

with self.assertRaisesRegex(
ValueError,
"`datetime_index` coordinates are not evenly spaced!",
"Time coordinates are not regularly spaced!",
):
_ = coordinates.interval_days

Expand Down

0 comments on commit 5937bb1

Please sign in to comment.