Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use summary_metrics in optimal_freq calculation now that new data is supported. #555

Merged
merged 1 commit into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 10 additions & 175 deletions meridian/analysis/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3480,175 +3480,6 @@ def baseline_summary_metrics(
baseline_pct_of_contribution,
])

# TODO: This method can be replaced once generalized
# `media_summary_metric` is done.
def _counterfactual_metric_dataset(
    self,
    use_posterior: bool = True,
    new_data: DataTensors | None = None,
    marginal_roi_by_reach: bool = True,
    selected_geos: Sequence[str] | None = None,
    selected_times: Sequence[str] | None = None,
    use_kpi: bool = False,
    attrs: Mapping[str, Any] | None = None,
    confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
    batch_size: int = constants.DEFAULT_BATCH_SIZE,
) -> xr.Dataset:
  """Calculates the counterfactual metric dataset.

  Computes per-channel paid-media metrics (spend, incremental outcome, ROI,
  mROI, effectiveness, CPIK, contribution %) under counterfactual input
  tensors, summarized as mean/median/credible-interval statistics across
  posterior (or prior) draws.

  Args:
    use_posterior: Boolean. If `True`, posterior counterfactual metrics are
      generated. If `False`, prior counterfactual metrics are generated.
    new_data: Optional DataTensors. When specified, it contains the
      counterfactual `media`, `reach`, `frequency`, `media_spend`, `rf_spend`
      and `revenue_per_kpi` values. The new tensors' dimensions must match the
      dimensions of the corresponding original tensors from
      `meridian.input_data`. If `None`, the existing tensors from the Meridian
      object are used.
    marginal_roi_by_reach: Boolean. Marginal ROI (mROI) is defined as the
      return on the next dollar spent. If this argument is `True`, the
      assumption is that the next dollar spent only impacts reach, holding
      frequency constant. If this argument is `False`, the assumption is that
      the next dollar spent only impacts frequency, holding reach constant.
    selected_geos: Optional list containing a subset of geos to include. By
      default, all geos are included.
    selected_times: Optional list containing a subset of times to include. By
      default, all time periods are included.
    use_kpi: Boolean. If `True`, the counterfactual metrics are calculated
      using KPI. If `False`, the counterfactual metrics are calculated using
      revenue.
    attrs: Optional dictionary of attributes to add to the dataset.
    confidence_level: Confidence level for prior and posterior credible
      intervals, represented as a value between zero and one.
    batch_size: Maximum draws per chain in each batch. The calculation is run
      in batches to avoid memory exhaustion. If a memory error occurs, try
      reducing `batch_size`. The calculation will generally be faster with
      larger `batch_size` values.

  Returns:
    An xarray Dataset which contains:
    * Coordinates: `channel`, `metric` (`mean`, `median`, `ci_lo`, `ci_hi`).
    * Data variables:
      * `spend`: The spend for each channel.
      * `pct_of_spend`: The percentage of spend for each channel.
      * `incremental_outcome`: The incremental outcome for each channel.
      * `pct_of_contribution`: The contribution percentage for each channel.
      * `roi`: The ROI for each channel.
      * `effectiveness`: The effectiveness for each channel.
      * `mroi`: The marginal ROI for each channel.
      * `cpik`: The CPIK for each channel.
  """
  # Shared keyword arguments for geo/time filtering and for the batched
  # posterior/prior metric computations, so every metric below is computed
  # over the same selection and draw settings.
  dim_kwargs = {
      "selected_geos": selected_geos,
      "selected_times": selected_times,
  }
  metric_tensor_kwargs = {
      "use_posterior": use_posterior,
      "use_kpi": use_kpi,
      "batch_size": batch_size,
  }
  # Fill any tensors missing from `new_data` with the originals from the
  # Meridian object, validating them for ROI analysis.
  filled_data = self._validate_and_fill_roi_analysis_arguments(
      new_data=new_data or DataTensors()
  )
  spend = filled_data.total_spend()
  # A 3-D spend tensor is (geo, time, channel); aggregate it down to
  # per-channel totals over the selected geos and times.
  if spend is not None and spend.ndim == 3:
    spend = self.filter_and_aggregate_geos_and_times(spend, **dim_kwargs)

  # _counterfactual_metric_dataset() is called only from `optimal_freq()`
  # and uses only paid channels.
  incremental_outcome_tensor = self.incremental_outcome(
      new_data=filled_data,
      include_non_paid_channels=False,
      **dim_kwargs,
      **metric_tensor_kwargs,
  )
  # expected_outcome returns a tensor of shape (n_chains, n_draws).
  mean_expected_outcome = tf.reduce_mean(
      self.expected_outcome(
          new_data=filled_data,
          **dim_kwargs,
          **metric_tensor_kwargs,
      ),
      (0, 1),
  )

  # Calculate the mean, median, and confidence intervals for each metric.
  incremental_outcome = get_central_tendency_and_ci(
      data=incremental_outcome_tensor,
      confidence_level=confidence_level,
      include_median=True,
  )
  # Contribution as a percentage of the mean expected outcome; `[..., None]`
  # broadcasts the scalar-per-draw mean across the channel axis.
  pct_of_contribution = get_central_tendency_and_ci(
      data=incremental_outcome_tensor
      / mean_expected_outcome[..., None]
      * 100,
      confidence_level=confidence_level,
      include_median=True,
  )
  # divide_no_nan yields 0 (rather than NaN/inf) for zero-spend channels.
  roi = get_central_tendency_and_ci(
      data=tf.math.divide_no_nan(incremental_outcome_tensor, spend),
      confidence_level=confidence_level,
      include_median=True,
  )
  mroi = get_central_tendency_and_ci(
      data=self.marginal_roi(
          by_reach=marginal_roi_by_reach,
          new_data=filled_data,
          **dim_kwargs,
          **metric_tensor_kwargs,
      ),
      confidence_level=confidence_level,
      include_median=True,
  )
  # Effectiveness = incremental outcome per impression, using impressions
  # aggregated at the (counterfactual) frequency in `filled_data`.
  effectiveness = get_central_tendency_and_ci(
      data=incremental_outcome_tensor
      / self.get_aggregated_impressions(
          **dim_kwargs,
          optimal_frequency=filled_data.frequency,
          include_non_paid_channels=False,
      ),
      confidence_level=confidence_level,
      include_median=True,
  )
  # CPIK = cost per incremental KPI: spend divided by incremental outcome.
  cpik = get_central_tendency_and_ci(
      data=tf.math.divide_no_nan(spend, incremental_outcome_tensor),
      confidence_level=confidence_level,
      include_median=True,
  )

  # Guard against division by zero when total spend is zero (pct_of_spend
  # is then 0 for every channel rather than NaN).
  budget = np.sum(spend) if np.sum(spend) > 0 else 1
  dims = [constants.CHANNEL, constants.METRIC]
  data_vars = {
      constants.SPEND: ([constants.CHANNEL], spend),
      constants.PCT_OF_SPEND: ([constants.CHANNEL], spend / budget),
      constants.INCREMENTAL_OUTCOME: (dims, incremental_outcome),
      constants.PCT_OF_CONTRIBUTION: (dims, pct_of_contribution),
      constants.ROI: (dims, roi),
      constants.MROI: (dims, mroi),
      constants.EFFECTIVENESS: (dims, effectiveness),
      constants.CPIK: (dims, cpik),
  }

  # NOTE(review): channel order of `spend` and the metric tensors is assumed
  # to match `get_all_paid_channels()` — confirm upstream ordering.
  return xr.Dataset(
      data_vars=data_vars,
      coords={
          constants.CHANNEL: (
              [constants.CHANNEL],
              self._meridian.input_data.get_all_paid_channels(),
          ),
          constants.METRIC: (
              [constants.METRIC],
              [
                  constants.MEAN,
                  constants.MEDIAN,
                  constants.CI_LO,
                  constants.CI_HI,
              ],
          ),
      },
      attrs=attrs,
  )

def optimal_freq(
self,
freq_grid: Sequence[float] | None = None,
Expand Down Expand Up @@ -3770,26 +3601,30 @@ def optimal_freq(
)

# Compute the optimized metrics based on the optimal frequency.
optimized_metrics_by_reach = self._counterfactual_metric_dataset(
use_posterior=use_posterior,
optimized_metrics_by_reach = self.summary_metrics(
new_data=DataTensors(
reach=optimal_reach, frequency=optimal_frequency_tensor
),
marginal_roi_by_reach=True,
selected_geos=selected_geos,
selected_times=selected_times,
use_kpi=use_kpi,
).sel({constants.CHANNEL: rf_channel_values})
optimized_metrics_by_frequency = self._counterfactual_metric_dataset(
use_posterior=use_posterior,
).sel({
constants.CHANNEL: rf_channel_values,
constants.DISTRIBUTION: dist_type,
})
optimized_metrics_by_frequency = self.summary_metrics(
new_data=DataTensors(
reach=optimal_reach, frequency=optimal_frequency_tensor
),
marginal_roi_by_reach=False,
selected_geos=selected_geos,
selected_times=selected_times,
use_kpi=use_kpi,
).sel({constants.CHANNEL: rf_channel_values})
).sel({
constants.CHANNEL: rf_channel_values,
constants.DISTRIBUTION: dist_type,
})

data_vars = {
constants.ROI: (
Expand Down
24 changes: 12 additions & 12 deletions meridian/analysis/analyzer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1381,8 +1381,8 @@ def test_optimal_frequency_data_media_and_rf_correct(self):
constants.OPTIMIZED_EFFECTIVENESS: (
[constants.RF_CHANNEL, constants.METRIC],
[
[0.001699, 0.001705, 0.000595, 0.002795],
[0.00259, 0.002557, 0.000663, 0.004585],
[0.00038, 0.000382, 0.000133, 0.000626],
[0.000544, 0.000537, 0.000139, 0.000963],
],
),
constants.OPTIMIZED_PCT_OF_CONTRIBUTION: (
Expand All @@ -1406,7 +1406,7 @@ def test_optimal_frequency_data_media_and_rf_correct(self):
),
constants.OPTIMIZED_CPIK: (
[constants.RF_CHANNEL, constants.METRIC],
[[0.371, 0.371, 0.132, 0.623], [0.337, 0.337, 0.085, 0.591]],
[[1.16, 1.13, 0.417, 1.95], [1.06, 1.05, 0.268, 1.85]],
),
},
attrs={
Expand Down Expand Up @@ -3306,8 +3306,8 @@ def test_optimal_frequency_data_rf_only_correct(self):
constants.OPTIMIZED_EFFECTIVENESS: (
[constants.RF_CHANNEL, constants.METRIC],
[
[0.000604, 0.000458, 0.000146, 0.001189],
[0.002804, 0.003047, 0.000166, 0.005135],
[4.055367e-04, 3.081534e-04, 9.794392e-05, 7.991136e-04],
[5.891235e-04, 6.401768e-04, 3.493646e-05, 1.078733e-03],
],
),
constants.OPTIMIZED_PCT_OF_CONTRIBUTION: (
Expand All @@ -3332,8 +3332,8 @@ def test_optimal_frequency_data_rf_only_correct(self):
constants.OPTIMIZED_CPIK: (
[constants.RF_CHANNEL, constants.METRIC],
[
[0.476, 0.48521742, 0.104, 0.849],
[0.698, 0.231271, 0.076, 2.403],
[1.496871, 1.523583, 0.329373, 2.666992],
[2.191838, 0.726191, 0.239616, 7.546689],
],
),
},
Expand Down Expand Up @@ -3837,8 +3837,8 @@ def test_optimal_frequency_data_no_revenue_per_kpi_correct(self):
constants.OPTIMIZED_EFFECTIVENESS: (
[constants.RF_CHANNEL, constants.METRIC],
[
[0.000541, 0.000543, 0.00019, 0.00089],
[0.000832, 0.000821, 0.000215, 0.00147],
[1.211769e-04, 1.216424e-04, 4.246090e-05, 1.993549e-04],
[1.747521e-04, 1.725175e-04, 4.524036e-05, 3.088720e-04],
],
),
constants.OPTIMIZED_MROI_BY_REACH: (
Expand All @@ -3858,8 +3858,8 @@ def test_optimal_frequency_data_no_revenue_per_kpi_correct(self):
constants.OPTIMIZED_CPIK: (
[constants.RF_CHANNEL, constants.METRIC],
[
[1.166, 1.139, 0.417, 1.959],
[1.042, 1.038, 0.267, 1.82],
[1.1664926, 1.1397777, 0.41729444, 1.9592067],
[1.041547, 1.0384206, 0.26651454, 1.8195891],
],
),
},
Expand Down Expand Up @@ -3910,7 +3910,7 @@ def test_optimal_frequency_data_no_revenue_per_kpi_correct(self):
xr.testing.assert_allclose(
actual.optimized_cpik,
expected.optimized_cpik,
atol=0.001,
atol=0.01,
)
self.assertEqual(actual.confidence_level, expected.confidence_level)
self.assertEqual(actual.use_posterior, expected.use_posterior)
Expand Down