diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 498d808..816f372 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -10,6 +10,8 @@ Major: - Updated requirements to use mabwiser>=2.7 to benefit from enhancements, including vectorized predict for Linear policies and tracking of arm status. - Fixed tests due to changes in random seeding for Linear policies. +Minor: +- Added Diversity metrics to available MAB evaluation metrics. ------------------------------------------------------------------------------- August, 16, 2022 1.0.3 diff --git a/mab2rec/_version.py b/mab2rec/_version.py index d141368..5d9de70 100644 --- a/mab2rec/_version.py +++ b/mab2rec/_version.py @@ -4,5 +4,5 @@ __author__ = "FMR LLC" __email__ = "opensource@fmr.com" -__version__ = "1.0.3" +__version__ = "1.1.0" __copyright__ = "Copyright (C), FMR LLC" diff --git a/mab2rec/pipeline.py b/mab2rec/pipeline.py index 5128f5b..03abadc 100644 --- a/mab2rec/pipeline.py +++ b/mab2rec/pipeline.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from jurity.recommenders import CombinedMetrics, BinaryRecoMetrics, RankingRecoMetrics +from jurity.recommenders import CombinedMetrics, BinaryRecoMetrics, RankingRecoMetrics, DiversityRecoMetrics from sklearn.model_selection import GroupKFold from mabwiser.utils import check_true, Arm @@ -620,8 +620,11 @@ def _validate_bench(recommenders, metrics, train_data, test_data, cv): RankingRecoMetrics.Precision, RankingRecoMetrics.Recall, RankingRecoMetrics.NDCG, - RankingRecoMetrics.MAP)), - TypeError("Evaluation metric values must be BinaryRecoMetrics or RankingRecoMetrics instances.")) + RankingRecoMetrics.MAP, + DiversityRecoMetrics.InterListDiversity, + DiversityRecoMetrics.IntraListDiversity)), + TypeError("Evaluation metric values must be BinaryRecoMetrics, RankingRecoMetrics, " + "or DiversityRecoMetrics instances.")) # Train/test data check_true(train_data is not None, ValueError("Train data cannot be none.")) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index df45a41..bbcc64b 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -10,6 +10,7 @@ import pandas as pd from mabwiser.linear import _Linear +from jurity.recommenders import DiversityRecoMetrics from mab2rec import BanditRecommender, LearningPolicy, NeighborhoodPolicy from mab2rec.pipeline import train, score, benchmark @@ -390,3 +391,25 @@ def test_benchmark_cv(self): for rec in recommenders.values(): self.assertTrue(rec.mab is None) + + def test_benchmark_diversity_metrics(self): + recommenders = deepcopy(self.recommenders) + metrics = [] + metric_params = {'click_column': Constants.score, + 'user_id_column': Constants.user_id, + 'item_id_column': Constants.item_id} + for k in [3, 5]: + metrics.append(DiversityRecoMetrics.InterListDiversity(**metric_params, k=k, + user_sample_size=100)) + metrics.append(DiversityRecoMetrics.IntraListDiversity(**metric_params, k=k, + user_sample_size=100, + item_features=item_features_df)) + + recommendations, rec_metrics = benchmark(recommenders, metrics, train_data, test_data, + user_features=user_features_df) + self.assertEqual(recommendations.keys(), self.recommenders.keys()) + self.assertEqual(rec_metrics.keys(), self.recommenders.keys()) + self.assertAlmostEqual(rec_metrics["Random"]["Inter-List Diversity@3"], 0.9856228956228957) + self.assertAlmostEqual(rec_metrics["Random"]["Inter-List Diversity@5"], 0.9749818181818182) + self.assertAlmostEqual(rec_metrics["Random"]["Intra-List Diversity@3"], 0.7602157694547105) + self.assertAlmostEqual(rec_metrics["Random"]["Intra-List Diversity@5"], 0.7547351779782561)