From db4c2e26dc9c73e7120d21429ba00c1f1270c4e2 Mon Sep 17 00:00:00 2001
From: Ashish Bharadwaj Srinivasa
Date: Fri, 7 Apr 2023 10:02:12 -0700
Subject: [PATCH] Computing ranking metrics on first positive label

---
 .../ranking/model/metrics/helpers/aux_metrics_helper.py | 4 ++--
 .../ranking/model/metrics/helpers/metrics_helper.py     | 3 +++
 python/ml4ir/base/model/scoring/prediction_helper.py    | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/ml4ir/applications/ranking/model/metrics/helpers/aux_metrics_helper.py b/python/ml4ir/applications/ranking/model/metrics/helpers/aux_metrics_helper.py
index 7c18b368..cf815cc8 100644
--- a/python/ml4ir/applications/ranking/model/metrics/helpers/aux_metrics_helper.py
+++ b/python/ml4ir/applications/ranking/model/metrics/helpers/aux_metrics_helper.py
@@ -267,7 +267,7 @@ def compute_aux_metrics_on_query_group(query_group: pd.DataFrame,
                 compute_aux_metrics(
                     aux_label_values=query_group[aux_label],
                     ranks=query_group[old_rank_col],
-                    click_rank=query_group[query_group[label_col] == 1][old_rank_col].values[0]
+                    click_rank=query_group[query_group[label_col] == 1][old_rank_col].min()
                     if (query_group[label_col] == 1).sum() != 0
                     else float("inf"),
                     prefix="old_",
@@ -277,7 +277,7 @@ def compute_aux_metrics_on_query_group(query_group: pd.DataFrame,
                 compute_aux_metrics(
                     aux_label_values=query_group[aux_label],
                     ranks=query_group[new_rank_col],
-                    click_rank=query_group[query_group[label_col] == 1][new_rank_col].values[0]
+                    click_rank=query_group[query_group[label_col] == 1][new_rank_col].min()
                     if (query_group[label_col] == 1).sum() != 0
                     else float("inf"),
                     prefix="new_",
diff --git a/python/ml4ir/applications/ranking/model/metrics/helpers/metrics_helper.py b/python/ml4ir/applications/ranking/model/metrics/helpers/metrics_helper.py
index 7272f504..607d16cc 100644
--- a/python/ml4ir/applications/ranking/model/metrics/helpers/metrics_helper.py
+++ b/python/ml4ir/applications/ranking/model/metrics/helpers/metrics_helper.py
@@ -66,6 +66,9 @@ def get_grouped_stats(
     df_clicked = df[df[label_col] == 1.0]
     df = df[df[query_key_col].isin(df_clicked[query_key_col])]
 
+    # Pick most relevant record for each query to compute ranking metrics
+    df_clicked = df_clicked.groupby(query_key_col).apply(min)
+
     # Compute metrics on aux labels
     df_aux_metrics = pd.DataFrame()
     if aux_label:
diff --git a/python/ml4ir/base/model/scoring/prediction_helper.py b/python/ml4ir/base/model/scoring/prediction_helper.py
index 58b89ce2..8fd1662e 100644
--- a/python/ml4ir/base/model/scoring/prediction_helper.py
+++ b/python/ml4ir/base/model/scoring/prediction_helper.py
@@ -86,6 +86,7 @@ def _predict_score(features, label):
         if is_compiled:
             scores = infer(features)[output_name]
         else:
+            import pdb; pdb.set_trace()
             scores = infer(**features)[output_name]
 
         # Set scores of padded records to 0
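
Note: as a minimal standalone sketch of the behaviour change in aux_metrics_helper.py (the column names "rank" and "clicked" below are illustrative stand-ins for old_rank_col and label_col and are not part of the patch), the click rank is now taken as the smallest rank among all positively labeled records in a query group, i.e. the first positive label in the ranking, instead of the rank of whichever positive record happens to come first in the DataFrame:

    import pandas as pd

    # Toy query group whose rows are not sorted by rank; "rank" and "clicked"
    # are illustrative stand-ins for old_rank_col / label_col in the patch.
    query_group = pd.DataFrame({
        "rank":    [4, 2, 3, 1],
        "clicked": [1, 1, 0, 0],
    })

    positives = query_group[query_group["clicked"] == 1]

    # Old behaviour: rank of whichever positive record appears first in the
    # DataFrame (here 4, because the rows are not sorted by rank).
    old_click_rank = positives["rank"].values[0] if len(positives) else float("inf")

    # New behaviour: smallest rank among all positive records, i.e. the rank
    # of the first positive label in the ranking (here 2).
    new_click_rank = positives["rank"].min() if len(positives) else float("inf")

    print(old_click_rank, new_click_rank)  # -> 4 2

With unsorted rows the two differ (4 vs 2); when a query has no positive records, both fall back to float("inf"), matching the guard in the patch.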