Adding RankMatchFailure metric (#184)
* Adding RankMatchFailure metric

* Adding tests and fixing a few bugs

* Adding missing variable
arvindsrikantan committed Jul 29, 2022
1 parent 83cbd34 commit 6ff9238
Showing 11 changed files with 743 additions and 130 deletions.
1 change: 1 addition & 0 deletions python/ml4ir/applications/ranking/config/keys.py
@@ -27,6 +27,7 @@ class MetricKey(Key):
     ACR = "ACR"
     NDCG = "NDCG"
     PRECISION = "Precision"
+    RankMatchFailure = "RankMatchFailure"
     CATEGORICAL_ACCURACY = "categorical_accuracy"
     TOP_5_CATEGORICAL_ACCURACY = "top_5_categorical_accuracy"

21 changes: 15 additions & 6 deletions python/ml4ir/applications/ranking/model/losses/listwise_losses.py
@@ -36,18 +36,24 @@ def _loss_fn(y_true, y_pred):
         mask : [batch_size, num_classes]
         """

-        #Fixme
+        # Fixme
         """
         Queries with ties in the highest scores would have multiple one's in the 1-hot vector.
         Queries with all zeros for y_true would have all ones as their 1-hot vector.
         A simple remedy is to scale down the loss by the number of ties per query.
         """
         if is_aux_loss: # converting y-true to 1-hot for cce
-            y_true_1_hot = tf.equal(y_true, tf.expand_dims(tf.math.reduce_max(y_true, axis=1), axis=1))
+            y_true_1_hot = tf.equal(
+                y_true, tf.expand_dims(tf.math.reduce_max(y_true, axis=1), axis=1)
+            )
             y_true_1_hot = tf.cast(y_true_1_hot, dtype=tf.float32)
             # scaling down the loss of a query by 1/(number of ties)
-            sample_weights = tf.math.divide(tf.constant(1, dtype=tf.float32), tf.reduce_sum(y_true_1_hot, axis=1))
-            return cce(y_true_1_hot, tf.math.multiply(y_pred, mask), sample_weight=sample_weights)
+            sample_weights = tf.math.divide(
+                tf.constant(1, dtype=tf.float32), tf.reduce_sum(y_true_1_hot, axis=1)
+            )
+            return cce(
+                y_true_1_hot, tf.math.multiply(y_pred, mask), sample_weight=sample_weights
+            )
         else:
             return cce(y_true, tf.math.multiply(y_pred, mask))
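For illustration only (not part of this commit): a minimal sketch of the tie handling above, assuming TensorFlow 2.x and an unpadded batch. A two-way tie in y_true yields two ones in the one-hot row, and the 1/(number of ties) sample weight scales that query's loss back down.

import tensorflow as tf

# One query with a two-way tie in the top score, one without.
y_true = tf.constant([[0.9, 0.9, 0.1],
                      [0.2, 0.7, 0.1]], dtype=tf.float32)
y_pred = tf.constant([[0.5, 0.3, 0.2],
                      [0.1, 0.8, 0.1]], dtype=tf.float32)

# Same one-hot conversion as in the hunk: 1 wherever the row maximum occurs.
y_true_1_hot = tf.cast(
    tf.equal(y_true, tf.expand_dims(tf.math.reduce_max(y_true, axis=1), axis=1)),
    dtype=tf.float32,
)  # [[1., 1., 0.], [0., 1., 0.]]

# 1 / (number of ties) per query: [0.5, 1.0]
sample_weights = 1.0 / tf.reduce_sum(y_true_1_hot, axis=1)

cce = tf.keras.losses.CategoricalCrossentropy()
loss = cce(y_true_1_hot, y_pred, sample_weight=sample_weights)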

@@ -133,7 +139,10 @@ def _loss_fn(y_true, y_pred):
             y_pred_non_zero = tf.boolean_mask(y_pred, non_zero)
             # retain values in y_true corresponding to non zero values in y_pred
             y_true_softmax_masked = tf.boolean_mask(y_true_softmax, non_zero)
-            return tf.math.divide(-tf.reduce_sum(y_true_softmax_masked * tf.math.log(y_pred_non_zero)), tf.constant(batch_size, dtype=tf.float32))
+            return tf.math.divide(
+                -tf.reduce_sum(y_true_softmax_masked * tf.math.log(y_pred_non_zero)),
+                tf.constant(batch_size, dtype=tf.float32),
+            )
         else:
             return -tf.reduce_sum(y_true * tf.math.log(tf.math.multiply(y_pred, mask)), 1)
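Again for illustration (not part of the commit), a sketch of the masked variant above. How non_zero is built is not shown in this hunk, so the tf.not_equal construction here is an assumption; tf.boolean_mask drops the masked (zero) positions before the log, and dividing the summed cross-entropy by batch_size gives a per-query mean.

import tensorflow as tf

batch_size = 2
y_true_softmax = tf.constant([[0.7, 0.3, 0.0],
                              [0.5, 0.5, 0.0]], dtype=tf.float32)
y_pred = tf.constant([[0.6, 0.4, 0.0],   # zeros correspond to padded records
                      [0.3, 0.7, 0.0]], dtype=tf.float32)

non_zero = tf.not_equal(y_pred, 0.0)  # assumed mask construction
y_pred_non_zero = tf.boolean_mask(y_pred, non_zero)
y_true_softmax_masked = tf.boolean_mask(y_true_softmax, non_zero)

loss = tf.math.divide(
    -tf.reduce_sum(y_true_softmax_masked * tf.math.log(y_pred_non_zero)),
    tf.constant(batch_size, dtype=tf.float32),
)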

@@ -224,4 +233,4 @@ def _loss_fn(y_true, y_pred):
             # Scale the sum of losses down by number of queries in the batch
             return tf.math.divide(bce(y_true, y_pred), batch_size)

-        return _loss_fn
\ No newline at end of file
+        return _loss_fn
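A sketch of the scaling in the pointwise hunk above, assuming (not visible here) that bce was constructed with SUM reduction, so that dividing the summed binary cross-entropy by the number of queries yields a per-query mean.

import tensorflow as tf

bce = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.SUM)

y_true = tf.constant([[1.0, 0.0], [0.0, 1.0]])
y_pred = tf.constant([[0.8, 0.2], [0.3, 0.6]])
batch_size = tf.constant(2, dtype=tf.float32)

# Sum of per-example losses, scaled down to a per-query average.
loss = tf.math.divide(bce(y_true, y_pred), batch_size)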
@@ -1,7 +1,7 @@
 from tensorflow.keras.metrics import Metric

 from ml4ir.applications.ranking.config.keys import MetricKey
-from ml4ir.applications.ranking.model.metrics.metrics_impl import MRR, ACR
+from ml4ir.applications.ranking.model.metrics.metrics_impl import MRR, ACR, RankMatchFailure
 from ml4ir.applications.classification.model.metrics.metrics_impl import CategoricalAccuracy

@@ -25,6 +25,8 @@ def get_metric(metric_key: str) -> Metric:
         return ACR
     elif metric_key == MetricKey.NDCG:
         raise NotImplementedError
+    elif metric_key == MetricKey.RankMatchFailure:
+        return RankMatchFailure
     elif metric_key == MetricKey.CATEGORICAL_ACCURACY:
         return CategoricalAccuracy
     else:
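Usage sketch wiring the keys.py and factory changes together; the factory module's file name is truncated in this view, so the second import path below is hypothetical.

from ml4ir.applications.ranking.config.keys import MetricKey
# Hypothetical module path; the factory's file name is not shown in this diff.
from ml4ir.applications.ranking.model.metrics.metric_factory import get_metric

metric_class = get_metric(MetricKey.RankMatchFailure)  # returns the RankMatchFailure metric class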
