Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CGPO] Mixture of judges #2159

Open
wants to merge 38 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
013aae4
base judge
gaetanlop Oct 3, 2024
0ea5a48
adding mixture of judges
gaetanlop Oct 3, 2024
517cfb0
update doc
gaetanlop Oct 3, 2024
9e5ed12
update doc
gaetanlop Oct 3, 2024
3406e53
formatting
gaetanlop Oct 3, 2024
568d2b9
fix small typo in doc
gaetanlop Oct 3, 2024
466292e
fix randomcontraintjudge
gaetanlop Oct 3, 2024
a3d90df
Merge branch 'main' into cgpo_mixture_of_judges
qgallouedec Oct 4, 2024
3f0b8b0
replace arxiv by hf papers
gaetanlop Oct 4, 2024
8995ab4
formatting
gaetanlop Oct 4, 2024
896259e
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 4, 2024
ef1feb0
fix naming in __init__
gaetanlop Oct 4, 2024
3da4a06
run precommi
gaetanlop Oct 4, 2024
765768b
adding gold answers to judges
gaetanlop Oct 7, 2024
8aaaaa1
cgpo llm judges
gaetanlop Oct 7, 2024
cfc84ed
fix init
gaetanlop Oct 7, 2024
a1e8eeb
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 7, 2024
6898285
output type
gaetanlop Oct 7, 2024
f5639a1
adjust booleans in test
gaetanlop Oct 7, 2024
289b855
adapt moj doc
gaetanlop Oct 7, 2024
308e743
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 9, 2024
2c6de87
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 11, 2024
dedc859
renaming and removing factuality and safety judges
gaetanlop Oct 11, 2024
ba0fffb
fix typo in import
gaetanlop Oct 11, 2024
226de82
fix small typo in naming
gaetanlop Oct 11, 2024
5626cd4
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 14, 2024
567b798
formatting
gaetanlop Oct 14, 2024
1c33494
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 21, 2024
64c9de8
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 24, 2024
559cd1b
Update trl/trainer/judges.py
gaetanlop Oct 24, 2024
2c29ef5
update parameter name
gaetanlop Oct 25, 2024
bd1bed8
update tests
gaetanlop Oct 25, 2024
21e3ccd
update doc
gaetanlop Oct 25, 2024
43d6cca
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Oct 29, 2024
9eca0f8
Update trl/trainer/judges.py
gaetanlop Oct 29, 2024
d5b32f0
Update doc
gaetanlop Oct 29, 2024
ac88c63
fix alltruejudge type
gaetanlop Oct 29, 2024
999154b
Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop Nov 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/source/judges.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,18 @@ judge.judge(
) # Outputs: [0, 1]
```

## AllTrueJudge

[[autodoc]] AllTrueJudge

## BaseJudge

[[autodoc]] BaseJudge

## BaseBinaryJudge

[[autodoc]] BaseBinaryJudge

## BaseRankJudge

[[autodoc]] BaseRankJudge
Expand All @@ -58,6 +66,10 @@ judge.judge(

[[autodoc]] BasePairwiseJudge

## RandomBinaryJudge

[[autodoc]] RandomBinaryJudge

## RandomRankJudge

[[autodoc]] RandomRankJudge
Expand Down
41 changes: 34 additions & 7 deletions tests/test_judges.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,52 @@

import unittest

from trl import HfPairwiseJudge, PairRMJudge, RandomPairwiseJudge, RandomRankJudge, is_llmblender_available
from trl import (
AllTrueJudge,
HfPairwiseJudge,
PairRMJudge,
RandomBinaryJudge,
RandomPairwiseJudge,
RandomRankJudge,
is_llmblender_available,
)


class TestJudges(unittest.TestCase):
def _get_prompts_and_completions(self):
def _get_prompts_and_pairwise_completions(self):
prompts = ["The capital of France is", "The biggest planet in the solar system is"]
completions = [["Paris", "Marseille"], ["Saturn", "Jupiter"]]
return prompts, completions

def _get_prompts_and_single_completions(self):
prompts = ["What's the capital of France?", "What's the color of the sky?"]
completions = ["Marseille", "blue"]
return prompts, completions

def test_all_true_judge(self):
moj = AllTrueJudge(judges=[RandomBinaryJudge(), RandomBinaryJudge()])
prompts, completions = self._get_prompts_and_single_completions()
judgements = moj.judge(prompts=prompts, completions=completions)
self.assertEqual(len(judgements), 2)
self.assertTrue(all(judgement in {True, False} for judgement in judgements))

def test_random_constraint_judge(self):
judge = RandomBinaryJudge()
prompts, completions = self._get_prompts_and_single_completions()
judgements = judge.judge(prompts=prompts, completions=completions)
self.assertEqual(len(judgements), 2)
self.assertTrue(all(judgement in {0, 1, -1} for judgement in judgements))

def test_random_pairwise_judge(self):
judge = RandomPairwiseJudge()
prompts, completions = self._get_prompts_and_completions()
prompts, completions = self._get_prompts_and_pairwise_completions()
ranks = judge.judge(prompts=prompts, completions=completions)
self.assertEqual(len(ranks), 2)
self.assertTrue(all(isinstance(rank, int) for rank in ranks))

def test_random_rank_judge(self):
judge = RandomRankJudge()
prompts, completions = self._get_prompts_and_completions()
prompts, completions = self._get_prompts_and_pairwise_completions()
ranks = judge.judge(prompts=prompts, completions=completions)
self.assertEqual(len(ranks), 2)
self.assertTrue(all(isinstance(rank, list) for rank in ranks))
Expand All @@ -41,7 +68,7 @@ def test_random_rank_judge(self):
@unittest.skip("This test needs to be run manually since it requires a valid Hugging Face API key.")
def test_hugging_face_judge(self):
judge = HfPairwiseJudge()
prompts, completions = self._get_prompts_and_completions()
prompts, completions = self._get_prompts_and_pairwise_completions()
ranks = judge.judge(prompts=prompts, completions=completions)
self.assertEqual(len(ranks), 2)
self.assertTrue(all(isinstance(rank, int) for rank in ranks))
Expand All @@ -50,7 +77,7 @@ def test_hugging_face_judge(self):
@unittest.skipIf(not is_llmblender_available(), "llm-blender is not available")
def test_pair_rm_judge(self):
judge = PairRMJudge()
prompts, completions = self._get_prompts_and_completions()
prompts, completions = self._get_prompts_and_pairwise_completions()
ranks = judge.judge(prompts=prompts, completions=completions)
self.assertEqual(len(ranks), 2)
self.assertTrue(all(isinstance(rank, int) for rank in ranks))
Expand All @@ -59,7 +86,7 @@ def test_pair_rm_judge(self):
@unittest.skipIf(not is_llmblender_available(), "llm-blender is not available")
def test_pair_rm_judge_return_scores(self):
judge = PairRMJudge()
prompts, completions = self._get_prompts_and_completions()
prompts, completions = self._get_prompts_and_pairwise_completions()
probs = judge.judge(prompts=prompts, completions=completions, return_scores=True)
self.assertEqual(len(probs), 2)
self.assertTrue(all(isinstance(prob, float) for prob in probs))
Expand Down
6 changes: 6 additions & 0 deletions trl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@
"trainer": [
"AlignPropConfig",
"AlignPropTrainer",
"AllTrueJudge",
"BaseJudge",
"BaseBinaryJudge",
"BasePairwiseJudge",
"BaseRankJudge",
"BCOConfig",
Expand Down Expand Up @@ -82,6 +84,7 @@
"PPOTrainer",
"PPOv2Config",
"PPOv2Trainer",
"RandomBinaryJudge",
"RandomPairwiseJudge",
"RandomRankJudge",
"RewardConfig",
Expand Down Expand Up @@ -146,6 +149,8 @@
from .trainer import (
AlignPropConfig,
AlignPropTrainer,
AllTrueJudge,
BaseBinaryJudge,
BaseJudge,
BasePairwiseJudge,
BaseRankJudge,
Expand Down Expand Up @@ -178,6 +183,7 @@
PPOTrainer,
PPOv2Config,
PPOv2Trainer,
RandomBinaryJudge,
RandomPairwiseJudge,
RandomRankJudge,
RewardConfig,
Expand Down
6 changes: 6 additions & 0 deletions trl/trainer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,15 @@
"gkd_trainer": ["GKDTrainer"],
"iterative_sft_trainer": ["IterativeSFTTrainer"],
"judges": [
"AllTrueJudge",
"BaseJudge",
"BaseBinaryJudge",
"BasePairwiseJudge",
"BaseRankJudge",
"HfPairwiseJudge",
"OpenAIPairwiseJudge",
"PairRMJudge",
"RandomBinaryJudge",
"RandomPairwiseJudge",
"RandomRankJudge",
],
Expand Down Expand Up @@ -98,12 +101,15 @@
from .gkd_trainer import GKDTrainer
from .iterative_sft_trainer import IterativeSFTTrainer
from .judges import (
AllTrueJudge,
BaseBinaryJudge,
BaseJudge,
BasePairwiseJudge,
BaseRankJudge,
HfPairwiseJudge,
OpenAIPairwiseJudge,
PairRMJudge,
RandomBinaryJudge,
RandomPairwiseJudge,
RandomRankJudge,
)
Expand Down
79 changes: 79 additions & 0 deletions trl/trainer/judges.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,53 @@ def judge(self, prompts: List[str], completions: List[List[str]], shuffle_order:
raise NotImplementedError("Judge subclasses must implement the `judge` method.")


class BaseBinaryJudge(BaseJudge):
    """
    Base class for binary judges.
    """

    @abstractmethod
    def judge(
        self,
        prompts: List[str],
        completions: List[str],
        gold_completions: Optional[List[str]] = None,
        shuffle_order: bool = True,
    ) -> List[int]:
        """
        Judge the completion for a given prompt. Used to assess if a completion satisfies a constraint.

        This base class should be used to implement binary evaluations as done in section 4.1.4 of the
        [CGPO paper](https://huggingface.co/papers/2409.20370).
        It is relevant for assessing whether or not a prompt completion pair satisfies a specific constraint.

        Args:
            prompts (`List[str]`): List of prompts.
            completions (`List[str]`): List of completions.
            gold_completions (`List[str]`, `optional`): List of gold completions if it exists.
            shuffle_order (`bool`): Whether to shuffle the order of the completions to avoid positional bias.

        Returns:
            List[int]: A list of binary labels:
                - 1 indicates that the completion satisfies the evaluated constraint.
                - 0 indicates that the completion does not satisfy the evaluated constraint.

        Note:
            If the judge returns -1 for any prompt, it indicates that the inner process used to compute the preference has failed.
            For instance, this could occur if the underlying language model or rule-based constraint returned an invalid answer.
            In such cases, the caller should handle these invalid indices appropriately, possibly by implementing fallback logic or error handling.
        """
        raise NotImplementedError("Judge subclasses must implement the `judge` method.")


class RandomBinaryJudge(BaseBinaryJudge):
    """
    Random binary judge, for testing purposes.

    Emits an independent random 0/1 label per prompt; the completions,
    gold completions, and shuffle flag are ignored.
    """

    def judge(self, prompts, completions, gold_completions=None, shuffle_order=True):
        # Iterate the prompts directly instead of range(len(...)); only their
        # count matters since the labels are random.
        return [random.choice([0, 1]) for _ in prompts]


class RandomRankJudge(BaseRankJudge):
"""
Random rank, for testing purposes.
Expand Down Expand Up @@ -361,3 +408,35 @@ def get_rank(prompt, candidates):

# Return the ranks
return ranks


class AllTrueJudge(BaseBinaryJudge):
    """
    Unify the decision of multiple [`BaseBinaryJudge`] instances.

    This class returns `False` if it fails on any of the binary judges (i.e., a judge returns 0 or -1)
    and returns `True` otherwise.

    It is an implementation of the Mixture of Judges as described in the
    [CGPO paper](https://huggingface.co/papers/2409.20370).

    Args:
        judges (`List[BaseBinaryJudge]`): A list of [`BaseBinaryJudge`].
    """

    def __init__(self, judges: List[BaseBinaryJudge]):
        self.judges = judges

    def judge(
        self,
        prompts: List[str],
        completions: List[str],
        gold_completions: Optional[List[str]] = None,
        shuffle_order: bool = True,
    ) -> List[bool]:
        """
        Run every inner judge on the prompt/completion pairs and combine their verdicts.

        Args:
            prompts (`List[str]`): List of prompts.
            completions (`List[str]`): List of completions.
            gold_completions (`List[str]`, `optional`): List of gold completions, forwarded to the inner judges.
            shuffle_order (`bool`): Whether the inner judges should shuffle completions to avoid positional bias.

        Returns:
            List[bool]: `True` where every inner judge returned 1, `False` otherwise
            (a 0 or a -1 failure label from any judge yields `False`).
        """
        # Each inner judge produces one label per prompt; zip(*...) regroups the
        # labels per prompt so they can be reduced with all().
        all_binary_judgments = [
            judge.judge(prompts, completions, gold_completions, shuffle_order) for judge in self.judges
        ]
        return [all(label == 1 for label in labels) for labels in zip(*all_binary_judgments)]