Fix issue where bgb search would not stop

DistriNet · Oct 12, 2024 · e3b44f8 · e3b44f8
1 parent 6b7b89c
commit e3b44f8
Show file tree

Hide file tree

Showing 4 changed files with 42 additions and 39 deletions.
diff --git a/bci/database/mongo/mongodb.py b/bci/database/mongo/mongodb.py
@@ -19,7 +19,7 @@
     WorkerParameters,
 )
 from bci.evaluations.outcome_checker import OutcomeChecker
-from bci.version_control.states.state import State
+from bci.version_control.states.state import State, StateCondition
 
 logger = logging.getLogger(__name__)
 
@@ -180,6 +180,10 @@ def get_evaluated_states(
             state = State.from_dict(doc['state'])
             state.result = StateResult.from_dict(doc['results'], is_dirty=doc['dirty'])
             state.outcome = outcome_checker.get_outcome(state.result)
+            if doc['dirty']:
+                state.condition = StateCondition.FAILED
+            else:
+                state.condition = StateCondition.COMPLETED
             states.append(state)
         return states
 

diff --git a/bci/search_strategy/bgb_search.py b/bci/search_strategy/bgb_search.py
@@ -1,4 +1,5 @@
 import logging
+from typing import Optional
 
 from bci.search_strategy.bgb_sequence import BiggestGapBisectionSequence
 from bci.search_strategy.sequence_strategy import SequenceFinished
@@ -11,7 +12,7 @@
 class BiggestGapBisectionSearch(BiggestGapBisectionSequence):
     """
     This search strategy will split the biggest gap between two states in half and return the state in the middle.
-    It will prioritize gaps with different outcomes and bigger gaps.
+    It will only consider states where the non-None outcome differs.
     It stops when there are no more states to evaluate between two states with different outcomes.
     """
     def __init__(self, state_factory: StateFactory) -> None:
@@ -39,13 +40,7 @@ def next(self) -> State:
             self._add_state(self._upper_state)
             return self._upper_state
 
-        # Select a pair to bisect
-        pairs = list(zip(self._completed_states, self._completed_states[1:]))
-        # Filter out all gap pairs (pairs defining ranges without any available binaries)
-        pairs = [pair for pair in pairs if pair not in self._unavailability_gap_pairs]
-        while self.__continue_search():
-            # Prioritize pairs according to the max_key function
-            next_pair = max(pairs, key=self.max_key)
+        while next_pair := self.__get_next_pair_to_split():
             splitter_state = self._find_best_splitter_state(next_pair[0], next_pair[1])
             if splitter_state is None:
                 self._unavailability_gap_pairs.add(next_pair)
@@ -55,35 +50,36 @@ def next(self) -> State:
                 )
                 self._add_state(splitter_state)
                 return splitter_state
-            pairs.remove(next_pair)
         raise SequenceFinished()
 
-    @staticmethod
-    def max_key(pair: tuple[State, State]) -> tuple[bool, int]:
-        """
-        Returns a score used for deciding the order of pairs.
-        Pairs are sorted in the following way:
-        1. Pairs with different non-None outcomes are considered first.
-        2. After that, pairs with the biggest gap are considered first.
-        """
-        return (
-            pair[0].outcome is not None and pair[1].outcome is not None and pair[0].outcome != pair[1].outcome,
-            pair[1].index - pair[0].index,
-        )
-
-    def __continue_search(self) -> bool:
+    def __get_next_pair_to_split(self) -> Optional[tuple[State, State]]:
         """
-        Returns True if the search should continue.
-        This is the case if there are still unevaluated states between two states with different non-None outcomes.
+        Returns the next pair of states to split.
         """
-        # Filter out all states with a None outcome
-        states = [state for state in self._completed_states if state.outcome is not None]
-        # Make pairs with different outcomes
+        # Make pairwise list of states and remove pairs with the same outcome
+        states = self._completed_states
         pairs = [(state1, state2) for state1, state2 in zip(states, states[1:]) if state1.outcome != state2.outcome]
-        for first, last in pairs:
-            # Check if all states between first and last are either evaluated or unavailable
-            for index in [index for index in range(first.index + 1, last.index)]:
-                state = self._state_factory.create_state(index)
-                if state not in self._completed_states and not self._state_is_in_unavailability_gap(state):
-                    return True
-        return False
+        # Remove the first and last pair if they have a first and last state with a None outcome, respectively
+        if pairs[0][0].outcome is None:
+            pairs = pairs[1:]
+        if pairs[-1][1].outcome is None:
+            pairs = pairs[:-1]
+        # Remove all pairs that have already been identified as unavailability gaps
+        pairs = [pair for pair in pairs if pair not in self._unavailability_gap_pairs]
+        # Remove both pairs adjacent to a None-outcome state when the states on either side of it have the same outcome
+        pairs_with_failed = [pair for pair in pairs if pair[0].outcome is None or pair[1].outcome is None]
+        for i in range(0, len(pairs_with_failed), 2):
+            first_pair = pairs_with_failed[i]
+            second_pair = pairs_with_failed[i + 1]
+            if first_pair[0].outcome == second_pair[1].outcome:
+                pairs.remove(first_pair)
+                pairs.remove(second_pair)
+
+        if not pairs:
+            return None
+        # Sort pairs to prioritize pairs with bigger gaps.
+        # This way, we refrain from pinpointing pair-by-pair, making the search more efficient.
+        # E.g., when the splitter of the first gap is being evaluated, we can already evaluate the
+        # splitter of the second gap without having to wait for the first gap to be fully evaluated.
+        pairs.sort(key=lambda pair: pair[1].index - pair[0].index, reverse=True)
+        return pairs[0]
diff --git a/bci/search_strategy/bgb_sequence.py b/bci/search_strategy/bgb_sequence.py
@@ -1,4 +1,5 @@
 import logging
+from typing import Optional
 
 from bci.search_strategy.sequence_strategy import SequenceFinished, SequenceStrategy
 from bci.version_control.factory import StateFactory
@@ -56,11 +57,13 @@ def next(self) -> State:
             pairs.remove(furthest_pair)
         raise SequenceFinished()
 
-    def _find_best_splitter_state(self, first_state: State, last_state: State) -> State | None:
+    def _find_best_splitter_state(self, first_state: State, last_state: State) -> Optional[State]:
         """
         Returns the most suitable state that splits the gap between the two states.
         The state should be as close as possible to the middle of the gap and should have an available binary.
         """
+        if first_state.index + 1 == last_state.index:
+            return None
         best_splitter_index = first_state.index + (last_state.index - first_state.index) // 2
         target_state = self._state_factory.create_state(best_splitter_index)
         return self._find_closest_state_with_available_binary(target_state, (first_state, last_state))

diff --git a/bci/version_control/states/revisions/base.py b/bci/version_control/states/revisions/base.py
@@ -91,7 +91,7 @@ def _is_valid_revision_number(self, revision_number: int) -> bool:
         return re.match(r'[0-9]{1,7}', str(revision_number)) is not None
 
     def __str__(self):
-        return f'RevisionState(id: {self._revision_id}, number: {self._revision_nb})'
+        return f'RevisionState(number: {self._revision_nb}, id: {self._revision_id})'
 
     def __repr__(self):
-        return f'RevisionState(id: {self._revision_id}, number: {self._revision_nb})'
+        return f'RevisionState(number: {self._revision_nb}, id: {self._revision_id})'