using entailer to pre-filter the retrieved rules

fractalego · Aug 4, 2024 · 94c160e · 94c160e
1 parent 902ad0f
commit 94c160e
Show file tree

Hide file tree

Showing 5 changed files with 35 additions and 6 deletions.
diff --git a/todo.txt b/todo.txt
@@ -1,5 +1,13 @@
-* Add tqdm to indexing.
-* Make it index when wafl start first, not at the first use/login
+* apply entailer to rule retrieval:
+  if more than one rule is retrieved, then the one
+  that is entailed by the query should be chosen
+
+* the answer from the indexed files should be directed from a rule.
+   - facts and rules should live at the highest level of the retrieval
+
+
+/* Add tqdm to indexing.
+/* Make it index when wafl start first, not at the first use/login
 
 /* The prior items with timestamps might not be necessary.
 /  - Just implement a queue with a fixed size

diff --git a/wafl/answerer/answerer_implementation.py b/wafl/answerer/answerer_implementation.py
@@ -3,6 +3,7 @@
 
 from typing import List, Tuple
 
+from wafl.answerer.entailer import Entailer
 from wafl.exceptions import CloseConversation
 from wafl.data_objects.facts import Fact, Sources
 from wafl.interface.conversation import Conversation, Utterance
@@ -160,3 +161,9 @@ def add_dummy_utterances_to_continue_generation(
 
 def add_memories_to_facts(facts: str, memories: List[str]) -> str:
     """Return `facts` followed by a newline and the memories, one per line."""
     memory_lines = "\n".join(memories)
     return facts + "\n" + memory_lines
+
+
+def select_best_rules_using_entailer(
+    conversation: Conversation,
+    rules_as_strings: List[str],
+    entailer: Entailer,
+    num_rules: int,
+) -> List[str]:
+    """Rank the retrieved rules against the last user utterance and keep the top ones.
+
+    Args:
+        conversation: Conversation whose most recent "user" utterance is the query.
+        rules_as_strings: Candidate rules, already rendered as text.
+        entailer: Scorer; `get_score(query, rule)` is called once per rule.
+        num_rules: Slice bound applied to the ranked list.
+
+    Returns:
+        The rules sorted by descending score, truncated to `[:num_rules]`.
+    """
+    query_text = conversation.get_last_speaker_utterance("user")
+    # NOTE(review): `Entailer.get_score` is declared `async` in this same commit,
+    # so calling it here without `await` yields coroutine objects rather than
+    # floats, and `sorted` cannot compare those. Ranking only works once this
+    # function and its caller await the scores (or a synchronous scorer is passed).
+    ranked_rules = sorted(
+        rules_as_strings,
+        key=lambda rule: entailer.get_score(query_text, rule),
+        reverse=True,
+    )
+    return ranked_rules[:num_rules]
diff --git a/wafl/answerer/dialogue_answerer.py b/wafl/answerer/dialogue_answerer.py
@@ -2,20 +2,20 @@
 from inspect import getmembers, isfunction
 from typing import List
 
+from wafl.answerer.entailer import Entailer
 from wafl.answerer.answerer_implementation import (
     substitute_memory_in_answer_and_get_memories_if_present,
     create_one_liner,
     get_facts_with_metadata_from_facts_and_thresholds,
     add_dummy_utterances_to_continue_generation,
     add_memories_to_facts,
     execute_results_in_answer,
-    create_memory_from_fact_list,
+    create_memory_from_fact_list, select_best_rules_using_entailer,
 )
 from wafl.answerer.base_answerer import BaseAnswerer
 from wafl.answerer.rule_maker import RuleMaker
 from wafl.connectors.clients.llm_chat_client import LLMChatClient
 from wafl.data_objects.dataclasses import Query, Answer
-from wafl.data_objects.facts import Sources
 from wafl.interface.conversation import Conversation
 from wafl.simple_text_processing.questions import is_question
 
@@ -24,6 +24,7 @@ class DialogueAnswerer(BaseAnswerer):
     def __init__(self, config, knowledge, interface, code_path, logger):
         self._threshold_for_facts = 0.85
         self._client = LLMChatClient(config)
+        self._entailer = Entailer(config)
         self._knowledge = knowledge
         self._logger = logger
         self._interface = interface
@@ -108,8 +109,9 @@ async def _get_relevant_facts(self, query: Query, has_prior_rules: bool) -> str:
         return memory
 
     async def _get_relevant_rules(self, conversation: Conversation) -> List[str]:
-        rules = await self._rule_creator.create_from_query(conversation)
-        for rule in rules:
+        rules_as_strings = await self._rule_creator.create_from_query(conversation)
+        rules_as_strings = select_best_rules_using_entailer(conversation, rules_as_strings, self._entailer, num_rules=1)
+        for rule in rules_as_strings:
             if rule not in self._prior_rules:
                 self._prior_rules.insert(0, rule)
         self._prior_rules = self._prior_rules[: self._max_num_rules]

diff --git a/wafl/answerer/entailer.py b/wafl/answerer/entailer.py
@@ -9,3 +9,6 @@ def __init__(self, config):
     async def left_entails_right(self, lhs: str, rhs: str) -> bool:
         """Tell whether the client's entailment score for (lhs, rhs) exceeds 0.5."""
         return (await self.entailer_client.get_entailment_score(lhs, rhs)) > 0.5
+
+    async def get_score(self, lhs: str, rhs: str) -> float:
+        """Return the entailer client's score for (lhs, rhs), with no threshold applied."""
+        score = await self.entailer_client.get_entailment_score(lhs, rhs)
+        return score
diff --git a/wafl/interface/conversation.py b/wafl/interface/conversation.py
@@ -111,6 +111,15 @@ def get_last_speaker_utterances(self, speaker: str, n: int) -> List[str]:
             if utterance.speaker == speaker
         ][-n:]
 
+    def get_last_speaker_utterance(self, speaker: str) -> str:
+        """Return the text of the most recent utterance by `speaker`, or "" if there is none."""
+        # Walk the history newest-first; an empty/missing history yields no candidates.
+        matching_texts = (
+            utterance.text
+            for utterance in reversed(self.utterances or [])
+            if utterance.speaker == speaker
+        )
+        return next(matching_texts, "")
+
     def get_first_timestamp(self) -> float:
         """Return the timestamp of the first utterance, or None when there are no utterances."""
         if not self.utterances:
             return None
         return self.utterances[0].timestamp