update counts live

seperman · Dec 10, 2019 · ace212d · ace212d
1 parent 6e414a2
commit ace212d
Show file tree

Hide file tree

Showing 3 changed files with 110 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -252,7 +252,7 @@ converted to contexts:
 
 Most people who use Fast Autocomplete, want to control how results are sorted. If you don't control that, the results will be sorted based on the order that Autocomplete found the nodes in the graph that matched the criteria.
 
-The easiest way to sort is to give each item a count.
+The easiest way to sort is to give each item a count. **Fast Autocomplete will use the count to sort items that are partial matches.**
 
 For example:
 
@@ -271,7 +271,7 @@ The format of the file needs to be:
 }
 ```
 
-An example is included in the <tests/fixtures/sample_words.json>
+An example is included in the [sample_words.json](tests/fixtures/sample_words.json)
 
 ```json
 {
@@ -331,6 +331,72 @@ autocomplete = autocomplete_factory(content_files=content_files)
 
 Great question. You need to extend AutoComplete class to use these items. I will write a blog post about it.
 
+
+### Change the sorting by updating counts
+
+By default, Fast Autocomplete uses the "count" of each item to sort the results. Think of these counts as a "guide" that helps Fast Autocomplete polish its results. Depending on whether or not Fast Autocomplete finds exact matches to the user's query, the counts will be used to refine the results. You can update the counts in an autocomplete object live.
+
+For example, in the [sample csv of car makes and models](tests/fixtures/makes_models_from_wikipedia.csv) we have:
+
+```csv
+make,model,count
+Toyota,Aurion,6094
+Toyota,Avalon,8803
+Toyota,Avensis,1630
+Toyota,Auris,4025
+Toyota,Aygo,2115
+```
+
+If we use the autocomplete to search:
+
+```py
+>>> auto_complete = AutoComplete(words=WIKIPEDIA_WORDS, synonyms=SYNONYMS, full_stop_words=['bmw', 'alfa romeo'])
+>>> auto_complete.search(word='toyota a')
+[['toyota'], ['toyota avalon'], ['toyota aurion'], ['toyota auris']]
+```
+
+However, as you may notice, `toyota aygo` has a count of only 2115, so it didn't make it into the top 3 results.
+
+We can set the count for `toyota aygo` to a higher number to boost it in the results using `update_count_of_word`.
+
+`update_count_of_word` can change the count either by setting the word's count directly or by offsetting its current value.
+
+```py
+>>> auto_complete = AutoComplete(words=WIKIPEDIA_WORDS, synonyms=SYNONYMS, full_stop_words=['bmw', 'alfa romeo'])
+>>> auto_complete.update_count_of_word(word='toyota aygo', count=10000)
+10000
+```
+
+Now if we search:
+
+```py
+>>> auto_complete.search(word='toyota a')
+[['toyota'], ['toyota aygo'], ['toyota avalon'], ['toyota aurion']]
+```
+
+We can double check the count of a node:
+
+```py
+>>> auto_complete.get_count_of_word('toyota aygo')
+10000
+```
+
+Now let's use `offset` to adjust the current count of a different node:
+
+
+```py
+>>> auto_complete.update_count_of_word(word='toyota aurion', offset=-6000)
+94
+```
+
+When we search, `toyota aurion` is not in the top 3 results anymore!
+
+```py
+>>> auto_complete.search(word='toyota a')
+[['toyota'], ['toyota aygo'], ['toyota avalon'], ['toyota auris']]
+```
+
+
 ## Draw
 
 This package can actually draw the dwgs as it is populating them or just once the dwg is populated for you!

diff --git a/fast_autocomplete/dwg.py b/fast_autocomplete/dwg.py
@@ -15,6 +15,10 @@
 INF = float('inf')
 
 
+class NodeNotFound(ValueError):
+    pass
+
+
 class FindStep(Enum):
     start = 0
     descendants_only = 1
@@ -437,6 +441,26 @@ def get_all_descendent_words_for_condition(self, word, size, condition):
                     new_tokens.append(node.word)
         return new_tokens
 
+    def update_count_of_word(self, word, count=None, offset=None):
+        """
+        Update the count attribute of a node in the dwg. This only affects the autocomplete
+        object and not the original count of the node in the data that was fed into fast_autocomplete.
+        """
+        matched_prefix_of_last_word, rest_of_word, node, matched_words_part, matched_condition_ever, matched_condition_in_branch = self._prefix_autofill_part(word=word)
+        if node:
+            if offset:
+                with self._lock:
+                    node.count += offset
+            elif count:
+                with self._lock:
+                    node.count = count
+        else:
+            raise NodeNotFound(f'Unable to find a node for word {word}')
+        return node.count
+
+    def get_count_of_word(self, word):
+        return self.update_count_of_word(word)
+
 
 class _DawgNode:
     """
@@ -475,7 +499,7 @@ def insert(self, word, add_word=True, original_key=None, count=0, insert_count=T
             node.word = word
             node.original_key = original_key
             if insert_count:
-                node.count = count
+                node.count = int(count)  # converts any str to int
         return node
 
     def get_descendants_nodes(self, size, should_traverse=True, full_stop_words=None, insert_count=True):
@@ -511,7 +535,7 @@ def get_descendants_nodes(self, size, should_traverse=True, full_stop_words=None
                         que.append((letter, grand_child_node))
 
     def get_descendants_words(
-        self, size, should_traverse=True, full_stop_words=None, insert_count=True):
+            self, size, should_traverse=True, full_stop_words=None, insert_count=True):
         found_nodes_gen = self.get_descendants_nodes(
             size,
             should_traverse=should_traverse,
@@ -522,7 +546,7 @@ def get_descendants_words(
         if insert_count is True:
             found_nodes = sorted(
                 found_nodes_gen,
-                key=lambda node: int(node.count),  # converts any str to int
+                key=lambda node: node.count,
                 reverse=True
             )[:size + 1]
         else:

diff --git a/tests/test_autocomplete.py b/tests/test_autocomplete.py
@@ -445,3 +445,18 @@ def test_get_word_context(self, word, expected_results):
         results = auto_complete.get_word_context(word)
         print_results(locals())
         assert expected_results == results
+
+    @pytest.mark.parametrize("word, update_dict, expected_results, expected_new_count", [
+        ('toyota a', None, [['toyota'], ['toyota avalon'], ['toyota aurion'], ['toyota auris']], None),
+        ('toyota a', {'word': 'toyota aygo', 'count': 10000}, [['toyota'], ['toyota aygo'], ['toyota avalon'], ['toyota aurion']], 10000),
+        ('toyota a', {'word': 'toyota aurion', 'offset': -6000}, [['toyota'], ['toyota avalon'], ['toyota auris'], ['toyota aygo']], 94),
+    ])
+    def test_update_count_of_word(self, word, update_dict, expected_results, expected_new_count):
+        auto_complete = AutoComplete(words=WIKIPEDIA_WORDS, synonyms=SYNONYMS, full_stop_words=['bmw', 'alfa romeo'])
+        if update_dict:
+            new_count = auto_complete.update_count_of_word(**update_dict)
+            assert expected_new_count == new_count
+            assert expected_new_count == auto_complete.get_count_of_word(update_dict['word'])
+        results = auto_complete.search(word, max_cost=2, size=4)
+        print_results(locals())
+        assert expected_results == results