Skip to content

Commit

Permalink
update counts live
Browse files Browse the repository at this point in the history
  • Loading branch information
seperman committed Dec 10, 2019
1 parent 6e414a2 commit ace212d
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 5 deletions.
70 changes: 68 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ converted to contexts:

Most people who use Fast Autocomplete want to control how results are sorted. If you don't control that, the results will be sorted in the order in which Autocomplete found the matching nodes in the graph.

The easiest way to sort is to give each item a count.
The easiest way to sort is to give each item a count. **Fast AutoComplete will use the count to sort items that are partial matches.**

For example:

Expand All @@ -271,7 +271,7 @@ The format of the file needs to be:
}
```

An example is included in the <tests/fixtures/sample_words.json>
An example is included in the [sample_words.json](tests/fixtures/sample_words.json)

```json
{
Expand Down Expand Up @@ -331,6 +331,72 @@ autocomplete = autocomplete_factory(content_files=content_files)

Great question. You need to extend AutoComplete class to use these items. I will write a blog post about it.


### Change the sorting by updating counts

Fast Autocomplete by default uses the "count" of the items to sort the items in the results. Think of these counts as a "guide" that Fast Autocomplete uses to polish its results. Depending on whether or not Fast Autocomplete finds exact matches to the user's query, the counts will be used to refine the results. You can update the counts in an autocomplete object live.

For example, in the [sample csv of car makes and models](tests/fixtures/makes_models_from_wikipedia.csv) we have:

```csv
make,model,count
Toyota,Aurion,6094
Toyota,Avalon,8803
Toyota,Avensis,1630
Toyota,Auris,4025
Toyota,Aygo,2115
```

If we use the autocomplete to search:

```py
>>> auto_complete = AutoComplete(words=WIKIPEDIA_WORDS, synonyms=SYNONYMS, full_stop_words=['bmw', 'alfa romeo'])
>>> auto_complete.search(word='toyota a')
[['toyota'], ['toyota avalon'], ['toyota aurion'], ['toyota auris']]
```

However, as you can see, `toyota aygo` has a count of 2115, so it did not make it into the top 3 results.

We can set the count for `toyota aygo` to a higher number to boost it in the results using `update_count_of_word`.

The `update_count_of_word` can change the count via setting the word's count directly or by offsetting its current value.

```py
>>> auto_complete = AutoComplete(words=WIKIPEDIA_WORDS, synonyms=SYNONYMS, full_stop_words=['bmw', 'alfa romeo'])
>>> auto_complete.update_count_of_word(word='toyota aygo', count=10000)
10000
```

Now if we search:

```py
>>> auto_complete.search(word='toyota a')
[['toyota'], ['toyota aygo'], ['toyota avalon'], ['toyota aurion']]
```

We can double check the count of a node:

```py
>>> auto_complete.get_count_of_word('toyota aygo')
10000
```

Now let's use the offset to offset the current count of a different node:


```py
>>> auto_complete.update_count_of_word(word='toyota aurion', offset=-6000)
94
```

When we search, `toyota aurion` is not in the top 3 results anymore!

```py
>>> auto_complete.search(word='toyota a')
[['toyota'], ['toyota aygo'], ['toyota avalon'], ['toyota auris']]
```


## Draw

This package can actually draw the dwgs as it is populating them or just once the dwg is populated for you!
Expand Down
30 changes: 27 additions & 3 deletions fast_autocomplete/dwg.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
INF = float('inf')


class NodeNotFound(ValueError):
    """Raised when no node can be found in the dwg for a given word."""


class FindStep(Enum):
start = 0
descendants_only = 1
Expand Down Expand Up @@ -437,6 +441,26 @@ def get_all_descendent_words_for_condition(self, word, size, condition):
new_tokens.append(node.word)
return new_tokens

def update_count_of_word(self, word, count=None, offset=None):
    """
    Update the count attribute of a node in the dwg. This only affects the autocomplete
    object and not the original count of the node in the data that was fed into fast_autocomplete.

    Parameters:
        word: The word whose node should be updated.
        count: When given (and no non-zero offset is given), the node's count is
            set to exactly this value. A count of 0 is a valid value.
        offset: When given and non-zero, it is added to the node's current count.
            A non-zero offset takes precedence over count.

    Returns:
        The node's count after the update. When neither count nor offset is given,
        nothing is modified and the current count is returned.

    Raises:
        NodeNotFound: If no node is found for the word.
    """
    # Only the node (third item of the returned tuple) is needed here.
    _, _, node, _, _, _ = self._prefix_autofill_part(word=word)
    if not node:
        raise NodeNotFound(f'Unable to find a node for word {word}')

    if offset:
        with self._lock:
            node.count += offset
    elif count is not None:
        # Compare against None instead of relying on truthiness so that
        # count=0 can be used to reset a node's count to zero.
        with self._lock:
            node.count = count
    return node.count

def get_count_of_word(self, word):
    """
    Return the current count of the node found for the given word.

    This delegates to update_count_of_word, passing only the word (no count
    and no offset), and returns whatever that call returns.
    """
    current_count = self.update_count_of_word(word)
    return current_count


class _DawgNode:
"""
Expand Down Expand Up @@ -475,7 +499,7 @@ def insert(self, word, add_word=True, original_key=None, count=0, insert_count=T
node.word = word
node.original_key = original_key
if insert_count:
node.count = count
node.count = int(count) # converts any str to int
return node

def get_descendants_nodes(self, size, should_traverse=True, full_stop_words=None, insert_count=True):
Expand Down Expand Up @@ -511,7 +535,7 @@ def get_descendants_nodes(self, size, should_traverse=True, full_stop_words=None
que.append((letter, grand_child_node))

def get_descendants_words(
self, size, should_traverse=True, full_stop_words=None, insert_count=True):
self, size, should_traverse=True, full_stop_words=None, insert_count=True):
found_nodes_gen = self.get_descendants_nodes(
size,
should_traverse=should_traverse,
Expand All @@ -522,7 +546,7 @@ def get_descendants_words(
if insert_count is True:
found_nodes = sorted(
found_nodes_gen,
key=lambda node: int(node.count), # converts any str to int
key=lambda node: node.count,
reverse=True
)[:size + 1]
else:
Expand Down
15 changes: 15 additions & 0 deletions tests/test_autocomplete.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,18 @@ def test_get_word_context(self, word, expected_results):
results = auto_complete.get_word_context(word)
print_results(locals())
assert expected_results == results

# Each case is: (search word, keyword arguments for update_count_of_word or None,
# expected search results, expected count returned by the update).
@pytest.mark.parametrize("word, update_dict, expected_results, expected_new_count", [
# Baseline: search without updating any count first.
('toyota a', None, [['toyota'], ['toyota avalon'], ['toyota aurion'], ['toyota auris']], None),
# Set the count of 'toyota aygo' directly to 10000.
('toyota a', {'word': 'toyota aygo', 'count': 10000}, [['toyota'], ['toyota aygo'], ['toyota avalon'], ['toyota aurion']], 10000),
# Offset the current count of 'toyota aurion' by -6000; the expected new count is 94.
('toyota a', {'word': 'toyota aurion', 'offset': -6000}, [['toyota'], ['toyota avalon'], ['toyota auris'], ['toyota aygo']], 94),
])
def test_update_count_of_word(self, word, update_dict, expected_results, expected_new_count):
# A new AutoComplete instance is built for every parametrized case.
auto_complete = AutoComplete(words=WIKIPEDIA_WORDS, synonyms=SYNONYMS, full_stop_words=['bmw', 'alfa romeo'])
if update_dict:
# The update must return the new count, and get_count_of_word must report the same value.
new_count = auto_complete.update_count_of_word(**update_dict)
assert expected_new_count == new_count
assert expected_new_count == auto_complete.get_count_of_word(update_dict['word'])
results = auto_complete.search(word, max_cost=2, size=4)
# print_results receives locals(), so the local variable names in this test are part of what it is given.
print_results(locals())
assert expected_results == results

0 comments on commit ace212d

Please sign in to comment.