Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create get_close_matches helper function that works like hashlib equivalent #53

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions Levenshtein/StringMatcher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import heapq
from Levenshtein import *
from typing import Iterable, Sequence, List
from warnings import warn

class StringMatcher:
Expand Down Expand Up @@ -67,3 +69,44 @@ def distance(self):
if not self._distance:
self._distance = distance(self._str1, self._str2)
return self._distance

def get_close_matches(word: str, possibilities: Iterable[str], n=3, cutoff=0.6) -> Iterable[str]:
"""Use StringMatcher to return list of the best "good enough" matches.
word is a sequence for which close matches are desired (typically a
string).
possibilities is a list of sequences against which to match word
(typically a list of strings).
Optional arg n (default 3) is the maximum number of close matches to
return. n must be > 0.
Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities
that don't score at least that similar to word are ignored.
The best (no more than n) matches among the possibilities are returned
in a list, sorted by similarity score, most similar first.
>>> get_close_matches("appel", ["ape", "apple", "peach", "puppy"])
['apple', 'ape']
>>> import keyword as _keyword
>>> get_close_matches("wheel", _keyword.kwlist)
['while']
>>> get_close_matches("Apple", _keyword.kwlist)
[]
>>> get_close_matches("accept", _keyword.kwlist)
['except']
"""
if not n > 0:
raise ValueError("n must be > 0: %r" % (n,))
if not 0.0 <= cutoff <= 1.0:
raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
result = []
s = StringMatcher()
s.set_seq2(word)
for x in possibilities:
s.set_seq1(x)
if s.real_quick_ratio() >= cutoff and \
s.quick_ratio() >= cutoff and \
s.ratio() >= cutoff:
result.append((s.ratio(), x))

# Move the best scorers to head of list
result = heapq.nlargest(n, result)
# Strip scores for the best n matches
return [x for score, x in result]