Skip to content

Commit f338b5d

Browse files
Merge pull request #6 from x-tabdeveloping/levenshtein
Levenshtein
2 parents 2f3e7ab + 9de1df9 commit f338b5d

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

neofuzz/process.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ def char_ngram_process(
404404
ngram_range: Tuple[int, int] = (1, 5),
405405
tf_idf: bool = True,
406406
metric: str = "cosine",
407+
refine_levenshtein: bool = False,
407408
) -> Process:
408409
"""Basic character n-gram based fuzzy search process.
409410
@@ -416,6 +417,11 @@ def char_ngram_process(
416417
Flag signifying whether the features should be tf-idf weighted.
417418
metric: str, default 'cosine'
418419
Distance metric to use for fuzzy search.
420+
refine_levenshtein: bool, default False
421+
Indicates whether results should be refined with Levenshtein distance
422+
using TheFuzz.
423+
This can increase the accuracy of your results.
424+
The value is passed on to the returned process.
419425
420426
Returns
421427
-------
@@ -426,4 +432,6 @@ def char_ngram_process(
426432
vectorizer = TfidfVectorizer(ngram_range=ngram_range, analyzer="char")
427433
else:
428434
vectorizer = CountVectorizer(ngram_range=ngram_range, analyzer="char")
429-
return Process(vectorizer, metric=metric)
435+
return Process(
436+
vectorizer, metric=metric, refine_levenshtein=refine_levenshtein
437+
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
line-length=79
33
[tool.poetry]
44
name = "neofuzz"
5-
version = "0.3.0"
5+
version = "0.3.1"
66
description = "Blazing fast fuzzy text search for Python."
77
authors = ["Márton Kardos <[email protected]>"]
88
license = "MIT"

0 commit comments

Comments
 (0)