Merge pull request #6 from x-tabdeveloping/levenshtein

x-tabdeveloping · web-flow · commit f338b5d87953 · 2024-09-06T15:09:21.000+02:00
Levenshtein
diff --git a/neofuzz/process.py b/neofuzz/process.py
@@ -404,6 +404,7 @@ def char_ngram_process(
     ngram_range: Tuple[int, int] = (1, 5),
     tf_idf: bool = True,
     metric: str = "cosine",
+    refine_levenshtein: bool = False,
 ) -> Process:
     """Basic character n-gram based fuzzy search process.
 
@@ -416,6 +417,11 @@ def char_ngram_process(
         Flag signifying whether the features should be tf-idf weighted.
     metric: str, default 'cosine'
         Distance metric to use for fuzzy search.
+    refine_levenshtein: bool, default False
+        Indicates whether results should be refined with Levenshtein distance
+        using TheFuzz.
+        This can increase the accuracy of your results.
+        If not specified, ``False`` is passed on to the created process.
 
     Returns
     -------
@@ -426,4 +432,6 @@ def char_ngram_process(
         vectorizer = TfidfVectorizer(ngram_range=ngram_range, analyzer="char")
     else:
         vectorizer = CountVectorizer(ngram_range=ngram_range, analyzer="char")
-    return Process(vectorizer, metric=metric)
+    return Process(
+        vectorizer, metric=metric, refine_levenshtein=refine_levenshtein
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 line-length=79
 [tool.poetry]
 name = "neofuzz"
-version = "0.3.0"
+version = "0.3.1"
 description = "Blazing fast fuzzy text search for Python."
 authors = ["Márton Kardos <power.up1163@gmail.com>"]
 license = "MIT"