Skip to content

Commit

Permalink
bug fix: entity indices
Browse files Browse the repository at this point in the history
  • Loading branch information
Hanif Yuli Abdillah P committed Sep 13, 2023
1 parent 0194d99 commit f895af2
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "lexifuzz-ner"
-version = "0.0.1"
+version = "0.0.2"
authors = ["Hanif Yuli Abdillah P <[email protected]>"]
description = "Python package for detecting entities in text based on a dictionary and fuzzy similarity"
readme = "README.md"
Expand Down
3 changes: 2 additions & 1 deletion src/lexifuzz_ner/ner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -113,9 +113,9 @@ def find_entity(text=None, dictionary=None, min_ratio=None):
result_detection['entities'] = []
tokens = text.split()
max_ngrams = max([max(len(phrase.split()) for phrase in phrases) for phrases in dictionary.values()])
-current_index = 0
for n in range(1, max_ngrams+1):
ngrams_result = list(ngrams(tokens, n))
+current_index = 0
for result in ngrams_result:
compared_text = ' '.join(result)
similarity_score = getFuzzySimilarity(token = compared_text, dictionary = dictionary, min_ratio = min_ratio)
Expand All @@ -141,6 +141,7 @@ def find_entity(text=None, dictionary=None, min_ratio=None):
}
}
)
+result_detection['entities'] = sorted(result_detection['entities'], key=lambda x: x['index']['start'])
result_detection = handle_slicing(result_detection)
result_detection['text'] = text
result_detection['text_annotated'] = annotate_text(result_detection)
Expand Down

0 comments on commit f895af2

Please sign in to comment.