Skip to content

Commit

Permalink
bug fix: fix overlapping entity
Browse files Browse the repository at this point in the history
  • Loading branch information
Hanif Yuli Abdillah P committed Sep 21, 2023
1 parent 5c0d892 commit 173792c
Show file tree
Hide file tree
Showing 6 changed files with 2 additions and 6 deletions.
Binary file removed dist/lexifuzz_ner-0.0.3-py3-none-any.whl
Binary file not shown.
Binary file removed dist/lexifuzz_ner-0.0.3.tar.gz
Binary file not shown.
Binary file added dist/lexifuzz_ner-0.0.4-py3-none-any.whl
Binary file not shown.
Binary file added dist/lexifuzz_ner-0.0.4.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "lexifuzz-ner"
version = "0.0.3"
version = "0.0.4"
authors = ["Hanif Yuli Abdillah P <[email protected]>"]
description = "Python package for detecting entities in text based on a dictionary and fuzzy similarity"
readme = "README.md"
Expand Down
6 changes: 1 addition & 5 deletions src/lexifuzz_ner/ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def getFuzzySimilarity(token=None, dictionary=None, min_ratio=None):
return (match + (key, ))

def handle_slicing(data=None):

"""
This function takes a dictionary data as input and processes its 'entities' by sorting them based on their score in descending order.
It then identifies entities with the highest scores, ensuring there is no overlap in their index ranges.
Expand All @@ -39,7 +38,7 @@ def handle_slicing(data=None):
assert isinstance(data, dict), "Dictionary format should be provided in the dictionary parameter."

# Sort entities by their score in descending order
sorted_entities = sorted(data['entities'], key=lambda x: -x['score'])
sorted_entities = sorted(data['entities'], key=lambda x: (-x['score'], x['index']['start'], -x['index']['end']))

# Initialize a set to keep track of which (start, end) index ranges have been covered
indices_covered = set()
Expand All @@ -53,14 +52,11 @@ def handle_slicing(data=None):
# Check if the entity's indices overlap with previously covered indices
if all(start > end_covered or end < start_covered for start_covered, end_covered in indices_covered):
new_entities.append(entity)

# Update the covered indices
indices_covered.add((start, end))

# Update the entities in the data dictionary
data['entities'] = new_entities

# Return the modified data
return data

def annotate_text(entities = None):
Expand Down

0 comments on commit 173792c

Please sign in to comment.