Skip to content

Commit f76c562

Browse files
committed
Fixes Min Score Bug
Fixes a bug where the min_score argument in the mapper was ignored when not using TFIDF
1 parent 70d95c2 commit f76c562

File tree

3 files changed

+20
-11
lines changed

3 files changed

+20
-11
lines changed

test/simple-test.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ def main():
66
pizza = "https://protege.stanford.edu/ontologies/pizza/pizza.owl"
77
ncit = "http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl"
88
# print(bioregistry.get_owl_download("eFo"))
9-
# if not text2term.cache_exists("EFO"):
10-
# cached_onto = text2term.cache_ontology("EFO")
11-
# # df = cached_onto.map_terms(["asthma", "disease location", "obsolete food allergy"], excl_deprecated=True, term_type="classes")
12-
# print("Cache exists:", cached_onto.cache_exists())
13-
caches = text2term.cache_ontology_set("text2term/resources/ontologies.csv")
14-
df = text2term.map_terms(["asthma", "disease location", "obsolete food allergy"], "EFO", excl_deprecated=True, use_cache=True, term_type="classes")
9+
if not text2term.cache_exists("EFO"):
10+
cached_onto = text2term.cache_ontology("EFO")
11+
# df = cached_onto.map_terms(["asthma", "disease location", "obsolete food allergy"], excl_deprecated=True, term_type="classes")
12+
print("Cache exists:", cached_onto.cache_exists())
13+
# caches = text2term.cache_ontology_set("text2term/resources/ontologies.csv")
14+
df = text2term.map_terms(["asthma", "disease location", "obsolete food allergy"], "EFO", min_score=.8, mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True, term_type="classes")
1515
# df = text2term.map_terms(["contains", "asthma"], "EFO", term_type="classes")
1616
print(df.to_string())
1717

text2term/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
VERSION = "2.3.1"
1+
VERSION = "2.3.2"

text2term/t2t.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,18 +219,27 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ
219219
def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score):
    """Map the source terms with the selected mapper and drop low-scoring results.

    :param source_terms: terms to be mapped
    :param source_term_ids: identifiers of the source terms, passed through to the mapper
    :param ontology_terms: ontology terms to map against; for the ZOOMA and BioPortal
        mappers this value is forwarded as their ``ontologies`` argument
    :param mapper: the mapping method to use
    :param max_mappings: maximum number of mappings, forwarded to the mapper
    :param min_score: minimum mapping score; mappings scoring below it are discarded
    :return: data frame of mappings whose 'Mapping Score' is at least ``min_score``
    :raises ValueError: if ``mapper`` is not one of the supported mapping methods
    """
    if mapper == Mapper.TFIDF:
        term_mapper = TFIDFMapper(ontology_terms)
        mappings_df = term_mapper.map(source_terms, source_term_ids, max_mappings=max_mappings, min_score=min_score)
    elif mapper == Mapper.ZOOMA:
        term_mapper = ZoomaMapper()
        mappings_df = term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
    elif mapper == Mapper.BIOPORTAL:
        # NOTE(review): API key is hard-coded in source; consider loading it from configuration.
        term_mapper = BioPortalAnnotatorMapper("8f0cbe43-2906-431a-9572-8600d3f4266e")
        mappings_df = term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
    elif mapper in {Mapper.LEVENSHTEIN, Mapper.JARO, Mapper.JARO_WINKLER, Mapper.INDEL, Mapper.FUZZY, Mapper.JACCARD}:
        term_mapper = SyntacticMapper(ontology_terms)
        mappings_df = term_mapper.map(source_terms, source_term_ids, mapper, max_mappings=max_mappings)
    else:
        # str() keeps this a ValueError even when `mapper` is not a string
        # (concatenating a non-str value to a str would raise TypeError instead).
        raise ValueError("Unsupported mapper: " + str(mapper))
    # Only the TF-IDF mapper receives min_score directly, so the threshold is
    # enforced here uniformly for every mapper.
    return _filter_mappings(mappings_df, min_score)
236+
237+
def _filter_mappings(mappings_df, min_score):
    """Return the mappings whose 'Mapping Score' is at least ``min_score``.

    :param mappings_df: data frame of mappings containing a 'Mapping Score' column
    :param min_score: minimum score (inclusive) a mapping must have to be kept
    :return: a new data frame with the same columns, holding only the rows that
        meet the threshold, re-indexed from 0; the input frame is not modified
    """
    if mappings_df.empty:
        # Nothing to filter; also tolerates an empty frame without a score column.
        return mappings_df.reset_index(drop=True)
    # Vectorized comparison instead of copying row by row: faster, and the
    # column dtypes of the input are preserved in the result.
    meets_threshold = mappings_df['Mapping Score'] >= min_score
    return mappings_df[meets_threshold].reset_index(drop=True)
234243

235244
def _save_mappings(mappings, output_file, min_score, mapper, target_ontology, base_iris, excl_deprecated, max_mappings, term_type):
236245
if os.path.dirname(output_file): # create output directories if needed

0 commit comments

Comments
 (0)