From 0bc07984e205cb941d8358644f048dcbd300319c Mon Sep 17 00:00:00 2001
From: Tomo Oga <oga.t@northeastern.edu>
Date: Tue, 5 Nov 2024 11:01:17 -0500
Subject: [PATCH 1/2] Normalize stopwords as they are loaded from the
 stoplist file

---
 gilda/ner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gilda/ner.py b/gilda/ner.py
index 49dae9e..cf7e0d3 100644
--- a/gilda/ner.py
+++ b/gilda/ner.py
@@ -70,7 +70,7 @@ def _load_stoplist() -> Set[str]:
     """Load NER stoplist from file."""
     stoplist_path = STOPLIST_PATH
     with open(stoplist_path, 'r') as file:
-        stoplist = {line.strip() for line in file}
+        stoplist = {normalize(line.strip()) for line in file}
     return stoplist
 
 

From 692fb2f794eea2029eac2b21ac3840f0c29e72ec Mon Sep 17 00:00:00 2001
From: Tomo Oga <oga.t@northeastern.edu>
Date: Tue, 5 Nov 2024 11:22:28 -0500
Subject: [PATCH 2/2] Compare raw words against the stoplist instead of
 normalizing the stoplist text

---
 gilda/ner.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gilda/ner.py b/gilda/ner.py
index cf7e0d3..153369f 100644
--- a/gilda/ner.py
+++ b/gilda/ner.py
@@ -70,7 +70,7 @@ def _load_stoplist() -> Set[str]:
     """Load NER stoplist from file."""
     stoplist_path = STOPLIST_PATH
     with open(stoplist_path, 'r') as file:
-        stoplist = {normalize(line.strip()) for line in file}
+        stoplist = {line.strip() for line in file}
     return stoplist
 
 
@@ -144,6 +144,8 @@ def annotate(
                 continue
             if word in stop_words:
                 continue
+            if raw_words[idx] in stop_words:
+                continue
             spans = grounder.prefix_index.get(word, set())
             if not spans:
                 continue