Skip to content

Commit f80756a

Browse files
committed
bool search bugs fix
1 parent df05e2a commit f80756a

File tree

2 files changed

+5
-11
lines changed

2 files changed

+5
-11
lines changed

bool_search.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,8 @@ def _preprocess_(query):
122122
query = sub(' {2,}', ' ',query)
123123
query = query.lower()
124124
query = query.split()
125-
data = Data()
126-
data.lemma_word(query)
127-
return data.data[0]
128-
125+
query = Data.lemma(query)
126+
return query
129127
# calculate target inverted index by operator
130128
@staticmethod
131129
def process(ii_list):

utils/data_process.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,6 @@ def dump(sentence):
6464
logging.info("Dumping input")
6565
return Data._lemma_(Data._strip_stop_words_(Data._pre_process_(sentence)))
6666

67-
def lemma_word(self, word_list):
68-
self.data.append(word_list)
69-
self._lemma_()
70-
7167
@staticmethod
7268
def _pre_process_(sentence):
7369
sentence = sentence.lower()
@@ -101,7 +97,7 @@ def _strip_stop_words_(wordbag):
10197

10298
# this is the slowest step of the processing
10399
@staticmethod
104-
def _lemma_(wordbag):
100+
def lemma(wordbag):
105101
# this method is based on https://www.machinelearningplus.com/nlp/lemmatization-examples-python/
106102
return [lemmatizer.lemmatize(
107103
word, Data.get_wordnet_pos(word)) for word in wordbag]
@@ -137,10 +133,10 @@ def process(self):
137133
for docid in range(len(self.data)):
138134
if docid % 1000 == 0:
139135
logging.debug(f"{docid} document processed")
140-
self.data[docid] = self._lemma_(
136+
self.data[docid] = self.lemma(
141137
self._strip_stop_words_(
142138
self._pre_process_(self.data[docid])))
143-
self.headerdata[docid] = self._lemma_(
139+
self.headerdata[docid] = self.lemma(
144140
self._strip_stop_words_(
145141
self._pre_process_(self.headerdata[docid])))
146142
self._gen_dict_()

0 commit comments

Comments
 (0)