File tree Expand file tree Collapse file tree 2 files changed +5
-11
lines changed Expand file tree Collapse file tree 2 files changed +5
-11
lines changed Original file line number Diff line number Diff line change @@ -122,10 +122,8 @@ def _preprocess_(query):
122
122
query = sub (' {2,}' , ' ' ,query )
123
123
query = query .lower ()
124
124
query = query .split ()
125
- data = Data ()
126
- data .lemma_word (query )
127
- return data .data [0 ]
128
-
125
+ query = Data .lemma (query )
126
+ return query
129
127
# calculate target inverted index by operator
130
128
@staticmethod
131
129
def process (ii_list ):
Original file line number Diff line number Diff line change @@ -64,10 +64,6 @@ def dump(sentence):
64
64
logging .info ("Dumping input" )
65
65
return Data ._lemma_ (Data ._strip_stop_words_ (Data ._pre_process_ (sentence )))
66
66
67
- def lemma_word (self , word_list ):
68
- self .data .append (word_list )
69
- self ._lemma_ ()
70
-
71
67
@staticmethod
72
68
def _pre_process_ (sentence ):
73
69
sentence = sentence .lower ()
@@ -101,7 +97,7 @@ def _strip_stop_words_(wordbag):
101
97
102
98
# the slowest step of the processing pipeline
103
99
@staticmethod
104
- def _lemma_ (wordbag ):
100
+ def lemma (wordbag ):
105
101
# this method is adapted from https://www.machinelearningplus.com/nlp/lemmatization-examples-python/
106
102
return [lemmatizer .lemmatize (
107
103
word , Data .get_wordnet_pos (word )) for word in wordbag ]
@@ -137,10 +133,10 @@ def process(self):
137
133
for docid in range (len (self .data )):
138
134
if docid % 1000 == 0 :
139
135
logging .debug (f"{ docid } document processed" )
140
- self .data [docid ] = self ._lemma_ (
136
+ self .data [docid ] = self .lemma (
141
137
self ._strip_stop_words_ (
142
138
self ._pre_process_ (self .data [docid ])))
143
- self .headerdata [docid ] = self ._lemma_ (
139
+ self .headerdata [docid ] = self .lemma (
144
140
self ._strip_stop_words_ (
145
141
self ._pre_process_ (self .headerdata [docid ])))
146
142
self ._gen_dict_ ()
You can’t perform that action at this time.
0 commit comments