Skip to content

Commit

Permalink
#51
Browse files (browse the repository at this point in the history)
- text features dict
- feat extraction
- bash update (spacy models)
  • Loading branch information
diegoesteves committed Apr 30, 2020
1 parent 4c5a804 commit 9e3db92
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 21 deletions.
36 changes: 16 additions & 20 deletions src/features/horus_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
from src.utils.translation.azure import bing_detect_language, bing_translate_text
from src.utils.translation.bingtranslation import BingTranslator
from src.utils.util import Util
#from tensorflow.python.keras._impl.keras.applications import InceptionV3
#from tensorflow.python.keras._impl.keras.applications import InceptionV3
# from tensorflow.python.keras._impl.keras.applications import InceptionV3
# from tensorflow.python.keras._impl.keras.applications import InceptionV3
from tensorflow.python.keras.preprocessing import image as ppimg
from keras.applications.imagenet_utils import decode_predictions
from keras.applications.inception_v3 import preprocess_input, InceptionV3
Expand Down Expand Up @@ -251,7 +251,7 @@ def extract_features(self, horus: Horus) -> bool:
i_token += 1
if token.label_pos in definitions.POS_NOUN_TAGS or token.is_compound == 1:
self.config.logger.debug(f'token: {token.text} ({i_token}/{len(sentence.tokens)}) | '
f'sentence ({i_sent}/{len(horus.sentences)})')
f'sentence ({i_sent}/{len(horus.sentences)})')

id_term_txt = token.features.text.values[tx_dict_reversed.get('id_db')]
id_ner_type = 0
Expand Down Expand Up @@ -322,9 +322,9 @@ def extract_features(self, horus: Horus) -> bool:
y_tm.append(ret_tm)

cursor.execute(SQL_TEXT_CLASS_UPD % (
ret_bow[0], ret_bow[1], ret_bow[2], ret_bow[3], ret_bow[4],
ret_tm[0], ret_tm[1], ret_tm[2], ret_tm[3], ret_tm[4],
embs[0], embs[1], embs[2], embs[3], rows[itxt][0]))
ret_bow[0], ret_bow[1], ret_bow[2], ret_bow[3], ret_bow[4],
ret_tm[0], ret_tm[1], ret_tm[2], ret_tm[3], ret_tm[4],
embs[0], embs[1], embs[2], embs[3], rows[itxt][0]))
else:
y_bow.append(rows[itxt][7:11])
y_tm.append(rows[itxt][12:16])
Expand Down Expand Up @@ -382,6 +382,8 @@ def extract_features(self, horus: Horus) -> bool:
token.features.text.values[tx_dict_reversed.get('total.topic.k.other')] = \
0 if len(tm_cnn_w) == 0 else tm_cnn_w[3]

horus_tx_ner_cnn = gpb.index(max(tm_cnn_w)) + 1

maxs_tx = heapq.nlargest(2, gpb)
maxs_tm = 0 if len(tm_cnn_w) == 0 else heapq.nlargest(2, tm_cnn_w)
dist_tx_indicator = max(maxs_tx) - min(maxs_tx)
Expand Down Expand Up @@ -454,13 +456,16 @@ def extract_features(self, horus: Horus) -> bool:
token.features.text.values[tx_dict_reversed.get('stats.topic.min.per')] = topic_min[2]
token.features.text.values[tx_dict_reversed.get('stats.topic.min.other')] = topic_min[3]


if limit_txt != 0:
self.horus_matrix[index][definitions.INDEX_MAX_KLASS_PREDICT_TX] = \
definitions.PLOMNone_index2label[horus_tx_ner]
token.features.text.values[tx_dict_reversed.get('top.binary.k')] = \
definitions.PLOMNone_index2label[horus_tx_ner]
token.features.text.values[tx_dict_reversed.get('top.topic.k')] = \
definitions.PLOMNone_index2label[horus_tx_ner_cnn]
else:
self.horus_matrix[index][definitions.INDEX_MAX_KLASS_PREDICT_TX] = \
definitions.PLOMNone_index2label[4]
token.features.text.values[tx_dict_reversed.get('top.binary.k')] = \
definitions.PLOMNone_index2label[4]
token.features.text.values[tx_dict_reversed.get('top.topic.k')] = \
definitions.PLOMNone_index2label[4]

except Exception as e:
raise e
Expand Down Expand Up @@ -588,12 +593,3 @@ def _shape(self, word):
word_shape = 11 # 'contains-hyphen'

return word_shape









2 changes: 1 addition & 1 deletion src/horus_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def get_textual() -> dict:
58: 'stats.topic.min.org',
59: 'stats.topic.min.per',
60: 'stats.topic.min.misc',
61: 'stats.topic.min.other',
61: 'stats.topic.min.other'
}

reversed_features = dict([(value, key) for key, value in features.items()])
Expand Down

0 comments on commit 9e3db92

Please sign in to comment.