evaluateSentence.py (forked from eraldoluis/LeNER-Br)
'''
Code entirely written by the LeNER-Br paper authors
'''
# This file was developed as part of the project reported in the paper below.
# We kindly request that users cite our paper in any publication that is
# generated as a result of the use of our source code or our dataset.
#
# Pedro H. Luz de Araujo, Teófilo E. de Campos, Renato R. R. de Oliveira, Matheus Stauffer, Samuel Couto and Paulo Bermejo.
# LeNER-Br: a Dataset for Named Entity Recognition in Brazilian Legal Text.
# International Conference on the Computational Processing of Portuguese (PROPOR),
# September 24-26, Canela, Brazil, 2018.
#
# @InProceedings{luz_etal_propor2018,
#     author = {Pedro H. {Luz de Araujo} and Te\'{o}filo E. {de Campos} and
#               Renato R. R. {de Oliveira} and Matheus Stauffer and
#               Samuel Couto and Paulo Bermejo},
#     title = {LeNER-Br: a Dataset for Named Entity Recognition in Brazilian Legal Text},
#     booktitle = {International Conference on the Computational Processing of Portuguese
#                  ({PROPOR})},
#     year = {2018},
#     month = {September 24-26},
#     address = {Canela, RS, Brazil},
#     note = {Available from \url{https://cic.unb.br/~teodecampos/LeNER-Br/}}
# }

from model.ner_model import NERModel
from model.config import Config
from nltk import word_tokenize
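
# Note (assumption about the local environment, not part of the original script):
# nltk.word_tokenize with language='portuguese' relies on NLTK's Punkt models;
# if they are not installed, download them once with nltk.download('punkt').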
# ANSI color codes per entity class: PESSOA is blue, TEMPO is green, LOCAL is
# yellow, ORGANIZACAO is red, JURISPRUDENCIA is magenta and LEGISLACAO is cyan
bcolors = {
    "PESSOA": '\033[94m',
    "TEMPO": '\033[92m',
    "LOCAL": '\033[93m',
    "ORGANIZACAO": '\033[91m',
    "JURISPRUDENCIA": '\033[35m',
    "LEGISLACAO": '\033[36m',
    "ENDC": '\033[0m',
    "O": ""
}
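
# Usage sketch (illustrative, not from the original script): printing
#     bcolors["PESSOA"] + "Maria" + bcolors["ENDC"]
# renders "Maria" in blue on an ANSI-capable terminal; the "O" (outside) tag
# maps to an empty string, so non-entity tokens keep the default color.
# "Maria" is only a made-up example token.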
# create instance of config
config = Config()
# build model
model = NERModel(config)
model.build()
model.restore_session(config.dir_model)
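
# Illustrative sketch of the expected interface (an assumption based on how
# preds is used below, not verified against model/ner_model.py): model.predict
# takes a list of tokens and returns one IOB/BIOES-style tag string per token,
# e.g.
#     model.predict(['Supremo', 'Tribunal', 'Federal'])
#     -> ['B-ORGANIZACAO', 'I-ORGANIZACAO', 'E-ORGANIZACAO']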
while True:
    # Read a sentence from the user ("Escreva frase a ser analisada" means
    # "Write the sentence to be analysed") and tokenize it as Portuguese text
    words = input("Escreva frase a ser analisada: ")
    words = word_tokenize(words, language='portuguese')
    preds = model.predict(words)
    for index, word in enumerate(words):
        # Strip the BIOES prefix (B-, I-, E-, S-) so only the entity class remains
        if preds[index][0:2] in ['B-', 'I-', 'E-', 'S-']:
            preds[index] = preds[index][2:]
        # Print the token in the color assigned to its predicted entity class
        print(bcolors[preds[index]] +
              word + bcolors["ENDC"], end=' ')
    print('\n')