-
Notifications
You must be signed in to change notification settings - Fork 2
/
cn_tup_matching.py
69 lines (50 loc) · 2.27 KB
/
cn_tup_matching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Main script: keyword-spotting logic that finds emotive ConceptNet tuples.
import nltk
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
from conceptnet.models import *
from actual import get_synsets_for_matching
from get_correct_pos import matching_pos
from extract import extract_score
# Language handle used to restrict every query below to English.
en = Language.get('en')

# All ConceptNet tuples (assertions) in that language.
cn_tuples = Assertion.objects.filter(language=en)

# Synsets the ConceptNet tuples are matched against; the matching loop
# indexes this as base_synsets[emotion][pos].
base_synsets = get_synsets_for_matching()

# Emotion labels each word is scored against.
emotions = {'happy', 'sad', 'anger', 'disgust', 'surprise', 'fear'}

# One list of matched tuples per emotion, plus a 'neutral' list.
h, sa, a, d, s, f, n = [], [], [], [], [], [], []
result_list = {
    'happy': h,
    'sad': sa,
    'anger': a,
    'disgust': d,
    'surprise': s,
    'fear': f,
    'neutral': n,
}
# Score a word must exceed before its tuple is filed under an emotion.
# The value is provisional and a candidate for further tuning.
SCORE_THRESHOLD = 0.13

# Walk every ConceptNet tuple and file it under the emotion matched by the
# first word of its second concept whose best score clears the threshold.
for cn_tuple in cn_tuples:
    # Only the second concept is scanned for emotive words, so the first
    # concept is not fetched or tagged here.
    concept2_text = Concept.objects.filter(
        language=en, id=cn_tuple.concept2_id)[0].text

    # Tokenize the concept text and tag each token with its part of speech.
    # TODO: evaluate whether a better POS tagger should be used.
    for word, tag in pos_tag(word_tokenize(concept2_text)):
        pos = matching_pos(tag)  # normalise the tag, e.g. 'NN' -> 'n'
        if pos == "":
            # matching_pos gave no usable POS for this tag; skip the word.
            continue

        # Find the highest-scoring synset for this word across all emotions.
        # Ties keep the first candidate seen because the comparison is strict.
        best_score = 0
        best_emotion = "neutral"
        best_synset = None
        for emo in emotions:
            for syn in base_synsets[emo][pos]:
                candidate_score = extract_score(word, syn)
                if candidate_score > best_score:
                    best_score = candidate_score
                    best_emotion = emo
                    best_synset = syn

        if best_score > SCORE_THRESHOLD:
            result_list[best_emotion].append(cn_tuple)
            # Single-argument parenthesised prints behave the same under the
            # Python 2 print statement and the Python 3 print function.
            print(best_synset)
            print('word->' + ' ' + word + ' ' + 'emotion' + '-> ' + best_emotion + ' ' + 'concept->' + ' ' + concept2_text + '\n')
            # The first qualifying word decides the tuple's emotion.
            break
# Debug aid: uncomment to dump the collected tuples per emotion.
# print(result_list)