-
Notifications
You must be signed in to change notification settings - Fork 6
/
SentimentAnalysis - Polarity - Domain Specific Lexicon.py
119 lines (98 loc) · 2.6 KB
/
SentimentAnalysis - Polarity - Domain Specific Lexicon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import csv
import pandas as pd
import nltk
from nltk import FreqDist,ngrams
from nltk.corpus import stopwords
import string
from os import listdir
from os.path import isfile, join
def ngram_list(file, n):
    """Return the word n-grams of a text file.

    The whole file is read, split on whitespace into tokens, and the tokens
    are handed to ``ngrams`` (imported from nltk at the top of this file).

    Args:
        file: Path of the text file to read.
        n: Number of tokens per n-gram (2 for bigrams, 3 for trigrams, ...).

    Returns:
        Whatever ``ngrams(tokens, n)`` returns for the file's tokens.
    """
    # The context manager guarantees the handle is closed; the previous
    # version opened the file and never closed it.
    # Plain 'r' replaces 'rU', which Python 3.11 rejects. Tokenisation is
    # unaffected because split() treats every newline flavour as whitespace,
    # which also makes the old replace('\n', ' ') step unnecessary.
    with open(file, 'r') as f:
        raw = f.read()
    return ngrams(raw.split(), n)
def IsNotNull(value):
    """Tell whether *value* is neither None nor empty (length zero)."""
    if value is None:
        return False
    return len(value) != 0
# ---------------------------------------------------------------------------
# Lexicon-based polarity scoring.
#
# Every '.txt' file found directly inside `mypath` is scored against two word
# lists (positive.txt / negative.txt, read from the current working
# directory). For each document the script counts how many lexicon entries
# occur in the text, derives NET = P - N, labels the document
# POSITIVE / NEUTRAL / NEGATIVE, and writes one row per document to
# 'oursentiment.csv'.
# ---------------------------------------------------------------------------

# Characters removed from a document before matching: all ASCII punctuation,
# line breaks and spaces. '\r' is listed explicitly because files are opened
# in plain 'r' mode instead of the old 'rU' mode.
_DROPPED_CHARS = frozenset(string.punctuation + '\n\r ')


def _load_lexicon(path):
    """Return the stripped, lower-cased, non-empty lines of *path* as a list."""
    # 'with' closes the file; the previous code wrote `f.close` without
    # parentheses, which never actually closed the handle.
    with open(path, 'r') as handle:
        terms = [line.strip().lower() for line in handle]
    return [term for term in terms if IsNotNull(term)]


def _normalize(text):
    """Return *text* lower-cased with punctuation, newlines and spaces removed."""
    # Lower-casing is required because the lexicon entries are lower-cased
    # when loaded; without it capitalised words could never match.
    return ''.join(ch for ch in text if ch not in _DROPPED_CHARS).lower()


def _count_hits(terms, text):
    """Count the entries of *terms* that occur as a substring of *text*."""
    # A term listed twice in the lexicon is counted twice, as before.
    return sum(1 for term in terms if term in text)


def _label(net):
    """Map a net score to 'POSITIVE', 'NEUTRAL' or 'NEGATIVE'."""
    if net > 0:
        return 'POSITIVE'
    if net == 0:
        return 'NEUTRAL'
    return 'NEGATIVE'


mypath = '/Users/francis/Documents/FORDHAM/2nd Term/Text Analytics/'  # path where files are located
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

dict_p = _load_lexicon('positive.txt')
dict_n = _load_lexicon('negative.txt')

# NOTE(review): spaces and punctuation are stripped from the document but not
# from the lexicon entries, so an entry containing either can never match,
# and matching is by substring rather than by whole word. Kept as-is;
# confirm this is the intended behaviour.
totallist = []
counti = 0
for i in onlyfiles:
    if not i.endswith('.txt'):
        continue
    print(i)
    # The document handle is closed as soon as the text has been read.
    with open(join(mypath, i), 'r') as f:
        raw = _normalize(f.read())
    qa = _count_hits(dict_p, raw)   # positive hits (column P)
    qb = _count_hits(dict_n, raw)   # negative hits (column N)
    qc = qa - qb                    # net polarity (column NET)
    totallist.append([i, qa, qb, qc, _label(qc)])
    print(counti)                   # zero-based progress counter
    counti += 1

labels = ('file', 'P', 'N', 'NET', 'SENTIMENT')
df = pd.DataFrame.from_records(totallist, columns=labels)
df.to_csv('oursentiment.csv', index=False)
#print dict_p
# allbigrams.append(ngram_list(file,2))
# print i + ' BIGRAM - OK'
# alltrigrams.append(ngram_list(file,3))
# print i + ' TRIGRAM - OK'
# allfourgrams.append(ngram_list(file,4))
# print i + ' FOURGRAM - OK'
# allfivegrams.append(ngram_list(file,5))
# print i + ' FIVEGRAM - OK'
# allsixgrams.append(ngram_list(file,6))
# print i + ' SIXGRAM - OK'
# allsevengrams.append(ngram_list(file,7))
# print i + ' SEVENGRAM - OK'
# alleightgrams.append(ngram_list(file,8))
# print i + ' EIGHTGRAM - OK'