@@ -22,8 +22,17 @@ def deEmojify(self, text):
22
22
23
23
def getKeywords (self , text ):
24
24
text = """{}""" .format (text )
25
- text = text .replace (r"\r\n" , " " )
25
+ text = text .replace (r"\n" , " " )
26
+ text = text .replace (r"\r" , " " )
27
+ text = self .deEmojify (text )
28
+ text = text .replace ("\n " , " " )
29
+ text = text .replace ("\r " , " " )
30
+ symbols = '''@#$%^&*()_+-=|[]\:"';<>?,/—’”“:..! '''
31
+ text = text .replace (symbols , " " )
26
32
33
+ while " " in text :
34
+ text = text .replace (" " , " " )
35
+ print (r"{}" .format (text ))
27
36
datadict = {} #Dictionary for dataframe
28
37
xlabels = [] #List for xlabels
29
38
@@ -34,15 +43,15 @@ def getKeywords(self, text):
34
43
for cword in wline .split ():
35
44
cword2 = cword .lower ()
36
45
cwlist .append (cword2 )
37
-
38
- symbols = '''!@#$%^&*()_+-=|[]\:"';<>?,./'''
39
- text = text .replace (" " , " " )
46
+
40
47
for word in text .split (" " ):
41
- worde = self .deEmojify (word )
42
- wordf = worde .lower ()
43
- word2 = wordf .strip (symbols )
48
+ worde = self .deEmojify (word )
49
+ wordf = worde .lower ()
50
+ word2 = wordf .strip (symbols )
51
+ word2 = word2 .rstrip (symbols )
52
+ word2 = word2 .lstrip (symbols )
44
53
#print(word)
45
- if not word2 .isnumeric ():
54
+ if not word2 .isnumeric () and word2 not in list ( symbols ): # and not (bool(re.match('^[a-zA-Z0-9]*$', word2))) :
46
55
if word2 in datadict :
47
56
datadict [word2 ]+= 1
48
57
elif word2 not in cwlist :
0 commit comments