Skip to content

Commit 371dfde

Browse files
committed
Minor fixes
1 parent 017570c commit 371dfde

File tree

2 files changed

+17
-8
lines changed

2 files changed

+17
-8
lines changed
189 Bytes
Binary file not shown.

OpnKeywords/scripts/keywords.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,17 @@ def deEmojify(self, text):
2222

2323
def getKeywords(self, text):
2424
text = """{}""".format(text)
25-
text = text.replace(r"\r\n", " ")
25+
text = text.replace(r"\n", " ")
26+
text = text.replace(r"\r", " ")
27+
text = self.deEmojify(text)
28+
text = text.replace("\n", " ")
29+
text = text.replace("\r", " ")
30+
symbols = '''@#$%^&*()_+-=|[]\:"';<>?,/—’”“:..! '''
31+
text = text.replace(symbols, " ")
2632

33+
while " " in text:
34+
text = text.replace(" ", " ")
35+
print(r"{}".format(text))
2736
datadict={} #Dictionary for dataframe
2837
xlabels=[] #List for xlabels
2938

@@ -34,15 +43,15 @@ def getKeywords(self, text):
3443
for cword in wline.split():
3544
cword2=cword.lower()
3645
cwlist.append(cword2)
37-
38-
symbols = '''!@#$%^&*()_+-=|[]\:"';<>?,./'''
39-
text = text.replace(" ", " ")
46+
4047
for word in text.split(" "):
41-
worde=self.deEmojify(word)
42-
wordf=worde.lower()
43-
word2=wordf.strip(symbols)
48+
worde = self.deEmojify(word)
49+
wordf = worde.lower()
50+
word2 = wordf.strip(symbols)
51+
word2 = word2.rstrip(symbols)
52+
word2 = word2.lstrip(symbols)
4453
#print(word)
45-
if not word2.isnumeric():
54+
if not word2.isnumeric() and word2 not in list(symbols): # and not (bool(re.match('^[a-zA-Z0-9]*$', word2))):
4655
if word2 in datadict:
4756
datadict[word2]+=1
4857
elif word2 not in cwlist:

0 commit comments

Comments
 (0)