main.py
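# main.py — continuously scrapes recent tweets about each company registered in
# MongoDB, runs VADER sentiment analysis on them, and emails subscribers a
# report with a CSV of the negative tweets.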
# Scraping, NLP, storage, and email dependencies
import twint
import pandas as pd
import nest_asyncio
import spacy
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import os
import time
from pymongo import MongoClient
from dotenv import load_dotenv
import yagmail

load_dotenv()          # read credentials from the local .env file
nest_asyncio.apply()   # let twint's asyncio event loop run inside this script
# Credentials pulled from the environment
password = os.getenv('mongo_pwd')
email = os.getenv('email')
email_password = os.getenv('email_password')

# MongoDB Atlas cluster holding the subscriber list
cluster = MongoClient("mongodb+srv://mongoadmin:{0}@cluster0.h0rrc.mongodb.net/?retryWrites=true&w=majority".format(password))
db = cluster['twittersentiment']
collection = db['emailmaster']

# NLP pipeline: spaCy for lemmatization, NLTK stopwords, VADER for sentiment
nlp = spacy.load("en_core_web_sm", disable=['parser', 'tagger', 'ner'])
stops = stopwords.words("english")
analyser = SentimentIntensityAnalyzer()

# SMTP session used to send the per-company reports
yag = yagmail.SMTP(email, email_password)
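# Normalize a raw tweet: lowercase it, keep only letters and whitespace,
# lemmatize with spaCy, and drop English stopwords.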
def cleaner(t):
    t = t.lower()
    t = "".join(x for x in t if x.isalpha() or x.isspace())
    t = nlp(t)
    lemmatized = []
    for word in t:
        lemma = word.lemma_.strip()
        if lemma and lemma not in stops:
            lemmatized.append(lemma)
    return " ".join(lemmatized)
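# Email every subscriber of a company its sentiment breakdown, attaching the
# CSV of negative tweets.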
def emailer(company, results, negative_tweets):
    query = {'company': company}
    mydoc = collection.find(query)
    positive = results['pos']
    negative = results['neg']
    neutral = results['neu']
    addresses = [x['email'] for x in mydoc]
    for e in addresses:
        yag.send(
            to=e,
            subject='Report About {0}'.format(company),
            contents='Hi! This is your report. We scraped up to 500 of the most recent tweets and found {0} positive, {1} neutral, and {2} negative tweets. Attached is a CSV file of all negative tweets with username and tweet. Thanks!'.format(positive, neutral, negative),
            attachments=negative_tweets,
        )
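# Poll MongoDB every 10 minutes: for each distinct company, scrape recent
# tweets with twint, score them with VADER, save the negative tweets to a CSV,
# and email the report to subscribers.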
c = twint.Config()
os.makedirs("./tweetstorage", exist_ok=True)  # ensure the attachment directory exists
while True:
    queries = collection.distinct("company")
    if len(queries) > 0:
        for i in queries:
            filename = i + ".csv"
            c.Search = i
            c.Limit = 500
            c.Store_csv = True
            c.Output = filename
            c.Hide_output = True
            twint.run.Search(c)
            try:
                df = pd.read_csv(filename)
                tweets = list(df['tweet'])
                names = list(df['username'])
                cleaned = [cleaner(t) for t in tweets]
                print(len(cleaned))
                result = {'pos': 0, 'neg': 0, 'neu': 0}
                negatory_comments = []
                negatory_names = []
                # Pair each cleaned tweet with its original text and author by
                # position; the previous list.index() lookup returned the first
                # match and so broke on duplicate tweets.
                for idx, comment in enumerate(cleaned):
                    score = analyser.polarity_scores(comment)
                    if score['compound'] > 0.05:
                        result['pos'] += 1
                    elif score['compound'] < -0.05:
                        result['neg'] += 1
                        negatory_comments.append(tweets[idx])
                        negatory_names.append(names[idx])
                    else:
                        result['neu'] += 1
                end_df = pd.DataFrame({"Name": negatory_names, "Tweet": negatory_comments})
                end_df.to_csv("./tweetstorage/" + filename)
                print("{0}'s breakdown is".format(i))
                print(result)
                os.remove(filename)
                emailer(i, result, "./tweetstorage/" + filename)
            except Exception as exc:
                # Skip companies whose scrape produced no CSV or a malformed
                # one, but surface the error instead of failing silently.
                print("Skipping {0}: {1}".format(i, exc))
    else:
        print("No distinct companies.")
    time.sleep(600)