-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
112 lines (92 loc) · 3.91 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import config
import pandas as pd
from sqlalchemy import create_engine
import threading
import datetime
import tweepy
class Keyword:
    """Poll the Twitter search API for one keyword and store the hits.

    Attributes:
        keyword: Search term handed to the Twitter search call.
        wait_seconds: Delay (in seconds) before the next poll. It is adapted
            after every run depending on how many tweets were inserted.
    """

    # Denominator of the reported success rate.
    # NOTE(review): presumably the number of tweets one search returns - confirm.
    EXPECTED_TWEETS = 15
    # More successful inserts than this -> poll more often.
    SPEED_UP_ABOVE = 11
    # Fewer successful inserts than this -> poll less often.
    SLOW_DOWN_BELOW = 5
    # Amount (seconds) by which the interval is changed per run.
    WAIT_STEP = 5
    # The interval is never reduced below this value; a zero or negative
    # interval would make threading.Timer fire immediately and turn the
    # polling into a tight loop.
    MIN_WAIT_SECONDS = 5

    def __init__(self, keyword, wait_seconds):
        self.wait_seconds = wait_seconds
        self.keyword = keyword

    @staticmethod
    def _connection_url():
        """Build the SQLAlchemy connection URL from the values in ``config``."""
        return "{dialect}+{driver}://{user}:{password}@{database}.{address}:{port}/{database}".format(
            dialect=config.db_dialect,
            driver=config.db_driver,
            user=config.db_username,
            password=config.db_password,
            database=config.db_database,
            address=config.db_address,
            port=config.db_port,
        )

    def _tweet_row(self, tweet):
        """Map one tweet object from the API to a dict keyed by table column."""
        user = tweet.user
        return {
            'tweet_id': tweet.id_str,
            'user_id': user.id_str,
            'user_name': user.name,
            'text': tweet.text,
            'time': tweet.created_at,
            'location': user.location,
            'follower_count': user.followers_count,
            'tweet_keyword': self.keyword,
            'friends_count': user.friends_count,
            'truncated': tweet.truncated,
            'retweet_count': tweet.retweet_count,
            'reply_to_status_id': tweet.in_reply_to_status_id_str,
            'reply_to_user_id': tweet.in_reply_to_user_id_str,
        }

    def _adapt_wait(self, success):
        """Shorten or lengthen ``wait_seconds`` based on the number of inserts.

        Many successful inserts shorten the interval, few lengthen it, and
        anything in between leaves it unchanged. The interval is only
        shortened while the result stays at or above ``MIN_WAIT_SECONDS``.
        """
        if success > self.SPEED_UP_ABOVE:
            shortened = self.wait_seconds - self.WAIT_STEP
            if shortened >= self.MIN_WAIT_SECONDS:
                self.wait_seconds = shortened
        elif success < self.SLOW_DOWN_BELOW:
            self.wait_seconds = self.wait_seconds + self.WAIT_STEP

    def insert_database(self):
        """Run one search, insert every tweet, then schedule the next run.

        Each tweet is written with its own one-row ``to_sql`` append, so a
        row that the database rejects does not prevent the remaining rows
        from being stored. Failed rows are counted and summarised once per
        run instead of being swallowed silently.

        After the run the polling interval is adapted and a
        ``threading.Timer`` is started that calls this method again after
        ``wait_seconds`` seconds.
        """
        success = 0
        failures = 0
        last_error = None

        engine = create_engine(self._connection_url())
        try:
            # Connect to Twitter API and query the keyword
            api = twitter()
            public_tweets = api.search(self.keyword)

            for tweet in public_tweets:
                # Build the one-row frame directly from the dict;
                # DataFrame.append was removed in pandas 2.0.
                frame = pd.DataFrame([self._tweet_row(tweet)])
                try:
                    frame.to_sql(config.db_table, engine, if_exists='append', index=False)
                except Exception as exc:
                    # Best effort: remember the failure and go on with the next tweet.
                    failures += 1
                    last_error = exc
                else:
                    success += 1
        finally:
            # Release the connection pool once per run, also when the search fails.
            engine.dispose()

        if failures:
            print("{0:d} insert(s) failed for {1:}. Last error: {2!r}"
                  .format(failures, self.keyword, last_error))

        self._adapt_wait(success)

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("API run completed for {0:}. Success Rate: {1:.0%}. Time: {2:}"
              .format(self.keyword, success / self.EXPECTED_TWEETS, timestamp))
        threading.Timer(self.wait_seconds, self.insert_database).start()
class EntryAlreadyExists(Exception):
    """Signal that the entry to be stored is already present in the database."""
def twitter():
    """Return a tweepy API client authenticated with the keys from ``config``.

    The consumer key/secret identify the application; the access
    token/secret are attached to the same OAuth handler afterwards.
    """
    handler = tweepy.OAuthHandler(config.consumer_key, config.consumer_secret)
    handler.set_access_token(config.access_token, config.access_token_secret)
    return tweepy.API(handler)
# Function to create objects
def create_object_list():
    """Return one ``Keyword`` object per entry in ``config.TWEET_KEYWORDS``.

    Every object is created with ``config.START_TIMER`` as its initial
    wait interval.
    """
    return [Keyword(term, config.START_TIMER) for term in config.TWEET_KEYWORDS]