diff --git a/1_1/1_1_concatenate_into_streamT.py b/1_1/1_1_concatenate_into_streamT.py new file mode 100644 index 0000000..7f6357a --- /dev/null +++ b/1_1/1_1_concatenate_into_streamT.py @@ -0,0 +1,59 @@ +#Kasane Utsumi - 3/14/2015 +#1_1_concatenate_into_streamT.py +#This code dumps all seven collections starting with db_streamT%StartDate% (%StartDate% is a passed command line argument) into db_streamT + +import os +import json +import pymongo +import sys +from bson.json_util import dumps + +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +#clear table +db_streamT.drop() + +stream1 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[1]] +stream2 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[2]] +stream3 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[3]] +stream4 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[4]] +stream5 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[5]] +stream6 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[6]] +stream7 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[7]] + +#get total count for all collection so I can compare with count of db_streamT after filling it up so I know that concatenation was successful. +individualTotal = stream1.find().count() + stream2.find().count() + stream3.find().count() + stream4.find().count() + stream5.find().count() + stream6.find().count() + stream7.find().count() + + +#clear the current content +db_streamT.drop() + +def addThisCollection(collection): + for content in collection.find(): + db_streamT.insert(content) + +addThisCollection(stream1) +addThisCollection(stream2) +addThisCollection(stream3) +addThisCollection(stream4) +addThisCollection(stream5) +addThisCollection(stream6) +addThisCollection(stream7) + +print "individual total is " + str(individualTotal) +print "StreamT length is " + str(db_streamT.find().count()) +print "Number of items match?: " + str(individualTotal == db_streamT.find().count()) + diff --git a/1_1/1_1_concatenate_into_streamT.py~ b/1_1/1_1_concatenate_into_streamT.py~ new file mode 100644 index 0000000..7f6357a --- /dev/null +++ b/1_1/1_1_concatenate_into_streamT.py~ @@ -0,0 +1,59 @@ +#Kasane Utsumi - 3/14/2015 +#1_1_concatenate_into_streamT.py +#This code dumps all seven collections starting with db_streamT%StartDate% (%StartDate% is a passed command line argument) into db_streamT + +import os +import json +import pymongo +import sys +from bson.json_util import dumps + +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +#clear table +db_streamT.drop() + +stream1 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[1]] +stream2 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[2]] +stream3 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[3]] +stream4 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[4]] +stream5 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[5]] +stream6 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[6]] +stream7 = mongoConnection['twitter_analyzer']['db_streamT' + sys.argv[7]] + +#get total count for all collection so I can compare with count of db_streamT after filling it up so I know that concatenation was successful. +individualTotal = stream1.find().count() + stream2.find().count() + stream3.find().count() + stream4.find().count() + stream5.find().count() + stream6.find().count() + stream7.find().count() + + +#clear the current content +db_streamT.drop() + +def addThisCollection(collection): + for content in collection.find(): + db_streamT.insert(content) + +addThisCollection(stream1) +addThisCollection(stream2) +addThisCollection(stream3) +addThisCollection(stream4) +addThisCollection(stream5) +addThisCollection(stream6) +addThisCollection(stream7) + +print "individual total is " + str(individualTotal) +print "StreamT length is " + str(db_streamT.find().count()) +print "Number of items match?: " + str(individualTotal == db_streamT.find().count()) + diff --git a/1_1/1_1_store_tweets_into_db_streamT.py b/1_1/1_1_store_tweets_into_db_streamT.py new file mode 100644 index 0000000..58a1943 --- /dev/null +++ b/1_1/1_1_store_tweets_into_db_streamT.py @@ -0,0 +1,59 @@ +#Kasane Utsumi - 3/14/2015 +#1_1_store_tweets_into_db_streamT.py +#This file retrieves tweets by search term “"#microsoft OR #mojang" specified by date range and dumps them into db_streamT%StartDate%(replace ‘%StartDate%’ with start date that was passed to the command.) database. + +import tweepy +import urllib +import pymongo +import sys +import json +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + + +#configure tweepy +consumer_key = "" +consumer_secret = "" +access_token = "" +access_token_secret = "" + +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#get string to search for and start and end date from urllib +q = urllib.quote_plus("#microsoft OR #mojang") +start = urllib.quote_plus(sys.argv[1]) +end = urllib.quote_plus(sys.argv[2]) + +#set up mongodb collection +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables - will create separate table for each day and merge all of the tables into db_streamT later. +#I will do this so that if error happens while getting tweets for one day I don't have to run the program +# for entire week again. +database = mongoConnection['twitter_analyzer'] +db_streamT = database['db_streamT' + start] + +#clean table is there are any data +db_streamT.drop() + +# Additional query parameters: +# since: {date} +# until: {date} +# Just add them to the 'q' variable: q+" since: 2014-01-01 until: 2014-01-02" + +try: + for tweet in tweepy.Cursor(api.search,q=q+" since:" + start + " until:" + end).items(): + db_streamT.insert(tweet._json) +except: + print "tweet retrieval failed, exiting" + exit() diff --git a/1_1/1_1_store_tweets_into_db_streamT.py~ b/1_1/1_1_store_tweets_into_db_streamT.py~ new file mode 100644 index 0000000..198558c --- /dev/null +++ b/1_1/1_1_store_tweets_into_db_streamT.py~ @@ -0,0 +1,59 @@ +#Kasane Utsumi - 3/14/2015 +#1_1_store_tweets_into_db_streamT.py +#This file retrieves tweets by search term “"#microsoft OR #mojang" specified by date range and dumps them into db_streamT%StartDate%(replace ‘%StartDate%’ with start date that was passed to the command.) database. + +import tweepy +import urllib +import pymongo +import sys +import json +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + + +#configure tweepy +consumer_key = "10G4NlBUpM9nusmE9nSoeGQnk" +consumer_secret = "KcH2Ykf253L0tTCuzIyqDUPnkEZ7mZhIiHCYiS84LbZNCsQwRu" +access_token = "2988143343-waN3T7DFy7j0Yn95hDdXOMLpdRfHzG66SnOZlHO" +access_token_secret = "TDd8WId2f7Cw8jDLdPcjJRM5lTlMGYiuLjUl1ped21euS" + +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#get string to search for and start and end date from urllib +q = urllib.quote_plus("#microsoft OR #mojang") +start = urllib.quote_plus(sys.argv[1]) +end = urllib.quote_plus(sys.argv[2]) + +#set up mongodb collection +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables - will create separate table for each day and merge all of the tables into db_streamT later. +#I will do this so that if error happens while getting tweets for one day I don't have to run the program +# for entire week again. +database = mongoConnection['twitter_analyzer'] +db_streamT = database['db_streamT' + start] + +#clean table is there are any data +db_streamT.drop() + +# Additional query parameters: +# since: {date} +# until: {date} +# Just add them to the 'q' variable: q+" since: 2014-01-01 until: 2014-01-02" + +try: + for tweet in tweepy.Cursor(api.search,q=q+" since:" + start + " until:" + end).items(): + db_streamT.insert(tweet._json) +except: + print "tweet retrieval failed, exiting" + exit() diff --git a/1_2/1_2_fill_db_tweets.py b/1_2/1_2_fill_db_tweets.py new file mode 100644 index 0000000..9779cd0 --- /dev/null +++ b/1_2/1_2_fill_db_tweets.py @@ -0,0 +1,42 @@ +#Kasane Utsumi - 3/14/2015 +#1_2_fill_db_tweets.py +#This file takes all tweets (with entire tweet information in json format) from db_streamT and stores tweet text ONLY into db_tweets collection. + +import os +import json +import pymongo +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +if db_streamT == None + print "db_streamT not found! exiting..." + exit() + +db_tweets = mongoConnection['twitter_analyzer'].db_tweets + +#clear the current content +db_tweets.drop() + +#extract tweet from tweet json +for tJson in db_streamT.find(): + #print json.loads(dumps(tJson["text"])).encode('utf8') + tweetOnlyEntry = {"text" : json.loads(dumps(tJson["text"]))} + db_tweets.insert(tweetOnlyEntry) + +#check that addition happened fine +print "Does length of db_streamT equal that of db_tweets?" + str(db_streamT.find().count() == db_tweets.find().count()) diff --git a/1_2/1_2_fill_db_tweets.py~ b/1_2/1_2_fill_db_tweets.py~ new file mode 100644 index 0000000..9779cd0 --- /dev/null +++ b/1_2/1_2_fill_db_tweets.py~ @@ -0,0 +1,42 @@ +#Kasane Utsumi - 3/14/2015 +#1_2_fill_db_tweets.py +#This file takes all tweets (with entire tweet information in json format) from db_streamT and stores tweet text ONLY into db_tweets collection. + +import os +import json +import pymongo +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +if db_streamT == None + print "db_streamT not found! exiting..." + exit() + +db_tweets = mongoConnection['twitter_analyzer'].db_tweets + +#clear the current content +db_tweets.drop() + +#extract tweet from tweet json +for tJson in db_streamT.find(): + #print json.loads(dumps(tJson["text"])).encode('utf8') + tweetOnlyEntry = {"text" : json.loads(dumps(tJson["text"]))} + db_tweets.insert(tweetOnlyEntry) + +#check that addition happened fine +print "Does length of db_streamT equal that of db_tweets?" + str(db_streamT.find().count() == db_tweets.find().count()) diff --git a/2_1/.hello.txt.swp b/2_1/.hello.txt.swp new file mode 100644 index 0000000..c95007c Binary files /dev/null and b/2_1/.hello.txt.swp differ diff --git a/2_1/2_1_get_top_30_retweets.py b/2_1/2_1_get_top_30_retweets.py new file mode 100644 index 0000000..551a98b --- /dev/null +++ b/2_1/2_1_get_top_30_retweets.py @@ -0,0 +1,104 @@ +#Kasane Utsumi - 3/14/2015 +#2_1_get_top_retweets.py +#This retrieves top 30 tweets using information in db_tweets and db_streamT and them dump top 30 retweeted users into db_top30RetweetedUsers. +import os +import json +import pymongo +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweets = mongoConnection['twitter_analyzer'].db_tweets +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +db_retweets = mongoConnection['twitter_analyzer'].db_all_retweets +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users + +db_retweets.drop() +db_top30RetweetedUsers.drop() + +#create a dictionary of retweeted id is key, and number of occurrence as a value +retweetDict = dict() + +#also, dump all of the tweets into db_all_retweets so it would be easy to get user and location for top 30 later. + +for tJson in db_tweets.find(timeout=False): + + tweetText = tJson['text'] + + #print "tweet is " + tweetText.encode('utf8') + + if tweetText.startswith("RT"): # thi s could be retweet. Look in db_streamT to see if corresponding tweet has a retweeted_status + + #print "starts with RT" + + fullTweets = db_streamT.find({'text':tweetText}) + + for correspondingTweet in fullTweets: #iterate just in case if there is > 1 tweet with identical name + + if 'retweeted_status' in correspondingTweet: #Now we are sure that this is a retweet because of existence of retweeted_status + + retweet = correspondingTweet['retweeted_status'] + id = retweet['id'] + + #print "original tweet id" + str(id) + + retweetDBEntry = {"id" : id, "retweetJson" : retweet} + + db_retweets.insert(retweetDBEntry) + if id in retweetDict: + retweetDict[id] += 1 + else: retweetDict[id] = 1 + + +#check the dictionary to make sure it has what I want +#for key in retweetDict: +# if (retweetDict[key] >1): +# print str(key) + " " + str(retweetDict[key]) + +#convert retweetDict into tuples so I can sort by number of frequencies, then sort by frequncy +retweetTuple = sorted(tuple(retweetDict.iteritems()),key=lambda x: (-x[1],x[0])) + +#check the tuple to see if it has what I want +#for (id,frequency) in retweetTuple: +# if frequency > 1: +# print str(id) + " " +str(frequency) + +#exit() + +#print out the top tweeted user , also store them in top30_users collection so they can be retreived for other analysis +tupleIndex = 0 +for (id,frequency) in retweetTuple: + retweet = db_retweets.find_one({"id":id}) + + if (retweet == None): + print "Something went wrong, could not find retweet with id" + str(id) + else: + retweetJson = json.loads(dumps(retweet["retweetJson"])) + topTweetedUser = retweetJson['user'] + + userDBEntry = {"id": topTweetedUser['id'], "userInfo" : topTweetedUser, "frequency": frequency} + db_top30RetweetedUsers.insert(userDBEntry) + + #print out retweet, user name and location + print "Top Retweet Rank " + str(tupleIndex+1) + print "Tweet: " + retweetJson["text"].encode('utf8') + print "User: " + topTweetedUser["name"].encode('utf8') + "(" + str(topTweetedUser["id"]) + ")" + " at " + topTweetedUser['location'].encode('utf8') + " - has follower count" + str(topTweetedUser['followers_count']) + print " " + + #get only top 30 + tupleIndex = tupleIndex + 1 + if tupleIndex == 30: + exit() diff --git a/2_1/2_1_get_top_30_retweets.py~ b/2_1/2_1_get_top_30_retweets.py~ new file mode 100644 index 0000000..d966a70 --- /dev/null +++ b/2_1/2_1_get_top_30_retweets.py~ @@ -0,0 +1,104 @@ +#Kasane Utsumi - 3/14/2015 +#2_1_get_top_retweets.py +#This code dumps all seven collections starting with db_streamT%StartDate% (%StartDate% is a passed command line argument) into db_streamT +import os +import json +import pymongo +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweets = mongoConnection['twitter_analyzer'].db_tweets +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +db_retweets = mongoConnection['twitter_analyzer'].db_all_retweets +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users + +db_retweets.drop() +db_top30RetweetedUsers.drop() + +#create a dictionary of retweeted id is key, and number of occurrence as a value +retweetDict = dict() + +#also, dump all of the tweets into db_all_retweets so it would be easy to get user and location for top 30 later. + +for tJson in db_tweets.find(timeout=False): + + tweetText = tJson['text'] + + #print "tweet is " + tweetText.encode('utf8') + + if tweetText.startswith("RT"): # thi s could be retweet. Look in db_streamT to see if corresponding tweet has a retweeted_status + + #print "starts with RT" + + fullTweets = db_streamT.find({'text':tweetText}) + + for correspondingTweet in fullTweets: #iterate just in case if there is > 1 tweet with identical name + + if 'retweeted_status' in correspondingTweet: #Now we are sure that this is a retweet because of existence of retweeted_status + + retweet = correspondingTweet['retweeted_status'] + id = retweet['id'] + + #print "original tweet id" + str(id) + + retweetDBEntry = {"id" : id, "retweetJson" : retweet} + + db_retweets.insert(retweetDBEntry) + if id in retweetDict: + retweetDict[id] += 1 + else: retweetDict[id] = 1 + + +#check the dictionary to make sure it has what I want +#for key in retweetDict: +# if (retweetDict[key] >1): +# print str(key) + " " + str(retweetDict[key]) + +#convert retweetDict into tuples so I can sort by number of frequencies, then sort by frequncy +retweetTuple = sorted(tuple(retweetDict.iteritems()),key=lambda x: (-x[1],x[0])) + +#check the tuple to see if it has what I want +#for (id,frequency) in retweetTuple: +# if frequency > 1: +# print str(id) + " " +str(frequency) + +#exit() + +#print out the top tweeted user , also store them in top30_users collection so they can be retreived for other analysis +tupleIndex = 0 +for (id,frequency) in retweetTuple: + retweet = db_retweets.find_one({"id":id}) + + if (retweet == None): + print "Something went wrong, could not find retweet with id" + str(id) + else: + retweetJson = json.loads(dumps(retweet["retweetJson"])) + topTweetedUser = retweetJson['user'] + + userDBEntry = {"id": topTweetedUser['id'], "userInfo" : topTweetedUser, "frequency": frequency} + db_top30RetweetedUsers.insert(userDBEntry) + + #print out retweet, user name and location + print "Top Retweet Rank " + str(tupleIndex+1) + print "Tweet: " + retweetJson["text"].encode('utf8') + print "User: " + topTweetedUser["name"].encode('utf8') + "(" + str(topTweetedUser["id"]) + ")" + " at " + topTweetedUser['location'].encode('utf8') + " - has follower count" + str(topTweetedUser['followers_count']) + print " " + + #get only top 30 + tupleIndex = tupleIndex + 1 + if tupleIndex == 30: + exit() diff --git a/2_1/final_top30_retweets.txt b/2_1/final_top30_retweets.txt new file mode 100644 index 0000000..7aea4e8 --- /dev/null +++ b/2_1/final_top30_retweets.txt @@ -0,0 +1,120 @@ +Top Retweet Rank 1 +Tweet: @iamhardwellfan @FakeContestAlrt @LumiaIndia sponsor of #fakecontest @NargisFakhri #romanceurcity #microsoft #fake no winner announcement +User: Sanjay(2206092889) at - has follower count2758 + +Top Retweet Rank 2 +Tweet: DRINGEND VERZOEK aan Microsoft: wilt u mij niet meer tien keer per dag thuis laten bellen vanuit India voor marketingdoeleinden? #Microsoft +User: Ton Elias(133272665) at Netherlands - has follower count16568 + +Top Retweet Rank 3 +Tweet: All versions of #Microsoft Windows are vulnerable to SSL/TLS FREAK #vulnerability — http://t.co/8TkisXHjWt http://t.co/ZFkCySwbne +User: The Hacker News(209811713) at THE INTERNET - has follower count188559 + +Top Retweet Rank 4 +Tweet: Epic Organizational Charts #Microsoft #Apple #Oracle http://t.co/biNu2Q109a +User: Oliver Hansen(759826807) at Geel, Belgium - has follower count24176 + +Top Retweet Rank 5 +Tweet: #Microsoft's new #Lumia #phones come with a free year of #Office 365 http://t.co/Uw3W1Hg1jD #tech #devbattles http://t.co/isAE07v5iY +User: Dev Battles(2377678050) at - has follower count7086 + +Top Retweet Rank 6 +Tweet: Le cofondateur de #Microsoft a découvert l'épave d'un cuirassé géant japonais, coulé en 1944 http://t.co/O72Wn8Kowh http://t.co/1cxtqoOYHT +User: Le Monde(24744541) at Paris - has follower count3579368 + +Top Retweet Rank 7 +Tweet: VIDEO:Cofundador de #Microsoft halla en el fondo del mar al acorazado más letal de la historia http://t.co/uk1YMdgcYp http://t.co/gkEFhgtN9A +User: RT en Español(100731315) at - has follower count540132 + +Top Retweet Rank 8 +Tweet: VIDEO:Cofundador de #Microsoft halla en el fondo del mar al acorazado más letal de la historia http://t.co/uk1YMdgcYp http://t.co/gkEFhgtN9A +User: RT en Español(100731315) at - has follower count540130 + +Top Retweet Rank 9 +Tweet: New #free ebook! #Microsoft #Azure Essentials: Azure Automation http://t.co/3V6buNJiBr #MSDev #ITPro http://t.co/G81Zhed3j9 +User: Microsoft Press(19601111) at Redmond, WA - has follower count158331 + +Top Retweet Rank 10 +Tweet: VIDEO:Cofundador de #Microsoft halla en el fondo del mar al acorazado más letal de la historia http://t.co/uk1YMdgcYp http://t.co/gkEFhgtN9A +User: RT en Español(100731315) at - has follower count540126 + +Top Retweet Rank 11 +Tweet: #Microsoft #Yammer støtter nå #Handoff for #Mac #Yosemite #iphone #ipad >>> Yammer Now Supports Handoff - YouTube https://t.co/oCQ1h1j8qX +User: Arno Vaa(3049567425) at Oslo, Norway - has follower count11 + +Top Retweet Rank 12 +Tweet: #LoMásLeídoDeLaSemana Cofundador #Microsoft halla el acorazado más letal de la historia VIDEO http://t.co/uk1YMdgcYp http://t.co/K1eOMSogGf +User: RT en Español(100731315) at - has follower count540158 + +Top Retweet Rank 13 +Tweet: Ranking:Top 20 #InternetofThings companies right now #Intel #1 http://t.co/Y6vnYct1R3 #IoT #Microsoft #Cisco #Google http://t.co/Ip1vZ0wetK +User: IoT Analytics(2809878539) at Berlin, Germany - has follower count169 + +Top Retweet Rank 14 +Tweet: #LoMásLeídoDeLaSemana Cofundador #Microsoft halla el acorazado más letal de la historia VIDEO http://t.co/uk1YMdgcYp http://t.co/K1eOMSogGf +User: RT en Español(100731315) at - has follower count540159 + +Top Retweet Rank 15 +Tweet: El cofundador de #Microsoft halla en aguas filipinas un acorazado japonés hundido http://t.co/WK3GD7VphS http://t.co/VfIPuJjw5y +User: ABC.es(19923515) at Madrid - has follower count716491 + +Top Retweet Rank 16 +Tweet: What is The Cloud & Why Move? > http://t.co/PHPkWLShkK < http://t.co/HQvA1lqCSF #thecloud #cloudcomputing #Azure #Microsoft #videomarketing +User: Queue Associates(160549126) at NYC - London - Hong Kong - has follower count278 + +Top Retweet Rank 17 +Tweet: Logging in with Google, #Microsoft and Facebook SDKs to #Azure Mobile Services: http://t.co/mcmPFrU36B http://t.co/ZLKZWr0kBj +User: Microsoft Azure(17000457) at Redmond, WA - has follower count303879 + +Top Retweet Rank 18 +Tweet: New #free ebook! #Microsoft #SystemCenter Software Update Management Field Experience http://t.co/mrpkp5Sclr #ITPro http://t.co/0ScgyZt0qF +User: Microsoft Press(19601111) at Redmond, WA - has follower count158327 + +Top Retweet Rank 19 +Tweet: #Microsoft #XboxOne DirectX 12 boosts the Xbox One GPU by 20% http://t.co/TWMyDJer7K http://t.co/h6rBwTtwYH +User: The Inner Circle(2576487215) at Xbox Live Gamertag TiC Podcast - has follower count3491 + +Top Retweet Rank 20 +Tweet: 戦艦「武蔵」を発見 マイクロソフト創業者がツイッターで発表(産経新聞) http://t.co/shaYyO6szO #microsoft +User: Microsoft なう(152585619) at 日本国東京都港区港南 - has follower count150067 + +Top Retweet Rank 21 +Tweet: "Very soon you will see #Collector working on the #Windows platform" #EsriEPC - @msretail #Microsoft #Esri #mobile +User: Esri Business Team(132987218) at Redlands, CA - has follower count932 + +Top Retweet Rank 22 +Tweet: Update: #Microsoft warns #PCs are also vulnerable to '#Freak' #security flaw: http://t.co/JSJYjwwqER #Tech #CyberSecurity +User: Debnicolina(83316868) at Sicilia,Italia|Global|✈&Hotel - has follower count27263 + +Top Retweet Rank 23 +Tweet: 8で採用されたチャームは不評だったので、廃止する方向!? Windows 10、状態の確認や各種設定が行える「アクションセンター」 http://t.co/GO3qfdPlwU #Windows10 #Microsoft http://t.co/riAZJSrH6C +User: ASCII.jp編集部(320548088) at 東京都千代田区の飯田橋 - has follower count27697 + +Top Retweet Rank 24 +Tweet: Movistar TV Go integrará #Cortana, el asistente personal de voz de #Microsoft @MicrosoftES @LumiaES #MWC15 http://t.co/WBeXJ56W2Q +User: Solange Cummins(291140092) at - has follower count1026 + +Top Retweet Rank 25 +Tweet: #Microsoft #XboxOne Microsoft working on a hardcore game controller for Xbox One http://t.co/iULMpm6TUZ http://t.co/cBl4rwyKC5 +User: The Inner Circle(2576487215) at Xbox Live Gamertag TiC Podcast - has follower count3492 + +Top Retweet Rank 26 +Tweet: #Microsoft lays off 18k workers http://t.co/FUqxsZzm1l but files for 4k #H1B foreign workers http://t.co/30oSipwIYn #immigration +User: Susan Pai(15497093) at Jacksonville, FL - has follower count2575 + +Top Retweet Rank 27 +Tweet: First look: #Microsoft's all-new Office 2016 for #Mac public preview http://t.co/GAWV8OX1mB http://t.co/fRU7mLlD5U +User: AppleInsider(20542450) at Cupertino, California - has follower count203540 + +Top Retweet Rank 28 +Tweet: 戦艦「武蔵」発見、8年にわたる探索実る ポール・アレン氏、乗組員悼み日本政府と協力の意向 http://t.co/Eie1itVIAu #microsoft +User: Microsoft なう(152585619) at 日本国東京都港区港南 - has follower count150066 + +Top Retweet Rank 29 +Tweet: Microsoft’s mind-blowing vision of the future. See more here: http://t.co/yIjqVsiFMg via @GeekWire #Microsoft #Futuretech +User: Why Microsoft(159223609) at Redmond, WA - has follower count146928 + +Top Retweet Rank 30 +Tweet: Microsoft has created 3 billionaires and about 12,000 millionaires.#Microsoft #billionair +User: Strong facts(2837996969) at US - has follower count7759 + diff --git a/2_1/old_please_ignore/test.py b/2_1/old_please_ignore/test.py new file mode 100644 index 0000000..4e323e3 --- /dev/null +++ b/2_1/old_please_ignore/test.py @@ -0,0 +1,91 @@ +#Kasane Utsumi +import os +import json +import pymongo +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweets = mongoConnection['twitter_analyzer'].db_tweets +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +#create a dictionary of retweeted id is key, and number of occurrence as a value +retweetDict = dict() + +#also, dump all of the tweets into db_all_retweets so it would be easy to get user and location for top 30 later. + +for tJson in db_tweets.find(): + + tweetText = tJson['text'] + + #print "tweet is " + tweetText.encode('utf8') + + if tweetText.startswith("RT"): # thi s could be retweet. Look in db_streamT to see if corresponding tweet has a retweeted_status + + #print "starts with RT" + + fullTweets = db_streamT.find({'text':tweetText}) + + for correspondingTweet in fullTweets: #iterate just in case if there is > 1 tweet with identical name + + if 'retweeted_status' in correspondingTweet: #Now we are sure that this is a retweet because of existence of retweeted_status + + retweet = correspondingTweet['retweeted_status'] + id = retweet['id'] + + #print "original tweet id" + str(id) + + if id in retweetDict: + retweetDict[id] += 1 + else: retweetDict[id] = 1 + + +#check the dictionary to make sure it has what I want +#for key in retweetDict: +# if (retweetDict[key] >1): +# print str(key) + " " + str(retweetDict[key]) + +#convert retweetDict into tuples so I can sort by number of frequencies, then sort by frequncy +retweetTuple = sorted(tuple(retweetDict.iteritems()),key=lambda x: (-x[1],x[0])) + +#check the tuple to see if it has what I want +#for (id,frequency) in retweetTuple: +# if frequency > 1: +# print str(id) + " " +str(frequency) + +exit() + +#print out the top tweeted user , also store them in top30_users collection so they can be retreived for other analysis +tupleIndex = 0 +for (id,frequency) in retweetTuple: + retweet = db_retweets.find_one({"id":id}) + + if (retweet == None): + print "Something went wrong, could not find retweet with id" + str(id) + else: + retweetJson = json.loads(dumps(retweet["retweetJson"])) + topTweetedUser = retweetJson['user'] + + + #print out retweet, user name and location + print "Top Retweet Rank " + str(tupleIndex+1) + print "Tweet: " + retweetJson["text"].encode('utf8') + print "User: " + topTweetedUser["name"].encode('utf8') + " " + topTweetedUser["id"] + " at " + topTweetedUser['location'].encode('utf8') + print " " + + #get only top 30 + tupleIndex = tupleIndex + 1 + if tupleIndex == 30: + exit() diff --git a/2_1/old_please_ignore/top30WithFollowers.txt b/2_1/old_please_ignore/top30WithFollowers.txt new file mode 100644 index 0000000..74f3f28 --- /dev/null +++ b/2_1/old_please_ignore/top30WithFollowers.txt @@ -0,0 +1,29 @@ +93268224 11 +209811713 183045 +102957958 19220 +35120520 453763 +52660746 226488 +101166742 19243 +71907486 1630 +238099363 44374 +67497512 87 +2837996969 4060 +546679647 11281 +95438524 196661 +445076288 28 +56192577 8170 +16452299 124199 +19795404 125705 +165933992 21112 +205083867 7581 +8071902 512850 +2532942559 14452 +227337569 209 +19923515 696239 +288708452 377 +556085485 62349 +18906097 35217 +1489018610 402 +15901817 53827 +1413081085 1410 +254439807 1620 diff --git a/2_1/old_please_ignore/topTweet.txt b/2_1/old_please_ignore/topTweet.txt new file mode 100644 index 0000000..cdc1c61 --- /dev/null +++ b/2_1/old_please_ignore/topTweet.txt @@ -0,0 +1,1309 @@ +574204363922501633 33489 +574879846305767424 23409 +574046926896807936 14161 +574232065299742720 13225 +572673419587796992 5625 +573408211119706113 5184 +573178618525827072 4360 +573455471442448384 3815 +573894561682046977 3721 +573586054524989441 3706 +573381604246200320 2304 +574325994007326721 2074 +570287733198999552 1681 +574146750119735296 1647 +573607752335036417 1156 +573854425103532032 1089 +572905544266203136 784 +572872831538577408 676 +573218330464681985 529 +572614168618016771 484 +574638236615602176 441 +574652092792635393 441 +574771778054025216 400 +572707984351997952 361 +573313847978803200 361 +573531630507327488 361 +573586299153727488 361 +573095845337300992 324 +573220116726489091 324 +525006205476151296 289 +572690304203026432 289 +573824764218712064 289 +573637160844177408 256 +573883310830673920 256 +573203760316612608 225 +574662509531004928 225 +574970401740226561 225 +573045228178784256 196 +573287091553107968 196 +573354931995975680 196 +573966318908534785 196 +574202751489441793 196 +572747440467664896 169 +572882737142489088 169 +573969141935161344 169 +573630889223045120 148 +526842346634493953 144 +572755851376783361 144 +573226473621987328 144 +573317007485087744 144 +573949132626259968 144 +574521215693553664 144 +572593532411637762 121 +572675713754656768 121 +572761231720894467 121 +572800621306245121 121 +573244062309978112 121 +573311753523621888 121 +573375589987430400 121 +573541108514439168 121 +573612868983209985 121 +574343771032322048 121 +574951588323856384 121 +573475423646019584 120 +573646408026316800 105 +572866008387481603 100 +573203297395482624 100 +573226411261100032 100 +573312023188008960 100 +573660672980094976 100 +573780830901895168 100 +573814929674932225 100 +574045627241402368 100 +574460228130877440 100 +575027177399402499 100 +574621365904371713 99 +572551826152787968 81 +572637515640930304 81 +572787570729480192 81 +572820260107231232 81 +573006955951857664 81 +573082842772738049 81 +573187141909422080 81 +573471391334400000 81 +573578822009880576 81 +573589386601840640 81 +573612782404300800 81 +573839272396062721 81 +573940400341147648 81 +572729626708951040 64 +572744484120858624 64 +572855729431814145 64 +572993147329896448 64 +573181528475222016 64 +573192671180677120 64 +573201237471985665 64 +573219741034266625 64 +573222809561784321 64 +573303943305945088 64 +573374380513742848 64 +573489248520814592 64 +573495062010687488 64 +573552590606073856 64 +573772149246312448 64 +574131575241207809 64 +574185719243083777 64 +574812382121218049 64 +574998676528984065 64 +575053312241762304 64 +574001715638239232 54 +572405645007314944 50 +572487791248064512 49 +572536496730710016 49 +572638116508512256 49 +572698028425019392 49 +572705383703322624 49 +572721095402364928 49 +572826322172567555 49 +572871571825565696 49 +572948769068564480 49 +573116954275291137 49 +573136757945864192 49 +573179547761295360 49 +573463561072164864 49 +573470643246776320 49 +573472094509404160 49 +573483185482289153 49 +573561010474381312 49 +573687169941925888 49 +573929101179801600 49 +574004375422525440 49 +574049420972527616 49 +574274568682016768 49 +574438664320741376 49 +575038087723614208 49 +572652708739461121 42 +573502923377782784 42 +572466401849556994 36 +572508091331162113 36 +572553237238312960 36 +572592306982789120 36 +572660237481709568 36 +572745825945837568 36 +572765470014763009 36 +572777125700362243 36 +572779621248344064 36 +572794502496313345 36 +572848263520514048 36 +572868979909255169 36 +572869220989452288 36 +572878225707163649 36 +572879761476755457 36 +572886743361314816 36 +572923612690325505 36 +573057204548472832 36 +573117113629577216 36 +573211819625455616 36 +573408356511178752 36 +573424396293033984 36 +573445256366030848 36 +573456784662360065 36 +573494032220844032 36 +573503583921967104 36 +573513624041426945 36 +573524495081713664 36 +573537961997918208 36 +573544065825959936 36 +573550373325049856 36 +573608972923506688 36 +573679760674439169 36 +573761925852631040 36 +573768613066244096 36 +573781613647118336 36 +573786242669756416 36 +573956816608043010 36 +574137697117339648 36 +574198696717500416 36 +574245894477979648 36 +574269502377959424 36 +574358869385830401 36 +574594416460963840 36 +574721625683197952 36 +574847244618366976 36 +574854403611561985 36 +574857677723074560 36 +574948273162575872 36 +574979420915527683 36 +572811986775478273 35 +574506435255607296 35 +573054570794000384 30 +573018799839961089 28 +573589870540627969 27 +574273604411617283 27 +565323160477007874 25 +572252188136759296 25 +572400939329568768 25 +572456624977145856 25 +572463674486480896 25 +572668867006341120 25 +572670909183475712 25 +572679156946898944 25 +572688755611590656 25 +572708116338368512 25 +572721166663622656 25 +572768014740951041 25 +572774819030290432 25 +572785058920189953 25 +572798557125799937 25 +572807765221117952 25 +572880163723079680 25 +572880419881807874 25 +572886338615283712 25 +572886862597111808 25 +572907205738930176 25 +572919637429657600 25 +572976799916818432 25 +573037467114315776 25 +573060434313515008 25 +573102278921625601 25 +573104788143022081 25 +573108846199214080 25 +573113294837444609 25 +573118590846681089 25 +573121662570373120 25 +573137926831390720 25 +573144808258060289 25 +573180276223950848 25 +573210415095676928 25 +573211752118165504 25 +573220291360518144 25 +573235475286249472 25 +573240631998476288 25 +573310714191982592 25 +573397596326252544 25 +573562126343794688 25 +573572270721007616 25 +573640841794875393 25 +573668951516807168 25 +573699470447108096 25 +573782657147994112 25 +573791146964553728 25 +573881389537329152 25 +573897336960729088 25 +573938872717086721 25 +574005896843427840 25 +574046052053090304 25 +574277837269639168 25 +574294085860597761 25 +574554067583827969 25 +574572632030511104 25 +574601785333018625 25 +574610592356401154 25 +574663658132799488 25 +573711113373642752 24 +574192871596666882 24 +573774400983449600 22 +572715581775863808 21 +572819242204061696 20 +572868970509811714 20 +573421631139418112 20 +573756758839181312 20 +574974438170566658 20 +573182195264856064 18 +574817631598874624 18 +572051540573327360 16 +572428150065528832 16 +572448171294638080 16 +572449800509104128 16 +572483513674567682 16 +572523305187614721 16 +572534440112496640 16 +572553049991823360 16 +572615200609681408 16 +572651358580568065 16 +572652764981043200 16 +572660217218899968 16 +572667233521950720 16 +572668822446071809 16 +572668972690247680 16 +572669736477167616 16 +572675279224737792 16 +572682313999503360 16 +572697786162016256 16 +572701124626677760 16 +572703691712667648 16 +572704793023537152 16 +572705745080336384 16 +572773937010118657 16 +572778594952482816 16 +572792409857241088 16 +572798699555844096 16 +572805453945552898 16 +572807397003010049 16 +572818723058388992 16 +572820293250465792 16 +572821296381181952 16 +572822096683929601 16 +572826374211158016 16 +572846141282394114 16 +572855132913704961 16 +572873814473764864 16 +572879373381988353 16 +572880215115841537 16 +572926373624475648 16 +573008178461458433 16 +573019177755271168 16 +573019179130998784 16 +573058129241513984 16 +573058839291039744 16 +573059096171180034 16 +573073433623339008 16 +573083111766007808 16 +573093573547167744 16 +573133448698720256 16 +573149952911650817 16 +573192633180278784 16 +573207699082641409 16 +573228062545854464 16 +573235072234479616 16 +573235216191524864 16 +573236473102794752 16 +573240888022867968 16 +573271688814579712 16 +573275940689608704 16 +573291116440629249 16 +573305117983653888 16 +573328548246638592 16 +573343963412709376 16 +573361944788275200 16 +573381186896076800 16 +573381493088657408 16 +573387744652746752 16 +573388841731686400 16 +573438883406450688 16 +573472760342650880 16 +573477571855261696 16 +573505559418245120 16 +573543687294058497 16 +573567563902382080 16 +573569786149531648 16 +573615875128889344 16 +573623398967943168 16 +573626693765607425 16 +573655984465256448 16 +573679393643589632 16 +573741280590614528 16 +573763984857395200 16 +573810352720252928 16 +573816199789809664 16 +573817910822633472 16 +573848325243342849 16 +573849212993036289 16 +573854644444729344 16 +573861264209793025 16 +573941181064744960 16 +573942181326532608 16 +573964753514590208 16 +573985097604579329 16 +574034736055783424 16 +574230523675086848 16 +574329685158555649 16 +574355957850882048 16 +574624739202830336 16 +574641856220426240 16 +574770588201263104 16 +574845848359714816 16 +574894707127418880 16 +574895401666486272 16 +574958987545808896 16 +574991008531812352 16 +575029074373971971 16 +575033590829858816 16 +575063967028088832 16 +572826373363904513 15 +572707085110157312 14 +573638073679900672 12 +573853059958185988 12 +574264714517159936 12 +574744159434768384 12 +575006199201976320 10 +546999542038921216 9 +567023036897845249 9 +567768409862332416 9 +567992726672838656 9 +568092997147090945 9 +570871349213855745 9 +572139849299460096 9 +572375190589603841 9 +572479747898265600 9 +572532289978175488 9 +572557474051596288 9 +572593140713988097 9 +572611370656481280 9 +572624194338365440 9 +572624683343863808 9 +572625594476707840 9 +572626261450891264 9 +572633779023564801 9 +572639052878512128 9 +572657682294824960 9 +572658150509293568 9 +572679014160199680 9 +572716356128411648 9 +572719400245018624 9 +572742944387674112 9 +572745218417668096 9 +572748112143360001 9 +572753297154523136 9 +572758143026917377 9 +572765099934523392 9 +572769310269034497 9 +572777261490946051 9 +572788914001453056 9 +572805737736347649 9 +572818517495455745 9 +572822038840143872 9 +572822451656232960 9 +572823795041361920 9 +572824530772615168 9 +572825522712932352 9 +572831570018615297 9 +572834146311135234 9 +572839139290320897 9 +572848449453883392 9 +572879132389863425 9 +572880765219823616 9 +572890809042792448 9 +572911677798146048 9 +572948366788710400 9 +572961897491779584 9 +573013339896487936 9 +573041022248210432 9 +573049124049850368 9 +573068647813517313 9 +573073186474106881 9 +573095589447114752 9 +573109933186682880 9 +573118433828732928 9 +573122555009220609 9 +573131899704635392 9 +573150911574507520 9 +573151050116419585 9 +573167756448686080 9 +573169073925722113 9 +573191863630368768 9 +573193696776228865 9 +573201582851952640 9 +573202643490766849 9 +573210409022316544 9 +573214732527673344 9 +573219679159762945 9 +573228080929456128 9 +573235524808392704 9 +573236030641315840 9 +573257546523267073 9 +573292334583967744 9 +573302262388297728 9 +573310090385735680 9 +573337697256751104 9 +573356483045302272 9 +573366361826586624 9 +573387430268674048 9 +573398950348259329 9 +573411576277045248 9 +573412445131300865 9 +573418158176010240 9 +573424796895211521 9 +573431717979144192 9 +573438126087143425 9 +573476809347588096 9 +573479613176877056 9 +573482977872723969 9 +573490783145324544 9 +573500509463642113 9 +573506684821356545 9 +573515856761614336 9 +573521385403744257 9 +573525723195031552 9 +573526319721480192 9 +573527985489379328 9 +573530667931295745 9 +573533328822796288 9 +573537552881205249 9 +573553693318070272 9 +573558234071486464 9 +573564183154831361 9 +573565469673717761 9 +573568712269111296 9 +573572151988649984 9 +573573534246567939 9 +573595764565413888 9 +573603630479101953 9 +573606205400088576 9 +573616286653005824 9 +573623837444694016 9 +573631579580465154 9 +573644356487270400 9 +573665029788209152 9 +573709960644788224 9 +573710255194042369 9 +573744872139571201 9 +573765243467362304 9 +573813639880306688 9 +573825304692662273 9 +573829342687592448 9 +573829895782031360 9 +573835565247938560 9 +573836965864411136 9 +573845537440444417 9 +573850837807669248 9 +573853689263292417 9 +573855779154829312 9 +573865316029087744 9 +573878628284567552 9 +573889278167613440 9 +573897518775472128 9 +573905439324303360 9 +573920364050411521 9 +573933326991499264 9 +573937286062211072 9 +573991501811740672 9 +573994543407841282 9 +574045630588411904 9 +574076638641573888 9 +574144451896983552 9 +574163645346807808 9 +574223913565093889 9 +574236312464568320 9 +574255703998787588 9 +574277516396908544 9 +574278104044191744 9 +574284663352606722 9 +574300239315746817 9 +574300295833894914 9 +574327808899612673 9 +574375358058778624 9 +574488272887382016 9 +574490629511610369 9 +574498958766129154 9 +574503585079238656 9 +574504421859655680 9 +574504668899975169 9 +574512133645471744 9 +574545320148885505 9 +574548968224464896 9 +574549786268991488 9 +574569610852311041 9 +574615323896213504 9 +574632531053703168 9 +574655219000201216 9 +574683276373323776 9 +574736686288146432 9 +574759023951532033 9 +574762491785383936 9 +574770368650473473 9 +574780259708633088 9 +574840414458507264 9 +574842022730690560 9 +574866812296392704 9 +574873626874396672 9 +574884617943035904 9 +574887353350811648 9 +574895314047401984 9 +574899587632119808 9 +574899885893357568 9 +574907270657609728 9 +574936629715980288 9 +574945280379916288 9 +574948659986624512 9 +574957349653245953 9 +574963371512045568 9 +574974263955824641 9 +574992381298151425 9 +574992383135215616 9 +574992385089781760 9 +575003393862725633 9 +575006020763656192 9 +575016101232291840 9 +575016794546556929 9 +575029131802361856 9 +573074032016404480 8 +573149904765243392 8 +573419836262490115 8 +573428222957912065 8 +573469363006738432 8 +572421167430037504 7 +573072040577642496 7 +574218574216081409 7 +574868802783035393 7 +572718507890102272 6 +573468893571719169 6 +573525264296185856 6 +573612529575796737 6 +573868158869622784 6 +573974547143262208 6 +574034966285303808 6 +574276236614090752 6 +574715725807398912 6 +574889551841394689 6 +575022187448569856 6 +572713537505787904 5 +573074559836020736 5 +573236026077921280 5 +573525365118877697 5 +573797113475260416 5 +574163157557579777 5 +574472130164879360 5 +574873039009148928 5 +575006158076833792 5 +494969568847085571 4 +495232630313713665 4 +498817438327533568 4 +533849413668831232 4 +545062666541498368 4 +545618153985028096 4 +546415979375648768 4 +546443639887757312 4 +546456727462608896 4 +562010833228546048 4 +562893398882140160 4 +565257025786249216 4 +569520536012914688 4 +570368800706547713 4 +570405250353340416 4 +570588766601125889 4 +570691917140389891 4 +570844917053853696 4 +571185281354698752 4 +571412502648623104 4 +572032929372741632 4 +572081000827629568 4 +572231547387965441 4 +572262446607429632 4 +572303234926297090 4 +572306195580645376 4 +572310663281938434 4 +572311189042163712 4 +572316911188484096 4 +572317681464020992 4 +572335034641879040 4 +572375882129661952 4 +572407599016087552 4 +572411038555107329 4 +572420363788816384 4 +572433738564558848 4 +572436463784222720 4 +572441295186108416 4 +572449287206014976 4 +572460295597240320 4 +572470573491593216 4 +572479370247462912 4 +572516812421640193 4 +572532280733908992 4 +572547753244430336 4 +572562061902913538 4 +572580961851969536 4 +572591342032973825 4 +572596772771528704 4 +572599608418959361 4 +572599953798766592 4 +572604834605449216 4 +572605083810062336 4 +572619504284516352 4 +572630569420365824 4 +572632126102773760 4 +572638887387906049 4 +572657636161802241 4 +572661460611952640 4 +572666514316378114 4 +572668655382765568 4 +572668911646322688 4 +572668997491154944 4 +572669281126629376 4 +572669490384773120 4 +572675240557342721 4 +572681132778651648 4 +572681757809614848 4 +572682298660798464 4 +572684995258683392 4 +572685443243773955 4 +572686813426413568 4 +572686814949064704 4 +572688232766443520 4 +572690974754926593 4 +572692138284208128 4 +572697092038131712 4 +572697396729143296 4 +572699037549387777 4 +572701804858908673 4 +572704427682893824 4 +572713215509045248 4 +572716260661854208 4 +572717082132930560 4 +572720675829497857 4 +572721129103609856 4 +572722806103904256 4 +572722911498379266 4 +572723848455581696 4 +572731131503779840 4 +572736503715012609 4 +572739217832480769 4 +572742137437167616 4 +572748059341283330 4 +572752594139021312 4 +572760736004505600 4 +572762938089574400 4 +572767201603817472 4 +572770296819781632 4 +572773137336684544 4 +572779868418674688 4 +572781293991292929 4 +572785051282374657 4 +572788562820653058 4 +572795968745283584 4 +572798215789150208 4 +572799755300048897 4 +572804983890022400 4 +572806535514103809 4 +572809822724235266 4 +572809845251842048 4 +572812104006176768 4 +572813839915454464 4 +572817472455364609 4 +572818538341138432 4 +572822788412600320 4 +572828046744399872 4 +572828054004756480 4 +572828855292977154 4 +572831363562409985 4 +572832730226032640 4 +572832929992335360 4 +572837262733545473 4 +572843964891471872 4 +572845927872012288 4 +572853816351014914 4 +572853985511526402 4 +572855507771244544 4 +572860686998499330 4 +572864555786747906 4 +572865108822528003 4 +572868377959510019 4 +572871622983487489 4 +572878201380159489 4 +572879664248573955 4 +572882903027195904 4 +572886736537313281 4 +572890254019039232 4 +572890649952985088 4 +572894414764433410 4 +572912245543452672 4 +572915145103421440 4 +572939882705899521 4 +572956560718106624 4 +572967283116675072 4 +572969882439979008 4 +572976599399731200 4 +572977318634774528 4 +572977424289304579 4 +572985091653173248 4 +572993727821561857 4 +573001086098796545 4 +573009803934633984 4 +573024871619305472 4 +573027061801070593 4 +573032861965651968 4 +573045604181532672 4 +573051877136445440 4 +573060429565468672 4 +573071150512775169 4 +573071417245343744 4 +573072743983394817 4 +573081516005310465 4 +573082842772738048 4 +573087636325318656 4 +573091250460626944 4 +573091826460180480 4 +573092206275371008 4 +573096374406795264 4 +573102192975990784 4 +573106964655419393 4 +573108507546943489 4 +573111212394844160 4 +573112839281516544 4 +573120892806426624 4 +573124788119605249 4 +573125274881134592 4 +573126007869349888 4 +573126303857160193 4 +573126416360972288 4 +573128727267131393 4 +573128810876440576 4 +573136729600749569 4 +573141511124070402 4 +573141652358885377 4 +573142506021371908 4 +573143686772596736 4 +573145314451853313 4 +573146853866278912 4 +573148804704165888 4 +573149873479929856 4 +573153744839299072 4 +573154905197838336 4 +573155106146807808 4 +573156239250599936 4 +573158496067194880 4 +573160947986145281 4 +573162436385234944 4 +573166720321490944 4 +573166986210893824 4 +573167060169166848 4 +573167163709648898 4 +573167973306900480 4 +573175065237307392 4 +573180607246835712 4 +573182146308800513 4 +573186524151463936 4 +573192605262983168 4 +573195700734963712 4 +573202222038851584 4 +573213946586390528 4 +573215479109705728 4 +573217688274796544 4 +573227882459185153 4 +573233184147705856 4 +573236034256830464 4 +573249280590286848 4 +573268446978416642 4 +573271709744279553 4 +573272195121729536 4 +573273808884371456 4 +573281254440742913 4 +573282339201130497 4 +573286865547100160 4 +573294743557423104 4 +573295527669862401 4 +573300722160304128 4 +573318902995918848 4 +573342450170892288 4 +573343788363481089 4 +573347454545989632 4 +573352545017442304 4 +573357683883020288 4 +573364567373303808 4 +573376761448161280 4 +573382070749175809 4 +573383890276122624 4 +573385975474208768 4 +573388786555461632 4 +573390607944577025 4 +573393783703904256 4 +573398246917337088 4 +573399319300202496 4 +573400887319789568 4 +573402816305344513 4 +573404311880908800 4 +573404782540525568 4 +573407665365377024 4 +573409768771072000 4 +573410958867111936 4 +573411998412627969 4 +573412177475870721 4 +573412393885171712 4 +573412552186585088 4 +573413136717381632 4 +573416871413858305 4 +573421632494157824 4 +573422807436038144 4 +573427544545878016 4 +573427926986817536 4 +573427927712428034 4 +573430282985959427 4 +573430286031044608 4 +573430363957166080 4 +573433840250802176 4 +573446184192237568 4 +573451252459241472 4 +573451714789015552 4 +573460467043033089 4 +573465284201345024 4 +573467964176908289 4 +573468481032683520 4 +573468539102838784 4 +573470923455610880 4 +573472041879347200 4 +573475335951552512 4 +573475783429218304 4 +573479740130074624 4 +573479889942216704 4 +573480670271512577 4 +573482990677942272 4 +573486121650372609 4 +573490866951585792 4 +573494662297595904 4 +573497126782631936 4 +573501253289906177 4 +573502819912712192 4 +573513343165538305 4 +573513470714462208 4 +573516154603339776 4 +573518894985510912 4 +573525880515006465 4 +573528599090872320 4 +573529220204331008 4 +573529956522852352 4 +573532622623744001 4 +573538028129484800 4 +573549112768921600 4 +573556838144307200 4 +573558087937753088 4 +573558901003526144 4 +573559403779002368 4 +573559724232155138 4 +573559945704005632 4 +573563202811916290 4 +573565321031868418 4 +573566530941661184 4 +573567202260996096 4 +573567452547690496 4 +573570715577892864 4 +573588571715735552 4 +573604441368104964 4 +573607487867387904 4 +573607497589657600 4 +573608449621295104 4 +573611214669897729 4 +573617750758678528 4 +573619313573302272 4 +573634707985862656 4 +573641903142535169 4 +573656684012310528 4 +573660144317288449 4 +573670553971654657 4 +573690464869593088 4 +573711731115094016 4 +573722793239515136 4 +573743813652537344 4 +573749327501352960 4 +573760516604067840 4 +573762456683597824 4 +573764044185796608 4 +573765158029410304 4 +573768197465333760 4 +573775516559405057 4 +573785442606321664 4 +573785598403723264 4 +573790758190346240 4 +573792819002916865 4 +573810160587661312 4 +573811307377520640 4 +573814798472867840 4 +573815322156793856 4 +573818191874392064 4 +573824107751608320 4 +573824275490037761 4 +573830428760502275 4 +573831757763604480 4 +573832764362874880 4 +573838238336479232 4 +573838296457048064 4 +573843123974987777 4 +573845628209512448 4 +573845762112626688 4 +573845866588536832 4 +573850369752694784 4 +573850393723211777 4 +573853380428369920 4 +573857266559926272 4 +573858093215412225 4 +573858972693721088 4 +573858999805702144 4 +573859419051724800 4 +573866844961931264 4 +573871886532108288 4 +573876190353604608 4 +573877858554138624 4 +573880391070711809 4 +573881479882743808 4 +573881579979718656 4 +573883503865163776 4 +573885292630966273 4 +573893374522097664 4 +573893568940523520 4 +573898857840558080 4 +573899718205513728 4 +573910234642579458 4 +573913821451980801 4 +573917709424951296 4 +573926781843083265 4 +573927415552016385 4 +573931368243027968 4 +573931368981270528 4 +573933304224874496 4 +573933428162220032 4 +573933431136088064 4 +573934239168794624 4 +573938407270977536 4 +573942718646390785 4 +573946549757087744 4 +573955004312207360 4 +573979749980094465 4 +573989552659959810 4 +574022939521196032 4 +574041058897588224 4 +574042465847656448 4 +574049419361812480 4 +574065537958834176 4 +574074693801672704 4 +574075052829736960 4 +574090489437122561 4 +574091979157475330 4 +574107850965823488 4 +574114576268181504 4 +574137995055341568 4 +574138672045514752 4 +574143769546657792 4 +574145265055064064 4 +574145300471922688 4 +574151048081764354 4 +574153259625111552 4 +574174669902909440 4 +574178188215074818 4 +574183736926937088 4 +574193334605889538 4 +574193548309979136 4 +574195856976535552 4 +574239373786484737 4 +574243859657445376 4 +574245896067534848 4 +574250788341440512 4 +574264323054374913 4 +574265543299100672 4 +574268613806268417 4 +574270113601449984 4 +574309260131106817 4 +574312158864891905 4 +574324345247559680 4 +574339041954521088 4 +574370967444574208 4 +574377496579170305 4 +574381308996157441 4 +574387724817469441 4 +574404074583412736 4 +574413264458772480 4 +574415156853215232 4 +574427084199759873 4 +574439654650417152 4 +574442714298642432 4 +574446950327664640 4 +574464622482186240 4 +574490252733210624 4 +574492987733905408 4 +574495779982680064 4 +574504201205760000 4 +574523756690956288 4 +574532558634946560 4 +574545013675270146 4 +574554154695327744 4 +574570549256544257 4 +574583653205848064 4 +574590411269894145 4 +574608269139963904 4 +574608666814472192 4 +574613116186591233 4 +574616710130581505 4 +574620589798780928 4 +574625234369798144 4 +574632590382088195 4 +574652197142659073 4 +574664007337910272 4 +574665204388200449 4 +574666702786461696 4 +574671571580809217 4 +574676256765186048 4 +574685147544612864 4 +574688707678236672 4 +574740925550723072 4 +574742423554912256 4 +574781013697564672 4 +574781254366855169 4 +574815786033897472 4 +574839493615751168 4 +574843356481634306 4 +574844041457721344 4 +574856641624961025 4 +574860270578876416 4 +574877231568179201 4 +574886024494608385 4 +574887058466017280 4 +574887355645104128 4 +574888615794225152 4 +574910242514989056 4 +574912346004545536 4 +574913224924995584 4 +574914083708870656 4 +574914920346710018 4 +574917848063176704 4 +574922866543624193 4 +574930235206017026 4 +574932400356073472 4 +574934794288635904 4 +574937884408606720 4 +574939765109317633 4 +574941002344988674 4 +574943320780181504 4 +574951069039489026 4 +574952481626583040 4 +574953013212676096 4 +574955708103401473 4 +574958273100644353 4 +574958672293593088 4 +574960955450331137 4 +574961971784650752 4 +574969141741776896 4 +574970979857862658 4 +574972628382785537 4 +574973646072102912 4 +574974990141034498 4 +574976763299782657 4 +574978062581809153 4 +574980921260027905 4 +574980936317669377 4 +574981359988506624 4 +574986798733991936 4 +574987624450031616 4 +574992387715416066 4 +574995215477506048 4 +575004612471586816 4 +575006081920827392 4 +575008524071100416 4 +575008601129029634 4 +575013710038618114 4 +575014591681400833 4 +575016631639760896 4 +575020589376655360 4 +575030328068300800 4 +575037006297726976 4 +575040257130782720 4 +575042564333535232 4 +575045774142124032 4 +575066028784369664 4 +572331818483097600 3 +573040056245489664 3 +573297348304101376 3 +573509930990182401 3 +573777711837151232 3 +574006050841493505 3 +574231735098982400 3 +574483345318486017 3 +574502144029540353 3 +574504478155673600 3 +574572865124618240 3 +574650857754607616 3 +574749004837191680 3 +574848098989568000 3 +574863645219250176 3 +575021988974039040 3 +571692589608525825 2 +571968153661489152 2 +572250003814555648 2 +572554760412385281 2 +572810927810191360 2 +572869055406723074 2 +573060613913628672 2 +573231816825434113 2 +573234362638045184 2 +573323333011419136 2 +573427773005516802 2 +573439908519542784 2 +573439922083921920 2 +573510176835133440 2 +573534766244438016 2 +573551332520497152 2 +573566488524779520 2 +573573874299854848 2 +573629687873691648 2 +573781401046269952 2 +573785365850497024 2 +573894737796730880 2 +573922002903281664 2 +574193792359645184 2 +574219999147069440 2 +574231919895818241 2 +574264715163082752 2 +574268310222737408 2 +574437941260500992 2 +574598502539460608 2 +574636552577380352 2 +574939397663162368 2 +574980214788227072 2 +Top Retweet Rank 1 +Tweet: @iamhardwellfan @FakeContestAlrt @LumiaIndia sponsor of #fakecontest @NargisFakhri #romanceurcity #microsoft #fake no winner announcement +User: Sanjay at + +Top Retweet Rank 2 +Tweet: DRINGEND VERZOEK aan Microsoft: wilt u mij niet meer tien keer per dag thuis laten bellen vanuit India voor marketingdoeleinden? #Microsoft +User: Ton Elias at Netherlands + +Top Retweet Rank 3 +Tweet: All versions of #Microsoft Windows are vulnerable to SSL/TLS FREAK #vulnerability — http://t.co/8TkisXHjWt http://t.co/ZFkCySwbne +User: The Hacker News at THE INTERNET + +Top Retweet Rank 4 +Tweet: Epic Organizational Charts #Microsoft #Apple #Oracle http://t.co/biNu2Q109a +User: Oliver Hansen at Geel, Belgium + +Top Retweet Rank 5 +Tweet: #Microsoft's new #Lumia #phones come with a free year of #Office 365 http://t.co/Uw3W1Hg1jD #tech #devbattles http://t.co/isAE07v5iY +User: Dev Battles at + +Top Retweet Rank 6 +Tweet: Le cofondateur de #Microsoft a découvert l'épave d'un cuirassé géant japonais, coulé en 1944 http://t.co/O72Wn8Kowh http://t.co/1cxtqoOYHT +User: Le Monde at Paris + +Top Retweet Rank 7 +Tweet: VIDEO:Cofundador de #Microsoft halla en el fondo del mar al acorazado más letal de la historia http://t.co/uk1YMdgcYp http://t.co/gkEFhgtN9A +User: RT en Español at + +Top Retweet Rank 8 +Tweet: VIDEO:Cofundador de #Microsoft halla en el fondo del mar al acorazado más letal de la historia http://t.co/uk1YMdgcYp http://t.co/gkEFhgtN9A +User: RT en Español at + +Top Retweet Rank 9 +Tweet: New #free ebook! #Microsoft #Azure Essentials: Azure Automation http://t.co/3V6buNJiBr #MSDev #ITPro http://t.co/G81Zhed3j9 +User: Microsoft Press at Redmond, WA + +Top Retweet Rank 10 +Tweet: VIDEO:Cofundador de #Microsoft halla en el fondo del mar al acorazado más letal de la historia http://t.co/uk1YMdgcYp http://t.co/gkEFhgtN9A +User: RT en Español at + +Top Retweet Rank 11 +Tweet: #Microsoft #Yammer støtter nå #Handoff for #Mac #Yosemite #iphone #ipad >>> Yammer Now Supports Handoff - YouTube https://t.co/oCQ1h1j8qX +User: Arno Vaa at Oslo, Norway + +Top Retweet Rank 12 +Tweet: #LoMásLeídoDeLaSemana Cofundador #Microsoft halla el acorazado más letal de la historia VIDEO http://t.co/uk1YMdgcYp http://t.co/K1eOMSogGf +User: RT en Español at + +Top Retweet Rank 13 +Tweet: Ranking:Top 20 #InternetofThings companies right now #Intel #1 http://t.co/Y6vnYct1R3 #IoT #Microsoft #Cisco #Google http://t.co/Ip1vZ0wetK +User: IoT Analytics at Berlin, Germany + +Top Retweet Rank 14 +Tweet: #LoMásLeídoDeLaSemana Cofundador #Microsoft halla el acorazado más letal de la historia VIDEO http://t.co/uk1YMdgcYp http://t.co/K1eOMSogGf +User: RT en Español at + +Top Retweet Rank 15 +Tweet: El cofundador de #Microsoft halla en aguas filipinas un acorazado japonés hundido http://t.co/WK3GD7VphS http://t.co/VfIPuJjw5y +User: ABC.es at Madrid + +Top Retweet Rank 16 +Tweet: What is The Cloud & Why Move? > http://t.co/PHPkWLShkK < http://t.co/HQvA1lqCSF #thecloud #cloudcomputing #Azure #Microsoft #videomarketing +User: Queue Associates at NYC - London - Hong Kong + +Top Retweet Rank 17 +Tweet: Logging in with Google, #Microsoft and Facebook SDKs to #Azure Mobile Services: http://t.co/mcmPFrU36B http://t.co/ZLKZWr0kBj +User: Microsoft Azure at Redmond, WA + +Top Retweet Rank 18 +Tweet: New #free ebook! #Microsoft #SystemCenter Software Update Management Field Experience http://t.co/mrpkp5Sclr #ITPro http://t.co/0ScgyZt0qF +User: Microsoft Press at Redmond, WA + +Top Retweet Rank 19 +Tweet: #Microsoft #XboxOne DirectX 12 boosts the Xbox One GPU by 20% http://t.co/TWMyDJer7K http://t.co/h6rBwTtwYH +User: The Inner Circle at Xbox Live Gamertag TiC Podcast + +Top Retweet Rank 20 +Tweet: 戦艦「武蔵」を発見 マイクロソフト創業者がツイッターで発表(産経新聞) http://t.co/shaYyO6szO #microsoft +User: Microsoft なう at 日本国東京都港区港南 + +Top Retweet Rank 21 +Tweet: "Very soon you will see #Collector working on the #Windows platform" #EsriEPC - @msretail #Microsoft #Esri #mobile +User: Esri Business Team at Redlands, CA + +Top Retweet Rank 22 +Tweet: Update: #Microsoft warns #PCs are also vulnerable to '#Freak' #security flaw: http://t.co/JSJYjwwqER #Tech #CyberSecurity +User: Debnicolina at Sicilia,Italia|Global|✈&Hotel + +Top Retweet Rank 23 +Tweet: 8で採用されたチャームは不評だったので、廃止する方向!? Windows 10、状態の確認や各種設定が行える「アクションセンター」 http://t.co/GO3qfdPlwU #Windows10 #Microsoft http://t.co/riAZJSrH6C +User: ASCII.jp編集部 at 東京都千代田区の飯田橋 + +Top Retweet Rank 24 +Tweet: Movistar TV Go integrará #Cortana, el asistente personal de voz de #Microsoft @MicrosoftES @LumiaES #MWC15 http://t.co/WBeXJ56W2Q +User: Solange Cummins at + +Top Retweet Rank 25 +Tweet: #Microsoft #XboxOne Microsoft working on a hardcore game controller for Xbox One http://t.co/iULMpm6TUZ http://t.co/cBl4rwyKC5 +User: The Inner Circle at Xbox Live Gamertag TiC Podcast + +Top Retweet Rank 26 +Tweet: #Microsoft lays off 18k workers http://t.co/FUqxsZzm1l but files for 4k #H1B foreign workers http://t.co/30oSipwIYn #immigration +User: Susan Pai at Jacksonville, FL + +Top Retweet Rank 27 +Tweet: First look: #Microsoft's all-new Office 2016 for #Mac public preview http://t.co/GAWV8OX1mB http://t.co/fRU7mLlD5U +User: AppleInsider at Cupertino, California + +Top Retweet Rank 28 +Tweet: 戦艦「武蔵」発見、8年にわたる探索実る ポール・アレン氏、乗組員悼み日本政府と協力の意向 http://t.co/Eie1itVIAu #microsoft +User: Microsoft なう at 日本国東京都港区港南 + +Top Retweet Rank 29 +Tweet: Microsoft’s mind-blowing vision of the future. See more here: http://t.co/yIjqVsiFMg via @GeekWire #Microsoft #Futuretech +User: Why Microsoft at Redmond, WA + +Top Retweet Rank 30 +Tweet: Microsoft has created 3 billionaires and about 12,000 millionaires.#Microsoft #billionair +User: Strong facts at US + diff --git a/2_1/test.py~ b/2_1/test.py~ new file mode 100644 index 0000000..4912642 --- /dev/null +++ b/2_1/test.py~ @@ -0,0 +1,102 @@ +#Kasane Utsumi +import os +import json +import pymongo +from bson.json_util import dumps +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweets = mongoConnection['twitter_analyzer'].db_tweets +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +db_retweets = mongoConnection['twitter_analyzer'].db_all_retweets +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users + +#db_retweets.drop() +#db_top30RetweetedUsers.drop() + +#create a dictionary of retweeted id is key, and number of occurrence as a value +retweetDict = dict() + +#also, dump all of the tweets into db_all_retweets so it would be easy to get user and location for top 30 later. + +for tJson in db_tweets.find(): + + tweetText = tJson['text'] + + #print "tweet is " + tweetText.encode('utf8') + + if tweetText.startswith("RT"): # thi s could be retweet. Look in db_streamT to see if corresponding tweet has a retweeted_status + + #print "starts with RT" + + fullTweets = db_streamT.find({'text':tweetText}) + + for correspondingTweet in fullTweets: #iterate just in case if there is > 1 tweet with identical name + + if 'retweeted_status' in correspondingTweet: #Now we are sure that this is a retweet because of existence of retweeted_status + + retweet = correspondingTweet['retweeted_status'] + id = retweet['id'] + + #print "original tweet id" + str(id) + + retweetDBEntry = {"id" : id, "retweetJson" : retweet} + + db_retweets.insert(retweetDBEntry) + if id in retweetDict: + retweetDict[id] += 1 + else: retweetDict[id] = 1 + + +#check the dictionary to make sure it has what I want +#for key in retweetDict: +# if (retweetDict[key] >1): +# print str(key) + " " + str(retweetDict[key]) + +#convert retweetDict into tuples so I can sort by number of frequencies, then sort by frequncy +retweetTuple = sorted(tuple(retweetDict.iteritems()),key=lambda x: (-x[1],x[0])) + +#check the tuple to see if it has what I want +for (id,frequency) in retweetTuple: + if frequency > 1: + print str(id) + " " +str(frequency) + +exit() + +#print out the top tweeted user , also store them in top30_users collection so they can be retreived for other analysis +tupleIndex = 0 +for (id,frequency) in retweetTuple: + retweet = db_retweets.find_one({"id":id}) + + if (retweet == None): + print "Something went wrong, could not find retweet with id" + str(id) + else: + retweetJson = json.loads(dumps(retweet["retweetJson"])) + topTweetedUser = retweetJson['user'] + + userDBEntry = {"id": topTweetedUser['id'], "userInfo" : topTweetedUser} + db_top30RetweetedUsers.insert(userDBEntry) + + #print out retweet, user name and location + print "Top Retweet Rank " + str(tupleIndex+1) + print "Tweet: " + retweetJson["text"].encode('utf8') + print "User: " + topTweetedUser["name"].encode('utf8') + " at " + topTweetedUser['location'].encode('utf8') + print " " + + #get only top 30 + tupleIndex = tupleIndex + 1 + if tupleIndex == 30: + exit() diff --git a/2_2/2_2_lexical_diversity.py b/2_2/2_2_lexical_diversity.py new file mode 100644 index 0000000..8be9d04 --- /dev/null +++ b/2_2/2_2_lexical_diversity.py @@ -0,0 +1,100 @@ +#Kasane Utsumi - 3/14/2015 +#2_2_lexical_diversity.py +#This code groups db_streamT by users. Then it computes number of unique words and number of words in all of tweets by each, which is used to compute lexical diversity, which is then stored in db_lexical_diversity. +import os +import json +import pymongo +from bson.json_util import dumps +from bson.son import SON +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT +db_lexical_diversity = mongoConnection['twitter_analyzer'].db_lexical_diversity + +if db_streamT == None: + print "db_streamT doesn't exist. Exiting.." + exit() + +#initialize lexical diversity table +db_lexical_diversity.drop() + +#group by user id +pipeline = [ + + { + "$group" : { "_id": { "userId" : "$user.id","username" : "$user.name"}, + "tweetIds" : {"$addToSet" : "$id" }, + "text" : {"$push" : "$text"}, + + } + + } +] +aggregated = db_streamT.aggregate(pipeline) + +print "number of users is: " + str(len(aggregated['result'])) + +for user in aggregated['result']: + + #print user + + #create a dictionary of retweeted id is key, and number of occurrence as a value + lexicalDict = dict() + totalWordCount = 0 + + #print user + + #for each tweet of this user + for tweetText in user["text"]: + + #print "for each tweettext" + #print "text is " + str(tweetText.encode("utf-8")) + + words = tweetText.split(" ") + for word in words: + #print word + str(totalWordCount) + + #do some cleanup + word=word.lower().strip() + #word=word.encode("utf-8").lower().strip() + + #ignore words that are necessary, user shouldn't be punished for frequently using these words. + if word == "the" or word == "a" or word=="from" or word=="to" or word=="and" or word=="for" or word.startswith("http://") or word.startswith("https://"): + continue + if word in lexicalDict: + lexicalDict[word] +=1 + else: + lexicalDict[word] = 1 + + totalWordCount +=1 + + #unique word count is number (length) of the dictionary + unqueWordsCount = len(lexicalDict) + + #print "total word" + str(totalWordCount) + #print "uniquew word" + str(unqueWordsCount) + #print "lex diverity" + str(float(unqueWordsCount) / float(totalWordCount)) + + lexicalDiv = 0.0 + + if (totalWordCount == 0): #meaning all of the tweets have been filtered out. In this case we would assume lex. diverstiy to be one. + lexicalDiv=1.0 + else: + lexicalDiv = float(unqueWordsCount) / float(totalWordCount) + + #print (lexicalDiv) + #insert into db + userDBEntry = {"id": user["_id"]["userId"], "username": user["_id"]["username"], "lexical_diversity" : str(lexicalDiv)} + db_lexical_diversity.insert(userDBEntry) + diff --git a/2_2/2_2_lexical_diversity.py~ b/2_2/2_2_lexical_diversity.py~ new file mode 100644 index 0000000..28a3d57 --- /dev/null +++ b/2_2/2_2_lexical_diversity.py~ @@ -0,0 +1,100 @@ +#Kasane Utsumi - 3/14/2015 +#2_2_lexical_diversity.py +#This code groups db_streamT by users. Then it computes number of unique words and number of words in all of tweets by a given user, which is used to compute lexical diversity, which is then stored in db_lexical_diversity. +import os +import json +import pymongo +from bson.json_util import dumps +from bson.son import SON +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT +db_lexical_diversity = mongoConnection['twitter_analyzer'].db_lexical_diversity + +if db_streamT == None: + print "db_streamT doesn't exist. Exiting.." + exit() + +#initialize lexical diversity table +db_lexical_diversity.drop() + +#group by user id +pipeline = [ + + { + "$group" : { "_id": { "userId" : "$user.id","username" : "$user.name"}, + "tweetIds" : {"$addToSet" : "$id" }, + "text" : {"$push" : "$text"}, + + } + + } +] +aggregated = db_streamT.aggregate(pipeline) + +print "number of users is: " + str(len(aggregated['result'])) + +for user in aggregated['result']: + + #print user + + #create a dictionary of retweeted id is key, and number of occurrence as a value + lexicalDict = dict() + totalWordCount = 0 + + #print user + + #for each tweet of this user + for tweetText in user["text"]: + + #print "for each tweettext" + #print "text is " + str(tweetText.encode("utf-8")) + + words = tweetText.split(" ") + for word in words: + #print word + str(totalWordCount) + + #do some cleanup + word=word.lower().strip() + #word=word.encode("utf-8").lower().strip() + + #ignore words that are necessary, user shouldn't be punished for frequently using these words. + if word == "the" or word == "a" or word=="from" or word=="to" or word=="and" or word=="for" or word.startswith("http://") or word.startswith("https://"): + continue + if word in lexicalDict: + lexicalDict[word] +=1 + else: + lexicalDict[word] = 1 + + totalWordCount +=1 + + #unique word count is number (length) of the dictionary + unqueWordsCount = len(lexicalDict) + + #print "total word" + str(totalWordCount) + #print "uniquew word" + str(unqueWordsCount) + #print "lex diverity" + str(float(unqueWordsCount) / float(totalWordCount)) + + lexicalDiv = 0.0 + + if (totalWordCount == 0): #meaning all of the tweets have been filtered out. In this case we would assume lex. diverstiy to be one. + lexicalDiv=1.0 + else: + lexicalDiv = float(unqueWordsCount) / float(totalWordCount) + + #print (lexicalDiv) + #insert into db + userDBEntry = {"id": user["_id"]["userId"], "username": user["_id"]["username"], "lexical_diversity" : str(lexicalDiv)} + db_lexical_diversity.insert(userDBEntry) + diff --git a/2_2/2_2_plot_lexical_diversity.py b/2_2/2_2_plot_lexical_diversity.py new file mode 100644 index 0000000..f15154f --- /dev/null +++ b/2_2/2_2_plot_lexical_diversity.py @@ -0,0 +1,51 @@ +#Kasane Utsumi - 3/14/2015 +#2_2_plot_lexical_diversity.py +#This code generates a histogram which shows frequency of lexical deversity range based on db_lexical_diversity collections. +import os +import json +import pymongo +from bson.json_util import dumps +from bson.son import SON +import numpy as np +import pylab as pl +import decimal +import matplotlib.pyplot as plt +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_lexical_diversity = mongoConnection['twitter_analyzer'].db_lexical_diversity + +if db_lexical_diversity == None: + print "db_lexical_diversity doesn't exist. Exiting.." + exit() + +plotArray = [] + +for user in db_lexical_diversity.find(): + plotArray.append(user['lexical_diversity']) +# plotDictionary[float(user['lexical_diversity'])]= user['username'] + +plt.hist(np.asarray(plotArray, dtype='float')) +plt.show() + + +exit() + +#X=np.arange(len(plotDictionary)) +#pl.bar(X,plotDictionary.keys(),width=0.2) +#pl.xticks(X,plotDictionary.values()) +#ymax= max(plotDictionary.keys())+1 +#pl.ylim(0,ymax) +#pl.show() + + diff --git a/2_2/2_2_plot_lexical_diversity.py~ b/2_2/2_2_plot_lexical_diversity.py~ new file mode 100644 index 0000000..1a2335a --- /dev/null +++ b/2_2/2_2_plot_lexical_diversity.py~ @@ -0,0 +1,49 @@ +#Kasane Utsumi +import os +import json +import pymongo +from bson.json_util import dumps +from bson.son import SON +import numpy as np +import pylab as pl +import decimal +import matplotlib.pyplot as plt +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_lexical_diversity = mongoConnection['twitter_analyzer'].db_lexical_diversity + +if db_lexical_diversity == None: + print "db_lexical_diversity doesn't exist. Exiting.." + exit() + +plotArray = [] + +for user in db_lexical_diversity.find(): + plotArray.append(user['lexical_diversity']) +# plotDictionary[float(user['lexical_diversity'])]= user['username'] + +plt.hist(np.asarray(plotArray, dtype='float')) +plt.show() + + +exit() + +#X=np.arange(len(plotDictionary)) +#pl.bar(X,plotDictionary.keys(),width=0.2) +#pl.xticks(X,plotDictionary.values()) +#ymax= max(plotDictionary.keys())+1 +#pl.ylim(0,ymax) +#pl.show() + + diff --git a/2_2/lex_div_histo.png b/2_2/lex_div_histo.png new file mode 100644 index 0000000..a562470 Binary files /dev/null and b/2_2/lex_div_histo.png differ diff --git a/2_3/2_3_followers_after_week.py b/2_3/2_3_followers_after_week.py new file mode 100644 index 0000000..68ff45e --- /dev/null +++ b/2_3/2_3_followers_after_week.py @@ -0,0 +1,79 @@ +#Kasane Utsumi - 3/14/2015 +#2_3_followers_after_week.py +#This code iterates through first 10 users in db_followers and retrieves followers for the same users after a week, then stores the result in the db_followers_after_week collection. Please see documentation for more elaboration. + +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +#twitter setup +consumer_key = "" +consumer_secret = "" +access_token = "" +access_token_secret = "" + +auth=None +api=None + +try: + auth = tweepy.OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) +except: + print "twitter setup failed" + exit() + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_followers= mongoConnection['twitter_analyzer'].db_followers + +if db_followers == "None": + print "db_followers not found" + exit() + +db_followers_after_week = mongoConnection['twitter_analyzer'].db_followers_after_week + +#empty old value from the table +db_followers_after_week.drop() + +#to get top 10 +index = 0 + +for user in db_followers.find(timeout=False): + followerList = [] + + #only look for follower ids for user I have to put into db_followers since i had to run this program few times because of network issue + if db_followers_after_week.find({'id' : user["id"]}).count() == 0: + uid = user["id"] + print "userId" + str(uid) + " count of followers from twitter call 1 week ago: " + str(len(user["followerIds"])) + + for page in tweepy.Cursor(api.followers_ids,user_id=uid).pages(): + + followerList.extend(page) + time.sleep(60) + + #see if number of followers match with number of ids retrieved. This is just a sanity check, since number of followers + #could have changed from the time tweet was collected + print "count of followers from twitter call made now:" + str(len(followerList)) + + followerOfUser = {"id": uid, "followerIds" : followerList} + db_followers_after_week.insert(followerOfUser) + + index +=1 + if index == 10: + break + diff --git a/2_3/2_3_followers_after_week.py~ b/2_3/2_3_followers_after_week.py~ new file mode 100644 index 0000000..1fb8b4f --- /dev/null +++ b/2_3/2_3_followers_after_week.py~ @@ -0,0 +1,79 @@ +#Kasane Utsumi - 3/14/2015 +#2_3_followers_after_week.py +#This code iterates through first 10 users in db_followers and retrieves followers for the same users after a week, then stores the result in the db_followers_after_week collection. Please see documentation for more elaboration. + +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +#twitter setup +consumer_key = "10G4NlBUpM9nusmE9nSoeGQnk" +consumer_secret = "KcH2Ykf253L0tTCuzIyqDUPnkEZ7mZhIiHCYiS84LbZNCsQwRu" +access_token = "2988143343-waN3T7DFy7j0Yn95hDdXOMLpdRfHzG66SnOZlHO" +access_token_secret = "TDd8WId2f7Cw8jDLdPcjJRM5lTlMGYiuLjUl1ped21euS" + +auth=None +api=None + +try: + auth = tweepy.OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) +except: + print "twitter setup failed" + exit() + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_followers= mongoConnection['twitter_analyzer'].db_followers + +if db_followers == "None": + print "db_followers not found" + exit() + +db_followers_after_week = mongoConnection['twitter_analyzer'].db_followers_after_week + +#empty old value from the table +db_followers_after_week.drop() + +#to get top 10 +index = 0 + +for user in db_followers.find(timeout=False): + followerList = [] + + #only look for follower ids for user I have to put into db_followers since i had to run this program few times because of network issue + if db_followers_after_week.find({'id' : user["id"]}).count() == 0: + uid = user["id"] + print "userId" + str(uid) + " count of followers from twitter call 1 week ago: " + str(len(user["followerIds"])) + + for page in tweepy.Cursor(api.followers_ids,user_id=uid).pages(): + + followerList.extend(page) + time.sleep(60) + + #see if number of followers match with number of ids retrieved. This is just a sanity check, since number of followers + #could have changed from the time tweet was collected + print "count of followers from twitter call made now:" + str(len(followerList)) + + followerOfUser = {"id": uid, "followerIds" : followerList} + db_followers_after_week.insert(followerOfUser) + + index +=1 + if index == 10: + break + diff --git a/2_3/2_3_get_followers_for_top_retweeters.py b/2_3/2_3_get_followers_for_top_retweeters.py new file mode 100644 index 0000000..0fa38b1 --- /dev/null +++ b/2_3/2_3_get_followers_for_top_retweeters.py @@ -0,0 +1,82 @@ +#Kasane Utsumi - 3/14/2015 +#2_3_get_followers_for_top_retweets.py +#This code retrieves a list of followers (id) for users who tweeted the top 30 tweets (stored in top30RetweetedUsers collection) and stores them in db_followers collection. Please see documentation for more elaboration. +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + + +#twitter setup +consumer_key = "" +consumer_secret = "" +access_token = "" +access_token_secret = "" +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users +db_followers = mongoConnection['twitter_analyzer'].db_followers + + +#create an array of unique users +topRetweetedUserList = dict() + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + +#check the list to make sure it has what I want +#for uid in topRetweetedUserList: +# print str(uid) + " " + str(topRetweetedUserList[uid]) + +#for each user, make a twitter followers/id call to get list of his/her followers' ids and store it in the db_followers +#db_follower will have 30 rows, the format for each row is: +#{"id":user id, followerIds: list of follower's ids} +for uid in topRetweetedUserList: + followerList = [] + + #only look for follower ids for user I have to put into db_followers since i had to run this program few times because of network issue + if db_followers.find({'id' : uid}).count() == 0: + + for page in tweepy.Cursor(api.followers_ids,user_id=uid).pages(): + followerList.extend(page) + time.sleep(60) + + #see if number of followers match with number of ids retrieved. This is just a sanity check, since number of followers + #could have changed from the time tweet was collected + print "userId" + str(uid) + print "count of followers from user property:" + str(len(followerList)) + print "count of users from api call" + str(topRetweetedUserList[uid]) + print " " + + #for myId in followerList: + # print myId + + followerOfUser = {"id": uid, "followerIds" : followerList} + db_followers.insert(followerOfUser) + + + diff --git a/2_3/2_3_get_followers_for_top_retweeters.py~ b/2_3/2_3_get_followers_for_top_retweeters.py~ new file mode 100644 index 0000000..38324c1 --- /dev/null +++ b/2_3/2_3_get_followers_for_top_retweeters.py~ @@ -0,0 +1,82 @@ +#Kasane Utsumi - 3/14/2015 +#2_3_get_followers_for_top_retweets.py +#This code retrieves a list of followers (id) for users who tweeted the top 30 tweets (stored in top30RetweetedUsers collection) and stores them in db_followers collection. Please see documentation for more elaboration. +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time +import signal + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + + +#twitter setup +consumer_key = "10G4NlBUpM9nusmE9nSoeGQnk" +consumer_secret = "KcH2Ykf253L0tTCuzIyqDUPnkEZ7mZhIiHCYiS84LbZNCsQwRu" +access_token = "2988143343-waN3T7DFy7j0Yn95hDdXOMLpdRfHzG66SnOZlHO" +access_token_secret = "TDd8WId2f7Cw8jDLdPcjJRM5lTlMGYiuLjUl1ped21euS" +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users +db_followers = mongoConnection['twitter_analyzer'].db_followers + + +#create an array of unique users +topRetweetedUserList = dict() + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + +#check the list to make sure it has what I want +#for uid in topRetweetedUserList: +# print str(uid) + " " + str(topRetweetedUserList[uid]) + +#for each user, make a twitter followers/id call to get list of his/her followers' ids and store it in the db_followers +#db_follower will have 30 rows, the format for each row is: +#{"id":user id, followerIds: list of follower's ids} +for uid in topRetweetedUserList: + followerList = [] + + #only look for follower ids for user I have to put into db_followers since i had to run this program few times because of network issue + if db_followers.find({'id' : uid}).count() == 0: + + for page in tweepy.Cursor(api.followers_ids,user_id=uid).pages(): + followerList.extend(page) + time.sleep(60) + + #see if number of followers match with number of ids retrieved. This is just a sanity check, since number of followers + #could have changed from the time tweet was collected + print "userId" + str(uid) + print "count of followers from user property:" + str(len(followerList)) + print "count of users from api call" + str(topRetweetedUserList[uid]) + print " " + + #for myId in followerList: + # print myId + + followerOfUser = {"id": uid, "followerIds" : followerList} + db_followers.insert(followerOfUser) + + + diff --git a/2_3/2_3_get_unfollowers_after_week.py b/2_3/2_3_get_unfollowers_after_week.py new file mode 100644 index 0000000..259e02d --- /dev/null +++ b/2_3/2_3_get_unfollowers_after_week.py @@ -0,0 +1,165 @@ +#Kasane Utsumi - 3/14/2015 +#2_3_get_unfollowers_after_week.py +#This code compares the list of followers for a given user between db_followers and db_followers_after_week and get ids of followers who were no longer following the user after a week. Then it calls Twitter api to retrieve information about each of the followers and stores in the db_dropped_followers collection. In the end it prints out id and name of unfollowed user for each user. + +import pymongo +import tweepy +import signal +from bson.json_util import dumps +import json +import time + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +myLog = open("log.txt","w+") + + +#utility function to print out to console but also keeps same message in the log file (log.txt) +def printAndLog(message): + print message + myLog.write(message + "\n") + + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + printAndLog("Connection failed") + exit() + +#get collections +db_followers = mongoConnection['twitter_analyzer'].db_followers +db_followers_after_week = mongoConnection['twitter_analyzer'].db_followers_after_week + + +if db_followers == "None" or db_followers_after_week == "None": + printAndLog("db_followers or db_followers_after_week not found") + exit() + +#below collections will be used to store dropped users. +#format: {id:id of user who had followers, droppedFollowers: array of user json object of users who unfollowed after a week} +db_dropped_followers = mongoConnection['twitter_analyzer'].db_dropped_followers +#db_dropped_followers.drop() + + + +#twitter setup +consumer_key = "" +consumer_secret = "" +access_token = "" +access_token_secret = "" + +auth=None +api=None + +try: + auth = tweepy.OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) +except: + printAndLog("twitter setup failed") + exit() + +#this function inserts dropped followers data into db_dropped_follower indexed by user Id of user who lost followers. +def insertInto_db_dropped_followers(uid, droppedUsersFromAPI): + droppedUserArray = [] + for droppedUser in droppedUsersFromAPI: + droppedUserArray.append({"droppedUserId": droppedUser._json['id'],"droppedUserName": droppedUser._json['name']}) + userDBEntry = {"id": uid, "unfollowedUsers" : droppedUserArray} + db_dropped_followers.insert(userDBEntry) + + + +for followerBefore in db_followers.find(timeout=False): + + userId = followerBefore['id'] + + #don't try to fill in db_dropped_followers if it has already been filled + if db_dropped_followers.find({'id':userId}).count() != 0: + continue + + printAndLog("User Id to look for unfollowers is" + str(userId)) + + #only 10 out of 17 users in db_followers will be in db_follower_after_week, so we can only get unfollower information for those users. + followerAfterWk = db_followers_after_week.find_one({'id' : userId}) + if followerAfterWk != None: + + originalFollowerArray = followerBefore['followerIds'] + + afterWeekFollowerArray = followerAfterWk['followerIds'] + + #keep count of droppped follower ids here + droppedFollowerId = [] + + #now find out who is missing from original array + for fid in originalFollowerArray: + if not fid in afterWeekFollowerArray: + droppedFollowerId.append(fid) + + #print droppedFollowerId + printAndLog("Unfollowed User Count is " + str(len(droppedFollowerId))) + + #twitter's look up user api only let you look up 100 users at a time + idLookUpIncrement = 100 + + #temporary array that will be passed to twitter api as a comman delimited list + tempArray = [] + + #array of users resulting from twitter api call + unfollowedUsers = [] + + #fill tempArray until its length is idLookUpIncrement then make twitter user lookup call + for num in droppedFollowerId: + tempArray.append(num) + + if len(tempArray) == idLookUpIncrement: + #print tempArray + #print "now call twitter" + + #ignore when twitter raises exception when NONE of the user Ids passed matches + try: + users = api.lookup_users(user_ids=tempArray) + unfollowedUsers.extend(users) + time.sleep(60) + #put result into db_dropped_followers + except: + print "none of the users matched for this batch. Continuing..." + + tempArray = [] + + if len(tempArray) != 0: #there are users left that still need info from twitter. + #print "call twitter for last time" + #print tempArray + + #ignore when twitter raises exception when NONE of the user Ids passed matches + try: + users = api.lookup_users(user_ids=tempArray) + time.sleep(60) + unfollowedUsers.extend(users) + except: + print "none of the users matched for this batch. Continuing..." + #print users + insertInto_db_dropped_followers(userId, unfollowedUsers) + +#now print out unfollowed user for each user Id +for unfollowedUser in db_dropped_followers.find(): + printAndLog("UserId of a user who lost followers:" + str(unfollowedUser['id'])) + + #unfollowerArray = json.loads(dumps(unfollowedUser['unfollowedUsers'])) + #print unfollowerArray + for uUser in unfollowedUser['unfollowedUsers']: + uJson = json.loads(dumps(uUser)) + printAndLog("Unfollowed User: " + str(uJson['droppedUserName'].encode('utf8')) + "(" + str(uJson['droppedUserId']) + ")") + if len(unfollowedUser['unfollowedUsers']) == 0: + print "no user unfollowed this user" + printAndLog(" ") + + +myLog.close() + + + diff --git a/2_3/2_3_get_unfollowers_after_week.py~ b/2_3/2_3_get_unfollowers_after_week.py~ new file mode 100644 index 0000000..a9a9866 --- /dev/null +++ b/2_3/2_3_get_unfollowers_after_week.py~ @@ -0,0 +1,165 @@ +#Kasane Utsumi - 3/14/2015 +#2_3_get_unfollowers_after_week.py +#This code compares the list of followers for a given user between db_followers and db_followers_after_week and get ids of followers who were no longer following the user after a week. Then it calls Twitter api to retrieve information about each of the followers and stores in the db_dropped_followers collection. In the end it prints out id and name of unfollowed user for each user. + +import pymongo +import tweepy +import signal +from bson.json_util import dumps +import json +import time + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +myLog = open("log.txt","w+") + + +#utility function to print out to console but also keeps same message in the log file (log.txt) +def printAndLog(message): + print message + myLog.write(message + "\n") + + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + printAndLog("Connection failed") + exit() + +#get collections +db_followers = mongoConnection['twitter_analyzer'].db_followers +db_followers_after_week = mongoConnection['twitter_analyzer'].db_followers_after_week + + +if db_followers == "None" or db_followers_after_week == "None": + printAndLog("db_followers or db_followers_after_week not found") + exit() + +#below collections will be used to store dropped users. +#format: {id:id of user who had followers, droppedFollowers: array of user json object of users who unfollowed after a week} +db_dropped_followers = mongoConnection['twitter_analyzer'].db_dropped_followers +#db_dropped_followers.drop() + + + +#twitter setup +consumer_key = "10G4NlBUpM9nusmE9nSoeGQnk" +consumer_secret = "KcH2Ykf253L0tTCuzIyqDUPnkEZ7mZhIiHCYiS84LbZNCsQwRu" +access_token = "2988143343-waN3T7DFy7j0Yn95hDdXOMLpdRfHzG66SnOZlHO" +access_token_secret = "TDd8WId2f7Cw8jDLdPcjJRM5lTlMGYiuLjUl1ped21euS" + +auth=None +api=None + +try: + auth = tweepy.OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) +except: + printAndLog("twitter setup failed") + exit() + +#this function inserts dropped followers data into db_dropped_follower indexed by user Id of user who lost followers. +def insertInto_db_dropped_followers(uid, droppedUsersFromAPI): + droppedUserArray = [] + for droppedUser in droppedUsersFromAPI: + droppedUserArray.append({"droppedUserId": droppedUser._json['id'],"droppedUserName": droppedUser._json['name']}) + userDBEntry = {"id": uid, "unfollowedUsers" : droppedUserArray} + db_dropped_followers.insert(userDBEntry) + + + +for followerBefore in db_followers.find(timeout=False): + + userId = followerBefore['id'] + + #don't try to fill in db_dropped_followers if it has already been filled + if db_dropped_followers.find({'id':userId}).count() != 0: + continue + + printAndLog("User Id to look for unfollowers is" + str(userId)) + + #only 10 out of 17 users in db_followers will be in db_follower_after_week, so we can only get unfollower information for those users. + followerAfterWk = db_followers_after_week.find_one({'id' : userId}) + if followerAfterWk != None: + + originalFollowerArray = followerBefore['followerIds'] + + afterWeekFollowerArray = followerAfterWk['followerIds'] + + #keep count of droppped follower ids here + droppedFollowerId = [] + + #now find out who is missing from original array + for fid in originalFollowerArray: + if not fid in afterWeekFollowerArray: + droppedFollowerId.append(fid) + + #print droppedFollowerId + printAndLog("Unfollowed User Count is " + str(len(droppedFollowerId))) + + #twitter's look up user api only let you look up 100 users at a time + idLookUpIncrement = 100 + + #temporary array that will be passed to twitter api as a comman delimited list + tempArray = [] + + #array of users resulting from twitter api call + unfollowedUsers = [] + + #fill tempArray until its length is idLookUpIncrement then make twitter user lookup call + for num in droppedFollowerId: + tempArray.append(num) + + if len(tempArray) == idLookUpIncrement: + #print tempArray + #print "now call twitter" + + #ignore when twitter raises exception when NONE of the user Ids passed matches + try: + users = api.lookup_users(user_ids=tempArray) + unfollowedUsers.extend(users) + time.sleep(60) + #put result into db_dropped_followers + except: + print "none of the users matched for this batch. Continuing..." + + tempArray = [] + + if len(tempArray) != 0: #there are users left that still need info from twitter. + #print "call twitter for last time" + #print tempArray + + #ignore when twitter raises exception when NONE of the user Ids passed matches + try: + users = api.lookup_users(user_ids=tempArray) + time.sleep(60) + unfollowedUsers.extend(users) + except: + print "none of the users matched for this batch. Continuing..." + #print users + insertInto_db_dropped_followers(userId, unfollowedUsers) + +#now print out unfollowed user for each user Id +for unfollowedUser in db_dropped_followers.find(): + printAndLog("UserId of a user who lost followers:" + str(unfollowedUser['id'])) + + #unfollowerArray = json.loads(dumps(unfollowedUser['unfollowedUsers'])) + #print unfollowerArray + for uUser in unfollowedUser['unfollowedUsers']: + uJson = json.loads(dumps(uUser)) + printAndLog("Unfollowed User: " + str(uJson['droppedUserName'].encode('utf8')) + "(" + str(uJson['droppedUserId']) + ")") + if len(unfollowedUser['unfollowedUsers']) == 0: + print "no user unfollowed this user" + printAndLog(" ") + + +myLog.close() + + + diff --git a/2_3/Log_unfollowed_user_list.txt b/2_3/Log_unfollowed_user_list.txt new file mode 100644 index 0000000..8b07894 --- /dev/null +++ b/2_3/Log_unfollowed_user_list.txt @@ -0,0 +1,555 @@ +UserId of a user who lost followers:160549126 +Unfollowed User: Social N New York(459534627) +Unfollowed User: The IBM MSP Team(2650996669) +Unfollowed User: HiGH Software B.V.(274408924) + +UserId of a user who lost followers:2809878539 +Unfollowed User: Jarmo Nikula(349576314) + +UserId of a user who lost followers:3049567425 +Unfollowed User: katzee443(271212128) +Unfollowed User: Hedvig Snow(1964281615) +Unfollowed User: Galaxy Xtra(2353090658) + +UserId of a user who lost followers:83316868 +Unfollowed User: Travellers Guide(253021912) +Unfollowed User: CDECO and co(404303487) +Unfollowed User: Laconic Music Group(2291679090) +Unfollowed User: Lori H. Schwartz(5749642) +Unfollowed User: ESS(2835076415) +Unfollowed User: Best quotes & vines (310353282) +Unfollowed User: Laila Shhh(105110980) +Unfollowed User: Shea Salvaza(230240409) +Unfollowed User: ers0be' m(2149515415) +Unfollowed User: Arturyang(1875991381) +Unfollowed User: Армен Гукасян(2402718120) +Unfollowed User: Emily McNeal(2839675323) +Unfollowed User: umut yilmaz(3031673930) +Unfollowed User: Настя Образцова(1387469299) +Unfollowed User: Jessy Shackleton(354104984) +Unfollowed User: Directive Consulting(2778870025) +Unfollowed User: Arrival Mobile(86616219) +Unfollowed User: Air Shepherd(2871014888) +Unfollowed User: Михаил Зак(814512962) +Unfollowed User: Дмитрий Юров(841944127) +Unfollowed User: Stanislav(1719417510) +Unfollowed User: Vahap Gül(2733829693) +Unfollowed User: TravelZIN(170766820) +Unfollowed User: Pascual Reyes(355847466) +Unfollowed User: BAOW(593031266) +Unfollowed User: Nick(2450648918) +Unfollowed User: juneil carreon(116408601) +Unfollowed User: Dhirendra Singh(86920217) +Unfollowed User: Lauren(387285226) +Unfollowed User: Steve Spencer(24833655) +Unfollowed User: Marin(366164941) +Unfollowed User: Kim Kirkland(1212443232) +Unfollowed User: HipHop International(61373007) +Unfollowed User: Shaun DaCunha(17346323) +Unfollowed User: Women Linked(1320939613) + +UserId of a user who lost followers:15497093 +Unfollowed User: Human Rights(2985041418) +Unfollowed User: Skating AGENT(2566749678) +Unfollowed User: TheRealAbbyNormal(76952859) +Unfollowed User: Salazar(2178339595) + +UserId of a user who lost followers:2206092889 + +UserId of a user who lost followers:2837996969 + +UserId of a user who lost followers:209811713 +Unfollowed User: Frikinux(2231258106) +Unfollowed User: Eterio Herrera(27738385) +Unfollowed User: Levyx, Inc.(2990225424) +Unfollowed User: Carl(66441197) +Unfollowed User: Spider Perro!(2873199124) +Unfollowed User: Anaïs Gaudreau(3054455279) +Unfollowed User: Jon Christianson(24479107) +Unfollowed User: David Robertson(2209330070) +Unfollowed User: Noora(2799460989) +Unfollowed User: Roman Turna(244009691) +Unfollowed User: MentorMate(31162892) +Unfollowed User: Monicaaa(3067660591) +Unfollowed User: hannibal where(3003149501) +Unfollowed User: Asmawee Musor(3065738762) +Unfollowed User: Reanna Charissa(3035678508) +Unfollowed User: elida Alonso(1692227370) +Unfollowed User: Secret (2610387187) +Unfollowed User: Miranda Kelly(2200155278) +Unfollowed User: NaijaNelboy(124062779) +Unfollowed User: HeliosOmega(2315416465) +Unfollowed User: forza itti(1281665102) +Unfollowed User: XYZ(2246582162) +Unfollowed User: EMPERYmusic(1368959768) +Unfollowed User: Retailings(2154664524) +Unfollowed User: Donna Fishkin(246103799) +Unfollowed User: Vijeta Uniyal(1075971498) +Unfollowed User: Cherrie Sussanne(2767619807) +Unfollowed User: David @ PhosGraphe(2430257299) +Unfollowed User: Metro Celebration(2711893256) +Unfollowed User: Khaxe Studios(2604579602) +Unfollowed User: ♨ℜⒶ⊂Hiℵɠ∃R♨(29620023) +Unfollowed User: CryptoCoinsNews(1856523530) +Unfollowed User: Felipe Lara & Cía.(2740213361) +Unfollowed User: Michael Don Okonjo(48263163) +Unfollowed User: markITwrite Tech(479758700) +Unfollowed User: Desire(3041899130) +Unfollowed User: Lemon PVP(742357573) +Unfollowed User: NK SoftWeb Techno(3023326434) +Unfollowed User: SANS EMEA(28075747) +Unfollowed User: Ian Randall(1560499536) +Unfollowed User: #OpReform(2934668789) +Unfollowed User: liz flanagan(110575482) +Unfollowed User: Agostino Finamore(581872739) +Unfollowed User: David Demko(612693490) +Unfollowed User: Fullpro Surveillance(3049741025) +Unfollowed User: swissleak(3049821419) +Unfollowed User: Mubaraka Sani(2975071103) +Unfollowed User: NORTHERNLINE RECORDS(192404724) +Unfollowed User: SaveMoney.es (FR)(3014629906) +Unfollowed User: Theo Caber(24758523) +Unfollowed User: Vony Karundeng(2252640529) +Unfollowed User: Ahsan Bashir(37622222) +Unfollowed User: John Martinez(2991824070) +Unfollowed User: HCG Ahmedabad(2965666510) +Unfollowed User: wonderfullife929(1609878848) +Unfollowed User: Ioan Cojocaru(112102345) +Unfollowed User: Sean Gillespie(809315036) +Unfollowed User: winacarcalgary(2244137136) +Unfollowed User: MyTipsHub(2407288675) +Unfollowed User: ⊕⊗(2940522183) +Unfollowed User: cornucopia(1070268745) +Unfollowed User: Jacques Schuhmacher(14354516) +Unfollowed User: Tom Lopy(138874315) +Unfollowed User: tariq hossein(2566032186) +Unfollowed User: Alptekin Yılmaz(73891804) +Unfollowed User: Arnav Garg(2959525949) +Unfollowed User: تخم مرغ شانسی نارنجی(1080036716) +Unfollowed User: Johan Akerberg(235129342) +Unfollowed User: Ian Adams(410403616) +Unfollowed User: Desmond lamar(1599513481) +Unfollowed User: rassyhainane(2938095460) +Unfollowed User: Alex Lopez(243459444) +Unfollowed User: Piotr Chodan(1115351576) +Unfollowed User: أحمد محمد محمد مشالي(2474481619) +Unfollowed User: Behzad Aghili(2657304769) +Unfollowed User: Tamil Mani(2840232841) +Unfollowed User: Kirk(199735301) +Unfollowed User: ⋉͠₡͜h͠₹͜ī͠₴͜⋊͠(2386728087) +Unfollowed User: Island Info Samui(61156035) +Unfollowed User: Kyle Grant(1700198089) +Unfollowed User: QR➰(966845676) +Unfollowed User: mayur jain(96754611) +Unfollowed User: Nirror Live Support(2355074737) +Unfollowed User: P4nT3sSter(2815036709) +Unfollowed User: harshit ratnoo(1638336416) +Unfollowed User: Erik de Boer(450534709) +Unfollowed User: Golden Triangle Tour(397913689) +Unfollowed User: Andrii(270032963) +Unfollowed User: Jrb(514704784) +Unfollowed User: ScopeControl(244211210) +Unfollowed User: Blake Price(109893746) +Unfollowed User: shemulsikder(1039172574) +Unfollowed User: Jox UmL(2547922621) +Unfollowed User: Sunny Kabadi Sanuj(797879029) +Unfollowed User: Gabor Juhasz(226700786) +Unfollowed User: Taino Consulting(573969640) +Unfollowed User: Paola Sipione(850983474) +Unfollowed User: DSosialita(142150133) +Unfollowed User: Bleed Blue(1011813614) +Unfollowed User: Countercepts(2389559678) +Unfollowed User: The Reclaimer(2188814654) +Unfollowed User: Arif Faisal(390675160) +Unfollowed User: Dali(22361210) +Unfollowed User: Camila Qariwarmi(16145570) +Unfollowed User: Arpit Chauhan Ⓥ(120093004) +Unfollowed User: inspcv00(283043799) +Unfollowed User: twettoo (505246572) +Unfollowed User: Anna Mariano(70121186) +Unfollowed User: JamieDaBoi(2241481140) +Unfollowed User: metin daldal(167523772) +Unfollowed User: Denis Anderson(105006629) +Unfollowed User: Dan Budanov(1957745388) +Unfollowed User: Matt Skipps(2229859557) +Unfollowed User: Murat KAÇAL(262811240) +Unfollowed User: Tom Lorek(14062210) +Unfollowed User: Julian Smith(1856486670) +Unfollowed User: Nora(100950073) +Unfollowed User: Juan Carlos Díaz(60677994) +Unfollowed User: Xerlox(1926561691) +Unfollowed User: ♥ キティ ♥(1918038769) +Unfollowed User: OSX L-I-AM(271862947) +Unfollowed User: Ekahau Itah Akamai(567303670) +Unfollowed User: Andrew music lover ♥(304448202) +Unfollowed User: Crimson Agents(1732466958) +Unfollowed User: Lukas Pfeiffer(29717423) +Unfollowed User: Job(1244288564) +Unfollowed User: Jøseph(570330748) +Unfollowed User: Ece Uyanık(277284599) +Unfollowed User: °Syed [B]ilal Shah(112761744) +Unfollowed User: Master of Disaster(929820896) +Unfollowed User: tony.haddad97(1354641972) +Unfollowed User: SYLLA HAMED OFFICIEL(141950700) +Unfollowed User: BERΔT DURΔKU(350019795) +Unfollowed User: Asun ن Α † Ω (420269829) +Unfollowed User: Selva Orejón Lozano(26743035) +Unfollowed User: SeaSK(52157425) +Unfollowed User: Rifky(46064665) +Unfollowed User: Pedro Gerardo Garza(113068864) +Unfollowed User: Justin Collins(23978537) +Unfollowed User: Adyacente(551413932) +Unfollowed User: James Brett(17617061) +Unfollowed User: KPath(325058992) +Unfollowed User: Tom Graham(34972175) +Unfollowed User: Le Rêveur (218594545) +Unfollowed User: Lila Smith(212117896) +Unfollowed User: NeoCertified(348569768) +Unfollowed User: Lojepi(221103248) +Unfollowed User: Andrew Morris(379532928) +Unfollowed User: Natalie(194724011) +Unfollowed User: Matthew Leshko(23674806) +Unfollowed User: 100TV BRAZIL (55335854) +Unfollowed User: Samar(48755417) + +UserId of a user who lost followers:17000457 +Unfollowed User: E V E R L I E ♥ ✌(1923395192) +Unfollowed User: Sulist Austin(74911507) +Unfollowed User: Hussein Fathy (360828055) +Unfollowed User: みずきん(3050536002) +Unfollowed User: Rucelio silva torres(3079336433) +Unfollowed User: Vedamanikkam(318335981) +Unfollowed User: PracticalBoss88(3081643420) +Unfollowed User: Landi Cordier(1259016018) +Unfollowed User: Brian S(1960998554) +Unfollowed User: Elena Kennedy(133443632) +Unfollowed User: wOrldconnected(2294261666) +Unfollowed User: Lucas Hamet(2855645196) +Unfollowed User: Freddy Jennings(2947268302) +Unfollowed User: Ajay Pandey(1717809529) +Unfollowed User: Rebeka God Ceinture(2778145826) +Unfollowed User: Dion(2407925175) +Unfollowed User: Jess(2907909412) +Unfollowed User: BMWLA CHIKC.COM(2816105040) +Unfollowed User: Mirza Faruk(3064846452) +Unfollowed User: こーへー(2383745436) +Unfollowed User: Sharon Willis(388666561) +Unfollowed User: Rob Hollandsworth(82546930) +Unfollowed User: Join-IT(1964875627) +Unfollowed User: Onur kucik(2862886748) +Unfollowed User: facundo montellano(2368943161) +Unfollowed User: Precha Kleabvaree(194010410) +Unfollowed User: C-ZO(3041759603) +Unfollowed User: Кроссовок(3026194179) +Unfollowed User: Di(34663615) +Unfollowed User: claudio corona(2820772376) +Unfollowed User: Chris G(84214345) +Unfollowed User: porno(3068146713) +Unfollowed User: ALLAN SIFUNA(1018291730) +Unfollowed User: scimplified(2419473368) +Unfollowed User: J.Deezy(2966854790) +Unfollowed User: ashraf shalapy(1623298544) +Unfollowed User: Gilberto M Souza(488870645) +Unfollowed User: Sumeet Kale(129479378) +Unfollowed User: Bert Stomphorst(3041309109) +Unfollowed User: vKim(1530942452) +Unfollowed User: Diyettim(2651781356) +Unfollowed User: karthik swarna(44992637) +Unfollowed User: Alsson(3050625815) +Unfollowed User: عبدالله العمران (273594784) +Unfollowed User: Sabin Pokharel(322550724) +Unfollowed User: Y.E.S.(3023538468) +Unfollowed User: Eric Burgess(63607386) +Unfollowed User: Justin Campbell(40861298) +Unfollowed User: bug(233086804) +Unfollowed User: Manu Horrillo(2985924365) +Unfollowed User: athanasius kontos(3019670237) +Unfollowed User: Mesut ÖZDEMİR(1352367536) +Unfollowed User: Paulo Nascimento(281854563) +Unfollowed User: angel lopez(523754771) +Unfollowed User: Omar Agan(2949131039) +Unfollowed User: delanakeasler@aol.co(2945918744) +Unfollowed User: Amanda_Styles'(2231796596) +Unfollowed User: Ravi Patel(586020510) +Unfollowed User: Save My Mother(2558456214) +Unfollowed User: ERCULES RODRIGUES (170155706) +Unfollowed User: Graham Bless(3021705826) +Unfollowed User: KleinMalondaツ(2959585158) +Unfollowed User: Jhoy S-L(176326872) +Unfollowed User: Fábio Miguel (2414109709) +Unfollowed User: hadeelmeshal,j,m(2451468390) +Unfollowed User: Voltage Energy Drink(2994025779) +Unfollowed User: RecoveryNations(549055848) +Unfollowed User: Garydean(2996111209) +Unfollowed User: Christine Ford(2605371381) +Unfollowed User: The Workshop(2881264240) +Unfollowed User: jezequel jocelyne(2921459831) +Unfollowed User: Вячеслав Клевченя(22118774) +Unfollowed User: lishiibeth (2193501497) +Unfollowed User: Marco Foellmer(10183162) +Unfollowed User: John Schwartz(17001516) +Unfollowed User: mohammad bulbul(2320470758) +Unfollowed User: Radinzainuddin(2829764786) +Unfollowed User: rebecca roby(112397877) +Unfollowed User: GZ Dixital(2431201940) +Unfollowed User: K&B Accountancy(1245729884) +Unfollowed User: ActionApplayer(2922189382) +Unfollowed User: ابو جليبيب الجزائري(2910061054) +Unfollowed User: Klaus Gasser(2734370672) +Unfollowed User: Andrew Hoffman(1245226386) +Unfollowed User: ENG_ALaa(2865373353) +Unfollowed User: ATG-IT(285788798) +Unfollowed User: jon cerboni(129847712) +Unfollowed User: Karliga95(1600832299) +Unfollowed User: Giharto(443889694) +Unfollowed User: mobidart(1281573236) +Unfollowed User: jennifer k. riggins(16789906) +Unfollowed User: Bro Golf Association(958459220) +Unfollowed User: Ucoatl(572228832) +Unfollowed User: Chris Martin(2226602077) +Unfollowed User: Funclobile, Inc.(2720465883) +Unfollowed User: Jonatan Glad(974567797) +Unfollowed User: Dmitry(22992948) +Unfollowed User: acfal2014(2879456148) +Unfollowed User: Bianca Giordanni(2790173147) +Unfollowed User: ADNAN KÖROĞLU(2780801377) +Unfollowed User: Ssnk(1968086060) +Unfollowed User: IT SAILOR LOUISE M.(1567917614) +Unfollowed User: Luisa_Noob(1888333032) +Unfollowed User: प्रशांत Anand(1443107148) +Unfollowed User: jose chacoa(345943397) +Unfollowed User: salvachillo(2617717032) +Unfollowed User: Anand Kumar (2706949812) +Unfollowed User: kardelen toptaş(2552294689) +Unfollowed User: #มินโฮ™ บริหาร(2830023242) +Unfollowed User: Nirror Live Support(2355074737) +Unfollowed User: TmoneyMoe(2472919181) +Unfollowed User: Pirdaus(2743944460) +Unfollowed User: 姚(17184892) +Unfollowed User: CLOUDSSKY(2228811793) +Unfollowed User: Timuçin Tuncer(157271443) +Unfollowed User: Rainbow Leather(318539182) +Unfollowed User: PALANI C(1136583834) +Unfollowed User: dgit59214(1025949319) +Unfollowed User: Carlo Kalkman(37168242) +Unfollowed User: Palmyra(1092467156) +Unfollowed User: AhmetMikailBayındır(2723464097) +Unfollowed User: SULTAN ALDERAAN(523212428) +Unfollowed User: Roman Takáč(893184728) +Unfollowed User: lowell(1046646572) +Unfollowed User: Robert Mills(915210439) +Unfollowed User: destiny breanna (1385839380) +Unfollowed User: antony fernando(1406964122) +Unfollowed User: وليد صديق(792906062) +Unfollowed User: Mosaic(320822446) +Unfollowed User: Coffee For Mom(90978424) +Unfollowed User: juan testa(2285526554) +Unfollowed User:  Kamal Hussain.(102437243) +Unfollowed User: Chip Pickering(2364631970) +Unfollowed User: حفيد الحاج حمدي (611008267) +Unfollowed User: JUANS MARCELO(353841912) +Unfollowed User: Nino(435165791) +Unfollowed User: RealSports(103982695) +Unfollowed User: Jonathan Young(954479713) +Unfollowed User: Balinda Duncan(23683933) +Unfollowed User: dario armstrong(1532082516) +Unfollowed User: Kevin Styles(1533577314) +Unfollowed User: Elvis Cedeño(1592116556) +Unfollowed User: CAMILA...(2310732501) +Unfollowed User: Leonid Sukhoykov(1600568161) +Unfollowed User: François Jacques(25578617) +Unfollowed User: Andrej Heržo(2405191056) +Unfollowed User: Brian Stronach(39354056) +Unfollowed User: Ɣ!ƇƬƠƦ(2206021859) +Unfollowed User: will asrari(8865002) +Unfollowed User: shivaraman(2399316726) +Unfollowed User: Martin Halberg(31098484) +Unfollowed User: Nirali Savla(167718461) +Unfollowed User: RahMi ince(1018074121) +Unfollowed User: amanda(413744294) +Unfollowed User: Thiago Alves Araújo(393104500) +Unfollowed User: Howl Jenkins(248718534) +Unfollowed User: Mayur S. Raja(367048029) +Unfollowed User: Ángel Torres (457822183) +Unfollowed User: Enrique Cabrejas(497999615) +Unfollowed User: snappub(2263956920) +Unfollowed User: Абай Сайлауов(379694797) +Unfollowed User: Irfan Junaid(742936512) +Unfollowed User: Giorgi Nizharadze(571155939) +Unfollowed User: ᅠ(1591814690) +Unfollowed User: Julian Smith(1856486670) +Unfollowed User: 楊建隆(1958563578) +Unfollowed User: 黒井日夏(354433708) +Unfollowed User: V Cerny(1539200660) +Unfollowed User: Oğuzhan ERKAN(1110995198) +Unfollowed User: Ayu alfaniyah(1692915997) +Unfollowed User: Tomáš Dundáček(66182112) +Unfollowed User: Aravind Ravi A R(932158489) +Unfollowed User: EC(1395914767) +Unfollowed User: nikhil tom(175483184) +Unfollowed User: Hicham EL HABTI(99569572) +Unfollowed User: ♥ a pequenina ♥(1498522069) +Unfollowed User: Brian Borja(308752922) +Unfollowed User: Nubbler.com(563636944) +Unfollowed User: Anara shyn(1675244010) +Unfollowed User: The Sheriff(606530683) +Unfollowed User: إسماعيل #صانع_قرار(1019354857) +Unfollowed User: Frentona sin remedio(1581319376) +Unfollowed User: Kyle Davis(437209567) +Unfollowed User: Anthony Longoria(823398096) +Unfollowed User: Edison Group, Inc.(1473907148) +Unfollowed User: Will Slade(32003485) +Unfollowed User: BRO 1961(261990528) +Unfollowed User: SYLLA HAMED OFFICIEL(141950700) +Unfollowed User: Joe Ouimet(17238128) +Unfollowed User: iLikeSharePoint.cz(1209475818) +Unfollowed User: Marta(899597132) +Unfollowed User: karan Bisht(815429676) +Unfollowed User: ole(881808679) +Unfollowed User: Ravi Petlur(174063492) +Unfollowed User: Arun Vijayarengan(19434280) +Unfollowed User: Jairo López(245988657) +Unfollowed User: Rodger Oates(180952714) +Unfollowed User: Nam Yong-taek(271399222) +Unfollowed User: SingleHop(14789540) +Unfollowed User: John Patrick Hogan(313260006) +Unfollowed User: Ricardo Medina(47125047) +Unfollowed User: Julio César Guzmán (71332518) +Unfollowed User: Kawtar Bk' ☆(254121439) + +UserId of a user who lost followers:152585619 +Unfollowed User: suchan(2957077746) +Unfollowed User: 有吉★情報館(2176584007) +Unfollowed User: ★役に立つiPhoneニュース★(2176580467) +Unfollowed User: モロアへ二次エロ画像(3029503114) +Unfollowed User: 500ml でん(3037921931) +Unfollowed User: 小麦穀物焼きつを脱、んとおなってしもたw(1556003364) +Unfollowed User: Wesley Moore(59896304) +Unfollowed User: やすのぶ(213040839) +Unfollowed User: 亜樹野(2920580785) +Unfollowed User: Ogbole Kingsley A(1610481902) +Unfollowed User: 大庭 鉄尋(2874544056) +Unfollowed User: 紺藤ココン(2280783835) +Unfollowed User: 本田がスクデットへ導く@相互フォロー募集(3004499750) +Unfollowed User: ネオニート月収120万(2911016510) +Unfollowed User: 西野カナ共感歌詞bot(2176846784) +Unfollowed User: 合同会社ニクール(1206766933) +Unfollowed User: みきにゃん(2587745660) +Unfollowed User: どこかで誰かのつぶやき(1628996102) +Unfollowed User: G(1939530175) +Unfollowed User: 西野カナ☆ファン大集合!!(3012352910) +Unfollowed User: ネオニート月収120万(2908720453) +Unfollowed User: あやぽ(3045662118) +Unfollowed User: のび(2829141541) +Unfollowed User: DISCOTECA ÁREA 33(2314193748) +Unfollowed User: 菜摘(3042384076) +Unfollowed User: いおきべ いろり(3026893052) +Unfollowed User: gurepon 公式アカウント(1266046542) +Unfollowed User: 相互フォロー100%【フォロワー増やし】(3017801538) +Unfollowed User: 春雄(3030948183) +Unfollowed User: えろみく@エロ垢(2963877286) +Unfollowed User: 菊痴(2522049246) +Unfollowed User: みすずちん(3031408071) +Unfollowed User: riko(2914763353) +Unfollowed User: Sevilla Eventos(1358052368) +Unfollowed User: 瑠璃(2962603490) +Unfollowed User: 鬼猫背(146499534) +Unfollowed User: なぎさ♡うら(3037921322) +Unfollowed User: 龍ヶ嬢 七々々(2243726948) +Unfollowed User: Sena@チョコボ鯖(2921075125) +Unfollowed User: 貴之(1350900638) +Unfollowed User: marine(2961104811) +Unfollowed User: 高校数学bot(1629037340) +Unfollowed User: くろしろ(3011277678) +Unfollowed User: KAT-TUN歌詞bot(1901507917) +Unfollowed User: Rugrén ®(105256817) +Unfollowed User: QMA 地理検定(1629010758) +Unfollowed User: foon(552814434) +Unfollowed User: Philippe Lewin(118345831) +Unfollowed User: 宇多田ヒカル心に響く歌詞(2176826358) +Unfollowed User: Prod.Universo(2687941489) +Unfollowed User: ❦ℑuℜi₭i❦(2979539804) +Unfollowed User: 相互フォローしましょ!(3048349909) +Unfollowed User: 유노かぷり(2337709364) +Unfollowed User: オンラインカジノ紹介(2939046668) +Unfollowed User: 犬夜叉集大成bot(1901435784) +Unfollowed User: Плюс в Карму(1962037350) +Unfollowed User: 酢いか(577671387) +Unfollowed User: Michitake KAWASAKI(25452928) +Unfollowed User: ともー氏(CV:堀江由衣)(2636373402) +Unfollowed User: ラピュタ名言BOT(1901432406) +Unfollowed User: BLEACH大好きBOT(1901497746) +Unfollowed User: 山下良成(2989169674) +Unfollowed User: 松本晴彦(2923515180) +Unfollowed User: SETI(1443513384) +Unfollowed User: KEN-g(2565529106) +Unfollowed User: ✒ ANONYMOUS(1639243381) +Unfollowed User: ウエノ真里(2831295441) +Unfollowed User: boshi(2610027968) +Unfollowed User: mYamaguchi(2291176962) +Unfollowed User: 刺身(小林秀樹)(594631814) +Unfollowed User: جزل القصيد(2657674784) +Unfollowed User: 大きな乳グラビア(2971155079) +Unfollowed User: عبووود (2791707943) +Unfollowed User: ゔぇくた(2669263884) +Unfollowed User: takeru(2853201666) +Unfollowed User: 栄作/ユリア(3006335689) +Unfollowed User: Louis Vuitton♡大好き(2984078167) +Unfollowed User: tanke(2765815117) +Unfollowed User: !ジャイアンツ!(2417214913) +Unfollowed User: ガルガル(1566042902) +Unfollowed User: G V N D V(1683476208) +Unfollowed User: Kyoka@えいとん(2764769754) +Unfollowed User: Caramel (2840934913) +Unfollowed User: Astral Cae?(2800612847) +Unfollowed User: ひろし(2826090229) +Unfollowed User: 出水 雄大(2797878103) +Unfollowed User: stratoteletor(2601554502) +Unfollowed User: johnnybgoode(2481606204) +Unfollowed User: TripleEvolution(2178222272) +Unfollowed User: 旭日@復活(2528255671) +Unfollowed User: リク(1432197878) +Unfollowed User: ナノこめ(1406461268) +Unfollowed User: 楓きのこ(2417113508) +Unfollowed User: 平塚零士(2475353695) +Unfollowed User: 文房具博士(2432830279) +Unfollowed User: けんけん(1617373302) +Unfollowed User: かねちん(2464308397) +Unfollowed User: Techno Mango(2386755404) +Unfollowed User: lichang(2362375532) +Unfollowed User: でかいひと(192CM)(89205386) +Unfollowed User: 谷口英俊(399113027) +Unfollowed User: A.N.Jell(1246042705) +Unfollowed User: 濱田 潤 (Jun Hamada)(1369472161) +Unfollowed User: *藍月*@Pileさん♡提督(1527746246) +Unfollowed User: 揚げ(1221691316) +Unfollowed User: 三原健嗣(起業の基盤構築の専門家)(1277689890) +Unfollowed User: あるふぉーと(18894688) +Unfollowed User: すのん(265392850) +Unfollowed User: さんぺい動物病院(159426114) +Unfollowed User: 山口真我(1288255651) +Unfollowed User: ちかあ(1515936782) +Unfollowed User: ビスコ(1144265682) +Unfollowed User: paipuisu(1368843631) +Unfollowed User: Thay (363220958) +Unfollowed User: ルクア(1259502127) +Unfollowed User: puput(1360657616) +Unfollowed User: Vicky(800901978) +Unfollowed User: 심재현(961619815) +Unfollowed User: ハクマン(168690488) +Unfollowed User: べいやん@動画投稿系(1334526804) +Unfollowed User: monster cat(1053155418) +Unfollowed User: Nov(1366433947) +Unfollowed User: タカちゃんは裏垢(1394845789) +Unfollowed User: ももももももとん。(337101552) +Unfollowed User: 八王子に住んでます(1251787716) +Unfollowed User: 一音(133253682) +Unfollowed User: marumomix(128505728) +Unfollowed User: なりわい@真戀z(769533300) +Unfollowed User: shiro☆(244721634) +Unfollowed User: す~ちゃんのお母さん【心言言葉】(619122560) +Unfollowed User: DeeJay Ayato.N/F1垢(152927007) +Unfollowed User: オグ (158561933) +Unfollowed User: tmisaki(81554399) +Unfollowed User: 河津一郎(10438862) + diff --git a/2_3/log.txt~ b/2_3/log.txt~ new file mode 100644 index 0000000..abbc581 --- /dev/null +++ b/2_3/log.txt~ @@ -0,0 +1,560 @@ +UserId of a user who lost followers:160549126 +Unfollowed User: Social N New York(459534627) +Unfollowed User: The IBM MSP Team(2650996669) +Unfollowed User: HiGH Software B.V.(274408924) + +UserId of a user who lost followers:2809878539 +Unfollowed User: Jarmo Nikula(349576314) + +UserId of a user who lost followers:3049567425 +Unfollowed User: katzee443(271212128) +Unfollowed User: Hedvig Snow(1964281615) +Unfollowed User: Galaxy Xtra(2353090658) + +UserId of a user who lost followers:83316868 +Unfollowed User: Travellers Guide(253021912) +Unfollowed User: CDECO and co(404303487) +Unfollowed User: Laconic Music Group(2291679090) +Unfollowed User: Lori H. Schwartz(5749642) +Unfollowed User: ESS(2835076415) +Unfollowed User: Best quotes & vines (310353282) +Unfollowed User: Laila Shhh(105110980) +Unfollowed User: Shea Salvaza(230240409) +Unfollowed User: ers0be' m(2149515415) +Unfollowed User: Arturyang(1875991381) +Unfollowed User: Армен Гукасян(2402718120) +Unfollowed User: Emily McNeal(2839675323) +Unfollowed User: umut yilmaz(3031673930) +Unfollowed User: Настя Образцова(1387469299) +Unfollowed User: Jessy Shackleton(354104984) +Unfollowed User: Directive Consulting(2778870025) +Unfollowed User: Arrival Mobile(86616219) +Unfollowed User: Air Shepherd(2871014888) +Unfollowed User: Михаил Зак(814512962) +Unfollowed User: Дмитрий Юров(841944127) +Unfollowed User: Stanislav(1719417510) +Unfollowed User: Vahap Gül(2733829693) +Unfollowed User: TravelZIN(170766820) +Unfollowed User: Pascual Reyes(355847466) +Unfollowed User: BAOW(593031266) +Unfollowed User: Nick(2450648918) +Unfollowed User: juneil carreon(116408601) +Unfollowed User: Dhirendra Singh(86920217) +Unfollowed User: Lauren(387285226) +Unfollowed User: Steve Spencer(24833655) +Unfollowed User: Marin(366164941) +Unfollowed User: Kim Kirkland(1212443232) +Unfollowed User: HipHop International(61373007) +Unfollowed User: Shaun DaCunha(17346323) +Unfollowed User: Women Linked(1320939613) + +UserId of a user who lost followers:15497093 +Unfollowed User: Human Rights(2985041418) +Unfollowed User: Skating AGENT(2566749678) +Unfollowed User: TheRealAbbyNormal(76952859) +Unfollowed User: Salazar(2178339595) + +UserId of a user who lost followers:160549126 +Unfollowed User: Social N New York(459534627) +Unfollowed User: The IBM MSP Team(2650996669) +Unfollowed User: HiGH Software B.V.(274408924) + +UserId of a user who lost followers:2206092889 + +UserId of a user who lost followers:2837996969 + +UserId of a user who lost followers:209811713 +Unfollowed User: Frikinux(2231258106) +Unfollowed User: Eterio Herrera(27738385) +Unfollowed User: Levyx, Inc.(2990225424) +Unfollowed User: Carl(66441197) +Unfollowed User: Spider Perro!(2873199124) +Unfollowed User: Anaïs Gaudreau(3054455279) +Unfollowed User: Jon Christianson(24479107) +Unfollowed User: David Robertson(2209330070) +Unfollowed User: Noora(2799460989) +Unfollowed User: Roman Turna(244009691) +Unfollowed User: MentorMate(31162892) +Unfollowed User: Monicaaa(3067660591) +Unfollowed User: hannibal where(3003149501) +Unfollowed User: Asmawee Musor(3065738762) +Unfollowed User: Reanna Charissa(3035678508) +Unfollowed User: elida Alonso(1692227370) +Unfollowed User: Secret (2610387187) +Unfollowed User: Miranda Kelly(2200155278) +Unfollowed User: NaijaNelboy(124062779) +Unfollowed User: HeliosOmega(2315416465) +Unfollowed User: forza itti(1281665102) +Unfollowed User: XYZ(2246582162) +Unfollowed User: EMPERYmusic(1368959768) +Unfollowed User: Retailings(2154664524) +Unfollowed User: Donna Fishkin(246103799) +Unfollowed User: Vijeta Uniyal(1075971498) +Unfollowed User: Cherrie Sussanne(2767619807) +Unfollowed User: David @ PhosGraphe(2430257299) +Unfollowed User: Metro Celebration(2711893256) +Unfollowed User: Khaxe Studios(2604579602) +Unfollowed User: ♨ℜⒶ⊂Hiℵɠ∃R♨(29620023) +Unfollowed User: CryptoCoinsNews(1856523530) +Unfollowed User: Felipe Lara & Cía.(2740213361) +Unfollowed User: Michael Don Okonjo(48263163) +Unfollowed User: markITwrite Tech(479758700) +Unfollowed User: Desire(3041899130) +Unfollowed User: Lemon PVP(742357573) +Unfollowed User: NK SoftWeb Techno(3023326434) +Unfollowed User: SANS EMEA(28075747) +Unfollowed User: Ian Randall(1560499536) +Unfollowed User: #OpReform(2934668789) +Unfollowed User: liz flanagan(110575482) +Unfollowed User: Agostino Finamore(581872739) +Unfollowed User: David Demko(612693490) +Unfollowed User: Fullpro Surveillance(3049741025) +Unfollowed User: swissleak(3049821419) +Unfollowed User: Mubaraka Sani(2975071103) +Unfollowed User: NORTHERNLINE RECORDS(192404724) +Unfollowed User: SaveMoney.es (FR)(3014629906) +Unfollowed User: Theo Caber(24758523) +Unfollowed User: Vony Karundeng(2252640529) +Unfollowed User: Ahsan Bashir(37622222) +Unfollowed User: John Martinez(2991824070) +Unfollowed User: HCG Ahmedabad(2965666510) +Unfollowed User: wonderfullife929(1609878848) +Unfollowed User: Ioan Cojocaru(112102345) +Unfollowed User: Sean Gillespie(809315036) +Unfollowed User: winacarcalgary(2244137136) +Unfollowed User: MyTipsHub(2407288675) +Unfollowed User: ⊕⊗(2940522183) +Unfollowed User: cornucopia(1070268745) +Unfollowed User: Jacques Schuhmacher(14354516) +Unfollowed User: Tom Lopy(138874315) +Unfollowed User: tariq hossein(2566032186) +Unfollowed User: Alptekin Yılmaz(73891804) +Unfollowed User: Arnav Garg(2959525949) +Unfollowed User: تخم مرغ شانسی نارنجی(1080036716) +Unfollowed User: Johan Akerberg(235129342) +Unfollowed User: Ian Adams(410403616) +Unfollowed User: Desmond lamar(1599513481) +Unfollowed User: rassyhainane(2938095460) +Unfollowed User: Alex Lopez(243459444) +Unfollowed User: Piotr Chodan(1115351576) +Unfollowed User: أحمد محمد محمد مشالي(2474481619) +Unfollowed User: Behzad Aghili(2657304769) +Unfollowed User: Tamil Mani(2840232841) +Unfollowed User: Kirk(199735301) +Unfollowed User: ⋉͠₡͜h͠₹͜ī͠₴͜⋊͠(2386728087) +Unfollowed User: Island Info Samui(61156035) +Unfollowed User: Kyle Grant(1700198089) +Unfollowed User: QR➰(966845676) +Unfollowed User: mayur jain(96754611) +Unfollowed User: Nirror Live Support(2355074737) +Unfollowed User: P4nT3sSter(2815036709) +Unfollowed User: harshit ratnoo(1638336416) +Unfollowed User: Erik de Boer(450534709) +Unfollowed User: Golden Triangle Tour(397913689) +Unfollowed User: Andrii(270032963) +Unfollowed User: Jrb(514704784) +Unfollowed User: ScopeControl(244211210) +Unfollowed User: Blake Price(109893746) +Unfollowed User: shemulsikder(1039172574) +Unfollowed User: Jox UmL(2547922621) +Unfollowed User: Sunny Kabadi Sanuj(797879029) +Unfollowed User: Gabor Juhasz(226700786) +Unfollowed User: Taino Consulting(573969640) +Unfollowed User: Paola Sipione(850983474) +Unfollowed User: DSosialita(142150133) +Unfollowed User: Bleed Blue(1011813614) +Unfollowed User: Countercepts(2389559678) +Unfollowed User: The Reclaimer(2188814654) +Unfollowed User: Arif Faisal(390675160) +Unfollowed User: Dali(22361210) +Unfollowed User: Camila Qariwarmi(16145570) +Unfollowed User: Arpit Chauhan Ⓥ(120093004) +Unfollowed User: inspcv00(283043799) +Unfollowed User: twettoo (505246572) +Unfollowed User: Anna Mariano(70121186) +Unfollowed User: JamieDaBoi(2241481140) +Unfollowed User: metin daldal(167523772) +Unfollowed User: Denis Anderson(105006629) +Unfollowed User: Dan Budanov(1957745388) +Unfollowed User: Matt Skipps(2229859557) +Unfollowed User: Murat KAÇAL(262811240) +Unfollowed User: Tom Lorek(14062210) +Unfollowed User: Julian Smith(1856486670) +Unfollowed User: Nora(100950073) +Unfollowed User: Juan Carlos Díaz(60677994) +Unfollowed User: Xerlox(1926561691) +Unfollowed User: ♥ キティ ♥(1918038769) +Unfollowed User: OSX L-I-AM(271862947) +Unfollowed User: Ekahau Itah Akamai(567303670) +Unfollowed User: Andrew music lover ♥(304448202) +Unfollowed User: Crimson Agents(1732466958) +Unfollowed User: Lukas Pfeiffer(29717423) +Unfollowed User: Job(1244288564) +Unfollowed User: Jøseph(570330748) +Unfollowed User: Ece Uyanık(277284599) +Unfollowed User: °Syed [B]ilal Shah(112761744) +Unfollowed User: Master of Disaster(929820896) +Unfollowed User: tony.haddad97(1354641972) +Unfollowed User: SYLLA HAMED OFFICIEL(141950700) +Unfollowed User: BERΔT DURΔKU(350019795) +Unfollowed User: Asun ن Α † Ω (420269829) +Unfollowed User: Selva Orejón Lozano(26743035) +Unfollowed User: SeaSK(52157425) +Unfollowed User: Rifky(46064665) +Unfollowed User: Pedro Gerardo Garza(113068864) +Unfollowed User: Justin Collins(23978537) +Unfollowed User: Adyacente(551413932) +Unfollowed User: James Brett(17617061) +Unfollowed User: KPath(325058992) +Unfollowed User: Tom Graham(34972175) +Unfollowed User: Le Rêveur (218594545) +Unfollowed User: Lila Smith(212117896) +Unfollowed User: NeoCertified(348569768) +Unfollowed User: Lojepi(221103248) +Unfollowed User: Andrew Morris(379532928) +Unfollowed User: Natalie(194724011) +Unfollowed User: Matthew Leshko(23674806) +Unfollowed User: 100TV BRAZIL (55335854) +Unfollowed User: Samar(48755417) + +UserId of a user who lost followers:17000457 +Unfollowed User: E V E R L I E ♥ ✌(1923395192) +Unfollowed User: Sulist Austin(74911507) +Unfollowed User: Hussein Fathy (360828055) +Unfollowed User: みずきん(3050536002) +Unfollowed User: Rucelio silva torres(3079336433) +Unfollowed User: Vedamanikkam(318335981) +Unfollowed User: PracticalBoss88(3081643420) +Unfollowed User: Landi Cordier(1259016018) +Unfollowed User: Brian S(1960998554) +Unfollowed User: Elena Kennedy(133443632) +Unfollowed User: wOrldconnected(2294261666) +Unfollowed User: Lucas Hamet(2855645196) +Unfollowed User: Freddy Jennings(2947268302) +Unfollowed User: Ajay Pandey(1717809529) +Unfollowed User: Rebeka God Ceinture(2778145826) +Unfollowed User: Dion(2407925175) +Unfollowed User: Jess(2907909412) +Unfollowed User: BMWLA CHIKC.COM(2816105040) +Unfollowed User: Mirza Faruk(3064846452) +Unfollowed User: こーへー(2383745436) +Unfollowed User: Sharon Willis(388666561) +Unfollowed User: Rob Hollandsworth(82546930) +Unfollowed User: Join-IT(1964875627) +Unfollowed User: Onur kucik(2862886748) +Unfollowed User: facundo montellano(2368943161) +Unfollowed User: Precha Kleabvaree(194010410) +Unfollowed User: C-ZO(3041759603) +Unfollowed User: Кроссовок(3026194179) +Unfollowed User: Di(34663615) +Unfollowed User: claudio corona(2820772376) +Unfollowed User: Chris G(84214345) +Unfollowed User: porno(3068146713) +Unfollowed User: ALLAN SIFUNA(1018291730) +Unfollowed User: scimplified(2419473368) +Unfollowed User: J.Deezy(2966854790) +Unfollowed User: ashraf shalapy(1623298544) +Unfollowed User: Gilberto M Souza(488870645) +Unfollowed User: Sumeet Kale(129479378) +Unfollowed User: Bert Stomphorst(3041309109) +Unfollowed User: vKim(1530942452) +Unfollowed User: Diyettim(2651781356) +Unfollowed User: karthik swarna(44992637) +Unfollowed User: Alsson(3050625815) +Unfollowed User: عبدالله العمران (273594784) +Unfollowed User: Sabin Pokharel(322550724) +Unfollowed User: Y.E.S.(3023538468) +Unfollowed User: Eric Burgess(63607386) +Unfollowed User: Justin Campbell(40861298) +Unfollowed User: bug(233086804) +Unfollowed User: Manu Horrillo(2985924365) +Unfollowed User: athanasius kontos(3019670237) +Unfollowed User: Mesut ÖZDEMİR(1352367536) +Unfollowed User: Paulo Nascimento(281854563) +Unfollowed User: angel lopez(523754771) +Unfollowed User: Omar Agan(2949131039) +Unfollowed User: delanakeasler@aol.co(2945918744) +Unfollowed User: Amanda_Styles'(2231796596) +Unfollowed User: Ravi Patel(586020510) +Unfollowed User: Save My Mother(2558456214) +Unfollowed User: ERCULES RODRIGUES (170155706) +Unfollowed User: Graham Bless(3021705826) +Unfollowed User: KleinMalondaツ(2959585158) +Unfollowed User: Jhoy S-L(176326872) +Unfollowed User: Fábio Miguel (2414109709) +Unfollowed User: hadeelmeshal,j,m(2451468390) +Unfollowed User: Voltage Energy Drink(2994025779) +Unfollowed User: RecoveryNations(549055848) +Unfollowed User: Garydean(2996111209) +Unfollowed User: Christine Ford(2605371381) +Unfollowed User: The Workshop(2881264240) +Unfollowed User: jezequel jocelyne(2921459831) +Unfollowed User: Вячеслав Клевченя(22118774) +Unfollowed User: lishiibeth (2193501497) +Unfollowed User: Marco Foellmer(10183162) +Unfollowed User: John Schwartz(17001516) +Unfollowed User: mohammad bulbul(2320470758) +Unfollowed User: Radinzainuddin(2829764786) +Unfollowed User: rebecca roby(112397877) +Unfollowed User: GZ Dixital(2431201940) +Unfollowed User: K&B Accountancy(1245729884) +Unfollowed User: ActionApplayer(2922189382) +Unfollowed User: ابو جليبيب الجزائري(2910061054) +Unfollowed User: Klaus Gasser(2734370672) +Unfollowed User: Andrew Hoffman(1245226386) +Unfollowed User: ENG_ALaa(2865373353) +Unfollowed User: ATG-IT(285788798) +Unfollowed User: jon cerboni(129847712) +Unfollowed User: Karliga95(1600832299) +Unfollowed User: Giharto(443889694) +Unfollowed User: mobidart(1281573236) +Unfollowed User: jennifer k. riggins(16789906) +Unfollowed User: Bro Golf Association(958459220) +Unfollowed User: Ucoatl(572228832) +Unfollowed User: Chris Martin(2226602077) +Unfollowed User: Funclobile, Inc.(2720465883) +Unfollowed User: Jonatan Glad(974567797) +Unfollowed User: Dmitry(22992948) +Unfollowed User: acfal2014(2879456148) +Unfollowed User: Bianca Giordanni(2790173147) +Unfollowed User: ADNAN KÖROĞLU(2780801377) +Unfollowed User: Ssnk(1968086060) +Unfollowed User: IT SAILOR LOUISE M.(1567917614) +Unfollowed User: Luisa_Noob(1888333032) +Unfollowed User: प्रशांत Anand(1443107148) +Unfollowed User: jose chacoa(345943397) +Unfollowed User: salvachillo(2617717032) +Unfollowed User: Anand Kumar (2706949812) +Unfollowed User: kardelen toptaş(2552294689) +Unfollowed User: #มินโฮ™ บริหาร(2830023242) +Unfollowed User: Nirror Live Support(2355074737) +Unfollowed User: TmoneyMoe(2472919181) +Unfollowed User: Pirdaus(2743944460) +Unfollowed User: 姚(17184892) +Unfollowed User: CLOUDSSKY(2228811793) +Unfollowed User: Timuçin Tuncer(157271443) +Unfollowed User: Rainbow Leather(318539182) +Unfollowed User: PALANI C(1136583834) +Unfollowed User: dgit59214(1025949319) +Unfollowed User: Carlo Kalkman(37168242) +Unfollowed User: Palmyra(1092467156) +Unfollowed User: AhmetMikailBayındır(2723464097) +Unfollowed User: SULTAN ALDERAAN(523212428) +Unfollowed User: Roman Takáč(893184728) +Unfollowed User: lowell(1046646572) +Unfollowed User: Robert Mills(915210439) +Unfollowed User: destiny breanna (1385839380) +Unfollowed User: antony fernando(1406964122) +Unfollowed User: وليد صديق(792906062) +Unfollowed User: Mosaic(320822446) +Unfollowed User: Coffee For Mom(90978424) +Unfollowed User: juan testa(2285526554) +Unfollowed User:  Kamal Hussain.(102437243) +Unfollowed User: Chip Pickering(2364631970) +Unfollowed User: حفيد الحاج حمدي (611008267) +Unfollowed User: JUANS MARCELO(353841912) +Unfollowed User: Nino(435165791) +Unfollowed User: RealSports(103982695) +Unfollowed User: Jonathan Young(954479713) +Unfollowed User: Balinda Duncan(23683933) +Unfollowed User: dario armstrong(1532082516) +Unfollowed User: Kevin Styles(1533577314) +Unfollowed User: Elvis Cedeño(1592116556) +Unfollowed User: CAMILA...(2310732501) +Unfollowed User: Leonid Sukhoykov(1600568161) +Unfollowed User: François Jacques(25578617) +Unfollowed User: Andrej Heržo(2405191056) +Unfollowed User: Brian Stronach(39354056) +Unfollowed User: Ɣ!ƇƬƠƦ(2206021859) +Unfollowed User: will asrari(8865002) +Unfollowed User: shivaraman(2399316726) +Unfollowed User: Martin Halberg(31098484) +Unfollowed User: Nirali Savla(167718461) +Unfollowed User: RahMi ince(1018074121) +Unfollowed User: amanda(413744294) +Unfollowed User: Thiago Alves Araújo(393104500) +Unfollowed User: Howl Jenkins(248718534) +Unfollowed User: Mayur S. Raja(367048029) +Unfollowed User: Ángel Torres (457822183) +Unfollowed User: Enrique Cabrejas(497999615) +Unfollowed User: snappub(2263956920) +Unfollowed User: Абай Сайлауов(379694797) +Unfollowed User: Irfan Junaid(742936512) +Unfollowed User: Giorgi Nizharadze(571155939) +Unfollowed User: ᅠ(1591814690) +Unfollowed User: Julian Smith(1856486670) +Unfollowed User: 楊建隆(1958563578) +Unfollowed User: 黒井日夏(354433708) +Unfollowed User: V Cerny(1539200660) +Unfollowed User: Oğuzhan ERKAN(1110995198) +Unfollowed User: Ayu alfaniyah(1692915997) +Unfollowed User: Tomáš Dundáček(66182112) +Unfollowed User: Aravind Ravi A R(932158489) +Unfollowed User: EC(1395914767) +Unfollowed User: nikhil tom(175483184) +Unfollowed User: Hicham EL HABTI(99569572) +Unfollowed User: ♥ a pequenina ♥(1498522069) +Unfollowed User: Brian Borja(308752922) +Unfollowed User: Nubbler.com(563636944) +Unfollowed User: Anara shyn(1675244010) +Unfollowed User: The Sheriff(606530683) +Unfollowed User: إسماعيل #صانع_قرار(1019354857) +Unfollowed User: Frentona sin remedio(1581319376) +Unfollowed User: Kyle Davis(437209567) +Unfollowed User: Anthony Longoria(823398096) +Unfollowed User: Edison Group, Inc.(1473907148) +Unfollowed User: Will Slade(32003485) +Unfollowed User: BRO 1961(261990528) +Unfollowed User: SYLLA HAMED OFFICIEL(141950700) +Unfollowed User: Joe Ouimet(17238128) +Unfollowed User: iLikeSharePoint.cz(1209475818) +Unfollowed User: Marta(899597132) +Unfollowed User: karan Bisht(815429676) +Unfollowed User: ole(881808679) +Unfollowed User: Ravi Petlur(174063492) +Unfollowed User: Arun Vijayarengan(19434280) +Unfollowed User: Jairo López(245988657) +Unfollowed User: Rodger Oates(180952714) +Unfollowed User: Nam Yong-taek(271399222) +Unfollowed User: SingleHop(14789540) +Unfollowed User: John Patrick Hogan(313260006) +Unfollowed User: Ricardo Medina(47125047) +Unfollowed User: Julio César Guzmán (71332518) +Unfollowed User: Kawtar Bk' ☆(254121439) + +UserId of a user who lost followers:152585619 +Unfollowed User: suchan(2957077746) +Unfollowed User: 有吉★情報館(2176584007) +Unfollowed User: ★役に立つiPhoneニュース★(2176580467) +Unfollowed User: モロアへ二次エロ画像(3029503114) +Unfollowed User: 500ml でん(3037921931) +Unfollowed User: 小麦穀物焼きつを脱、んとおなってしもたw(1556003364) +Unfollowed User: Wesley Moore(59896304) +Unfollowed User: やすのぶ(213040839) +Unfollowed User: 亜樹野(2920580785) +Unfollowed User: Ogbole Kingsley A(1610481902) +Unfollowed User: 大庭 鉄尋(2874544056) +Unfollowed User: 紺藤ココン(2280783835) +Unfollowed User: 本田がスクデットへ導く@相互フォロー募集(3004499750) +Unfollowed User: ネオニート月収120万(2911016510) +Unfollowed User: 西野カナ共感歌詞bot(2176846784) +Unfollowed User: 合同会社ニクール(1206766933) +Unfollowed User: みきにゃん(2587745660) +Unfollowed User: どこかで誰かのつぶやき(1628996102) +Unfollowed User: G(1939530175) +Unfollowed User: 西野カナ☆ファン大集合!!(3012352910) +Unfollowed User: ネオニート月収120万(2908720453) +Unfollowed User: あやぽ(3045662118) +Unfollowed User: のび(2829141541) +Unfollowed User: DISCOTECA ÁREA 33(2314193748) +Unfollowed User: 菜摘(3042384076) +Unfollowed User: いおきべ いろり(3026893052) +Unfollowed User: gurepon 公式アカウント(1266046542) +Unfollowed User: 相互フォロー100%【フォロワー増やし】(3017801538) +Unfollowed User: 春雄(3030948183) +Unfollowed User: えろみく@エロ垢(2963877286) +Unfollowed User: 菊痴(2522049246) +Unfollowed User: みすずちん(3031408071) +Unfollowed User: riko(2914763353) +Unfollowed User: Sevilla Eventos(1358052368) +Unfollowed User: 瑠璃(2962603490) +Unfollowed User: 鬼猫背(146499534) +Unfollowed User: なぎさ♡うら(3037921322) +Unfollowed User: 龍ヶ嬢 七々々(2243726948) +Unfollowed User: Sena@チョコボ鯖(2921075125) +Unfollowed User: 貴之(1350900638) +Unfollowed User: marine(2961104811) +Unfollowed User: 高校数学bot(1629037340) +Unfollowed User: くろしろ(3011277678) +Unfollowed User: KAT-TUN歌詞bot(1901507917) +Unfollowed User: Rugrén ®(105256817) +Unfollowed User: QMA 地理検定(1629010758) +Unfollowed User: foon(552814434) +Unfollowed User: Philippe Lewin(118345831) +Unfollowed User: 宇多田ヒカル心に響く歌詞(2176826358) +Unfollowed User: Prod.Universo(2687941489) +Unfollowed User: ❦ℑuℜi₭i❦(2979539804) +Unfollowed User: 相互フォローしましょ!(3048349909) +Unfollowed User: 유노かぷり(2337709364) +Unfollowed User: オンラインカジノ紹介(2939046668) +Unfollowed User: 犬夜叉集大成bot(1901435784) +Unfollowed User: Плюс в Карму(1962037350) +Unfollowed User: 酢いか(577671387) +Unfollowed User: Michitake KAWASAKI(25452928) +Unfollowed User: ともー氏(CV:堀江由衣)(2636373402) +Unfollowed User: ラピュタ名言BOT(1901432406) +Unfollowed User: BLEACH大好きBOT(1901497746) +Unfollowed User: 山下良成(2989169674) +Unfollowed User: 松本晴彦(2923515180) +Unfollowed User: SETI(1443513384) +Unfollowed User: KEN-g(2565529106) +Unfollowed User: ✒ ANONYMOUS(1639243381) +Unfollowed User: ウエノ真里(2831295441) +Unfollowed User: boshi(2610027968) +Unfollowed User: mYamaguchi(2291176962) +Unfollowed User: 刺身(小林秀樹)(594631814) +Unfollowed User: جزل القصيد(2657674784) +Unfollowed User: 大きな乳グラビア(2971155079) +Unfollowed User: عبووود (2791707943) +Unfollowed User: ゔぇくた(2669263884) +Unfollowed User: takeru(2853201666) +Unfollowed User: 栄作/ユリア(3006335689) +Unfollowed User: Louis Vuitton♡大好き(2984078167) +Unfollowed User: tanke(2765815117) +Unfollowed User: !ジャイアンツ!(2417214913) +Unfollowed User: ガルガル(1566042902) +Unfollowed User: G V N D V(1683476208) +Unfollowed User: Kyoka@えいとん(2764769754) +Unfollowed User: Caramel (2840934913) +Unfollowed User: Astral Cae?(2800612847) +Unfollowed User: ひろし(2826090229) +Unfollowed User: 出水 雄大(2797878103) +Unfollowed User: stratoteletor(2601554502) +Unfollowed User: johnnybgoode(2481606204) +Unfollowed User: TripleEvolution(2178222272) +Unfollowed User: 旭日@復活(2528255671) +Unfollowed User: リク(1432197878) +Unfollowed User: ナノこめ(1406461268) +Unfollowed User: 楓きのこ(2417113508) +Unfollowed User: 平塚零士(2475353695) +Unfollowed User: 文房具博士(2432830279) +Unfollowed User: けんけん(1617373302) +Unfollowed User: かねちん(2464308397) +Unfollowed User: Techno Mango(2386755404) +Unfollowed User: lichang(2362375532) +Unfollowed User: でかいひと(192CM)(89205386) +Unfollowed User: 谷口英俊(399113027) +Unfollowed User: A.N.Jell(1246042705) +Unfollowed User: 濱田 潤 (Jun Hamada)(1369472161) +Unfollowed User: *藍月*@Pileさん♡提督(1527746246) +Unfollowed User: 揚げ(1221691316) +Unfollowed User: 三原健嗣(起業の基盤構築の専門家)(1277689890) +Unfollowed User: あるふぉーと(18894688) +Unfollowed User: すのん(265392850) +Unfollowed User: さんぺい動物病院(159426114) +Unfollowed User: 山口真我(1288255651) +Unfollowed User: ちかあ(1515936782) +Unfollowed User: ビスコ(1144265682) +Unfollowed User: paipuisu(1368843631) +Unfollowed User: Thay (363220958) +Unfollowed User: ルクア(1259502127) +Unfollowed User: puput(1360657616) +Unfollowed User: Vicky(800901978) +Unfollowed User: 심재현(961619815) +Unfollowed User: ハクマン(168690488) +Unfollowed User: べいやん@動画投稿系(1334526804) +Unfollowed User: monster cat(1053155418) +Unfollowed User: Nov(1366433947) +Unfollowed User: タカちゃんは裏垢(1394845789) +Unfollowed User: ももももももとん。(337101552) +Unfollowed User: 八王子に住んでます(1251787716) +Unfollowed User: 一音(133253682) +Unfollowed User: marumomix(128505728) +Unfollowed User: なりわい@真戀z(769533300) +Unfollowed User: shiro☆(244721634) +Unfollowed User: す~ちゃんのお母さん【心言言葉】(619122560) +Unfollowed User: DeeJay Ayato.N/F1垢(152927007) +Unfollowed User: オグ (158561933) +Unfollowed User: tmisaki(81554399) +Unfollowed User: 河津一郎(10438862) + diff --git a/2_3/old_please_disregard/log_2_3_after_first_run.txt b/2_3/old_please_disregard/log_2_3_after_first_run.txt new file mode 100644 index 0000000..455f724 --- /dev/null +++ b/2_3/old_please_disregard/log_2_3_after_first_run.txt @@ -0,0 +1,25 @@ +userId209811713 +count of followers from user property:188751 +count of users from api call188559 + +userId83316868 +count of followers from user property:27272 +count of users from api call27263 + +userId15497093 +count of followers from user property:2575 +count of users from api call2575 + +userId160549126 +count of followers from user property:277 +count of users from api call278 + +userId17000457 +count of followers from user property:304214 +count of users from api call303879 + +userId2809878539 +count of followers from user property:171 +count of users from api call169 + +Rate limit reached. Sleeping for: 976 diff --git a/2_3/old_please_disregard/log_2_3_after_second_run.txt b/2_3/old_please_disregard/log_2_3_after_second_run.txt new file mode 100644 index 0000000..def7672 --- /dev/null +++ b/2_3/old_please_disregard/log_2_3_after_second_run.txt @@ -0,0 +1,44 @@ +userId152585619 +count of followers from user property:150113 +count of users from api call150067 + +userId2206092889 +count of followers from user property:2757 +count of users from api call2758 + +userId2837996969 +count of followers from user property:7920 +count of users from api call7759 + +userId2576487215 +count of followers from user property:3523 +count of users from api call3491 + +userId100731315 +count of followers from user property:540920 +count of users from api call540132 + +userId159223609 +count of followers from user property:147050 +count of users from api call146928 + +userId19923515 +count of followers from user property:717542 +count of users from api call716491 + +userId3049567425 +count of followers from user property:14 +count of users from api call11 + +userId132987218 +count of followers from user property:942 +count of users from api call932 + +userId19601111 +count of followers from user property:158394 +count of users from api call158331 + +userId133272665 +count of followers from user property:16576 +count of users from api call16568 + diff --git a/2_3/old_please_disregard/log_2_3_after_second_run_whichstuckfo6hours.txt b/2_3/old_please_disregard/log_2_3_after_second_run_whichstuckfo6hours.txt new file mode 100644 index 0000000..b5d1b94 --- /dev/null +++ b/2_3/old_please_disregard/log_2_3_after_second_run_whichstuckfo6hours.txt @@ -0,0 +1,3 @@ +Rate limit reached. Sleeping for: 1042 +Rate limit reached. Sleeping for: 1015 +Rate limit reached. Sleeping for: 1010 diff --git a/2_3/old_please_disregard/log_2_3_after_week_first_run.txt b/2_3/old_please_disregard/log_2_3_after_week_first_run.txt new file mode 100644 index 0000000..e69de29 diff --git a/2_3/old_please_disregard/log_final_compare_num_followers_before_and_after b/2_3/old_please_disregard/log_final_compare_num_followers_before_and_after new file mode 100644 index 0000000..1ae8edc --- /dev/null +++ b/2_3/old_please_disregard/log_final_compare_num_followers_before_and_after @@ -0,0 +1,21 @@ +userId160549126 count of followers from twitter call 1 week ago: 277 +count of followers from twitter call made now:274 +userId2809878539 count of followers from twitter call 1 week ago: 171 +count of followers from twitter call made now:179 +userId3049567425 count of followers from twitter call 1 week ago: 14 +count of followers from twitter call made now:11 +userId209811713 count of followers from twitter call 1 week ago: 188751 +count of followers from twitter call made now:189414 +userId83316868 count of followers from twitter call 1 week ago: 27272 +count of followers from twitter call made now:27143 +userId15497093 count of followers from twitter call 1 week ago: 2575 +count of followers from twitter call made now:2582 +userId17000457 count of followers from twitter call 1 week ago: 304214 +count of followers from twitter call made now:305201 +userId152585619 count of followers from twitter call 1 week ago: 150113 +count of followers from twitter call made now:149962 +userId2206092889 count of followers from twitter call 1 week ago: 2757 +count of followers from twitter call made now:2756 +userId2837996969 count of followers from twitter call 1 week ago: 7920 +count of followers from twitter call made now:8214 + diff --git a/2_3/old_please_disregard/test_examine_db_followers.py b/2_3/old_please_disregard/test_examine_db_followers.py new file mode 100644 index 0000000..7534151 --- /dev/null +++ b/2_3/old_please_disregard/test_examine_db_followers.py @@ -0,0 +1,75 @@ +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time + + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + + +#get tables +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users + +#create an array of unique users +topRetweetedUserList = dict() + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + +#check the list to make sure it has what I want +for uid in topRetweetedUserList: + print str(uid) + " " + str(topRetweetedUserList[uid]) + +print " " + + +#for this module we are getting only top 10 users. Use index to keep track. +userIndex = 0 + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + userIndex += 1 + if userIndex == 10: + break + +#print len(topRetweetedUserList) +#print topRetweetedUserList + + +#check the list to make sure it has what I want +for uid in topRetweetedUserList: + print str(uid) + " " + str(topRetweetedUserList[uid]) + +#exit() + +db_followers = mongoConnection['twitter_analyzer'].db_followers + +#for follower in db_followers.find(): +# print follower["id"] + +print " " + +db_followers_after_week = mongoConnection['twitter_analyzer'].db_followers_after_week +for follower in db_followers_after_week.find(): + print follower["id"] diff --git a/2_3/test_examine_db_followers.py~ b/2_3/test_examine_db_followers.py~ new file mode 100644 index 0000000..d98660a --- /dev/null +++ b/2_3/test_examine_db_followers.py~ @@ -0,0 +1,75 @@ +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time + + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + + +#get tables +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users + +#create an array of unique users +topRetweetedUserList = dict() + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + +#check the list to make sure it has what I want +for uid in topRetweetedUserList: + print str(uid) + " " + str(topRetweetedUserList[uid]) + +print " " + + +#for this module we are getting only top 10 users. Use index to keep track. +userIndex = 0 + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + userIndex += 1 + if userIndex == 10: + break + +#print len(topRetweetedUserList) +#print topRetweetedUserList + + +#check the list to make sure it has what I want +for uid in topRetweetedUserList: + print str(uid) + " " + str(topRetweetedUserList[uid]) + +exit() + +db_followers = mongoConnection['twitter_analyzer'].db_followers + +#for follower in db_followers.find(): +# print follower["id"] + +print " " + +db_followers_after_week = mongoConnection['twitter_analyzer'].db_followers_after_week +for follower in db_followers_after_week.find(): + print follower["id"] diff --git a/3_1/3_1_load_backup_from_S3.py~ b/3_1/3_1_load_backup_from_S3.py~ new file mode 100644 index 0000000..e69de29 diff --git a/3_1/3_1_restore_backup_from_S3.py b/3_1/3_1_restore_backup_from_S3.py new file mode 100644 index 0000000..7adf926 --- /dev/null +++ b/3_1/3_1_restore_backup_from_S3.py @@ -0,0 +1,91 @@ +#Kasane Utsumi - 3/14/2015 +#3_1_restore_backup_from_S3.py +#This code restores backup files made from db_tweets and db_streamT and restores the data into db_tweetsRestored and db_streamTRestored collections. + +import json +import os +import boto +from boto.s3.key import Key +import string +import signal +import os +import pymongo +from boto.s3.connection import S3Connection +from bson.json_util import dumps +import yaml +import csv + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweetsRestored = mongoConnection['twitter_analyzer'].db_tweetsRestored +db_streamTRestored = mongoConnection['twitter_analyzer'].db_streamTRestored + +db_tweetsRestored.drop() +db_streamTRestored.drop() + +aws_access_key_id ='' +aws_secret_access_key= '' +aws_bucket_name='' + +conn = None +bucket = None + + +try: + conn = S3Connection(aws_access_key_id,aws_secret_access_key) + bucket = conn.get_bucket(aws_bucket_name) +except: + print "S3 connection failed or bucket connection failed" + exit() + +#get file from s3 and store locally and store into db_streamTRestored + +for key in bucket.list("db_streamT"): + + if key.name.endswith('/'): + continue + key.get_contents_to_filename(key.name) + + filer = open(os.getcwd() + "/" + key.name,"r") + + #use yaml to get result as str type and not unicode type + data = yaml.load(filer.read()) + + filer.close() + #file was read into memory, now delete it to save disk space + os.remove(os.getcwd() + "/" + key.name) + for jsonTweet in data: + del jsonTweet["_id"] + db_streamTRestored.insert(jsonTweet) + +#get file from s3 and store locally and store into db_tweetsRestored +for key in bucket.list("db_tweets"): + + if key.name.endswith('/'): + continue + key.get_contents_to_filename(key.name) + + #read from csv file, dump each text into db_tweetsRestored + with open(os.getcwd() +"/"+ key.name,'r') as csvfile: + csvreader = csv.reader(csvfile) + + for row in csvreader: + for col in row: + db_tweetsRestored.insert({"text" : col.decode('utf8')}) + + os.remove(os.getcwd() +"/"+ key.name) + + + diff --git a/3_1/3_1_restore_backup_from_S3.py~ b/3_1/3_1_restore_backup_from_S3.py~ new file mode 100644 index 0000000..396c476 --- /dev/null +++ b/3_1/3_1_restore_backup_from_S3.py~ @@ -0,0 +1,91 @@ +#Kasane Utsumi - 3/14/2015 +#3_1_restore_backup_from_S3.py +#This code restores backup files made from db_tweets and db_streamT and restores the data into db_tweetsRestored and db_streamTRestored collections. + +import json +import os +import boto +from boto.s3.key import Key +import string +import signal +import os +import pymongo +from boto.s3.connection import S3Connection +from bson.json_util import dumps +import yaml +import csv + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweetsRestored = mongoConnection['twitter_analyzer'].db_tweetsRestored +db_streamTRestored = mongoConnection['twitter_analyzer'].db_streamTRestored + +db_tweetsRestored.drop() +db_streamTRestored.drop() + +aws_access_key_id ='AKIAIW66YJD5QMLM6NAQ' +aws_secret_access_key= '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' +aws_bucket_name='moonlightbucket' + +conn = None +bucket = None + + +try: + conn = S3Connection(aws_access_key_id,aws_secret_access_key) + bucket = conn.get_bucket(aws_bucket_name) +except: + print "S3 connection failed or bucket connection failed" + exit() + +#get file from s3 and store locally and store into db_streamTRestored + +for key in bucket.list("db_streamT"): + + if key.name.endswith('/'): + continue + key.get_contents_to_filename(key.name) + + filer = open(os.getcwd() + "/" + key.name,"r") + + #use yaml to get result as str type and not unicode type + data = yaml.load(filer.read()) + + filer.close() + #file was read into memory, now delete it to save disk space + os.remove(os.getcwd() + "/" + key.name) + for jsonTweet in data: + del jsonTweet["_id"] + db_streamTRestored.insert(jsonTweet) + +#get file from s3 and store locally and store into db_tweetsRestored +for key in bucket.list("db_tweets"): + + if key.name.endswith('/'): + continue + key.get_contents_to_filename(key.name) + + #read from csv file, dump each text into db_tweetsRestored + with open(os.getcwd() +"/"+ key.name,'r') as csvfile: + csvreader = csv.reader(csvfile) + + for row in csvreader: + for col in row: + db_tweetsRestored.insert({"text" : col.decode('utf8')}) + + os.remove(os.getcwd() +"/"+ key.name) + + + diff --git a/3_1/3_1_store_backup_to_S3.py b/3_1/3_1_store_backup_to_S3.py new file mode 100644 index 0000000..a4cc950 --- /dev/null +++ b/3_1/3_1_store_backup_to_S3.py @@ -0,0 +1,117 @@ +#Kasane Utsumi - 3/14/2015 +#3_1_store_backup_to_S3.py +#This code make a backup of db_tweets and db_streamT and uploads to corresponding location on S3. The items in db_streamT are bundled into 500 per json file(value), and for db_tweets 1500 per csv file(value) + +import signal +import pymongo +import TweetSerializer +import boto +from boto.s3.connection import S3Connection +from bson.json_util import dumps +import csv +from boto.s3.key import Key +import os + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweets = mongoConnection['twitter_analyzer'].db_tweets +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +if db_tweets == None or db_streamT == None: + print "either collection does not exist!" + exit() + + +aws_access_key_id ='' +aws_secret_access_key= '' +aws_bucket_name=' +conn = None +bucket = None + + +try: + conn = S3Connection(aws_access_key_id,aws_secret_access_key) + bucket = conn.get_bucket(aws_bucket_name) +except: + print "S3 connection failed or bucket connection failed" + exit() + +#store json from db_streamT first + +#chunk into 500 tweet json per file +tweetSerializerJson = TweetSerializer.TweetSerializer(500,bucket,"db_streamT", False) + +for tweetJson in db_streamT.find(): + tweetSerializerJson.write(dumps(tweetJson)) +tweetSerializerJson.end() + +print "finished uploading json tweets, now try just tweets" + +#now store tweets only + +#utility function to open file, write content of tweet array and upload to S3 bucket +def writeCSVAndUploadTweetOnly(tweetDirectory,tweetFilePrefix,fileNumber,arrayOfTweets): + fileName = tweetFilePrefix + str(fileNumber) + ".csv" + + with open(tweetDirectory + "/" + fileName, "w+") as output: + + writer = csv.writer(output, lineterminator='\n') + + for i in range(len(arrayOfTweets)): + t = arrayOfTweets[i] + #if tweet text contains carriage return, must warp by " so that it doesn't get split into multiple tweets while restoring the db from s3. + if ('\r' in t or '\n' in t): + arrayOfTweets[i] = '"' + t + '"' + #writer.writerows(t) + + writer.writerow(arrayOfTweets) + + key = Key(bucket) + key.key=os.path.join(tweetDirectory, fileName) + try: + key.set_contents_from_filename(tweetDirectory + "/"+fileName) + except: + print "Storing to amazon failed for:" + fileName + os.remove(tweetDirectory + "/"+ fileName) + + +# number of tweet text to store per file +tweetCountPerFile = 1500 +tweetDirectory = "db_tweets" +tweetFilePrefix = "tweetOnly" +fileNumber = 1 + +#keep tweets in array, and once reached 1500, dump into csv. It is not a most elegant way to do this +#but I am running out of time... +tweetList = [] + +for tweetText in db_tweets.find(): + + tweetList.append(tweetText['text'].encode('utf8')) + + if len(tweetList) == tweetCountPerFile: + + writeCSVAndUploadTweetOnly(tweetDirectory,tweetFilePrefix,fileNumber,tweetList) + #reinitialize array + tweetList = [] + fileNumber +=1 + +#serialize leftover tweets +if len(tweetList) != 0: + writeCSVAndUploadTweetOnly(tweetDirectory,tweetFilePrefix,fileNumber,tweetList) + + + + + diff --git a/3_1/3_1_store_backup_to_S3.py~ b/3_1/3_1_store_backup_to_S3.py~ new file mode 100644 index 0000000..a4cc950 --- /dev/null +++ b/3_1/3_1_store_backup_to_S3.py~ @@ -0,0 +1,117 @@ +#Kasane Utsumi - 3/14/2015 +#3_1_store_backup_to_S3.py +#This code make a backup of db_tweets and db_streamT and uploads to corresponding location on S3. The items in db_streamT are bundled into 500 per json file(value), and for db_tweets 1500 per csv file(value) + +import signal +import pymongo +import TweetSerializer +import boto +from boto.s3.connection import S3Connection +from bson.json_util import dumps +import csv +from boto.s3.key import Key +import os + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_tweets = mongoConnection['twitter_analyzer'].db_tweets +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +if db_tweets == None or db_streamT == None: + print "either collection does not exist!" + exit() + + +aws_access_key_id ='' +aws_secret_access_key= '' +aws_bucket_name=' +conn = None +bucket = None + + +try: + conn = S3Connection(aws_access_key_id,aws_secret_access_key) + bucket = conn.get_bucket(aws_bucket_name) +except: + print "S3 connection failed or bucket connection failed" + exit() + +#store json from db_streamT first + +#chunk into 500 tweet json per file +tweetSerializerJson = TweetSerializer.TweetSerializer(500,bucket,"db_streamT", False) + +for tweetJson in db_streamT.find(): + tweetSerializerJson.write(dumps(tweetJson)) +tweetSerializerJson.end() + +print "finished uploading json tweets, now try just tweets" + +#now store tweets only + +#utility function to open file, write content of tweet array and upload to S3 bucket +def writeCSVAndUploadTweetOnly(tweetDirectory,tweetFilePrefix,fileNumber,arrayOfTweets): + fileName = tweetFilePrefix + str(fileNumber) + ".csv" + + with open(tweetDirectory + "/" + fileName, "w+") as output: + + writer = csv.writer(output, lineterminator='\n') + + for i in range(len(arrayOfTweets)): + t = arrayOfTweets[i] + #if tweet text contains carriage return, must warp by " so that it doesn't get split into multiple tweets while restoring the db from s3. + if ('\r' in t or '\n' in t): + arrayOfTweets[i] = '"' + t + '"' + #writer.writerows(t) + + writer.writerow(arrayOfTweets) + + key = Key(bucket) + key.key=os.path.join(tweetDirectory, fileName) + try: + key.set_contents_from_filename(tweetDirectory + "/"+fileName) + except: + print "Storing to amazon failed for:" + fileName + os.remove(tweetDirectory + "/"+ fileName) + + +# number of tweet text to store per file +tweetCountPerFile = 1500 +tweetDirectory = "db_tweets" +tweetFilePrefix = "tweetOnly" +fileNumber = 1 + +#keep tweets in array, and once reached 1500, dump into csv. It is not a most elegant way to do this +#but I am running out of time... +tweetList = [] + +for tweetText in db_tweets.find(): + + tweetList.append(tweetText['text'].encode('utf8')) + + if len(tweetList) == tweetCountPerFile: + + writeCSVAndUploadTweetOnly(tweetDirectory,tweetFilePrefix,fileNumber,tweetList) + #reinitialize array + tweetList = [] + fileNumber +=1 + +#serialize leftover tweets +if len(tweetList) != 0: + writeCSVAndUploadTweetOnly(tweetDirectory,tweetFilePrefix,fileNumber,tweetList) + + + + + diff --git a/3_1/3_1_store_backup_to_S3~ b/3_1/3_1_store_backup_to_S3~ new file mode 100644 index 0000000..e69de29 diff --git a/3_1/TweetSerializer.py b/3_1/TweetSerializer.py new file mode 100644 index 0000000..94d0b10 --- /dev/null +++ b/3_1/TweetSerializer.py @@ -0,0 +1,84 @@ +import json +import boto +from boto.s3.key import Key +import signal +import sys +import os + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +class TweetSerializer: + out = None + first = True + tweetCount = 0 + maxCount = 0 + fileCount = 0 + bucket = None + fileName = None + folderName = "tweetsTest" + tweetTextOnly = False + + def __init__(self, maxCount=10,bucket=None, folderName="tweetsTest", tweetTextOnly = False ): + self.maxCount = maxCount + self.bucket = bucket + self.folderName = folderName + self.tweetTextOnly = tweetTextOnly + + def start(self): + self.fileCount += 1 + self.fileName = "tweets-" +str(self.fileCount)+".json" + fname= self.folderName + "/"+ self.fileName + + try: + self.out = open(fname,"w+") + except: + print "opening file failed for " + fname + exit() + + self.out.write("[\n") + self.first = True + + def end(self): + if self.out is not None: + if not self.tweetTextOnly: #ending char required only for json tweet + self.out.write("\n]\n") + self.out.close() + key = Key(self.bucket) + key.key=os.path.join(self.folderName, self.fileName) + try: + key.set_contents_from_filename(self.folderName + "/"+self.fileName) + except: + print "Storing to amazon failed for:" + self.fileName + + self.out = None + os.remove(self.folderName + "/"+ self.fileName) + + + def write(self,tweet): + if self.tweetCount == 0: + self.start() #initialize + if not self.first: #write delimiter if not the first item in the file + + if self.tweetTextOnly: #for tweet only, just put line break. For tweet json, put comma + self.out.write("\n") + else: + self.out.write(",\n") + + self.first = False + + if self.tweetTextOnly: #for tweet only + self.out.write(tweet) + else: + self.out.write(str(tweet)) + + self.tweetCount += 1 + + if self.tweetCount == self.maxCount: + self.end() + self.tweetCount = 0 + + diff --git a/3_1/TweetSerializer.pyc b/3_1/TweetSerializer.pyc new file mode 100644 index 0000000..d8494b4 Binary files /dev/null and b/3_1/TweetSerializer.pyc differ diff --git a/3_1/TweetSerializer.py~ b/3_1/TweetSerializer.py~ new file mode 100644 index 0000000..0e43ce8 --- /dev/null +++ b/3_1/TweetSerializer.py~ @@ -0,0 +1,83 @@ +import json +import boto +from boto.s3.key import Key +import signal +import sys +import os + +def interrupt(signum, frame): + print "Interrupted, closing ..." + exit(1) + +signal.signal(signal.SIGINT, interrupt) + +class TweetSerializer: + out = None + first = True + tweetCount = 0 + maxCount = 0 + fileCount = 0 + bucket = None + fileName = None + folderName = "tweetsTest" + tweetTextOnly = False + + def __init__(self, maxCount=10,bucket=None, folderName="tweetsTest", tweetTextOnly = False ): + self.maxCount = maxCount + self.bucket = bucket + self.folderName = folderName + self.tweetTextOnly = tweetTextOnly + + def start(self): + self.fileCount += 1 + self.fileName = "tweets-" +str(self.fileCount)+".json" + fname= self.folderName + "/"+ self.fileName + + try: + self.out = open(fname,"w+") + except: + print "opening file failed for " + fname + exit() + + self.out.write("[\n") + self.first = True + + def end(self): + if self.out is not None: + if not self.tweetTextOnly: #ending char required only for json tweet + self.out.write("\n]\n") + self.out.close() + key = Key(self.bucket) + key.key=os.path.join(self.folderName, self.fileName) + try: + key.set_contents_from_filename(self.folderName + "/"+self.fileName) + except: + print "Storing to amazon failed for:" + self.fileName + os.remove(self.folderName + "/"+ self.fileName) + + self.out = None + + def write(self,tweet): + if self.tweetCount == 0: + self.start() #initialize + if not self.first: #write delimiter if not the first item in the file + + if self.tweetTextOnly: #for tweet only, just put line break. For tweet json, put comma + self.out.write("\n") + else: + self.out.write(",\n") + + self.first = False + + if self.tweetTextOnly: #for tweet only + self.out.write(tweet) + else: + self.out.write(str(tweet)) + + self.tweetCount += 1 + + if self.tweetCount == self.maxCount: + self.end() + self.tweetCount = 0 + + diff --git a/README.md b/README.md index 0d616f3..e2acfae 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,3 @@ -# Storing, Retrieving, and Analyzing Social Media Data Using MongoDB# +Please look at "ReadMe_Assignment3.pdf - - - -## Background Info ## -This assignment is built on top of the previous assignment. - -We'd like to utilize the twitter data (raw formats) that you gathered in assignment 2. - -##Lexical diversity ## -Lexical diversity is a measurement that provides a quantitative measure for the diversity of an individual's or group's vocabulary. It is calculated by finding the number of unique tokens in the text divided by the total number of tokens in the text. - -## 1-Storing Task ## - - - 1.1- Write a python program to automatically store the JSON files (associated with the #microsoft and #mojang hash tags) - returned by twitter api in a database called db_streamT. - - 1.2- Write a python program to insert the chucked data tweets (of assignment 2) that you have stored on S3 to mongoDB - in a database called db_tweets. - -## 2-Retrieving and Analyzing Task ## - 2.1- Analyze the tweets stored in db_tweets by finding the top 30 retweets as well as their associated usernames and the locations - of users. - - 2.2- Compute the lexical diversity of the tweets stored in db_streamT and store the results back to Mongodb. You need to create a collection - with appropriate structure for storing the results of your analysis. - - 2.3- Write a python program to create a db called db_followers that stores all the followers for all the users that - you find in task 2.1. Then, write a program to find the un-followed friends after a week for the top 10 users( users that have the highest number of followers in task 2.1) - since the time that you extracted the tweets. - - 2.4- .(Bonus task) Write a python program and use NLTK to analyze the top 30 retweets of task 2.1 as positive or negative (sentiment analysis). This is the bonus part of the assignment. - -##3-Storing and Retrieving Task## - - 3.1- Write a python program to create and store the backups of both db_tweets and db_streamT to S3. It also should have a capability of - loading the backups if necessary. - - -## What to Turn In ## - -1. A link to your S3 bucket that holds the backups documented in your README.md file. Make sure to make it publicly accessible. - -2. Your python codes. - -3. The plot of your lexical diversity in task 2.2 and the result of the sentiment analysis in task 2.4 if you complete the bonus part. +Please disregard this text. I just put it there so I am introducing a change in file in order to be able to create new pull request. diff --git a/README.md~ b/README.md~ new file mode 100644 index 0000000..2242ab7 --- /dev/null +++ b/README.md~ @@ -0,0 +1 @@ +Please look at "ReadMe_Assignment3.pdf" diff --git a/ReadMe_Assignment3.pdf b/ReadMe_Assignment3.pdf new file mode 100644 index 0000000..6c2f382 Binary files /dev/null and b/ReadMe_Assignment3.pdf differ diff --git a/test_please_disregard/fill_db_streamT.py b/test_please_disregard/fill_db_streamT.py new file mode 100644 index 0000000..b06bcd6 --- /dev/null +++ b/test_please_disregard/fill_db_streamT.py @@ -0,0 +1,49 @@ +import os +import sys +import tweepy +import datetime +#mport urllib +import signal +import json +import boto +from boto.s3.connection import S3Connection +#import tweetserializer +#import tweetanalyzer +from boto.s3.key import Key +#import numpy as np +#import pylab as pl +import pymongo + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() +#create table +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +aws_access_key_id ='' +aws_secret_access_key= '' +aws_bucket_name='' + + +s3conn = S3Connection(aws_access_key_id,aws_secret_access_key) +bucket = s3conn.get_bucket(aws_bucket_name) + +#get tweets from S3 and dump them into db_streamT +for key in bucket.list(): + #each file contains 500 tweets, split them up and put each tweet json data into the table + key.get_contents_to_filename("fromS3/" + key.name) + directory = os.getcwd() + "/fromS3" + filer = open(directory +"/"+ key.name,"r") + data=json.loads(filer.read()) + #tweets = {"name":key.name,"fileContent": filer.read()} + #db_streamT.insert(tweets) + filer.close() + #file was read into memory, now delete it to save disk space + os.remove(directory+"/"+key.name) + for tweet in data: + temp = {"id":tweet['id'],"tweetJson":tweet} + db_streamT.insert(temp) + + diff --git a/test_please_disregard/fill_db_streamT.py~ b/test_please_disregard/fill_db_streamT.py~ new file mode 100644 index 0000000..59b01aa --- /dev/null +++ b/test_please_disregard/fill_db_streamT.py~ @@ -0,0 +1,49 @@ +import os +import sys +import tweepy +import datetime +#mport urllib +import signal +import json +import boto +from boto.s3.connection import S3Connection +#import tweetserializer +#import tweetanalyzer +from boto.s3.key import Key +#import numpy as np +#import pylab as pl +import pymongo + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() +#create table +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +aws_access_key_id ='AKIAIW66YJD5QMLM6NAQ' +aws_secret_access_key= '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' +aws_bucket_name='moonlightbucket' + + +s3conn = S3Connection(aws_access_key_id,aws_secret_access_key) +bucket = s3conn.get_bucket(aws_bucket_name) + +#get tweets from S3 and dump them into db_streamT +for key in bucket.list(): + #each file contains 500 tweets, split them up and put each tweet json data into the table + key.get_contents_to_filename("fromS3/" + key.name) + directory = os.getcwd() + "/fromS3" + filer = open(directory +"/"+ key.name,"r") + data=json.loads(filer.read()) + #tweets = {"name":key.name,"fileContent": filer.read()} + #db_streamT.insert(tweets) + filer.close() + #file was read into memory, now delete it to save disk space + os.remove(directory+"/"+key.name) + for tweet in data: + temp = {"id":tweet['id'],"tweetJson":tweet} + db_streamT.insert(temp) + + diff --git a/test_please_disregard/fill_db_tweets.py b/test_please_disregard/fill_db_tweets.py new file mode 100644 index 0000000..f928adf --- /dev/null +++ b/test_please_disregard/fill_db_tweets.py @@ -0,0 +1,31 @@ +import os +#jjimport sys +#import tweepy +#jimport datetime +#mport urllib +#import signal +import json +#import boto +#from boto.s3.connection import S3Connection +#import tweetserializer +#import tweetanalyzer +#from boto.s3.key import Key +#import numpy as np +#import pylab as pl +import pymongo +from bson.json_util import dumps +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT +db_tweets = mongoConnection['twitter_analyzer'].db_tweets + +#extract tweet from tweet json +for tJson in db_streamT.find(): + tweetOnlyEntry = {"id" : tJson['id'], "text" : json.loads(dumps(tJson['tweetJson']))['text']} + db_tweets.insert(tweetOnlyEntry) +exit() + diff --git a/test_please_disregard/histTest.txt~ b/test_please_disregard/histTest.txt~ new file mode 100644 index 0000000..e69de29 diff --git a/test_please_disregard/histoTest.txt b/test_please_disregard/histoTest.txt new file mode 100644 index 0000000..3cdb8bc --- /dev/null +++ b/test_please_disregard/histoTest.txt @@ -0,0 +1,8 @@ +13 Microsoft +14 Game +15 Best +18 Wow +19 Play +20 Do +21 Team13 +21 Team diff --git a/test_please_disregard/myTestFor2_3.py b/test_please_disregard/myTestFor2_3.py new file mode 100644 index 0000000..dcf511c --- /dev/null +++ b/test_please_disregard/myTestFor2_3.py @@ -0,0 +1,56 @@ +#Kasane Utsumi +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time + +#twitter setup +consumer_key = "" +consumer_secret = "" +access_token = "" +access_token_secret = "" +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users +db_followers = mongoConnection['twitter_analyzer'].db_followers + + +#create an array of unique users +topRetweetedUserList = dict() + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + +#check the list to make sure it has what I want +#for uid in topRetweetedUserList: +# print str(uid) + " " + str(topRetweetedUserList[uid]) + +#for each user, make a twitter followers/id call to get list of his/her followers' ids and store it in the db_followers +#db_follower will have 30 rows, the format for each row is: +#{"id":user id, followerIds: list of follower's ids} +for uid in topRetweetedUserList: + if db_followers.find({'id' : uid}).count() != 0: + print "found" + else: + print "not found" + + diff --git a/test_please_disregard/myTestFor2_3.py~ b/test_please_disregard/myTestFor2_3.py~ new file mode 100644 index 0000000..9b06d93 --- /dev/null +++ b/test_please_disregard/myTestFor2_3.py~ @@ -0,0 +1,56 @@ +#Kasane Utsumi +import os +import json +import pymongo +from bson.json_util import dumps +import tweepy +import time + +#twitter setup +consumer_key = "10G4NlBUpM9nusmE9nSoeGQnk" +consumer_secret = "KcH2Ykf253L0tTCuzIyqDUPnkEZ7mZhIiHCYiS84LbZNCsQwRu" +access_token = "2988143343-waN3T7DFy7j0Yn95hDdXOMLpdRfHzG66SnOZlHO" +access_token_secret = "TDd8WId2f7Cw8jDLdPcjJRM5lTlMGYiuLjUl1ped21euS" +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#mongo setup +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users +db_followers = mongoConnection['twitter_analyzer'].db_followers + + +#create an array of unique users +topRetweetedUserList = dict() + +for userJson in db_top30RetweetedUsers.find(): + + userId = userJson['id'] + + #get followers Count for cross checking + followersCount = json.loads(dumps(userJson['userInfo']))['followers_count'] + + if userId not in topRetweetedUserList: + topRetweetedUserList[userId] = followersCount + +#check the list to make sure it has what I want +#for uid in topRetweetedUserList: +# print str(uid) + " " + str(topRetweetedUserList[uid]) + +#for each user, make a twitter followers/id call to get list of his/her followers' ids and store it in the db_followers +#db_follower will have 30 rows, the format for each row is: +#{"id":user id, followerIds: list of follower's ids} +for uid in topRetweetedUserList: + if db_followers.find({'id' : uid}).count() != 0: + print "found" + else: + print "not found" + + diff --git a/test_please_disregard/original_2_2_get_top30tweets.py b/test_please_disregard/original_2_2_get_top30tweets.py new file mode 100644 index 0000000..41bf02f --- /dev/null +++ b/test_please_disregard/original_2_2_get_top30tweets.py @@ -0,0 +1,74 @@ +#Kasane Utsumi +import os +import json +import pymongo +from bson.json_util import dumps + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() + +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT +db_retweets = mongoConnection['twitter_analyzer'].db_all_retweets +db_top30RetweetedUsers = mongoConnection['twitter_analyzer'].db_top30_users + +db_retweets.drop() +db_top30RetweetedUsers.drop() + +#create a dictionary of retweeted id is key, and number of occurrence as a value +retweetDict = dict() + +#also, dump all of the tweets into db_all_retweets so it would be easy to get user and location for top 30 later. + +for tJson in db_streamT.find(): + tweetJson = json.loads(dumps(tJson['tweetJson'])) + if 'retweeted_status' in tweetJson: + retweet = tweetJson['retweeted_status'] + id = retweet['id'] + retweetDBEntry = {"id" : id, "retweetJson" : retweet} + db_retweets.insert(retweetDBEntry) + if id in retweetDict: + retweetDict[id] += 1 + else: retweetDict[id] = 1 + + +#check the dictionary to make sure it has what I want +#for key in retweetDict: + #if (retweetDict[key] >1): + #print str(key) + " " + str(retweetDict[key]) + +#convert retweetDict into tuples so I can sort by number of frequencies, then sort by frequncy +retweetTuple = sorted(tuple(retweetDict.iteritems()),key=lambda x: (-x[1],x[0])) + +#check the tuple to see if it has what I want +#for (id,frequency) in retweetTuple: + #if frequency > 1: + #print str(id) + " " +str(frequency) + +#print out the top tweeted user , also store them in top30_users collection so they can be retreived for other analysis +tupleIndex = 0 +for (id,frequency) in retweetTuple: + retweet = db_retweets.find_one({"id":id}) + + if (retweet == None): + print "Something went wrong, could not find retweet with id" + str(id) + else: + retweetJson = json.loads(dumps(retweet["retweetJson"])) + topTweetedUser = retweetJson['user'] + + userDBEntry = {"id": topTweetedUser['id'], "userInfo" : topTweetedUser} + db_top30RetweetedUsers.insert(userDBEntry) + + #print out retweet, user name and location + print "Top Retweet Rank " + str(tupleIndex+1) + print "Tweet: " + retweetJson["text"] + print "User: " + topTweetedUser["name"] + " at " + topTweetedUser['location'] + print " " + + #get only top 30 + tupleIndex = tupleIndex + 1 + if tupleIndex == 30: + exit() diff --git a/test_please_disregard/test.py b/test_please_disregard/test.py new file mode 100644 index 0000000..45b08d5 --- /dev/null +++ b/test_please_disregard/test.py @@ -0,0 +1,49 @@ +import tweepy +import urllib +import pymongo +import sys +import json +from bson.json_util import dumps + + +#configure tweepy +consumer_key = "" +consumer_secret = "" +access_token = "" +access_token_secret = "" + +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#get string to search for and start and end date from urllib +q = urllib.quote_plus("#microsoft OR #mojang") +start = urllib.quote_plus(sys.argv[1]) +end = urllib.quote_plus(sys.argv[2]) + +#set up mongodb collection +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() +#get tables - will create separate table for each day and merge all of the tables into db_streamT later. +#I will do this so that if error happens while getting tweets for one day I don't have to run the program +# for entire week again. +database = mongoConnection['twitter_analyzer'] +db_streamT = database['db_streamT' + start] + +#clean table is there are any data +db_streamT.drop() + + + +# Additional query parameters: +# since: {date} +# until: {date} +# Just add them to the 'q' variable: q+" since: 2014-01-01 until: 2014-01-02" + + +for tweet in tweepy.Cursor(api.search,q=q+" since:" + start + " until:" + end).items(): + db_streamT.insert(tweet._json) + diff --git a/test_please_disregard/test.py~ b/test_please_disregard/test.py~ new file mode 100644 index 0000000..d14883c --- /dev/null +++ b/test_please_disregard/test.py~ @@ -0,0 +1,49 @@ +import tweepy +import urllib +import pymongo +import sys +import json +from bson.json_util import dumps + + +#configure tweepy +consumer_key = "" +consumer_secret = "" +access_token = " +access_token_secret = "TDd8WId2f7Cw8jDLdPcjJRM5lTlMGYiuLjUl1ped21euS" + +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth_handler=auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True) + +#get string to search for and start and end date from urllib +q = urllib.quote_plus("#microsoft OR #mojang") +start = urllib.quote_plus(sys.argv[1]) +end = urllib.quote_plus(sys.argv[2]) + +#set up mongodb collection +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() +#get tables - will create separate table for each day and merge all of the tables into db_streamT later. +#I will do this so that if error happens while getting tweets for one day I don't have to run the program +# for entire week again. +database = mongoConnection['twitter_analyzer'] +db_streamT = database['db_streamT' + start] + +#clean table is there are any data +db_streamT.drop() + + + +# Additional query parameters: +# since: {date} +# until: {date} +# Just add them to the 'q' variable: q+" since: 2014-01-01 until: 2014-01-02" + + +for tweet in tweepy.Cursor(api.search,q=q+" since:" + start + " until:" + end).items(): + db_streamT.insert(tweet._json) + diff --git a/test_please_disregard/test_dbStream.py b/test_please_disregard/test_dbStream.py new file mode 100644 index 0000000..a531df6 --- /dev/null +++ b/test_please_disregard/test_dbStream.py @@ -0,0 +1,34 @@ +import os +#jjimport sys +#import tweepy +#jimport datetime +#mport urllib +#import signal +import json +#import boto +#from boto.s3.connection import S3Connection +#import tweetserializer +#import tweetanalyzer +#from boto.s3.key import Key +#import numpy as np +#import pylab as pl +import pymongo +from bson.json_util import dumps +from bson.json_util import loads + + +try: + mongoConnection = pymongo.MongoClient() +except: + print "Connection failed" + exit() +#get tables +db_streamT = mongoConnection['twitter_analyzer'].db_streamT + +#extract tweet from tweet json +for tJson in db_streamT.find(): + print json.loads(dumps(tJson['tweetJson']))["text"].encode('utf8') + #print json.loads(dumps(tJson['tweetJson'])) + exit() + + diff --git a/test_please_disregard/test_plot.py b/test_please_disregard/test_plot.py new file mode 100644 index 0000000..47df98d --- /dev/null +++ b/test_please_disregard/test_plot.py @@ -0,0 +1,15 @@ +import numpy as np +import pylab as pl +dic = {} +with open ("histoTest.txt") as sourceFile: + for line in sourceFile: + print(line) + (key,val) = line.split(" ") + dic[int(key)] = val + +X=np.arange(len(dic)) +pl.bar(X,dic.keys(),width=0.2) +pl.xticks(X,dic.values()) +ymax= max(dic.keys())+1 +pl.ylim(0,ymax) +pl.show() diff --git a/test_please_disregard/test_plot.py~ b/test_please_disregard/test_plot.py~ new file mode 100644 index 0000000..c6f2416 --- /dev/null +++ b/test_please_disregard/test_plot.py~ @@ -0,0 +1,14 @@ +import numpy as np +import pylab as pl +dic = {} +with open ("histoTest.txt") as sourceFile: +for line in sourceFile: +print(line) +(key,val) = line.split(" ") +dic[int(key)] = val +X=np.arange(len(dic)) +pl.bar(X,dic.keys(),width=0.2) +pl.xticks(X,dic.values()) +ymax= max(dic.keys())+1 +pl.ylim(0,ymax) +pl.show() diff --git a/test_please_disregard/test_s3.py b/test_please_disregard/test_s3.py new file mode 100644 index 0000000..c72a902 --- /dev/null +++ b/test_please_disregard/test_s3.py @@ -0,0 +1,29 @@ +import os +import sys +import tweepy +import datetime +#mport urllib +import signal +import json +import boto +from boto.s3.connection import S3Connection +#import tweetserializer +#import tweetanalyzer +from boto.s3.key import Key +#import numpy as np +#import pylab as pl +import pymongo + + + +aws_access_key_id ='' +aws_secret_access_key= '' +aws_bucket_name='' + + +s3conn = S3Connection(aws_access_key_id,aws_secret_access_key) + +print "conn good" +bucket = s3conn.get_bucket(aws_bucket_name) + + diff --git a/test_please_disregard/test_s3.py~ b/test_please_disregard/test_s3.py~ new file mode 100644 index 0000000..7af3afa --- /dev/null +++ b/test_please_disregard/test_s3.py~ @@ -0,0 +1,29 @@ +import os +import sys +import tweepy +import datetime +#mport urllib +import signal +import json +import boto +from boto.s3.connection import S3Connection +#import tweetserializer +#import tweetanalyzer +from boto.s3.key import Key +#import numpy as np +#import pylab as pl +import pymongo + + + +aws_access_key_id ='AKIAIW66YJD5QMLM6NAQ' +aws_secret_access_key= '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' +aws_bucket_name='moonlightbucket' + + +s3conn = S3Connection(aws_access_key_id,aws_secret_access_key) + +print "conn good" +bucket = s3conn.get_bucket(aws_bucket_name) + +