Assignment3 final #4

Open: wants to merge 2 commits into master

Changes from all commits
59 changes: 59 additions & 0 deletions 1_1/1_1_concatenate_into_streamT.py
@@ -0,0 +1,59 @@
#Kasane Utsumi - 3/14/2015
#1_1_concatenate_into_streamT.py
#This script dumps all seven collections named db_streamT%StartDate% (each %StartDate% is a start date passed as a command-line argument) into db_streamT.
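#A usage sketch (the dates below are hypothetical; the script expects one
#start-date argument per daily collection, seven in total):
#   python 1_1_concatenate_into_streamT.py 2015-03-07 2015-03-08 2015-03-09 2015-03-10 2015-03-11 2015-03-12 2015-03-13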

import sys
import signal

import pymongo

def interrupt(signum, frame):
    print "Interrupted, closing ..."
    exit(1)

#register the handler so Ctrl-C closes cleanly
signal.signal(signal.SIGINT, interrupt)

try:
    mongoConnection = pymongo.MongoClient()
except pymongo.errors.ConnectionFailure:
    print "Connection failed"
    exit(1)

#get the database and the target collection
database = mongoConnection['twitter_analyzer']
db_streamT = database.db_streamT

#clear any existing content before refilling
db_streamT.drop()

#one source collection per start date passed on the command line
streams = [database['db_streamT' + arg] for arg in sys.argv[1:8]]

#total document count across all source collections, to compare with the count
#of db_streamT after filling it and verify that the concatenation succeeded
individualTotal = sum(stream.find().count() for stream in streams)


def addThisCollection(collection):
    for content in collection.find():
        db_streamT.insert(content)

for stream in streams:
    addThisCollection(stream)

print "individual total is " + str(individualTotal)
print "StreamT length is " + str(db_streamT.find().count())
print "Number of items match?: " + str(individualTotal == db_streamT.find().count())

59 changes: 59 additions & 0 deletions 1_1/1_1_store_tweets_into_db_streamT.py
@@ -0,0 +1,59 @@
#Kasane Utsumi - 3/14/2015
#1_1_store_tweets_into_db_streamT.py
#This script retrieves tweets matching the search term "#microsoft OR #mojang" within the given date range and dumps them into the db_streamT%StartDate% collection (%StartDate% is the start date passed on the command line).
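#A usage sketch (hypothetical dates; the two arguments are the start and end
#of the date range to fetch):
#   python 1_1_store_tweets_into_db_streamT.py 2015-03-07 2015-03-08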

import sys
import signal
import urllib

import pymongo
import tweepy

def interrupt(signum, frame):
    print "Interrupted, closing ..."
    exit(1)

#register the handler so Ctrl-C closes cleanly
signal.signal(signal.SIGINT, interrupt)


#configure tweepy (fill in your own Twitter API credentials before running)
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth_handler=auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

#URL-encode the search string and the start/end dates from the command line
q = urllib.quote_plus("#microsoft OR #mojang")
start = urllib.quote_plus(sys.argv[1])
end = urllib.quote_plus(sys.argv[2])

#set up mongodb collection
try:
    mongoConnection = pymongo.MongoClient()
except pymongo.errors.ConnectionFailure:
    print "Connection failed"
    exit(1)

#use a separate collection for each day and merge them all into db_streamT later,
#so that if an error happens while getting tweets for one day the whole week
#does not have to be fetched again.
database = mongoConnection['twitter_analyzer']
db_streamT = database['db_streamT' + start]

#clear the collection if it already holds data
db_streamT.drop()

# Additional query parameters:
#   since:{date}
#   until:{date}
# They are appended to the 'q' variable: q + " since:2014-01-01 until:2014-01-02"
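#For example, with hypothetical dates, the final query string passed to the
#API would look like:
#   "%23microsoft+OR+%23mojang since:2015-03-07 until:2015-03-08"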

try:
    for tweet in tweepy.Cursor(api.search, q=q + " since:" + start + " until:" + end).items():
        db_streamT.insert(tweet._json)
except tweepy.TweepError:
    print "tweet retrieval failed, exiting"
    exit(1)
42 changes: 42 additions & 0 deletions 1_2/1_2_fill_db_tweets.py
@@ -0,0 +1,42 @@
#Kasane Utsumi - 3/14/2015
#1_2_fill_db_tweets.py
#This script takes all tweets (stored with their full JSON) from db_streamT and stores ONLY the tweet text in the db_tweets collection.
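#Each resulting db_tweets document keeps just the tweet text, e.g. a
#hypothetical entry: {"text": "Trying the new #microsoft build today"}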

import signal

import pymongo

def interrupt(signum, frame):
    print "Interrupted, closing ..."
    exit(1)

signal.signal(signal.SIGINT, interrupt)

try:
    mongoConnection = pymongo.MongoClient()
except pymongo.errors.ConnectionFailure:
    print "Connection failed"
    exit(1)

#get the database and check that the source collection exists
database = mongoConnection['twitter_analyzer']

if "db_streamT" not in database.collection_names():
    print "db_streamT not found! exiting..."
    exit(1)

db_streamT = database.db_streamT
db_tweets = database.db_tweets

#clear the current content
db_tweets.drop()

#copy only the "text" field of each stored tweet into db_tweets
for tJson in db_streamT.find():
    tweetOnlyEntry = {"text": tJson["text"]}
    db_tweets.insert(tweetOnlyEntry)

#check that the copy succeeded
print "Does length of db_streamT equal that of db_tweets? " + str(db_streamT.find().count() == db_tweets.find().count())
Binary file added 2_1/.hello.txt.swp
Binary file not shown.