-
Notifications
You must be signed in to change notification settings - Fork 3
/
province_connection_table.py
125 lines (103 loc) · 4.96 KB
/
province_connection_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import sys
sys.path.insert(0, './Province')
import csv
import twit_extract_feature
import pandas
from provinces import *
from user_tracker import *
from math import floor
class ProvinceTable:
    """Square province-by-province connection table.

    Rows and columns follow the order of ``provinces``. Two parallel
    matrices are kept as nested lists:

    * ``table``      -- connection counts (or values derived from them).
    * ``table_norm`` -- shared-user counts normalised by the combined user
      count of the two provinces; only filled by ``createTableOfCommonUID``.

    Every ``create*``/``norm*`` method refreshes ``dataFrame`` and
    ``dataFrame_norm``, the pandas views used by the export methods.
    """

    def __init__(self, provinces):
        """Create zero-filled tables for ``provinces``.

        Each province object must expose a ``name`` attribute; the names are
        used as row and column labels.
        """
        self.provinces = provinces
        size = len(provinces)
        self.table = [[0] * size for _ in range(size)]
        self.table_norm = [[0] * size for _ in range(size)]
        self.provinceNameList = [province.name for province in self.provinces]

    def setTimeWindow(self, hours, days):
        """Set the travel time window, stored in seconds as ``timeWindow``."""
        self.timeWindow = days * (24 * 60 * 60) + hours * (60 * 60)

    def createTableOfCommonUID(self, divider=None):
        """Fill both tables from the user ids shared by each province pair.

        For every unordered pair the size of
        ``first.findCommonUID(targetProvi=second)`` is written symmetrically
        into ``table``. When ``divider`` is given, the count is divided by it
        and floored to an int first.

        ``table_norm`` receives ``shared / (len(first.uidList) +
        len(second.uidList) - shared)``, or 0 when that denominator is 0.
        The diagonal is never written.
        """
        count = len(self.provinces)
        for f1 in range(count):
            first = self.provinces[f1]
            for f2 in range(f1 + 1, count):
                second = self.provinces[f2]
                totalIntersect = len(first.findCommonUID(targetProvi=second))

                if divider is None:
                    cell = totalIntersect
                else:
                    cell = int(floor(totalIntersect / divider))
                self.table[f1][f2] = cell
                self.table[f2][f1] = cell

                # Only the empty-vs-empty case can make the denominator zero;
                # test for it explicitly so real errors are not swallowed.
                combined = (len(first.uidList) + len(second.uidList)
                            - totalIntersect)
                norm = float(totalIntersect) / combined if combined else 0
                self.table_norm[f1][f2] = norm
                self.table_norm[f2][f1] = norm
        self.__tableToDataFrame()

    def normConnBySelfOverallConn(self):
        """Divide every row of ``table`` by that row's sum.

        Rows whose sum is 0 are left untouched.
        """
        for row in self.table:
            totalCommon = float(sum(row))
            if totalCommon == 0:
                continue
            # Slice-assign so the row list object itself is preserved.
            row[:] = [value / totalCommon for value in row]
        self.__tableToDataFrame()

    def createTableOfTwosideConnection(self, userTracker):
        """Count directed moves between consecutive history records.

        For each user in ``userTracker.uidList`` the ``mergeHist`` items are
        walked in iteration order. Whenever two consecutive records are at
        most ``timeWindow`` apart, ``table[previous][current]`` is
        incremented.

        ``setTimeWindow`` must be called first; otherwise ``timeWindow`` does
        not exist and an AttributeError is raised.

        NOTE(review): assumes ``mergeHist.items()`` yields
        ``(timestamp, province)`` pairs in chronological order -- confirm
        against UserTracker.
        """
        for user in userTracker.uidList.values():
            preHist = None
            for hist in user.mergeHist.items():
                if (preHist is not None
                        and (hist[0] - preHist[0]) <= self.timeWindow):
                    origin = self.provinceNameList.index(preHist[1].name)
                    target = self.provinceNameList.index(hist[1].name)
                    self.table[origin][target] += 1
                # Always advance, so each record is compared with the one
                # immediately before it.
                preHist = hist
        self.__tableToDataFrame()

    def __tableToDataFrame(self):
        """Rebuild the labelled pandas frames from the nested-list tables."""
        self.dataFrame = pandas.DataFrame(data=self.table,
                                          index=self.provinceNameList,
                                          columns=self.provinceNameList)
        self.dataFrame_norm = pandas.DataFrame(data=self.table_norm,
                                               index=self.provinceNameList,
                                               columns=self.provinceNameList)

    def exportToCSV(self, filename, majorCol=False):
        """Write ``dataFrame`` to ``filename`` as CSV.

        With ``majorCol`` true the transposed table is written. The stored
        frame is left unchanged, so repeated exports give the same result.
        Requires one of the table-building methods to have run first.
        """
        frame = self.dataFrame.transpose() if majorCol else self.dataFrame
        frame.to_csv(filename)

    def exportToCSV_NormalizePopulation(self, filename, majorCol=False):
        """Write ``dataFrame_norm`` to ``filename`` as CSV.

        With ``majorCol`` true the transposed table is written. The stored
        frame is left unchanged, so repeated exports give the same result.
        Requires one of the table-building methods to have run first.
        """
        frame = (self.dataFrame_norm.transpose() if majorCol
                 else self.dataFrame_norm)
        frame.to_csv(filename)
def createConnectionTable(dataCsv, outputCsv, mode):
    """Build a province connection table interactively and export it as CSV.

    Parameters:
        dataCsv   -- path of the processed input data CSV.
        outputCsv -- path of the CSV file to write.
        mode      -- 1: table of common UIDs (optionally divided by a prompted
                     divider); also writes a ``_Norm_Population`` companion
                     file next to ``outputCsv``.
                     2: table of common UIDs, each row divided by its sum.
                     3: table of two-way connections within a prompted travel
                     time window.

    Any other mode prints a message and exits the process. The user is
    always asked whether the output should be column major (transposed).
    """
    import os  # local import: only needed to split the output file name

    # raw_input only exists on Python 2. Python 2's input() evaluates what
    # the user types, so it is never used for reading answers here.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    provinceHolder = ProvinceHolder()
    provinceTable = ProvinceTable(provinceHolder.provinces)
    isMajorCol = readLine('Is column major? (y/n): ') == 'y'

    if mode == 1:
        in_divider = readLine('Input divider (left blank if not divide): ').strip()
        provinceHolder.readDataFromCsv(csvFile=dataCsv)
        provinceTable.createTableOfCommonUID(
            divider=float(in_divider) if in_divider else None)
        # Insert the suffix before the extension, whatever its length.
        baseName, extension = os.path.splitext(outputCsv)
        provinceTable.exportToCSV_NormalizePopulation(
            baseName + '_Norm_Population' + extension, isMajorCol)
    elif mode == 2:
        provinceHolder.readDataFromCsv(csvFile=dataCsv)
        provinceTable.createTableOfCommonUID()
        provinceTable.normConnBySelfOverallConn()
    elif mode == 3:
        print('Insert travel time window in days and hours.')
        days = float(readLine('Days: '))
        hours = float(readLine('Hours: '))
        provinceTable.setTimeWindow(days=days, hours=hours)
        userTracker = UserTracker(twitDataCsv=dataCsv)
        provinceTable.createTableOfTwosideConnection(userTracker)
    else:
        print('Insert wrong mode.')
        sys.exit()

    provinceTable.exportToCSV(outputCsv, isMajorCol)
if __name__ == '__main__':
    # Usage: <script> <processed twitdata .csv> <output .csv>
    if len(sys.argv) < 3:
        print('Please insert processed twitdata .csv and output file name.')
        sys.exit()

    # raw_input only exists on Python 2. Python 2's input() evaluates what
    # the user types, so the mode is read as text and converted explicitly.
    try:
        _readLine = raw_input
    except NameError:
        _readLine = input

    try:
        mode = int(_readLine('1. Table of common UID\n2. Table of common UID divided by total common UID of that province\n3. Table of two-way connection\nMode: '))
    except ValueError:
        # Not a number: pass a value createConnectionTable rejects as a
        # wrong mode, instead of crashing here.
        mode = None

    createConnectionTable(dataCsv=sys.argv[1], outputCsv=sys.argv[2], mode=mode)