-
Notifications
You must be signed in to change notification settings - Fork 0
/
Descriptive_Statistics.py
174 lines (160 loc) · 10.3 KB
/
Descriptive_Statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import pandas as pd
import statistics
import numpy as np
# NOTE: change the first path to where the OutFin dataset resides; the script then
# descends into its 'Measurements' sub-folder, from which all .csv files are read.
os.chdir('C:/Users/uf11/Desktop/OutFin')
os.chdir('Measurements')

# RPs (reference points) visited on each collection day; the RP numbers are
# consecutive, so each day is a contiguous range (upper bound exclusive).
day1 = list(range(1, 21))     # RPs 1-20
day2 = list(range(21, 43))    # RPs 21-42
day3 = list(range(43, 61))    # RPs 43-60
day4 = list(range(61, 78))    # RPs 61-77
day5 = list(range(78, 100))   # RPs 78-99
day6 = list(range(100, 123))  # RPs 100-122
def Stats_WiFi(phone, num_rps=122):
    """Print descriptive statistics of the WiFi measurements of one phone.

    Reads ``<phone>_WiFi_<k>.csv`` for k = 1..num_rps from the current working
    directory and prints:

    * min/max/mean/SD of the number of distinct SSIDs and BSSIDs per RP
      (population SD, via ``statistics.pstdev``);
    * min/max/mean/SD of the RSS pooled over all RPs, where the RSS of a row is
      the mean of that row's RSS readings (SD as computed by ``Series.std``).

    Args:
        phone: File-name prefix identifying the phone, e.g. ``'Phone1'``.
        num_rps: Number of RP files to read; defaults to all 122 RPs.

    Raises:
        ValueError: If ``num_rps`` is smaller than 1.
    """
    if num_rps < 1:
        raise ValueError('num_rps must be at least 1')
    frames = []        # one trimmed dataframe per RP, concatenated once after the loop
    ssid_counts = []   # number of detected SSIDs for each RP
    bssid_counts = []  # number of detected BSSIDs for each RP
    for rp in range(1, num_rps + 1):  # iterate over RPs
        df_rp = pd.read_csv(phone + '_WiFi_' + str(rp) + '.csv')  # read the WiFi .csv file
        # columns from position 8 onwards hold the RSS readings of the APs; the
        # right-hand side is evaluated before the new 'RSS' column is added
        df_rp['RSS'] = df_rp.iloc[:, 8:].mean(axis=1)
        df_rp = df_rp[['SSID', 'BSSID', 'RSS']]  # leave only the columns of interest
        ssid_counts.append(len(df_rp.SSID.unique()))
        bssid_counts.append(len(df_rp.BSSID.unique()))
        frames.append(df_rp)
    # DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame on
    # every iteration); a single concat gives the same aggregated dataframe.
    df_all = pd.concat(frames, ignore_index=True)
    print('========= Descriptive Statistics for', phone, '(WiFi) =========')
    print('Detected SSID (min): ', min(ssid_counts))
    print('Detected SSID (max): ', max(ssid_counts))
    print('Detected SSID (mean):', statistics.mean(ssid_counts))
    print('Detected SSID (SD): ', statistics.pstdev(ssid_counts))
    print('Detected BSSID (min): ', min(bssid_counts))
    print('Detected BSSID (max): ', max(bssid_counts))
    print('Detected BSSID (mean):', statistics.mean(bssid_counts))
    print('Detected BSSID (SD): ', statistics.pstdev(bssid_counts))
    print('RSS (min): ', df_all.RSS.min(skipna=True))
    print('RSS (max): ', df_all.RSS.max(skipna=True))
    print('RSS (mean):', df_all.RSS.mean(skipna=True))
    print('RSS (SD): ', df_all.RSS.std(skipna=True))
    print('==========================================================')
def Stats_Bluetooth(phone, num_rps=122):
    """Print descriptive statistics of the Bluetooth measurements of one phone.

    Reads ``<phone>_Bluetooth_<k>.csv`` for k = 1..num_rps from the current
    working directory and prints:

    * min/max/mean/SD of the number of distinct MAC addresses per RP
      (population SD, via ``statistics.pstdev``);
    * min/max/mean/SD of the RSS pooled over all RPs, after the RSS readings of
      each MAC address have been averaged within an RP.

    Args:
        phone: File-name prefix identifying the phone, e.g. ``'Phone1'``.
        num_rps: Number of RP files to read; defaults to all 122 RPs.

    Raises:
        ValueError: If ``num_rps`` is smaller than 1.
    """
    if num_rps < 1:
        raise ValueError('num_rps must be at least 1')
    frames = []      # one per-MAC mean-RSS dataframe per RP
    mac_counts = []  # number of detected Bluetooth devices for each RP
    for rp in range(1, num_rps + 1):  # iterate over RPs
        df_rp = pd.read_csv(phone + '_Bluetooth_' + str(rp) + '.csv')  # read the Bluetooth .csv file
        df_rp = df_rp[['MAC_address', 'RSS']]  # leave only the columns of interest
        mac_counts.append(len(df_rp.MAC_address.unique()))
        # collapse repeated sightings of a device into its mean RSS at this RP
        frames.append(df_rp.groupby(['MAC_address'], as_index=False).mean())
    # DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame on
    # every iteration); a single concat gives the same aggregated dataframe.
    df_all = pd.concat(frames, ignore_index=True)
    print('========= Descriptive Statistics for', phone, '(Bluetooth) =========')
    print('Detected MAC addresses (min): ', min(mac_counts))
    print('Detected MAC addresses (max): ', max(mac_counts))
    print('Detected MAC addresses (mean):', statistics.mean(mac_counts))
    print('Detected MAC addresses (SD): ', statistics.pstdev(mac_counts))
    print('RSS (min): ', df_all.RSS.min(skipna=True))
    print('RSS (max): ', df_all.RSS.max(skipna=True))
    print('RSS (mean):', df_all.RSS.mean(skipna=True))
    print('RSS (SD): ', df_all.RSS.std(skipna=True))
    print('==========================================================')
def Stats_Cellular(phone, num_rps=122):
    """Print descriptive statistics of the cellular measurements of one phone.

    Reads ``<phone>_Cellular_<k>.csv`` for k = 1..num_rps from the current
    working directory and prints:

    * min/max/mean/SD of the number of distinct ECIs per RP
      (population SD, via ``statistics.pstdev``);
    * min/max/mean/SD of the ``LTE_neighbors``, ``RSRP_strongest``, ``RSRP``
      and ``RSRQ`` columns pooled over all RPs.

    Args:
        phone: File-name prefix identifying the phone, e.g. ``'Phone1'``.
        num_rps: Number of RP files to read; defaults to all 122 RPs.

    Raises:
        ValueError: If ``num_rps`` is smaller than 1.
    """
    if num_rps < 1:
        raise ValueError('num_rps must be at least 1')
    frames = []      # one trimmed dataframe per RP, concatenated once after the loop
    eci_counts = []  # number of detected ECIs for each RP
    for rp in range(1, num_rps + 1):  # iterate over RPs
        df_rp = pd.read_csv(phone + '_Cellular_' + str(rp) + '.csv')  # read the cellular .csv file
        # leave only the columns of interest
        df_rp = df_rp[['LTE_neighbors', 'RSRP_strongest', 'ECI', 'RSRP', 'RSRQ']]
        eci_counts.append(len(df_rp.ECI.unique()))
        frames.append(df_rp)
    # DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame on
    # every iteration); a single concat gives the same aggregated dataframe.
    df_all = pd.concat(frames, ignore_index=True)
    print('========= Descriptive Statistics for', phone, '(Cellular) =========')
    print('Detected ECIs (min): ', min(eci_counts))
    print('Detected ECIs (max): ', max(eci_counts))
    print('Detected ECIs (mean):', statistics.mean(eci_counts))
    print('Detected ECIs (SD): ', statistics.pstdev(eci_counts))
    print('LTE neighbors (min): ', df_all.LTE_neighbors.min(skipna=True))
    print('LTE neighbors (max): ', df_all.LTE_neighbors.max(skipna=True))
    print('LTE neighbors (mean):', df_all.LTE_neighbors.mean(skipna=True))
    print('LTE neighbors (SD): ', df_all.LTE_neighbors.std(skipna=True))
    print('RSRP strongest (min): ', df_all.RSRP_strongest.min(skipna=True))
    print('RSRP strongest (max): ', df_all.RSRP_strongest.max(skipna=True))
    print('RSRP strongest (mean):', df_all.RSRP_strongest.mean(skipna=True))
    print('RSRP strongest (SD): ', df_all.RSRP_strongest.std(skipna=True))
    print('RSRP (min): ', df_all.RSRP.min(skipna=True))
    print('RSRP (max): ', df_all.RSRP.max(skipna=True))
    print('RSRP (mean):', df_all.RSRP.mean(skipna=True))
    print('RSRP (SD): ', df_all.RSRP.std(skipna=True))
    print('RSRQ (min): ', df_all.RSRQ.min(skipna=True))
    print('RSRQ (max): ', df_all.RSRQ.max(skipna=True))
    print('RSRQ (mean):', df_all.RSRQ.mean(skipna=True))
    print('RSRQ (SD): ', df_all.RSRQ.std(skipna=True))
    print('==========================================================')
def Stats_Sensors(phone, num_rps=122, rp_days=None):
    """Print descriptive statistics of the sensor measurements of one phone.

    Reads ``<phone>_Sensors_<k>.csv`` for k = 1..num_rps from the current
    working directory, calibrates the magnetic field components (Bx, By, Bz)
    with the per-day offsets/scales of the phone, and prints min/max/mean/SD of
    the magnetic field magnitude, the pressure and the illuminance (the latter
    multiplied by 10**-6) pooled over all RPs.

    Args:
        phone: ``'Phone1'`` or ``'Phone2'``; selects the calibration constants
            and the file-name prefix.
        num_rps: Number of RP files to read; defaults to all 122 RPs.
        rp_days: Optional sequence of collections of RP numbers, one per
            collection day, in day order. Defaults to the module-level
            ``day1`` .. ``day6`` lists. An RP that appears in no day is left
            uncalibrated.

    Raises:
        ValueError: If ``phone`` is not a known phone, ``num_rps`` is smaller
            than 1, or ``rp_days`` has more days than calibration constants.
    """
    # Calibration constants as obtained by Calibration.py: three components
    # (x, y, z) for each day, so entry 3 * day + axis belongs to that day/axis.
    if phone == 'Phone1':
        offset = [0.9908, -1.8639, 0.4391,
                  -0.8586, 1.7032, -0.4984,
                  -1.1204, -0.3366, 1.1165,
                  -2.0947, 23.6993, -13.5657,
                  -0.3352, 8.5964, -1.2695,
                  -1.5723, 2.1944, 0.7699]
        scale = [0.9951, 1.0562, 0.9538,
                 1.0092, 0.9945, 0.9963,
                 1.0227, 0.9899, 0.9880,
                 1.0003, 1.0129, 0.9870,
                 1.0882, 0.8722, 1.0698,
                 1.0209, 0.9821, 0.9976]
    elif phone == 'Phone2':
        offset = [0.0700, 0.6768, 0.0131,
                  1.0061, 0.1598, 1.2743,
                  2.3444, 0.3117, -0.5986,
                  0.2895, -0.8330, -0.2518,
                  -0.7629, 0.6813, 0.1127,
                  0.6256, 0.2184, -0.2729]
        scale = [1.0428, 0.9899, 0.9700,
                 1.0219, 0.9891, 0.9896,
                 1.0904, 0.9693, 0.9512,
                 1.0295, 0.9684, 1.0039,
                 1.0756, 0.9763, 0.9559,
                 1.0406, 0.9838, 0.9778]
    else:
        # previously an unknown phone surfaced later as a NameError on `offset`
        raise ValueError("phone must be 'Phone1' or 'Phone2', got %r" % (phone,))
    if num_rps < 1:
        raise ValueError('num_rps must be at least 1')
    if rp_days is None:
        rp_days = (day1, day2, day3, day4, day5, day6)
    if 3 * len(rp_days) > len(offset):
        raise ValueError('rp_days has more days than there are calibration constants')

    axes = ('Bx', 'By', 'Bz')
    frames = []  # one calibrated dataframe per RP, concatenated once after the loop
    for rp in range(1, num_rps + 1):  # iterate over RPs
        df_rp = pd.read_csv(phone + '_Sensors_' + str(rp) + '.csv')  # read the sensors .csv file
        # leave only the columns of interest; copy so the in-place calibration
        # below writes to an independent frame
        df_rp = df_rp[['Bx', 'By', 'Bz', 'Pressure', 'Illuminance']].copy()
        # calibrate magnetic field readings with the constants of the day the RP was visited
        for day_index, day_rps in enumerate(rp_days):
            if rp in day_rps:
                for axis_index, axis in enumerate(axes):
                    k = 3 * day_index + axis_index
                    df_rp[axis] = (df_rp[axis] - offset[k]) * scale[k]
                break  # first matching day wins
        frames.append(df_rp)
    # DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame on
    # every iteration); a single concat gives the same aggregated dataframe.
    df_all = pd.concat(frames, ignore_index=True)
    # Magnitude of the magnetic field. All three components must come from the
    # aggregated frame: mixing in the last RP's frame misaligns the rows and
    # leaves most magnitudes NaN.
    df_all['Magnitude'] = np.sqrt(df_all['Bx']**2 + df_all['By']**2 + df_all['Bz']**2)
    print('========= Descriptive Statistics for', phone, '(Sensors) =========')
    print('Magnitude of magnetic filed (min): ', df_all.Magnitude.min(skipna=True))
    print('Magnitude of magnetic filed (max): ', df_all.Magnitude.max(skipna=True))
    print('Magnitude of magnetic filed (mean):', df_all.Magnitude.mean(skipna=True))
    print('Magnitude of magnetic filed (SD): ', df_all.Magnitude.std(skipna=True))
    print('Pressure (min): ', df_all.Pressure.min(skipna=True))
    print('Pressure (max): ', df_all.Pressure.max(skipna=True))
    print('Pressure (mean):', df_all.Pressure.mean(skipna=True))
    print('Pressure (SD): ', df_all.Pressure.std(skipna=True))
    print('Illuminance (min): ', df_all.Illuminance.min(skipna=True)*10**-6)
    print('Illuminance (max): ', df_all.Illuminance.max(skipna=True)*10**-6)
    print('Illuminance (mean):', df_all.Illuminance.mean(skipna=True)*10**-6)
    print('Illuminance (SD): ', df_all.Illuminance.std(skipna=True)*10**-6)
    print('==========================================================')
# Phone identifiers; each one is the file-name prefix of that phone's .csv files.
Phone1, Phone2 = 'Phone1', 'Phone2'
# Uncomment any of the calls below to print the corresponding statistics.
# Stats_WiFi(Phone2)
# Stats_Bluetooth(Phone2)
# Stats_Cellular(Phone2)
# Stats_Sensors(Phone2)