forked from MichiganDataScienceTeam/googleanalytics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexplore.py
41 lines (28 loc) · 1.2 KB
/
explore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from __future__ import print_function
import dataset
import explore_utils
import argparse
def _print_split_shape(split_label, frame):
    """Print the row and column counts of one dataset split.

    Args:
        split_label: Word inserted into the messages ("training" or "test").
        frame: Object that supports ``len()`` and exposes ``.columns``.
    """
    print("Number of rows in the %s set: " % split_label, len(frame))
    print("Number of columns in the %s set: " % split_label,
          len(frame.columns))


def main(args):
    """Load the dataset and print a few summary statistics to stdout.

    Args:
        args: Parsed command-line namespace. Only ``args.debug`` is read;
            it is forwarded unchanged to ``dataset.Dataset``.
    """
    print("Loading the dataset...")
    data = dataset.Dataset(args.debug)

    # Size of each split
    _print_split_shape("training", data.train)
    _print_split_shape("test", data.test)

    # Number of visits
    most_visits = explore_utils.find_most_visit(data)
    print("The most visit times for a customer in train set is: ",
          most_visits)

    # Customer spending percentiles
    cutoffs = [95, 97.5, 99, 99.9, 99.99]
    revenue_at_cutoff = explore_utils.find_customer_revenue_percentiles(
        data, cutoffs)
    summary_line = "%2.2f%% of customers spend less than: $%.2f"
    # zip pairs each requested percentile with the value returned for it.
    for cutoff, revenue in zip(cutoffs, revenue_at_cutoff):
        print(summary_line % (cutoff, revenue))
if __name__ == '__main__':
    # Script entry point: build the command-line parser, then hand the
    # parsed namespace to main().
    cli = argparse.ArgumentParser(
        description='Explore the Google Analytics dataset.',
    )
    # --debug is a store_true switch: False unless the flag is given.
    cli.add_argument(
        '--debug',
        action='store_true',
        dest='debug',
        help='run in debug mode',
    )
    main(cli.parse_args())