Skip to content

Commit

Permalink
Explore enron data
Browse files Browse the repository at this point in the history
  • Loading branch information
tmaila committed Jan 19, 2017
1 parent 8b79cb9 commit 35b9aa6
Showing 1 changed file with 27 additions and 3 deletions.
30 changes: 27 additions & 3 deletions datasets_questions/explore_enron_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python

"""
"""
Starter code for exploring the Enron dataset (emails + finances);
loads up the dataset (pickled dict of dicts).
Expand All @@ -12,11 +12,35 @@
but here's an example to get you started:
enron_data["SKILLING JEFFREY K"]["bonus"] = 5600000
"""

import pickle
import pandas as pd
import numpy as np
import math

# Location of the pickled dataset, relative to the directory the script is
# run from.
DATASET_PATH = "../final_project/final_project_dataset.pkl"

# The dataset marks an unavailable feature with the literal string "NaN".
MISSING_VALUE = "NaN"


def count_known(records, feature):
    """Count the records in which `feature` holds a real value.

    records -- dict mapping a person's name to that person's feature dict
    feature -- name of the feature to inspect, e.g. "salary"

    A value counts as known unless it equals the string "NaN".  A record
    that lacks the key altogether is counted as unknown as well, instead of
    raising KeyError.

    Returns the number of records with a known value (0 for an empty dict).
    """
    return sum(
        1
        for record in records.values()
        if record.get(feature, MISSING_VALUE) != MISSING_VALUE
    )


if __name__ == "__main__":
    # Pickle data is binary: open with "rb" and let `with` close the handle.
    # SECURITY: pickle.load can execute arbitrary code -- only load a
    # dataset file that comes from a trusted source.
    # NOTE(review): binary mode does no newline translation; confirm the
    # .pkl is stored with LF line endings on every platform it is used on.
    with open(DATASET_PATH, "rb") as dataset_file:
        enron_data = pickle.load(dataset_file)

    salary_count = count_known(enron_data, "salary")
    email_count = count_known(enron_data, "email_address")
    total_payments_count = count_known(enron_data, "total_payments")

    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(salary_count)
    print(email_count)
    print(total_payments_count)
    print(len(enron_data))
    # Fraction of people with a known total_payments value, then its
    # complement (the fraction for whom it is "NaN").
    print(float(total_payments_count) / len(enron_data))
    print(1.0 - float(total_payments_count) / len(enron_data))

#names = ["Lay Kenneth L", "Skilling Jeffrey K", "Fastow Andrew S"]
#for name in names:
# print enron_data[name.upper()]

0 comments on commit 35b9aa6

Please sign in to comment.