#!/usr/bin/python

"""
    Starter code for exploring the Enron dataset (emails + finances);
    loads up the dataset (pickled dict of dicts).

    Example of a single lookup:

    enron_data["SKILLING JEFFREY K"]["bonus"] = 5600000

    Run as a script, it prints one value per line:
      1. number of people with a "salary" value
      2. number of people with an "email_address" value
      3. number of people with a "total_payments" value
      4. number of people in the dataset
      5. fraction of people with a "total_payments" value
      6. fraction of people without one
"""

import math
import pickle

import numpy as np
import pandas as pd

DATASET_PATH = "../final_project/final_project_dataset.pkl"

# The script treats this exact string as the "no value" marker.
MISSING_VALUE = "NaN"


def count_present(records, feature):
    """Return how many records hold a real value for *feature*.

    records: dict mapping a person's name to that person's feature dict.
    feature: key looked up in every feature dict.

    A value counts as present unless it equals the string "NaN".
    Raises KeyError if some record does not contain *feature* at all.
    """
    return sum(
        1 for features in records.values()
        if features[feature] != MISSING_VALUE
    )


if __name__ == "__main__":
    # NOTE: unpickling can run arbitrary code; only load dataset files
    # that come from a trusted source.
    # Binary mode is what pickle expects, and the `with` block closes the
    # file handle as soon as loading is done.
    with open(DATASET_PATH, "rb") as dataset_file:
        enron_data = pickle.load(dataset_file)

    salary_count = count_present(enron_data, "salary")
    email_count = count_present(enron_data, "email_address")
    total_payments_count = count_present(enron_data, "total_payments")

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(salary_count)
    print(email_count)
    print(total_payments_count)
    print(len(enron_data))

    # float() keeps this a true division on Python 2 as well.
    total_payments_share = float(total_payments_count) / len(enron_data)
    print(total_payments_share)
    print(1.0 - total_payments_share)

    # Example: dump the full records of selected people.
    # names = ["Lay Kenneth L", "Skilling Jeffrey K", "Fastow Andrew S"]
    # for name in names:
    #     print(enron_data[name.upper()])