From 5dec3d402b6974c5b059286c47b48ffd6e14ae5e Mon Sep 17 00:00:00 2001 From: Jeremy Silver Date: Wed, 11 Feb 2015 15:30:24 -0800 Subject: [PATCH] Add key sort flag in feature format and final project --- final_project/poi_id.py | 2 +- final_project/tester.py | 2 +- tools/feature_format.py | 9 +++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/final_project/poi_id.py b/final_project/poi_id.py index d7067c4080c..60b9e10428e 100644 --- a/final_project/poi_id.py +++ b/final_project/poi_id.py @@ -21,7 +21,7 @@ my_dataset = data_dict ### Extract features and labels from dataset for local testing -data = featureFormat(my_dataset, features_list) +data = featureFormat(my_dataset, features_list, sort_keys = True) labels, features = targetFeatureSplit(data) ### Task 4: Try a varity of classifiers diff --git a/final_project/tester.py b/final_project/tester.py index a037dc61c7a..6895e978fd7 100644 --- a/final_project/tester.py +++ b/final_project/tester.py @@ -24,7 +24,7 @@ RESULTS_FORMAT_STRING = "\tTotal predictions: {:4d}\tTrue positives: {:4d}\tFalse positives: {:4d}\tFalse negatives: {:4d}\tTrue negatives: {:4d}" def test_classifier(clf, dataset, feature_list, folds = 1000): - data = featureFormat(dataset, feature_list) + data = featureFormat(dataset, feature_list, sort_keys = True) labels, features = targetFeatureSplit(data) cv = StratifiedShuffleSplit(labels, folds, random_state = 42) true_negatives = 0 diff --git a/tools/feature_format.py b/tools/feature_format.py index 372372dea16..a771251989e 100644 --- a/tools/feature_format.py +++ b/tools/feature_format.py @@ -33,7 +33,7 @@ import numpy as np -def featureFormat( dictionary, features, remove_NaN=True, remove_all_zeroes=True, remove_any_zeroes=False ): +def featureFormat( dictionary, features, remove_NaN=True, remove_all_zeroes=True, remove_any_zeroes=False, sort_keys = False): """ convert dictionary to numpy array of features remove_NaN=True will convert "NaN" string to 0.0 remove_all_zeroes=True will omit any data points for which @@ -45,7 +45,12 @@ def featureFormat( dictionary, features, remove_NaN=True, remove_all_zeroes=True return_list = [] - for key in sorted(dictionary.keys()): + if sort_keys: + keys = sorted(dictionary.keys()) + else: + keys = dictionary.keys() + + for key in keys: tmp_list = [] append = False for feature in features: