|
| 1 | +import numpy as np |
| 2 | +import matplotlib.pyplot as plt |
| 3 | +import pandas as pd |
| 4 | +import pydotplus |
| 5 | + |
| 6 | +from sklearn import datasets |
| 7 | +from sklearn.metrics import accuracy_score |
| 8 | +from sklearn.model_selection import train_test_split |
| 9 | +from sklearn.tree import DecisionTreeClassifier |
| 10 | +from sklearn.tree import export_graphviz |
| 11 | + |
| 12 | +from IPython.display import Image |
| 13 | +from six import StringIO |
| 14 | + |
# Load the iris flowers dataset
iris_dataset = datasets.load_iris()

# Build a single dataframe holding the features and the label:
# np.c_ stacks the feature matrix and the target vector column-wise,
# so 'target' ends up as the last column.
df_iris = pd.DataFrame(
    data=np.c_[iris_dataset['data'], iris_dataset['target']],
    columns=iris_dataset['feature_names'] + ['target'],
)
# Verify the shape of the dataset
print('df_iris has {} rows and {} columns'.format(*df_iris.shape))
# Inspect the first few rows (5) of the dataframe
pd.set_option('display.max_columns', None)
# A bare `df_iris.head()` expression is discarded when this runs as a
# script, so print it explicitly to make the inspection visible.
print(df_iris.head())
# info() writes its summary (dtypes, non-null counts) to stdout itself.
df_iris.info()
# Histogram of target attribute
# df_iris['target'].hist(figsize=(7, 4))
| 28 | + |
# Change the data type of the target column to string.
# The column holds floats after the np.c_ stacking, so cast to uint8 first:
# the labels then read '0', '1', '2' instead of '0.0', '1.0', '2.0'.
df_iris['target'] = df_iris['target'].astype(np.uint8).astype(str)

# Count missing values per column; printed because a bare expression
# result is discarded when this runs as a script.
print(df_iris.isnull().sum())
# Separate the predictor columns from the label column
df_iris_features = df_iris.drop(columns=['target'])
df_iris_target = df_iris[['target']]
# Create training & test sets: 25% held out, fixed seed, and stratified on
# the label so both sets keep the same class proportions.
iris_split = train_test_split(
    df_iris_features,
    df_iris_target,
    test_size=0.25,
    random_state=17,
    stratify=df_iris_target['target'],
)
# train_test_split returns [X_train, X_test, y_train, y_test] in that order
(
    df_iris_features_train,
    df_iris_features_test,
    df_iris_target_train,
    df_iris_target_test,
) = iris_split
# Create the Decision Tree Classification Model (DTM) with Scikit-learn
# https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
# Train a DTM; the single-column target frame is flattened to 1-D first,
# which is the label shape passed to fit().
train_labels = df_iris_target_train.to_numpy().ravel()
model = DecisionTreeClassifier(random_state=17)
model.fit(df_iris_features_train, train_labels)
print(model.feature_importances_)
# Get predictions from model, and compute accuracy on the held-out set
predictions = model.predict(df_iris_features_test)
print(predictions)
accuracy = accuracy_score(y_true=df_iris_target_test, y_pred=predictions)
print(accuracy)
# Visualize the Decision Tree
# export_graphviz writes the DOT description of the fitted tree into the
# in-memory text buffer.
dot_data = StringIO()
export_graphviz(
    model,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    # Pass a plain list: scikit-learn releases with strict parameter
    # validation reject a pandas Index for feature_names.
    feature_names=list(df_iris_features.columns),
)
# Parse the DOT text and render it; Graphviz must be installed for the
# PNG rendering to work.
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png("iris_flow_dtm.png")
# NOTE(review): this Image object is only displayed when it is the last
# expression of a notebook cell; as a plain script the value is discarded.
Image(graph.create_png())
| 61 | + |
| 62 | + |
| 63 | + |
0 commit comments