Skip to content

Commit 4b7b849

Browse files
committed
Creating a Decision Tree Classifier with Scikit-learn's toy dataset module's Iris flowers datasets
1 parent fdaa32d commit 4b7b849

File tree

6 files changed

+67
-1
lines changed

6 files changed

+67
-1
lines changed

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/misc.xml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

usecases/tabular_classifier/decision_tree_classifer/converting_skitlearn_model_coreml.py

Whitespace-only changes.
146 KB
Loading
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
import pandas as pd
4+
import pydotplus
5+
6+
from sklearn import datasets
7+
from sklearn.metrics import accuracy_score
8+
from sklearn.model_selection import train_test_split
9+
from sklearn.tree import DecisionTreeClassifier
10+
from sklearn.tree import export_graphviz
11+
12+
from IPython.display import Image
13+
from six import StringIO
14+
# Load the iris flowers dataset bundled with scikit-learn.
iris_dataset = datasets.load_iris()

# Build a single dataframe: the feature columns followed by a trailing
# 'target' column. np.c_ stacks the labels next to the feature values, so the
# labels take on the same dtype as the features.
df_iris = pd.DataFrame(
    data=np.c_[iris_dataset['data'], iris_dataset['target']],
    columns=iris_dataset['feature_names'] + ['target'],
)

# Verify the shape of the dataset
print(f'df_iris has {df_iris.shape[0]} rows and {df_iris.shape[1]} columns')

# Inspect the first few rows (5) of the dataframe.
# The result is printed explicitly: a bare `df_iris.head()` expression only
# echoes in a notebook/REPL and is silently discarded when run as a script.
pd.set_option('display.max_columns', None)
print(df_iris.head())
df_iris.info()

# Histogram of target attribute
# df_iris['target'].hist(figsize=(7, 4))

# Change the data type of the target column to string.
# Cast to an unsigned integer first so each label is stringified from its
# integer form rather than from the dtype it inherited via np.c_.
df_iris[['target']] = df_iris[['target']].astype(np.uint8)
df_iris['target'] = df_iris['target'].apply(str)

# Report the number of missing values per column (printed for the same
# reason as head() above).
print(df_iris.isnull().sum())
# Separate the dataframe into the predictor columns and a one-column
# dataframe holding the class label.
df_iris_features = df_iris.drop(columns=['target'])
df_iris_target = df_iris.loc[:, ['target']]

# Create training & test sets: 25% of the rows are held out for testing, the
# split is seeded for reproducibility and stratified on the class label.
iris_split = train_test_split(
    df_iris_features,
    df_iris_target,
    test_size=0.25,
    random_state=17,
    stratify=df_iris_target['target'],
)

# train_test_split returns the pieces in the order
# (features_train, features_test, target_train, target_test).
(
    df_iris_features_train,
    df_iris_features_test,
    df_iris_target_train,
    df_iris_target_test,
) = iris_split
# Create the Decision Tree Classification Model (DTM) with Scikit-learn
# https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html

# Train a DTM. The target dataframe has a single column, so it is flattened
# to a 1-D array of labels before being handed to fit().
model = DecisionTreeClassifier(random_state=17)
model.fit(
    X=df_iris_features_train,
    y=df_iris_target_train.to_numpy().ravel(),
)
print(model.feature_importances_)

# Get predictions from the model for the held-out rows, and compute accuracy
predictions = model.predict(df_iris_features_test)
print(predictions)

accuracy = accuracy_score(y_true=df_iris_target_test, y_pred=predictions)
print(accuracy)
# Visualize the Decision Tree: export it as Graphviz DOT text into an
# in-memory buffer, then render that text to a PNG with pydotplus.
dot_data = StringIO()
export_graphviz(
    model,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    # Pass a plain list of column names rather than the pandas Index itself;
    # some scikit-learn releases validate this parameter as a list and reject
    # an Index object.
    feature_names=list(df_iris_features.columns),
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png("iris_flow_dtm.png")

# NOTE: this expression only displays the image when evaluated as the last
# statement of a notebook cell; run as a plain script it has no visible
# effect (the PNG written above is the persistent output).
Image(graph.create_png())
61+
62+
63+

usecases/tabular_classifier/preprocessing_the_wines_dataset.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
# composition. The model will be trained on the popular UCI ML wine dataset and export the dataset to CSV files.
66

77
import numpy as np
8-
import matplotlib
98
import pandas as pd
109

1110
from sklearn import datasets

0 commit comments

Comments
 (0)