-
Notifications
You must be signed in to change notification settings - Fork 0
/
DT Regressor.py
93 lines (65 loc) · 3.23 KB
/
DT Regressor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
"""Task2.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Gmr3HV-_31TMX_NlPYZh4XeXqvT91u8E
"""
import io

import pandas as pd
from google.colab import files

# Open Colab's upload dialog; the result is indexed by uploaded filename.
uploaded = files.upload()

# Wrap the uploaded CSV's bytes in an in-memory buffer so pandas can parse
# them like a file. The filename must match the uploaded file exactly.
csv_bytes = uploaded['Concrete_Data.xls - Sheet1.csv']
data = pd.read_csv(io.BytesIO(csv_bytes))
"""1 Identify the features and target from the data.
Features: Cement (component 1), Blast Furnace Slag (component 2), Fly Ash (component 3), Water (component 4), Superplasticizer (component 5), Coarse Aggregate (component 6) and Fine Aggregate (component 7), each in kg per m^3 of mixture; and Age (days).
Target: Concrete compressive strength (MPa, megapascals).
2 Split dataset into training and test set using the following formula:
Let your roll number be B20XX207, and the last three digits of your roll number be S. If
S is odd split ratio is 70:30, and if it is even then, the split ratio is 80:20. In the above
example, S turns out to be 207, which is odd; hence split the data in 70:30.
4 Seed=2021
My roll number is B20ME010, so S = 010, which is even; the data is therefore split 80:20.
"""
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing (80:20 split), shuffled with a fixed
# seed so the partition is reproducible.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    shuffle=True,
    random_state=2021,
)

# The last column is used as the target; every column before it is a feature.
X_train, y_train = train_data.iloc[:, :-1], train_data.iloc[:, -1]
X_test, y_test = test_data.iloc[:, :-1], test_data.iloc[:, -1]
"""6 Train a decision tree regressor and report model accuracy.
3 Use MSE and MAE
4 Seed=2021, 5 set node selection strategy as ‘best’.
"""
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

# Decision tree regressor whose splits minimise squared error.
# NOTE: scikit-learn 1.0 renamed criterion "mse" to "squared_error" and 1.2
# removed the old spelling, so "mse" raises on current releases.
# The name `classifier` is kept (although this is a regressor) because the
# tree-visualisation code further down refers to it.
classifier = DecisionTreeRegressor(
    criterion="squared_error",
    random_state=2021,
    splitter="best",
    max_depth=12,
)  # each parameter is explained in the report
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Print the test-set mean absolute error, then the mean squared error.
# Accuracy is not reported here; the report explains why.
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
# Re-imported here, presumably because this was a separate notebook cell.
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

# Second decision tree regressor, identical to the first except that splits
# minimise absolute error.
# NOTE: scikit-learn 1.0 renamed criterion "mae" to "absolute_error" and 1.2
# removed the old spelling, so "mae" raises on current releases.
classifier1 = DecisionTreeRegressor(
    criterion="absolute_error",
    random_state=2021,
    splitter="best",
    max_depth=12,
)
classifier1.fit(X_train, y_train)

# Overwrites the predictions of the first model.
y_pred = classifier1.predict(X_test)

# Print the test-set mean absolute error, then the mean squared error.
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
"""7 Provide a graphical visualization of the tree."""
import graphviz
from sklearn import tree

# Export the first fitted tree (`classifier`) to DOT source, with filled,
# rounded node boxes.
dot_tree = tree.export_graphviz(
    classifier,
    filled=True,
    rounded=True,
)

# Wrap the DOT source in a graphviz object configured for PNG output.
graph = graphviz.Source(dot_tree, format="png")

# Bare expression: displays the tree when it is the last line of a notebook
# cell; it has no visible effect when this file runs as a plain script.
graph
import graphviz
from sklearn import tree

# Export the second fitted tree (`classifier1`) to DOT source, with filled,
# rounded node boxes.
dot_tree = tree.export_graphviz(
    classifier1,
    filled=True,
    rounded=True,
)

# Rebinds `graph` to a graphviz object configured for PNG output.
graph = graphviz.Source(dot_tree, format="png")

# Bare expression: displays the tree when it is the last line of a notebook
# cell; it has no visible effect when this file runs as a plain script.
graph
"""8 Prepare confusion matrix and classification report."""
from sklearn import metrics
from sklearn.metrics import classification_report
#print(classification_report(y_test, y_pred))
#cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
#print(cnf_matrix)
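# A confusion matrix and classification report apply only to classification; this is a regression task, so the calls above are left commented out (the reason is explained in the report).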