-
Notifications
You must be signed in to change notification settings - Fork 0
/
forestfires1.sas
149 lines (115 loc) · 3.57 KB
/
forestfires1.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/* Read the forest fires CSV into WORK.forestfires.
   - firstobs=2 skips the header row.
   - dsd makes two consecutive commas read as a missing value instead of
     being collapsed into a single delimiter, which would silently shift
     every later column one position to the left.
   - truncover keeps INPUT from wrapping onto the next record when a line
     has fewer fields than expected; the unread variables are set to missing.
   month and day are read as character values, all other fields as numeric. */
data forestfires;
    infile "Z:\Desktop\forestfires.csv" dlm=',' dsd truncover firstobs=2;
    input X Y month $ day $ FFMC DMC DC ISI temp RH wind rain area;
run;
/* Step 1: shift area by +1 and store the natural log of the shifted value
   in logArea. Note that area itself is overwritten with the shifted value. */
DATA forestfires1;
    SET forestfires;
    area    = area + 1;
    logArea = LOG(area);
RUN;
/* Step 2: remove every observation whose logArea is exactly 0;
   all other observations (including missing logArea) are kept. */
DATA filterforestfires;
    SET forestfires1;
    IF logArea = 0 THEN DELETE;
RUN;
/* Step 3: recode the three-letter month code into a season label.
   Every season is matched explicitly so that a missing, misspelled or
   otherwise unexpected month is NOT silently counted as spring: such rows
   get a blank season and a warning is written to the log.
   The comparison is done on the lower-cased month so "Jan" and "jan" agree. */
data ff;
    set filterforestfires;
    length season $ 6;   /* long enough for "winter" / "summer" / "spring" */
    select (lowcase(month));
        when ("dec", "jan", "feb") season = "winter";
        when ("sep", "oct", "nov") season = "fall";
        when ("jun", "jul", "aug") season = "summer";
        when ("mar", "apr", "may") season = "spring";
        otherwise do;
            season = " ";
            putlog "WARNING: unrecognized month value, season left blank: " month= _n_=;
        end;
    end;
run;

/* Listing of the recoded data for a visual check. */
proc print data=ff;
run;
/* Step 4: add quadratic terms for relative humidity and wind
   (RH2 = RH squared, wind2 = wind squared). */
DATA ff1;
    SET ff;
    RH2   = RH ** 2;
    wind2 = wind ** 2;
RUN;

/* Listing of the data with the squared terms. */
PROC PRINT DATA=ff1;
RUN;
/* Step 5: add all pairwise interaction terms among the four index
   variables FFMC, DMC, DC and ISI:
       FFMC*DMC, FFMC*DC, FFMC*ISI, DMC*DC, DMC*ISI, DC*ISI
   Each product is stored in a variable named <first>_<second>. */
DATA ff2;
    SET ff1;

    /* interactions involving FFMC */
    FFMC_DMC = FFMC * DMC;
    FFMC_DC  = FFMC * DC;
    FFMC_ISI = FFMC * ISI;

    /* interactions involving DMC (not already covered above) */
    DMC_DC   = DMC * DC;
    DMC_ISI  = DMC * ISI;

    /* remaining pair */
    DC_ISI   = DC * ISI;
RUN;

/* Listing of the data with the interaction terms. */
PROC PRINT DATA=ff2;
RUN;
/* Step 6: build one 0/1 indicator per season value.
   Each indicator is 1 when season equals the named level and 0 otherwise. */
DATA ff3;
    SET ff2;
    DumWinter = IFN(season = 'winter', 1, 0);
    DumFall   = IFN(season = 'fall',   1, 0);
    DumSummer = IFN(season = 'summer', 1, 0);
    DumSpring = IFN(season = 'spring', 1, 0);
RUN;

/* Listing of the data with the season indicators. */
PROC PRINT DATA=ff3;
RUN;
/* Step 7: build one 0/1 indicator per day-of-week code.
   dayFlag lists the indicator variables in the order they are created;
   dayCode holds the matching three-letter day codes. The loop sets each
   indicator to 1 when day equals its code and to 0 otherwise. */
DATA ff4;
    SET ff3;
    ARRAY dayFlag {7} DumMon DumTue DumWed DumThu DumFri DumSat DumSun;
    ARRAY dayCode {7} $ 3 _TEMPORARY_ ('mon' 'tue' 'wed' 'thu' 'fri' 'sat' 'sun');
    DO _d = 1 TO 7;
        dayFlag{_d} = (day = dayCode{_d});
    END;
    DROP _d;   /* loop index is not part of the output data set */
RUN;
/* Fit the linear model for logArea with season and day indicators, the raw
   predictors, the squared terms and the pairwise interactions, and request
   variance inflation factors (VIF).
   One indicator per factor is left out as the reference level (DumSpring for
   season, DumSun for day): with an intercept in the model, entering every
   level of a factor makes the indicators sum to the intercept column, so the
   design matrix is not full rank and the estimates/VIFs are not meaningful.
   QUIT ends the interactive PROC REG session explicitly. */
proc reg data=ff4 plots(unpack label);
    model logArea =
          DumWinter DumFall DumSummer
          DumMon DumTue DumWed DumThu DumFri DumSat
          FFMC DMC DC ISI temp RH wind rain
          RH2 wind2
          FFMC_DMC FFMC_DC FFMC_ISI DMC_DC DMC_ISI DC_ISI
          / vif;
run;
quit;
* Try using proc glm;
*proc glm data = ff2;
*model logArea = FFMC DMC DC ISI temp RH wind rain RH2 wind2 FFMC_DMC FFMC_DC FFMC_ISI DMC_DC DMC_ISI DC_ISI;
*class season day;
*run;
/* LASSO selection on ff4 over the continuous predictors, squared terms and
   interactions. stop=AIC ends the selection path on the AIC criterion and
   choose=CV picks the final model by cross validation; CVdetails requests
   the cross-validation detail output.
   seed=1 fixes the random assignment of observations to cross-validation
   folds so that repeated runs select the same model (matching the other
   cross-validated PROC GLMSELECT calls in this program).
   season is declared as a CLASS variable but is not listed in the MODEL
   statement, so it does not enter this model. */
ods graphics on;
proc glmselect data=ff4
               seed=1
               plots(stepAxis=number)=(criterionPanel ASEPlot);
    class season;
    model logArea = FFMC DMC DC ISI temp rain wind RH
                    RH2 wind2
                    FFMC_DMC FFMC_DC FFMC_ISI DMC_DC DMC_ISI DC_ISI
                    / selection=LASSO(choose=CV stop=AIC) CVdetails;
run;
quit;
ods graphics off;
/* Listing of ff2 (data with squared and interaction terms). */
PROC PRINT DATA=ff2;
RUN;
/* Model selection on ff2 with season as a CLASS effect plus the continuous
   predictors, squared terms and interactions. No SELECTION= option is given,
   so PROC GLMSELECT applies its default selection method. Plots are
   suppressed (plots=none); details=all and stats=all request the full
   per-step detail and fit-statistic output. */
ODS GRAPHICS ON;
PROC GLMSELECT DATA=ff2 PLOTS=NONE;
    CLASS season;
    MODEL logArea =
          FFMC DMC DC ISI temp RH wind rain
          RH2 wind2
          season
          FFMC_DMC FFMC_DC FFMC_ISI DMC_DC DMC_ISI DC_ISI
          / DETAILS=ALL STATS=ALL;
RUN;
ODS GRAPHICS OFF;
/* Small selection run on ff2 restricted to FFMC, DMC and their interaction,
   with full detail and statistics output and no plots.
   season is declared as a CLASS variable but does not appear in the MODEL
   statement. */
PROC GLMSELECT DATA=ff2 PLOTS=NONE;
    CLASS season;
    MODEL logArea = FFMC_DMC FFMC DMC
          / DETAILS=ALL STATS=ALL;
RUN;
/* LASSO selection on ff2 with season as a CLASS effect plus the continuous
   predictors, squared terms and interactions.
   stop=AIC ends the selection path on AIC, choose=CV picks the final model
   by cross validation, and seed=1 fixes the random number seed.
   The criterion panel and ASE plot are drawn against the step number. */
ODS GRAPHICS ON;
PROC GLMSELECT DATA=ff2
               SEED=1
               PLOTS(stepAxis=number)=(criterionPanel ASEPlot CRITERIONPANEL);
    CLASS season;
    MODEL logArea =
          season
          FFMC DMC DC ISI temp RH wind rain
          RH2 wind2
          FFMC_DMC FFMC_DC FFMC_ISI DMC_DC DMC_ISI DC_ISI
          / SELECTION=LASSO(choose=CV stop=AIC) CVdetails;
RUN;
QUIT;
ODS GRAPHICS OFF;
/* LASSO selection on ff2 with both season and day as CLASS effects plus the
   continuous predictors, squared terms and interactions.
   Here the roles of the criteria are stop=CV (cross validation ends the
   selection path) and choose=AIC (AIC picks the final model); seed=1 fixes
   the random number seed.
   The criterion panel and ASE plot are drawn against the step number. */
ODS GRAPHICS ON;
PROC GLMSELECT DATA=ff2
               SEED=1
               PLOTS(stepAxis=number)=(criterionPanel ASEPlot CRITERIONPANEL);
    CLASS season day;
    MODEL logArea =
          season day
          FFMC DMC DC ISI temp RH wind rain
          RH2 wind2
          FFMC_DMC FFMC_DC FFMC_ISI DMC_DC DMC_ISI DC_ISI
          / SELECTION=LASSO(choose=AIC stop=CV) CVdetails;
RUN;
QUIT;
ODS GRAPHICS OFF;