-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfit-testing.R
107 lines (94 loc) · 2.79 KB
/
fit-testing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Load the student data from a semicolon-separated CSV file.
# NOTE(review): the file name suggests the Portuguese-course part of the UCI
# "Student Performance" data set -- confirm the source. Save the file in the
# location below (or adjust the path) before running the script.
#mat <- read.csv("C:/python/bn/student-mat.csv", sep=';')
d <- read.csv("C:/python/bn/student-por.csv", sep = ";")
# Kept for reference: how to stack the two subject files into one data frame.
#mat["subject"] <- "mat"
#por["subject"] <- "por"
#d <- rbind(mat, por)

# Drop the two intermediate grade columns; only G3 is kept.
d$G1 <- NULL
d$G2 <- NULL

library(Hmisc)

# Discretize the variables below into (up to) four quantile groups so that
# they can be used in chi-square tests on categorical data.
# A previous cut()-based decile attempt was removed here: it indexed `df`
# (the F-density function from stats, not a data frame) and contained a `..`
# placeholder, so it failed at runtime and aborted non-interactive runs. Its
# result was overwritten by the cut2() call anyway.
quartile_cols <- c("age", "absences", "G3")
for (col in quartile_cols) {
  d[[col]] <- cut2(d[[col]], g = 4, oneval = FALSE)
}
# Inspect the data: for every column, show its name followed by the
# frequency table of the values it takes.
for (column_name in names(d)) {
  print(column_name)
  print(table(d[[column_name]]))
}
# Let us now define a simple DAG for these variables.
# The DAG is written in dagitty's text syntax: each line `a -> b` inside
# `dag { ... }` declares a directed edge from node a to node b.
# NOTE(review): the node names are presumably column names of `d`; only
# age, absences, G3 and traveltime are referenced elsewhere in this script,
# so verify the remaining names against the CSV header.
#
# Use the below command to install the latest version of the dagitty
# R package, if you haven't already. It is important to get the latest
# version, otherwise the testing will not work with categorical data!
#
# devtools::install_github("jtextor/dagitty/r")
library( dagitty )
g <- dagitty('
dag {
absences -> failures
absences -> G3
activities -> freetime
age -> activities
age -> romantic
age -> studytime
failures -> G3
failures -> paid
failures -> schoolsup
failures -> studytime
famrel -> famsup
famrel -> paid
famsup -> G3
famsup -> paid
famsup -> studytime
Fedu -> Fjob
Fedu -> higher
Fjob -> famrel
freetime -> G3
freetime -> studytime
goout -> freetime
goout -> romantic
health -> absences
health -> G3
health -> studytime
higher -> famsup
higher -> school
higher -> studytime
Medu -> higher
Medu -> Mjob
Mjob -> famrel
paid -> freetime
paid -> G3
paid -> studytime
romantic -> freetime
school -> schoolsup
school -> traveltime
schoolsup -> freetime
schoolsup -> G3
schoolsup -> studytime
studytime -> G3
traveltime -> absences
traveltime -> freetime
}
')
# Print out all d-separation statements implied by the DAG.
# print() is explicit so the output also appears when the script is run via
# source(), which does not auto-print top-level values by default.
print(impliedConditionalIndependencies(g))

# Close an output diversion left open by an earlier run, if there is one.
# A bare sink() with nothing diverted only produces a warning.
if (sink.number() > 0) {
  sink()
}

# Manually test the conditional independence: absences _||_ G3 | traveltime
# A chi-square test of absences against G3 is run within each level of
# traveltime; the per-stratum statistics and degrees of freedom are summed
# and the pooled statistic is compared against a chi-square distribution.
# NOTE(review): the DAG `g` defined in this script has a direct edge
# absences -> G3, so this statement is not one the DAG implies; it serves
# as a worked example of the procedure.
chisq <- 0
dof <- 0  # not named `df`, which would mask stats::df
# Stratify on the same variable the body conditions on (traveltime); rows
# with a missing traveltime cannot be assigned to a stratum and are skipped.
strata <- unique(d$traveltime[!is.na(d$traveltime)])
for (level in strata) {
  in_stratum <- which(d$traveltime == level)
  tst <- chisq.test(d$absences[in_stratum], d$G3[in_stratum])
  chisq <- chisq + tst$statistic
  dof <- dof + tst$parameter
}
cat(chisq, dof, "\n")
print(pchisq(chisq, dof, lower.tail = FALSE))
# This single command performs a chi-square test for all implied conditional
# independencies from the DAG g on the dataset d, and reports the statistic,
# p-value and the RMSEA for each test.
# The report is diverted to a text file. max.print is raised so that a long
# result table is not truncated when printed.
options(max.print = 1000000)
sink("C:/python/bn/localtests.txt")
# print() is explicit so the table reaches the file even when the script is
# run via source(); the diversion is always closed afterwards, including on
# error, so later output returns to the console.
tryCatch(
  print(localTests(g, d, type = "cis.chisq")),
  finally = sink()
)