-
Notifications
You must be signed in to change notification settings - Fork 0
/
friedman.r
187 lines (150 loc) · 6.88 KB
/
friedman.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
friedman.test.with.post.hoc <- function(formu, data, to.print.friedman = T, to.post.hoc.if.signif = T, to.plot.parallel = T, to.plot.boxplot = T, signif.P = .05, color.blocks.in.cor.plot = T, jitter.Y.in.cor.plot =F)
{
# formu is a formula of the shape: Y ~ X | block
# data is a long data.frame with three columns: [[ Y (numeric), X (factor), block (factor) ]]
# Note: This function doesn't handle NA's! In case of NA in Y in one of the blocks, then that entire block should be removed.
# Loading needed packages
if(!require(coin))
{
print("You are missing the package 'coin', we will now try to install it...")
install.packages("coin")
library(coin)
}
if(!require(multcomp))
{
print("You are missing the package 'multcomp', we will now try to install it...")
install.packages("multcomp")
library(multcomp)
}
if(!require(colorspace))
{
print("You are missing the package 'colorspace', we will now try to install it...")
install.packages("colorspace")
library(colorspace)
}
# get the names out of the formula
formu.names <- all.vars(formu)
Y.name <- formu.names[1]
X.name <- formu.names[2]
block.name <- formu.names[3]
if(dim(data)[2] >3) data <- data[,c(Y.name,X.name,block.name)] # In case we have a "data" data frame with more then the three columns we need. This code will clean it from them...
# Note: the function doesn't handle NA's. In case of NA in one of the block T outcomes, that entire block should be removed.
# stopping in case there is NA in the Y vector
if(sum(is.na(data[,Y.name])) > 0) stop("Function stopped: This function doesn't handle NA's. In case of NA in Y in one of the blocks, then that entire block should be removed.")
# make sure that the number of factors goes with the actual values present in the data:
data[,X.name ] <- factor(data[,X.name ])
data[,block.name ] <- factor(data[,block.name ])
number.of.X.levels <- length(levels(data[,X.name ]))
if(number.of.X.levels == 2) { warning(paste("'",X.name,"'", "has only two levels. Consider using paired wilcox.test instead of friedman test"))}
# making the object that will hold the friedman test and the other.
the.sym.test <- symmetry_test(formu, data = data, ### all pairwise comparisons
teststat = "max",
xtrafo = function(Y.data) { trafo( Y.data, factor_trafo = function(x) { model.matrix(~ x - 1) %*% t(contrMat(table(x), "Tukey")) } ) },
ytrafo = function(Y.data){ trafo(Y.data, numeric_trafo = rank, block = data[,block.name] ) }
)
# if(to.print.friedman) { print(the.sym.test) }
if(to.post.hoc.if.signif)
{
if(pvalue(the.sym.test) < signif.P)
{
print(the.sym.test)
# the post hoc test
The.post.hoc.P.values <- pvalue(the.sym.test, method = "single-step") # this is the post hoc of the friedman test
# plotting
if(to.plot.parallel & to.plot.boxplot) par(mfrow = c(1,2)) # if we are plotting two plots, let's make sure we'll be able to see both
if(to.plot.parallel)
{
X.names <- levels(data[, X.name])
X.for.plot <- seq_along(X.names)
plot.xlim <- c(.7 , length(X.for.plot)+.3) # adding some spacing from both sides of the plot
if(color.blocks.in.cor.plot)
{
blocks.col <- rainbow_hcl(length(levels(data[,block.name])))
} else {
blocks.col <- 1 # black
}
data2 <- data
if(jitter.Y.in.cor.plot) {
data2[,Y.name] <- jitter(data2[,Y.name])
par.cor.plot.text <- "Parallel coordinates plot (with Jitter)"
} else {
par.cor.plot.text <- "Parallel coordinates plot"
}
# adding a Parallel coordinates plot
matplot(as.matrix(reshape(data2, idvar=X.name, timevar=block.name,
direction="wide")[,-1]) ,
type = "l", lty = 1, axes = FALSE, ylab = Y.name,
xlim = plot.xlim,
col = blocks.col,
main = par.cor.plot.text)
axis(1, at = X.for.plot , labels = X.names) # plot X axis
axis(2) # plot Y axis
points(tapply(data[,Y.name], data[,X.name], median) ~ X.for.plot, col = "red",pch = 4, cex = 2, lwd = 5)
}
if(to.plot.boxplot)
{
# first we create a function to create a new Y, by substracting different combinations of X levels from each other.
subtract.a.from.b <- function(a.b , the.data)
{
the.data[,a.b[2]] - the.data[,a.b[1]]
}
temp.wide <- reshape(data, idvar=X.name, timevar=block.name,
direction="wide") #[,-1]
wide.data <- as.matrix(t(temp.wide[,-1]))
colnames(wide.data) <- temp.wide[,1]
Y.b.minus.a.combos <- apply(with(data,combn(levels(data[,X.name]), 2)), 2, subtract.a.from.b, the.data =wide.data)
names.b.minus.a.combos <- apply(with(data,combn(levels(data[,X.name]), 2)), 2, function(a.b) {paste(a.b[2],a.b[1],sep=" - ")})
the.ylim <- range(Y.b.minus.a.combos)
the.ylim[2] <- the.ylim[2] + max(sd(Y.b.minus.a.combos)) # adding some space for the labels
is.signif.color <- ifelse(The.post.hoc.P.values < .05 , "green", "grey")
boxplot(Y.b.minus.a.combos,
names = names.b.minus.a.combos ,
col = is.signif.color,
main = "Boxplots (of the differences)",
ylim = the.ylim
)
legend("topright", legend = paste(names.b.minus.a.combos, rep(" ; PostHoc P.value:", number.of.X.levels),round(The.post.hoc.P.values,5)) , fill = is.signif.color )
abline(h = 0, col = "blue")
}
list.to.return <- list(Friedman.Test = the.sym.test, PostHoc.Test = The.post.hoc.P.values)
if(to.print.friedman) {print(list.to.return)}
return(list.to.return)
} else {
print("The results where not significant, There is no need for a post hoc test")
print(the.sym.test)
return(the.sym.test)
}
}
# Original credit (for linking online, to the package that performs the post hoc test) goes to "David Winsemius", see:
# http://tolstoy.newcastle.edu.au/R/e8/help/09/10/1416.html
}
#source("http://www.r-statistics.com/wp-content/uploads/2010/02/Friedman-Test-with-Post-Hoc.r.txt") # loading the friedman.test.with.post.hoc function from the internet
### Comparison of three Wine ("Wine A", "Wine B", and
### "Wine C") for rounding first base.
#WineTasting <- data.frame(
# Taste = c(5.40, 5.50, 5.55,
# 5.85, 5.70, 5.75,
# 5.20, 5.60, 5.50,
# 5.55, 5.50, 5.40,
# 5.90, 5.85, 5.70,
# 5.45, 5.55, 5.60,
# 5.40, 5.40, 5.35,
# 5.45, 5.50, 5.35,
# 5.25, 5.15, 5.00,
# 5.85, 5.80, 5.70,
# 5.25, 5.20, 5.10,
# 5.65, 5.55, 5.45,
# 5.60, 5.35, 5.45,
# 5.05, 5.00, 4.95,
# 5.50, 5.50, 5.40,
# 5.45, 5.55, 5.50,
# 5.55, 5.55, 5.35,
# 5.45, 5.50, 5.55,
# 5.50, 5.45, 5.25,
# 5.65, 5.60, 5.40,
# 5.70, 5.65, 5.55,
# 6.30, 6.30, 6.25,),
# Wine = factor(rep(c("Wine A", "Wine B", "Wine C"), 22)),
# Taster = factor(rep(1:22, rep(3, 22))))
#with(WineTasting , boxplot( Taste ~ Wine )) # boxploting
#friedman.test.with.post.hoc(Taste ~ Wine | Taster ,WineTasting) # the same with our function. With post hoc, and cool plots