-
Notifications
You must be signed in to change notification settings - Fork 2
/
normal function.R
198 lines (167 loc) · 11.7 KB
/
normal function.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
## This is a simple function for calculating and visualizing bayesian inference
## The inference method is bayes factors by savage-dickey density ratio
## The function works by inputting the various distribution hyperparameters and data summaries
## The variables are all pretty straightforward, but I'll define them anyway
## priorMean is the location parameter (mu) for the normal (gaussian) prior distribution
## priorSD is the scale parameter (sigma) for the prior distribution
## sampleMean is the mean value calculated from sample data
## sampleSE is the standard **ERROR** calculated from sample data
## Remember, t is the sampleMean divided by sampleSE, so if you know any two you know the other
## CI is the credible interval limits you want reported and shaded. .95 is recommended and standard
## null is the null value of interest (if left NULL it won't calculate a bayes factor)
## this function returns plots, bayes factors, posterior distribution hyperparameters, and 95% credible interval limits
## The function
plot.norm <- function(priorMean = 0, priorSD = 1, sampleMean = NULL, sampleSE = NULL, CI = NULL, null = NULL, xlow = "auto", xup = "auto", xby = "auto"){
w <- seq(from=priorMean-100*priorSD, to=priorMean+100*priorSD, by=min(priorSD,sampleSE)/100) ##Set up for distributions
y1 <- dnorm(w, priorMean, priorSD) ## data for plotting prior curve
if(is.numeric(sampleMean) == T){
y2 <- dnorm(w, sampleMean, sampleSE) ## data for plotting likelihood curve
##Calculate and prepare to plot posterior distribution
priorPrec <- (1/priorSD)^2 ## prior precision, 1 / sigma^2
dataPrec <- (1/sampleSE)^2 ## data (likelihood) precision, 1 / SE^2
postPrec <- priorPrec + dataPrec
postSD <- sqrt(1/postPrec) ## 1/precision = var
postMean <- postSD^2 * (priorMean/priorSD^2 + sampleMean/sampleSE^2) ##calculate mean of posterior
y3 <- dnorm(w, postMean, postSD) #data for posterior line
}
##Defining x axis limits and ticks
##If there is sample data, and automatic xlow, base lower x dimensions off of posterior scale
if(is.numeric(sampleMean) == T && is.numeric(xlow) == F){
xlower <- qnorm(.005, postMean, postSD)
##if the null is off the plot, rescale so that the null is the new end point
if(is.numeric(null) == T && null < xlower){
xlower <- null
}
}
##If there is no sample data, use the prior distribution for scale
if(is.numeric(sampleMean) == F && is.numeric(xlow) == F){
xlower <- qnorm(.001,priorMean,priorSD)
}
##If there is user specified x lower limit, use that above all else
if(is.numeric(xlow) == T){
xlower <- xlow
}
## Specifying xupper limits similarly
if(is.numeric(sampleMean) == T && is.numeric(xup) == F){
xupper <- qnorm(.995, postMean, postSD)
if(is.numeric(null) == T && null > xupper){
xupper <- null
}
}
if(is.numeric(sampleMean) == F && is.numeric(xup) == F){
xupper <- qnorm(.999,priorMean,priorSD)
}
if(is.numeric(xup) == T){
xupper <- xup
}
##Set up y-axis automatically. If there is sample data, base y-axis off posterior.
if(is.numeric(sampleMean) == T){
yupper <- 1.25 * max(y1,y2,y3)
ylower <- 0
}
##If there is no sample data, base y axis dimensions off of prior scale
else{
yupper <- 1.25 * max(y1)
ylower <- 0
}
## If the x dmiensions are *all* manually set, create tick marks based on user preference
if(is.numeric(xby) == T && is.numeric(xlow) == T && is.numeric(xup) == T){
xlims <- seq(xlower,xupper, xby)
}
## If the user doesn't specify *all* aspects of x-axis, use pretty() to set x-axis tick marks roughly at user preference
else{
xlims <- pretty(c(xlower,xupper))
}
ylims <- pretty(c(ylower,yupper)) ##To be used for y axis ticks, no user preference allowed
xrange <- max(xlims)-min(xlims) ##For some easier inputs later
##Plot only the prior distribution if there is no sample data. Notice the titles are different
if(is.numeric(sampleMean) == F){
plot(w, y1, xlim= c(min(xlims),max(xlims)), ylim=c(ylower,max(ylims)), type = "l", ylab= "Density",
xlab= "Mu", las=1, main="Prior distribution with normal shape",lwd=3, lty =2,
cex.lab=1.5, cex.main=1.5, col = "skyblue",axes=F)
#Format the axes so they look good
axis(1, at = xlims, pos= -.02 * (max(ylims))) #adds x axis
axis(2, at = ylims, las=1, pos= min(xlims)- abs(.02*xrange ))#adds y axis
}
##Plot all three distributions if there is sample data. Notice the titles and x axis caption are different than if it's just a prior
if(is.numeric(sampleMean) == T){
plot(w[w > (min(xlims) - .02 * xrange)], y1[w > (min(xlims) - .02 * xrange)], xlim= c(min(xlims),max(xlims)),
ylim=c(0,max(ylims)), type = "l", ylab= "Density", xlab= "Mu", las=1,lwd=3, lty = 2,
cex.lab=1.5, cex.main=1.5, col = "skyblue",axes=F)
title(main="Prior-to-Posterior Transformation", sub = "(normal prior with normal data)")
#Format the axes so they look good
axis(1, at = xlims, pos= -.02 * (max(ylims))) #adds x axis
axis(2, at = ylims, las=1, pos= min(xlims)- abs(.02* xrange ))#adds y axis
##Add lines for likelihood and posterior
lines(w[w> (min(xlims) - .02 * xrange)], y2[w> (min(xlims) - .02 * xrange)], lwd =2, lty =3, col = "darkorange") #if there is sample data, plot the likelihood
lines(w[w> (min(xlims) - .02 * xrange)], y3[w> (min(xlims) - .02 * xrange)], lwd = 4, col = "darkorchid1") # if there is sample data, plot the posterior
##Add legend in the top left
legend("topleft", c("Prior", "Posterior", "Likelihood"), col = c("skyblue", "darkorchid1", "darkorange"),
lty = c(2,1,3), lwd = c(3,5,2), bty = "n", y.intersp = .55, x.intersp = .1, seg.len=.7)
}
##If there is sample data and a null value specified, but no CI cutoff then add points and return BFs
if(is.numeric(null) == T && is.numeric(sampleMean) == T && is.numeric(CI) == F){
## Adds points on the distributions at the null value
points(null, dnorm(null, priorMean, priorSD),
pch = 21, bg = "blue", cex = 1.5) #adds point to prior
points(null, dnorm(null, postMean, postSD),
pch = 21, bg = "darkorchid", cex = 1.5) #adds point to posterior
lines(c(null,null),c(-.02 * (max(ylims)),1.11*max(y1,y2,y3)), lwd=1, lty = 5, col ="grey73")##adds vertical line at null value
##Calculate BF using Savage-Dickey density ratio
null.H0 <- dnorm(null, priorMean, priorSD)
null.H1 <- dnorm(null, postMean, postSD)
return( list("BF01 (in favor of H0)" = round(null.H1/null.H0,2), "BF10 (in favor of H1)" = round(null.H0/null.H1,2),
"Posterior Mean" = round(postMean,3), "Posterior SD" = round(postSD,3)))
}
##If there is sample data, a CI cutoff, but no null value, only return posterior descriptives and shade graph
if(is.numeric(sampleMean) == T && is.numeric(null) == F && is.numeric(CI) == T){
##Calculate CI limits
CI.low <- qnorm((1-CI)/2, postMean, postSD)
CI.high <- qnorm(1-(1-CI)/2, postMean, postSD)
##Adds shading for the area beyond the curve
CI.lowtail <- mean(xlims) - abs(.55* xrange) ##Set up the max range of the shading
CI.hightail <- mean(xlims) + abs(.55 * xrange)
SEQlow<-seq(CI.lowtail, CI.low, (CI.low - CI.lowtail)/1000)
SEQhigh <- seq(CI.hightail,CI.high, (CI.high - CI.hightail)/1000)
##Adds shaded area for x% Posterior CIs
cord.x <- c(CI.lowtail, SEQlow, CI.low) ##set up for shading
cord.y <- c(0,dnorm(SEQlow,postMean, postSD),0) ##set up for shading
polygon(cord.x,cord.y,col='orchid', lty= 3) ##shade left tail
cord.xx <- c(CI.hightail, SEQhigh,CI.high)
cord.yy <- c(0,dnorm(SEQhigh,postMean, postSD), 0)
polygon(cord.xx,cord.yy,col='orchid', lty=3) ##shade right tail
return( list("Posterior Mean" = round(postMean,3), "Posterior SD" = round(postSD,3),
"Posterior CI lower" = round(CI.low,3), "Posterior CI upper" = round(CI.high,3)))
}
##If there is sample data and a null value, add null line, null points,
##and return posterior descriptives and Savage-Dickey bayes factors
if(is.numeric(null) == T && is.numeric(sampleMean) == T && is.numeric(CI) == T){
##Calculate CI limits
CI.low <- qnorm((1-CI)/2, postMean, postSD)
CI.high <- qnorm(1-(1-CI)/2, postMean, postSD)
##Adds shading for the area beyond the curve
CI.lowtail <- mean(xlims) - abs(.55* xrange) ##Set up the max range of the shading
CI.hightail <- mean(xlims) + abs(.55 * xrange)
SEQlow<-seq(CI.lowtail, CI.low, (CI.low - CI.lowtail)/1000)
SEQhigh <- seq(CI.hightail,CI.high, (CI.high - CI.hightail)/1000)
##Adds shaded area for x% Posterior CIs
cord.x <- c(CI.lowtail, SEQlow, CI.low) ##set up for shading
cord.y <- c(0,dnorm(SEQlow,postMean, postSD),0) ##set up for shading
polygon(cord.x,cord.y,col='orchid', lty= 3) ##shade left tail
cord.xx <- c(CI.hightail, SEQhigh,CI.high)
cord.yy <- c(0,dnorm(SEQhigh,postMean, postSD), 0)
polygon(cord.xx,cord.yy,col='orchid', lty=3) ##shade right tail
## Adds points on the distributions at the null value
points(null, dnorm(null, priorMean, priorSD),
pch = 21, bg = "blue", cex = 1.5) #adds point to prior
points(null, dnorm(null, postMean, postSD),
pch = 21, bg = "darkorchid", cex = 1.5) #adds point to posterior
lines(c(null,null),c(-.02 * (max(ylims)),1.11*max(y1,y2,y3)), lwd=1, lty = 5, col ="grey73")##adds vertical line at null value
##Calculate BF using Savage-Dickey density ratio
null.H0 <- dnorm(null, priorMean, priorSD)
null.H1 <- dnorm(null, postMean, postSD)
return( list("BF01 (in favor of H0)" = round(null.H1/null.H0,2), "BF10 (in favor of H1)" = round(null.H0/null.H1,2),
"Posterior Mean" = round(postMean,3), "Posterior SD" = round(postSD,3),
"Posterior CI lower" = round(CI.low,3), "Posterior CI upper" = round(CI.high,3)))
}
}