-
Notifications
You must be signed in to change notification settings - Fork 0
/
hwk6.rmd
286 lines (238 loc) · 11.4 KB
/
hwk6.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
<h5>Problem 2</h5>
(a)
```{r load_packages, include=FALSE, echo=FALSE}
library(compiler)
library(ggplot2)
```
```{r}
#step = number of iterations
sampler <- function(step) {
#initialize intial graph
M <- matrix(0,100,100)
sums <- 0
for(i in 1:step) {
x <- trunc(runif(1,1,101)) #choose an x value at random
y <- trunc(runif(1,1,101)) #choose a y value at random
change <- trunc(runif(1,0,2)) #determine 1 try to flip to 1, or 0, flip to 0
if(change) {
#Now we will check to see if we can flip our new point
flip <- TRUE
if(x+1 <= 100) {
if(M[x+1,y]==1) {flip <- FALSE} #Check right
}
if (x >=2 & flip) {
if(M[x-1,y]==1) {flip <- FALSE} #Check left
}
if(y+1<=100 & flip) {
if(M[x,y+1]==1) {flip<-FALSE} #Check above
}
if(y>=2 & flip) {
if(M[x,y-1] == 1) {flip<-FALSE} #Check below
}
if(flip) {M[x,y]<-1}
} else { M[x,y]<-0 }
sums <- rbind(sums,sum(M)/10000)
}
return(sums)
}
```
<p>Now run the sampler 50,000 times and graph to find the steady state.</p>
```{r echo = FALSE}
M2 <- sampler(50000)
qplot(y = as.numeric(M2)) + xlab("Iteration") + ylab("Percent Occupied")
```
<p>It appears to reach a steady state around 20,000 - 30,000 iterations at roughly 20 percent occupied, so I will run the sampler 1000 times for 25,000 steps per chain and generate a histogram.</p>
```{r echo=FALSE}
M3 <- sapply(1:100, function(x) sampler(25000))
qplot(as.numeric(M3), geom="histogram", binwidth=.005) + xlab("f(x)/10000")
```
<p>(b) We can see from the previous graphs that the chain reaches a steady state around .23, so the probability that it will reach .4 is near zero.</p>
```{r}
montecarlo <- function(p, matrix) {
M <- matrix #using the matrix we generated for the histogram for part a
results <- 0
for (i in 1:100) {
p <- ifelse(M[i] > .4, 1, 0)
results <- c(results, p)
}
return(sum(results)/100)
}
```
```{r echo=FALSE}
p <- montecarlo(.4, M3)
ci.right <- p + 2 * sqrt((p*(1-p)/100))
ci.left <- p - 2 * sqrt((p*(1-p)/100))
```
<p>Doing Monte Carlo integration yields a p-value of `r p` for the null hypothesis that e was drawn uniformly from all possible configurations.</p>
(c)
```{r}
sampler2 <- function(step) {
#initialize intial graph
M <- matrix(0,100,100)
M[50,50] <- 1 #because we can't divide by 0
sums <- 0 #place to store sums of each step
#now we need to come up with a valid proposal and the test whether we accept or reject based on whether a uniform rv < nu(j)/nu(i)
for(i in 1:step) {
base <- M
#step 1: find a valid proposal
x <- trunc(runif(1,1,101)) #choose an x coordinate
y <- trunc(runif(1,1,101)) #choose a y value at random
change <- trunc(runif(1,0,2)) #determine 1 try to flip to 1, or 0, flip to 0
if(change) {
flip <- TRUE #Now we will check to see if we can flip our new point
if(x+1 <= 100) {
if(M[x+1,y]==1) {flip <- FALSE} #Check right
}
if (x >=2 & flip) {
if(M[x-1,y]==1) {flip <- FALSE} #Check left
}
if(y+1<=100 & flip) {
if(M[x,y+1]==1) {flip<-FALSE} #Check above
}
if(y>=2 & flip) {
if(M[x,y-1] == 1) {flip<-FALSE} #Check below
}
if(flip) {M[x,y]<-1}
} else
{ M[x,y]<-0 }
#2. now that we have a proposal, we test:
U <- runif(1)
if(U < (sum(M)^2)/(sum(base)^2)){
sums <- rbind(sums,(sum(M))^2/10000)
} else {
sums <- rbind(sums,(sum(base))^2/10000)
M <- base
}
}
return(sums) #this returns a list of values of (f(e))^2
}
```
<p>Now generate a histogram in a similar fashion to part a, generating 100 25,000-step chains:</p>
```{r echo=FALSE}
M4 <- sapply(1:100, function(x) sampler2(25000))
qplot(as.numeric(M4), geom="histogram", binwidth=10) + xlab("f(Y)/10000")
```
<p>The new histogram still has the majority of instances trend toward the steady state as the other one did, but since $\frac{\nu(i)}{\nu(j)}$ will actually lead to some moves being rejected, there is a greater concentration of values closer to 0, meaning the number of occupied spaces took longer to increase in the new sampler.</p>
<h5>Problem 3</h5>
</p>(a) Since the order the heads and tails are achieved matters, the space $\Omega$ is found by raising the formula for a permutation of 20 spaces with 11 heads and 9 tails, raised to the 10th power.</p>
$$ \left(\frac{20!}{11!9!}\right)^{10} = 167,960^{10} $$
<p>(b) The formula for $P(\omega | D)$ is the conditional probability of $\omega$ given the results laid out in the problem.</p>
$$ P(\omega | D) = \frac{P(D | \omega)P(\omega)}{P(D)} $$
<p>Logically, since all $\omega$ in $\Omega$ sum to 20 dollars, with each bucket of 20 sequential flips totaling 2 dollars, then $P(D|\omega) = 1$. </p>
<p>The probability of 200 flips resulting in any given $\omega$ is $\frac{1}{2^{200}}$, or one over all possible sequences of 200 flips. </p>
<p>The probability of the given data would be all possible sequences of 200 meeting the conditions, $\Omega$, divided by all possible combinations of 200 flips, or $2^{200}$. </p>
<p>Substituting into the original equation, we get:</p>
$$ P(\omega | D) = \cfrac{1 \times
\cfrac{1}{2^{200}}}{
\cfrac{\Omega}{2^{200}}} $$
$$ P(\omega | D) = \frac{1}{\Omega} $$
<p>Thus, the probability of $P(\omega | D) = \frac{1}{\Omega}$, which is uniform over $\Omega$.</p>
<p>(c) The f($\omega$) I decided on to determine the steady state is the amount of money one has after the first 10 flips of a bucket of 20 minus the money collected during the second 10 flips. I chose this function for several reasons:</p>
<ul>
<li>It is never 0, based on the conditions of our problem. If one group of 10 has 5 heads and 5 tails, the other group of 10 cannot have 5 and 5 as well, since there are only 9 tails in each bucket of 20. This is important because the Metropolis-Hastings ratio requires dividing f(j)/f(i).</li>
<li>While the acceptance rate is about 80 percent, it is much lower than other measures I tried, which almost always resulted in M-H ratios around 1.</li>
<li>It involves all the data in the sequence of 200 in a somewhat meaningful way. Larger test groups, of say 50, would basically be determined by the odd group of 10, because each group of 20 always equals 2 dollars.</li>
</ul>
```{r}
#proposal generating function
flips <- function(chain, num) {
#now we use the current sequence to propose a new sequence by switching up one group of 20 flips
choice <- toString(num)
#there is most definitely a better way to do this, possibly select range from list of ranges then wipe out tails/assign new values in one step
switch(choice,
'1'={chain[1:20] <- 1 #knock out all tails
chain[sample(1:20, 9, replace=F)] <- -1 #place new tails
},
'2'={chain[21:40] <- 1
chain[sample(21:40, 9, replace=F)] <- -1},
'3'={chain[41:60] <- 1
chain[sample(41:60, 9, replace=F)] <- -1},
'4'={chain[61:80] <- 1
chain[sample(61:80, 9, replace=F)] <- -1},
'5'={chain[81:100] <- 1
chain[sample(81:100, 9, replace=F)] <- -1},
'6'={chain[101:120] <- 1
chain[sample(101:120, 9, replace=F)] <- -1},
'7'={chain[121:140] <- 1
chain[sample(121:140, 9, replace=F)] <- -1},
'8'={chain[141:160] <- 1
chain[sample(141:160, 9, replace=F)] <- -1},
'9'={chain[161:180] <- 1
chain[sample(161:180, 9, replace=F)] <- -1},
'10'={chain[181:200] <- 1
chain[sample(181:200, 9, replace=F)] <- -1},
stop(chain)
)
return(chain)
}
```
```{r}
#function to sample from all sequences fitting the conditions
omega <- function(N){
results <- data.frame("Step" = 0, "Move" =0, "Diffs" = 0)
chain <- rep(c(rep(1,11), rep(-1,9)),10) #start with a matrix that fits the conditions
num <- trunc(runif(1,1,11)) #now we randomnly pick a bucket of 20 to change
base <- flips(chain, num) #change bucket in base matrix
#moving through the desired number of steps
for ( i in 1:N) {
num <- trunc(runif(1,1,11)) #choose bucket to change
proposal <- flips(base, num) #generate a proposal
#f(x) is the difference in the amount of money generated in the first 10 flips of any bucket of 20 minus the amount of money earned in the final 10 flips
proposal.m <- sum(proposal[c(1:10, 21:30, 41:50, 61:70, 81:90, 101:110, 121:130, 141:150, 161:170, 181:190)]) - sum(proposal[c(11:20, 31:40, 51:60, 71:80, 91:100, 111:120, 131:140, 151:160, 171:180, 191:200)])
base.m <- sum(base[c(1:10, 21:30, 41:50, 61:70, 81:90, 101:110, 121:130, 141:150, 161:170, 181:190)]) - sum(base[c(11:20, 31:40, 51:60, 71:80, 91:100, 111:120, 131:140, 151:160, 171:180, 191:200)])
test <- proposal.m/base.m #this results in the proposal being accepted roughly 78% of the time
if(runif(1) < min(1,test, na.rm=TRUE)) {
base <- proposal
results <- rbind(results, c(results <- data.frame("Step" = i, "Move" = 1, "Diffs" = proposal.m)))
} else {
base <- base
results <- rbind(results, c(results <- data.frame("Step" = i, "Move" = 0, "Diffs" = base.m)))
}
}
return(results)
}
```
<p>Running the sampler 10,000 times shows it reaches a steady state nearly immediately, so I will use 3,000 steps to generate the histogram for part d.</p>
```{r echo=FALSE}
#test runs to show acceptance rate
tests <- omega(10000)
qplot(x = as.numeric(tests$Step), y = as.numeric(tests$Diffs), geom="line") + ylab("Difference in number of tails") + xlab("Step")
```
<p>(d) In order to count the money at each step, we will adjust the output of the omega function:</p>
```{r}
omega2 <- function(N){
results <- data.frame("Step" = 0, "Round" =0, "Amount" = 0)
#start with a matrix that fits the conditions, but with 1's and 0's instead of 1 and -1 so that any function we do to count tails will always be greater than 0
chain <- rep(c(rep(1,11), rep(-1,9)),10)
#now we randomnly pick a bucket of 20 to change
num <- trunc(runif(1,1,11))
#assign it to the base function
base <- flips(chain, num)
for ( i in 1:N) {
num <- trunc(runif(1,1,11)) #choose bucket to change
proposal <- flips(base, num) #generate a proposal
proposal.m <- sum(proposal[c(1:10, 21:30, 41:50, 61:70, 81:90, 101:110, 121:130, 141:150, 161:170, 181:190)]) - sum(proposal[c(11:20, 31:40, 51:60, 71:80, 91:100, 111:120, 131:140, 151:160, 171:180, 191:200)])
base.m <- sum(base[c(1:10, 21:30, 41:50, 61:70, 81:90, 101:110, 121:130, 141:150, 161:170, 181:190)]) - sum(base[c(11:20, 31:40, 51:60, 71:80, 91:100, 111:120, 131:140, 151:160, 171:180, 191:200)])
test <- proposal.m/base.m
#this results in the proposal being accepted roughly 78% of the time
if(runif(1) < min(1,test, na.rm=TRUE)) {
base <- proposal
total <- cumsum(as.numeric(base))
top.dollar <- which.max(total)
results <- rbind(results, c(results <- data.frame("Step" = i, "Round" = top.dollar, "Amount" = total[top.dollar])))
} else {
base <- base
total <- cumsum(as.numeric(base))
top.dollar <- which.max(total)
results <- rbind(results, c(results <- data.frame("Step" = i, "Round" = top.dollar, "Amount" = total[top.dollar])))
}
}
return(results)
}
```
<p>And the resulting histograms for 3,000 steps:</p>
```{r echo =FALSE}
tests2 <- omega2(3000)
qplot(as.numeric(tests2$Round[2:3000]), geom="histogram", binwidth = 1, fill=..count.., main = "Histogram for T") + xlab("Round Max Achieved")
qplot(as.numeric(tests2$Amount[2:3000]), geom="histogram", binwidth = 1, fill = ..count.., main = "Histogram for Z") + xlab("Highest Amount")
```