-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMarkovEx4b.py
125 lines (109 loc) · 4.72 KB
/
MarkovEx4b.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# -*- coding: utf-8 -*-"""
"""
Created on Mon Feb 09 23:11:37 2015
@author: Glaucia
"""
import random
mat = [[0.1,0.9],[0.1,0.9]]
import scipy
def discSamp(events,probs):##importing the discrete sampling function
ranNum = scipy.random.random()
cumulProbs = []
cumulProbs.extend([probs[0]])
for i in range(1,len(probs)):
cumulProbs.extend([probs[i]+cumulProbs[-1]])
for i in range(0,len(probs)):
if ranNum < cumulProbs[i]:
return events[i]
return None
##############################################################################
from numpy.random import uniform
def MarkovChain (x,matrix):##importing the MarkovChain fucntion I made for two states
rn=uniform(low=0.0, high=1.0, size=None)
currState=discSamp(events=[0,1],probs=[rn,1-rn])
list1=[currState]
for i in range (x):
currState=discSamp(events=[currState,1-currState],probs=matrix[currState])
list1.append(currState)
return list1
###############################################################################
# ----> Try to finish the above lines before Tues, Feb. 10th <----
# Now try running 100 simulations of 100 steps each. How often does the chain
# end in each state? How does this change as you change the transition matrix?
mat1 = [[0.1,0.9],[0.1,0.9]] ###### 0 and 1 have similar frequencies
sim=[]
for i in range(100):
r=MarkovChain (100,matrix=mat1)
p=r[99]
sim.append(p)
print sim##this is the simulation of MarkovChain 100 times
sim.count(0)##this wiil give the frequencies of ending in each state
sim.count(1)##this wiil give the frequencies of ending in each state
mat2 = [[0.9,0.1],[0.1,0.9]] #### Repetitions of 0 have a large probability here,
sim=[]
for i in range(100):
r=MarkovChain (100,matrix=mat2)
p=r[99]
sim.append(p)
print sim
sim.count(0)##this shows high frequencies of zeros
sim.count(1)
mat3 = [[0.1,0.9],[0.9,0.1]] #### Repetitions of 1 have a large probability here,
sim=[]
for i in range(100):
r=MarkovChain (100,matrix=mat3)
p=r[99]
sim.append(p)
print sim
sim.count(0)
sim.count(1)
###############################################################################
# Try defining a state space for nucleotides: A, C, G, and T. Now define a
# transition matrix with equal probabilities of change between states.
tup=("A","C","T","G")
######Matrix with equal probabilities for each transition
matNucl1=[[0.25,0.25,0.25,0.25],[0.25,0.25,0.25,0.25],[0.25,0.25,0.25,0.25],[0.25,0.25,0.25,0.25]]
######Defining a new function to run Markov Chain using state space with length equal to 4
def MarkovChainNucl (x,matrix,Event):
tup=tuple(Event)##this will transform a list of events in a tuple
currState=random.choice(tup)##this will sample a random state from my tuple
list1=[currState]## creating the list to put the sampled states
for i in range (x):##here x is the siz of the chain (you decide x)
if currState == Event[0]:##this is an if statement to allow choosing the next state according to the respective probabilities in transition matrix
currState=discSamp(events=Event,probs=matrix[0])##this will sample a new current state according to the respective row in the transition matrix
elif currState == Event[1]:
currState=discSamp(events=Event,probs=matrix[1])
elif currState == Event[2]:
currState=discSamp(events=Event,probs=matrix[2])
else:
currState=discSamp(events=Event,probs=matrix[3])
list1.append(currState)##Appending all the States in a list
return list1
#######Testing the function
MarkovChainNucl (10,matrix=matNucl1,Event=["A","C","T","G"])
###############################################################################
# Again, run 100 simulations of 100 steps and look at the ending states. Then
# try changing the transition matrix.
simNucl1=[]
for y in range(100):##simulating for 100 steps
w=MarkovChainNucl (100,matrix=matNucl1,Event=["A","C","T","G"])
z=w[99]
simNucl1.append(z)
print simNucl1
simNucl1.count("A")##Couting the frequencies of the ending nucleotide
simNucl1.count("C")##The frequencies are very similar here, because all the probabilities are the same in the transition matrix
simNucl1.count("T")
simNucl1.count("G")
#######Changing the matrix and getting far more repetitions of C
matNucl2=[[0.25,0.25,0.25,0.25],[0.05,0.85,0.05,0.05],[0.25,0.25,0.25,0.25],[0.25,0.25,0.25,0.25]]##I changed the value in the position Matrix [2][2] to allow repetitions of C
simNucl2=[]
for y in range(100):
w=MarkovChainNucl (100,matrix=matNucl2,Event=["A","C","T","G"])
z=w[99]
simNucl2.append(z)
print simNucl2
#######And also a high frequency of Cs
simNucl2.count("A")
simNucl2.count("C")
simNucl2.count("T")
simNucl2.count("G")