-
Notifications
You must be signed in to change notification settings - Fork 0
/
viterbi_pandas_adv.py
122 lines (112 loc) · 6.1 KB
/
viterbi_pandas_adv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from pandas import DataFrame, IndexSlice, Series
from seaborn import light_palette
# Hidden Markov model definition.
# `observations` is the observed sequence, in order; it is the input that
# gets decoded.  An activity may appear more than once.
observations = (
    "Wearing Trenchcoat & Fedora",
    "Eating Pizza",
    "Eating Doritos",
    "Browsing Reddit",
    "Playing WoW",
    "Smelly",
    "Vaping",
    "Listening to Power Metal",
    "Brandishing Katana",
    "Wearing Trenchcoat & Fedora",
    "Browsing 4chan",
    "Playing Magic the Gathering",
    "Drinking Mountain Dew",
)
# The hidden states; they label the rows of both probability tables.
hidden_states = (
    "Depressed",
    "Confident",
    "Tired",
    "Hungry",
    "Thirsty",
    "Angry",
    "Gamer",
    "Brony",
    "Libertarian",
    "Atheist",
)
# Every activity a hidden state can emit; they label the emission columns.
emit_states = (
    "Eating Pizza",
    "Browsing Reddit",
    "Drinking Mountain Dew",
    "Eating Doritos",
    "Wearing Trenchcoat & Fedora",
    "Browsing 4chan",
    "Playing Magic the Gathering",
    "Playing WoW",
    "Brandishing Katana",
    "Watching My Little Pony",
    "Listening to Power Metal",
    "Vaping",
    "Smelly",
)
# Transition probabilities.  Each key below becomes a COLUMN of the frame
# and its list runs down the rows in `hidden_states` order, so the entry at
# [row, column] is the probability of moving from the row state to the
# column state.  Read vertically through this literal (same position in
# every list) you get one row of the frame; every such row sums to 1.
trans_prob_df = DataFrame(
    index=hidden_states,
    columns=hidden_states,
    data={
        "Depressed": [
            0.10, 0.10, 0.10, 0.15, 0.05, 0.15, 0.05, 0.05, 0.05, 0.05,
        ],
        "Confident": [
            0.05, 0.05, 0.05, 0.10, 0.10, 0.15, 0.05, 0.15, 0.10, 0.10,
        ],
        "Tired": [
            0.15, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05,
        ],
        "Hungry": [
            0.15, 0.05, 0.10, 0.15, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05,
        ],
        "Thirsty": [
            0.10, 0.15, 0.15, 0.10, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05,
        ],
        "Angry": [
            0.05, 0.10, 0.15, 0.05, 0.15, 0.15, 0.15, 0.05, 0.15, 0.15,
        ],
        "Gamer": [
            0.10, 0.10, 0.10, 0.10, 0.15, 0.10, 0.20, 0.15, 0.05, 0.05,
        ],
        "Brony": [
            0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 0.15, 0.30, 0.05, 0.05,
        ],
        "Libertarian": [
            0.10, 0.15, 0.10, 0.10, 0.10, 0.10, 0.10, 0.05, 0.30, 0.15,
        ],
        "Atheist": [
            0.10, 0.15, 0.10, 0.10, 0.10, 0.10, 0.10, 0.05, 0.15, 0.30,
        ],
    },
)
# Emission probabilities.  Same layout: each key is a column (an activity),
# each list runs down the hidden states, so [row, column] is the probability
# of seeing the column activity while in the row state.  Every row of the
# frame sums to 1.
emit_prob_df = DataFrame(
    index=hidden_states,
    columns=emit_states,
    data={
        "Eating Pizza": [
            0.10, 0.05, 0.05, 0.20, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05,
        ],
        "Browsing Reddit": [
            0.10, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05, 0.15, 0.15,
        ],
        "Drinking Mountain Dew": [
            0.10, 0.10, 0.15, 0.10, 0.20, 0.05, 0.15, 0.05, 0.05, 0.05,
        ],
        "Eating Doritos": [
            0.10, 0.05, 0.05, 0.15, 0.10, 0.05, 0.10, 0.05, 0.05, 0.05,
        ],
        "Wearing Trenchcoat & Fedora": [
            0.05, 0.20, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10,
        ],
        "Browsing 4chan": [
            0.10, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05,
        ],
        "Playing Magic the Gathering": [
            0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05, 0.05,
        ],
        "Playing WoW": [
            0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05, 0.05,
        ],
        "Brandishing Katana": [
            0.05, 0.10, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05, 0.15, 0.15,
        ],
        "Watching My Little Pony": [
            0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.25, 0.05, 0.10,
        ],
        "Listening to Power Metal": [
            0.05, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05, 0.05, 0.10,
        ],
        "Vaping": [
            0.05, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.10, 0.05,
        ],
        "Smelly": [
            0.15, 0.10, 0.15, 0.10, 0.10, 0.15, 0.05, 0.10, 0.10, 0.05,
        ],
    },
)
# Prior probability of each hidden state before anything is observed.
# Stored as a one-column frame whose label is the first observation
# prefixed with its position, "(0) ...".
start_probs = DataFrame(
    index=hidden_states,
    data={
        "(0) {}".format(observations[0]): [
            0.10, 0.10, 0.10, 0.15, 0.10, 0.10, 0.15, 0.10, 0.05, 0.05,
        ],
    },
)
# First column of the dynamic programming matrix: each state's prior times
# the probability that the state emits the first observation.
viterbi_df = start_probs.mul(emit_prob_df.loc[:, observations[0]], axis=0)
# Dynamic programming (forward) pass: append one column per remaining
# observation.  `i` is the position of the PREVIOUS column, so the column
# being built is number i + 1.
for i, observation in enumerate(observations[1:]):
    previous_scores = viterbi_df.iloc[:, i]
    # Scale every transition row by the score of the state it leaves, then
    # keep, for each destination state (column), the best incoming score.
    best_incoming = trans_prob_df.mul(previous_scores, axis=0).max(axis=0)
    column_label = "({}) {}".format(i + 1, observation)
    # Finally weight by how likely each state is to emit this observation.
    viterbi_df[column_label] = best_incoming * emit_prob_df[observation]
# Styled view of the whole score matrix, shaded with a green gradient
# (intended to make the highest values the darkest).
green_scale = light_palette("green", as_cmap=True)
viterbi_traceback_df = viterbi_df.style.background_gradient(cmap=green_scale)
# The traceback starts at the best score of the final column: remember both
# the score and the state (row label) that holds it.
final_scores = viterbi_df.iloc[:, -1]
traceback_prob = [final_scores.max()]
dyn_prog_path = [final_scores.idxmax()]
# Mark that end point in red; the subset restricts the highlight to the
# last column only.
viterbi_traceback_df.highlight_max(
    color="red",
    subset=IndexSlice[[viterbi_df.columns[-1]]],
)
# Traceback: walk the score matrix from the second-to-last column back to
# the first, recovering at each step the previous state that produced the
# score of the state already chosen (dyn_prog_path[0]).
#
# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map;
# use whichever name this pandas version provides.
cell_styler_name = (
    "map" if hasattr(viterbi_traceback_df, "map") else "applymap")
for i in range(len(observations) - 2, -1, -1):
    # Score of entering the chosen state from every candidate previous
    # state.  The chosen state's emission probability is the same factor for
    # every candidate, so it cannot change which one wins and is left out.
    candidate_scores = (
        viterbi_df.iloc[:, i] * trans_prob_df.loc[:, dyn_prog_path[0]])
    # Take the arg-max directly instead of searching for a float that is
    # bit-for-bit equal to the stored score.  idxmax returns the first
    # maximal row, which keeps the original "first match wins" tie-break
    # and can never come back empty.
    traceback_loc = candidate_scores.idxmax()
    # Record the value and its state at the front, so both lists stay in
    # observation order.
    traceback_prob.insert(
        0, viterbi_df.loc[traceback_loc, viterbi_df.columns[i]])
    dyn_prog_path.insert(0, traceback_loc)
    # Color the chosen cell of this column red.
    viterbi_traceback_df = getattr(viterbi_traceback_df, cell_styler_name)(
        lambda x: "background-color: red",
        subset=IndexSlice[[dyn_prog_path[0]], [viterbi_df.columns[i]]])
# Print the decoded path and the final probability, then show the styled
# dynamic programming matrix.
print("The observations:", ", ".join(observations))
print("The sequence of hidden states is most likely:")
# Print the path the traceback actually recorded (one state per column, in
# order).  Rebuilding it by looking up the path probabilities in the matrix
# is unreliable: any cell higher in a column that merely shares a value with
# some path probability (ties are common in these tables) would be reported
# instead of the real state.
print(Series(dyn_prog_path, index=viterbi_df.columns))
print("The final probability:", traceback_prob[-1])
# Bare expression, presumably so a notebook renders the styled matrix as
# the cell output; it has no effect when run as a plain script.
viterbi_traceback_df