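# mcts.py
#
# Monte Carlo Tree Search for a toy heads-up poker game: the game tree is built
# in node.py and the card/player constants live in common.py; hero's EV for each
# action is estimated by repeated simulations against an always-check/call opponent.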
import random

import numpy as np

from common import *
from node import *


# given a hero card and an opponent range of cards:
# returns hero's average probability of winning the hand: 0..1
def compute_hero_p_win(hero_card, opp_range, board):
    n_win, n_lost, n_tie = 0, 0, 0
    v_hero = cardValue[hero_card[0]]
    v_board = cardValue[board[0][0]]
    for opp_card in opp_range:
        v_opp = cardValue[opp_card[0]]
        if v_opp == v_hero: n_tie += 1
        elif v_hero == v_board: n_win += 1
        elif v_opp == v_board: n_lost += 1
        elif v_hero > v_opp: n_win += 1
        else: n_lost += 1
    n = n_win + n_lost + n_tie
    assert n > 0
    p_win = (n_win + 0.5*n_tie) / n
    return p_win
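
# Illustrative sketch of compute_hero_p_win. The concrete deck lives in common.py,
# which is not shown here; this example assumes a small Leduc-style deck with
# e.g. cardValue = {'A': 3, 'K': 2, 'Q': 1} and two suits per rank; treat those
# values as an assumption, not as the actual contents of common.py:
#
#   compute_hero_p_win('Ah', ['As', 'Kh', 'Qh', 'Qs'], ['Ks'])
#
# hero ties against 'As', loses to 'Kh' (the board pairs the opponent's king),
# and wins against both queens, so p_win = (2 + 0.5*1) / 4 = 0.625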

# chooses a child of the node to explore, based on UCT (Upper Confidence bound applied to Trees):
# if there are any unexplored children, pick one of them at random;
# if all children are explored, use the UCT formula and pick the one with the highest score
def mcts_choose_child_node(node):
    n_children = len(node.nodes)
    unexplored_nodes = [i for i in range(n_children) if node.n_sim[i] == 0]
    if unexplored_nodes != []:
        return random.choice(unexplored_nodes)
    # at this point all children are explored: compute UCT for each and return the child with max UCT
    list_uct = []
    c = np.sqrt(2)  # exploration constant balancing exploitation against exploration
    for i in range(n_children):
        wi = node.ev[i]         # total EV accumulated through child i
        si = node.n_sim[i]      # number of simulations through child i
        sp = node.parent_n_sim  # total number of simulations through this node
        uct = wi/si + c*np.sqrt(np.log(sp)/si)
        list_uct += [(i, uct)]
    list_uct.sort(key=lambda x: x[1], reverse=True)
    return list_uct[0][0]
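
# Worked UCT example for the selection rule above (hypothetical numbers, not taken
# from an actual run): with two children that have accumulated ev = [3.0, 1.0] over
# n_sim = [10, 4] and parent_n_sim = 14, the scores are
#   child 0: 3.0/10 + sqrt(2)*sqrt(ln(14)/10) ≈ 0.30 + 0.73 ≈ 1.03
#   child 1: 1.0/4  + sqrt(2)*sqrt(ln(14)/4)  ≈ 0.25 + 1.15 ≈ 1.40
# so the less-visited child 1 is selected even though its average EV is lower:
# the exploration term favours children with few simulations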

# main Monte Carlo Tree Search function
# parameters:
#   node: the Node where we conduct the search
#   hero_card: the card hero is holding
#   opp_range: list of cards the opponent can possibly hold (must not be empty)
#   board: list of board cards dealt so far ([] before the flop)
#   PLAYER_ID: player for which we conduct the search, either the PLAYER1 or PLAYER0 constant (see common.py)
# returns the EV (expected value) for player PLAYER_ID
#
# Note: for simplicity the opponent always checks/calls as its strategy;
# in a real simulation we would compute a strategy for each player and use that strategy for the opponent
def mcts_search(node, hero_card, opp_range, board, PLAYER_ID):
    # recursively searches a child node:
    # if it is a chance node, it averages over all possible board cards;
    # if it is a terminal node, it returns the EV won by hero;
    # otherwise it simply calls mcts_search on the child node
    def search_child_node(child_node, hero_card, opp_range, board):
        if child_node is not None:
            # chance node: return the average score over all possible flop cards
            if child_node.action_str[-1:] == "|":
                assert board == []
                list_ev = []
                for flop_card in cards:
                    if flop_card == hero_card: continue
                    new_opp_range = [card for card in opp_range if card != flop_card]
                    if new_opp_range == []: continue  # cannot have an empty opponent range
                    list_ev += [mcts_search(child_node, hero_card, new_opp_range, [flop_card], PLAYER_ID)]
                assert list_ev != []
                avg_ev = sum(list_ev)*1.0/len(list_ev)
                return avg_ev
            else:
                return mcts_search(child_node, hero_card, opp_range, board, PLAYER_ID)
        else:
            # leaf node, showdown: return a weighted score
            # when we win we win half of the pot (the money the opponent put in),
            # when we lose we lose half of the pot (the money we put in)
            p_win = compute_hero_p_win(hero_card, opp_range, board)
            half_pot = 0.5*node.pot
            ev = p_win*half_pot + (1.0-p_win)*-half_pot
            return ev

    if node.player_id != PLAYER_ID:  # opponent's turn: use check/call as its strategy
        for action, child_node in node.nodes:
            if action == 'C':
                ev = search_child_node(child_node, hero_card, opp_range, board)
                return ev
        assert False  # only reached if no check/call action exists for the villain, which should not happen
    else:
        # hero's turn:
        # choose either a random unexplored child, or the highest-UCT child if all are explored
        child_idx = mcts_choose_child_node(node)
        # compute the EV of the selected child node
        action, child_node = node.nodes[child_idx]
        if action == 'F':  # Fold node: hero loses 50% of the pot
            ev = -0.5*node.pot
        else:
            ev = search_child_node(child_node, hero_card, opp_range, board)
        # finally update the node statistics
        node.parent_n_sim += 1
        node.ev[child_idx] += ev
        node.n_sim[child_idx] += 1
        return ev
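
# A small helper one might layer on top of mcts_search to read off the final
# strategy once the simulations have run (a sketch, not part of the original
# interface; it only relies on the nodes/ev/n_sim fields used above):
def best_action(node):
    # returns (action, average EV) for the child with the highest average EV;
    # children that were never simulated are skipped to avoid dividing by zero
    averages = [(action, node.ev[i] / node.n_sim[i])
                for i, (action, _) in enumerate(node.nodes) if node.n_sim[i] > 0]
    return max(averages, key=lambda x: x[1])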

if __name__ == "__main__":
    for hero_card in ['Ah', 'Kh', 'Qh']:
        opp_range = [x for x in cards if x != hero_card]
        board = []
        # rebuild the game tree for each hero card,
        # because the strategy is computed for a given hero hole card
        root = node_build_tree(2, 0, "", PLAYER1)
        for _ in range(1000):
            ev = mcts_search(root, hero_card, opp_range, board, root.player_id)
            #print(ev)
        #print(root.nodes)
        #print(root.parent_n_sim)
        #print(root.ev)
        #print(root.n_sim)
        #print("%.2f %.2f" % (root.ev[0]/root.n_sim[0], root.ev[1]/root.n_sim[1]))
        print("results for hero holding %s:" % hero_card)
        for child_idx in range(len(root.nodes)):
            action, _ = root.nodes[child_idx]
            ev = root.ev[child_idx]/root.n_sim[child_idx]
            print(" action: %s ev: %.4f" % (action, ev))