-
Notifications
You must be signed in to change notification settings - Fork 0
/
my_custom_player_2.py
133 lines (109 loc) · 5.11 KB
/
my_custom_player_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from sample_players import DataPlayer
import random
# Modulus applied to state.board to derive transposition-table keys
# (see the key computation inside CustomPlayer._mtdf).
HASH_SIZE = 10000000
class CustomPlayer(DataPlayer):
    """Knight's Isolation agent using iterative-deepening MTD(f) search.

    The get_action() method is the only required method for this project.
    You can modify the interface for get_action by adding named parameters
    with default values, but the function MUST remain compatible with the
    default interface.

    **********************************************************************
    NOTES:
    - The test cases will NOT be run on a machine with GPU access, nor be
      suitable for using any other machine learning techniques.

    - You can pass state forward to your agent on the next turn by assigning
      any pickleable object to the self.context attribute.
    **********************************************************************
    """

    def __init__(self, player_id):
        """Create the agent; all set-up is delegated to DataPlayer."""
        super().__init__(player_id)

    def get_action(self, state):
        """Employ an adversarial search technique to choose an action
        available in the current state.

        This method must call self.queue.put(ACTION) at least once, and may
        call it as many times as you want; the caller will be responsible
        for cutting off the function after the search time limit has expired.

        See RandomPlayer and GreedyPlayer in sample_players for more examples.

        **********************************************************************
        NOTE:
        - The caller is responsible for cutting off search, so calling
          get_action() from your own code will create an infinite loop!
          Refer to (and use!) the Isolation.play() function to run games.
        **********************************************************************
        """
        if state.ply_count < 2:
            # First move of either player: pick a random action.
            self.queue.put(random.choice(state.actions()))
            return

        # Retrieve the transposition table carried over from earlier turns.
        tt = self.context if self.context else {}
        depth_limit = 3
        guess = 2
        for depth in range(1, depth_limit + 1):
            best_move = self._mtdf(state, guess, depth, tt)
            # Save the table and publish a move after every deepening pass so
            # that a usable answer exists whenever the caller cuts the search
            # off; the context is assigned first so it is already in place
            # when the move is published.
            self.context = tt if tt else None
            self.queue.put(best_move)

    def _mtdf(self, state, guess, depth=4, tt=None):
        """Return the action from `state` with the best MTD(f) score.

        Every successor of `state` is scored with an MTD(f) driver built on a
        negamax alpha-beta search with a transposition table, and the action
        whose successor is worst for the opponent is returned.

        Args:
            state: position to move from. Assumed to be a position in which
                this agent (self.player_id) is to move, as is the case when
                called from get_action().
            guess: first guess of the game value from this agent's point of
                view; it only seeds the search window.
            depth: search depth in plies, counting the root move itself.
            tt: optional dict used as the transposition table; it is mutated
                in place. None disables table look-ups and stores.

        Returns:
            The action in state.actions() with the highest backed-up value
            (the first such action on ties).
        """
        self.tt = tt
        neg_inf, pos_inf = float("-inf"), float("inf")

        def table_key(node):
            # NOTE(review): only state.board is hashed and it is reduced
            # modulo HASH_SIZE, so distinct positions can share a key and
            # reuse each other's bounds -- consider a fuller key.
            return int(node.board % HASH_SIZE)

        def evaluate(node, is_terminal, color):
            # Negamax needs the score from the side to move: `color` is +1
            # when this agent is to move at `node` and -1 otherwise.
            if is_terminal:
                score = node.utility(self.player_id)
            else:
                score = self.utility(node)
            return color * score

        def alpha_beta_tt(node, alpha, beta, depth, color):
            """Fail-soft negamax alpha-beta with transposition-table bounds."""
            lower, upper = neg_inf, pos_inf
            if self.tt is not None:
                entry = self.tt.get(table_key(node))
                # Only trust entries searched at least as deep as requested.
                if entry is not None and entry['depth'] >= depth:
                    lower, upper = entry['alpha'], entry['beta']
                    if lower == upper:
                        return lower
                    if lower >= beta:
                        return lower
                    if upper <= alpha:
                        return upper
                    alpha = max(alpha, lower)
                    beta = min(beta, upper)

            is_terminal = node.terminal_test()
            if is_terminal or depth <= 0:
                value = evaluate(node, is_terminal, color)
                lower = upper = value
            else:
                value = neg_inf
                window_low = alpha
                for action in node.actions():
                    child_value = -alpha_beta_tt(
                        node.result(action), -beta, -window_low,
                        depth - 1, -color)
                    value = max(value, child_value)
                    if value >= beta:
                        break
                    window_low = max(window_low, value)

                if alpha < value < beta:
                    # Value fell inside the window: it is exact.
                    lower = upper = value
                elif value >= beta:
                    # Fail high: value is only a lower bound. The max() keeps
                    # the stored interval well-formed if an older, deeper
                    # entry disagreed with this search.
                    lower = value
                    upper = max(upper, value)
                else:
                    # Fail low: value is only an upper bound.
                    upper = value
                    lower = min(lower, value)

            if self.tt is not None:
                self.tt[table_key(node)] = {'alpha': lower,
                                            'beta': upper,
                                            'depth': depth}
            return value

        def mtdf(node, first_guess, depth, color):
            """Converge on the negamax value of `node` with null windows."""
            value = first_guess
            lower, upper = neg_inf, pos_inf
            while lower < upper:
                gamma = value + 1 if value == lower else value
                value = alpha_beta_tt(node, gamma - 1, gamma, depth, color)
                if value < gamma:
                    upper = value
                else:
                    lower = value
            return value

        # Successors are scored from the opponent's side (color -1), so the
        # guess is negated on the way in and the result on the way out.
        return max(
            state.actions(),
            key=lambda action: -mtdf(state.result(action), -guess,
                                     depth - 1, -1),
        )

    def utility(self, state):
        """Heuristic score of `state` for this agent.

        Returns the number of entries state.liberties() reports for this
        agent's location minus the number it reports for the opponent's.
        """
        own_liberties = state.liberties(state.locs[self.player_id])
        opponent_liberties = state.liberties(state.locs[1 - self.player_id])
        return len(own_liberties) - len(opponent_liberties)