# tic_tac_toe[ai].py
import random
from matplotlib import pyplot as plt
from game import TicTacToe
from agents import DQNAgent, Q_learning
import torch
from models import CNN_Model, DNN
random.seed(42)
torch.manual_seed(42)
# note: a more negative reward is better than a more positive reward
# Steps to use a Q-learning agent (a minimal sketch follows below):
# 1. create a game object
# 2. create a Q-learning object
# 3. play the game
# 4. update the Q-learning object
"""Types of agents:
1. Q_learning(letter): Q-learning agent playing as `letter`
2. DQNAgent(letter, model): DQN agent playing as `letter`, with `model` as its network
3. RandomAgent(letter): random agent playing as `letter`
"""
if __name__ == '__main__':
    board_size = 3
    game = TicTacToe(board_size)
    plt.ion()
    X_agent = DQNAgent('X', CNN_Model(board_size))
    # X_agent.load_q(r"E:\re_inforcement_learning\x.pth")
    O_agent = Q_learning('O')  # or DQNAgent('O', CNN_Model(board_size))
    x_score = []
    o_score = []
    tie_score = []
    episodes = 10_000
    for i in range(episodes):
        # first = [X_agent, O_agent][(i + 1) % 2]  # uncomment to alternate which agent moves first
        first = X_agent  # X always moves first; comment this out and use the line above to alternate
        second = X_agent if first == O_agent else O_agent
        game.reset()
        temp = 0
        while True:
            # first player keeps sampling until it produces a legal move
            while True:
                actionfirst = first.get_action(game)
                if game.make_move(actionfirst, first.letter):
                    break
            if temp:
                second.update(game)  # second learns from the state left by first's move
            else:
                temp = 1  # skip the update until second has made its first move
            if game.current_winner is not None:
                game.x_wins += 1 if game.current_winner == 'X' else 0
                game.o_wins += 1 if game.current_winner == 'O' else 0
                game.tie += 1 if game.current_winner == 'T' else 0
                first.update(game)
                break
            # second player keeps sampling until it produces a legal move
            while True:
                secondaction = second.get_action(game)
                if game.make_move(secondaction, second.letter):
                    break
            first.update(game)
            if game.current_winner is not None:
                game.x_wins += 1 if game.current_winner == 'X' else 0
                game.o_wins += 1 if game.current_winner == 'O' else 0
                game.tie += 1 if game.current_winner == 'T' else 0
                second.update(game)
                break
        total = game.x_wins + game.o_wins + game.tie
        x_score.append(game.x_wins / total)
        o_score.append(game.o_wins / total)
        tie_score.append(game.tie / total)
        if (i + 1) % 1000 == 0:
            print(f"i={i}", game.x_wins / total, game.o_wins / total, game.tie / total)
    plt.title('Trained')
    plt.xlabel('Number of Games')
    plt.ylabel('Score %')
    plt.plot(x_score)
    plt.plot(o_score)
    plt.plot(tie_score)
    total = game.x_wins + game.o_wins + game.tie
    plt.legend([f'X-{game.x_wins / total * 100}',
                f'O-{game.o_wins / total * 100}',
                f'Tie-{game.tie / total * 100}'])
    plt.ylim(ymin=0)
    plt.show(block=True)
    if plt.waitforbuttonpress(0.000001):
        pass
    # X_agent.save_q(r"E:\re_inforcement_learning\x.pth")
    # O_agent.save_q(r"E:\re_inforcement_learning\o.pth")
    # print(game.x_wins / total, game.o_wins / total, game.tie / total, len(X_agent.q))  # , set(game.prev_game).__len__()
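
    # A possible follow-up (sketch, left commented out): reload the saved X policy and
    # play it against the RandomAgent listed in the docstring. RandomAgent and its
    # get_action/letter interface are assumptions based on that docstring.
    # from agents import RandomAgent
    # X_agent.load_q(r"E:\re_inforcement_learning\x.pth")
    # rand_opponent = RandomAgent('O')
    # eval_wins = 0
    # for _ in range(1000):
    #     game.reset()
    #     while game.current_winner is None:
    #         for player in (X_agent, rand_opponent):
    #             while not game.make_move(player.get_action(game), player.letter):
    #                 pass
    #             if game.current_winner is not None:
    #                 break
    #     eval_wins += game.current_winner == 'X'
    # print(f"X win rate vs RandomAgent: {eval_wins / 1000:.2%}")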