-- test.lua
-- Forked from blakeMilner/DeepQLearning.
require 'xlua'
local Brain = require 'deepqlearn'
--- Build a sequence of `size` random numbers.
-- Every element is the result of one call to `randf(startnum, endnum)`.
-- `randf` is not defined in this file and must already exist as a global
-- when this function is called (NOTE(review): presumably it is provided by
-- the `deepqlearn` module -- confirm).
-- Values are stored at indices 1..size so the result is a proper Lua
-- sequence: no hole at index 1, and `#result == size`.
-- @param size     number of elements to generate
-- @param startnum first argument forwarded to `randf`
-- @param endnum   second argument forwarded to `randf`
-- @return a table holding `size` values at indices 1..size
function randtable(size, startnum, endnum)
  local rtable = {}
  for i = 1, size do
    -- Lua arrays are 1-based; writing to i + 1 would leave index 1 empty.
    rtable[i] = randf(startnum, endnum)
  end
  return rtable
end
-- Simple test found in readme.md.
-- Each step draws a random outcome index in 1..num_outcomes and builds a
-- state whose values come from randtable(num_outcomes, outcome, outcome + 1).
-- The brain is rewarded with 1 when the action it returns equals that
-- outcome index and 0 otherwise. After training, the same check is repeated
-- with learning switched off and the hit rate is printed.
local num_outcomes = 3
Brain.init(num_outcomes, num_outcomes)

local nb_train = 1000
local nb_test = 1000

-- Training phase. The loop starts at 1 so that exactly nb_train steps run
-- and the progress bar goes from 1 to nb_train, matching the test loop.
for k = 1, nb_train do
  local rand_outcome = math.random(1, num_outcomes)
  local state = randtable(num_outcomes, rand_outcome, rand_outcome + 1)

  xlua.progress(k, nb_train)

  -- table.copy is not part of standard Lua; NOTE(review): presumably it is
  -- installed by one of the required modules -- confirm. The brain receives
  -- the copy rather than `state` itself.
  local newstate = table.copy(state)
  local action = Brain.forward(newstate) -- returns index of chosen action
  local reward = (action == rand_outcome) and 1 or 0
  Brain.backward(reward) -- learning magic happens
end

Brain.epsilon_test_time = 0.0 -- don't make any more random choices
Brain.learning = false

-- Evaluation phase: get an action from the learned policy for each fresh
-- state and count how often it matches the outcome that generated the state.
local cnt = 0
for k = 1, nb_test do
  xlua.progress(k, nb_test)

  local rand_outcome = math.random(1, num_outcomes)
  local state = randtable(num_outcomes, rand_outcome, rand_outcome + 1)
  local newstate = table.copy(state)
  local output = Brain.forward(newstate)

  if rand_outcome == output then
    cnt = cnt + 1
  end
end

print("Test cases correct: " .. tostring(100 * cnt/nb_test) .. " %")