q3_sgd.py
# Save parameters every few SGD iterations as a fail-safe
SAVE_PARAMS_EVERY = 1000

import glob
import random
import numpy as np
import os.path as op
import pickle
def load_saved_params():
    """ A helper function that loads previously saved parameters and resets
    the iteration start """
    st = 0
    # Find the checkpoint with the highest iteration number
    for f in glob.glob("saved_params_*.npy"):
        iter = int(op.splitext(op.basename(f))[0].split("_")[2])
        if iter > st:
            st = iter
    if st > 0:
        print("Loading saved_params_%d.npy" % st)
        with open("saved_params_%d.npy" % st, "rb") as pickleFile:
            params = pickle.load(pickleFile)
            state = pickle.load(pickleFile)
        return st, params, state
    else:
        return st, None, None
def save_params(iter, params):
    # Pickle requires a binary-mode file handle
    with open("saved_params_%d.npy" % iter, "wb") as f:
        pickle.dump(params, f)
        pickle.dump(random.getstate(), f)
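
# A minimal round-trip sketch for the two helpers above (an illustration,
# not part of the assignment handout; assumes write access to the working
# directory and no other saved_params_*.npy files present):
#
#     save_params(42, np.zeros(5))
#     start, params, state = load_saved_params()
#     # start == 42, params == np.zeros(5), and state restores the RNG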
def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
        PRINT_EVERY=10):
    """ Stochastic Gradient Descent """
    # Implement the stochastic gradient descent method in this
    # function.
    # Inputs:
    # - f: the function to optimize; it should take a single
    #   argument and yield two outputs, a cost and the gradient
    #   with respect to the argument
    # - x0: the initial point to start SGD from
    # - step: the step size for SGD
    # - iterations: total iterations to run SGD for
    # - postprocessing: postprocessing function for the parameters
    #   if necessary. In the case of word2vec we will need to
    #   normalize the word vectors to have unit length.
    # - PRINT_EVERY: how many iterations between progress outputs
    # Output:
    # - x: the parameter value after SGD finishes

    # Anneal learning rate every several iterations
    ANNEAL_EVERY = 20000

    if useSaved:
        start_iter, oldx, state = load_saved_params()
        if start_iter > 0:
            x0 = oldx
            # Re-apply every annealing step that happened before the save
            step *= 0.5 ** (start_iter // ANNEAL_EVERY)
        if state:
            random.setstate(state)
    else:
        start_iter = 0

    x = x0
    if not postprocessing:
        postprocessing = lambda x: x

    expcost = None
    for iter in range(start_iter + 1, iterations + 1):
        # Don't forget to apply the postprocessing after every iteration!
        # You might want to print the progress every few iterations.
        cost = None
        ### YOUR CODE HERE
        cost, grad = f(x)
        x -= step * grad
        x = postprocessing(x)
        ### END YOUR CODE

        if iter % PRINT_EVERY == 0:
            # Report an exponential moving average of the cost
            if not expcost:
                expcost = cost
            else:
                expcost = .95 * expcost + .05 * cost
            print("iter %d: %f" % (iter, expcost))

        if iter % SAVE_PARAMS_EVERY == 0 and useSaved:
            save_params(iter, x)

        if iter % ANNEAL_EVERY == 0:
            step *= 0.5

    return x
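
# Example invocation with checkpointing and annealing in play (a sketch;
# `cost_and_grad` and `normalize_rows` are hypothetical stand-ins for the
# word2vec wrapper and postprocessing used elsewhere in the assignment):
#
#     x = sgd(cost_and_grad, x0, step=0.3, iterations=40000,
#             postprocessing=normalize_rows, useSaved=True, PRINT_EVERY=10)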
def sanity_check():
    quad = lambda x: (np.sum(x ** 2), x * 2)

    print("Running sanity checks...")
    t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)
    print("test 1 result:", t1)
    assert abs(t1) <= 1e-6

    t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
    print("test 2 result:", t2)
    assert abs(t2) <= 1e-6

    t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
    print("test 3 result:", t3)
    assert abs(t3) <= 1e-6

    print("")
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q3_sgd.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print("Running your sanity checks...")
    ### YOUR CODE HERE
    ### END YOUR CODE
if __name__ == "__main__":
sanity_check();
your_sanity_checks();