@@ -145,7 +145,7 @@ def _choose_step_sgd(initial, k):
 
 @normalize
 def sgd(X, y, max_iter=1e3, tol=1e-8, family=Logistic, batch_size=64,
-        initial_step=10.0, n=None, **kwargs):
+        initial_step=1.0, **kwargs):
     """Stochastic Gradient Descent.
 
     Parameters
@@ -164,34 +164,33 @@ def sgd(X, y, max_iter=1e3, tol=1e-8, family=Logistic, batch_size=64,
     initial_step : float
         Initial step size used in the optimization. The step size decays like
         initial_step/(1 + iter_count).
-    n : int
-        The number of examples, or the first dimension of the matrix X. This argument will only be used if X.shape[1] is NaN.
     family : Family
 
     Returns
     -------
     beta : array-like, shape (n_features,)
     """
-    gradient, hessian = family.gradient, family.hessian
-    n_examples, p = X.shape
-    if not np.isnan(n_examples):
-        n = n_examples
-    if n is None:
-        raise ValueError('Pass number of examples in with kwarg `n`')
-    beta = np.zeros(p)  # always init to zeros?
+    gradient = family.gradient
+    n, p = X.shape
+    if np.isnan(n):
+        raise ValueError('SGD needs shape information to allow indexing. '
+                         'Pass a computed array in (`X.compute()` or '
+                         '`X.values.compute()`), then wrap it with '
+                         '`dask.array.from_array`.')
+
+    beta = np.zeros(p)
 
     iter_count = 0
     converged = False
 
     while not converged:
-        beta_old = beta
+        beta_old = beta.copy()
         iter_count += 1
 
         i = np.random.choice(n, size=(batch_size,))
         Xbeta = dot(X[i], beta)
 
-        grad = gradient(Xbeta, X[i], y[i])
-        (grad,) = compute((grad,))
+        grad = gradient(Xbeta, X[i], y[i]).compute()
 
         beta -= _choose_step_sgd(initial_step, iter_count) * grad / batch_size
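
For reference, a minimal NumPy-only sketch of the update rule this loop implements, assuming `_choose_step_sgd(initial, k)` decays the step as `initial / (1 + k)` (as the docstring states) and using a logistic-loss gradient. `logistic_grad` and `sgd_numpy` are illustrative names, not part of dask-glm:

```python
import numpy as np


def _choose_step_sgd(initial, k):
    # Step size decays like initial / (1 + k), per the docstring above.
    return initial / (1 + k)


def logistic_grad(Xbeta, X, y):
    # Gradient of the logistic loss: X.T @ (sigmoid(X @ beta) - y)
    p = 1.0 / (1.0 + np.exp(-Xbeta))
    return X.T.dot(p - y)


def sgd_numpy(X, y, max_iter=1e3, tol=1e-8, batch_size=64, initial_step=1.0):
    n, p = X.shape
    beta = np.zeros(p)
    for iter_count in range(1, int(max_iter) + 1):
        beta_old = beta.copy()
        i = np.random.choice(n, size=(batch_size,))
        grad = logistic_grad(X[i].dot(beta), X[i], y[i])
        beta -= _choose_step_sgd(initial_step, iter_count) * grad / batch_size
        if np.linalg.norm(beta - beta_old) < tol:
            break
    return beta


# Example: recover a known coefficient vector from synthetic data.
rng = np.random.RandomState(0)
X = rng.randn(1000, 3)
true_beta = np.array([1.0, -2.0, 0.5])
y = (rng.rand(1000) < 1.0 / (1.0 + np.exp(-X.dot(true_beta)))).astype(float)
print(sgd_numpy(X, y))
```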
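And a hedged usage sketch of the revised `sgd`: since `n` now comes straight from `X.shape`, the inputs need concrete shape information, which the new ValueError message suggests obtaining via `X.compute()` plus `dask.array.from_array`. The import paths below (`dask_glm.algorithms`, `dask_glm.families`) are assumptions; this diff does not show where `sgd` and `Logistic` live:

```python
import numpy as np
import dask.array as da

# Hypothetical import locations; the diff does not show the module paths.
from dask_glm.algorithms import sgd
from dask_glm.families import Logistic

# Build inputs with known chunk sizes so X.shape does not contain NaN,
# which is what the new ValueError message asks for.
X_np = np.random.randn(1000, 5)
y_np = (np.random.rand(1000) > 0.5).astype(float)

X = da.from_array(X_np, chunks=(100, 5))
y = da.from_array(y_np, chunks=(100,))

beta = sgd(X, y, family=Logistic, batch_size=64, initial_step=1.0)
print(beta)
```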