@@ -74,19 +74,33 @@ def train(self, id, model_parameters, global_c, train_loader):
                 self.optimizer.zero_grad()
                 loss.backward()

-                grad = self.model_gradients
+                # grad = self.model_gradients
+                grad = self.model_grads
                 grad = grad - self.cs[id] + global_c
                 idx = 0
-                for parameter in self._model.parameters():
-                    layer_size = parameter.grad.numel()
-                    shape = parameter.grad.shape
-                    #parameter.grad = parameter.grad - self.cs[id][idx:idx + layer_size].view(parameter.grad.shape) + global_c[idx:idx + layer_size].view(parameter.grad.shape)
-                    parameter.grad.data[:] = grad[idx:idx + layer_size].view(shape)[:]
+
+                parameters = self._model.parameters()
+                for p in self._model.state_dict().values():
+                    if p.grad is None:  # BatchNorm buffers have no grad
+                        layer_size = p.numel()
+                    else:
+                        parameter = next(parameters)
+                        layer_size = parameter.data.numel()
+                        shape = parameter.grad.shape
+                        parameter.grad.data[:] = grad[idx:idx + layer_size].view(shape)[:]
                     idx += layer_size

+                # for parameter in self._model.parameters():
+                #     layer_size = parameter.grad.numel()
+                #     shape = parameter.grad.shape
+                #     #parameter.grad = parameter.grad - self.cs[id][idx:idx + layer_size].view(parameter.grad.shape) + global_c[idx:idx + layer_size].view(parameter.grad.shape)
+                #     parameter.grad.data[:] = grad[idx:idx+layer_size].view(shape)[:]
+                #     idx += layer_size
+
                 self.optimizer.step()

         dy = self.model_parameters - frz_model
         dc = -1.0 / (self.epochs * len(train_loader) * self.lr) * dy - global_c
         self.cs[id] += dc
         return [dy, dc]
+
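For context, here is a minimal, self-contained sketch of the bookkeeping the new loop performs. It is not the repository's code: the toy model, variable names, and the zero-padding stand-in for the corrected gradient grad - c_i + c are illustrative assumptions, and it tells parameters and buffers apart by state_dict() key rather than by inspecting .grad. The point is that a vector flattened in state_dict() order also covers BatchNorm buffers (running_mean, running_var, num_batches_tracked), so the slice offset must advance over those entries even though only trainable parameters receive a gradient.

import torch
import torch.nn as nn

# Toy model with a BatchNorm layer, so state_dict() contains buffers
# alongside the trainable parameters.
model = nn.Sequential(nn.Linear(4, 8), nn.BatchNorm1d(8), nn.Linear(8, 2))
model(torch.randn(16, 4)).sum().backward()

params = dict(model.named_parameters())

# Flat vector in state_dict() order: parameter gradients, with zero padding
# in the buffer slots (stand-in for the corrected gradient grad - c_i + c).
flat = torch.cat([
    params[k].grad.view(-1) if k in params else torch.zeros(v.numel())
    for k, v in model.state_dict().items()
])

# Write the slices back, advancing the offset over every entry (parameters
# and buffers alike) so the flat vector stays aligned with state_dict().
idx = 0
for k, v in model.state_dict().items():
    n = v.numel()
    if k in params:
        params[k].grad.data.copy_(flat[idx:idx + n].view(v.shape))
    idx += n
assert idx == flat.numel()

The unchanged dy / dc lines after the loop correspond to SCAFFOLD's option-II control-variate update: with dy = y_i - x and K = self.epochs * len(train_loader) local steps, dc = (x - y_i) / (K * lr) - c, so self.cs[id] += dc yields c_i <- c_i - c + (x - y_i) / (K * lr).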