@@ -38,7 +38,7 @@ class ConvDescriptor(Structure):
38
38
infiniopConvDescriptor_t = POINTER (ConvDescriptor )
39
39
40
40
41
- def conv (x , w , stride , padding , dilation ):
41
+ def conv (x , w , b , stride , padding , dilation ):
42
42
ndim = len (x .shape ) - 2
43
43
conv_func_map = {
44
44
1 : F .conv1d ,
@@ -54,10 +54,10 @@ def conv(x, w, stride, padding, dilation):
54
54
conv_func = conv_func_map [ndim ]
55
55
56
56
if PROFILE :
57
- ans = conv_func (x , w , stride = stride , padding = padding , dilation = dilation )
57
+ ans = conv_func (x , w , b , stride = stride , padding = padding , dilation = dilation )
58
58
torch .cuda .synchronize ()
59
59
return ans
60
- return conv_func (x , w , stride = stride , padding = padding , dilation = dilation )
60
+ return conv_func (x , w , b , stride = stride , padding = padding , dilation = dilation )
61
61
62
62
63
63
# infer the shape of the output given the inputs for a N-ary convolution
@@ -98,30 +98,33 @@ def test(
98
98
pads ,
99
99
strides ,
100
100
dilations ,
101
- tensor_stride = None ,
101
+ add_bias ,
102
102
tensor_dtype = torch .float16 ,
103
103
):
104
104
assert len (pads ) == len (strides ) == len (dilations )
105
105
print (
106
- f"Testing Conv on { torch_device } with x_shape: { x_shape } , w_shape: { w_shape } , b_shape: { w_shape [0 ]} , pads: { pads } , strides: { strides } , dilations: { dilations } , x_stride: { tensor_stride } dtype:{ tensor_dtype } "
106
+ f"Testing Conv on { torch_device } with x_shape: { x_shape } , w_shape: { w_shape } , add_bias: { add_bias } , "
107
+ f"b_shape: { w_shape [0 ]} , pads: { pads } , strides: { strides } , dilations: { dilations } , dtype:{ tensor_dtype } "
107
108
)
108
109
x = torch .rand (x_shape , dtype = tensor_dtype ).to (torch_device )
109
110
w = torch .rand (w_shape , dtype = tensor_dtype ).to (torch_device )
111
+ b = torch .round ((torch .rand (w_shape [0 ], dtype = tensor_dtype ).to (torch_device ) * 2 - 1 ) * 1000 ) / 1000 if add_bias else None
110
112
y = torch .zeros (
111
113
inferShape (x .shape , w .shape , pads , strides , dilations ), dtype = tensor_dtype
112
114
).to (torch_device )
113
115
114
116
for i in range (NUM_PRERUN if PROFILE else 1 ):
115
- ans = conv (x , w , strides , pads , dilations )
117
+ ans = conv (x , w , b , strides , pads , dilations )
116
118
if PROFILE :
117
119
start_time = time .time ()
118
120
for i in range (NUM_ITERATIONS ):
119
- _ = conv (x , w , strides , pads , dilations )
121
+ _ = conv (x , w , b , strides , pads , dilations )
120
122
elapsed = (time .time () - start_time ) / NUM_ITERATIONS
121
123
print (f"pytorch time: { elapsed :6f} " )
122
124
123
125
x_tensor = to_tensor (x , lib )
124
126
w_tensor = to_tensor (w , lib )
127
+ b_tensor = to_tensor (b , lib ) if b is not None else None
125
128
y_tensor = to_tensor (y , lib )
126
129
descriptor = infiniopConvDescriptor_t ()
127
130
@@ -132,6 +135,7 @@ def test(
132
135
y_tensor .descriptor ,
133
136
x_tensor .descriptor ,
134
137
w_tensor .descriptor ,
138
+ b_tensor .descriptor if b_tensor else None ,
135
139
tuple_to_void_p (pads ),
136
140
tuple_to_void_p (strides ),
137
141
tuple_to_void_p (dilations ),
@@ -154,6 +158,7 @@ def test(
154
158
y_tensor .data ,
155
159
x_tensor .data ,
156
160
w_tensor .data ,
161
+ b_tensor .data if b_tensor else None ,
157
162
None ,
158
163
)
159
164
)
@@ -168,6 +173,10 @@ def test(
168
173
y_tensor .data ,
169
174
x_tensor .data ,
170
175
w_tensor .data ,
176
+ b_tensor .data if b_tensor else None ,
171
180
None ,
172
181
)
173
182
)
@@ -184,18 +193,18 @@ def test(
184
193
def test_cpu (lib , test_cases ):
185
194
device = DeviceEnum .DEVICE_CPU
186
195
handle = create_handle (lib , device )
187
- for x_shape , w_shape , pads , strides , dilations , x_strides in test_cases :
188
- test (lib , handle , "cpu" , x_shape , w_shape , pads , strides , dilations , x_strides , tensor_dtype = torch .float16 )
189
- test (lib , handle , "cpu" , x_shape , w_shape , pads , strides , dilations , x_strides , tensor_dtype = torch .float32 )
196
+ for x_shape , w_shape , pads , strides , dilations , add_bias in test_cases :
197
+ test (lib , handle , "cpu" , x_shape , w_shape , pads , strides , dilations , add_bias , tensor_dtype = torch .float16 )
198
+ test (lib , handle , "cpu" , x_shape , w_shape , pads , strides , dilations , add_bias , tensor_dtype = torch .float32 )
190
199
destroy_handle (lib , handle )
191
200
192
201
193
202
def test_cuda (lib , test_cases ):
194
203
device = DeviceEnum .DEVICE_CUDA
195
204
handle = create_handle (lib , device )
196
- for x_shape , w_shape , pads , strides , dilations , x_strides in test_cases :
197
- test (lib , handle , "cuda" , x_shape , w_shape , pads , strides , dilations , x_strides , tensor_dtype = torch .float16 )
198
- test (lib , handle , "cuda" , x_shape , w_shape , pads , strides , dilations , x_strides , tensor_dtype = torch .float32 )
205
+ for x_shape , w_shape , pads , strides , dilations , add_bias in test_cases :
206
+ test (lib , handle , "cuda" , x_shape , w_shape , pads , strides , dilations , add_bias , tensor_dtype = torch .float16 )
207
+ test (lib , handle , "cuda" , x_shape , w_shape , pads , strides , dilations , add_bias , tensor_dtype = torch .float32 )
199
208
destroy_handle (lib , handle )
200
209
201
210
@@ -204,54 +213,62 @@ def test_bang(lib, test_cases):
204
213
205
214
device = DeviceEnum .DEVICE_BANG
206
215
handle = create_handle (lib , device )
207
- for x_shape , w_shape , pads , strides , dilations , x_strides in test_cases :
208
- test (lib , handle , "mlu" , x_shape , w_shape , pads , strides , dilations , x_strides , tensor_dtype = torch .float16 )
209
- test (lib , handle , "mlu" , x_shape , w_shape , pads , strides , dilations , x_strides , tensor_dtype = torch .float32 )
216
+ for x_shape , w_shape , pads , strides , dilations , add_bias in test_cases :
217
+ test (lib , handle , "mlu" , x_shape , w_shape , pads , strides , dilations , add_bias , tensor_dtype = torch .float16 )
218
+ test (lib , handle , "mlu" , x_shape , w_shape , pads , strides , dilations , add_bias , tensor_dtype = torch .float32 )
210
219
destroy_handle (lib , handle )
211
220
212
221
213
222
if __name__ == "__main__" :
214
223
test_cases = [
215
- # x_shape, w_shape, pads, strides, dilations, x_strides
224
+ # x_shape, w_shape, pads, strides, dilations, add_bias
216
225
(
217
226
(32 , 3 , 4 ),
218
227
(32 , 3 , 5 ),
219
228
(1 ,),
220
229
(1 ,),
221
230
(1 ,),
222
- None ,
231
+ False ,
232
+ ),
233
+ (
234
+ (3 , 7 , 4 ),
235
+ (3 , 7 , 5 ),
236
+ (1 ,),
237
+ (1 ,),
238
+ (1 ,),
239
+ True ,
223
240
),
224
241
(
225
242
(1 , 3 , 4 , 4 ),
226
243
(2 , 3 , 3 , 3 ),
227
244
(1 , 1 ),
228
245
(1 , 2 ),
229
246
(2 , 1 ),
230
- None ,
247
+ True ,
231
248
),
232
249
(
233
250
(32 , 3 , 128 , 128 ),
234
251
(64 , 3 , 5 , 5 ),
235
252
(2 , 2 ),
236
253
(2 , 2 ),
237
254
(1 , 1 ),
238
- None ,
255
+ False ,
239
256
),
240
257
(
241
258
(1 , 1 , 4 , 4 , 4 ),
242
259
(1 , 1 , 5 , 5 , 5 ),
243
260
(1 , 1 , 1 ),
244
261
(1 , 1 , 1 ),
245
262
(1 , 1 , 1 ),
246
- None ,
263
+ True ,
247
264
),
248
265
(
249
266
(32 , 3 , 32 , 32 , 32 ),
250
267
(64 , 3 , 5 , 5 , 5 ),
251
268
(3 , 2 , 2 ),
252
269
(4 , 3 , 3 ),
253
270
(2 , 2 , 1 ),
254
- None ,
271
+ False ,
255
272
),
256
273
]
257
274
args = get_args ()
@@ -263,6 +280,7 @@ def test_bang(lib, test_cases):
263
280
infiniopTensorDescriptor_t ,
264
281
infiniopTensorDescriptor_t ,
265
282
infiniopTensorDescriptor_t ,
283
+ infiniopTensorDescriptor_t ,
266
284
c_void_p ,
267
285
c_void_p ,
268
286
c_void_p ,
@@ -277,6 +295,7 @@ def test_bang(lib, test_cases):
277
295
c_void_p ,
278
296
c_void_p ,
279
297
c_void_p ,
298
+ c_void_p ,
280
299
]
281
300
lib .infiniopDestroyConvDescriptor .restype = c_int32
282
301
lib .infiniopDestroyConvDescriptor .argtypes = [
0 commit comments