@@ -40,6 +40,7 @@ opencl_srcs = [
40
40
" src/core/CL/cl_kernels/common/floor.cl" ,
41
41
" src/core/CL/cl_kernels/common/gather.cl" ,
42
42
" src/core/CL/cl_kernels/common/gemm.cl" ,
43
+ " src/core/CL/cl_kernels/common/gemm_utils.cl" ,
43
44
" src/core/CL/cl_kernels/common/gemmlowp.cl" ,
44
45
" src/core/CL/cl_kernels/common/gemv.cl" ,
45
46
" src/core/CL/cl_kernels/common/generate_proposals.cl" ,
@@ -80,17 +81,13 @@ opencl_srcs = [
80
81
" src/core/CL/cl_kernels/nchw/channel_shuffle.cl" ,
81
82
" src/core/CL/cl_kernels/nchw/depth_to_space.cl" ,
82
83
" src/core/CL/cl_kernels/nchw/dequantization_layer.cl" ,
83
- " src/core/CL/cl_kernels/nchw/direct_convolution1x1.cl" ,
84
- " src/core/CL/cl_kernels/nchw/direct_convolution3x3.cl" ,
85
- " src/core/CL/cl_kernels/nchw/direct_convolution5x5.cl" ,
86
- " src/core/CL/cl_kernels/nchw/direct_convolution_quantized.cl" ,
84
+ " src/core/CL/cl_kernels/nchw/direct_convolution.cl" ,
87
85
" src/core/CL/cl_kernels/nchw/im2col.cl" ,
88
86
" src/core/CL/cl_kernels/nchw/normalization_layer.cl" ,
89
87
" src/core/CL/cl_kernels/nchw/normalize_planar_yuv_layer.cl" ,
90
88
" src/core/CL/cl_kernels/nchw/normalize_planar_yuv_layer_quantized.cl" ,
91
89
" src/core/CL/cl_kernels/nchw/pooling_layer.cl" ,
92
90
" src/core/CL/cl_kernels/nchw/prior_box_layer.cl" ,
93
- " src/core/CL/cl_kernels/nchw/remap.cl" ,
94
91
" src/core/CL/cl_kernels/nchw/reorg_layer.cl" ,
95
92
" src/core/CL/cl_kernels/nchw/scale.cl" ,
96
93
" src/core/CL/cl_kernels/nchw/space_to_batch.cl" ,
@@ -114,7 +111,6 @@ opencl_srcs = [
114
111
" src/core/CL/cl_kernels/nhwc/normalize_planar_yuv_layer_quantized.cl" ,
115
112
" src/core/CL/cl_kernels/nhwc/pooling_layer.cl" ,
116
113
" src/core/CL/cl_kernels/nhwc/pooling_layer_quantized.cl" ,
117
- " src/core/CL/cl_kernels/nhwc/remap.cl" ,
118
114
" src/core/CL/cl_kernels/nhwc/reorg_layer.cl" ,
119
115
" src/core/CL/cl_kernels/nhwc/scale.cl" ,
120
116
" src/core/CL/cl_kernels/nhwc/space_to_batch.cl" ,
@@ -241,7 +237,6 @@ cc_library_static {
241
237
" src/core/CL/kernels/CLROIPoolingLayerKernel.cpp" ,
242
238
" src/core/CL/kernels/CLRangeKernel.cpp" ,
243
239
" src/core/CL/kernels/CLReductionOperationKernel.cpp" ,
244
- " src/core/CL/kernels/CLRemapKernel.cpp" ,
245
240
" src/core/CL/kernels/CLReorgLayerKernel.cpp" ,
246
241
" src/core/CL/kernels/CLReverseKernel.cpp" ,
247
242
" src/core/CL/kernels/CLSelectKernel.cpp" ,
@@ -293,7 +288,6 @@ cc_library_static {
293
288
" src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp" ,
294
289
" src/core/NEON/kernels/NERangeKernel.cpp" ,
295
290
" src/core/NEON/kernels/NEReductionOperationKernel.cpp" ,
296
- " src/core/NEON/kernels/NERemapKernel.cpp" ,
297
291
" src/core/NEON/kernels/NEReorgLayerKernel.cpp" ,
298
292
" src/core/NEON/kernels/NEReverseKernel.cpp" ,
299
293
" src/core/NEON/kernels/NESelectKernel.cpp" ,
@@ -336,8 +330,6 @@ cc_library_static {
336
330
" src/core/NEON/kernels/arm_gemm/transform.cpp" ,
337
331
" src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp" ,
338
332
" src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp" ,
339
- " src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp" ,
340
- " src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp" ,
341
333
" src/core/NEON/kernels/convolution/common/padding.cpp" ,
342
334
" src/core/NEON/kernels/convolution/common/qasymm8.cpp" ,
343
335
" src/core/NEON/kernels/convolution/common/qsymm8.cpp" ,
@@ -431,44 +423,79 @@ cc_library_static {
431
423
" src/cpu/kernels/CpuTransposeKernel.cpp" ,
432
424
" src/cpu/kernels/CpuWeightsReshapeKernel.cpp" ,
433
425
" src/cpu/kernels/CpuWinogradConv2dKernel.cpp" ,
434
- " src/cpu/kernels/activation/neon/fp16.cpp" ,
435
- " src/cpu/kernels/activation/neon/fp32.cpp" ,
436
- " src/cpu/kernels/activation/neon/qasymm8.cpp" ,
437
- " src/cpu/kernels/activation/neon/qasymm8_signed.cpp" ,
438
- " src/cpu/kernels/activation/neon/qsymm16.cpp" ,
439
- " src/cpu/kernels/activation/sve/fp16.cpp" ,
440
- " src/cpu/kernels/activation/sve/fp32.cpp" ,
441
- " src/cpu/kernels/activation/sve/qasymm8.cpp" ,
442
- " src/cpu/kernels/activation/sve/qasymm8_signed.cpp" ,
443
- " src/cpu/kernels/activation/sve/qsymm16.cpp" ,
444
- " src/cpu/kernels/add/neon/qasymm8.cpp" ,
445
- " src/cpu/kernels/add/neon/qasymm8_signed.cpp" ,
446
- " src/cpu/kernels/add/neon/qsymm16.cpp" ,
447
- " src/cpu/kernels/add/sve/impl.cpp" ,
448
- " src/cpu/kernels/add/sve/qasymm8.cpp" ,
449
- " src/cpu/kernels/add/sve/qasymm8_signed.cpp" ,
450
- " src/cpu/kernels/add/sve/qsymm16.cpp" ,
451
- " src/cpu/kernels/elementwise/sve/elementwise.cpp" ,
452
- " src/cpu/kernels/elementwise/sve/elementwise_unary.cpp" ,
426
+ " src/cpu/kernels/activation/generic/neon/fp16.cpp" ,
427
+ " src/cpu/kernels/activation/generic/neon/fp32.cpp" ,
428
+ " src/cpu/kernels/activation/generic/neon/qasymm8.cpp" ,
429
+ " src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp" ,
430
+ " src/cpu/kernels/activation/generic/neon/qsymm16.cpp" ,
431
+ " src/cpu/kernels/add/generic/neon/fp16.cpp" ,
432
+ " src/cpu/kernels/add/generic/neon/fp32.cpp" ,
433
+ " src/cpu/kernels/add/generic/neon/impl.cpp" ,
434
+ " src/cpu/kernels/add/generic/neon/integer.cpp" ,
435
+ " src/cpu/kernels/add/generic/neon/qasymm8.cpp" ,
436
+ " src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp" ,
437
+ " src/cpu/kernels/add/generic/neon/qsymm16.cpp" ,
438
+ " src/cpu/kernels/boundingboxtransform/generic/neon/fp16.cpp" ,
439
+ " src/cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp" ,
440
+ " src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp" ,
441
+ " src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp" ,
442
+ " src/cpu/kernels/crop/generic/neon/fp16.cpp" ,
443
+ " src/cpu/kernels/crop/generic/neon/fp32.cpp" ,
444
+ " src/cpu/kernels/crop/generic/neon/impl.cpp" ,
445
+ " src/cpu/kernels/crop/generic/neon/integer.cpp" ,
446
+ " src/cpu/kernels/elementwise_binary/generic/neon/fp16.cpp" ,
447
+ " src/cpu/kernels/elementwise_binary/generic/neon/fp32.cpp" ,
448
+ " src/cpu/kernels/elementwise_binary/generic/neon/integer.cpp" ,
449
+ " src/cpu/kernels/elementwise_binary/generic/neon/qasymm8.cpp" ,
450
+ " src/cpu/kernels/elementwise_binary/generic/neon/qasymm8_signed.cpp" ,
451
+ " src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp" ,
452
+ " src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp" ,
453
+ " src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp" ,
453
454
" src/cpu/kernels/floor/neon/fp16.cpp" ,
454
455
" src/cpu/kernels/floor/neon/fp32.cpp" ,
456
+ " src/cpu/kernels/genproposals/generic/neon/fp16.cpp" ,
457
+ " src/cpu/kernels/genproposals/generic/neon/fp32.cpp" ,
458
+ " src/cpu/kernels/genproposals/generic/neon/impl.cpp" ,
459
+ " src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp" ,
460
+ " src/cpu/kernels/instancenorm/generic/neon/fp16.cpp" ,
461
+ " src/cpu/kernels/instancenorm/generic/neon/fp32.cpp" ,
462
+ " src/cpu/kernels/instancenorm/generic/neon/impl.cpp" ,
455
463
" src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp" ,
456
464
" src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp" ,
465
+ " src/cpu/kernels/maxunpool/generic/neon/fp16.cpp" ,
466
+ " src/cpu/kernels/maxunpool/generic/neon/fp32.cpp" ,
467
+ " src/cpu/kernels/maxunpool/generic/neon/impl.cpp" ,
468
+ " src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp" ,
469
+ " src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp" ,
470
+ " src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp" ,
471
+ " src/cpu/kernels/meanstddevnorm/generic/neon/fp32.cpp" ,
472
+ " src/cpu/kernels/meanstddevnorm/generic/neon/impl.cpp" ,
457
473
" src/cpu/kernels/pool2d/neon/fp16.cpp" ,
458
474
" src/cpu/kernels/pool2d/neon/fp32.cpp" ,
459
475
" src/cpu/kernels/pool2d/neon/nchw/all.cpp" ,
460
476
" src/cpu/kernels/pool2d/neon/qasymm8.cpp" ,
461
477
" src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ,
478
+ " src/cpu/kernels/range/generic/neon/fp16.cpp" ,
479
+ " src/cpu/kernels/range/generic/neon/fp32.cpp" ,
480
+ " src/cpu/kernels/range/generic/neon/impl.cpp" ,
481
+ " src/cpu/kernels/range/generic/neon/integer.cpp" ,
482
+ " src/cpu/kernels/roialign/generic/neon/fp16.cpp" ,
483
+ " src/cpu/kernels/roialign/generic/neon/fp32.cpp" ,
484
+ " src/cpu/kernels/roialign/generic/neon/impl.cpp" ,
485
+ " src/cpu/kernels/roialign/generic/neon/qasymm8.cpp" ,
486
+ " src/cpu/kernels/roialign/generic/neon/qasymm8_signed.cpp" ,
462
487
" src/cpu/kernels/scale/neon/fp16.cpp" ,
463
488
" src/cpu/kernels/scale/neon/integer.cpp" ,
464
489
" src/cpu/kernels/scale/neon/qasymm8.cpp" ,
465
490
" src/cpu/kernels/scale/neon/qasymm8_signed.cpp" ,
466
- " src/cpu/kernels/scale/sve/fp16.cpp" ,
467
- " src/cpu/kernels/scale/sve/fp32.cpp" ,
468
- " src/cpu/kernels/scale/sve/integer.cpp" ,
469
- " src/cpu/kernels/scale/sve/qasymm8.cpp" ,
470
- " src/cpu/kernels/scale/sve/qasymm8_signed.cpp" ,
471
- " src/cpu/kernels/softmax/impl/sve/impl.cpp" ,
491
+ " src/cpu/kernels/select/generic/neon/fp16.cpp" ,
492
+ " src/cpu/kernels/select/generic/neon/fp32.cpp" ,
493
+ " src/cpu/kernels/select/generic/neon/impl.cpp" ,
494
+ " src/cpu/kernels/select/generic/neon/integer.cpp" ,
495
+ " src/cpu/kernels/softmax/generic/neon/fp16.cpp" ,
496
+ " src/cpu/kernels/softmax/generic/neon/fp32.cpp" ,
497
+ " src/cpu/kernels/softmax/generic/neon/qasymm8.cpp" ,
498
+ " src/cpu/kernels/softmax/generic/neon/qasymm8_signed.cpp" ,
472
499
" src/cpu/kernels/sub/neon/qasymm8.cpp" ,
473
500
" src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ,
474
501
" src/cpu/kernels/sub/neon/qsymm16.cpp" ,
@@ -683,7 +710,6 @@ cc_library_static {
683
710
" src/runtime/CL/functions/CLRange.cpp" ,
684
711
" src/runtime/CL/functions/CLReduceMean.cpp" ,
685
712
" src/runtime/CL/functions/CLReductionOperation.cpp" ,
686
- " src/runtime/CL/functions/CLRemap.cpp" ,
687
713
" src/runtime/CL/functions/CLReorgLayer.cpp" ,
688
714
" src/runtime/CL/functions/CLReshapeLayer.cpp" ,
689
715
" src/runtime/CL/functions/CLReverse.cpp" ,
@@ -794,7 +820,6 @@ cc_library_static {
794
820
" src/runtime/NEON/functions/NERange.cpp" ,
795
821
" src/runtime/NEON/functions/NEReduceMean.cpp" ,
796
822
" src/runtime/NEON/functions/NEReductionOperation.cpp" ,
797
- " src/runtime/NEON/functions/NERemap.cpp" ,
798
823
" src/runtime/NEON/functions/NEReorgLayer.cpp" ,
799
824
" src/runtime/NEON/functions/NEReshapeLayer.cpp" ,
800
825
" src/runtime/NEON/functions/NEReverse.cpp" ,
@@ -1013,8 +1038,11 @@ cc_library_static {
1013
1038
" src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp" ,
1014
1039
" src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp" ,
1015
1040
" src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp" ,
1041
+ " src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp" ,
1016
1042
" src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp" ,
1043
+ " src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp" ,
1017
1044
" src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp" ,
1045
+ " src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp" ,
1018
1046
" src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp" ,
1019
1047
" src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp" ,
1020
1048
" src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp" ,
0 commit comments