Adding support for contrib ops: DynamicQuantizeMatMul, FusedMatMul, QuickGelu, SkipSimplifiedLayerNormalization
n1harika authored and ankitm3k committed Feb 24, 2025
1 parent e46c0d8 commit 9734944
Showing 4 changed files with 16 additions and 5 deletions.
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -121,6 +121,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
     {"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
     {"DequantizeLinear", V_2024_4, {"NPU"}},
+    {"DynamicQuantizeMatMul", V_2025_0, {"CPU", "GPU"}},
     {"Div", V_2020_4, {"CPU", "GPU"}},
     {"Dropout", V_2020_4, {"CPU", "GPU"}},
     {"Elu", V_2020_4, {"CPU", "GPU"}},
@@ -136,6 +137,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Floor", V_2020_4, {"CPU", "GPU"}},
     {"FusedConv", V_2023_0, {"CPU", "GPU"}},
     {"FusedGemm", V_2023_0, {"CPU", "GPU"}},
+    {"FusedMatMul", V_2025_0, {"CPU", "GPU"}},
     {"Gather", V_2020_4, {"CPU", "GPU"}},
     {"GatherElements", V_2022_2, {"CPU", "GPU"}},
     {"GatherND", V_2021_4, {"CPU", "GPU"}},
@@ -190,6 +192,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"PRelu", V_2020_4, {"CPU", "GPU"}},
     {"QLinearMatMul", V_2022_3, {"CPU"}},
     {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
+    {"QuickGelu", V_2025_0, {"CPU", "GPU"}},
     {"RNN", V_2023_1, {"CPU", "GPU"}},
     {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
     {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
@@ -229,6 +232,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Sinh", V_2020_4, {"CPU"}},
     {"Size", V_2022_1, {"CPU", "GPU"}},
     {"SkipLayerNormalization", V_2024_5, {"CPU", "GPU"}},
+    {"SkipSimplifiedLayerNormalization", V_2025_0, {"CPU", "GPU"}},
     {"Slice", V_2020_4, {"CPU", "GPU"}},
     {"Softmax", V_2020_4, {"CPU", "GPU"}},
     {"Softplus", V_2022_1, {"CPU", "GPU"}},
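Of the newly listed ops, QuickGelu is the simplest to state: it is the sigmoid approximation of GELU, y = x * sigmoid(alpha * x), with alpha conventionally 1.702 (SkipSimplifiedLayerNormalization, by contrast, fuses a residual add with the mean-free "simplified" layer norm). A one-function sketch of QuickGelu; treat the default alpha as an assumption about the contrib-op attribute rather than a verified declaration:

// QuickGelu(x) = x * sigmoid(alpha * x); alpha = 1.702 is the usual
// constant for this GELU approximation. Illustrative sketch.
#include <cmath>
#include <iostream>

float QuickGelu(float x, float alpha = 1.702f) {
  return x / (1.0f + std::exp(-alpha * x));  // x * sigmoid(alpha * x)
}

int main() {
  for (float x : {-2.0f, 0.0f, 2.0f})
    std::cout << QuickGelu(x) << " ";  // approx -0.064 0 1.94
}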
2 changes: 2 additions & 0 deletions onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc
@@ -171,6 +171,7 @@ void RunDynamicQuantizeMatMulTest() {
   );
 }
 
+#if !defined(USE_OPENVINO)
 TEST(DynamicQuantizeMatMul, HasZeroPoint_NoBias_test_S8) {
   RunDynamicQuantizeMatMulTest<int8_t, true, false>();
 }
@@ -202,6 +203,7 @@ TEST(DynamicQuantizeMatMul, HasZeroPoint_HasBias_test_S8) {
 TEST(DynamicQuantizeMatMul, HasZeroPoint_HasBias_test_U8) {
   RunDynamicQuantizeMatMulTest<uint8_t, true, true>();
 }
+#endif
 
 TEST(DynamicQuantizeMatMul, UInt8_test_with_empty_input) {
   std::vector<int64_t> A_dims{0, 2};
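The new guard compiles the zero-point/bias test variants out of OpenVINO builds while keeping the empty-input test. For context, DynamicQuantizeMatMul quantizes its float A input on the fly, following the ONNX DynamicQuantizeLinear rules, and then multiplies it with the pre-quantized B. A sketch of that quantization step; the all-zero-input safeguard is an added assumption, not taken from the source:

// Dynamic uint8 quantization as in ONNX DynamicQuantizeLinear: widen the
// observed range to include 0, derive scale/zero_point, round and clamp.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

void DynamicQuantize(const std::vector<float>& x, std::vector<uint8_t>& q,
                     float& scale, uint8_t& zero_point) {
  float mn = std::min(0.0f, *std::min_element(x.begin(), x.end()));
  float mx = std::max(0.0f, *std::max_element(x.begin(), x.end()));
  scale = (mx - mn) / 255.0f;
  if (scale == 0.0f) scale = 1.0f;  // all-zero input safeguard (assumption)
  zero_point = static_cast<uint8_t>(
      std::clamp(std::round(-mn / scale), 0.0f, 255.0f));
  q.resize(x.size());
  for (size_t i = 0; i < x.size(); ++i)
    q[i] = static_cast<uint8_t>(
        std::clamp(std::round(x[i] / scale) + zero_point, 0.0f, 255.0f));
}

int main() {
  std::vector<float> a{-1.0f, 0.0f, 2.0f, 3.5f};
  std::vector<uint8_t> q;
  float scale;
  uint8_t zp;
  DynamicQuantize(a, q, scale, zp);
  std::cout << "scale=" << scale << " zero_point=" << int(zp) << "\n";
}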
13 changes: 8 additions & 5 deletions onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc
@@ -19,9 +19,10 @@ static void RunTest(const embedlayernorm::OpData& data,
   bool enable_cuda = HasCudaEnvironment(min_cuda_architecture);
   bool enable_rocm = DefaultRocmExecutionProvider().get() != nullptr;
   bool enable_dml = DefaultDmlExecutionProvider().get() != nullptr;
+  bool enable_openvino = DefaultOpenVINOExecutionProvider().get() != nullptr;
   bool enable_cpu = !use_float16;
 
-  if (enable_cpu || enable_cuda || enable_dml || enable_rocm) {
+  if (enable_cpu || enable_cuda || enable_dml || enable_rocm || enable_openvino) {
     // Input and output shapes
     // Input 0 - input_ids : (batch_size, sequence_size)
     // Input 1 - segment_ids : (batch_size, sequence_size)
@@ -157,12 +158,12 @@ static void RunTest(const embedlayernorm::OpData& data,
       std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
       execution_providers.push_back(DefaultDmlExecutionProvider());
       tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+    } else if (enable_openvino) {
+      std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+      execution_providers.push_back(DefaultOpenVINOExecutionProvider());
+      tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
     } else {
-#if defined(USE_OPENVINO)
-      tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
-#else
       tester.Run();
-#endif
     }
   }
 }
@@ -190,6 +191,7 @@ TEST(EmbedLayerNormTest, EmbedLayerNormBatch3_PositionIds_BroadCast) {
           /*broadcast_position_ids=*/true);
 }
 
+#if !defined(USE_OPENVINO)
 TEST(EmbedLayerNormTest, EmbedLayerNormBatch1_EmbeddingSum) {
   RunTest(embedlayernorm::EmbedLayerNormBatch1_EmbeddingSum(), false, true);
 }
@@ -203,6 +205,7 @@ TEST(EmbedLayerNormTest, EmbedLayerNormBatch1_EmbeddingSum_NoMaskIndex) {
           /* use_float16 = */ false,
           /* sum_output = */ true);
 }
+#endif
 
 TEST(EmbedLayerNormTest, EmbedLayerNormBatch2) {
   RunTest(embedlayernorm::EmbedLayerNormBatch2());
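The runtime enable_openvino branch replaces the earlier compile-time dispatch, so the OpenVINO EP is now exercised through an explicit execution-provider list, just like the DML path above it. As a refresher on what these tests check: EmbedLayerNormalization sums word, position, and (optional) segment embeddings per token, then layer-normalizes the result with gamma/beta. A per-token sketch of that math, with the BERT-style epsilon default of 1e-12 assumed:

// Per-token EmbedLayerNormalization: embedding sum followed by layer norm.
#include <cmath>
#include <iostream>
#include <vector>

std::vector<float> EmbedLayerNorm(const std::vector<float>& word,
                                  const std::vector<float>& pos,
                                  const std::vector<float>& seg,
                                  const std::vector<float>& gamma,
                                  const std::vector<float>& beta,
                                  float eps = 1e-12f) {
  const size_t h = word.size();
  std::vector<float> sum(h), out(h);
  float mean = 0.0f;
  for (size_t i = 0; i < h; ++i) {
    sum[i] = word[i] + pos[i] + seg[i];  // embedding sum
    mean += sum[i];
  }
  mean /= h;
  float var = 0.0f;
  for (size_t i = 0; i < h; ++i) var += (sum[i] - mean) * (sum[i] - mean);
  var /= h;
  const float inv_std = 1.0f / std::sqrt(var + eps);
  for (size_t i = 0; i < h; ++i)
    out[i] = (sum[i] - mean) * inv_std * gamma[i] + beta[i];  // layer norm
  return out;
}

int main() {
  std::vector<float> w{0.2f, 0.1f}, p{0.1f, 0.4f}, s{0.0f, 0.3f};
  std::vector<float> gamma{1.0f, 1.0f}, beta{0.0f, 0.0f};
  for (float v : EmbedLayerNorm(w, p, s, gamma, beta)) std::cout << v << " ";
}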
2 changes: 2 additions & 0 deletions onnxruntime/test/contrib_ops/fused_matmul_op_test.cc
@@ -255,6 +255,7 @@ TEST(FusedMatMulOpTest, FloatTypeScale) {
   RunFusedMatMulTest<float>("FusedMatMul", 1, true, true, false, false, 4.0f, true);
 }
 
+#if !defined(USE_OPENVINO)
 TEST(FusedMatMulOpTest, FloatTypeTransposeBatch) {
   RunFusedMatMulTest<float>("FusedMatMul", 1, false, false, true, false);
   RunFusedMatMulTest<float>("FusedMatMul", 1, false, false, false, true);
@@ -269,6 +270,7 @@ TEST(FusedMatMulOpTest, FloatTypeTransposeBatch) {
   RunFusedMatMulTest<float>("FusedMatMul", 1, true, true, false, true);
   RunFusedMatMulTest<float>("FusedMatMul", 1, true, true, true, true);
 }
+#endif
 
 #if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
 TEST(FusedMatMulOpTest, Float16_NoTranspose) {
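The boolean arguments to RunFusedMatMulTest toggle the transpose and batch-transpose combinations; FusedMatMul itself computes Y = alpha * op(A) * op(B), where op() applies the requested transposes. A 2-D sketch of that contract (batch and transBatch handling omitted; the helper names are illustrative):

// 2-D core of FusedMatMul: Y = alpha * op(A) * op(B).
#include <iostream>
#include <vector>

using Matrix = std::vector<std::vector<float>>;

Matrix Transpose(const Matrix& m) {
  Matrix t(m[0].size(), std::vector<float>(m.size()));
  for (size_t i = 0; i < m.size(); ++i)
    for (size_t j = 0; j < m[0].size(); ++j) t[j][i] = m[i][j];
  return t;
}

Matrix FusedMatMul(Matrix a, Matrix b, float alpha, bool trans_a, bool trans_b) {
  if (trans_a) a = Transpose(a);
  if (trans_b) b = Transpose(b);
  Matrix y(a.size(), std::vector<float>(b[0].size(), 0.0f));
  for (size_t i = 0; i < a.size(); ++i)
    for (size_t k = 0; k < b.size(); ++k)
      for (size_t j = 0; j < b[0].size(); ++j)
        y[i][j] += alpha * a[i][k] * b[k][j];
  return y;
}

int main() {
  Matrix a{{1, 2}, {3, 4}};
  Matrix b{{5, 6}, {7, 8}};
  auto y = FusedMatMul(a, b, /*alpha=*/2.0f, false, false);
  for (auto& row : y) {                    // expect: 38 44 / 86 100
    for (float v : row) std::cout << v << " ";
    std::cout << "\n";
  }
}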
