From 1437b7a57de2da102b5abba17c16f5ff10b90850 Mon Sep 17 00:00:00 2001 From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com> Date: Tue, 26 Sep 2023 17:30:23 -0700 Subject: [PATCH] [FEA] Add json_tuple function as supported in qualification tool (#589) * add json_tuple support Signed-off-by: cindyyuanjiang * updated unit test name Signed-off-by: cindyyuanjiang * updated unit test name Signed-off-by: cindyyuanjiang --------- Signed-off-by: cindyyuanjiang --- core/src/main/resources/operatorsScore-databricks-aws.csv | 1 + core/src/main/resources/operatorsScore-databricks-azure.csv | 1 + core/src/main/resources/operatorsScore-dataproc-l4.csv | 1 + core/src/main/resources/operatorsScore-dataproc-t4.csv | 1 + core/src/main/resources/operatorsScore-emr-a10.csv | 1 + core/src/main/resources/operatorsScore-emr-t4.csv | 1 + core/src/main/resources/operatorsScore.csv | 1 + core/src/main/resources/supportedExprs.csv | 3 +++ .../spark/rapids/tool/planparser/SqlPlanParserSuite.scala | 5 ++--- user_tools/custom_speedup_factors/operatorsList.csv | 1 + 10 files changed, 13 insertions(+), 3 deletions(-) diff --git a/core/src/main/resources/operatorsScore-databricks-aws.csv b/core/src/main/resources/operatorsScore-databricks-aws.csv index 573b4c57b..a7e9a72da 100644 --- a/core/src/main/resources/operatorsScore-databricks-aws.csv +++ b/core/src/main/resources/operatorsScore-databricks-aws.csv @@ -130,6 +130,7 @@ IntegralDivide,2.45 IsNaN,2.45 IsNotNull,2.45 IsNull,2.45 +JsonTuple,2.45 KnownFloatingPointNormalized,2.45 KnownNotNull,2.45 Lag,2.45 diff --git a/core/src/main/resources/operatorsScore-databricks-azure.csv b/core/src/main/resources/operatorsScore-databricks-azure.csv index 6acf4c295..77cc14302 100644 --- a/core/src/main/resources/operatorsScore-databricks-azure.csv +++ b/core/src/main/resources/operatorsScore-databricks-azure.csv @@ -130,6 +130,7 @@ IntegralDivide,2.73 IsNaN,2.73 IsNotNull,2.73 IsNull,2.73 +JsonTuple,2.73 KnownFloatingPointNormalized,2.73 KnownNotNull,2.73 Lag,2.73 diff --git a/core/src/main/resources/operatorsScore-dataproc-l4.csv b/core/src/main/resources/operatorsScore-dataproc-l4.csv index a3d9aeba5..ad525fa89 100644 --- a/core/src/main/resources/operatorsScore-dataproc-l4.csv +++ b/core/src/main/resources/operatorsScore-dataproc-l4.csv @@ -130,6 +130,7 @@ IntegralDivide,4.16 IsNaN,4.16 IsNotNull,4.16 IsNull,4.16 +JsonTuple,4.16 KnownFloatingPointNormalized,4.16 KnownNotNull,4.16 Lag,4.16 diff --git a/core/src/main/resources/operatorsScore-dataproc-t4.csv b/core/src/main/resources/operatorsScore-dataproc-t4.csv index 20e8cd820..59a8808b9 100644 --- a/core/src/main/resources/operatorsScore-dataproc-t4.csv +++ b/core/src/main/resources/operatorsScore-dataproc-t4.csv @@ -130,6 +130,7 @@ IntegralDivide,4.88 IsNaN,4.88 IsNotNull,4.88 IsNull,4.88 +JsonTuple,4.88 KnownFloatingPointNormalized,4.88 KnownNotNull,4.88 Lag,4.88 diff --git a/core/src/main/resources/operatorsScore-emr-a10.csv b/core/src/main/resources/operatorsScore-emr-a10.csv index ea283c9d8..ccb1dfeab 100644 --- a/core/src/main/resources/operatorsScore-emr-a10.csv +++ b/core/src/main/resources/operatorsScore-emr-a10.csv @@ -130,6 +130,7 @@ IntegralDivide,2.59 IsNaN,2.59 IsNotNull,2.59 IsNull,2.59 +JsonTuple,2.59 KnownFloatingPointNormalized,2.59 KnownNotNull,2.59 Lag,2.59 diff --git a/core/src/main/resources/operatorsScore-emr-t4.csv b/core/src/main/resources/operatorsScore-emr-t4.csv index 9b517152d..31beecd4f 100644 --- a/core/src/main/resources/operatorsScore-emr-t4.csv +++ b/core/src/main/resources/operatorsScore-emr-t4.csv @@ -130,6 +130,7 @@ IntegralDivide,2.07 IsNaN,2.07 IsNotNull,2.07 IsNull,2.07 +JsonTuple,2.07 KnownFloatingPointNormalized,2.07 KnownNotNull,2.07 Lag,2.07 diff --git a/core/src/main/resources/operatorsScore.csv b/core/src/main/resources/operatorsScore.csv index 5d4a022e1..c03805c87 100644 --- a/core/src/main/resources/operatorsScore.csv +++ b/core/src/main/resources/operatorsScore.csv @@ -135,6 +135,7 @@ IntegralDivide,4 IsNaN,4 IsNotNull,4 IsNull,4 +JsonTuple,4 KnownFloatingPointNormalized,4 KnownNotNull,4 Lag,4 diff --git a/core/src/main/resources/supportedExprs.csv b/core/src/main/resources/supportedExprs.csv index 3b335fd8f..f1a657dbb 100644 --- a/core/src/main/resources/supportedExprs.csv +++ b/core/src/main/resources/supportedExprs.csv @@ -270,6 +270,9 @@ IsNotNull,S,`isnotnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,P IsNotNull,S,`isnotnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,S,`json_tuple`,None,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,S,`json_tuple`,None,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,S,`json_tuple`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/planparser/SqlPlanParserSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/planparser/SqlPlanParserSuite.scala index 6772e6140..a09472c1f 100644 --- a/core/src/test/scala/com/nvidia/spark/rapids/tool/planparser/SqlPlanParserSuite.scala +++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/planparser/SqlPlanParserSuite.scala @@ -781,7 +781,7 @@ class SQLPlanParserSuite extends BaseTestSuite { } } - test("Expression not supported in Generate") { + test("json_tuple is supported in Generate") { TrampolineUtil.withTempDir { eventLogDir => val (eventLog, _) = ToolTestUtils.generateEventLog(eventLogDir, "Expressions in Generate") { spark => @@ -791,7 +791,6 @@ class SQLPlanParserSuite extends BaseTestSuite { |"City":"ABCDE","State":"YZ"}""".stripMargin val data = Seq((1, jsonString)) val df = data.toDF("id", "jsonValues") - //json_tuple which is called from GenerateExec is not supported in GPU yet. df.select(col("id"), json_tuple(col("jsonValues"), "Zipcode", "ZipCodeType", "City")) } val pluginTypeChecker = new PluginTypeChecker() @@ -802,7 +801,7 @@ class SQLPlanParserSuite extends BaseTestSuite { } val execInfo = getAllExecsFromPlan(parsedPlans.toSeq) val generateExprs = execInfo.filter(_.exec == "Generate") - assertSizeAndNotSupported(1, generateExprs) + assertSizeAndSupported(1, generateExprs) } } diff --git a/user_tools/custom_speedup_factors/operatorsList.csv b/user_tools/custom_speedup_factors/operatorsList.csv index d382da6e7..b05bec1e0 100644 --- a/user_tools/custom_speedup_factors/operatorsList.csv +++ b/user_tools/custom_speedup_factors/operatorsList.csv @@ -123,6 +123,7 @@ IntegralDivide IsNaN IsNotNull IsNull +JsonTuple KnownFloatingPointNormalized KnownNotNull Lag