Skip to content

Commit

Permalink
added xxhash64 function as supported (#597)
Browse files Browse the repository at this point in the history
Signed-off-by: cindyyuanjiang <[email protected]>
  • Loading branch information
cindyyuanjiang authored Sep 28, 2023
1 parent 22747b1 commit ef076a3
Show file tree
Hide file tree
Showing 10 changed files with 41 additions and 1 deletion.
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-databricks-aws.csv
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ VarianceSamp,2.45
WeekDay,2.45
WindowExpression,2.45
WindowSpecDefinition,2.45
XxHash64,2.45
Year,2.45
AggregateInPandasExec,1.2
ArrowEvalPythonExec,1.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ VarianceSamp,2.73
WeekDay,2.73
WindowExpression,2.73
WindowSpecDefinition,2.73
XxHash64,2.73
Year,2.73
AggregateInPandasExec,1.2
ArrowEvalPythonExec,1.2
Expand Down
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-dataproc-l4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ VarianceSamp,4.16
WeekDay,4.16
WindowExpression,4.16
WindowSpecDefinition,4.16
XxHash64,4.16
Year,4.16
AggregateInPandasExec,1.2
ArrowEvalPythonExec,1.2
Expand Down
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-dataproc-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ VarianceSamp,4.88
WeekDay,4.88
WindowExpression,4.88
WindowSpecDefinition,4.88
XxHash64,4.88
Year,4.88
AggregateInPandasExec,1.2
ArrowEvalPythonExec,1.2
Expand Down
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-emr-a10.csv
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ VarianceSamp,2.59
WeekDay,2.59
WindowExpression,2.59
WindowSpecDefinition,2.59
XxHash64,2.59
Year,2.59
AggregateInPandasExec,1.2
ArrowEvalPythonExec,1.2
Expand Down
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-emr-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ VarianceSamp,2.07
WeekDay,2.07
WindowExpression,2.07
WindowSpecDefinition,2.07
XxHash64,2.07
Year,2.07
AggregateInPandasExec,1.2
ArrowEvalPythonExec,1.2
Expand Down
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore.csv
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ VarianceSamp,4
WeekDay,4
WindowExpression,4
WindowSpecDefinition,4
XxHash64,4
Year,4
KMeans-pyspark,8.86
KMeans-scala,1
Expand Down
2 changes: 2 additions & 0 deletions core/src/main/resources/supportedExprs.csv
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,8 @@ WindowExpression,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,
WindowSpecDefinition,S, ,None,project,partition,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS
WindowSpecDefinition,S, ,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS
WindowSpecDefinition,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS
XxHash64,S,`xxhash64`,None,project,input,S,S,S,S,S,NS,NS,S,PS,S,S,S,NS,NS,NS,NS,NS,NS
XxHash64,S,`xxhash64`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Year,S,`year`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
Year,S,`year`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
AggregateExpression,S, ,None,aggregation,aggFunc,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import org.scalatest.exceptions.TestFailedException

import org.apache.spark.sql.TrampolineUtil
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{ceil, col, collect_list, count, explode, flatten, floor, hex, json_tuple, round, row_number, sum, translate}
import org.apache.spark.sql.functions.{ceil, col, collect_list, count, explode, flatten, floor, hex, json_tuple, round, row_number, sum, translate, xxhash64}
import org.apache.spark.sql.rapids.tool.ToolUtils
import org.apache.spark.sql.rapids.tool.qualification.QualificationAppInfo
import org.apache.spark.sql.rapids.tool.util.RapidsToolsConfUtil
Expand Down Expand Up @@ -1005,6 +1005,36 @@ class SQLPlanParserSuite extends BaseTestSuite {
}
}

test("xxhash64 is supported in ProjectExec") {
  TrampolineUtil.withTempDir { parquetOutDir =>
    TrampolineUtil.withTempDir { eventLogDir =>
      // Produce an event log whose query projects xxhash64 over a parquet scan.
      val (eventLog, _) = ToolTestUtils.generateEventLog(eventLogDir,
        "ProjectExprsSupported") { spark =>
        import spark.implicits._
        val inputDf = Seq("spark", "", "abc").toDF("value")
        // Round-trip through parquet so the plan contains a ProjectExec
        // instead of a LocalTableScan.
        inputDf.write.parquet(s"$parquetOutDir/testtext")
        val readBack = spark.read.parquet(s"$parquetOutDir/testtext")
        readBack.select(xxhash64(readBack("value")))
      }
      val typeChecker = new PluginTypeChecker()
      val app = createAppFromEventlog(eventLog)
      // One plan for the write, one for the read+project.
      assert(app.sqlPlans.size == 2)
      val parsedPlans = app.sqlPlans.map { case (sqlID, plan) =>
        SQLPlanParser.parseSQLPlan(app.appId, plan, sqlID, "", typeChecker, app)
      }
      val allExecInfo = getAllExecsFromPlan(parsedPlans.toSeq)
      val wholeStages = allExecInfo.filter(_.exec.contains("WholeStageCodegen"))
      assert(wholeStages.size == 1)
      // Every codegen stage should carry a recorded duration.
      assert(wholeStages.forall(_.duration.nonEmpty))
      // The Project node wrapping xxhash64 must be reported as supported.
      val projects = wholeStages.flatMap(_.children).flatten.filter(_.exec == "Project")
      assertSizeAndSupported(1, projects)
    }
  }
}

test("Parse SQL function Name in HashAggregateExec") {
TrampolineUtil.withTempDir { eventLogDir =>
val (eventLog, _) = ToolTestUtils.generateEventLog(eventLogDir, "sqlmetric") { spark =>
Expand Down
1 change: 1 addition & 0 deletions user_tools/custom_speedup_factors/operatorsList.csv
Original file line number Diff line number Diff line change
Expand Up @@ -244,4 +244,5 @@ VarianceSamp
WeekDay
WindowExpression
WindowSpecDefinition
XxHash64
Year

0 comments on commit ef076a3

Please sign in to comment.