From 49a0f21ef435da98547b466294842e3c5a0e0544 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Tue, 10 Oct 2023 10:19:09 -0700 Subject: [PATCH] Update scores for Dataproc GKE Signed-off-by: Partho Sarthi --- .../operatorsScore-dataproc-gke-t4.csv | 510 +++++++++--------- .../qualification/PluginTypeChecker.scala | 3 +- 2 files changed, 262 insertions(+), 251 deletions(-) diff --git a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv index 493f1154c..e5b3f9525 100644 --- a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv +++ b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv @@ -1,256 +1,268 @@ CPUOperator,Score -CoalesceExec,4.25 -CollectLimitExec,4.25 -ExpandExec,7.76 -FileSourceScanExec,3.64 -FilterExec,4.47 -GenerateExec,4.25 -GlobalLimitExec,4.25 -LocalLimitExec,4.25 -ProjectExec,4.25 -RangeExec,4.25 -SampleExec,4.25 -SortExec,4.25 -TakeOrderedAndProjectExec,20.96 -HashAggregateExec,5.54 -ObjectHashAggregateExec,5.54 -SortAggregateExec,5.54 -DataWritingCommandExec,4.25 -ExecutedCommandExec,4.25 -BatchScanExec,3.64 -ShuffleExchangeExec,5.21 -BroadcastHashJoinExec,6.42 -BroadcastNestedLoopJoinExec,17.46 -CartesianProductExec,4.25 -ShuffledHashJoinExec,4.25 -SortMergeJoinExec,7.4 -WindowExec,4.25 -Abs,4.25 -Acos,4.25 -Acosh,4.25 -Add,4.25 -AggregateExpression,4.25 -Alias,4.25 -And,4.25 -ApproximatePercentile,4.25 -ArrayContains,4.25 -ArrayExcept,4.25 -ArrayExists,4.25 -ArrayIntersect,4.25 -ArrayMax,4.25 -ArrayMin,4.25 -ArrayRemove,4.25 -ArrayRepeat,4.25 -ArrayTransform,4.25 -ArrayUnion,4.25 -ArraysOverlap,4.25 -ArraysZip,4.25 -Asin,4.25 -Asinh,4.25 -AtLeastNNonNulls,4.25 -Atan,4.25 -Atanh,4.25 -AttributeReference,4.25 -Average,4.25 -BRound,4.25 -BitLength,4.25 -BitwiseAnd,4.25 -BitwiseNot,4.25 -BitwiseOr,4.25 -BitwiseXor,4.25 -CaseWhen,4.25 -Cbrt,4.25 -Ceil,4.25 -CheckOverflow,4.25 -Coalesce,4.25 -CollectList,4.25 -CollectSet,4.25 -Concat,4.25 -ConcatWs,4.25 -Contains,4.25 -Conv,4.25 -Cos,4.25 -Cosh,4.25 -Cot,4.25 -Count,4.25 -CreateArray,4.25 -CreateMap,4.25 -CreateNamedStruct,4.25 -CurrentRow$,4.25 -DateAdd,4.25 -DateAddInterval,4.25 -DateDiff,4.25 -DateFormatClass,4.25 -DateSub,4.25 -DayOfMonth,4.25 -DayOfWeek,4.25 -DayOfYear,4.25 -DenseRank,4.25 -Divide,4.25 -DynamicPruningExpression,4.25 -ElementAt,4.25 -EndsWith,4.25 -EqualNullSafe,4.25 -EqualTo,4.25 -Exp,4.25 -Explode,4.25 -Expm1,4.25 -First,4.25 -Flatten,4.25 -Floor,4.25 -FromUTCTimestamp,4.25 -FromUnixTime,4.25 -GetArrayItem,4.25 -GetArrayStructFields,4.25 -GetJsonObject,4.25 -GetMapValue,4.25 -GetStructField,4.25 -GetTimestamp,4.25 -GreaterThan,4.25 -GreaterThanOrEqual,4.25 -Greatest,4.25 -HiveGenericUDF,4.25 -HiveSimpleUDF,4.25 -Hour,4.25 -Hypot,4.25 -If,4.25 -In,4.25 -InSet,4.25 -InitCap,4.25 -InputFileBlockLength,4.25 -InputFileBlockStart,4.25 -InputFileName,4.25 -IntegralDivide,4.25 -IsNaN,4.25 -IsNotNull,4.25 -IsNull,4.25 -JsonToStructs,4.25 -JsonTuple,4.25 -KnownFloatingPointNormalized,4.25 -KnownNotNull,4.25 -Lag,4.25 -LambdaFunction,4.25 -Last,4.25 -LastDay,4.25 -Lead,4.25 -Least,4.25 -Length,4.25 -LessThan,4.25 -LessThanOrEqual,4.25 -Like,4.25 -Literal,4.25 -Log,4.25 -Log10,4.25 -Log1p,4.25 -Log2,4.25 -Logarithm,4.25 -Lower,4.25 -MakeDecimal,4.25 -MapConcat,4.25 -MapEntries,4.25 -MapFilter,4.25 -MapKeys,4.25 -MapValues,4.25 -Max,4.25 -Md5,4.25 -MicrosToTimestamp,4.25 -MillisToTimestamp,4.25 -Min,4.25 -Minute,4.25 -MonotonicallyIncreasingID,4.25 -Month,4.25 -Multiply,4.25 -Murmur3Hash,4.25 -NaNvl,4.25 -NamedLambdaVariable,4.25 -NormalizeNaNAndZero,4.25 -Not,4.25 -NthValue,4.25 -OctetLength,4.25 -Or,4.25 -PercentRank,4.25 -PivotFirst,4.25 -Pmod,4.25 -PosExplode,4.25 -Pow,4.25 -PreciseTimestampConversion,4.25 -PromotePrecision,4.25 -PythonUDF,4.25 -Quarter,4.25 -RLike,4.25 -RaiseError,4.25 -Rand,4.25 -Rank,4.25 -RegExpExtract,4.25 -RegExpExtractAll,4.25 -RegExpReplace,4.25 -Remainder,4.25 -ReplicateRows,4.25 -Reverse,4.25 -Rint,4.25 -Round,4.25 -RowNumber,4.25 -ScalaUDF,4.25 -ScalarSubquery,4.25 -Second,4.25 -SecondsToTimestamp,4.25 -Sequence,4.25 -ShiftLeft,4.25 -ShiftRight,4.25 -ShiftRightUnsigned,4.25 -Signum,4.25 -Sin,4.25 -Sinh,4.25 -Size,4.25 -SortArray,4.25 -SortOrder,4.25 -SparkPartitionID,4.25 -SpecifiedWindowFrame,4.25 -Sqrt,4.25 -StartsWith,4.25 -StddevPop,4.25 -StddevSamp,4.25 -StringInstr,4.25 -StringLPad,4.25 -StringLocate,4.25 -StringRPad,4.25 -StringRepeat,4.25 -StringReplace,4.25 -StringSplit,4.25 -StringToMap,4.25 -StringTranslate,4.25 -StringTrim,4.25 -StringTrimLeft,4.25 -StringTrimRight,4.25 -Substring,4.25 -SubstringIndex,4.25 -Subtract,4.25 -Sum,4.25 -Tan,4.25 -Tanh,4.25 -TimeAdd,4.25 -ToDegrees,4.25 -ToRadians,4.25 -ToUnixTimestamp,4.25 -TransformKeys,4.25 -TransformValues,4.25 -UnaryMinus,4.25 -UnaryPositive,4.25 -UnboundedFollowing$,4.25 -UnboundedPreceding$,4.25 -UnixTimestamp,4.25 -UnscaledValue,4.25 -Upper,4.25 -VariancePop,4.25 -VarianceSamp,4.25 -WeekDay,4.25 -WindowExpression,4.25 -WindowSpecDefinition,4.25 -XxHash64,4.25 -Year,4.25 +CoalesceExec,3.65 +CollectLimitExec,3.65 +ExpandExec,3.76 +FileSourceScanExec,2.84 +FilterExec,3.79 +GenerateExec,3.65 +GlobalLimitExec,3.65 +LocalLimitExec,3.65 +ProjectExec,3.65 +RangeExec,3.65 +SampleExec,3.65 +SortExec,3.65 +TakeOrderedAndProjectExec,3.65 +HashAggregateExec,4.1 +ObjectHashAggregateExec,4.1 +SortAggregateExec,4.1 +DataWritingCommandExec,3.65 +ExecutedCommandExec,3.65 +BatchScanExec,2.84 +ShuffleExchangeExec,3.69 +BroadcastHashJoinExec,3.72 +BroadcastNestedLoopJoinExec,1.66 +CartesianProductExec,3.65 +ShuffledHashJoinExec,3.65 +SortMergeJoinExec,5.64 +WindowExec,3.65 +Abs,3.65 +Acos,3.65 +Acosh,3.65 +Add,3.65 +AggregateExpression,3.65 +Alias,3.65 +And,3.65 +ApproximatePercentile,3.65 +ArrayContains,3.65 +ArrayExcept,3.65 +ArrayExists,3.65 +ArrayIntersect,3.65 +ArrayMax,3.65 +ArrayMin,3.65 +ArrayRemove,3.65 +ArrayRepeat,3.65 +ArrayTransform,3.65 +ArrayUnion,3.65 +ArraysOverlap,3.65 +ArraysZip,3.65 +Asin,3.65 +Asinh,3.65 +AtLeastNNonNulls,3.65 +Atan,3.65 +Atanh,3.65 +AttributeReference,3.65 +Average,3.65 +BRound,3.65 +BitLength,3.65 +BitwiseAnd,3.65 +BitwiseNot,3.65 +BitwiseOr,3.65 +BitwiseXor,3.65 +CaseWhen,3.65 +Cbrt,3.65 +Ceil,3.65 +CheckOverflow,3.65 +Coalesce,3.65 +CollectList,3.65 +CollectSet,3.65 +Concat,3.65 +ConcatWs,3.65 +Contains,3.65 +Conv,3.65 +Cos,3.65 +Cosh,3.65 +Cot,3.65 +Count,3.65 +CreateArray,3.65 +CreateMap,3.65 +CreateNamedStruct,3.65 +CurrentRow$,3.65 +DateAdd,3.65 +DateAddInterval,3.65 +DateDiff,3.65 +DateFormatClass,3.65 +DateSub,3.65 +DayOfMonth,3.65 +DayOfWeek,3.65 +DayOfYear,3.65 +DenseRank,3.65 +Divide,3.65 +DynamicPruningExpression,3.65 +ElementAt,3.65 +EndsWith,3.65 +EqualNullSafe,3.65 +EqualTo,3.65 +Exp,3.65 +Explode,3.65 +Expm1,3.65 +First,3.65 +Flatten,3.65 +Floor,3.65 +FromUTCTimestamp,3.65 +FromUnixTime,3.65 +GetArrayItem,3.65 +GetArrayStructFields,3.65 +GetJsonObject,3.65 +GetMapValue,3.65 +GetStructField,3.65 +GetTimestamp,3.65 +GreaterThan,3.65 +GreaterThanOrEqual,3.65 +Greatest,3.65 +HiveGenericUDF,3.65 +HiveSimpleUDF,3.65 +Hour,3.65 +Hypot,3.65 +If,3.65 +In,3.65 +InSet,3.65 +InitCap,3.65 +InputFileBlockLength,3.65 +InputFileBlockStart,3.65 +InputFileName,3.65 +IntegralDivide,3.65 +IsNaN,3.65 +IsNotNull,3.65 +IsNull,3.65 +JsonToStructs,3.65 +JsonTuple,3.65 +KnownFloatingPointNormalized,3.65 +KnownNotNull,3.65 +Lag,3.65 +LambdaFunction,3.65 +Last,3.65 +LastDay,3.65 +Lead,3.65 +Least,3.65 +Length,3.65 +LessThan,3.65 +LessThanOrEqual,3.65 +Like,3.65 +Literal,3.65 +Log,3.65 +Log10,3.65 +Log1p,3.65 +Log2,3.65 +Logarithm,3.65 +Lower,3.65 +MakeDecimal,3.65 +MapConcat,3.65 +MapEntries,3.65 +MapFilter,3.65 +MapKeys,3.65 +MapValues,3.65 +Max,3.65 +Md5,3.65 +MicrosToTimestamp,3.65 +MillisToTimestamp,3.65 +Min,3.65 +Minute,3.65 +MonotonicallyIncreasingID,3.65 +Month,3.65 +Multiply,3.65 +Murmur3Hash,3.65 +NaNvl,3.65 +NamedLambdaVariable,3.65 +NormalizeNaNAndZero,3.65 +Not,3.65 +NthValue,3.65 +OctetLength,3.65 +Or,3.65 +PercentRank,3.65 +PivotFirst,3.65 +Pmod,3.65 +PosExplode,3.65 +Pow,3.65 +PreciseTimestampConversion,3.65 +PromotePrecision,3.65 +PythonUDF,3.65 +Quarter,3.65 +RLike,3.65 +RaiseError,3.65 +Rand,3.65 +Rank,3.65 +RegExpExtract,3.65 +RegExpExtractAll,3.65 +RegExpReplace,3.65 +Remainder,3.65 +ReplicateRows,3.65 +Reverse,3.65 +Rint,3.65 +Round,3.65 +RowNumber,3.65 +ScalaUDF,3.65 +ScalarSubquery,3.65 +Second,3.65 +SecondsToTimestamp,3.65 +Sequence,3.65 +ShiftLeft,3.65 +ShiftRight,3.65 +ShiftRightUnsigned,3.65 +Signum,3.65 +Sin,3.65 +Sinh,3.65 +Size,3.65 +SortArray,3.65 +SortOrder,3.65 +SparkPartitionID,3.65 +SpecifiedWindowFrame,3.65 +Sqrt,3.65 +StartsWith,3.65 +StddevPop,3.65 +StddevSamp,3.65 +StringInstr,3.65 +StringLPad,3.65 +StringLocate,3.65 +StringRPad,3.65 +StringRepeat,3.65 +StringReplace,3.65 +StringSplit,3.65 +StringToMap,3.65 +StringTranslate,3.65 +StringTrim,3.65 +StringTrimLeft,3.65 +StringTrimRight,3.65 +Substring,3.65 +SubstringIndex,3.65 +Subtract,3.65 +Sum,3.65 +Tan,3.65 +Tanh,3.65 +TimeAdd,3.65 +ToDegrees,3.65 +ToRadians,3.65 +ToUnixTimestamp,3.65 +TransformKeys,3.65 +TransformValues,3.65 +UnaryMinus,3.65 +UnaryPositive,3.65 +UnboundedFollowing$,3.65 +UnboundedPreceding$,3.65 +UnixTimestamp,3.65 +UnscaledValue,3.65 +Upper,3.65 +VariancePop,3.65 +VarianceSamp,3.65 +WeekDay,3.65 +WindowExpression,3.65 +WindowSpecDefinition,3.65 +XxHash64,3.65 +Year,3.65 AggregateInPandasExec,1.2 ArrowEvalPythonExec,1.2 FlatMapGroupsInPandasExec,1.2 FlatMapCoGroupsInPandasExec,1.2 MapInPandasExec,1.2 WindowInPandasExec,1.2 +KMeans-pyspark,8.86 +KMeans-scala,1.0 +PCA-pyspark,2.24 +PCA-scala,2.69 +LinearRegression-pyspark,2.0 +LinearRegression-scala,1.0 +RandomForestClassifier-pyspark,6.31 +RandomForestClassifier-scala,1.0 +RandomForestRegressor-pyspark,3.66 +RandomForestRegressor-scala,1.0 +XGBoost-pyspark,1.0 +XGBoost-scala,3.31 diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala index c01586159..d14336618 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/PluginTypeChecker.scala @@ -48,8 +48,7 @@ class PluginTypeChecker(platform: String = "onprem", private val OPERATORS_SCORE_FILE_DATAPROC_T4 = "operatorsScore-dataproc-t4.csv" private val OPERATORS_SCORE_FILE_DATAPROC_L4 = "operatorsScore-dataproc-l4.csv" private val OPERATORS_SCORE_FILE_DATAPROC_SL_L4 = "operatorsScore-dataproc-serverless-l4.csv" - // TODO: Replace this with GKE T4 speedup scores - private val OPERATORS_SCORE_FILE_DATAPROC_GKE_T4 = "operatorsScore.csv" + private val OPERATORS_SCORE_FILE_DATAPROC_GKE_T4 = "operatorsScore-dataproc-gke-t4.csv" private val OPERATORS_SCORE_FILE_EMR_T4 = "operatorsScore-emr-t4.csv" private val OPERATORS_SCORE_FILE_EMR_A10 = "operatorsScore-emr-a10.csv" private val OPERATORS_SCORE_FILE_DATABRICKS_AWS = "operatorsScore-databricks-aws.csv"