From b218a23ba0a1c32915abebfa0839131de88eb7cb Mon Sep 17 00:00:00 2001 From: "Ahmed Hussein (amahussein)" Date: Fri, 13 Dec 2024 14:40:11 -0600 Subject: [PATCH] Deduplicate calls to aggregateSparkMetricsBySql Signed-off-by: Ahmed Hussein (amahussein) Contributes to #1461 AppSparkMetricsAnalyzer was calling `aggregateSparkMetricsBySql` twice. This code change eliminates this redundancy to save CPU time and memory allocations. --- .../spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala index 0f43ae8b2..30fb10ac9 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala @@ -35,12 +35,13 @@ trait AppSparkMetricsAggTrait extends AppIndexMapperTrait { def getAggRawMetrics(app: AppBase, index: Int, sqlAnalyzer: Option[AppSQLPlanAnalyzer] = None): AggRawMetricsResult = { val analysisObj = new AppSparkMetricsAnalyzer(app) + val sqlMetricsAgg = analysisObj.aggregateSparkMetricsBySql(index) AggRawMetricsResult( analysisObj.aggregateSparkMetricsByJob(index), analysisObj.aggregateSparkMetricsByStage(index), analysisObj.shuffleSkewCheck(index), - analysisObj.aggregateSparkMetricsBySql(index), - analysisObj.aggregateIOMetricsBySql(analysisObj.aggregateSparkMetricsBySql(index)), + sqlMetricsAgg, + analysisObj.aggregateIOMetricsBySql(sqlMetricsAgg), analysisObj.aggregateDurationAndCPUTimeBySql(index), Seq(analysisObj.maxTaskInputSizeBytesPerSQL(index)), analysisObj.aggregateDiagnosticMetricsByStage(index, sqlAnalyzer))