From b218a23ba0a1c32915abebfa0839131de88eb7cb Mon Sep 17 00:00:00 2001
From: "Ahmed Hussein (amahussein)"
Date: Fri, 13 Dec 2024 14:40:11 -0600
Subject: [PATCH] Deduplicate calls to aggregateSparkMetricsBySql
Signed-off-by: Ahmed Hussein (amahussein)
Contributes to #1461
AppSparkMetricsAnalyzer was calling `aggregateSparkMetricsBySql` twice.
This code change eliminates this redundancy to save CPU time and memory
allocations.
---
.../spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
index 0f43ae8b2..30fb10ac9 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
@@ -35,12 +35,13 @@ trait AppSparkMetricsAggTrait extends AppIndexMapperTrait {
def getAggRawMetrics(app: AppBase, index: Int, sqlAnalyzer: Option[AppSQLPlanAnalyzer] = None):
AggRawMetricsResult = {
val analysisObj = new AppSparkMetricsAnalyzer(app)
+ val sqlMetricsAgg = analysisObj.aggregateSparkMetricsBySql(index)
AggRawMetricsResult(
analysisObj.aggregateSparkMetricsByJob(index),
analysisObj.aggregateSparkMetricsByStage(index),
analysisObj.shuffleSkewCheck(index),
- analysisObj.aggregateSparkMetricsBySql(index),
- analysisObj.aggregateIOMetricsBySql(analysisObj.aggregateSparkMetricsBySql(index)),
+ sqlMetricsAgg,
+ analysisObj.aggregateIOMetricsBySql(sqlMetricsAgg),
analysisObj.aggregateDurationAndCPUTimeBySql(index),
Seq(analysisObj.maxTaskInputSizeBytesPerSQL(index)),
analysisObj.aggregateDiagnosticMetricsByStage(index, sqlAnalyzer))