From 7c7afc0bb11a8ef4ff7f837e5f8a5736d5f9aa7d Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Thu, 26 Oct 2023 18:01:52 -0700 Subject: [PATCH] Profiling tool : Update readSchema string parser Signed-off-by: Niranjan Artal --- .../org/apache/spark/sql/rapids/tool/AppBase.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/AppBase.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/AppBase.scala index 769389371..527b7de58 100644 --- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/AppBase.scala +++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/AppBase.scala @@ -272,6 +272,15 @@ abstract class AppBase( } } + private def trimSchema(str: String): String = { + val index = str.lastIndexOf(",") + if (index != -1 && str.contains("...")) { + str.substring(0, index) + } else { + str + } + } + // The ReadSchema metadata is only in the eventlog for DataSource V1 readers protected def checkMetadataForReadSchema(sqlID: Long, planInfo: SparkPlanInfo): Unit = { // check if planInfo has ReadSchema @@ -284,7 +293,7 @@ abstract class AppBase( val readSchema = ReadParser.formatSchemaStr(meta.getOrElse("ReadSchema", "")) val scanNode = allNodes.filter(node => { // Get ReadSchema of each Node and sanitize it for comparison - val trimmedNode = ReadParser.parseReadNode(node).schema.replace("...", "") + val trimmedNode = trimSchema(ReadParser.parseReadNode(node).schema) readSchema.contains(trimmedNode) }).filter(x => x.name.startsWith("Scan")).head