From 1d1f0fb167ca88d83dab6b1f70311aadc8603ffe Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 3 Dec 2024 15:33:12 -0800 Subject: [PATCH] qualx related changes --- .../tools/qualx/preprocess.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py b/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py index 320615b7c..2961f9f90 100644 --- a/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py +++ b/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py @@ -109,6 +109,10 @@ 'sqlOp_DeserializeToObject', 'sqlOp_Exchange', 'sqlOp_Execute InsertIntoHadoopFsRelationCommand', + 'sqlOp_Execute InsertIntoHadoopFsRelationCommand csv', + 'sqlOp_Execute InsertIntoHadoopFsRelationCommand parquet', + 'sqlOp_Execute InsertIntoHadoopFsRelationCommand orc', + 'sqlOp_Execute InsertIntoHadoopFsRelationCommand json', 'sqlOp_Expand', 'sqlOp_Filter', 'sqlOp_Generate', @@ -125,15 +129,16 @@ 'sqlOp_Project', 'sqlOp_ReusedSort', 'sqlOp_RunningWindowFunction', - 'sqlOp_Scan csv ', + 'sqlOp_Scan csv', 'sqlOp_Scan ExistingRDD Delta Table Checkpoint', 'sqlOp_Scan ExistingRDD Delta Table State', - 'sqlOp_Scan JDBCRelation', - 'sqlOp_Scan json ', + 'sqlOp_Scan ExistingRDD', + 'sqlOp_Scan jdbc', + 'sqlOp_Scan json', 'sqlOp_Scan OneRowRelation', - 'sqlOp_Scan orc ', - 'sqlOp_Scan parquet ', - 'sqlOp_Scan text ', + 'sqlOp_Scan orc', + 'sqlOp_Scan parquet', + 'sqlOp_Scan text', 'sqlOp_SerializeFromObject', 'sqlOp_Sort', 'sqlOp_SortAggregate', @@ -481,8 +486,8 @@ def combine_tables(table_name: str) -> pd.DataFrame: 'Scan DeltaCDFRelation', 'Scan ExistingRDD Delta Table Checkpoint', 'Scan ExistingRDD Delta Table State', - 'Scan JDBCRelation', - 'Scan parquet ', # trailing space is also in default sql op name + 'Scan jdbc', + 'Scan parquet', # GPU 'GpuScan parquet', ]