Commit 7d3a64a

Merge dev into main
Signed-off-by: spark-rapids automation <[email protected]>
nvauto committed Dec 12, 2024
2 parents 217bf7b + 8df1407 commit 7d3a64a
Showing 4 changed files with 16 additions and 7 deletions.
2 changes: 1 addition & 1 deletion core/pom.xml
@@ -23,7 +23,7 @@
     <artifactId>rapids-4-spark-tools_2.12</artifactId>
     <name>RAPIDS Accelerator for Apache Spark tools</name>
     <description>RAPIDS Accelerator for Apache Spark tools</description>
-    <version>24.10.2</version>
+    <version>24.10.3-SNAPSHOT</version>
     <packaging>jar</packaging>
     <url>http://github.com/NVIDIA/spark-rapids-tools</url>

2 changes: 1 addition & 1 deletion user_tools/src/spark_rapids_pytools/__init__.py
@@ -16,7 +16,7 @@

 from spark_rapids_pytools.build import get_version, get_spark_dep_version
 
-VERSION = '24.10.2'
+VERSION = '24.10.3'
 # defines the default runtime build version for the user tools environment
 SPARK_DEP_VERSION = '350'
 __version__ = get_version(VERSION)
15 changes: 12 additions & 3 deletions
@@ -75,7 +75,10 @@ def _read_csv_files(self) -> None:
             'toolOutput', 'csv', 'unsupportedOperatorsReport', 'fileName')
         rapids_unsupported_operators_file = FSUtil.build_path(
             qual_output_dir, unsupported_operator_report_file)
-        self.unsupported_operators_df = pd.read_csv(rapids_unsupported_operators_file)
+        # load the unsupported operators and drop operators that have no names.
+        self.unsupported_operators_df = (
+            pd.read_csv(rapids_unsupported_operators_file,
+                        dtype={'Unsupported Operator': str})).dropna(subset=['Unsupported Operator'])
 
         stages_report_file = self.ctxt.get_value('toolOutput', 'csv', 'stagesInformation',
                                                  'fileName')
@@ -84,7 +87,14 @@ def _read_csv_files(self) -> None:

         rapids_execs_file = self.ctxt.get_value('toolOutput', 'csv', 'execsInformation',
                                                 'fileName')
-        self.execs_df = pd.read_csv(FSUtil.build_path(qual_output_dir, rapids_execs_file))
+        # Load the execs CSV file and drop execs that have no stages or name
+        self.execs_df = (
+            pd.read_csv(FSUtil.build_path(qual_output_dir, rapids_execs_file),
+                        dtype={'Exec Name': str,
+                               'Exec Stages': str,
+                               'Exec Children': str,
+                               'Exec Children Node Ids': str})
+            .dropna(subset=['Exec Stages', 'Exec Name']))
         self.logger.info('Reading CSV files completed.')
 
     def _convert_durations(self) -> None:
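
A minimal sketch (not part of the commit) of what the stricter read buys. With a plain read_csv, empty cells come back as float NaN, so later string operations on those columns fail; forcing str dtype and dropping rows with missing names up front keeps the dataframes clean. The in-memory CSV below is illustrative only; the column names mirror the execs report schema used above.

import io
import pandas as pd

# Illustrative stand-in for the execs report CSV.
csv_text = (
    'Exec Name,Exec Stages\n'
    'Filter,1:2\n'
    ',3\n'            # exec with no name
    'Project,\n'      # exec with no stages
)

# The pattern from the commit: force string dtype so stage lists are
# never inferred as numbers, then drop rows missing a name or stages.
execs_df = (pd.read_csv(io.StringIO(csv_text),
                        dtype={'Exec Name': str, 'Exec Stages': str})
            .dropna(subset=['Exec Stages', 'Exec Name']))
print(execs_df)   # only the 'Filter' row survives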
@@ -103,7 +113,6 @@ def _preprocess_dataframes(self) -> None:
         # from this dataframe can be matched with the stageID of stages dataframe
         self.execs_df['Exec Stages'] = self.execs_df['Exec Stages'].str.split(':')
         self.execs_df = (self.execs_df.explode('Exec Stages').
-                         dropna(subset=['Exec Stages']).
                          rename(columns={'Exec Stages': 'Stage ID'}))
         self.execs_df['Stage ID'] = self.execs_df['Stage ID'].astype(int)
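
As a hedged illustration of the explode step (toy stage IDs, not real tool output): each exec row carries a colon-separated list of stage IDs, and explode emits one row per (exec, stage) pair so it can be matched against the stages dataframe. Because rows with missing 'Exec Stages' are now dropped at read time, the dropna in this chain became redundant and the final astype(int) stays safe.

import pandas as pd

execs_df = pd.DataFrame({
    'Exec Name': ['Filter', 'HashAggregate'],
    'Exec Stages': ['1:2', '3'],   # colon-separated stage IDs
})

# Split each list and emit one row per stage ID, then rename the
# column so it can be joined on 'Stage ID'.
execs_df['Exec Stages'] = execs_df['Exec Stages'].str.split(':')
execs_df = (execs_df.explode('Exec Stages')
            .rename(columns={'Exec Stages': 'Stage ID'}))
execs_df['Stage ID'] = execs_df['Stage ID'].astype(int)
print(execs_df)
#        Exec Name  Stage ID
# 0         Filter         1
# 0         Filter         2
# 1  HashAggregate         3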

4 changes: 2 additions & 2 deletions user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
@@ -461,7 +461,7 @@ def combine_tables(table_name: str) -> pd.DataFrame:

     # normalize WholeStageCodegen labels
     ops_tbl.loc[
-        ops_tbl['nodeName'].str.startswith('WholeStageCodegen'), 'nodeName'
+        ops_tbl['nodeName'].astype(str).str.startswith('WholeStageCodegen'), 'nodeName'
     ] = 'WholeStageCodegen'
 
     # format WholeStageCodegen for merging
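
A small sketch (assumed data, not from the repo) of why the astype(str) guard matters here: if 'nodeName' contains NaN, .str.startswith yields NaN entries, and pandas refuses to use such a mask in .loc. Casting to str first turns NaN into the literal 'nan', which simply fails the prefix test.

import numpy as np
import pandas as pd

ops_tbl = pd.DataFrame({
    'nodeName': ['WholeStageCodegen (3)', np.nan, 'Scan parquet'],
})

# Without astype(str), the mask would contain NaN and the .loc
# assignment would raise; with it, the mask stays strictly boolean.
mask = ops_tbl['nodeName'].astype(str).str.startswith('WholeStageCodegen')
ops_tbl.loc[mask, 'nodeName'] = 'WholeStageCodegen'
print(ops_tbl['nodeName'].tolist())
# ['WholeStageCodegen', nan, 'Scan parquet']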
@@ -1140,7 +1140,7 @@ def _is_ignore_no_perf(action: str) -> bool:
     node_level_supp['Exec Is Supported'] = (
         node_level_supp['Exec Is Supported']
         | node_level_supp['Action'].apply(_is_ignore_no_perf)
-        | node_level_supp['Exec Name'].apply(
+        | node_level_supp['Exec Name'].astype(str).apply(
             lambda x: x.startswith('WholeStageCodegen')
         )
     )
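
The same NaN-safety idea applies to this support mask. Below is a toy version with a stand-in for _is_ignore_no_perf (its exact semantics are assumed here, not taken from the repo): an exec counts as supported if it already is, if its action is ignorable, or if it is a WholeStageCodegen node, and astype(str) keeps the lambda from hitting a float NaN.

import numpy as np
import pandas as pd

def _is_ignore_no_perf(action: str) -> bool:
    # Stand-in: treat 'IgnoreNoPerf' actions as effectively supported.
    return action == 'IgnoreNoPerf'

node_level_supp = pd.DataFrame({
    'Exec Name': ['Filter', np.nan, 'WholeStageCodegen (1)'],
    'Action': ['Triage', 'IgnoreNoPerf', 'Triage'],
    'Exec Is Supported': [False, False, False],
})

# Mirrors the combined mask in the diff; without astype(str) the NaN
# name would raise an AttributeError inside the lambda.
node_level_supp['Exec Is Supported'] = (
    node_level_supp['Exec Is Supported']
    | node_level_supp['Action'].apply(_is_ignore_no_perf)
    | node_level_supp['Exec Name'].astype(str).apply(
        lambda x: x.startswith('WholeStageCodegen')
    )
)
print(node_level_supp['Exec Is Supported'].tolist())  # [False, True, True]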
