Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix future warnings for pandas>=2.2 #1494

Merged
Merged 2 commits on Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions user_tools/src/spark_rapids_pytools/rapids/qualification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -555,9 +555,9 @@ def __update_apps_with_prediction_info(self,
# Rename the source column to the destination column
result_df.rename(columns={src_col: dst_col}, errors='ignore', inplace=True)
# if the qualx does not have a speedup value, default to 1.0
result_df['Estimated GPU Speedup'].fillna(1.0, inplace=True)
result_df.fillna({'Estimated GPU Speedup': 1.0}, inplace=True)
# if the qualx does not have a duration value, default to App Duration
result_df['Estimated GPU Duration'].fillna(result_df['App Duration'], inplace=True)
result_df.fillna({'Estimated GPU Duration': result_df['App Duration']}, inplace=True)
# We need to be careful about other columns that depend on remapped columns
result_df['Estimated GPU Time Saved'] = result_df['App Duration'] - result_df['Estimated GPU Duration']
return result_df
Expand Down
4 changes: 2 additions & 2 deletions user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -810,7 +810,7 @@ def scan_tbl(

if not app_info.empty:
app_info['appName'] = app_name
app_info['sparkVersion'].fillna('Unknown', inplace=True)
app_info.fillna({'sparkVersion': 'Unknown'}, inplace=True)

# Get jar versions:
cudf_version = '-'
Expand Down
11 changes: 6 additions & 5 deletions user_tools/src/spark_rapids_tools/tools/top_candidates.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -101,13 +101,14 @@ def _generate_output_table_internal(self, output_df: pd.DataFrame) -> pd.DataFra
"""
# Create and append 'Speedup Category Order' column to output_df for sorting order
speedup_category_order = self.props.get('ineligibleCategory') + self.props.get('eligibleCategories')
output_df['Speedup Category Order'] = \
output_df['Estimated GPU Speedup Category'].map({name: i for i, name in enumerate(speedup_category_order)})
df = output_df.copy()
df['Speedup Category Order'] = \
df['Estimated GPU Speedup Category'].map({name: i for i, name in enumerate(speedup_category_order)})
# Sort columns and select output columns
output_columns = self.props.get('outputColumns')
sorting_columns = self.props.get('sortingColumns')
valid_output_columns = list(output_df.columns.intersection(output_columns))
res_df = output_df.sort_values(by=sorting_columns, ascending=False)[valid_output_columns]
valid_output_columns = list(df.columns.intersection(output_columns))
res_df = df.sort_values(by=sorting_columns, ascending=False)[valid_output_columns]
# Hardcoding the column name here is not ideal, but the '**' suffix is added at this point because we don't want the CSV output to have it
if 'Estimated GPU Speedup Category' in res_df:
res_df.rename(columns={'Estimated GPU Speedup Category': 'Estimated GPU Speedup Category**'},
Expand Down
Loading