From 6bd038c7389f01d660ad799793ef03c9763c6ba3 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Mon, 23 Oct 2023 15:36:02 -0700 Subject: [PATCH 1/2] Fix extraction of zone from cluster properties in Dataproc user tools Signed-off-by: Partho Sarthi --- .../src/spark_rapids_pytools/cloud_api/dataproc.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py index bd17016bd..e1ead67d0 100644 --- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py +++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py @@ -422,10 +422,20 @@ def _init_nodes(self): SparkNodeType.MASTER: master_node } + def _set_zone_from_props(self, prop_container: JSONPropertiesContainer): + """ + Extracts the 'zoneUri' from the properties container and updates the environment variable dictionary. + """ + if prop_container: + zone_uri = prop_container.get_value_silent('config', 'gceClusterConfig', 'zoneUri') + if zone_uri: + self.cli.env_vars['zone'] = FSUtil.get_resource_name(zone_uri) + def _init_connection(self, cluster_id: str = None, props: str = None) -> dict: cluster_args = super()._init_connection(cluster_id=cluster_id, props=props) - # propagate zone to the cluster + # extract and update zone to the environment variable and cluster + self._set_zone_from_props(cluster_args['props']) cluster_args.setdefault('zone', self.cli.get_env_var('zone')) return cluster_args @@ -514,6 +524,7 @@ class DataprocSavingsEstimator(SavingsEstimator): """ A class that calculates the savings based on Dataproc price provider """ + def _calculate_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: SparkNodeType): nodes_cnt = cluster_inst.get_nodes_cnt(node_type) cores_count = cluster_inst.get_node_core_count(node_type) From 1ebc6acbdbae9533e4db1cef4db5bd6b489ecc50 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Mon, 23 Oct 2023 15:45:29 -0700 Subject: [PATCH 2/2] Add comments and fix linting Signed-off-by: Partho Sarthi --- user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py index e1ead67d0..d3a24e648 100644 --- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py +++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py @@ -434,8 +434,9 @@ def _set_zone_from_props(self, prop_container: JSONPropertiesContainer): def _init_connection(self, cluster_id: str = None, props: str = None) -> dict: cluster_args = super()._init_connection(cluster_id=cluster_id, props=props) - # extract and update zone to the environment variable and cluster + # extract and update zone to the environment variable self._set_zone_from_props(cluster_args['props']) + # propagate zone to the cluster cluster_args.setdefault('zone', self.cli.get_env_var('zone')) return cluster_args @@ -524,7 +525,6 @@ class DataprocSavingsEstimator(SavingsEstimator): """ A class that calculates the savings based on Dataproc price provider """ - def _calculate_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: SparkNodeType): nodes_cnt = cluster_inst.get_nodes_cnt(node_type) cores_count = cluster_inst.get_node_core_count(node_type)