From 7dcd26ce299e06bfb0c903b9e0712702b1a2b2fb Mon Sep 17 00:00:00 2001
From: mattahrens
Date: Thu, 2 Nov 2023 16:55:31 -0500
Subject: [PATCH] Simplifying changes for only Dataproc and not Dataproc GKE

Signed-off-by: mattahrens
---
 user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py | 8 +++++---
 .../src/spark_rapids_pytools/cloud_api/dataproc_gke.py    | 3 ++-
 user_tools/src/spark_rapids_pytools/cloud_api/onprem.py   | 6 ++++--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
index 45984e1bb..15d466ab9 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
@@ -534,16 +534,18 @@ def _calculate_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: Spa
         mem_gb = float(mem_mb) / 1024
         cores_cost = self.price_provider.get_cpu_price(node_mc_type) * int(cores_count)
         memory_cost = self.price_provider.get_ram_price(node_mc_type) * mem_gb
-        dataproc_cost = self.price_provider.get_container_cost() * int(cores_count)
         # calculate the GPU cost
         gpu_per_machine, gpu_type = cluster_inst.get_gpu_per_node(node_type)
         gpu_cost = 0.0
         if gpu_per_machine > 0:
             gpu_unit_price = self.price_provider.get_gpu_price(gpu_type)
             gpu_cost = gpu_unit_price * gpu_per_machine
-        return nodes_cnt * (cores_cost + memory_cost + dataproc_cost + gpu_cost)
+        return nodes_cnt * (cores_cost + memory_cost + gpu_cost)

     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
         workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
-        return master_cost + workers_cost
+        master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
+        worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
+        dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
+        return master_cost + workers_cost + dataproc_cost
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py
index 65ad0754c..06a71d342 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py
@@ -196,4 +196,5 @@ class DataprocGkeSavingsEstimator(DataprocSavingsEstimator):
     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
         workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
-        return master_cost + workers_cost
+        dataproc_gke_cost = self.price_provider.get_container_cost()
+        return master_cost + workers_cost + dataproc_gke_cost
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
index 1e2b46e80..cbd28ae5e 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
@@ -300,7 +300,6 @@ def __calculate_dataproc_group_cost(self, cluster_inst: ClusterGetAccessor, node
         mem_gb = float(mem_mb) / 1024
         cores_cost = self.price_provider.get_cpu_price(node_mc_type) * int(cores_count)
         memory_cost = self.price_provider.get_ram_price(node_mc_type) * mem_gb
-        dataproc_cost = self.price_provider.get_container_cost() * int(cores_count)
         # calculate the GPU cost
         gpu_per_machine, gpu_type = cluster_inst.get_gpu_per_node(node_type)
         gpu_cost = 0.0
@@ -313,5 +312,8 @@ def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         if self.price_provider.name.casefold() == 'dataproc':
             master_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.MASTER)
             workers_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.WORKER)
-            total_cost = master_cost + workers_cost
+            master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
+            worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
+            dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
+            total_cost = master_cost + workers_cost + dataproc_cost
         return total_cost
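
---

Note (not part of the patch): the net effect is that the Dataproc container
surcharge moves out of the per-node-group costing in _calculate_group_cost and
is applied once per cluster in _get_cost_per_cluster, priced per core across
master and worker nodes. For plain Dataproc the total is arithmetically
unchanged; the restructuring exists so that DataprocGkeSavingsEstimator can
override _get_cost_per_cluster and charge a flat container cost instead of a
per-core one. The standalone Python sketch below illustrates the new shape of
the calculation only; the unit prices and the helpers group_cost/cluster_cost
are hypothetical stand-ins, not the repo's PricingProvider/ClusterGetAccessor
API.

    # Hypothetical unit prices, for illustration only (not real Dataproc rates).
    CPU_PRICE_PER_CORE = 0.035        # assumed $/core/hour
    RAM_PRICE_PER_GB = 0.005          # assumed $/GB/hour
    CONTAINER_PRICE_PER_CORE = 0.01   # assumed Dataproc container surcharge, $/core/hour

    def group_cost(nodes_cnt: int, cores: int, mem_gb: float) -> float:
        # After the patch, a node group's cost covers CPU and memory only
        # (GPU omitted here for brevity); no container surcharge per group.
        return nodes_cnt * (cores * CPU_PRICE_PER_CORE + mem_gb * RAM_PRICE_PER_GB)

    def cluster_cost(master_nodes: int, master_cores: int, master_mem_gb: float,
                     worker_nodes: int, worker_cores: int, worker_mem_gb: float) -> float:
        master_cost = group_cost(master_nodes, master_cores, master_mem_gb)
        workers_cost = group_cost(worker_nodes, worker_cores, worker_mem_gb)
        # The surcharge is now applied once at the cluster level,
        # per core across master and worker nodes combined.
        total_cores = master_nodes * master_cores + worker_nodes * worker_cores
        return master_cost + workers_cost + CONTAINER_PRICE_PER_CORE * total_cores

    # Example: 1 master + 2 workers, 16 cores / 64 GB each.
    print(f'{cluster_cost(1, 16, 64.0, 2, 16, 64.0):.2f}')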