Commit
Simplifying changes for only Dataproc and not Dataproc GKE
Signed-off-by: mattahrens <[email protected]>
mattahrens committed Nov 2, 2023
1 parent 5915598 commit 7dcd26c
Showing 3 changed files with 11 additions and 6 deletions.
8 changes: 5 additions & 3 deletions user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
@@ -534,16 +534,18 @@ def _calculate_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: SparkNodeType):
         mem_gb = float(mem_mb) / 1024
         cores_cost = self.price_provider.get_cpu_price(node_mc_type) * int(cores_count)
         memory_cost = self.price_provider.get_ram_price(node_mc_type) * mem_gb
-        dataproc_cost = self.price_provider.get_container_cost() * int(cores_count)
         # calculate the GPU cost
         gpu_per_machine, gpu_type = cluster_inst.get_gpu_per_node(node_type)
         gpu_cost = 0.0
         if gpu_per_machine > 0:
             gpu_unit_price = self.price_provider.get_gpu_price(gpu_type)
             gpu_cost = gpu_unit_price * gpu_per_machine
-        return nodes_cnt * (cores_cost + memory_cost + dataproc_cost + gpu_cost)
+        return nodes_cnt * (cores_cost + memory_cost + gpu_cost)

     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
         workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
-        return master_cost + workers_cost
+        master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
+        worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
+        dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
+        return master_cost + workers_cost + dataproc_cost
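For illustration only, a minimal sketch of the new cluster-level arithmetic; the prices and cluster shape below are invented, not taken from this commit (real values come from the price_provider):

    # Hypothetical hourly prices; the actual numbers come from the price_provider.
    cpu_price = 0.031611      # per vCPU-hour (invented)
    ram_price = 0.004237      # per GB-hour (invented)
    container_cost = 0.010    # Dataproc fee per vCPU-hour (invented)

    # Hypothetical cluster: 1 master and 2 workers, each with 4 cores / 16 GB.
    groups = [(1, 4, 16), (2, 4, 16)]   # (node count, cores, mem_gb) per group
    machine_cost = sum(n * (c * cpu_price + m * ram_price) for n, c, m in groups)

    # The fee is now applied once over all cluster cores instead of
    # inside each node group's cost:
    total_cores = sum(n * c for n, c, _ in groups)
    dataproc_cost = container_cost * total_cores
    total = machine_cost + dataproc_cost      # ~0.7027 per hour

Since the fee scales linearly with core count, summing it per group or per cluster yields the same total for plain Dataproc; the point of hoisting it, per the commit title, appears to be that the Dataproc GKE estimator below reuses _calculate_group_cost and should no longer inherit the per-core fee.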
@@ -196,4 +196,5 @@ class DataprocGkeSavingsEstimator(DataprocSavingsEstimator):
     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
         workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
-        return master_cost + workers_cost
+        dataproc_gke_cost = self.price_provider.get_container_cost()
+        return master_cost + workers_cost + dataproc_gke_cost
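By contrast, the GKE estimator now adds get_container_cost() exactly once per cluster rather than scaling it by cores. A toy comparison using the same invented numbers as the sketch above:

    # Dataproc on GKE: the container cost is a single per-cluster addend here.
    machine_cost = 0.582708           # summed master + worker machine cost (invented)
    dataproc_gke_cost = 0.010         # get_container_cost(), applied once (invented)
    total = machine_cost + dataproc_gke_cost   # 0.592708 per hour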
6 changes: 4 additions & 2 deletions user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
@@ -300,7 +300,6 @@ def __calculate_dataproc_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: SparkNodeType):

         cores_cost = self.price_provider.get_cpu_price(node_mc_type) * int(cores_count)
         memory_cost = self.price_provider.get_ram_price(node_mc_type) * mem_gb
-        dataproc_cost = self.price_provider.get_container_cost() * int(cores_count)
         # calculate the GPU cost
         gpu_per_machine, gpu_type = cluster_inst.get_gpu_per_node(node_type)
         gpu_cost = 0.0
@@ -313,5 +312,8 @@ def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         if self.price_provider.name.casefold() == 'dataproc':
             master_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.MASTER)
             workers_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.WORKER)
-            total_cost = master_cost + workers_cost
+            master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
+            worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
+            dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
+            total_cost = master_cost + workers_cost + dataproc_cost
         return total_cost
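The onprem path applies the same cluster-level fee, but only when the comparison price provider is Dataproc, via the casefold() guard. A small illustration of that check (the non-matching provider name is invented for contrast):

    # str.casefold() gives aggressive case-insensitive matching, so any
    # capitalization of the provider name enables the container fee.
    for name in ('Dataproc', 'DATAPROC', 'onprem'):
        print(name, '->', name.casefold() == 'dataproc')
    # Dataproc -> True
    # DATAPROC -> True
    # onprem -> False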
