Commit
Simplifying changes for only Dataproc and not Dataproc GKE
Signed-off-by: mattahrens <[email protected]>
mattahrens committed Nov 2, 2023
1 parent 5915598 commit 7dcd26c
Showing 3 changed files with 11 additions and 6 deletions.
8 changes: 5 additions & 3 deletions user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
@@ -534,16 +534,18 @@ def _calculate_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: SparkNodeType):
         mem_gb = float(mem_mb) / 1024
         cores_cost = self.price_provider.get_cpu_price(node_mc_type) * int(cores_count)
         memory_cost = self.price_provider.get_ram_price(node_mc_type) * mem_gb
-        dataproc_cost = self.price_provider.get_container_cost() * int(cores_count)
         # calculate the GPU cost
         gpu_per_machine, gpu_type = cluster_inst.get_gpu_per_node(node_type)
         gpu_cost = 0.0
         if gpu_per_machine > 0:
             gpu_unit_price = self.price_provider.get_gpu_price(gpu_type)
             gpu_cost = gpu_unit_price * gpu_per_machine
-        return nodes_cnt * (cores_cost + memory_cost + dataproc_cost + gpu_cost)
+        return nodes_cnt * (cores_cost + memory_cost + gpu_cost)

     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
         workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
-        return master_cost + workers_cost
+        master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
+        worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
+        dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
+        return master_cost + workers_cost + dataproc_cost
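For illustration only, a minimal sketch of the new cluster-level arithmetic; the prices and cluster shape below are invented, not taken from this commit (real values come from the price_provider):

    # Hypothetical hourly prices; the actual numbers come from the price_provider.
    cpu_price = 0.031611      # per vCPU-hour (invented)
    ram_price = 0.004237      # per GB-hour (invented)
    container_cost = 0.010    # Dataproc fee per vCPU-hour (invented)

    # Hypothetical cluster: 1 master and 2 workers, each with 4 cores / 16 GB.
    groups = [(1, 4, 16), (2, 4, 16)]   # (node count, cores, mem_gb) per group
    machine_cost = sum(n * (c * cpu_price + m * ram_price) for n, c, m in groups)

    # The fee is now applied once over all cluster cores instead of
    # inside each node group's cost:
    total_cores = sum(n * c for n, c, _ in groups)
    dataproc_cost = container_cost * total_cores
    total = machine_cost + dataproc_cost      # ~0.7027 per hour

Since the fee scales linearly with core count, summing it per group or per cluster yields the same total for plain Dataproc; the point of hoisting it, per the commit title, appears to be that the Dataproc GKE estimator below reuses _calculate_group_cost and should no longer inherit the per-core fee.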
@@ -196,4 +196,5 @@ class DataprocGkeSavingsEstimator(DataprocSavingsEstimator):
     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
         workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
-        return master_cost + workers_cost
+        dataproc_gke_cost = self.price_provider.get_container_cost()
+        return master_cost + workers_cost + dataproc_gke_cost
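By contrast, the GKE estimator now adds get_container_cost() exactly once per cluster rather than scaling it by cores. A toy comparison using the same invented numbers as the sketch above:

    # Dataproc on GKE: the container cost is a single per-cluster addend here.
    machine_cost = 0.582708           # summed master + worker machine cost (invented)
    dataproc_gke_cost = 0.010         # get_container_cost(), applied once (invented)
    total = machine_cost + dataproc_gke_cost   # 0.592708 per hour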
6 changes: 4 additions & 2 deletions user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
@@ -300,7 +300,6 @@ def __calculate_dataproc_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: SparkNodeType):

         cores_cost = self.price_provider.get_cpu_price(node_mc_type) * int(cores_count)
         memory_cost = self.price_provider.get_ram_price(node_mc_type) * mem_gb
-        dataproc_cost = self.price_provider.get_container_cost() * int(cores_count)
         # calculate the GPU cost
         gpu_per_machine, gpu_type = cluster_inst.get_gpu_per_node(node_type)
         gpu_cost = 0.0
@@ -313,5 +312,8 @@ def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         if self.price_provider.name.casefold() == 'dataproc':
             master_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.MASTER)
             workers_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.WORKER)
-            total_cost = master_cost + workers_cost
+            master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
+            worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
+            dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
+            total_cost = master_cost + workers_cost + dataproc_cost
         return total_cost
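The onprem path applies the same cluster-level fee, but only when the comparison price provider is Dataproc, via the casefold() guard. A small illustration of that check (the non-matching provider name is invented for contrast):

    # str.casefold() gives aggressive case-insensitive matching, so any
    # capitalization of the provider name enables the container fee.
    for name in ('Dataproc', 'DATAPROC', 'onprem'):
        print(name, '->', name.casefold() == 'dataproc')
    # Dataproc -> True
    # DATAPROC -> True
    # onprem -> False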
