Skip to content

Commit

Permalink
Updating dataproc container cost to be multiplied by number of cores (#648)
Browse files Browse the repository at this point in the history

* Updating dataproc container cost to be multiplied by number of cores

Signed-off-by: mattahrens <[email protected]>

* Simplifying changes for only Dataproc and not Dataproc GKE

Signed-off-by: mattahrens <[email protected]>

* Fixing bug with extraneous dataproc_cost reference

Signed-off-by: mattahrens <[email protected]>

* Fixing pylint with lines too long

Signed-off-by: mattahrens <[email protected]>

* Fixing flake issue with indentation

Signed-off-by: mattahrens <[email protected]>

* Fixing flake issue with indentation

Signed-off-by: mattahrens <[email protected]>

* Fixing flake issue with whitespace

Signed-off-by: mattahrens <[email protected]>

* Fixing Dataproc GKE costs for dataproc container

Signed-off-by: mattahrens <[email protected]>

* Fixing Dataproc GKE costs for dataproc container

Signed-off-by: mattahrens <[email protected]>

---------

Signed-off-by: mattahrens <[email protected]>
  • Loading branch information
mattahrens authored Nov 3, 2023
1 parent 2f1a9fb commit 67200e9
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 7 deletions.
4 changes: 3 additions & 1 deletion user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,5 +545,7 @@ def _calculate_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: Spa
def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
dataproc_cost = self.price_provider.get_container_cost()
master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
return master_cost + workers_cost + dataproc_cost
5 changes: 2 additions & 3 deletions user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ class DataprocGkeSavingsEstimator(DataprocSavingsEstimator):
"""

def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
dataproc_cost = super()._get_cost_per_cluster(cluster)
dataproc_gke_cost = self.price_provider.get_container_cost()
return master_cost + workers_cost + dataproc_gke_cost
return dataproc_cost + dataproc_gke_cost
6 changes: 5 additions & 1 deletion user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
if self.price_provider.name.casefold() == 'dataproc':
master_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.MASTER)
workers_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.WORKER)
dataproc_cost = self.price_provider.get_container_cost()
master_cores = (cluster.get_nodes_cnt(SparkNodeType.MASTER)
* cluster.get_node_core_count(SparkNodeType.MASTER))
worker_cores = (cluster.get_nodes_cnt(SparkNodeType.WORKER)
* cluster.get_node_core_count(SparkNodeType.WORKER))
dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
total_cost = master_cost + workers_cost + dataproc_cost
return total_cost
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ class DataprocGkePriceProvider(DataprocPriceProvider):
name = 'DataprocGke'

def get_container_cost(self) -> float:
dataproc_cost = super().get_container_cost()
gke_container_cost = self.__get_gke_container_cost()
return dataproc_cost + gke_container_cost
return gke_container_cost

def __get_gke_container_cost(self) -> float:
lookup_key = 'CP-GKE-CONTAINER-MANAGMENT-COST'
Expand Down

0 comments on commit 67200e9

Please sign in to comment.