From 67200e971e4a2ffc51317ae6232fd1c3a6c63f6f Mon Sep 17 00:00:00 2001
From: Matt Ahrens
Date: Fri, 3 Nov 2023 14:39:49 -0500
Subject: [PATCH] Updating dataproc container cost to be multiplied by number
 of cores (#648)

* Updating dataproc container cost to be multiplied by number of cores

Signed-off-by: mattahrens

* Simplifying changes for only Dataproc and not Dataproc GKE

Signed-off-by: mattahrens

* Fixing bug with extraneous dataproc_cost reference

Signed-off-by: mattahrens

* Fixing pylint with lines too long

Signed-off-by: mattahrens

* Fixing flake issue with indentation

Signed-off-by: mattahrens

* Fixing flake issue with indentation

Signed-off-by: mattahrens

* Fixing flake issue with whitespace

Signed-off-by: mattahrens

* Fixing Dataproc GKE costs for dataproc container

Signed-off-by: mattahrens

* Fixing Dataproc GKE costs for dataproc container

Signed-off-by: mattahrens

---------

Signed-off-by: mattahrens
---
 user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py    | 4 +++-
 .../src/spark_rapids_pytools/cloud_api/dataproc_gke.py       | 5 ++---
 user_tools/src/spark_rapids_pytools/cloud_api/onprem.py      | 6 +++++-
 .../spark_rapids_pytools/pricing/dataproc_gke_pricing.py     | 3 +--
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
index d3a24e648..15d466ab9 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc.py
@@ -545,5 +545,7 @@ def _calculate_group_cost(self, cluster_inst: ClusterGetAccessor, node_type: Spa
     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
         workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
-        dataproc_cost = self.price_provider.get_container_cost()
+        master_cores = cluster.get_nodes_cnt(SparkNodeType.MASTER) * cluster.get_node_core_count(SparkNodeType.MASTER)
+        worker_cores = cluster.get_nodes_cnt(SparkNodeType.WORKER) * cluster.get_node_core_count(SparkNodeType.WORKER)
+        dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
         return master_cost + workers_cost + dataproc_cost
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py
index 06a71d342..6039914bd 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke.py
@@ -194,7 +194,6 @@ class DataprocGkeSavingsEstimator(DataprocSavingsEstimator):
     """

     def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
-        master_cost = self._calculate_group_cost(cluster, SparkNodeType.MASTER)
-        workers_cost = self._calculate_group_cost(cluster, SparkNodeType.WORKER)
+        dataproc_cost = super()._get_cost_per_cluster(cluster)
         dataproc_gke_cost = self.price_provider.get_container_cost()
-        return master_cost + workers_cost + dataproc_gke_cost
+        return dataproc_cost + dataproc_gke_cost
diff --git a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
index 28584617b..c2fc5f19b 100644
--- a/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
+++ b/user_tools/src/spark_rapids_pytools/cloud_api/onprem.py
@@ -312,6 +312,10 @@ def _get_cost_per_cluster(self, cluster: ClusterGetAccessor):
         if self.price_provider.name.casefold() == 'dataproc':
             master_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.MASTER)
             workers_cost = self.__calculate_dataproc_group_cost(cluster, SparkNodeType.WORKER)
-            dataproc_cost = self.price_provider.get_container_cost()
+            master_cores = (cluster.get_nodes_cnt(SparkNodeType.MASTER)
+                            * cluster.get_node_core_count(SparkNodeType.MASTER))
+            worker_cores = (cluster.get_nodes_cnt(SparkNodeType.WORKER)
+                            * cluster.get_node_core_count(SparkNodeType.WORKER))
+            dataproc_cost = self.price_provider.get_container_cost() * (master_cores + worker_cores)
             total_cost = master_cost + workers_cost + dataproc_cost
         return total_cost
diff --git a/user_tools/src/spark_rapids_pytools/pricing/dataproc_gke_pricing.py b/user_tools/src/spark_rapids_pytools/pricing/dataproc_gke_pricing.py
index 5c01d3296..3c06e370b 100644
--- a/user_tools/src/spark_rapids_pytools/pricing/dataproc_gke_pricing.py
+++ b/user_tools/src/spark_rapids_pytools/pricing/dataproc_gke_pricing.py
@@ -27,9 +27,8 @@ class DataprocGkePriceProvider(DataprocPriceProvider):
     name = 'DataprocGke'

     def get_container_cost(self) -> float:
-        dataproc_cost = super().get_container_cost()
         gke_container_cost = self.__get_gke_container_cost()
-        return dataproc_cost + gke_container_cost
+        return gke_container_cost

     def __get_gke_container_cost(self) -> float:
         lookup_key = 'CP-GKE-CONTAINER-MANAGMENT-COST'