Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unit tests for Dataproc GKE with mock GKE cluster #618

Merged
merged 1 commit into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions user_tools/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ dependencies = [
"pygments==2.15.0",
# used to apply validator on objects and models
"pydantic==2.1.1",
# used to help pylint understand pydantic
"pylint-pydantic==0.3.0",
# used for common API to access remote filesystems like local/s3/gcs/hdfs
# this will include numpy
"pyarrow==12.0.1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,35 @@
"""

from typing import ClassVar, Type
from pydantic import field_validator

from spark_rapids_tools.cloud.cluster import ClientCluster, register_client_cluster, ClusterPropMgr, register_cluster_prop_mgr
from spark_rapids_tools.cloud.cluster import ClientCluster, register_client_cluster, ClusterPropMgr, \
register_cluster_prop_mgr
from spark_rapids_tools.utils.propmanager import PropValidatorSchemaCamel, PropValidatorSchema


class DataprocClusterSchema(PropValidatorSchemaCamel):
    """Schema used to validate properties of a Dataproc cluster running on GCE."""
    cluster_name: str
    cluster_uuid: str
    project_id: str
    config: dict

    @field_validator('config')
    @classmethod
    def validate_config(cls, config: dict) -> dict:
        """
        Validates the cluster config to ensure it is for GCE instead of GKE.

        :param config: the parsed ``config`` section of the cluster properties.
        :return: the config unchanged when it describes a GCE cluster.
        :raises ValueError: if ``gceClusterConfig`` is absent, which indicates
            the properties belong to a GKE-backed (virtual) cluster instead.
        """
        if 'gceClusterConfig' not in config:
            raise ValueError("'gceClusterConfig' key is missing in config.")
        return config


class DataprocGkeClusterSchema(PropValidatorSchemaCamel):
    """Schema used to validate properties of a Dataproc cluster running on GKE.

    Unlike the GCE schema, a GKE-backed cluster carries an additional
    ``virtualClusterConfig`` section (mapped to ``virtual_cluster_config``).
    """
    cluster_name: str
    cluster_uuid: str
    project_id: str
    config: dict
    virtual_cluster_config: dict
nartal1 marked this conversation as resolved.
Show resolved Hide resolved


@register_cluster_prop_mgr('dataproc')
Expand All @@ -42,7 +54,7 @@ class DataprocClusterPropMgr(ClusterPropMgr):


@register_client_cluster('dataproc')
class DataprocClientCluster(ClientCluster):  # pylint: disable=too-few-public-methods
    """Client-side representation of a Dataproc (GCE) cluster."""


Expand All @@ -52,5 +64,5 @@ class DataprocGkeClusterPropMgr(ClusterPropMgr):


@register_client_cluster('dataproc_gke')
class DataprocGkeClientCluster(ClientCluster):  # pylint: disable=too-few-public-methods
    """Client-side representation of a Dataproc-on-GKE cluster."""
3 changes: 2 additions & 1 deletion user_tools/tests/spark_rapids_tools_ut/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def get_test_resources_path():
def gen_cpu_cluster_props():
return [
('dataproc', 'cluster/dataproc/cpu-00.yaml'),
('dataproc_gke', 'cluster/dataproc_gke/cpu-00.yaml'),
('emr', 'cluster/emr/cpu-00.json'),
('onprem', 'cluster/onprem/cpu-00.yaml'),
('databricks_aws', 'cluster/databricks/aws-cpu-00.json'),
Expand All @@ -43,7 +44,7 @@ def gen_cpu_cluster_props():
# all cpu_cluster_props except the onPrem entries (each entry is a
# (platform, properties-file-path) pair produced by gen_cpu_cluster_props)
csp_cpu_cluster_props = [(plat, path) for (plat, path) in all_cpu_cluster_props if plat != 'onprem']
# all csps except onprem
csps = ['dataproc', 'dataproc_gke', 'emr', 'databricks_aws', 'databricks_azure']
all_csps = csps + ['onprem']


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Test fixture: mock properties of a non-GPU Dataproc-on-GKE cluster
# (referenced from conftest.py as cluster/dataproc_gke/cpu-00.yaml).
# NOTE(review): indentation reconstructed per the Dataproc VirtualClusterConfig
# key hierarchy — confirm against the original fixture file.
clusterName: dataproc-gke-test-nongpu-cluster
clusterUuid: 11111111-1111-1111-1111-111111111111
config:
  softwareConfig: {}
labels:
  goog-dataproc-cluster-name: dataproc-gke-test-nongpu-cluster
  goog-dataproc-cluster-uuid: 11111111-1111-1111-1111-111111111111
  goog-dataproc-location: us-central1
projectId: dataproc-gke-project
status:
  state: RUNNING
  stateStartTime: '2022-12-06T23:21:07.637345Z'
statusHistory:
- state: CREATING
  stateStartTime: '2022-11-08T18:02:00.300481Z'
# presence of virtualClusterConfig (rather than a GCE gceClusterConfig under
# `config`) is what marks this as a GKE-backed cluster
virtualClusterConfig:
  auxiliaryServicesConfig:
    sparkHistoryServerConfig:
      dataprocCluster: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-phs-test
  kubernetesClusterConfig:
    gkeClusterConfig:
      gkeClusterTarget: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test
      # one node pool per role: controller, Spark driver, CPU executors
      nodePoolTarget:
      - nodePool: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test/nodePools/controller-pool
        roles:
        - DEFAULT
      - nodePool: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test/nodePools/driver-pool
        roles:
        - SPARK_DRIVER
      - nodePool: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test/nodePools/executor-pool-cpu
        roles:
        - SPARK_EXECUTOR
    kubernetesNamespace: dataproc-gke-test-nongpu-cluster
    kubernetesSoftwareConfig:
      componentVersion:
        SPARK: 3.1-dataproc-14
      properties:
        dataproc:dataproc.gke.agent.google-service-account: [email protected]
        dataproc:dataproc.gke.spark.driver.google-service-account: [email protected]
        dataproc:dataproc.gke.spark.executor.google-service-account: [email protected]
        dpgke:dpgke.unstable.outputOnly.endpoints.sparkHistoryServer: https://eeeeeeeeeeeeee-dot-us-central1.dataproc.googleusercontent.com/sparkhistory/?eventLogDirFilter=11111111-1111-1111-1111-111111111111
        spark:spark.eventLog.dir: gs://dataproc-gke-test-bucket/11111111-1111-1111-1111-111111111111/spark-job-history
        spark:spark.eventLog.enabled: 'true'
  stagingBucket: dataproc-gke-test-bucket
2 changes: 1 addition & 1 deletion user_tools/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ commands =

[testenv:pylint]
deps = pylint
commands = pylint -d fixme --rcfile=../.pylintrc \
commands = pylint -d fixme --load-plugins pylint_pydantic --rcfile=../.pylintrc \
tests \
src

Expand Down
Loading