From 1e4eeb4817420d1db7e511c18709e29b71896b91 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Tue, 10 Oct 2023 19:07:41 -0700 Subject: [PATCH] Update tests Signed-off-by: Partho Sarthi --- user_tools/pyproject.toml | 2 + .../cloud/dataproc/dataproccluster.py | 17 +++++-- user_tools/tests/mock_cluster.py | 3 +- .../tests/spark_rapids_tools_ut/conftest.py | 3 +- .../cluster/dataproc_gke/cpu-00.yaml | 44 +++++++++++++++++++ user_tools/tox.ini | 2 +- 6 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 user_tools/tests/spark_rapids_tools_ut/resources/cluster/dataproc_gke/cpu-00.yaml diff --git a/user_tools/pyproject.toml b/user_tools/pyproject.toml index fe3844f52..803708c94 100644 --- a/user_tools/pyproject.toml +++ b/user_tools/pyproject.toml @@ -33,6 +33,8 @@ dependencies = [ "pygments==2.15.0", # used to apply validator on objects and models "pydantic==2.1.1", + # used to help pylint understand pydantic + "pylint-pydantic==0.3.0", # used for common API to access remote filesystems like local/s3/gcs/hdfs # this will include numpy "pyarrow==12.0.1", diff --git a/user_tools/src/spark_rapids_tools/cloud/dataproc/dataproccluster.py b/user_tools/src/spark_rapids_tools/cloud/dataproc/dataproccluster.py index 6018a1e79..b9c995ece 100644 --- a/user_tools/src/spark_rapids_tools/cloud/dataproc/dataproccluster.py +++ b/user_tools/src/spark_rapids_tools/cloud/dataproc/dataproccluster.py @@ -17,23 +17,32 @@ """ from typing import ClassVar, Type +from pydantic import field_validator -from spark_rapids_tools.cloud.cluster import ClientCluster, register_client_cluster, ClusterPropMgr, register_cluster_prop_mgr +from spark_rapids_tools.cloud.cluster import ClientCluster, register_client_cluster, ClusterPropMgr, \ + register_cluster_prop_mgr from spark_rapids_tools.utils.propmanager import PropValidatorSchemaCamel, PropValidatorSchema -class DataprocClusterSchema(PropValidatorSchemaCamel): +class DataprocClusterSchema(PropValidatorSchemaCamel): # pylint: 
disable=missing-class-docstring cluster_name: str cluster_uuid: str project_id: str config: dict + @field_validator('config') + def validate_config(cls, config: dict) -> dict: + if 'gceClusterConfig' not in config: + raise ValueError("'gceClusterConfig' key is missing in config.") + return config + class DataprocGkeClusterSchema(PropValidatorSchemaCamel): cluster_name: str cluster_uuid: str project_id: str config: dict + virtual_cluster_config: dict @register_cluster_prop_mgr('dataproc') @@ -42,7 +51,7 @@ class DataprocClusterPropMgr(ClusterPropMgr): @register_client_cluster('dataproc') -class DataprocClientCluster(ClientCluster): # pylint: disable=too-few-public-methods +class DataprocClientCluster(ClientCluster): # pylint: disable=too-few-public-methods pass @@ -52,5 +61,5 @@ class DataprocGkeClusterPropMgr(ClusterPropMgr): @register_client_cluster('dataproc_gke') -class DataprocGkeClientCluster(ClientCluster): # pylint: disable=too-few-public-methods +class DataprocGkeClientCluster(ClientCluster): # pylint: disable=too-few-public-methods pass diff --git a/user_tools/tests/mock_cluster.py b/user_tools/tests/mock_cluster.py index 547de368b..65b74155f 100644 --- a/user_tools/tests/mock_cluster.py +++ b/user_tools/tests/mock_cluster.py @@ -34,7 +34,8 @@ "workerConfig": { "accelerators": [{ "acceleratorTypeUri": "https://www.googleapis.com/compute/beta/projects/project-id/zones/"\ - "us-central1-a/acceleratorTypes/nvidia-tesla-t4" + "us-central1-a/acceleratorTypes/nvidia-tesla-t4", + "acceleratorCount": 1, }], "instanceNames": [ "test-worker-0", diff --git a/user_tools/tests/spark_rapids_tools_ut/conftest.py b/user_tools/tests/spark_rapids_tools_ut/conftest.py index dbb5fb383..e29cefc3c 100644 --- a/user_tools/tests/spark_rapids_tools_ut/conftest.py +++ b/user_tools/tests/spark_rapids_tools_ut/conftest.py @@ -32,6 +32,7 @@ def get_test_resources_path(): def gen_cpu_cluster_props(): return [ ('dataproc', 'cluster/dataproc/cpu-00.yaml'), + ('dataproc_gke', 
'cluster/dataproc_gke/cpu-00.yaml'), ('emr', 'cluster/emr/cpu-00.json'), ('onprem', 'cluster/onprem/cpu-00.yaml'), ('databricks_aws', 'cluster/databricks/aws-cpu-00.json'), @@ -43,7 +44,7 @@ def gen_cpu_cluster_props(): # all cpu_cluster_props except the onPrem csp_cpu_cluster_props = [(e_1, e_2) for (e_1, e_2) in all_cpu_cluster_props if e_1 != 'onprem'] # all csps except onprem -csps = ['dataproc', 'emr', 'databricks_aws', 'databricks_azure'] +csps = ['dataproc', 'dataproc_gke', 'emr', 'databricks_aws', 'databricks_azure'] all_csps = csps + ['onprem'] diff --git a/user_tools/tests/spark_rapids_tools_ut/resources/cluster/dataproc_gke/cpu-00.yaml b/user_tools/tests/spark_rapids_tools_ut/resources/cluster/dataproc_gke/cpu-00.yaml new file mode 100644 index 000000000..39bd70ddf --- /dev/null +++ b/user_tools/tests/spark_rapids_tools_ut/resources/cluster/dataproc_gke/cpu-00.yaml @@ -0,0 +1,44 @@ +clusterName: dataproc-gke-test-nongpu-cluster +clusterUuid: 11111111-1111-1111-1111-111111111111 +config: + softwareConfig: {} +labels: + goog-dataproc-cluster-name: dataproc-gke-test-nongpu-cluster + goog-dataproc-cluster-uuid: 11111111-1111-1111-1111-111111111111 + goog-dataproc-location: us-central1 +projectId: dataproc-gke-project +status: + state: RUNNING + stateStartTime: '2022-12-06T23:21:07.637345Z' +statusHistory: +- state: CREATING + stateStartTime: '2022-11-08T18:02:00.300481Z' +virtualClusterConfig: + auxiliaryServicesConfig: + sparkHistoryServerConfig: + dataprocCluster: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-phs-test + kubernetesClusterConfig: + gkeClusterConfig: + gkeClusterTarget: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test + nodePoolTarget: + - nodePool: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test/nodePools/controller-pool + roles: + - DEFAULT + - nodePool: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test/nodePools/driver-pool + roles: + 
- SPARK_DRIVER + - nodePool: projects/dataproc-gke-project/regions/us-central1/clusters/dataproc-gke-test/nodePools/executor-pool-cpu + roles: + - SPARK_EXECUTOR + kubernetesNamespace: dataproc-gke-test-nongpu-cluster + kubernetesSoftwareConfig: + componentVersion: + SPARK: 3.1-dataproc-14 + properties: + dataproc:dataproc.gke.agent.google-service-account: eeeeeeeee@dataproc-gke-project.iam.gserviceaccount.com + dataproc:dataproc.gke.spark.driver.google-service-account: eeeeeeeee@dataproc-gke-project.iam.gserviceaccount.com + dataproc:dataproc.gke.spark.executor.google-service-account: eeeeeeeee@dataproc-gke-project.iam.gserviceaccount.com + dpgke:dpgke.unstable.outputOnly.endpoints.sparkHistoryServer: https://eeeeeeeeeeeeee-dot-us-central1.dataproc.googleusercontent.com/sparkhistory/?eventLogDirFilter=11111111-1111-1111-1111-111111111111 + spark:spark.eventLog.dir: gs://dataproc-gke-test-bucket/11111111-1111-1111-1111-111111111111/spark-job-history + spark:spark.eventLog.enabled: 'true' + stagingBucket: dataproc-gke-test-bucket diff --git a/user_tools/tox.ini b/user_tools/tox.ini index 031bf6f20..2ba7eb6b6 100644 --- a/user_tools/tox.ini +++ b/user_tools/tox.ini @@ -32,7 +32,7 @@ commands = [testenv:pylint] deps = pylint -commands = pylint -d fixme --rcfile=../.pylintrc \ +commands = pylint -d fixme --load-plugins pylint_pydantic --rcfile=../.pylintrc \ tests \ src