From 9c71e45e87f81a577ae413efef06d489a10556b4 Mon Sep 17 00:00:00 2001
From: Will Johnson
Date: Mon, 12 Dec 2022 00:25:53 -0600
Subject: [PATCH] Testing Overhaul

* Adding tests/environment folder to store datasets and bicep templates for test sources
* Added scripts to create databricks jobs and a notebook to mount storage on Databricks
* Making test environments more consistent across notebooks (secret scope, environment variables)
* A handful of tests were modified to correct mistakes not caught in source controlled versions
* Added documentation for testing environment including what secrets are used and what they look like
* Adding requirements.txt file for environment deployment
* Hive tests should run without additional intervention (i.e. use CREATE IF NOT EXISTS)
* Removing production env deployment
* Remove the wasbs with parameters test
* After updating all jobdefs to be ready for upload, the run-tests script needed to look at .name instead of .settings.name
* Unfortunately, when calling the jobs API, it returns a .settings.name which must be used
---
 .github/workflows/build-release.yml | 37 +-
 .gitignore | 1 +
 tests/environment/README.md | 122 ++++++
 tests/environment/config.json | 366 +++++++++++++++++
 tests/environment/datasets/azsql.sql | 27 ++
 tests/environment/datasets/make-data.py | 112 ++++++
 tests/environment/datasets/sqlpool.sql | 30 ++
 tests/environment/dbfs/create-job.py | 48 +++
 tests/environment/dbfs/mounts.py | 34 ++
 tests/environment/requirements.txt | 29 ++
 tests/environment/sources/adlsg2.bicep | 30 ++
 tests/environment/sources/sql.bicep | 22 +
 tests/environment/sources/sqlserver.bicep | 21 +
 tests/environment/sources/synapse.bicep | 75 ++++
 .../sparksubmit-test-def.json | 2 +-
 .../integration/jobdefs/hive3-tests-def.json | 178 ++++-----
 .../jobdefs/hive3-tests-expectations.json | 12 +-
 .../integration/jobdefs/jarjob-test-def.json | 48 ++-
 .../jobdefs/jarjob-test-expectations.json | 2 +-
 .../jobdefs/pythonscript-test-def.json | 72 ++--
 .../pythonscript-test-expectations.json | 2 +-
 .../jobdefs/pythonwheel-test-def.json | 46 +--
 .../pythonwheel-test-expectations.json | 2 +-
 .../integration/jobdefs/spark2-tests-def.json | 116 +++---
 .../jobdefs/spark2-tests-expectations.json | 6 +-
 .../integration/jobdefs/spark3-tests-def.json | 376 +++++++++---------
 .../jobdefs/spark3-tests-expectations.json | 28 +-
 .../wasbs-in-wasbs-out-with-param-def.json | 25 --
 ...-in-wasbs-out-with-param-expectations.json | 6 -
 tests/integration/run-test.sh | 4 +-
 .../app/src/main/java/SparkApp/Basic/App.java | 2 +-
 .../notebooks/abfss-in-abfss-out-oauth.scala | 6 +-
 .../notebooks/abfss-in-abfss-out-root.scala | 2 +-
 .../notebooks/abfss-in-abfss-out.scala | 2 +-
 ...abfss-in-hive+notmgd+saveAsTable-out.scala | 2 +-
 .../abfss-in-hive+saveAsTable-out.scala | 2 +-
 .../notebooks/azuresql-in-azuresql-out.scala | 8 +-
 .../notebooks/call-via-adf-spark2.scala | 2 +-
 .../notebooks/call-via-adf-spark3.scala | 2 +-
 .../notebooks/delta-in-delta-merge.scala | 8 +-
 .../notebooks/delta-in-delta-out-abfss.scala | 8 +-
 .../notebooks/delta-in-delta-out-fs.scala | 8 +-
 .../notebooks/delta-in-delta-out-mnt.scala | 8 +-
 .../hive+abfss-in-hive+abfss-out-insert.py | 44 +-
 ...ault-in-hive+mgd+not+default-out-insert.py | 37 +-
 .../notebooks/hive-in-hive-out-insert.py | 27 +-
 .../notebooks/intermix-languages.scala | 5 +-
 .../spark-apps/notebooks/mnt-in-mnt-out.scala | 2 +-
 .../notebooks/name-with-periods.scala | 2 +-
 .../spark-apps/notebooks/nested-child.scala | 2 +-
 .../spark-sql-table-in-abfss-out.scala | 2 +-
.../notebooks/synapse-in-synapse-out.scala | 20 +- .../notebooks/synapse-in-wasbs-out.scala | 22 +- .../synapse-wasbs-in-synapse-out.scala | 22 +- .../wasbs-in-wasbs-out-with-param.py | 39 -- .../notebooks/wasbs-in-wasbs-out.scala | 8 +- .../spark-apps/pythonscript/pythonscript.json | 2 +- .../spark-apps/sparksubmit/sparksubmit.json | 2 +- .../abfssInAbfssOut/abfssintest/main.py | 2 +- .../wheeljobs/abfssInAbfssOut/db-job-def.json | 18 - 60 files changed, 1486 insertions(+), 709 deletions(-) create mode 100644 tests/environment/README.md create mode 100644 tests/environment/config.json create mode 100644 tests/environment/datasets/azsql.sql create mode 100644 tests/environment/datasets/make-data.py create mode 100644 tests/environment/datasets/sqlpool.sql create mode 100644 tests/environment/dbfs/create-job.py create mode 100644 tests/environment/dbfs/mounts.py create mode 100644 tests/environment/requirements.txt create mode 100644 tests/environment/sources/adlsg2.bicep create mode 100644 tests/environment/sources/sql.bicep create mode 100644 tests/environment/sources/sqlserver.bicep create mode 100644 tests/environment/sources/synapse.bicep delete mode 100644 tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-def.json delete mode 100644 tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-expectations.json delete mode 100644 tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out-with-param.py delete mode 100644 tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/db-job-def.json diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 1de752da..34b8b97a 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -85,18 +85,19 @@ jobs: name: FunctionZip path: ./artifacts - - name: Azure Functions Action + - name: Deploy Azure Function to Integration Env uses: Azure/functions-action@v1.4.6 with: app-name: ${{ secrets.INT_FUNC_NAME }} package: ./artifacts/FunctionZip.zip publish-profile: ${{ secrets.INT_PUBLISH_PROFILE }} - - uses: azure/login@v1 + - name: Azure Login + uses: azure/login@v1 with: creds: ${{ secrets.INT_AZ_CLI_CREDENTIALS }} - - name: Azure CLI script + - name: Compare and Update App Settings on Deployed Function uses: azure/CLI@v1 with: azcliversion: 2.34.1 @@ -108,7 +109,7 @@ jobs: # Start up Synapse Pool and Execute Tests - name: Start Integration Synapse SQL Pool - run: source tests/integration/manage-sql-pool.sh start ${{ secrets.INT_SUBSCRIPTION_ID }} ${{ secrets.INT_RG_NAME }} ${{ secrets.INT_SYNAPSE_WKSP_NAME }} ${{ secrets.INT_SYNAPSE_SQLPOOL_NAME }} + run: source tests/integration/manage-sql-pool.sh start ${{ secrets.INT_SUBSCRIPTION_ID }} ${{ secrets.INT_SYNAPSE_SQLPOOL_RG_NAME }} ${{ secrets.INT_SYNAPSE_WKSP_NAME }} ${{ secrets.INT_SYNAPSE_SQLPOOL_NAME }} env: AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }} @@ -124,6 +125,10 @@ jobs: token = ${{ secrets.INT_DATABRICKS_ACCESS_TOKEN }}" > ./config.ini export DATABRICKS_CONFIG_FILE=./config.ini + - name: Confirm Databricks CLI is configured + run: databricks clusters spark-versions + env: + DATABRICKS_CONFIG_FILE: ./config.ini - name: Cleanup Integration Environment run: python ./tests/integration/runner.py --cleanup --dontwait None None None @@ -144,7 +149,7 @@ jobs: DATABRICKS_CONFIG_FILE: ./config.ini - name: Stop Integration Synapse SQL Pool - run: source tests/integration/manage-sql-pool.sh stop ${{ secrets.INT_SUBSCRIPTION_ID }} ${{ secrets.INT_RG_NAME }} ${{ 
secrets.INT_SYNAPSE_WKSP_NAME }} ${{ secrets.INT_SYNAPSE_SQLPOOL_NAME }} + run: source tests/integration/manage-sql-pool.sh stop ${{ secrets.INT_SUBSCRIPTION_ID }} ${{ secrets.INT_SYNAPSE_SQLPOOL_RG_NAME }} ${{ secrets.INT_SYNAPSE_WKSP_NAME }} ${{ secrets.INT_SYNAPSE_SQLPOOL_NAME }} env: AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }} @@ -172,25 +177,3 @@ jobs: with: artifacts: ~/artifacts/FunctionZip.zip token: ${{ secrets.GITHUB_TOKEN }} - - deployProductionEnvironment: - name: Release to Production Environment - needs: [createRelease] - runs-on: ubuntu-latest - environment: - name: Production - steps: - - uses: actions/checkout@v3 - - - name: Download Artifact - uses: actions/download-artifact@v3 - with: - name: FunctionZip - path: ./artifacts - - - name: Azure Functions Action - uses: Azure/functions-action@v1.4.6 - with: - app-name: ${{ secrets.FUNC_NAME }} - package: ./artifacts/FunctionZip.zip - publish-profile: ${{ secrets.PUBLISH_PROFILE }} diff --git a/.gitignore b/.gitignore index 0c3849b5..9343284f 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,4 @@ build # Ignore local settings localsettingsdutils.py +*.ini diff --git a/tests/environment/README.md b/tests/environment/README.md new file mode 100644 index 00000000..7ae4daa9 --- /dev/null +++ b/tests/environment/README.md @@ -0,0 +1,122 @@ +# Deploying the Test Environment + +## Deploying the Connector + +## Deploying the Data Sources + +``` +az deployment group create \ +--template-file ./tests/environment/sources/adlsg2.bicep \ +--resource-group db2pvsasources + +``` + +## Manual Steps + +Create a config.ini file: + +```ini +databricks_workspace_host_id = adb-workspace.id +databricks_personal_access_token = PERSONAL_ACCESS_TOKEN +databricks_spark3_cluster = CLUSTER_ID +databricks_spark2_cluster = CLUSTER_ID +``` + +Assign Service Principal Storage Blob Data Contributor to the main ADLS G2 instance + +Add Service Principal as user in Databricks. + +Enable mount points with `./tests/environment/dbfs/mounts.py` + +Add Key Vault Secrets + * `tenant-id` + * `storage-service-key` + * `azuresql-username` + * `azuresql-password` + * `azuresql-jdbc-conn-str` should be of the form `jdbc:sqlserver://SERVER_NAME.database.windows.net:1433;database=DATABASE_NAME;encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;` + * `synapse-storage-key` + * `synapse-query-username` + * `synapse-query-password` +* Update SQL Db and Synapse Server with AAD Admin +* Add Service Principal for Databricks to connect to SQL sources + +Set the following system environments: + +* `SYNAPSE_SERVICE_NAME` +* `STORAGE_SERVICE_NAME` +* `SYNAPSE_STORAGE_SERVICE_NAME` + +Upload notebooks in `./tests/integration/spark-apps/notebooks/` to dbfs' `/Shared/examples/` + +* Manually for now. TODO: Automate this in Python + +Compile the following apps and upload them to `/dbfs/FileStore/testcases/` + +* `./tests/integration/spark-apps/jarjobs/abfssInAbfssOut/` with `./gradlew build` +* `./tests/integration/spark-apps/pythonscript/pythonscript.py` by just uploading. 
+* `./tests/integration/spark-apps/wheeljobs/abfssintest/` with `python -m build` + +Upload the job definitions using the python script `python .\tests\environment\dbfs\create-job.py` + +## Github Actions + +* AZURE_CLIENT_ID +* AZURE_CLIENT_SECRET +* AZURE_TENANT_ID +* INT_AZ_CLI_CREDENTIALS + ```json + { + "clientId": "xxxx", + "clientSecret": "yyyy", + "subscriptionId": "zzzz", + "tenantId": "μμμμ", + "activeDirectoryEndpointUrl": "https://login.microsoftonline.com", + "resourceManagerEndpointUrl": "https://management.azure.com/", + "activeDirectoryGraphResourceId": "https://graph.windows.net/", + "sqlManagementEndpointUrl": "https://management.core.windows.net:8443/", + "galleryEndpointUrl": "https://gallery.azure.com/", + "managementEndpointUrl": "https://management.core.windows.net/" + } + ``` +* INT_DATABRICKS_ACCESS_TOKEN +* INT_DATABRICKS_WKSP_ID: adb-xxxx.y +* INT_FUNC_NAME +* INT_PUBLISH_PROFILE from the Azure Function's publish profile XML +* INT_PURVIEW_NAME +* INT_RG_NAME +* INT_SUBSCRIPTION_ID +* INT_SYNAPSE_SQLPOOL_NAME +* INT_SYNAPSE_WKSP_NAME +* INT_SYNAPSE_WKSP_NAME + +## config.json + +```json +{ + "datasets":{ + "datasetName": { + "schema": [ + "field1", + "field2" + ], + "data": [ + [ + "val1", + "val2" + ] + ] + } + }, + "jobs": { + "job-name": [ + [ + ("storage"|"sql"|"noop"), + ("csv"|"delta"|"azuresql"|"synapse"), + "rawdata/testcase/one/", + "exampleInputA" + ] + ] + } +} + +``` diff --git a/tests/environment/config.json b/tests/environment/config.json new file mode 100644 index 00000000..79e47213 --- /dev/null +++ b/tests/environment/config.json @@ -0,0 +1,366 @@ +{ + "dataset": { + "exampleInputA": { + "schema": [ + "id", + "postalCode", + "street" + ], + "data": [ + [ + 1, + "555", + "742 Evergreen Terrace" + ] + ] + }, + "exampleInputB": { + "schema": [ + "id", + "city", + "stateAbbreviation" + ], + "data": [ + [ + 1, + "Springfield", + "??" 
+ ] + ] + } + }, + "jobs": { + "jarjobs-abfssInAbfssOut": [ + [ + "storage", + "csv", + "rawdata/testcase/eighteen/", + "exampleInputA" + ] + ], + "pythonscript-pythonscript.py": [ + [ + "storage", + "csv", + "rawdata/testcase/twenty/", + "exampleInputA" + ] + ], + "wheeljobs-abfssintest": [ + [ + "storage", + "csv", + "rawdata/testcase/seventeen/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/seventeen/", + "exampleInputB" + ] + ], + "abfss-in-abfss-out-oauth.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/two/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/two/", + "exampleInputB" + ] + ], + "abfss-in-abfss-out-root.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/three/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/three/", + "exampleInputB" + ] + ], + "abfss-in-abfss-out.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/one/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/one/", + "exampleInputB" + ] + ], + "abfss-in-hive+notmgd+saveAsTable-out.scala": [ + [ + "storage", + "delta", + "rawdata/testcase/abfss-in-hive+notmgd+saveAsTable-out/", + "exampleInputA" + ] + ], + "abfss-in-hive+saveAsTable-out.scala": [ + [ + "storage", + "delta", + "rawdata/testcase/abfss-in-hive+saveAsTable-out/", + "exampleInputA" + ] + ], + "azuresql-in-azuresql-out.scala": [ + [ + "azuresql", + "table", + "dbo", + "exampleInputA" + ], + [ + "azuresql", + "table", + "dbo", + "exampleInputB" + ], + [ + "azuresql", + "table", + "dbo.exampleInputC" + ], + [ + "azuresql", + "table", + "dbo.exampleOutput" + ] + ], + "call-via-adf-spark2.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/thirteen/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/thirteen/", + "exampleInputB" + ] + ], + "call-via-adf-spark3.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/fourteen/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/fourteen/", + "exampleInputB" + ] + ], + "delta-in-delta-merge.scala": [ + [ + "storage", + "delta", + "rawdata/testcase/sixteen/", + "exampleInputA" + ], + [ + "storage", + "delta", + "rawdata/testcase/sixteen/", + "exampleInputB" + ] + ], + "delta-in-delta-out-abfss.scala": [ + [ + "storage", + "delta", + "rawdata/testcase/four/", + "exampleInputA" + ], + [ + "storage", + "delta", + "rawdata/testcase/four/", + "exampleInputB" + ] + ], + "delta-in-delta-out-fs.scala": [ + [ + "storage", + "delta", + "rawdata/testcase/five/", + "exampleInputA" + ], + [ + "storage", + "delta", + "rawdata/testcase/five/", + "exampleInputB" + ] + ], + "delta-in-delta-out-mnt.scala": [ + [ + "storage", + "delta", + "rawdata/testcase/six/", + "exampleInputA" + ], + [ + "storage", + "delta", + "rawdata/testcase/six/", + "exampleInputB" + ] + ], + "hive-in-hive-out-insert.py": [ + [ + "noop" + ] + ], + "hive+abfss-in-hive+abfss-out-insert.py": [ + [ + "storage", + "delta", + "rawdata/testcase/twentyone/", + "exampleInputA" + ] + ], + "hive+mgd+not+default-in-hive+mgd+not+default-out-insert.py": [ + [ + "noop" + ] + ], + "hive+mnt-in-hive+mnt-out-insert.py": [ + [ + "noop" + ] + ], + "intermix-languages.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/fifteen/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/fifteen/", + "exampleInputB" + ] + ], + "mnt-in-mnt-out.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/seven/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/seven/", + "exampleInputB" + ] + ], + 
"name-with-periods.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/namewithperiods/", + "exampleInputA" + ] + ], + "nested-child.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/eight/", + "exampleInputA" + ] + ], + "nested-parent.scala": [ + [ + "noop" + ] + ], + "spark-sql-table-in-abfss-out.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/nine/", + "exampleInputB" + ] + ], + "synapse-in-synapse-out.scala": [ + [ + "synapse", + "table", + "dbo", + "exampleInputA" + ], + [ + "synapse", + "table", + "Sales", + "Region" + ] + ], + "synapse-in-wasbs-out.scala": [ + [ + "synapse", + "table", + "dbo", + "exampleInputA" + ], + [ + "synapse", + "table", + "dbo", + "exampleInputB" + ] + ], + "synapse-wasbs-in-synapse-out.scala": [ + [ + "synapse", + "table", + "dbo", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/eleven/", + "exampleInputA" + ] + ], + "wasbs-in-wasbs-out.scala": [ + [ + "storage", + "csv", + "rawdata/testcase/wasinwasout/", + "exampleInputA" + ], + [ + "storage", + "csv", + "rawdata/testcase/wasinwasout/", + "exampleInputB" + ] + ] + } +} \ No newline at end of file diff --git a/tests/environment/datasets/azsql.sql b/tests/environment/datasets/azsql.sql new file mode 100644 index 00000000..8eeba280 --- /dev/null +++ b/tests/environment/datasets/azsql.sql @@ -0,0 +1,27 @@ +CREATE SCHEMA nondbo + +CREATE TABLE nondbo.exampleInputC ( +id int +,cityPopulation int +) + +CREATE TABLE dbo.exampleInputB ( +id int +,city varchar(30) +,stateAbbreviation varchar(2) +) + +CREATE TABLE dbo.exampleInputA ( +id int +,postalcode varchar(5) +,street varchar(50) +) + +INSERT INTO nondbo.exampleInputC(id, cityPopulation) +VALUES(1, 1000) + +INSERT INTO dbo.exampleInputB(id, city, stateAbbreviation) +VALUES(1, 'Springfield', '??') + +INSERT INTO dbo.exampleInputA(id, postalcode, street) +VALUES(1, '55555', '742 Evergreen Terrace') diff --git a/tests/environment/datasets/make-data.py b/tests/environment/datasets/make-data.py new file mode 100644 index 00000000..21fb1a08 --- /dev/null +++ b/tests/environment/datasets/make-data.py @@ -0,0 +1,112 @@ +import argparse +import configparser +from io import BytesIO +import json +import pathlib +import re + + +from azure.identity import DefaultAzureCredential +from azure.storage.blob import BlobServiceClient, BlobClient + +def make_or_get_connection_client(connection_string, cached_connections, **kwargs): + if connection_string in cached_connections: + return cached_connections[connection_string] + + elif re.search(r'EndpointSuffix=', connection_string): # Is Blob + _client = BlobServiceClient.from_connection_string(connection_string) + cached_connections[connection_string] = _client + return _client + else: + raise NotImplementedError("Connection String not supported") + + +def make_and_upload_data(client, storage_path, dataset_name, storage_format, data): + if isinstance(client, BlobServiceClient): + blob_full_path = pathlib.Path(storage_path) + container = blob_full_path.parts[0] + blob_relative_path = '/'.join(list(blob_full_path.parts[1:])+[dataset_name, dataset_name+"."+storage_format]) + + _blob_client = client.get_blob_client(container, blob_relative_path) + blob_stream = BytesIO() + with blob_stream as fp: + for row in data["data"]: + fp.write(bytes(','.join(str(r) for r in row), encoding="utf-8")) + fp.seek(0) + _blob_client.upload_blob(blob_stream.read(), blob_type="BlockBlob", overwrite=True) + else: + raise NotImplementedError(f"{type(client)} not supported") + + +if __name__ == "__main__": + parser = 
argparse.ArgumentParser() + parser.add_argument("--test_case", "-t", type=str, action="append", help="Name of the test case(s) to be deployed. If not specified, upload all datasets") + parser.add_argument("--config", type=str, help="Path to the json config file", default="./tests/environment/config.json") + parser.add_argument("--ini", type=str, help="Path to the ini config file", default="./tests/environment/config.ini") + args = parser.parse_args() + + # Datasets + ## CSV + ## Parquet + ## Delta + ## SQL + ## COSMOS + ## Kusto + + # Load Test Cases + ## jobs and dataset + _connections = configparser.ConfigParser() + _connections.read(args.ini) + + with open(args.config, 'r') as fp: + _config = json.load(fp) + TEST_JOBS = _config["jobs"] + TEST_DATASET = _config["dataset"] + + # Filter based on test cases provided + if args.test_case: + print(args.test_case) + jobs_to_build_data = {k:v for k,v in TEST_JOBS.items() if k in args.test_case} + else: + jobs_to_build_data = TEST_JOBS + + + # Make the data only one time + cached_data = {} + # Make the connections only one time + cached_connections = {} + # Iterate over every job and build the dataset + for job_name, dataset_def in jobs_to_build_data.items(): + if len(dataset_def) == 0 or dataset_def[0] == ["noop"]: + print(f"{job_name}: skipped") + continue + + for dataset in dataset_def: + _connection_name = dataset[0] + _storage_format = dataset[1] + _storage_path = dataset[2] + _dataset_name = dataset[3] + + print(f"{job_name}: {_storage_path}") + + _connection_string = _connections["DEFAULT"][_connection_name+"_connection_string"] + + _client = make_or_get_connection_client(_connection_string, cached_connections) + + _data = TEST_DATASET[_dataset_name] + + make_and_upload_data( + _client, + _storage_path, + _dataset_name, + _storage_format, + _data + ) + + + + # Check which storage engine is necessary + # Check what format the data will be stored in + # Check the pat + + \ No newline at end of file diff --git a/tests/environment/datasets/sqlpool.sql b/tests/environment/datasets/sqlpool.sql new file mode 100644 index 00000000..3b357c2a --- /dev/null +++ b/tests/environment/datasets/sqlpool.sql @@ -0,0 +1,30 @@ +CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'xxxx' ; /* Necessary for Synapse External tables */ +CREATE SCHEMA Sales + +CREATE TABLE Sales.Region ( +id int +,regionId int +) + +CREATE TABLE dbo.exampleInputB ( +id int +,city varchar(30) +,stateAbbreviation varchar(2) +) + +CREATE TABLE dbo.exampleInputA ( +id int +,postalcode varchar(5) +,street varchar(50) +) + + + +INSERT INTO Sales.Region(id, regionId) +VALUES(1, 1000) + +INSERT INTO dbo.exampleInputB(id, city, stateAbbreviation) +VALUES(1, 'Springfield', '??') + +INSERT INTO dbo.exampleInputA(id, postalcode, street) +VALUES(1, '55555', '742 Evergreen Terrace') diff --git a/tests/environment/dbfs/create-job.py b/tests/environment/dbfs/create-job.py new file mode 100644 index 00000000..3e7f7bee --- /dev/null +++ b/tests/environment/dbfs/create-job.py @@ -0,0 +1,48 @@ +# https://learn.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/workspace#--import +import argparse +import configparser +import json +import os + +import requests + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--folder", default="./tests/integration/jobdefs") + parser.add_argument("--ini", default="./tests/environment/config.ini") + args = parser.parse_args() + + cfp = configparser.ConfigParser() + + cfp.read(args.ini) + db_host_id = 
cfp["DEFAULT"]["databricks_workspace_host_id"] + db_pat = cfp["DEFAULT"]["databricks_personal_access_token"] + + JOB_URL = f"https://{db_host_id}.azuredatabricks.net/api/2.1/jobs/create" + for job_def in os.listdir(args.folder): + if not job_def.endswith("-def.json"): + continue + + print(job_def) + with open(os.path.join(args.folder, job_def), 'r') as fp: + job_json = json.load(fp) + + job_str = json.dumps(job_json) + if job_def.startswith("spark2"): + job_str = job_str.replace("", cfp["DEFAULT"]["databricks_spark2_cluster"]) + else: + job_str = job_str.replace("", cfp["DEFAULT"]["databricks_spark3_cluster"]) + + job_json_to_submit = json.loads(job_str) + + resp = requests.post( + url=JOB_URL, + json=job_json_to_submit, + headers={ + "Authorization": f"Bearer {db_pat}" + } + ) + print(resp.content) + + diff --git a/tests/environment/dbfs/mounts.py b/tests/environment/dbfs/mounts.py new file mode 100644 index 00000000..d03a9429 --- /dev/null +++ b/tests/environment/dbfs/mounts.py @@ -0,0 +1,34 @@ +# Databricks notebook source +import os + +storage_acct_name = os.environ.get("STORAGE_SERVICE_NAME") +configs = {"fs.azure.account.auth.type": "OAuth", + "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", + "fs.azure.account.oauth2.client.id": dbutils.secrets.get("purview-to-adb-kv", 'clientIdKey'), + "fs.azure.account.oauth2.client.secret": dbutils.secrets.get("purview-to-adb-kv", 'clientSecretKey'), + "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{dbutils.secrets.get('purview-to-adb-kv', 'tenant-id')}/oauth2/token"} + +# COMMAND ---------- + +# Optionally, you can add to the source URI of your mount point. +try: + dbutils.fs.mount( + source = f"abfss://rawdata@{storage_acct_name}.dfs.core.windows.net/", + mount_point = "/mnt/rawdata", + extra_configs = configs) +except Exception as e: + print(e) + +# COMMAND ---------- + +try: + dbutils.fs.mount( + source = f"abfss://outputdata@{storage_acct_name}.dfs.core.windows.net/", + mount_point = "/mnt/outputdata", + extra_configs = configs) +except Exception as e: + print(e) + +# COMMAND ---------- + + diff --git a/tests/environment/requirements.txt b/tests/environment/requirements.txt new file mode 100644 index 00000000..30cf059f --- /dev/null +++ b/tests/environment/requirements.txt @@ -0,0 +1,29 @@ +azure-core==1.26.1 +azure-identity==1.12.0 +azure-storage-blob==12.14.1 +build==0.9.0 +certifi==2022.12.7 +cffi==1.15.1 +charset-normalizer==2.1.1 +colorama==0.4.6 +cryptography==38.0.4 +idna==3.4 +importlib-metadata==5.1.0 +isodate==0.6.1 +msal==1.20.0 +msal-extensions==1.0.0 +msrest==0.7.1 +oauthlib==3.2.2 +packaging==22.0 +pep517==0.13.0 +portalocker==2.6.0 +pycparser==2.21 +PyJWT==2.6.0 +pywin32==305 +requests==2.28.1 +requests-oauthlib==1.3.1 +six==1.16.0 +tomli==2.0.1 +typing_extensions==4.4.0 +urllib3==1.26.13 +zipp==3.11.0 diff --git a/tests/environment/sources/adlsg2.bicep b/tests/environment/sources/adlsg2.bicep new file mode 100644 index 00000000..de698838 --- /dev/null +++ b/tests/environment/sources/adlsg2.bicep @@ -0,0 +1,30 @@ +@description('Location of the data factory.') +param location string = resourceGroup().location + +@description('Name of the Azure storage account that contains the input/output data.') +param storageAccountName string = 'storage${uniqueString(resourceGroup().id)}' + +resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = { + name: storageAccountName + location: location + sku: { + name: 'Standard_LRS' + } + 
kind: 'StorageV2' + properties:{ + isHnsEnabled: true + } + +} + +resource rawdataContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { + name: '${storageAccount.name}/default/rawdata' +} + +resource writeToRootContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { + name: '${storageAccount.name}/default/writetoroot' +} + +resource outputdataContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { + name: '${storageAccount.name}/default/outputdata' +} diff --git a/tests/environment/sources/sql.bicep b/tests/environment/sources/sql.bicep new file mode 100644 index 00000000..af14c468 --- /dev/null +++ b/tests/environment/sources/sql.bicep @@ -0,0 +1,22 @@ +@description('The name of the SQL logical server.') +param serverName string = uniqueString('sql', resourceGroup().id) + +@description('The name of the SQL Database.') +param sqlDBName string = 'SampleDB' + +@description('Location for all resources.') +param location string = resourceGroup().location + +resource sqlServer 'Microsoft.Sql/servers@2022-05-01-preview' existing = { + name: serverName +} + +resource sqlDB 'Microsoft.Sql/servers/databases@2022-05-01-preview' = { + parent: sqlServer + name: sqlDBName + location: location + sku: { + name: 'Basic' + tier: 'Basic' + } +} diff --git a/tests/environment/sources/sqlserver.bicep b/tests/environment/sources/sqlserver.bicep new file mode 100644 index 00000000..6d503a17 --- /dev/null +++ b/tests/environment/sources/sqlserver.bicep @@ -0,0 +1,21 @@ +@description('The name of the SQL logical server.') +param serverName string = uniqueString('sql', resourceGroup().id) + +@description('Location for all resources.') +param location string = resourceGroup().location + +@description('The administrator username of the SQL logical server.') +param administratorLogin string + +@description('The administrator password of the SQL logical server.') +@secure() +param administratorLoginPassword string + +resource sqlServer 'Microsoft.Sql/servers@2022-05-01-preview' = { + name: serverName + location: location + properties: { + administratorLogin: administratorLogin + administratorLoginPassword: administratorLoginPassword + } +} diff --git a/tests/environment/sources/synapse.bicep b/tests/environment/sources/synapse.bicep new file mode 100644 index 00000000..916ac652 --- /dev/null +++ b/tests/environment/sources/synapse.bicep @@ -0,0 +1,75 @@ +@description('The Synapse Workspace name.') +param workspaceName string = uniqueString('synwksp', resourceGroup().id) + +@description('Location for all resources.') +param location string = resourceGroup().location + +@description('The administrator username of the SQL logical server.') +@secure() +param administratorLogin string + +@description('The administrator password of the SQL logical server.') +@secure() +param administratorLoginPassword string + +var supportingStorageName = '${workspaceName}sa' + +resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = { + name: supportingStorageName + location: location + sku: { + name: 'Standard_LRS' + } + kind: 'StorageV2' + properties:{ + isHnsEnabled: true + } + +} + +resource rawdataContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { + name: '${storageAccount.name}/default/defaultcontainer' +} + +resource tempContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { + name: '${storageAccount.name}/default/temp' +} + +resource synapseWorkspace 
'Microsoft.Synapse/workspaces@2021-06-01' = { + name: workspaceName + location: location + identity: { + type: 'SystemAssigned' + } + properties: { + azureADOnlyAuthentication: false + defaultDataLakeStorage: { + accountUrl: 'https://${storageAccount.name}.dfs.core.windows.net' + createManagedPrivateEndpoint: false + filesystem: 'synapsefs' + resourceId: resourceId('Microsoft.Storage/storageAccounts/', storageAccount.name) + } + managedResourceGroupName: '${workspaceName}rg' + + publicNetworkAccess: 'Enabled' + sqlAdministratorLogin: administratorLogin + sqlAdministratorLoginPassword: administratorLoginPassword + trustedServiceBypassEnabled: true + } +} + +resource symbolicname 'Microsoft.Synapse/workspaces/sqlPools@2021-06-01' = { + name: 'sqlpool1' + location: location + sku: { + name: 'DW100c' + capacity: 0 + } + parent: synapseWorkspace + properties: { + collation: 'SQL_Latin1_General_CP1_CI_AS' + createMode: 'Default' + + storageAccountType: 'LRS' + } +} diff --git a/tests/integration/jobdefs-inactive/sparksubmit-test-def.json b/tests/integration/jobdefs-inactive/sparksubmit-test-def.json index 8400a9e5..508c6090 100644 --- a/tests/integration/jobdefs-inactive/sparksubmit-test-def.json +++ b/tests/integration/jobdefs-inactive/sparksubmit-test-def.json @@ -19,7 +19,7 @@ "cluster_name": "", "spark_version": "9.1.x-scala2.12", "spark_conf": { - "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-scope/Ol-Output-Api-Key}}", + "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", "spark.openlineage.host": "https://.azurewebsites.net", "spark.openlineage.namespace": "#ABC123", "spark.openlineage.version": "1" diff --git a/tests/integration/jobdefs/hive3-tests-def.json b/tests/integration/jobdefs/hive3-tests-def.json index 25aea815..8fb99963 100644 --- a/tests/integration/jobdefs/hive3-tests-def.json +++ b/tests/integration/jobdefs/hive3-tests-def.json @@ -1,98 +1,96 @@ { - "settings": { - "name": "hive3-tests", - "email_notifications": { - "no_alert_for_skipped_runs": false + "name": "hive3-tests", + "email_notifications": { + "no_alert_for_skipped_runs": false + }, + "timeout_seconds": 0, + "max_concurrent_runs": 1, + "tasks": [ + { + "task_key": "hive-in-hive-out-insert", + "notebook_task": { + "notebook_path": "/Shared/examples/hive-in-hive-out-insert", + "source": "WORKSPACE" + }, + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} }, - "timeout_seconds": 0, - "max_concurrent_runs": 1, - "tasks": [ - { - "task_key": "hive-in-hive-out-insert", - "notebook_task": { - "notebook_path": "/Shared/examples/hive-in-hive-out-insert", - "source": "WORKSPACE" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + { + "task_key": "hive_abfss-in-hive_abfss-out-insert", + "depends_on": [ + { + "task_key": "hive-in-hive-out-insert" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/hive+abfss-in-hive+abfss-out-insert", + "source": "WORKSPACE" }, - { - "task_key": "hive_abfss-in-hive_abfss-out-insert", - "depends_on": [ - { - "task_key": "hive-in-hive-out-insert" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/hive+abfss-in-hive+abfss-out-insert", - "source": "WORKSPACE" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "hive_mgd_not_default-in-hive_mgd_not_default-out-insert", + "depends_on": [ + { + "task_key": 
"hive_abfss-in-hive_abfss-out-insert" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/hive+mgd+not+default-in-hive+mgd+not+default-out-insert", + "source": "WORKSPACE" }, - { - "task_key": "hive_mgd_not_default-in-hive_mgd_not_default-out-insert", - "depends_on": [ - { - "task_key": "hive_abfss-in-hive_abfss-out-insert" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/hive+mgd+not+default-in-hive+mgd+not+default-out-insert", - "source": "WORKSPACE" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "hive_mnt-in-hive_mnt-out-insert", + "depends_on": [ + { + "task_key": "hive_mgd_not_default-in-hive_mgd_not_default-out-insert" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/hive+mnt-in-hive+mnt-out-insert", + "source": "WORKSPACE" }, - { - "task_key": "hive_mnt-in-hive_mnt-out-insert", - "depends_on": [ - { - "task_key": "hive_mgd_not_default-in-hive_mgd_not_default-out-insert" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/hive+mnt-in-hive+mnt-out-insert", - "source": "WORKSPACE" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "abfss-in-hive_notmgd_saveAsTable-out", + "depends_on": [ + { + "task_key": "hive_mnt-in-hive_mnt-out-insert" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/abfss-in-hive+notmgd+saveAsTable-out", + "source": "WORKSPACE" }, - { - "task_key": "abfss-in-hive_notmgd_saveAsTable-out", - "depends_on": [ - { - "task_key": "hive_mnt-in-hive_mnt-out-insert" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/abfss-in-hive+notmgd+saveAsTable-out", - "source": "WORKSPACE" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "abfss-in-hive_saveAsTable-out", + "depends_on": [ + { + "task_key": "abfss-in-hive_notmgd_saveAsTable-out" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/abfss-in-hive+saveAsTable-out", + "source": "WORKSPACE" }, - { - "task_key": "abfss-in-hive_saveAsTable-out", - "depends_on": [ - { - "task_key": "abfss-in-hive_notmgd_saveAsTable-out" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/abfss-in-hive+saveAsTable-out", - "source": "WORKSPACE" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} - } - ], - "format": "MULTI_TASK" - } + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + } + ], + "format": "MULTI_TASK" } \ No newline at end of file diff --git a/tests/integration/jobdefs/hive3-tests-expectations.json b/tests/integration/jobdefs/hive3-tests-expectations.json index bcd13dd4..2d894ef5 100644 --- a/tests/integration/jobdefs/hive3-tests-expectations.json +++ b/tests/integration/jobdefs/hive3-tests-expectations.json @@ -1,21 +1,21 @@ [ "databricks://.azuredatabricks.net/jobs/", "databricks://.azuredatabricks.net/jobs//tasks/hive-in-hive-out-insert", - "databricks://.azuredatabricks.net/jobs//tasks/hive-in-hive-out-insert/processes/9EA618584B76AF154FF5885F070A753F->8846F7679DA958CC91AEB2B6311C97D2", + 
"databricks://.azuredatabricks.net/jobs//tasks/hive-in-hive-out-insert/processes/2CE3088B4BAADD102F97D92B97F3AB79->E14B63BA5130659288E6B5DB7FC7F232", "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive-in-hive-out-insert", "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_notmgd_saveAsTable-out", - "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_notmgd_saveAsTable-out/processes/DADD88BC04CD758A0D2EB06CE6F86431->C4D873C9A86F827AB135E541A4952BCD", + "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_notmgd_saveAsTable-out/processes/575BF7CF92625D35D6B9309C9561FE0A->43E1EB2B6E2B692F3AFDDDBD63762F41", "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-hive+notmgd+saveAsTable-out", "databricks://.azuredatabricks.net/jobs//tasks/hive_abfss-in-hive_abfss-out-insert", - "databricks://.azuredatabricks.net/jobs//tasks/hive_abfss-in-hive_abfss-out-insert/processes/B5EA0788D2DFDD6724C9638A23C72530->C45E275909E82D362F516CB3DF62F01E", + "databricks://.azuredatabricks.net/jobs//tasks/hive_abfss-in-hive_abfss-out-insert/processes/0366CD2735F426A339DB69EBB00A6ABC->95F7EE6DC3AB03275F8FE27E98838D54", "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive+abfss-in-hive+abfss-out-insert", "databricks://.azuredatabricks.net/jobs//tasks/hive_mgd_not_default-in-hive_mgd_not_default-out-insert", - "databricks://.azuredatabricks.net/jobs//tasks/hive_mgd_not_default-in-hive_mgd_not_default-out-insert/processes/ABC9D8E9383FFB2295BA21732E71BDE5->819BA9557FE05FAFFCD1E6C8C8B12239", + "databricks://.azuredatabricks.net/jobs//tasks/hive_mgd_not_default-in-hive_mgd_not_default-out-insert/processes/13AA3B6322616FF3E554C6A109EBAB5C->6FCA021CCAD4C906D5C29512215F86C9", "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive+mgd+not+default-in-hive+mgd+not+default-out-insert", "databricks://.azuredatabricks.net/jobs//tasks/hive_mnt-in-hive_mnt-out-insert", - "databricks://.azuredatabricks.net/jobs//tasks/hive_mnt-in-hive_mnt-out-insert/processes/B5EA0788D2DFDD6724C9638A23C72530->C45E275909E82D362F516CB3DF62F01E", + "databricks://.azuredatabricks.net/jobs//tasks/hive_mnt-in-hive_mnt-out-insert/processes/0366CD2735F426A339DB69EBB00A6ABC->95F7EE6DC3AB03275F8FE27E98838D54", "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive+mnt-in-hive+mnt-out-insert", "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_saveAsTable-out", - "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_saveAsTable-out/processes/B97ED17F23A32D631D1A53C1AE3A009A->7799B858F5B94A237932CDF9F987F8E0", + "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_saveAsTable-out/processes/D691CD0248B7A179C249AE6DA86A9A69->1073C801CC5F362F10F1CD1FFBA1972C", "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-hive+saveAsTable-out" ] diff --git a/tests/integration/jobdefs/jarjob-test-def.json b/tests/integration/jobdefs/jarjob-test-def.json index 25dca17f..4e664897 100644 --- a/tests/integration/jobdefs/jarjob-test-def.json +++ b/tests/integration/jobdefs/jarjob-test-def.json @@ -1,27 +1,25 @@ { - "settings": { - "name": "JarJob", - "email_notifications": { - "no_alert_for_skipped_runs": false - }, - "max_concurrent_runs": 1, - "tasks": [ - { - "task_key": "JarJob", - "spark_jar_task": { - "jar_uri": "", - "main_class_name": "SparkApp.Basic.App", - "run_as_repl": true - }, - "existing_cluster_id": "", - "libraries": [ - { - "jar": "dbfs:/FileStore/testcases/app.jar" - } - ], - "timeout_seconds": 0 - } - ], - "format": "MULTI_TASK" - 
} + "name": "JarJob", + "email_notifications": { + "no_alert_for_skipped_runs": false + }, + "max_concurrent_runs": 1, + "tasks": [ + { + "task_key": "JarJob", + "spark_jar_task": { + "jar_uri": "", + "main_class_name": "SparkApp.Basic.App", + "run_as_repl": true + }, + "existing_cluster_id": "", + "libraries": [ + { + "jar": "dbfs:/FileStore/testcases/app.jar" + } + ], + "timeout_seconds": 0 + } + ], + "format": "MULTI_TASK" } \ No newline at end of file diff --git a/tests/integration/jobdefs/jarjob-test-expectations.json b/tests/integration/jobdefs/jarjob-test-expectations.json index 304192f5..06b31d83 100644 --- a/tests/integration/jobdefs/jarjob-test-expectations.json +++ b/tests/integration/jobdefs/jarjob-test-expectations.json @@ -1,5 +1,5 @@ [ "databricks://.azuredatabricks.net/jobs/", "databricks://.azuredatabricks.net/jobs//tasks/JarJob", - "databricks://.azuredatabricks.net/jobs//tasks/JarJob/processes/B4CFB465D62A3D282313EF88E9E4779C->2B1635731ED472A95FC7A53B61F02674" + "databricks://.azuredatabricks.net/jobs//tasks/JarJob/processes/CA1C8F378EABC4EF08062103C5D51CBE->560CF14B3818EF6B8FF5D0BC6AF7BCE9" ] diff --git a/tests/integration/jobdefs/pythonscript-test-def.json b/tests/integration/jobdefs/pythonscript-test-def.json index e9c1282d..23585011 100644 --- a/tests/integration/jobdefs/pythonscript-test-def.json +++ b/tests/integration/jobdefs/pythonscript-test-def.json @@ -1,42 +1,40 @@ { - "settings": { - "name": "PythonScriptJob", - "email_notifications": {}, - "max_concurrent_runs": 1, - "tasks": [ - { - "task_key": "PythonScriptJob", - "spark_python_task": { - "python_file": "dbfs:/FileStore/testcases/pythonscript.py" + "name": "PythonScriptJob", + "email_notifications": {}, + "max_concurrent_runs": 1, + "tasks": [ + { + "task_key": "PythonScriptJob", + "spark_python_task": { + "python_file": "dbfs:/FileStore/testcases/pythonscript.py" + }, + "new_cluster": { + "spark_version": "9.1.x-scala2.12", + "spark_conf": { + "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", + "spark.openlineage.host": "https://.azurewebsites.net", + "spark.openlineage.namespace": "#ABC123", + "spark.openlineage.version": "v1" }, - "new_cluster": { - "spark_version": "9.1.x-scala2.12", - "spark_conf": { - "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-scope/Ol-Output-Api-Key}}", - "spark.openlineage.host": "https://.azurewebsites.net", - "spark.openlineage.namespace": "#ABC123", - "spark.openlineage.version": "1" - }, - "node_type_id": "Standard_DS3_v2", - "enable_elastic_disk": true, - "init_scripts": [ - { - "dbfs": { - "destination": "dbfs:/databricks/openlineagehardcoded/release-candidate.sh" - } + "node_type_id": "Standard_DS3_v2", + "enable_elastic_disk": true, + "init_scripts": [ + { + "dbfs": { + "destination": "dbfs:/databricks/openlineage/open-lineage-init-script.sh" } - ], - "azure_attributes": { - "availability": "ON_DEMAND_AZURE" - }, - "num_workers": 1 + } + ], + "azure_attributes": { + "availability": "ON_DEMAND_AZURE" }, - "max_retries": 1, - "min_retry_interval_millis": 0, - "retry_on_timeout": false, - "timeout_seconds": 3600 - } - ], - "format": "MULTI_TASK" - } + "num_workers": 1 + }, + "max_retries": 1, + "min_retry_interval_millis": 0, + "retry_on_timeout": false, + "timeout_seconds": 3600 + } + ], + "format": "MULTI_TASK" } \ No newline at end of file diff --git a/tests/integration/jobdefs/pythonscript-test-expectations.json b/tests/integration/jobdefs/pythonscript-test-expectations.json index 4fa1f04a..077a08c6 100644 --- 
a/tests/integration/jobdefs/pythonscript-test-expectations.json +++ b/tests/integration/jobdefs/pythonscript-test-expectations.json @@ -1,5 +1,5 @@ [ "databricks://.azuredatabricks.net/jobs/", "databricks://.azuredatabricks.net/jobs//tasks/PythonScriptJob", - "databricks://.azuredatabricks.net/jobs//tasks/PythonScriptJob/processes/EAEFBD6BB0CA1156256F42C7E3234487->FC65543BD0CEE9FB45BDD86AF033D876" + "databricks://.azuredatabricks.net/jobs//tasks/PythonScriptJob/processes/16D109EA9E8BC7329A7365311F917C1F->C862A921EE653ED2F3101026739FB936" ] \ No newline at end of file diff --git a/tests/integration/jobdefs/pythonwheel-test-def.json b/tests/integration/jobdefs/pythonwheel-test-def.json index adbd354c..96196bc5 100644 --- a/tests/integration/jobdefs/pythonwheel-test-def.json +++ b/tests/integration/jobdefs/pythonwheel-test-def.json @@ -1,26 +1,24 @@ { - "settings": { - "name": "WheelJob", - "email_notifications": { - "no_alert_for_skipped_runs": false - }, - "max_concurrent_runs": 1, - "tasks": [ - { - "task_key": "WheelJob", - "python_wheel_task": { - "package_name": "abfssintest", - "entry_point": "runapp" - }, - "existing_cluster_id": "", - "libraries": [ - { - "whl": "dbfs:/wheels/abfssintest-0.0.3-py3-none-any.whl" - } - ], - "timeout_seconds": 0 - } - ], - "format": "MULTI_TASK" - } + "name": "WheelJob", + "email_notifications": { + "no_alert_for_skipped_runs": false + }, + "max_concurrent_runs": 1, + "tasks": [ + { + "task_key": "WheelJob", + "python_wheel_task": { + "package_name": "abfssintest", + "entry_point": "runapp" + }, + "existing_cluster_id": "", + "libraries": [ + { + "whl": "dbfs:/FileStore/testcases/abfssintest-0.0.3-py3-none-any.whl" + } + ], + "timeout_seconds": 0 + } + ], + "format": "MULTI_TASK" } \ No newline at end of file diff --git a/tests/integration/jobdefs/pythonwheel-test-expectations.json b/tests/integration/jobdefs/pythonwheel-test-expectations.json index 922d6deb..12ba6847 100644 --- a/tests/integration/jobdefs/pythonwheel-test-expectations.json +++ b/tests/integration/jobdefs/pythonwheel-test-expectations.json @@ -1,5 +1,5 @@ [ "databricks://.azuredatabricks.net/jobs/", "databricks://.azuredatabricks.net/jobs//tasks/WheelJob", - "databricks://.azuredatabricks.net/jobs//tasks/WheelJob/processes/D18BCD0504F8604104FE4D0E7C821E13->50430216FAFCCDD3BFD497A1FA0C14D0" + "databricks://.azuredatabricks.net/jobs//tasks/WheelJob/processes/6438ED307BBA90F1285E1229E67E020B->5560AE0F6CE4403CC559ECF1821CCE47" ] \ No newline at end of file diff --git a/tests/integration/jobdefs/spark2-tests-def.json b/tests/integration/jobdefs/spark2-tests-def.json index b88b9bed..e64037e1 100644 --- a/tests/integration/jobdefs/spark2-tests-def.json +++ b/tests/integration/jobdefs/spark2-tests-def.json @@ -1,65 +1,63 @@ { - "settings": { - "name": "test-examples-spark-2", - "email_notifications": { - "no_alert_for_skipped_runs": false + "name": "test-examples-spark-2", + "email_notifications": { + "no_alert_for_skipped_runs": false + }, + "timeout_seconds": 0, + "max_concurrent_runs": 2, + "tasks": [ + { + "task_key": "spark2-abfss-in-abfss-out", + "notebook_task": { + "notebook_path": "/Shared/examples/abfss-in-abfss-out" + }, + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {}, + "description": "" }, - "timeout_seconds": 0, - "max_concurrent_runs": 2, - "tasks": [ - { - "task_key": "spark2-abfss-in-abfss-out", - "notebook_task": { - "notebook_path": "/Shared/examples/abfss-in-abfss-out" - }, - "existing_cluster_id": "0505-211804-c5x0jm8p", - "timeout_seconds": 
0, - "email_notifications": {}, - "description": "" + { + "task_key": "spark2-abfss-oauth", + "depends_on": [ + { + "task_key": "spark2-abfss-in-abfss-out" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/abfss-in-abfss-out-oauth" }, - { - "task_key": "spark2-abfss-oauth", - "depends_on": [ - { - "task_key": "spark2-abfss-in-abfss-out" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/abfss-in-abfss-out-oauth" - }, - "existing_cluster_id": "0505-211804-c5x0jm8p", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "spark2-mnt", + "depends_on": [ + { + "task_key": "spark2-abfss-oauth" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/mnt-in-mnt-out" }, - { - "task_key": "spark2-mnt", - "depends_on": [ - { - "task_key": "spark2-abfss-oauth" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/mnt-in-mnt-out" - }, - "existing_cluster_id": "0505-211804-c5x0jm8p", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "spark2-Synapse-wasbs-synapse", + "depends_on": [ + { + "task_key": "spark2-mnt" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/synapse-wasbs-in-synapse-out" }, - { - "task_key": "spark2-Synapse-wasbs-synapse", - "depends_on": [ - { - "task_key": "spark2-mnt" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/synapse-wasbs-in-synapse-out" - }, - "existing_cluster_id": "0505-211804-c5x0jm8p", - "timeout_seconds": 0, - "email_notifications": {} - } - ], - "format": "MULTI_TASK" - } + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + } + ], + "format": "MULTI_TASK" } \ No newline at end of file diff --git a/tests/integration/jobdefs/spark2-tests-expectations.json b/tests/integration/jobdefs/spark2-tests-expectations.json index 2e6cefc7..3b31266b 100644 --- a/tests/integration/jobdefs/spark2-tests-expectations.json +++ b/tests/integration/jobdefs/spark2-tests-expectations.json @@ -1,13 +1,13 @@ [ "databricks://.azuredatabricks.net/jobs/", "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-in-abfss-out", - "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-in-abfss-out/processes/2796E46D0CCD18971A9C936C1EB97B1E->34BBA1402F1BAE560BFEA804B83FED62", + "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-in-abfss-out/processes/58C1F24BA6C6FF7592F786C9FA8A3451->BA6B11F82FDCE37E849D25D545E6FB7A", "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-abfss-out", "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-oauth", - "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-oauth/processes/56EE0B098A9A3D07DC11F4C6EA9BF71C->E6B1D99B74724B48DAB2BCB79142CB65", + "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-oauth/processes/BD4A7A895E605BF6C4DE003D3F6B3F39->A3B52DA733083E4642E1C3DB6B093E84", "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-abfss-out-oauth", "databricks://.azuredatabricks.net/jobs//tasks/spark2-mnt", - "databricks://.azuredatabricks.net/jobs//tasks/spark2-mnt/processes/EAEEF594372A61E0E1B545C0B430E966->ADFAB39F64A04DBD087DC73F8DF4EA47", + "databricks://.azuredatabricks.net/jobs//tasks/spark2-mnt/processes/336D6FD3010382DAB8351BFF026B2CBE->C60C4BAB82567905C64B99E2DCBCA711", "databricks://.azuredatabricks.net/notebooks/Shared/examples/mnt-in-mnt-out", 
"databricks://.azuredatabricks.net/jobs//tasks/spark2-Synapse-wasbs-synapse", "databricks://.azuredatabricks.net/jobs//tasks/spark2-Synapse-wasbs-synapse/processes/B596CF432EE21C0349CD0770BC839867->F1AD7C08349CD0A30B47392F787D6364", diff --git a/tests/integration/jobdefs/spark3-tests-def.json b/tests/integration/jobdefs/spark3-tests-def.json index e58b1c1f..4a8f4343 100644 --- a/tests/integration/jobdefs/spark3-tests-def.json +++ b/tests/integration/jobdefs/spark3-tests-def.json @@ -1,205 +1,203 @@ { - "settings": { - "name": "test-examples-spark-3", - "email_notifications": { - "no_alert_for_skipped_runs": false + "name": "test-examples-spark-3", + "email_notifications": { + "no_alert_for_skipped_runs": false + }, + "timeout_seconds": 0, + "max_concurrent_runs": 1, + "tasks": [ + { + "task_key": "abfss-in-abfss-out", + "notebook_task": { + "notebook_path": "/Shared/examples/abfss-in-abfss-out" + }, + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {}, + "description": "" }, - "timeout_seconds": 0, - "max_concurrent_runs": 1, - "tasks": [ - { - "task_key": "abfss-in-abfss-out", - "notebook_task": { - "notebook_path": "/Shared/examples/abfss-in-abfss-out" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {}, - "description": "" + { + "task_key": "abfss-oauth", + "depends_on": [ + { + "task_key": "abfss-in-abfss-out" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/abfss-in-abfss-out-oauth" }, - { - "task_key": "abfss-oauth", - "depends_on": [ - { - "task_key": "abfss-in-abfss-out" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/abfss-in-abfss-out-oauth" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "azuresql-in-out", + "depends_on": [ + { + "task_key": "ab-in-ab-out-root" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/azuresql-in-azuresql-out" }, - { - "task_key": "azuresql-in-out", - "depends_on": [ - { - "task_key": "ab-in-ab-out-root" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/azuresql-in-azuresql-out" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "delta-abfss", + "depends_on": [ + { + "task_key": "azuresql-in-out" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/delta-in-delta-out-abfss" }, - { - "task_key": "delta-abfss", - "depends_on": [ - { - "task_key": "azuresql-in-out" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/delta-in-delta-out-abfss" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "delta-fs", + "depends_on": [ + { + "task_key": "delta-abfss" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/delta-in-delta-out-fs" }, - { - "task_key": "delta-fs", - "depends_on": [ - { - "task_key": "delta-abfss" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/delta-in-delta-out-fs" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "delta-mnt", + "depends_on": [ + { + "task_key": "delta-fs" + } + ], + "notebook_task": { 
+ "notebook_path": "/Shared/examples/delta-in-delta-out-mnt" }, - { - "task_key": "delta-mnt", - "depends_on": [ - { - "task_key": "delta-fs" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/delta-in-delta-out-mnt" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "mnt", + "depends_on": [ + { + "task_key": "intermix-languages" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/mnt-in-mnt-out" }, - { - "task_key": "mnt", - "depends_on": [ - { - "task_key": "intermix-languages" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/mnt-in-mnt-out" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "synapse-in-wasbs-out", + "depends_on": [ + { + "task_key": "nested-parent" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/synapse-in-wasbs-out" }, - { - "task_key": "synapse-in-wasbs-out", - "depends_on": [ - { - "task_key": "nested-parent" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/synapse-in-wasbs-out" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "Syn-in-WB-in-Syn-Out", + "depends_on": [ + { + "task_key": "synapse-in-wasbs-out" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/synapse-wasbs-in-synapse-out" }, - { - "task_key": "Syn-in-WB-in-Syn-Out", - "depends_on": [ - { - "task_key": "synapse-in-wasbs-out" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/synapse-wasbs-in-synapse-out" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "wasbs-in-wasbs-out", + "depends_on": [ + { + "task_key": "Syn-in-WB-in-Syn-Out" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/wasbs-in-wasbs-out" }, - { - "task_key": "wasbs-in-wasbs-out", - "depends_on": [ - { - "task_key": "Syn-in-WB-in-Syn-Out" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/wasbs-in-wasbs-out" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "ab-in-ab-out-root", + "depends_on": [ + { + "task_key": "abfss-oauth" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/abfss-in-abfss-out-root" }, - { - "task_key": "ab-in-ab-out-root", - "depends_on": [ - { - "task_key": "abfss-oauth" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/abfss-in-abfss-out-root" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "nested-parent", + "depends_on": [ + { + "task_key": "mnt" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/nested-parent" }, - { - "task_key": "nested-parent", - "depends_on": [ - { - "task_key": "mnt" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/nested-parent" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + 
"timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "intermix-languages", + "depends_on": [ + { + "task_key": "delta-mnt" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/intermix-languages" }, - { - "task_key": "intermix-languages", - "depends_on": [ - { - "task_key": "delta-mnt" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/intermix-languages" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + }, + { + "task_key": "output-with-period", + "depends_on": [ + { + "task_key": "nested-parent" + } + ], + "notebook_task": { + "notebook_path": "/Shared/examples/name-with-periods" }, - { - "task_key": "output-with-period", - "depends_on": [ - { - "task_key": "nested-parent" - } - ], - "notebook_task": { - "notebook_path": "/Shared/examples/name-with-periods" - }, - "existing_cluster_id": "", - "timeout_seconds": 0, - "email_notifications": {} - } - ], - "format": "MULTI_TASK" - } + "existing_cluster_id": "", + "timeout_seconds": 0, + "email_notifications": {} + } + ], + "format": "MULTI_TASK" } \ No newline at end of file diff --git a/tests/integration/jobdefs/spark3-tests-expectations.json b/tests/integration/jobdefs/spark3-tests-expectations.json index 2dca4620..277e0536 100644 --- a/tests/integration/jobdefs/spark3-tests-expectations.json +++ b/tests/integration/jobdefs/spark3-tests-expectations.json @@ -2,31 +2,31 @@ "databricks://.azuredatabricks.net/jobs/", "databricks://.azuredatabricks.net/jobs//tasks/Syn-in-WB-in-Syn-Out", "databricks://.azuredatabricks.net/jobs//tasks/ab-in-ab-out-root", - "databricks://.azuredatabricks.net/jobs//tasks/ab-in-ab-out-root/processes/E74B887E65E059D38DAB51D31A5432D8->F6D2E2554E30B80D0E6901908AADEDBB", + "databricks://.azuredatabricks.net/jobs//tasks/ab-in-ab-out-root/processes/0AFD1EDAC25ECE8253F387E74F28629A->1BB1B95C33EDB21B2D3903B9A8103087", "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-abfss-out", - "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-abfss-out/processes/2796E46D0CCD18971A9C936C1EB97B1E->34BBA1402F1BAE560BFEA804B83FED62", + "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-abfss-out/processes/58C1F24BA6C6FF7592F786C9FA8A3451->BA6B11F82FDCE37E849D25D545E6FB7A", "databricks://.azuredatabricks.net/jobs//tasks/abfss-oauth", - "databricks://.azuredatabricks.net/jobs//tasks/abfss-oauth/processes/56EE0B098A9A3D07DC11F4C6EA9BF71C->E6B1D99B74724B48DAB2BCB79142CB65", + "databricks://.azuredatabricks.net/jobs//tasks/abfss-oauth/processes/BD4A7A895E605BF6C4DE003D3F6B3F39->A3B52DA733083E4642E1C3DB6B093E84", "databricks://.azuredatabricks.net/jobs//tasks/azuresql-in-out", - "databricks://.azuredatabricks.net/jobs//tasks/azuresql-in-out/processes/03CC0799BCA86B4A823AD9B6C9A772A1->1A1EF10BC89D10CA52B3559833DAC1F3", + "databricks://.azuredatabricks.net/jobs//tasks/azuresql-in-out/processes/B95334DF8F53EB63EDBA24AF88CFC7AA->80FC7C28AF3F669752CE8F2DA1987526", "databricks://.azuredatabricks.net/jobs//tasks/delta-abfss", - "databricks://.azuredatabricks.net/jobs//tasks/delta-abfss/processes/EEDA606783A7DD68C6A6C60221608209->0533EFDC2210DD1546DACD3291D14EE9", + "databricks://.azuredatabricks.net/jobs//tasks/delta-abfss/processes/CE0291670068E208B1A9621C1721730D->FD3D635F915390056518ECC38AB07DCC", "databricks://.azuredatabricks.net/jobs//tasks/delta-fs", - 
"databricks://.azuredatabricks.net/jobs//tasks/delta-fs/processes/C8A21C9CC03564B883DD6A2E4174F9AE->FC6DC149F25D86CE472C77290596DD9F", + "databricks://.azuredatabricks.net/jobs//tasks/delta-fs/processes/F0F4F25C04BAFB0FBFF90BE92709E7E4->9557C9A65FE7A9A7A89B6D9061C55B5A", "databricks://.azuredatabricks.net/jobs//tasks/delta-mnt", - "databricks://.azuredatabricks.net/jobs//tasks/delta-mnt/processes/F33E9424B73DFD1C3B8D0259EB772F87->E443883D4B66E0DBEE76C5331401E533", + "databricks://.azuredatabricks.net/jobs//tasks/delta-mnt/processes/A191E946F919A0717BB4FF2A79221996->3718CE24F8FCB01C633CF37CED45B3FC", "databricks://.azuredatabricks.net/jobs//tasks/intermix-languages", - "databricks://.azuredatabricks.net/jobs//tasks/intermix-languages/processes/837D6375622EA0C277BB0275C5B2E4BE->A950ACA0CBDF8EABD0C758E01B8893B3", + "databricks://.azuredatabricks.net/jobs//tasks/intermix-languages/processes/7D3D5D44FDC1DC865806712E633C5E56->B3CF5624F08EEEDF819869D074FA7774", "databricks://.azuredatabricks.net/jobs//tasks/mnt", - "databricks://.azuredatabricks.net/jobs//tasks/mnt/processes/EAEEF594372A61E0E1B545C0B430E966->ADFAB39F64A04DBD087DC73F8DF4EA47", - "databricks://.azuredatabricks.net/jobs//tasks/output-with-period", - "databricks://.1.azuredatabricks.net/jobs//tasks/output-with-period/processes/4AF18D6C70DDBCA092FC53396B2C908F->F0460570010BB248E2256F0F932A82B8", + "databricks://.azuredatabricks.net/jobs//tasks/mnt/processes/336D6FD3010382DAB8351BFF026B2CBE->C60C4BAB82567905C64B99E2DCBCA711", "databricks://.azuredatabricks.net/jobs//tasks/nested-parent", - "databricks://.azuredatabricks.net/jobs//tasks/nested-parent/processes/1611F7AEE100534D05476B8D8D8096A2->8F13AE1A6297C7B53E82AD0862A258C5", + "databricks://.azuredatabricks.net/jobs//tasks/nested-parent/processes/8514E8FCB25E967BC6DA61D1A48E2CD4->7C40325C08313ADDF8F653CACEAAA8C1", + "databricks://.azuredatabricks.net/jobs//tasks/output-with-period", + "databricks://.azuredatabricks.net/jobs//tasks/output-with-period/processes/8530DB90732944CA2C3C02E4FEE633E2->054707838715BECB4629ECF6B398BF1A", "databricks://.azuredatabricks.net/jobs//tasks/synapse-in-wasbs-out", - "databricks://.azuredatabricks.net/jobs//tasks/synapse-in-wasbs-out/processes/F8D4C5D21F4175BBF75031FA6F5C3C81->367B7F5964BD2C529BD8BF705A32802A", + "databricks://.azuredatabricks.net/jobs//tasks/synapse-in-wasbs-out/processes/FC4F9610428CB3C9FBCB97DF6D2B939D->76AAA8ADC61434F8BFB7C92E6ABF8C85", "databricks://.azuredatabricks.net/jobs//tasks/wasbs-in-wasbs-out", - "databricks://.azuredatabricks.net/jobs//tasks/wasbs-in-wasbs-out/processes/A85A7E9B093C1A818F2C4276C5A9A871->C903E6160BE06DD452AFE1AAD278162C", + "databricks://.azuredatabricks.net/jobs//tasks/wasbs-in-wasbs-out/processes/34DA3FD40AC2F55C125A86039355D6ED->4A56EEA94A2A249B6FA359EC03F43FF7", "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-abfss-out", "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-abfss-out-oauth", "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-abfss-out-root", diff --git a/tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-def.json b/tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-def.json deleted file mode 100644 index e3c32df5..00000000 --- a/tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-def.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "settings": { - "name": "wasbs-in-wasbs-out-with-param", - "email_notifications": { - "no_alert_for_skipped_runs": false - }, - "timeout_seconds": 0, - "max_concurrent_runs": 1, - "tasks": [ - { - 
"task_key": "wasbs-in-wasbs-out-with-param", - "notebook_task": { - "notebook_path": "/Shared/examples/wasbs-in-wasbs-out-with-param", - "base_parameters": { - "myval": "10" - } - }, - "existing_cluster_id": "0326-140927-mc4qzaj5", - "timeout_seconds": 0, - "email_notifications": {} - } - ], - "format": "MULTI_TASK" - } -} \ No newline at end of file diff --git a/tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-expectations.json b/tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-expectations.json deleted file mode 100644 index a850eb64..00000000 --- a/tests/integration/jobdefs/wasbs-in-wasbs-out-with-param-expectations.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - "databricks://.azuredatabricks.net/jobs//tasks/wasbs-in-wasbs-out-with-param", - "databricks://.azuredatabricks.net/notebooks/Shared/examples/wasbs-in-wasbs-out-with-param", - "databricks://.azuredatabricks.net/jobs//tasks/wasbs-in-wasbs-out-with-param/processes/9E51CA344228BD2C592091F34BCF81B8->D4051C5A34E3E2812E191B59E82CB1B1", - "databricks://.azuredatabricks.net/jobs//tasks/wasbs-in-wasbs-out-with-param/processes/D4051C5A34E3E2812E191B59E82CB1B1->8031DA4E838D99236E94A4CE72C951BC" -] \ No newline at end of file diff --git a/tests/integration/run-test.sh b/tests/integration/run-test.sh index e31bc8ed..002f5cd5 100644 --- a/tests/integration/run-test.sh +++ b/tests/integration/run-test.sh @@ -43,8 +43,8 @@ for fn in `ls ./tests/integration/jobdefs`; do continue fi - # For each file, get the settings.name - job_name=$(cat "$TESTS_DIRECTORY/$fn" | jq -r '.settings.name') + # For each file, get the .name + job_name=$(cat "$TESTS_DIRECTORY/$fn" | jq -r '.name') echo "Preparing to run JobDef:$fn JobName:$job_name JobId:${jobnametoid[$job_name]}" temp_job_id=${jobnametoid[$job_name]} # Get the expectation file diff --git a/tests/integration/spark-apps/jarjobs/abfssInAbfssOut/app/src/main/java/SparkApp/Basic/App.java b/tests/integration/spark-apps/jarjobs/abfssInAbfssOut/app/src/main/java/SparkApp/Basic/App.java index 87b4bec2..68da198c 100644 --- a/tests/integration/spark-apps/jarjobs/abfssInAbfssOut/app/src/main/java/SparkApp/Basic/App.java +++ b/tests/integration/spark-apps/jarjobs/abfssInAbfssOut/app/src/main/java/SparkApp/Basic/App.java @@ -32,7 +32,7 @@ public static void main(String[] args) { System.out.println(new App().getGreeting()); - String storageKey = dbutils.secrets().get("purview-to-adb-scope", "storage-service-key"); + String storageKey = dbutils.secrets().get("purview-to-adb-kv", "storage-service-key"); spark.conf().set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey); diff --git a/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-oauth.scala b/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-oauth.scala index 05702ab7..59d3b7c8 100644 --- a/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-oauth.scala +++ b/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-oauth.scala @@ -11,9 +11,9 @@ val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) 
-spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) -spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-scope", "tenant-id")+"/oauth2/token") +spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-root.scala b/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-root.scala index 7ed5f0ab..6177e48f 100644 --- a/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-root.scala +++ b/tests/integration/spark-apps/notebooks/abfss-in-abfss-out-root.scala @@ -9,7 +9,7 @@ val ouptutContainerName = "writetoroot" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputAbfssRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net/root" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/abfss-in-abfss-out.scala b/tests/integration/spark-apps/notebooks/abfss-in-abfss-out.scala index 82e3738e..04c49a2f 100644 --- a/tests/integration/spark-apps/notebooks/abfss-in-abfss-out.scala +++ b/tests/integration/spark-apps/notebooks/abfss-in-abfss-out.scala @@ -9,7 +9,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/abfss-in-hive+notmgd+saveAsTable-out.scala b/tests/integration/spark-apps/notebooks/abfss-in-hive+notmgd+saveAsTable-out.scala index 648fc458..d7ccd8d1 100644 --- a/tests/integration/spark-apps/notebooks/abfss-in-hive+notmgd+saveAsTable-out.scala +++ b/tests/integration/spark-apps/notebooks/abfss-in-hive+notmgd+saveAsTable-out.scala @@ -18,7 +18,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git 
a/tests/integration/spark-apps/notebooks/abfss-in-hive+saveAsTable-out.scala b/tests/integration/spark-apps/notebooks/abfss-in-hive+saveAsTable-out.scala index c6d65651..2922297d 100644 --- a/tests/integration/spark-apps/notebooks/abfss-in-hive+saveAsTable-out.scala +++ b/tests/integration/spark-apps/notebooks/abfss-in-hive+saveAsTable-out.scala @@ -22,7 +22,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/azuresql-in-azuresql-out.scala b/tests/integration/spark-apps/notebooks/azuresql-in-azuresql-out.scala index a412c0f4..c6bf66fb 100644 --- a/tests/integration/spark-apps/notebooks/azuresql-in-azuresql-out.scala +++ b/tests/integration/spark-apps/notebooks/azuresql-in-azuresql-out.scala @@ -10,12 +10,10 @@ import java.lang.{ClassNotFoundException} // COMMAND ---------- -val server_name = "jdbc:sqlserver://FILL-IN-CONNECTION-STRING" -val database_name = "purview-to-adb-sqldb" -val url = server_name + ";" + "database=" + database_name + ";" +val url = dbutils.secrets.get("purview-to-adb-kv", "azuresql-jdbc-conn-str") -val username = dbutils.secrets.get("purview-to-adb-scope", "azuresql-username") -val password = dbutils.secrets.get("purview-to-adb-scope", "azuresql-password") +val username = dbutils.secrets.get("purview-to-adb-kv", "azuresql-username") +val password = dbutils.secrets.get("purview-to-adb-kv", "azuresql-password") // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/call-via-adf-spark2.scala b/tests/integration/spark-apps/notebooks/call-via-adf-spark2.scala index f2e147cc..58c40395 100644 --- a/tests/integration/spark-apps/notebooks/call-via-adf-spark2.scala +++ b/tests/integration/spark-apps/notebooks/call-via-adf-spark2.scala @@ -9,7 +9,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/call-via-adf-spark3.scala b/tests/integration/spark-apps/notebooks/call-via-adf-spark3.scala index f4be3978..c283939f 100644 --- a/tests/integration/spark-apps/notebooks/call-via-adf-spark3.scala +++ b/tests/integration/spark-apps/notebooks/call-via-adf-spark3.scala @@ -9,7 +9,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff 
--git a/tests/integration/spark-apps/notebooks/delta-in-delta-merge.scala b/tests/integration/spark-apps/notebooks/delta-in-delta-merge.scala index 81d88ff4..f2fc4edd 100644 --- a/tests/integration/spark-apps/notebooks/delta-in-delta-merge.scala +++ b/tests/integration/spark-apps/notebooks/delta-in-delta-merge.scala @@ -11,13 +11,13 @@ val storageServiceName = sys.env("STORAGE_SERVICE_NAME") val storageContainerName = "rawdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) -spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-scope", "tenant-id")+"/oauth2/token") +spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/delta-in-delta-out-abfss.scala b/tests/integration/spark-apps/notebooks/delta-in-delta-out-abfss.scala index 03e31de2..69893565 100644 --- a/tests/integration/spark-apps/notebooks/delta-in-delta-out-abfss.scala +++ b/tests/integration/spark-apps/notebooks/delta-in-delta-out-abfss.scala @@ -5,13 +5,13 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) -spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", 
"https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-scope", "tenant-id")+"/oauth2/token") +spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/delta-in-delta-out-fs.scala b/tests/integration/spark-apps/notebooks/delta-in-delta-out-fs.scala index d247e4fb..3d3131a6 100644 --- a/tests/integration/spark-apps/notebooks/delta-in-delta-out-fs.scala +++ b/tests/integration/spark-apps/notebooks/delta-in-delta-out-fs.scala @@ -5,14 +5,14 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") //spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) -spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-scope", "tenant-id")+"/oauth2/token") +spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/delta-in-delta-out-mnt.scala b/tests/integration/spark-apps/notebooks/delta-in-delta-out-mnt.scala index 487c2e07..1d2f8c2b 100644 --- a/tests/integration/spark-apps/notebooks/delta-in-delta-out-mnt.scala +++ b/tests/integration/spark-apps/notebooks/delta-in-delta-out-mnt.scala @@ -5,14 +5,14 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = 
dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") //spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) -spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-scope", "tenant-id")+"/oauth2/token") +spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/hive+abfss-in-hive+abfss-out-insert.py b/tests/integration/spark-apps/notebooks/hive+abfss-in-hive+abfss-out-insert.py index d0ea40df..d95ced10 100644 --- a/tests/integration/spark-apps/notebooks/hive+abfss-in-hive+abfss-out-insert.py +++ b/tests/integration/spark-apps/notebooks/hive+abfss-in-hive+abfss-out-insert.py @@ -7,7 +7,7 @@ abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) spark.conf.set('spark.query.rootPath',abfssRootPath) @@ -15,25 +15,27 @@ # COMMAND ---------- -# MAGIC %sql -# MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleA001 ( -# MAGIC tableId INT, -# MAGIC x INT -# MAGIC ) -# MAGIC LOCATION 'abfss://rawdata@.dfs.core.windows.net/testcase/twentyone/exampleInputA/' -# MAGIC ; -# MAGIC -# MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleOutput001( -# MAGIC tableId INT, -# MAGIC x INT -# MAGIC ) -# MAGIC LOCATION 'abfss://rawdata@.dfs.core.windows.net/testcase/twentyone/exampleOutput/' -# MAGIC ; +spark.sql(f""" +CREATE TABLE IF NOT EXISTS default.testSample ( +tableId INT, +x INT +) +LOCATION 'abfss://rawdata@{storageServiceName}.dfs.core.windows.net/testcase/twentyone/exampleInputA/' +; +""" +) # COMMAND ---------- -# %sql -# INSERT INTO default.hiveExampleA001 (tableId, x) VALUES(1,2) +spark.sql(f""" +CREATE TABLE IF NOT EXISTS default.hiveExampleOutput001 ( +tableId INT, +x INT +) +LOCATION 'abfss://rawdata@{storageServiceName}.dfs.core.windows.net/testcase/twentyone/exampleOutput/' +; +""" +) # COMMAND ---------- @@ -44,12 +46,4 @@ # COMMAND ---------- -spark.read.table("default.hiveExampleOutput001").inputFiles() - -# COMMAND 
---------- - -dbutils.fs.ls("abfss://rawdata@.dfs.core.windows.net/testcase/twentyone/exampleInputA/") - -# COMMAND ---------- - diff --git a/tests/integration/spark-apps/notebooks/hive+mgd+not+default-in-hive+mgd+not+default-out-insert.py b/tests/integration/spark-apps/notebooks/hive+mgd+not+default-in-hive+mgd+not+default-out-insert.py index 3bd5be94..18a99c84 100644 --- a/tests/integration/spark-apps/notebooks/hive+mgd+not+default-in-hive+mgd+not+default-out-insert.py +++ b/tests/integration/spark-apps/notebooks/hive+mgd+not+default-in-hive+mgd+not+default-out-insert.py @@ -1,28 +1,24 @@ # Databricks notebook source -# %sql -# CREATE DATABASE IF NOT EXISTS notdefault; +# MAGIC %sql +# MAGIC CREATE DATABASE IF NOT EXISTS notdefault; # COMMAND ---------- -# %sql -# CREATE TABLE IF NOT EXISTS notdefault.hiveExampleA ( -# tableId INT, -# x INT -# ); - -# CREATE TABLE notdefault.hiveExampleOutput( -# tableId INT, -# x INT -# ) - -# COMMAND ---------- +# MAGIC %sql +# MAGIC CREATE TABLE IF NOT EXISTS notdefault.hiveExampleA ( +# MAGIC tableId INT, +# MAGIC x INT +# MAGIC ); -# %sql -# INSERT INTO notdefault.hiveExampleA (tableId, x) VALUES(1,2) +# MAGIC CREATE TABLE IF NOT EXISTS notdefault.hiveExampleOutput( +# MAGIC tableId INT, +# MAGIC x INT +# MAGIC ) # COMMAND ---------- -spark.sparkContext.setLogLevel("DEBUG") +# MAGIC %sql +# MAGIC INSERT INTO notdefault.hiveExampleA (tableId, x) VALUES(1,2) # COMMAND ---------- @@ -32,10 +28,3 @@ # MAGIC FROM notdefault.hiveExampleA # COMMAND ---------- - -# MAGIC %md -# MAGIC # Exploring the File Path - -# COMMAND ---------- - -# dbutils.fs.ls("/user/hive/warehouse/notdefault.db/hiveexamplea") diff --git a/tests/integration/spark-apps/notebooks/hive-in-hive-out-insert.py b/tests/integration/spark-apps/notebooks/hive-in-hive-out-insert.py index 914a4c09..e68d33b5 100644 --- a/tests/integration/spark-apps/notebooks/hive-in-hive-out-insert.py +++ b/tests/integration/spark-apps/notebooks/hive-in-hive-out-insert.py @@ -1,19 +1,14 @@ -# Datricks notebook source -# %sql -# CREATE TABLE IF NOT EXISTS default.hiveExampleA000 ( -# tableId INT, -# x INT -# ); - -# CREATE TABLE default.hiveExampleOutput000( -# tableId INT, -# x INT -# ) - -# COMMAND ---------- - -# %sql -# INSERT INTO default.hiveExampleA000 (tableId, x) VALUES(1,2) +# Databricks notebook source +# MAGIC %sql +# MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleA000 ( +# MAGIC tableId INT, +# MAGIC x INT +# MAGIC ); +# MAGIC +# MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleOutput000( +# MAGIC tableId INT, +# MAGIC x INT +# MAGIC ) # COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/intermix-languages.scala b/tests/integration/spark-apps/notebooks/intermix-languages.scala index dee27287..83d51cf3 100644 --- a/tests/integration/spark-apps/notebooks/intermix-languages.scala +++ b/tests/integration/spark-apps/notebooks/intermix-languages.scala @@ -9,14 +9,15 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) // COMMAND ---------- // MAGIC %python -// MAGIC storageServiceName = sys.env("STORAGE_SERVICE_NAME") +// MAGIC import os 
+// MAGIC storageServiceName = os.environ.get("STORAGE_SERVICE_NAME") // MAGIC storageContainerName = "rawdata" // MAGIC ouptutContainerName = "outputdata" // MAGIC abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" diff --git a/tests/integration/spark-apps/notebooks/mnt-in-mnt-out.scala b/tests/integration/spark-apps/notebooks/mnt-in-mnt-out.scala index 24852880..92ed5d82 100644 --- a/tests/integration/spark-apps/notebooks/mnt-in-mnt-out.scala +++ b/tests/integration/spark-apps/notebooks/mnt-in-mnt-out.scala @@ -7,7 +7,7 @@ val storageServiceName = sys.env("STORAGE_SERVICE_NAME") val storageContainerName = "rawdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/name-with-periods.scala b/tests/integration/spark-apps/notebooks/name-with-periods.scala index f26aced9..151b3d07 100644 --- a/tests/integration/spark-apps/notebooks/name-with-periods.scala +++ b/tests/integration/spark-apps/notebooks/name-with-periods.scala @@ -9,7 +9,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/nested-child.scala b/tests/integration/spark-apps/notebooks/nested-child.scala index 5b98bbe5..3cd2b5f0 100644 --- a/tests/integration/spark-apps/notebooks/nested-child.scala +++ b/tests/integration/spark-apps/notebooks/nested-child.scala @@ -9,7 +9,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/spark-sql-table-in-abfss-out.scala b/tests/integration/spark-apps/notebooks/spark-sql-table-in-abfss-out.scala index 74f2f8fd..aa4fc639 100644 --- a/tests/integration/spark-apps/notebooks/spark-sql-table-in-abfss-out.scala +++ b/tests/integration/spark-apps/notebooks/spark-sql-table-in-abfss-out.scala @@ -46,7 +46,7 @@ val ouptutContainerName = "outputdata" val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git 
a/tests/integration/spark-apps/notebooks/synapse-in-synapse-out.scala b/tests/integration/spark-apps/notebooks/synapse-in-synapse-out.scala index cfc10dc8..c5e98921 100644 --- a/tests/integration/spark-apps/notebooks/synapse-in-synapse-out.scala +++ b/tests/integration/spark-apps/notebooks/synapse-in-synapse-out.scala @@ -1,33 +1,33 @@ // Databricks notebook source //Defining the service principal credentials for the Azure storage account -val tenantid = dbutils.secrets.get("purview-to-adb-scope", "tenant-id") +val tenantid = dbutils.secrets.get("purview-to-adb-kv", "tenant-id") val synapseStorageAccount = sys.env("SYNAPSE_STORAGE_SERVICE_NAME") spark.conf.set("fs.azure.account.auth.type", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("fs.azure.account.oauth2.client.secret", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) +spark.conf.set("fs.azure.account.oauth2.client.id", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) spark.conf.set("fs.azure.account.oauth2.client.endpoint", "https://login.microsoftonline.com/" + tenantid + "/oauth2/token") //# Defining a separate set of service principal credentials for Azure Synapse Analytics (If not defined, the connector will use the Azure storage account credentials) -spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.id", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.secret", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) +spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.id", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.secret", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.key."+synapseStorageAccount+".blob.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "synapse-storage-key")) // COMMAND ---------- //Azure Synapse related settings -val dwDatabase = "SQLPool1" +val dwDatabase = "sqlpool1" val dwServer = sys.env("SYNAPSE_SERVICE_NAME")+".sql.azuresynapse.net" -val dwUser = dbutils.secrets.get("purview-to-adb-scope", "synapse-query-username") -val dwPass = dbutils.secrets.get("purview-to-adb-scope", "synapse-query-password") +val dwUser = dbutils.secrets.get("purview-to-adb-kv", "synapse-query-username") +val dwPass = dbutils.secrets.get("purview-to-adb-kv", "synapse-query-password") val dwJdbcPort = "1433" val dwJdbcExtraOptions = "encrypt=true;trustServerCertificate=true;hostNameInCertificate=*.database.windows.net;loginTimeout=30;" val sqlDwUrl = "jdbc:sqlserver://" + dwServer + ":" + dwJdbcPort + ";database=" + dwDatabase + ";user=" + dwUser+";password=" + dwPass + ";" + dwJdbcExtraOptions val blobStorage = synapseStorageAccount+".blob.core.windows.net" val blobContainer = "temp" -val blobAccessKey = dbutils.secrets.get("purview-to-adb-scope", "synapse-storage-key") +val blobAccessKey = dbutils.secrets.get("purview-to-adb-kv",
"synapse-storage-key") val tempDir = "wasbs://" + blobContainer + "@" + blobStorage +"/tempfolder" // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/synapse-in-wasbs-out.scala b/tests/integration/spark-apps/notebooks/synapse-in-wasbs-out.scala index 90e0dc44..34ced24a 100644 --- a/tests/integration/spark-apps/notebooks/synapse-in-wasbs-out.scala +++ b/tests/integration/spark-apps/notebooks/synapse-in-wasbs-out.scala @@ -1,40 +1,40 @@ // Databricks notebook source //Defining the service principal credentials for the Azure storage account -val tenantid = dbutils.secrets.get("purview-to-adb-scope", "tenant-id") +val tenantid = dbutils.secrets.get("purview-to-adb-kv", "tenant-id") val synapseStorageAccount = sys.env("SYNAPSE_STORAGE_SERVICE_NAME") spark.conf.set("fs.azure.account.auth.type", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("fs.azure.account.oauth2.client.secret", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) +spark.conf.set("fs.azure.account.oauth2.client.id", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) spark.conf.set("fs.azure.account.oauth2.client.endpoint", "https://login.microsoftonline.com/" + tenantid + "/oauth2/token") //# Defining a separate set of service principal credentials for Azure Synapse Analytics (If not defined, the connector will use the Azure storage account credentials) -spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.id", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.secret", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) -spark.conf.set("fs.azure.account.key."+synapseStorageAccount+".blob.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "synapse-storage-key")) +spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.id", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.secret", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.key."+synapseStorageAccount+".blob.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "synapse-storage-key")) // COMMAND ---------- //Azure Synapse related settings -val dwDatabase = "SQLPool1" +val dwDatabase = "sqlpool1" val dwServer = sys.env("SYNAPSE_SERVICE_NAME")+".sql.azuresynapse.net" -val dwUser = dbutils.secrets.get("purview-to-adb-scope", "synapse-query-username") -val dwPass = dbutils.secrets.get("purview-to-adb-scope", "synapse-query-password") +val dwUser = dbutils.secrets.get("purview-to-adb-kv", "synapse-query-username") +val dwPass = dbutils.secrets.get("purview-to-adb-kv", "synapse-query-password") val dwJdbcPort = "1433" val dwJdbcExtraOptions = "encrypt=true;trustServerCertificate=true;hostNameInCertificate=*.database.windows.net;loginTimeout=30;" val sqlDwUrl = "jdbc:sqlserver://" + dwServer + ":" + dwJdbcPort + ";database=" + dwDatabase + ";user=" + dwUser+";password=" + dwPass + ";" + dwJdbcExtraOptions val blobStorage = synapseStorageAccount+".blob.core.windows.net" val blobContainer = "temp" -val blobAccessKey = 
dbutils.secrets.get("purview-to-adb-scope", "synapse-storage-key") +val blobAccessKey = dbutils.secrets.get("purview-to-adb-kv", "synapse-storage-key") val tempDir = "wasbs://" + blobContainer + "@" + blobStorage +"/tempfolder" val storageServiceName = sys.env("STORAGE_SERVICE_NAME") val storageContainerName = "outputdata" val wasbsRootPath = "wasbs://"+storageContainerName+"@"+storageServiceName+".blob.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".blob.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/notebooks/synapse-wasbs-in-synapse-out.scala b/tests/integration/spark-apps/notebooks/synapse-wasbs-in-synapse-out.scala index 4c7205e9..643b5bf8 100644 --- a/tests/integration/spark-apps/notebooks/synapse-wasbs-in-synapse-out.scala +++ b/tests/integration/spark-apps/notebooks/synapse-wasbs-in-synapse-out.scala @@ -2,42 +2,42 @@ import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} //Defining the service principal credentials for the Azure storage account -val tenantid = dbutils.secrets.get("purview-to-adb-scope", "tenant-id") +val tenantid = dbutils.secrets.get("purview-to-adb-kv", "tenant-id") val synapseStorageAccount = sys.env("SYNAPSE_STORAGE_SERVICE_NAME") spark.conf.set("fs.azure.account.auth.type", "OAuth") spark.conf.set("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") -spark.conf.set("fs.azure.account.oauth2.client.id", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("fs.azure.account.oauth2.client.secret", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) +spark.conf.set("fs.azure.account.oauth2.client.id", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("fs.azure.account.oauth2.client.secret", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) spark.conf.set("fs.azure.account.oauth2.client.endpoint", "https://login.microsoftonline.com/" + tenantid + "/oauth2/token") //# Defining a separate set of service principal credentials for Azure Synapse Analytics (If not defined, the connector will use the Azure storage account credentials) -spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.id", dbutils.secrets.get("purview-to-adb-scope", "project-spn-client-id")) -spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.secret", dbutils.secrets.get("purview-to-adb-scope", "project-spn-secret")) -spark.conf.set("fs.azure.account.key."+synapseStorageAccount+".blob.core.windows.net", dbutils.secrets.get("purview-to-adb-scope", "synapse-storage-key")) +spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.id", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) +spark.conf.set("spark.databricks.sqldw.jdbc.service.principal.client.secret", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) +spark.conf.set("fs.azure.account.key."+synapseStorageAccount+".blob.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "synapse-storage-key")) val storageServiceName = sys.env("STORAGE_SERVICE_NAME") val storageContainerName = "rawdata" val wasbsRootPath = "wasbs://"+storageContainerName+"@"+storageServiceName+".blob.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "example-sa-key") +val storageKey = 
dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".blob.core.windows.net", storageKey) // COMMAND ---------- //Azure Synapse related settings -val dwDatabase = "SQLPool1" +val dwDatabase = "sqlpool1" val dwServer = sys.env("SYNAPSE_SERVICE_NAME")+".sql.azuresynapse.net" -val dwUser = dbutils.secrets.get("purview-to-adb-scope", "synapse-query-username") -val dwPass = dbutils.secrets.get("purview-to-adb-scope", "synapse-query-password") +val dwUser = dbutils.secrets.get("purview-to-adb-kv", "synapse-query-username") +val dwPass = dbutils.secrets.get("purview-to-adb-kv", "synapse-query-password") val dwJdbcPort = "1433" val dwJdbcExtraOptions = "encrypt=true;trustServerCertificate=true;hostNameInCertificate=*.database.windows.net;loginTimeout=30;" val sqlDwUrl = "jdbc:sqlserver://" + dwServer + ":" + dwJdbcPort + ";database=" + dwDatabase + ";user=" + dwUser+";password=" + dwPass + ";" + dwJdbcExtraOptions val blobStorage = synapseStorageAccount+".blob.core.windows.net" val blobContainer = "temp" -val blobAccessKey = dbutils.secrets.get("purview-to-adb-scope", "synapse-storage-key") +val blobAccessKey = dbutils.secrets.get("purview-to-adb-kv", "synapse-storage-key") val tempDir = "wasbs://" + blobContainer + "@" + blobStorage +"/tempfolder" // COMMAND ---------- diff --git a/tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out-with-param.py b/tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out-with-param.py deleted file mode 100644 index 8dcb9a73..00000000 --- a/tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out-with-param.py +++ /dev/null @@ -1,39 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC # Sample Databricks Lineage Extraction witrh param - -# COMMAND ---------- - -myval = dbutils.widgets.text('mayval','') -print(myval) - -# COMMAND ---------- - -key = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") - -spark.conf.set( - "fs.azure.account.key..blob.core.windows.net", - key) - -# COMMAND ---------- - -retail = ( - spark.read.csv("wasbs://rawdata@.blob.core.windows.net/retail/", inferSchema=True, header=True) - .withColumnRenamed('Customer ID', 'CustomerId' ) - .drop("Invoice") -) -retail.write.mode("overwrite").parquet("wasbs://outputdata@.blob.core.windows.net/retail/wasbdemo") - -# COMMAND ---------- - -display(retail.take(2)) - -# COMMAND ---------- - -retail2 = spark.read.parquet("wasbs://outputdata@.blob.core.windows.net/retail/wasbdemo") -retail2 = retail2.withColumnRenamed('Quantity', 'QuantitySold').drop('Country') -retail2.write.mode("overwrite").parquet("wasbs://outputdata@.blob.core.windows.net/retail/wasbdemo_updated") - -# COMMAND ---------- - -# display(retail2.take(2)) diff --git a/tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out.scala b/tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out.scala index f369f99a..4368da8d 100644 --- a/tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out.scala +++ b/tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out.scala @@ -7,7 +7,7 @@ val storageServiceName = sys.env("STORAGE_SERVICE_NAME") val storageContainerName = "rawdata" val wasbsRootPath = "wasbs://"+storageContainerName+"@"+storageServiceName+".blob.core.windows.net" -val storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") +val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".blob.core.windows.net", storageKey) @@ -22,7 
+22,7 @@ val exampleA = ( spark.read.format("csv") .schema(exampleASchema) .option("header", true) - .load(wasbsRootPath+"/examples/data/csv/exampleInputA/exampleInputA.csv") + .load(wasbsRootPath+"/testcase/wasinwasout/exampleInputA/") ) @@ -35,14 +35,14 @@ val exampleB = ( spark.read.format("csv") .schema(exampleBSchema) .option("header", true) - .load(wasbsRootPath+"/examples/data/csv/exampleInputB/exampleInputB.csv") + .load(wasbsRootPath+"/testcase/wasinwasout/exampleInputB/") ) // COMMAND ---------- val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) -outputDf.repartition(1).write.mode("overwrite").format("csv").save(wasbsRootPath+"/examples/data/csv/exampleOutputWASBS/") +outputDf.repartition(1).write.mode("overwrite").format("csv").save(wasbsRootPath+"/testcase/wasinwasout/exampleOutputWASBS/") // COMMAND ---------- diff --git a/tests/integration/spark-apps/pythonscript/pythonscript.json b/tests/integration/spark-apps/pythonscript/pythonscript.json index 37325323..a58a07be 100644 --- a/tests/integration/spark-apps/pythonscript/pythonscript.json +++ b/tests/integration/spark-apps/pythonscript/pythonscript.json @@ -4,7 +4,7 @@ "num_workers": 1, "spark_version": "9.1.x-scala2.12", "spark_conf": { - "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-scope/Ol-Output-Api-Key}}", + "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", "spark.openlineage.host": "https://YOURFUNCTION.azurewebsites.net", "spark.openlineage.namespace": "adb-123.1#ABC123", "spark.openlineage.version": "1" diff --git a/tests/integration/spark-apps/sparksubmit/sparksubmit.json b/tests/integration/spark-apps/sparksubmit/sparksubmit.json index 3fd25b5d..d62c1b6b 100644 --- a/tests/integration/spark-apps/sparksubmit/sparksubmit.json +++ b/tests/integration/spark-apps/sparksubmit/sparksubmit.json @@ -4,7 +4,7 @@ "num_workers": 1, "spark_version": "9.1.x-scala2.12", "spark_conf": { - "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-scope/Ol-Output-Api-Key}}", + "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", "spark.openlineage.host": "https://YOURFUNCTION.azurewebsites.net", "spark.openlineage.namespace": "YOURNAMESPACE#JOBNAME", "spark.openlineage.version": "1" diff --git a/tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/abfssintest/main.py b/tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/abfssintest/main.py index e36cf252..32486a4e 100644 --- a/tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/abfssintest/main.py +++ b/tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/abfssintest/main.py @@ -17,7 +17,7 @@ def runapp(): abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" - storageKey = dbutils.secrets.get("purview-to-adb-scope", "storage-service-key") + storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) diff --git a/tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/db-job-def.json b/tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/db-job-def.json deleted file mode 100644 index e89c7c1c..00000000 --- a/tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/db-job-def.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "settings": { - "existing_cluster_id": "TEST-CLUSTER-ID", - "libraries": [ - 
{ - "whl": "dbfs:/wheels/abfssintest-0.0.3-py3-none-any.whl" - } - ], - "python_wheel_task": { - "packageName": "abfssintest", - "entryPoint": "runapp" - }, - "timeout_seconds": 0, - "email_notifications": {}, - "name": "WheelJob", - "max_concurrent_runs": 1 - } -} \ No newline at end of file