diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl index c4e88965..dd6b6d64 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl @@ -59,7 +59,7 @@ jobs: # Update databricks.yml - script: | - echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .input_prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml" + echo -e " staging:\n variables:\n catalog_name: {{ .input_staging_catalog_name }}\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n variables:\n catalog_name: {{ .input_prod_catalog_name }}\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n variables:\n catalog_name: {{ .input_test_catalog_name }}\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml" displayName: 'Update databricks.yml' # Initialize CICD Bundle diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 1dba276b..84513181 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -53,7 +53,7 @@ jobs: - name: Update databricks.yml id: update run: | - echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .input_prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n 
workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" + echo -e " staging:\n variables:\n catalog_name: {{ .input_staging_catalog_name }}\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n variables:\n catalog_name: {{ .input_prod_catalog_name }}\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n variables:\n catalog_name: {{ .input_test_catalog_name }}\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" - name: Initialize Bundle id: initialize run: | diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl index 4543a637..f6402b95 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl @@ -55,18 +55,18 @@ jobs: - name: Validate Bundle For Test Deployment Target in Staging Workspace id: validate run: | - databricks bundle validate -t {{ .input_test_catalog_name }} + databricks bundle validate -t test - name: Deploy Bundle to Test Deployment Target in Staging Workspace id: deploy run: | - databricks bundle deploy -t {{ .input_test_catalog_name }} + databricks bundle deploy -t test {{- if (eq .input_include_feature_store `yes`) }} - name: Run Feature Engineering Workflow for Test Deployment Target in Staging Workspace id: feature_engineering run: | - databricks bundle run write_feature_table_job -t {{ .input_test_catalog_name }} + databricks bundle run write_feature_table_job -t test {{- end }} - name: Run Training Workflow for Test Deployment Target in Staging Workspace id: training run: | - databricks bundle run model_training_job -t {{ .input_test_catalog_name }} + databricks bundle run model_training_job -t 
test diff --git a/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-prod.yml.tmpl b/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-prod.yml.tmpl index 51a67e3a..a729e654 100644 --- a/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-prod.yml.tmpl +++ b/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-prod.yml.tmpl @@ -9,7 +9,7 @@ deploy-production: DATABRICKS_CLIENT_SECRET: "${SP_CLIENT_SECRET}" script: - cd {{template `project_name_alphanumeric_underscore` .}} - - databricks bundle validate -t {{ .input_prod_catalog_name }} - - databricks bundle deploy -t {{ .input_prod_catalog_name }} + - databricks bundle validate -t prod + - databricks bundle deploy -t prod rules: - when: on_success # mandatory to ensure this job can be called by parent on merge request diff --git a/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-staging.yml.tmpl b/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-staging.yml.tmpl index 72c1222b..de3f3832 100644 --- a/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-staging.yml.tmpl +++ b/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-cd-staging.yml.tmpl @@ -9,7 +9,7 @@ deploy-stage: DATABRICKS_CLIENT_SECRET: "${SP_CLIENT_SECRET}" script: - cd {{template `project_name_alphanumeric_underscore` .}} - - databricks bundle validate -t {{ .input_staging_catalog_name }} - - databricks bundle deploy -t {{ .input_staging_catalog_name }} + - databricks bundle validate -t staging + - databricks bundle deploy -t staging rules: - when: on_success # mandatory to ensure this job can be called by parent on merge request diff --git a/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-ci.yml.tmpl 
b/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-ci.yml.tmpl index 169bd993..81859eb2 100644 --- a/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.gitlab/pipelines/{{.input_project_name}}-bundle-ci.yml.tmpl @@ -20,9 +20,9 @@ integration-test: DATABRICKS_CLIENT_SECRET: "${SP_CLIENT_SECRET}" script: - cd {{template `project_name_alphanumeric_underscore` .}} - - databricks bundle validate -t {{ .input_test_catalog_name }} - - databricks bundle deploy -t {{ .input_test_catalog_name }} - - databricks bundle run write_feature_table_job -t {{ .input_test_catalog_name }} - - databricks bundle run model_training_job -t {{ .input_test_catalog_name }} + - databricks bundle validate -t test + - databricks bundle deploy -t test + - databricks bundle run write_feature_table_job -t test + - databricks bundle run model_training_job -t test rules: - when: on_success # mandatory to ensure this job can be called by parent cicd on merge request diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl index aa2955aa..1816090b 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl @@ -13,7 +13,9 @@ variables: description: Model name for the model training. 
{{ if (eq .input_include_models_in_unity_catalog `no`) }}default: ${bundle.target}-{{template `model_name` .}} {{- else -}}default: {{template `model_name` .}}{{end}} - + catalog_name: + description: The catalog name to save the trained model + include: # Resources folder contains ML artifact resources for the ML project that defines model and experiment # And workflows resources for the ML project including model training -> validation -> deployment, @@ -38,15 +40,21 @@ targets: {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} - {{ .input_staging_catalog_name }}: + staging: + variables: + catalog_name: {{ .input_staging_catalog_name }} workspace: host: {{template `databricks_staging_workspace_host` .}} - {{ .input_prod_catalog_name }}: + prod: + variables: + catalog_name: {{ .input_prod_catalog_name }} workspace: host: {{template `databricks_prod_workspace_host` .}} - {{ .input_test_catalog_name }}: + test: + variables: + catalog_name: {{ .input_test_catalog_name }} workspace: host: {{template `databricks_staging_workspace_host` .}} {{ end }} \ No newline at end of file diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl index e82f35c7..e11b6306 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl @@ -27,9 +27,9 @@ resources: {{- else -}}input_table_name: hive_metastore.default.taxi_scoring_sample_feature_store_inference_input{{ end }} {{- else -}}input_table_name: taxi_scoring_sample # TODO: create input table for inference{{ end }} {{ if (eq .input_include_models_in_unity_catalog `no`) 
}}output_table_name: ${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_predictions - {{- else -}}output_table_name: ${bundle.target}.{{ .input_schema_name }}.predictions{{ end }} + {{- else -}}output_table_name: ${var.catalog_name}.{{ .input_schema_name }}.predictions{{ end }} {{ if (eq .input_include_models_in_unity_catalog `no`) }}model_name: ${var.model_name} - {{- else -}}model_name: ${bundle.target}.{{ .input_schema_name }}.${var.model_name}{{ end }} + {{- else -}}model_name: ${var.catalog_name}.{{ .input_schema_name }}.${var.model_name}{{ end }} # git source information of current ML resource deployment. It will be persisted as part of the workflow run git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl index 40555df4..8818181e 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl @@ -32,7 +32,7 @@ resources: input_end_date: "" timestamp_column: tpep_pickup_datetime {{ if (eq .input_include_models_in_unity_catalog `no`) }}output_table_name: feature_store_taxi_example.${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_trip_pickup_features - {{- else -}}output_table_name: ${bundle.target}.{{ .input_schema_name }}.trip_pickup_features{{ end }} + {{- else -}}output_table_name: ${var.catalog_name}.{{ .input_schema_name }}.trip_pickup_features{{ end }} features_transform_module: pickup_features primary_keys: zip # git source information of 
current ML resource deployment. It will be persisted as part of the workflow run @@ -48,8 +48,8 @@ resources: input_start_date: "" input_end_date: "" timestamp_column: tpep_dropoff_datetime {{ if (eq .input_include_models_in_unity_catalog `no`) }}output_table_name: feature_store_taxi_example.${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_trip_dropoff_features - {{- else -}}output_table_name: ${bundle.target}.{{ .input_schema_name }}.trip_dropoff_features{{ end }} + {{- else -}}output_table_name: ${var.catalog_name}.{{ .input_schema_name }}.trip_dropoff_features{{ end }} features_transform_module: dropoff_features primary_keys: zip # git source information of current ML resource deployment. It will be persisted as part of the workflow run diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl index 1217dd79..64ceba20 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl @@ -22,7 +22,7 @@ resources: registered_models: model: name: ${var.model_name} - catalog_name: ${bundle.target} + catalog_name: ${var.catalog_name} schema_name: {{ .input_schema_name }} comment: Registered model in Unity Catalog for the "{{ .input_project_name }}" ML Project for ${bundle.target} deployment target. 
<<: *grants{{end}} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl index ca10e52b..3c063b75 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl @@ -30,7 +30,7 @@ resources: training_data_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled experiment_name: ${var.experiment_name} {{ if (eq .input_include_models_in_unity_catalog `no`) }}model_name: ${var.model_name} - {{- else -}}model_name: ${bundle.target}.{{ .input_schema_name }}.${var.model_name}{{ end }} + {{- else -}}model_name: ${var.catalog_name}.{{ .input_schema_name }}.${var.model_name}{{ end }} # git source information of current ML resource deployment. 
It will be persisted as part of the workflow run git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} {{ else if (eq .input_include_feature_store `yes`) }}notebook_task: @@ -41,11 +41,11 @@ resources: training_data_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled experiment_name: ${var.experiment_name} {{ if (eq .input_include_models_in_unity_catalog `no`) }}model_name: ${var.model_name} - {{- else -}}model_name: ${bundle.target}.{{ .input_schema_name }}.${var.model_name}{{ end }} + {{- else -}}model_name: ${var.catalog_name}.{{ .input_schema_name }}.${var.model_name}{{ end }} {{ if (eq .input_include_models_in_unity_catalog `no`) }}pickup_features_table: feature_store_taxi_example.${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_trip_pickup_features - {{- else -}}pickup_features_table: ${bundle.target}.{{ .input_schema_name }}.trip_pickup_features{{ end }} + {{- else -}}pickup_features_table: ${var.catalog_name}.{{ .input_schema_name }}.trip_pickup_features{{ end }} {{ if (eq .input_include_models_in_unity_catalog `no`) }}dropoff_features_table: feature_store_taxi_example.${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_trip_dropoff_features - {{- else -}}dropoff_features_table: ${bundle.target}.{{ .input_schema_name }}.trip_dropoff_features{{ end }} + {{- else -}}dropoff_features_table: ${var.catalog_name}.{{ .input_schema_name }}.trip_dropoff_features{{ end }} # git source information of current ML resource deployment. It will be persisted as part of the workflow run git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} {{- else -}}notebook_task: