diff --git a/dataeng/jobs/analytics/DetectNewDBTManualModelsFields.groovy b/dataeng/jobs/analytics/DetectNewDBTManualModelsFields.groovy
new file mode 100644
index 000000000..a0414d718
--- /dev/null
+++ b/dataeng/jobs/analytics/DetectNewDBTManualModelsFields.groovy
@@ -0,0 +1,59 @@
+package analytics
+
+import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
+import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
+import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers
+import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm
+import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
+
+
+class DetectNewDBTManualModelsFields {
+    public static def job = { dslFactory, allVars ->
+        dslFactory.job("detect-new-dbt-manual-models-fields"){
+            description("This job detects new columns in tables in raw schemas that have yet to be manually added to safe schema models.")
+            // Set a definite log rotation, if defined.
+            logRotator common_log_rotator(allVars)
+            // Set the analytics-secure parameters for repo and branch from the common helpers
+            parameters secure_scm_parameters(allVars)
+            // Add the analytics-tools parameters for repo and branch information
+            parameters {
+                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
+                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
+                stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.')
+            }
+            // Set the Snowflake authentication information as environment variables
+            environmentVariables {
+                env('KEY_PATH', allVars.get('KEY_PATH'))
+                env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH'))
+                env('USER', allVars.get('USER'))
+                env('ACCOUNT', allVars.get('ACCOUNT'))
+            }
+            // Set the trigger using cron
+            triggers common_triggers(allVars)
+            // SCM settings for analytics-secure and analytics-tools
+            multiscm secure_scm(allVars) << {
+                git {
+                    remote {
+                        url('$ANALYTICS_TOOLS_URL')
+                        branch('$ANALYTICS_TOOLS_BRANCH')
+                        credentials('1')
+                    }
+                    extensions {
+                        relativeTargetDirectory('analytics-tools')
+                        pruneBranches()
+                        cleanAfterCheckout()
+                    }
+                }
+            }
+            wrappers {
+                timestamps()
+                colorizeOutput('xterm')
+            }
+            // Notifications on build failures
+            publishers common_publishers(allVars)
+            steps {
+                shell(dslFactory.readFileFromWorkspace('dataeng/resources/detect-new-dbt-manual-models-fields.sh'))
+            }
+        }
+    }
+}
diff --git a/dataeng/jobs/createJobsNew.groovy b/dataeng/jobs/createJobsNew.groovy
index a52168894..5139c03ee 100644
--- a/dataeng/jobs/createJobsNew.groovy
+++ b/dataeng/jobs/createJobsNew.groovy
@@ -2,6 +2,7 @@ import static analytics.DBTDocs.job as DBTDocsJob
 import static analytics.DBTRun.job as DBTRunJob
 import static analytics.DBTSourceFreshness.job as DBTSourceFreshnessJob
 import static analytics.DeployCluster.job as DeployClusterJob
+import static analytics.DetectNewDBTManualModelsFields.job as DetectNewDBTManualModelsFieldsJob
 import static analytics.EmrCostReporter.job as EmrCostReporterJob
 import static analytics.ModelTransfers.job as ModelTransfersJob
 import static analytics.RetirementJobEdxTriggers.job as RetirementJobEdxTriggersJob
@@ -45,6 +46,7 @@ def taskMap = [
     DBT_RUN_JOB: DBTRunJob,
     DBT_SOURCE_FRESHNESS_JOB: DBTSourceFreshnessJob,
     DEPLOY_CLUSTER_JOB: DeployClusterJob,
+    DETECT_NEW_DBT_MANUAL_MODELS_FIELDS: DetectNewDBTManualModelsFieldsJob,
     EMR_COST_REPORTER_JOB: EmrCostReporterJob,
     MODEL_TRANSFERS_JOB: ModelTransfersJob,
     RETIREMENT_JOB_EDX_TRIGGERS_JOB: RetirementJobEdxTriggersJob,
diff --git a/dataeng/resources/detect-new-dbt-manual-models-fields.sh b/dataeng/resources/detect-new-dbt-manual-models-fields.sh
new file mode 100644
index 000000000..48eef398b
--- /dev/null
+++ b/dataeng/resources/detect-new-dbt-manual-models-fields.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -ex
+
+# Setup a virtual environment
+PYTHON38_VENV="py38_venv"
+virtualenv --python=python3.8 --clear "${PYTHON38_VENV}"
+source "${PYTHON38_VENV}/bin/activate"
+
+# Go into analytics-tools and install the dependencies
+cd "${WORKSPACE}/analytics-tools/snowflake"
+make requirements
+
+# I am not able to get make upgrade to work after adding pyyaml to base.in.
+# So, I am going to install pyyaml manually.
+pip install pyyaml
+
+# The extra vars file for this job contains both field mappings and the necessary credentials for Snowflake and Jenkins.
+# Therefore, the options to the script are read from the config file.
+CONFIG_PATH="${WORKSPACE}/analytics-secure/job-configs/DETECT_NEW_DBT_MANUAL_MODELS_FIELDS_JOB_EXTRA_VARS.yaml"
+# Invoke the script to detect new fields that need to be added manually
+python detect_new_raw_columns.py "${CONFIG_PATH}"