feat: Add a job to detect new raw schema fields to add to safe schema manual models
Showing 3 changed files with 150 additions and 0 deletions.
64 changes: 64 additions & 0 deletions
dataeng/jobs/analytics/DetectNewDBTManualModelsFields.groovy
@@ -0,0 +1,64 @@
package analytics

import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters


class DetectNewDBTManualModelsFields {
    public static def job = { dslFactory, allVars ->
        dslFactory.job("detect-new-dbt-manual-models-fields") {
            // If DISABLED is set to true in the job's extra vars, disable the job.
            disabled(allVars.get('DISABLED', false))
            description("This job detects new columns in tables in raw schemas that have yet to be manually added to safe schema models.")
            // Set a definite log rotation, if defined.
            logRotator common_log_rotator(allVars)
            // Set the analytics-secure parameters for repo and branch from the common helpers
            parameters secure_scm_parameters(allVars)
            // Add the analytics-tools parameters for repo and branch information
            parameters {
                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
                stringParam('NOTIFY', allVars.get('NOTIFY', '$PAGER_NOTIFY'), 'Space-separated list of emails to send notifications to.')
            }
            // Set the necessary Vault KV paths for credentials as environment variables
            environmentVariables {
                env('JIRA_WEBHOOK_VAULT_KV_PATH', allVars.get('JIRA_WEBHOOK_VAULT_KV_PATH'))
                env('JIRA_WEBHOOK_VAULT_KV_VERSION', allVars.get('JIRA_WEBHOOK_VAULT_KV_VERSION'))
                env('AUTOMATION_TASK_USER_VAULT_KV_PATH', allVars.get('AUTOMATION_TASK_USER_VAULT_KV_PATH'))
                env('AUTOMATION_TASK_USER_VAULT_KV_VERSION', allVars.get('AUTOMATION_TASK_USER_VAULT_KV_VERSION'))
            }
            // SCM settings for analytics-secure and analytics-tools
            multiscm secure_scm(allVars) << {
                git {
                    remote {
                        url('$ANALYTICS_TOOLS_URL')
                        branch('$ANALYTICS_TOOLS_BRANCH')
                        credentials('1')
                    }
                    extensions {
                        relativeTargetDirectory('analytics-tools')
                        pruneBranches()
                        cleanAfterCheckout()
                    }
                }
            }
            wrappers {
                colorizeOutput('xterm')
                timestamps()
                credentialsBinding {
                    usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault')
                }
            }
            // Set the trigger using cron
            triggers common_triggers(allVars)
            // Notifications on build failures
            publishers common_publishers(allVars)
            steps {
                shell(dslFactory.readFileFromWorkspace('dataeng/resources/detect-new-dbt-manual-models-fields.sh'))
            }
        }
    }
}
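For context on how this DSL class is consumed: the job closure above expects a dslFactory and an allVars map of extra vars (DISABLED, ANALYTICS_TOOLS_URL, the Vault KV paths, and so on). The sketch below shows a hypothetical seed-job invocation; the repo URL, branch, notification address, and Vault paths are placeholder assumptions for illustration, not values from this commit.

// Hypothetical seed-job snippet (illustrative only; all values are placeholders).
import analytics.DetectNewDBTManualModelsFields

def allVars = [
    DISABLED: false,                                                     // set to true to disable the job
    ANALYTICS_TOOLS_URL: 'git@github.com:example/analytics-tools.git',   // assumed repo URL
    ANALYTICS_TOOLS_BRANCH: 'origin/master',                             // assumed branch
    NOTIFY: 'data-alerts@example.com',                                   // placeholder notification address
    JIRA_WEBHOOK_VAULT_KV_PATH: 'kv/analytics/jira-webhook',             // assumed Vault KV path
    JIRA_WEBHOOK_VAULT_KV_VERSION: '1',
    AUTOMATION_TASK_USER_VAULT_KV_PATH: 'kv/analytics/automation-user',  // assumed Vault KV path
    AUTOMATION_TASK_USER_VAULT_KV_VERSION: '1',
]
// "this" is the Job DSL factory available inside a seed-job script.
DetectNewDBTManualModelsFields.job.call(this, allVars)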
84 changes: 84 additions & 0 deletions
dataeng/resources/detect-new-dbt-manual-models-fields.sh
@@ -0,0 +1,84 @@
#!/usr/bin/env bash
set -ex

# Set up a virtual environment
PYTHON38_VENV="py38_venv"
virtualenv --python=python3.8 --clear "${PYTHON38_VENV}"
source "${PYTHON38_VENV}/bin/activate"

# Go into analytics-tools and install the dependencies
cd ${WORKSPACE}/analytics-tools/snowflake
make requirements

# Create a function to clean up the credential files, and trap EXIT with it
function clean_up_files() {
    rm -rf .private_key_file .private_key_passphrase_file
}
trap clean_up_files EXIT

# Fetch credentials from Vault.
# Do not print commands in this section since they may contain secrets.
set +x

# Retrieve a Vault token corresponding to the Jenkins AppRole. The token is then stored in the VAULT_TOKEN variable,
# which is implicitly used by subsequent vault commands within this script.
# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid
export VAULT_TOKEN=$(vault write -field=token auth/approle/login \
    role_id=${ANALYTICS_VAULT_ROLE_ID} \
    secret_id=${ANALYTICS_VAULT_SECRET_ID}
)

set -x

# JIRA webhook URL and secret string from Vault
WEBHOOK_URL=$(
    vault kv get \
        -version=${JIRA_WEBHOOK_VAULT_KV_VERSION} \
        -field=JIRA_WEBHOOK_URL \
        ${JIRA_WEBHOOK_VAULT_KV_PATH}
)
WEBHOOK_SECRET=$(
    vault kv get \
        -version=${JIRA_WEBHOOK_VAULT_KV_VERSION} \
        -field=JIRA_WEBHOOK_SECRET \
        ${JIRA_WEBHOOK_VAULT_KV_PATH}
)

# Snowflake credentials from Vault
SNOWFLAKE_ACCOUNT=$(
    vault kv get \
        -version=${AUTOMATION_TASK_USER_VAULT_KV_VERSION} \
        -field=account \
        ${AUTOMATION_TASK_USER_VAULT_KV_PATH}
)

SNOWFLAKE_USER=$(
    vault kv get \
        -version=${AUTOMATION_TASK_USER_VAULT_KV_VERSION} \
        -field=user \
        ${AUTOMATION_TASK_USER_VAULT_KV_PATH}
)

# The detect_new_raw_columns.py script, much like all other scripts that connect to Snowflake,
# expects the private key and the private key passphrase to be in files.
# As a result, SNOWFLAKE_PRIVATE_KEY and SNOWFLAKE_PRIVATE_KEY_PASSPHRASE are stored in files.
vault kv get \
    -version=${AUTOMATION_TASK_USER_VAULT_KV_VERSION} \
    -field=private_key \
    ${AUTOMATION_TASK_USER_VAULT_KV_PATH} > .private_key_file

vault kv get \
    -version=${AUTOMATION_TASK_USER_VAULT_KV_VERSION} \
    -field=private_key_passphrase \
    ${AUTOMATION_TASK_USER_VAULT_KV_PATH} > .private_key_passphrase_file

# The extra vars file for this job contains both the field mappings and the necessary credentials for Snowflake and Jenkins.
# Therefore, the options to the script are read from the config file.
CONFIG_PATH=${WORKSPACE}/analytics-secure/automation-configs/DETECT_NEW_DBT_MANUAL_MODELS_FIELDS_JOB_MAPPINGS.yaml

# Invoke the script to detect new fields that need to be added manually
python detect_new_raw_columns.py ${CONFIG_PATH} \
    --user ${SNOWFLAKE_USER} --account ${SNOWFLAKE_ACCOUNT} \
    --key-path .private_key_file --passphrase-path .private_key_passphrase_file \
    --jira-webhook-url ${WEBHOOK_URL} \
    --jira-webhook-secret ${WEBHOOK_SECRET}