-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add a job to detect new raw schema fields to add to safe schema manual models
- Loading branch information
Showing
3 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
80 changes: 80 additions & 0 deletions
80
dataeng/jobs/analytics/DetectNewDBTManualModelsFields.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package analytics | ||
|
||
import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator | ||
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers | ||
import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers | ||
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm | ||
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters | ||
|
||
|
||
/**
 * Defines the "detect-new-dbt-manual-models-fields" Jenkins job.
 *
 * The job compiles the dbt models tagged "manual" in warehouse-transforms and
 * runs a script from analytics-tools that detects columns present in the raw
 * schemas but missing from the manually maintained safe-schema models.
 */
class DetectNewDBTManualModelsFields {
    public static def job = { dslFactory, allVars ->
        dslFactory.job("detect-new-dbt-manual-models-fields") {
            // If the DISABLED is set to true by the job's extra vars, then disable the job.
            disabled(allVars.get('DISABLED', false))
            description("This job detects new columns in tables in raw schemas that have yet to be manually added to safe schema models.")
            // Set a definite log rotation, if defined.
            logRotator common_log_rotator(allVars)
            // Set the analytics-secure parameters for repo and branch from the common helpers
            parameters secure_scm_parameters(allVars)
            // Add parameters to use analytics-tools and warehouse-transforms
            parameters {
                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
                stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.')
                stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository to use.')
                stringParam('DBT_TARGET', allVars.get('DBT_TARGET'), 'DBT target from profiles.yml in analytics-secure.')
                stringParam('DBT_PROFILE', allVars.get('DBT_PROFILE'), 'DBT profile from profiles.yml in analytics-secure.')
                stringParam('DBT_PROJECT_PATH', allVars.get('DBT_PROJECT_PATH'), 'Path in warehouse-transforms to use as the dbt project, relative to "projects" (usually automated/applications or reporting).')
                stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.')
            }
            // Set the necessary VAULT kv paths of credentials as environment variables
            environmentVariables {
                env('JIRA_WEBHOOK_VAULT_KV_PATH', allVars.get('JIRA_WEBHOOK_VAULT_KV_PATH'))
                env('JIRA_WEBHOOK_VAULT_KV_VERSION', allVars.get('JIRA_WEBHOOK_VAULT_KV_VERSION'))
                env('AUTOMATION_TASK_USER_VAULT_KV_PATH', allVars.get('AUTOMATION_TASK_USER_VAULT_KV_PATH'))
                env('AUTOMATION_TASK_USER_VAULT_KV_VERSION', allVars.get('AUTOMATION_TASK_USER_VAULT_KV_VERSION'))
            }
            // SCM settings for analytics-secure and analytics-tools.
            // NOTE: branch() is a method of the git context, not of remote{} —
            // Job DSL's RemoteContext has no branch() and the call previously only
            // worked through Groovy closure owner-delegation. It is declared at the
            // documented level here.
            multiscm secure_scm(allVars) << {
                git {
                    remote {
                        url('$WAREHOUSE_TRANSFORMS_URL')
                    }
                    branch('$WAREHOUSE_TRANSFORMS_BRANCH')
                    extensions {
                        relativeTargetDirectory('warehouse-transforms')
                        pruneBranches()
                        cleanAfterCheckout()
                    }
                }
                git {
                    remote {
                        url('$ANALYTICS_TOOLS_URL')
                        credentials('1')
                    }
                    branch('$ANALYTICS_TOOLS_BRANCH')
                    extensions {
                        relativeTargetDirectory('analytics-tools')
                        pruneBranches()
                        cleanAfterCheckout()
                    }
                }
            }
            wrappers {
                colorizeOutput('xterm')
                timestamps()
                // Bind the Vault AppRole credentials used by the shell step to log in.
                credentialsBinding {
                    usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault')
                }
            }
            // Set the trigger using cron
            triggers common_triggers(allVars)
            // Notifications on build failures
            publishers common_publishers(allVars)
            steps {
                shell(dslFactory.readFileFromWorkspace('dataeng/resources/detect-new-dbt-manual-models-fields.sh'))
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/usr/bin/env bash
# Detect new columns in raw-schema tables that have not yet been added to the
# manually maintained safe-schema dbt models. Compiles the dbt models tagged
# "manual", then runs analytics-tools' detect_new_raw_columns.py against the
# compiled target, which reports findings through a JIRA webhook.
set -ex

# Global variables.
# DBT_PROFILE_ARGS is intentionally left unquoted at its use sites: it expands
# into multiple CLI arguments.
DBT_PROFILE_ARGS="--profiles-dir ${WORKSPACE}/analytics-secure/warehouse-transforms/ --profile ${DBT_PROFILE} --target ${DBT_TARGET}"
DBT_TARGET_PATH="${WORKSPACE}/warehouse-transforms/projects/${DBT_PROJECT_PATH}/target"

# Set up a virtual environment for warehouse-transforms
PYTHON38_VENV="py38_venv_warehouse_transforms"
virtualenv --python=python3.8 --clear "${PYTHON38_VENV}"
source "${PYTHON38_VENV}/bin/activate"
pip install -U pip
pip install -r "${WORKSPACE}/warehouse-transforms/requirements.txt"

# Go into the dbt project and compile the manual models so their compiled SQL
# is available under ${DBT_TARGET_PATH} for the detection script to inspect.
cd "${WORKSPACE}/warehouse-transforms/projects/${DBT_PROJECT_PATH}"
dbt clean ${DBT_PROFILE_ARGS}
dbt deps ${DBT_PROFILE_ARGS}
dbt compile ${DBT_PROFILE_ARGS} --select tag:manual

# Deactivate the virtual environment, so we can create and activate another one
deactivate

# Set up a virtual environment for analytics-tools
PYTHON38_VENV="py38_venv_analytics_tools"
virtualenv --python=python3.8 --clear "${PYTHON38_VENV}"
source "${PYTHON38_VENV}/bin/activate"

# Go into analytics-tools and install the dependencies
cd "${WORKSPACE}/analytics-tools/snowflake"
make requirements

# Create a function to clean up the credential files, and trap EXIT with it.
# The trap runs in the directory current at exit, which is where the key files
# are written below.
function clean_up_files() {
    rm -rf .snowflake_private_key .snowflake_private_key_passphrase
}
trap clean_up_files EXIT

# Fetch credentials from vault.
# Keep xtrace disabled for this entire section: with `set -x`, bash echoes
# variable assignments with their expanded values, which would leak the JIRA
# webhook secret (and other credentials) into the Jenkins console log.
set +x

# Retrieve a vault token corresponding to the jenkins AppRole. The token is then stored in the VAULT_TOKEN variable
# which is implicitly used by subsequent vault commands within this script.
# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid
export VAULT_TOKEN=$(vault write -field=token auth/approle/login \
    role_id="${ANALYTICS_VAULT_ROLE_ID}" \
    secret_id="${ANALYTICS_VAULT_SECRET_ID}"
)

# JIRA webhook URL and secret string from vault
JIRA_WEBHOOK_URL=$(
  vault kv get \
    -version="${JIRA_WEBHOOK_VAULT_KV_VERSION}" \
    -field=JIRA_WEBHOOK_URL \
    "${JIRA_WEBHOOK_VAULT_KV_PATH}"
)
JIRA_WEBHOOK_SECRET=$(
  vault kv get \
    -version="${JIRA_WEBHOOK_VAULT_KV_VERSION}" \
    -field=JIRA_WEBHOOK_SECRET \
    "${JIRA_WEBHOOK_VAULT_KV_PATH}"
)

# Snowflake credentials from vault
SNOWFLAKE_ACCOUNT=$(
  vault kv get \
    -version="${AUTOMATION_TASK_USER_VAULT_KV_VERSION}" \
    -field=account \
    "${AUTOMATION_TASK_USER_VAULT_KV_PATH}"
)

SNOWFLAKE_USER=$(
  vault kv get \
    -version="${AUTOMATION_TASK_USER_VAULT_KV_VERSION}" \
    -field=user \
    "${AUTOMATION_TASK_USER_VAULT_KV_PATH}"
)

# Re-enable tracing: the commands below only show kv paths on their command
# lines; the key material itself is redirected straight into files.
set -x

# The detect_new_raw_columns.py script, much like all other scripts that connect to Snowflake,
# expects the private key and the private key passphrase to be in files.
# As a result, SNOWFLAKE_PRIVATE_KEY and SNOWFLAKE_PRIVATE_KEY_PASSPHRASE are stored in files.
vault kv get \
    -version="${AUTOMATION_TASK_USER_VAULT_KV_VERSION}" \
    -field=private_key \
    "${AUTOMATION_TASK_USER_VAULT_KV_PATH}" > .snowflake_private_key

vault kv get \
    -version="${AUTOMATION_TASK_USER_VAULT_KV_VERSION}" \
    -field=private_key_passphrase \
    "${AUTOMATION_TASK_USER_VAULT_KV_PATH}" > .snowflake_private_key_passphrase

# Invoke the script to detect new fields that need to be added manually.
# xtrace is disabled again because the webhook URL and secret appear on this
# command line and must not be echoed to the build log.
set +x
python detect_new_raw_columns.py "${DBT_TARGET_PATH}" \
    --user "${SNOWFLAKE_USER}" --account "${SNOWFLAKE_ACCOUNT}" \
    --key-path .snowflake_private_key --passphrase-path .snowflake_private_key_passphrase \
    --jira-webhook-url "${JIRA_WEBHOOK_URL}" \
    --jira-webhook-secret "${JIRA_WEBHOOK_SECRET}"