Skip to content

Commit 76f1c0c

Browse files
authored
Merge pull request #1664 from edx/musama/amplitude_user_properties_backfill
feat: Jenkins job to backfill user properties on Amplitude.
2 parents f0110cb + fd85f9e commit 76f1c0c

File tree

3 files changed

+102
-0
lines changed

3 files changed

+102
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package analytics
2+
3+
import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization
4+
import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
5+
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
6+
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
7+
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm
8+
9+
10+
/**
 * Defines the "amplitude-user-properties-backfill" Jenkins job.
 *
 * The job reads rows from a Snowflake source table and pushes updated user
 * properties to the Amplitude Identify API via the
 * amplitude_user_properties_update.py script in the analytics-tools repo.
 */
class AmplitudeUserPropertiesBackfill {
    // Closure invoked by the seed job; `dslFactory` is the Job DSL factory and
    // `allVars` is the map of configuration values for this deployment.
    public static def job = { dslFactory, allVars ->
        dslFactory.job("amplitude-user-properties-backfill") {
            logRotator common_log_rotator(allVars)
            authorization common_authorization(allVars)
            parameters secure_scm_parameters(allVars)
            parameters {
                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
                stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.')
                stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to use.')
                stringParam('AMPLITUDE_DATA_SOURCE_TABLE', '', 'Table name that has data which needs to be updated on Amplitude. It should have format like database.schema.table.')
                stringParam('COLUMNS_TO_UPDATE', '', 'Columns that you want to update. Separate multiple columns with commas.')
                stringParam('RESPONSE_TABLE', '', 'Output table which will store the updated data along with response from API endpoint.')
                stringParam('AMPLITUDE_OPERATION_NAME', '', 'Amplitude user property operation name. e.g: set or setOnce.')
            }
            // Snowflake connection settings plus the Vault location of the
            // Amplitude API key, consumed by the shell step below.
            environmentVariables {
                env('KEY_PATH', allVars.get('KEY_PATH'))
                env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH'))
                env('USER', allVars.get('USER'))
                env('ACCOUNT', allVars.get('ACCOUNT'))
                env('AMPLITUDE_VAULT_KV_PATH', allVars.get('AMPLITUDE_VAULT_KV_PATH'))
                env('AMPLITUDE_VAULT_KV_VERSION', allVars.get('AMPLITUDE_VAULT_KV_VERSION'))
            }
            // Check out analytics-tools alongside the secure SCM checkout; the
            // backfill script lives under analytics-tools/snowflake.
            multiscm secure_scm(allVars) << {
                git {
                    remote {
                        url('$ANALYTICS_TOOLS_URL')
                        branch('$ANALYTICS_TOOLS_BRANCH')
                        credentials('1')
                    }
                    extensions {
                        relativeTargetDirectory('analytics-tools')
                        pruneBranches()
                        cleanAfterCheckout()
                    }
                }
            }
            wrappers {
                timestamps()
                // Binds the AppRole credentials used to log in to Vault and
                // fetch the Amplitude API key at run time.
                credentialsBinding {
                    usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault');
                }
            }
            publishers common_publishers(allVars)
            steps {
                shell(dslFactory.readFileFromWorkspace('dataeng/resources/amplitude-properties-backfill.sh'))
            }
        }
    }
}

dataeng/jobs/createJobsNew.groovy

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import static analytics.RetirementJobEdxTriggers.job as RetirementJobEdxTriggers
88
import static analytics.RetirementJobs.job as RetirementJobsJob
99
import static analytics.SnowflakeCollectMetrics.job as SnowflakeCollectMetricsJob
1010
import static analytics.SnowflakeExpirePasswords.job as SnowflakeExpirePasswordsJob
11+
import static analytics.AmplitudeUserPropertiesBackfill.job as AmplitudeUserPropertiesBackfillJob
1112
import static analytics.SnowflakeSchemaBuilder.job as SnowflakeSchemaBuilderJob
1213
import static analytics.SnowflakeUserRetirementStatusCleanup.job as SnowflakeUserRetirementStatusCleanupJob
1314
import static analytics.PrefectFlowsDeployment.job as PrefectFlowsDeploymentJob
@@ -50,6 +51,7 @@ def taskMap = [
5051
RETIREMENT_JOBS_JOB: RetirementJobsJob,
5152
SNOWFLAKE_COLLECT_METRICS_JOB: SnowflakeCollectMetricsJob,
5253
SNOWFLAKE_EXPIRE_PASSWORDS_JOB: SnowflakeExpirePasswordsJob,
54+
AMPLITUDE_USER_PROPERTIES_BACKFILL_JOB: AmplitudeUserPropertiesBackfillJob,
5355
SNOWFLAKE_SCHEMA_BUILDER_JOB: SnowflakeSchemaBuilderJob,
5456
SNOWFLAKE_USER_RETIREMENT_STATUS_CLEANUP_JOB: SnowflakeUserRetirementStatusCleanupJob,
5557
PREFECT_FLOWS_DEPLOYMENT_JOB: PrefectFlowsDeploymentJob,
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env bash
# Backfill user properties on Amplitude: fetch the Amplitude API key from
# Vault, then run the analytics-tools script that reads a Snowflake table and
# posts the updated properties to the Amplitude API.
set -ex

# Create an isolated Python virtual environment for the backfill script.
# PYTHON_VENV_VERSION (e.g. python3.7) is a job parameter.
PYTHON_VENV="python_venv"
virtualenv --python="$PYTHON_VENV_VERSION" --clear "${PYTHON_VENV}"
source "${PYTHON_VENV}/bin/activate"

# Install the python requirements for the analytics-tools snowflake scripts.
cd "$WORKSPACE/analytics-tools/snowflake"
make requirements

# Do not print commands from here on since they may contain secrets.
set +x

# Retrieve a vault token corresponding to the jenkins AppRole. The token is then stored in the VAULT_TOKEN variable
# which is implicitly used by subsequent vault commands within this script.
# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid
export VAULT_TOKEN=$(vault write -field=token auth/approle/login \
    role_id="${ANALYTICS_VAULT_ROLE_ID}" \
    secret_id="${ANALYTICS_VAULT_SECRET_ID}"
)

# Read the Amplitude API key from the configured Vault KV path/version.
API_KEY=$(
  vault kv get \
    -version="${AMPLITUDE_VAULT_KV_VERSION}" \
    -field=API_KEY \
    "${AMPLITUDE_VAULT_KV_PATH}"
)

# All connection/table settings come from job parameters and environment
# variables injected by the Jenkins job definition. Values are quoted so that
# paths or names containing spaces do not break word-splitting.
python amplitude_user_properties_update.py \
    --key_path "$KEY_PATH" \
    --passphrase_path "$PASSPHRASE_PATH" \
    --automation_user "$USER" \
    --account "$ACCOUNT" \
    --amplitude_data_source_table "$AMPLITUDE_DATA_SOURCE_TABLE" \
    --columns_to_update "$COLUMNS_TO_UPDATE" \
    --response_table "$RESPONSE_TABLE" \
    --amplitude_operation_name "$AMPLITUDE_OPERATION_NAME" \
    --amplitude_api_key "$API_KEY"

0 commit comments

Comments
 (0)