From 9da0b3a6f0c6bc14faa153ae519d5ea3a526c79f Mon Sep 17 00:00:00 2001 From: k8 Date: Mon, 27 Nov 2023 16:23:26 -0500 Subject: [PATCH] chore: analytics secure calls JIRA:CLOUDSEC-12 --- .../analytics_exporter/course_exporter.yaml | 49 ++++++++++++++++++ dataeng/analytics_exporter/default.yaml | 50 +++++++++++++++++++ dataeng/analytics_exporter/email_optin.yaml | 35 +++++++++++++ dataeng/resources/event-export-incremental.sh | 2 +- dataeng/resources/run-course-exporter.sh | 2 +- .../resources/run-pipeline-acceptance-test.sh | 2 +- .../resources/setup-exporter-email-optin.sh | 2 +- dataeng/resources/setup-exporter.sh | 2 +- 8 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 dataeng/analytics_exporter/course_exporter.yaml create mode 100644 dataeng/analytics_exporter/default.yaml create mode 100644 dataeng/analytics_exporter/email_optin.yaml diff --git a/dataeng/analytics_exporter/course_exporter.yaml b/dataeng/analytics_exporter/course_exporter.yaml new file mode 100644 index 000000000..61f56c512 --- /dev/null +++ b/dataeng/analytics_exporter/course_exporter.yaml @@ -0,0 +1,49 @@ +defaults: + auth_file: '{WORKSPACE}/analytics-secure/analytics-exporter/task-auth.json' + work_dir: /var/lib/jenkins/tmp/analytics-course-exporter + external_prefix: '' + output_bucket: edx-analytics-rdx + pipeline_bucket: edx-analytics-export + output_prefix: 'course_exports/raw/' + django_admin: '' + django_pythonpath: '' + django_settings: lms.envs.analytics_exporter + django_cms_settings: cms.envs.analytics_exporter + django_database: read_replica + mongo_collection: contents + sql_user: analytics001 + sql_db: wwc + monitor: true + +environments: + prod: + lms_config: ${WORKSPACE}/remote-config/prod-edx/lms.yml + studio_config: ${WORKSPACE}/remote-config/prod-edx/studio.yml + mongo_host: prod-edx-mongo-1.edx.org:27017,prod-edx-mongo-2.edx.org:27017,prod-edx-mongo-3.edx.org:27017,prod-edx-mongo-4.edx.org:27017 + mongo_db: comments-prod + name: prod-analytics + 
sql_host: prod-edx-edxapp-readonly.rds.edx.org + edge: + lms_config: ${WORKSPACE}/remote-config/prod-edge/lms.yml + studio_config: ${WORKSPACE}/remote-config/prod-edge/studio.yml + mongo_host: prod-edge-mongo-1.edx.org:27017,prod-edge-mongo-2.edx.org:27017,prod-edge-mongo-3.edx.org:27017,prod-edge-mongo-4.edx.org:27017 + mongo_db: comments-edge + name: prod-edge-analytics + sql_host: prod-edge-edxapp-readonly.rds.edx.org + +tasks: + - StudentModuleTask + - TeamsTask + - TeamsMembershipTask + - CourseEnrollmentTask + - GeneratedCertificateTask + - InCourseReverificationTask + - AuthUserTask + - AuthUserProfileTask + - StudentLanguageProficiencyTask + - WikiArticleTask + - WikiArticleRevisionTask + - UserCourseTagTask + - ForumsTask + - CourseStructureTask + - CourseContentTask \ No newline at end of file diff --git a/dataeng/analytics_exporter/default.yaml b/dataeng/analytics_exporter/default.yaml new file mode 100644 index 000000000..af550b474 --- /dev/null +++ b/dataeng/analytics_exporter/default.yaml @@ -0,0 +1,50 @@ +# Configuration for exporting data for all orgs from Jenkins. +# This config makes some assumptions that are only valid for +# Jenkins-managed shiningpanda virtual environments. +# +# As part of the job, a symlink is created to the django-admin.py +# executable in the virtual environment workspace, which is subject to +# change. To fix this, we use the variable ${WORKSPACE} that is +# overridden in the Jenkins shell task with the appropriate value. 
+ +defaults: + auth_file: '{WORKSPACE}/analytics-secure/analytics-exporter/task-auth.json' + gpg_master_key: analytics@edx.org + gpg_keys: gpg-keys + work_dir: /var/lib/jenkins/tmp/analytics-exporter + external_prefix: '' + output_bucket: course-data + pipeline_bucket: edx-analytics-export + output_prefix: '' + se_bucket: edx-analytics-stackexchange + django_admin: '' + django_pythonpath: '' + django_settings: lms.envs.analytics_exporter + django_cms_settings: cms.envs.analytics_exporter + django_database: read_replica + mongo_collection: contents + sql_user: analytics001 + sql_db: wwc + monitor: true + graphite_host: graphite.analytics.edx.org + graphite_port: 2003 + graphite_prefix: edx.analytics.research + # Config to limit how many years of data the analytics exporter job dumps. + # This is the number of years in the past from today's date + time_constraint: 3 + +environments: + prod: + lms_config: ${WORKSPACE}/remote-config/prod-edx/lms.yml + studio_config: ${WORKSPACE}/remote-config/prod-edx/studio.yml + mongo_host: prod-edx-mongo-1.edx.org:27017,prod-edx-mongo-2.edx.org:27017,prod-edx-mongo-3.edx.org:27017,prod-edx-mongo-4.edx.org:27017 + mongo_db: comments-prod + name: prod-analytics + sql_host: prod-edx-edxapp-readonly.rds.edx.org + edge: + lms_config: ${WORKSPACE}/remote-config/prod-edge/lms.yml + studio_config: ${WORKSPACE}/remote-config/prod-edge/studio.yml + mongo_host: prod-edge-mongo-1.edx.org:27017,prod-edge-mongo-2.edx.org:27017,prod-edge-mongo-3.edx.org:27017,prod-edge-mongo-4.edx.org:27017 + mongo_db: comments-edge + name: prod-edge-analytics + sql_host: prod-edge-edxapp-readonly.rds.edx.org \ No newline at end of file diff --git a/dataeng/analytics_exporter/email_optin.yaml b/dataeng/analytics_exporter/email_optin.yaml new file mode 100644 index 000000000..2b96f89a3 --- /dev/null +++ b/dataeng/analytics_exporter/email_optin.yaml @@ -0,0 +1,35 @@ +defaults: + auth_file: '{WORKSPACE}/analytics-secure/analytics-exporter/task-auth.json' + 
gpg_master_key: analytics@edx.org + gpg_keys: gpg-keys + work_dir: /var/lib/jenkins/tmp/analytics-exporter + external_prefix: '' + output_bucket: course-data + pipeline_bucket: edx-analytics-export + output_prefix: '' + se_bucket: edx-analytics-stackexchange + django_admin: '' + django_pythonpath: '' + django_settings: lms.envs.analytics_exporter + django_cms_settings: cms.envs.analytics_exporter + django_database: read_replica + mongo_collection: contents + sql_user: analytics001 + sql_db: wwc + monitor: true + +environments: + prod: + lms_config: ${WORKSPACE}/remote-config/prod-edx/lms.yml + studio_config: ${WORKSPACE}/remote-config/prod-edx/studio.yml + mongo_host: prod-edx-mongo-1.edx.org:27017,prod-edx-mongo-2.edx.org:27017,prod-edx-mongo-3.edx.org:27017,prod-edx-mongo-4.edx.org:27017 + mongo_db: comments-prod + name: prod-analytics + sql_host: prod-edx-edxapp-readonly.rds.edx.org + edge: + lms_config: ${WORKSPACE}/remote-config/prod-edge/lms.yml + studio_config: ${WORKSPACE}/remote-config/prod-edge/studio.yml + mongo_host: prod-edge-mongo-1.edx.org:27017,prod-edge-mongo-2.edx.org:27017,prod-edge-mongo-3.edx.org:27017,prod-edge-mongo-4.edx.org:27017 + mongo_db: comments-edge + name: prod-edge-analytics + sql_host: prod-edge-edxapp-readonly.rds.edx.org \ No newline at end of file diff --git a/dataeng/resources/event-export-incremental.sh b/dataeng/resources/event-export-incremental.sh index b1596c19e..2b74219a9 100755 --- a/dataeng/resources/event-export-incremental.sh +++ b/dataeng/resources/event-export-incremental.sh @@ -3,7 +3,7 @@ #################################################################### # Exporter configuration -ROOT=${WORKSPACE}/analytics-secure/analytics-exporter +ROOT=${WORKSPACE}/analytics-exporter SECURE_HASH=`GIT_DIR=./analytics-secure/.git git rev-parse HEAD` EXPORTER_CONFIG_BUCKET=s3://edx-analytics-scratch/exporter/config/$SECURE_HASH EXPORTER_CONFIG_PATH=${EXPORTER_CONFIG_BUCKET}/${EXPORTER_CONFIG} diff --git 
a/dataeng/resources/run-course-exporter.sh b/dataeng/resources/run-course-exporter.sh index 2617bfaa2..78ccc568b 100644 --- a/dataeng/resources/run-course-exporter.sh +++ b/dataeng/resources/run-course-exporter.sh @@ -21,7 +21,7 @@ popd source platform_venv_path # Configuration paths in analytics-secure -SECURE_ROOT=${WORKSPACE}/analytics-secure/analytics-exporter +SECURE_ROOT=${WORKSPACE}/analytics-exporter CONFIG_PATH=${SECURE_ROOT}/${EXPORTER_CONFIG_FILENAME} DATE=$(date +%d ${DATE_MODIFIER}) diff --git a/dataeng/resources/run-pipeline-acceptance-test.sh b/dataeng/resources/run-pipeline-acceptance-test.sh index f54932528..58ca09b0c 100644 --- a/dataeng/resources/run-pipeline-acceptance-test.sh +++ b/dataeng/resources/run-pipeline-acceptance-test.sh @@ -26,7 +26,7 @@ export COURSE_EXPORTER=$EXPORTER_BIN/course-exporter # Exporter configuration destination -ROOT=${WORKSPACE}/analytics-secure/analytics-exporter +ROOT=${WORKSPACE}/analytics-exporter SECURE_HASH=`GIT_DIR=./analytics-secure/.git git rev-parse HEAD` EXPORTER_CONFIG_BUCKET=$EXPORTER_BUCKET_PATH/$SECURE_HASH EXPORTER_CONFIG_PATH=${EXPORTER_CONFIG_BUCKET}/${EXPORTER_CONFIG} diff --git a/dataeng/resources/setup-exporter-email-optin.sh b/dataeng/resources/setup-exporter-email-optin.sh index f8dd3f5a6..13baf7835 100644 --- a/dataeng/resources/setup-exporter-email-optin.sh +++ b/dataeng/resources/setup-exporter-email-optin.sh @@ -17,7 +17,7 @@ pip install mysql-connector-python -e . popd # Configuration paths in analytics-secure -SECURE_ROOT=${WORKSPACE}/analytics-secure/analytics-exporter +SECURE_ROOT=${WORKSPACE}/analytics-exporter CONFIG_PATH=${SECURE_ROOT}/${EXPORTER_CONFIG_FILENAME} GPG_KEYS_PATH=${WORKSPACE}/data-czar-keys diff --git a/dataeng/resources/setup-exporter.sh b/dataeng/resources/setup-exporter.sh index 5466c09e3..b13b642eb 100644 --- a/dataeng/resources/setup-exporter.sh +++ b/dataeng/resources/setup-exporter.sh @@ -23,7 +23,7 @@ pip install mysql-connector-python -e . 
popd # Configuration paths in analytics-secure -SECURE_ROOT=${WORKSPACE}/analytics-secure/analytics-exporter +SECURE_ROOT=${WORKSPACE}/analytics-exporter CONFIG_PATH=${SECURE_ROOT}/${EXPORTER_CONFIG_FILENAME} GPG_KEYS_PATH=${WORKSPACE}/data-czar-keys