|
| 1 | +from datetime import datetime, timedelta |
| 2 | + |
| 3 | +from airflow import DAG |
| 4 | +from airflow.operators.python import PythonOperator |
| 5 | +from airflow.providers.amazon.aws.operators.glue import GlueJobOperator |
| 6 | +from airflow.providers.amazon.aws.hooks.s3 import S3Hook |
| 7 | + |
| 8 | +from airflow.providers.amazon.aws.sensors.glue import GlueJobSensor |
| 9 | +from jinja2 import Template |
| 10 | + |
| 11 | + |
def upload_rendered_script_to_s3(
    bucket_name, template_s3_key, rendered_s3_key, aws_conn_id, **kwargs
):
    """Render a Jinja template stored in S3 and upload the result back to S3.

    Args:
        bucket_name: S3 bucket holding both the template and the output file.
        template_s3_key: key of the Jinja template file to read.
        rendered_s3_key: key the rendered script is written to (overwritten).
        aws_conn_id: Airflow connection id used to build the S3Hook.
        **kwargs: values substituted into the template (any extra Airflow
            context keys passed through by PythonOperator are simply ignored
            by the template if unreferenced).
    """
    hook = S3Hook(aws_conn_id=aws_conn_id)

    # Fetch the raw Jinja template text from S3.
    raw_template = hook.read_key(template_s3_key, bucket_name)

    # Substitute the provided values into the template.
    rendered = Template(raw_template).render(**kwargs)

    # Write the rendered script back to S3, replacing any previous version.
    hook.load_string(
        string_data=rendered,
        bucket_name=bucket_name,
        key=rendered_s3_key,
        replace=True,
    )
| 32 | + |
| 33 | + |
with DAG(
    "glue_game_ccu",
    default_args={
        "owner": "airflow",
        "depends_on_past": False,
        "start_date": datetime(2024, 2, 22),
        "retries": 0,
        "retry_delay": timedelta(minutes=5),
    },
    schedule_interval="5 * * * *",  # at 5 minutes past every hour
    tags=["glue", "Game_CCU"],
    catchup=False,
) as dag:

    bucket_name = "de-2-1-bucket"

    # The job processes the *previous* hour's data. Derive every date part
    # from the same shifted timestamp so the partition path stays correct
    # across day/month/year boundaries. (Previously only `hour` was shifted,
    # so a run at Seoul midnight produced e.g. day=22/hour=23 instead of
    # day=21/hour=23.)
    prev_hour = "(data_interval_end - macros.timedelta(hours=1)).in_timezone('Asia/Seoul')"
    year = f"{{{{ {prev_hour}.year }}}}"
    month = f"{{{{ {prev_hour}.month }}}}"
    day = f"{{{{ {prev_hour}.day }}}}"
    hour = f"{{{{ {prev_hour}.hour }}}}"
    # NOTE(review): values are intentionally left unpadded (month=2, not 02)
    # to match the existing S3 partition layout — TODO confirm, and switch to
    # strftime('%m') etc. if downstream tooling expects zero-padding.

    # Render the Glue script template with this run's partition values and
    # upload the result so the Glue job picks up the fresh script.
    upload_script = PythonOperator(
        task_id="upload_script_to_s3",
        python_callable=upload_rendered_script_to_s3,
        op_kwargs={
            "bucket_name": bucket_name,
            "aws_conn_id": "aws_conn_id",
            "template_s3_key": "source/script/glue_game_ccu_template.py",
            "rendered_s3_key": "source/script/glue_game_ccu_script.py",
            # Values substituted into the Glue script template.
            "input_path": f"s3://de-2-1-bucket/source/json/table_name=raw_game_ccu/year={year}/month={month}/day={day}/hour={hour}/",
            "output_path": f"s3://de-2-1-bucket/source/parquet/table_name=raw_game_ccu/year={year}/month={month}/day={day}/hour={hour}/",
            "collect_time": f"{year}-{month}-{day} {hour}:00",
        },
    )

    # Start the Glue job using the script rendered above.
    run_glue_job = GlueJobOperator(
        task_id="run_glue_job",
        job_name="de-2-1_game_ccu",
        script_location="s3://de-2-1-bucket/source/script/glue_game_ccu_script.py",
        aws_conn_id="aws_conn_id",
        region_name="ap-northeast-2",
        iam_role_name="AWSGlueServiceRole-crawler",
    )

    # Block until the Glue job run started above reaches a terminal state.
    wait_for_job = GlueJobSensor(
        task_id="wait_for_job_game_ccu_glue_job",
        job_name="de-2-1_game_ccu",
        # Run id is pulled (via XCom) from the GlueJobOperator task above.
        run_id=run_glue_job.output,
        verbose=True,  # stream Glue job logs into the Airflow task log
        aws_conn_id="aws_conn_id",
    )

    upload_script >> run_glue_job >> wait_for_job
0 commit comments