From c9ad8abe7ca7b82f317d56b90287fdd3bc34c6e5 Mon Sep 17 00:00:00 2001 From: Pablo Rosado Date: Mon, 30 Sep 2024 13:59:39 +0200 Subject: [PATCH] Update documentation --- .../indicator_upgrade/charts_update.py | 4 +- docs/guides/data-work/update-data.md | 66 ++++++++++--------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/apps/wizard/app_pages/indicator_upgrade/charts_update.py b/apps/wizard/app_pages/indicator_upgrade/charts_update.py index 4a8d0f917d3..d4215cdd49c 100644 --- a/apps/wizard/app_pages/indicator_upgrade/charts_update.py +++ b/apps/wizard/app_pages/indicator_upgrade/charts_update.py @@ -127,5 +127,7 @@ def push_new_charts(charts: List[gm.Chart]) -> None: ) st.exception(e) else: - st.success("The charts were successfully updated! Review the changes with `chart diff`") + st.success( + "The charts were successfully updated! If indicators from other datasets also need to be upgraded, simply refresh this page, otherwise move on to `chart diff` to review all changes." + ) st_page_link("chart-diff") diff --git a/docs/guides/data-work/update-data.md b/docs/guides/data-work/update-data.md index efcceb641ba..e2a898db898 100644 --- a/docs/guides/data-work/update-data.md +++ b/docs/guides/data-work/update-data.md @@ -11,20 +11,25 @@ This guide explains the general workflow to update a dataset that already exists In a nutshell, these are the steps to follow: - - Switch to `master` branch, ensure it's up-to-date and **not dirty**. - - Use the ETL Dashboard to create new versions of the steps (by duplicating the old ones). - - Create a PR with a staging server with + - Switch to `master` branch (`git switch master`), and ensure it's up-to-date (`git pull`). 
+ - Create a new branch and a draft pull request (PR) with a staging server: ```bash - etl pr update-{short_name} --title ":bar_chart: Update {short_name}" + etl pr update-{short_name} --title "Update {short_name}" --category data ``` - - Commit files generated by the ETL dashboard and push them. + - Use the ETL Dashboard to create new versions of the steps (this will duplicate the code of the old steps). + - Execute the newly created snapshot scripts, if any. + - If any of them fails, skip this step. + - Commit files generated by the ETL dashboard and push them to the branch. + - Note that, if snapshot steps were not successfully executed, buildkite will fail to build. Ignore this for now. - Adapt the code of the new steps and ensure ETL (e.g. `etlr step-names --grapher`) can execute them successfully. - Commit changes to the code. - Use Indicator Upgrader to update the charts (so they use the new variables instead of the old ones). - If needed, adapt existing charts or create new ones on the staging server. - - Archive old steps (i.e. move old steps from the dag to the archive dag). + - Use Chart Diff to approve changes in charts and newly created charts. + - Use the ETL Dashboard to archive old steps (this will move old steps from the active dag to the archive dag). - Commit all your final work and set your PR to be ready for review. - - Make further changes, if suggested by the reviewer. + - Select which commits need to be reviewed, omitting the very first one (so that the reviewer only sees the changes with respect to the old version of the step). + - Make further changes, if suggested by the reviewer. - Once approved, merge the PR. - Archive old grapher dataset(s). - Announce your update. @@ -36,9 +41,19 @@ This guide assumes you have already a [working installation of `etl`](../../../g ## 1. 
Duplicate the old steps and set up your staging server - **Update your `master` and configuration**: - - Go to ETL `master` branch, and ensure it's up-to-date in your local repository (by running `git pull`). + - Go to ETL `master` branch (by running `git switch master`), and ensure it's up-to-date in your local repository (`git pull`). - Ensure that, in your `.env` file, you have set `STAGING=1`. +- **Create a draft PR and a temporary staging server**: + - Create a PR with the following command (replace `{short_name}` with the short name of the dataset, e.g. `temperature-anomaly`): + ```bash + etl pr update-{short_name} --title "Update {short_name}" --category data + ``` + + This will create a new git branch in your local repository with an empty commit, which will be pushed to remote. + It will also create a draft pull request in GitHub, and a staging server. + - Wait for a notification from `owidbot`. It should take a few minutes, and will inform you that the staging server [http://staging-site-update-temperature-anomaly/admin](http://staging-site-update-temperature-anomaly/admin) has been created. + - **Update steps using the ETL Dashboard**: - Start the ETL Wizard, by running: ```bash @@ -58,29 +73,18 @@ This guide assumes you have already a [working installation of `etl`](../../../g ![Chart Upgrader](../../../assets/etl-dashboard-update-steps.gif)
Animation of how to update steps in ETL Dashboard.
- - Copy the recommended command from the output (i.e. `etl pr ...`). - - You can close the Wizard (kill it with ++ctrl+c++). - -- **Run `etl pr ..` to create a PR** - - Create a PR with the following command (replace `{short_name}` with the short name of the dataset, e.g. `temperature-anomaly`): - - ```bash - etl pr update-{short_name} --title ":bar_chart: Update {short_name}" - ``` - - This will create a new git branch in your local repository with empty commit, which will be pushed. - It will also create a draft pull request in github, and a staging server. - - Then, commit generated files and push them to the new branch. This commit will contain only copied steps, subsequent changes - will be added as separate commits. It's likely that CI/CD checks will fail at this point, since the snapshot hasn't been run yet. - Wait for a notification from `owidbot`. It should take a few minutes, and will inform you that the staging server [http://staging-site-update-temperature-anomaly/admin](http://staging-site-update-temperature-anomaly/admin) has been created. +- **Execute new snapshots**: + - Try to execute any newly created snapshot scripts. + - If any of the scripts fail, don't fix them. This will be done later. For now, move to the next step. +- **Commit the new files**: + - Commit the newly generated files and push them to the new branch. + - If any snapshot failed in the previous step, CI/CD checks will fail. Ignore this for now. ## 2. Update and run the new steps -So far we have prepared the working environment for the update. Now, you'll be adding new ETL steps, editing scripts, etc. +Ensure that all snapshot scripts and ETL steps run successfully. Adapt the code if needed. - **Edit the snapshot metadata files**: Some modifications may be needed, for example, the `date_published` field may need to be manually updated. @@ -95,13 +99,13 @@ So far we have prepared the working environment for the update. Now, you'll be a !!! 
note We should always quickly have a look at the license URL, to ensure it has not changed (see [our guide on source's licenses](https://www.notion.so/owid/How-to-check-a-source-s-license-ade23e5e1e0f4610b98598f9d459f96e)). -- **Run the snapshot step**: +- **Ensure the snapshot steps work**: ```bash python snapshots/met_office_hadley_centre/2024-07-02/near_surface_temperature.py ``` -- **Run the `meadow`, `garden`, and `grapher` steps**: Edit these steps and execute them. You can do that either one by one: +- **Ensure the `meadow`, `garden`, and `grapher` steps work**: Edit these steps and execute them. You can do that either one by one: ```bash etlr meadow/met_office_hadley_centre/2024-07-02/near_surface_temperature @@ -152,9 +156,9 @@ After updating the data, it is time to update the affected charts! This involves - **Do further chart changes**: You can make any further changes to charts in your staging server, if needed. -## 4. Approve chart differences +## 4. Approve chart changes -Review all changes in charts. +Review all changes in existing charts, and also new charts. - **Start Chart Diff in Wizard**: A link will appear at the bottom of the page when you've submitted the changes in the Indicator Upgrader. Alternatively, you can select it on the Wizard menu on the sidebar. - **Review the chart changes**: @@ -175,7 +179,7 @@ After your updates, the old steps are no longer relevant. Therefore, we move the - Go to ETL Dashboard in your local Wizard. - On the Steps table, select the old step (the one that you have just updated, and that now should appear as "Archivable"), and click on "Add selected steps to the Operations list". - Scroll down to the Operations list, and click on "Add all dependencies". - - Scroll down and expand the "Additional parameters to archive steps" box, to deactivate the "Dry run" option. + - Scroll down and expand the "Additional parameters to archive steps" box, to deactivate the "Dry run" option. 
You can keep "Include usages" activated (it will never archive a step that is used by a chart), but you may want to deactivate it if you created a step that is not yet used by any charts (to avoid archiving it). - Then click on "Archive X steps" (in this case, X equals 6).