📊 wb: Update PIP to version 20240627 #3324

Closed · wants to merge 85 commits

Commits (85)
37dc65e
empty
paarriagadap Sep 22, 2024
26e52eb
:construction: snapshots
paarriagadap Sep 22, 2024
4b8efc9
:construction: comment unnamed:0 column fix
owidbot Oct 5, 2024
bb4632a
:bug: don't run percentiles by country if there are no missing values
paarriagadap Oct 5, 2024
f2eb262
:bug: don't call the concurrent function if there aren't missing coun…
paarriagadap Oct 5, 2024
bbcff43
:bug: modify concatenations and asserts if missing countries = []
paarriagadap Oct 5, 2024
12ce622
:bug: fix calculate_percentile when is processing regional data
paarriagadap Oct 5, 2024
eca2d37
:sparkles: snapshots
paarriagadap Oct 6, 2024
5e4987a
:lipstick: move dir
paarriagadap Oct 6, 2024
0d50aa4
:robot: automatic excess mortality update
owidbot Sep 23, 2024
57a3488
:robot: update: covid-19 cases and deaths
owidbot Sep 23, 2024
de8689e
:robot: automatic wildfires update
owidbot Sep 23, 2024
832920f
🐛 Fix broken paths, affecting dashboard (#3325)
pabloarosado Sep 23, 2024
bcf0a4f
:robot: automatic excess mortality update
owidbot Sep 24, 2024
8b1db3b
:robot: update: monkeypox
owidbot Sep 24, 2024
3bb6bde
:robot: update: covid-19 cases and deaths
owidbot Sep 24, 2024
2538818
:robot: automatic wildfires update
owidbot Sep 24, 2024
d941480
:robot: automatic excess mortality update
owidbot Sep 25, 2024
b250a3a
:robot: update: monkeypox
owidbot Sep 25, 2024
19e88ae
:robot: update: covid-19 cases and deaths
owidbot Sep 25, 2024
d44ba08
:robot: automatic wildfires update
owidbot Sep 25, 2024
332d2d1
:robot: automatic excess mortality update
owidbot Sep 26, 2024
8eef3dc
:robot: update: monkeypox
owidbot Sep 26, 2024
99cd786
:robot: update: covid-19 cases and deaths
owidbot Sep 26, 2024
687d7af
:robot: automatic wildfires update
owidbot Sep 26, 2024
ec26137
:robot: automatic excess mortality update
owidbot Sep 27, 2024
ad812da
:robot: update: covid-19 cases and deaths
owidbot Sep 27, 2024
23f0a97
:robot: automatic wildfires update
owidbot Sep 27, 2024
e2475e4
:robot: automatic excess mortality update
owidbot Sep 28, 2024
2d528c9
:robot: update: covid-19 cases and deaths
owidbot Sep 28, 2024
b648a69
:robot: automatic wildfires update
owidbot Sep 28, 2024
2d64871
🐛 AI: wrong unit in one of the epoch datasets (#3319)
veronikasamborska1994 Sep 28, 2024
9b6230b
:robot: update: covid-19 cases and deaths
owidbot Sep 29, 2024
ed79c4c
:robot: automatic wildfires update
owidbot Sep 29, 2024
1bf09d2
:robot: automatic excess mortality update
owidbot Sep 30, 2024
83bb545
:robot: update: covid-19 cases and deaths
owidbot Sep 30, 2024
995c0fb
:robot: automatic wildfires update
owidbot Sep 30, 2024
2e24240
:bug: Fix flu explorer (#3326)
Marigold Sep 30, 2024
c558296
✨ wizard: expert (#3317)
lucasrodes Sep 30, 2024
1c35d97
📊 h5n1: update (#3327)
lucasrodes Sep 30, 2024
ac2d328
📊 Update climate change impacts (#3328)
pabloarosado Sep 30, 2024
0455e9a
:hammer: Refactor upserts to MySQL
Marigold Jul 10, 2024
771baa2
:hammer: Switch from poetry to uv package manager (#3283)
Marigold Sep 30, 2024
5b5ccb3
:honeybee: update pandas to 2.2.2
Marigold Sep 30, 2024
1d95686
:robot: automatic excess mortality update
owidbot Oct 1, 2024
4510be7
:robot: update: covid-19 cases and deaths
owidbot Oct 1, 2024
3e18fe1
:robot: automatic wildfires update
owidbot Oct 1, 2024
9f9d053
:honeybee: Update WHO API endpoint (#3344)
Marigold Oct 1, 2024
5488bc0
:robot: automatic excess mortality update
owidbot Oct 1, 2024
7527cc6
✨ cancer: WHO mortatlity database - most common cancer (#3338)
veronikasamborska1994 Oct 1, 2024
dc09921
:bug: correct charts in old WDI (#3343)
paarriagadap Oct 1, 2024
06ddc53
AI epoch - october update
veronikasamborska1994 Oct 1, 2024
d8e0597
:bug: Add back old version of epoch dataset (#3346)
Marigold Oct 1, 2024
2b24919
:bug: change entity annotation for France
paarriagadap Oct 1, 2024
a781e09
:bug: avoid binaries from .cargo/bin
Marigold Oct 1, 2024
20687dc
:scroll: Explain how to set up virtualenv hooks in Nushell
larsyencken Oct 1, 2024
ed3aa33
Update documentation (#3337)
pabloarosado Oct 1, 2024
843176b
:bug: Allow `uv` to be managed outside of `.local/cargo/bin` (#3348)
larsyencken Oct 1, 2024
fcda381
✨ wizard: option to show archived datasets (#3335)
lucasrodes Oct 1, 2024
87aba69
:bug: Fix readthedocs (#3349)
Marigold Oct 1, 2024
d7edd24
📊 Data on share of births by decadal age of mother (#3350)
spoonerf Oct 1, 2024
5747ab9
:robot: automatic excess mortality update
owidbot Oct 2, 2024
eabed67
:robot: automatic flunet update
owidbot Oct 2, 2024
648df0d
:robot: update: monkeypox
owidbot Oct 2, 2024
33b84f8
:robot: update: covid-19 cases and deaths
owidbot Oct 2, 2024
72ac4d6
:robot: automatic wildfires update
owidbot Oct 2, 2024
7a52e25
🐝 neglected tropical diseases: Updating Policy Cures -> Impact Global…
spoonerf Oct 2, 2024
0775b79
📊 burden of disease: Adding GBD dalys step to dag separately (#3356)
spoonerf Oct 2, 2024
c50592f
✨ Add data page for stunting (modeled estimates) (#3357)
spoonerf Oct 2, 2024
c326693
fasttrack: fasttrack/latest/population_immigrants_diff.csv
owidbot Oct 2, 2024
172a002
:robot: automatic excess mortality update
owidbot Oct 3, 2024
be08865
:robot: update: monkeypox
owidbot Oct 3, 2024
a4cf604
:robot: automatic flunet update
owidbot Oct 3, 2024
6d0773f
:robot: update: covid-19 cases and deaths
owidbot Oct 3, 2024
b6a5465
:robot: automatic wildfires update
owidbot Oct 3, 2024
aece6e0
✨ stunting: Add data page info to WHO GHO (#3360)
spoonerf Oct 3, 2024
0e7fc0e
:bug: fix ruff in fast-track on staging servers
Marigold Oct 3, 2024
5ca7f8d
:sparkles: Add localhost support for AdminAPI (#3353)
Marigold Oct 3, 2024
9453535
:robot: automatic excess mortality update
owidbot Oct 4, 2024
5286085
:robot: automatic flunet update
owidbot Oct 4, 2024
9146d66
:robot: update: covid-19 cases and deaths
owidbot Oct 4, 2024
0d36448
:robot: automatic wildfires update
owidbot Oct 4, 2024
8461785
🎉 Add vscode extension to find ETL step files more easily (#3365)
pabloarosado Oct 4, 2024
827c71f
:bug: use proper env in admin_api property
Marigold Oct 4, 2024
88bf3a6
empty
paarriagadap Oct 6, 2024

Files changed

10 changes: 6 additions & 4 deletions .github/workflows/publish-owid-catalog.yml
@@ -19,12 +19,14 @@ jobs:
with:
python-version: '3.x'

- name: Install Poetry
run: curl -sSL https://install.python-poetry.org | python3 -
- name: Install UV
run: curl -LsSf https://astral.sh/uv/install.sh | sh

- name: Publish
env:
POETRY_PYPI_TOKEN_PYPI: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }}
TWINE_USERNAME: ourworldindata
TWINE_PASSWORD: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }}
run: |
cd lib/catalog &&
poetry publish --build -u ourworldindata
uv build &&
uvx twine upload dist/*
2 changes: 2 additions & 0 deletions .gitignore
@@ -56,3 +56,5 @@ site/
notebooks/

zpop/
node_modules/
dist/
6 changes: 3 additions & 3 deletions .readthedocs.yaml
@@ -7,9 +7,9 @@ build:
commands:
- git submodule init
- git submodule update
- pip install -U poetry
- poetry install
- poetry run mkdocs build
- pip install -U uv
- uv sync --all-extras
- uv run mkdocs build
- mkdir -p _readthedocs
- cp -r site/ _readthedocs/html/

2 changes: 1 addition & 1 deletion Makefile
@@ -2,7 +2,7 @@
# Makefile
#

.PHONY: etl docs full lab test-default publish grapher dot watch clean clobber deploy api
.PHONY: etl docs full lab test-default publish grapher dot watch clean clobber deploy api activate

include default.mk

14 changes: 1 addition & 13 deletions apps/backport/datasync/data_metadata.py
@@ -17,7 +17,7 @@
from tenacity.stop import stop_after_attempt
from tenacity.wait import wait_fixed

from etl import config, files
from etl import config
from etl.config import OWIDEnv
from etl.db import read_sql

@@ -394,18 +394,6 @@ def _omit_nullable_values(d: dict) -> dict:
return {k: v for k, v in d.items() if v is not None and (isinstance(v, list) and len(v) or not pd.isna(v))}


def checksum_data_str(var_data_str: str) -> str:
return files.checksum_str(var_data_str)


def checksum_metadata(meta: Dict[str, Any]) -> str:
"""Calculate checksum for metadata. It modifies the metadata dict!"""
# Drop fields not needed for checksum computation
meta = filter_out_fields_in_metadata_for_checksum(meta)

return files.checksum_str(json.dumps(meta, default=str))


def filter_out_fields_in_metadata_for_checksum(meta: Dict[str, Any]) -> Dict[str, Any]:
"""Drop fields that are not needed to estimate the checksum."""
meta_ = deepcopy(meta)
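
For reference, a minimal sketch of what the removed checksum helpers computed. The exact digest behind `files.checksum_str` and the dropped field name are assumptions here, not taken from the diff:

```python
import hashlib
import json
from copy import deepcopy
from typing import Any, Dict


def checksum_str(s: str) -> str:
    # Assumption: files.checksum_str hashes the UTF-8 bytes of the string.
    return hashlib.md5(s.encode()).hexdigest()


def checksum_metadata(meta: Dict[str, Any]) -> str:
    # Mirror of the removed helper: drop checksum-irrelevant fields, then hash.
    meta = deepcopy(meta)
    meta.pop("dataChecksum", None)  # hypothetical field name for illustration
    return checksum_str(json.dumps(meta, default=str))


print(checksum_metadata({"title": "Example", "dataChecksum": "abc"}))
```
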
24 changes: 10 additions & 14 deletions apps/chart_sync/admin_api.py
@@ -13,8 +13,7 @@
from sqlalchemy.orm import Session

from etl import grapher_model as gm
from etl.config import GRAPHER_USER_ID, TAILSCALE_ADMIN_HOST
from etl.db import Engine
from etl.config import GRAPHER_USER_ID, OWIDEnv

log = structlog.get_logger()

@@ -25,7 +24,9 @@ def is_502_error(exception):


class AdminAPI(object):
def __init__(self, engine: Engine, grapher_user_id: Optional[int] = None):
def __init__(self, owid_env: OWIDEnv, grapher_user_id: Optional[int] = None):
self.owid_env = owid_env
engine = owid_env.get_engine()
with Session(engine) as session:
if grapher_user_id:
user = session.get(gm.User, grapher_user_id)
@@ -35,11 +36,6 @@ def __init__(self, engine: Engine, grapher_user_id: Optional[int] = None):
self.session_id = _create_user_session(session, user.email)
session.commit()

if engine.url.database == "live_grapher" and "prod-db" in str(engine.url.host):
self.base_url = TAILSCALE_ADMIN_HOST
else:
self.base_url = f"http://{engine.url.host}.tail6e23.ts.net"

def _json_from_response(self, resp: requests.Response) -> dict:
if resp.status_code != 200:
log.error("Admin API error", status_code=resp.status_code, text=resp.text)
@@ -52,15 +48,15 @@ def _json_from_response(self, resp: requests.Response) -> dict:

def get_chart_config(self, chart_id: int) -> dict:
resp = requests.get(
f"{self.base_url}/admin/api/charts/{chart_id}.config.json",
f"{self.owid_env.admin_api}/charts/{chart_id}.config.json",
cookies={"sessionid": self.session_id},
)
js = self._json_from_response(resp)
return js

def create_chart(self, chart_config: dict) -> dict:
resp = requests.post(
self.base_url + "/admin/api/charts",
self.owid_env.admin_api + "/charts",
cookies={"sessionid": self.session_id},
json=chart_config,
)
@@ -70,7 +66,7 @@ def create_chart(self, chart_config: dict) -> dict:

def update_chart(self, chart_id: int, chart_config: dict) -> dict:
resp = requests.put(
f"{self.base_url}/admin/api/charts/{chart_id}",
f"{self.owid_env.admin_api}/charts/{chart_id}",
cookies={"sessionid": self.session_id},
json=chart_config,
)
@@ -80,7 +76,7 @@ def update_chart(self, chart_id: int, chart_config: dict) -> dict:

def set_tags(self, chart_id: int, tags: List[Dict[str, Any]]) -> dict:
resp = requests.post(
f"{self.base_url}/admin/api/charts/{chart_id}/setTags",
f"{self.owid_env.admin_api}/charts/{chart_id}/setTags",
cookies={"sessionid": self.session_id},
json={"tags": tags},
)
@@ -91,7 +87,7 @@ def set_tags(self, chart_id: int, tags: List[Dict[str, Any]]) -> dict:
def put_grapher_config(self, variable_id: int, grapher_config: Dict[str, Any]) -> dict:
# Retry in case we're restarting Admin on staging server
resp = requests_with_retry().put(
self.base_url + f"/admin/api/variables/{variable_id}/grapherConfigETL",
self.owid_env.admin_api + f"/variables/{variable_id}/grapherConfigETL",
cookies={"sessionid": self.session_id},
json=grapher_config,
)
@@ -101,7 +97,7 @@ def put_grapher_config(self, variable_id: int, grapher_config: Dict[str, Any]) -> dict:

def delete_grapher_config(self, variable_id: int) -> dict:
resp = requests.delete(
self.base_url + f"/admin/api/variables/{variable_id}/grapherConfigETL",
self.owid_env.admin_api + f"/variables/{variable_id}/grapherConfigETL",
cookies={"sessionid": self.session_id},
)
js = self._json_from_response(resp)
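
A short usage sketch of the refactored constructor, as exercised elsewhere in this PR. The staging branch name and chart id below are made-up examples:

```python
from apps.chart_sync.admin_api import AdminAPI
from etl.config import OWIDEnv

# The API client now takes the whole environment object; it derives both the
# MySQL engine and the admin base URL (owid_env.admin_api) from it, instead of
# receiving a raw SQLAlchemy engine and hand-building the URL.
env = OWIDEnv.from_staging_or_env_file("my-branch")  # hypothetical branch name
api = AdminAPI(env, grapher_user_id=1)

chart_config = api.get_chart_config(chart_id=123)  # hypothetical chart id
```
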
5 changes: 3 additions & 2 deletions apps/chart_sync/cli.py
@@ -97,11 +97,12 @@ def cli(
log.info("chart_sync.use_branch", branch=source)

source_engine = OWIDEnv.from_staging_or_env_file(source).get_engine()
target_engine = OWIDEnv.from_staging_or_env_file(target).get_engine()
target_env = OWIDEnv.from_staging_or_env_file(target)
target_engine = target_env.get_engine()

# go through Admin API as creating / updating chart has side effects like
# adding entries to chart_dimensions. We can't directly update it in MySQL
target_api: AdminAPI = AdminAPI(target_engine) if not dry_run else None # type: ignore
target_api: AdminAPI = AdminAPI(target_env) if not dry_run else None # type: ignore

with Session(source_engine) as source_session:
with Session(target_engine) as target_session:
8 changes: 6 additions & 2 deletions apps/owidbot/data_diff.py
@@ -1,3 +1,4 @@
import os
import re
import subprocess
from typing import Tuple
@@ -72,7 +73,7 @@ def format_etl_diff(lines: list[str]) -> Tuple[str, str]:

def call_etl_diff(include: str) -> list[str]:
cmd = [
"poetry",
"uv",
"run",
"etl",
"diff",
@@ -89,7 +90,10 @@ def call_etl_diff(include: str) -> list[str]:

print(" ".join(cmd))

result = subprocess.Popen(cmd, cwd=BASE_DIR, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
env = os.environ.copy()
env["PATH"] = os.path.expanduser("~/.cargo/bin") + ":" + env["PATH"]

result = subprocess.Popen(cmd, cwd=BASE_DIR, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
stdout, stderr = result.communicate()

stdout = stdout.decode()
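
The added `env` handling is a general pattern worth seeing standalone; a minimal sketch, assuming `uv` was installed into `~/.cargo/bin` by its installer:

```python
import os
import subprocess

# Copy the parent environment and prepend ~/.cargo/bin so the child process
# can find binaries installed there (like uv).
env = os.environ.copy()
env["PATH"] = os.path.expanduser("~/.cargo/bin") + ":" + env["PATH"]

result = subprocess.run(["uv", "--version"], env=env, capture_output=True, text=True)
print(result.stdout.strip())
```
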
2 changes: 1 addition & 1 deletion apps/utils/profile.py
@@ -14,7 +14,7 @@

To profile grapher upserts, it is better to use cProfile and run something like this:
```
ssh owid@staging-site-my-branch "cd etl && poetry run python -m cProfile -s cumtime etl/command.py grapher://grapher/biodiversity/2024-01-25/cherry_blossom --grapher --only --force --workers 1" | head -n 100
ssh owid@staging-site-my-branch "cd etl && uv run python -m cProfile -s cumtime etl/command.py grapher://grapher/biodiversity/2024-01-25/cherry_blossom --grapher --only --force --workers 1" | head -n 100
```
"""

6 changes: 5 additions & 1 deletion apps/wizard/app_pages/chart_diff/chart_diff.py
@@ -139,7 +139,11 @@ def slug(self) -> str:
If the slug of the chart mismatches between target and source sessions, an error is displayed.
"""
if self.target_chart:
assert self.source_chart.slug == self.target_chart.slug, "Slug mismatch!"
# Only published charts have slugs
if self.target_chart.publishedAt is not None:
assert (
self.source_chart.slug == self.target_chart.slug
), f"Slug mismatch! {self.source_chart.slug} != {self.target_chart.slug}"
return self.source_chart.slug or "no-slug"

@property
19 changes: 11 additions & 8 deletions apps/wizard/app_pages/chart_diff/chart_diff_show.py
@@ -28,16 +28,19 @@
gm.ChartStatus.APPROVED.value: {
"label": "Approve",
"color": "green",
# "icon": ":material/done_outline:",
"icon": "✅",
},
gm.ChartStatus.REJECTED.value: {
"label": "Reject",
"color": "red",
# "icon": ":material/delete:",
"icon": "❌",
},
gm.ChartStatus.PENDING.value: {
"label": "Pending",
"color": "gray",
# "icon": ":material/schedule:",
"icon": "⏳",
},
}
@@ -89,20 +92,20 @@ def box_label(self):

This contains the state of the approval (by means of an emoji), the slug of the chart, and any tags (like "NEW" or "DRAFT").
"""
label = self.diff.slug
label = f"{self.diff.slug} "
tags = []
if self.diff.is_new:
tags.append(" :blue-background[**NEW**]")
tags.append(" :blue-background[:material/grade: **NEW**]")
if self.diff.is_draft:
tags.append(" :gray-background[**DRAFT**]")
tags.append(" :gray-background[:material/draft: **DRAFT**]")
for change in self.diff.change_types:
tags.append(f":red-background[**{change.upper()} CHANGE**]")
tags.append(f":red-background[:material/refresh: **{change.upper()} CHANGE**]")

# Add TAG if modified and no change_types is provided
if (self.diff.is_modified) and (tags == []):
label += ":break[:rainbow-background[**UNKNOWN -- REPORT THIS**]]"
label += ":rainbow-background[**UNKNOWN -- REPORT THIS**]"
else:
label += f":break[{' '.join(tags)}]"
label += f"{' '.join(tags)}"
return label

@property
@@ -497,7 +500,7 @@ def _show(self) -> None:

# SHOW MODIFIED CHART
if self.diff.is_modified:
tab1, tab2, tab3 = st.tabs(["Charts", "Config diff", "Change history"])
tab1, tab2, tab3 = st.tabs(["Charts", "Config diff", "Status log"])
with tab1:
self._show_chart_comparison()
with tab2:
Expand All @@ -507,7 +510,7 @@ def _show(self) -> None:

# SHOW NEW CHART
elif self.diff.is_new:
tab1, tab2 = st.tabs(["Chart", "Change history"])
tab1, tab2 = st.tabs(["Chart", "Status log"])
with tab1:
self._show_chart_comparison()
with tab2:
2 changes: 1 addition & 1 deletion apps/wizard/app_pages/chart_diff/conflict_resolver.py
@@ -154,7 +154,7 @@ def resolve_conflicts(self, rerun: bool = False):
# Verify config
config_new = validate_chart_config_and_set_defaults(config, schema=get_schema_from_url(config["$schema"]))

api = AdminAPI(SOURCE.engine, grapher_user_id=1)
api = AdminAPI(SOURCE, grapher_user_id=1)
try:
# Push new chart to staging
api.update_chart(
3 changes: 2 additions & 1 deletion apps/wizard/app_pages/expert/prompts.py
@@ -153,6 +153,7 @@ def read_page_md(page_path: str) -> str:
"""

# DATASETTE ORACLE
# TODO: Schema should be auto-generated. Maybe extract from http://analytics/private.json? Problem: how can one get the variable names linking tables?
SYSTEM_PROMPT_DATASETTE = """
## Datasette Oracle V2

@@ -635,5 +636,5 @@ def read_page_md(page_path: str) -> str:

Your job is to create a SQL query for the user that answers their question given the schema above. You may ask the user for clarification, e.g. if it is unclear if unpublished items should be included (when applicable) or if there is ambiguity in which tables to use to answer a question.

Upon generating a query, Datasette Oracle will always provide the SQL query both as text and as a clickable Datasette link, formatted for the user's convenience. The datasette URL is http://datasette-private and the database name is owid. An example query to get all rows from the algolia_searches_by_week table is this one that demonstrates the escaping: `http://datasette-private/owid?sql=select+*+from+algolia_searches_by_week` Remember, you cannot actually run the SQL query, you are just to output the query as text and a datasette link that will run that query!
Upon generating a query, Datasette Oracle will always provide the SQL query both as text and as a clickable Datasette link, formatted for the user's convenience. The datasette URL is http://analytics/private and the database name is owid. An example query to get all rows from the algolia_searches_by_week table is this one that demonstrates the escaping: `http://analytics/private?sql=select+*+from+algolia_searches_by_week` Remember, you cannot actually run the SQL query, you are just to output the query as text and a datasette link that will run that query!
"""
2 changes: 1 addition & 1 deletion apps/wizard/app_pages/indicator_upgrade/app.py
@@ -56,7 +56,7 @@
st.markdown("Update indicators to their new versions.") # Get datasets (might take some time)

# Get all datasets
DATASETS = get_datasets()
DATASETS = get_datasets(archived=True)
# Session states
utils.set_states(
{
8 changes: 4 additions & 4 deletions apps/wizard/app_pages/indicator_upgrade/charts_update.py
@@ -11,7 +11,6 @@
from apps.chart_sync.admin_api import AdminAPI
from apps.wizard.utils import get_schema_from_url, set_states, st_page_link, st_toast_error
from etl.config import OWID_ENV
from etl.db import get_engine
from etl.indicator_upgrade.indicator_update import find_charts_from_variable_ids, update_chart_config

# Logger
@@ -92,9 +91,8 @@ def get_affected_charts_and_preview(indicator_mapping: Dict[int, int]) -> List[gm.Chart]:
def push_new_charts(charts: List[gm.Chart]) -> None:
"""Updating charts in the database."""
# API to interact with the admin tool
engine = get_engine()
# HACK: Forcing grapher user to be Admin so that it is detected by chart sync.
api = AdminAPI(engine, grapher_user_id=1)
api = AdminAPI(OWID_ENV, grapher_user_id=1)
# Update charts
progress_text = "Updating charts..."
bar = st.progress(0, progress_text)
@@ -127,5 +125,7 @@ def push_new_charts(charts: List[gm.Chart]) -> None:
)
st.exception(e)
else:
st.success("The charts were successfully updated! Review the changes with `chart diff`")
st.success(
"The charts were successfully updated! If indicators from other datasets also need to be upgraded, simply refresh this page, otherwise move on to `chart diff` to review all changes."
)
st_page_link("chart-diff")
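
Condensed, the update path now looks roughly like this; the chart list and its attributes are placeholders, not the app's actual loading code:

```python
from apps.chart_sync.admin_api import AdminAPI
from etl.config import OWID_ENV

# HACK mirrored from the diff: force the grapher user to Admin (id=1) so the
# updates are later picked up by chart sync.
api = AdminAPI(OWID_ENV, grapher_user_id=1)

charts = []  # placeholder: gm.Chart objects with already-remapped configs
for chart in charts:
    api.update_chart(chart_id=chart.id, chart_config=chart.config)
```
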
17 changes: 13 additions & 4 deletions apps/wizard/app_pages/indicator_upgrade/dataset_selection.py
@@ -52,11 +52,12 @@ def build_dataset_form(df: pd.DataFrame, similarity_names: Dict[str, Any]) -> "S
# Create a column to display the dataset by its dataset id followed by its title.
df["display_name"] = "[" + df["id"].astype(str) + "] " + df["name"]
version = df["step"].str.split("/").str[-2]
df["display_name"] = df["display_name"] + " [" + version.fillna("unknown version") + "]"
is_archived = df["isArchived"].replace({0: "", 1: " (ARCHIVED) "}).fillna("")
df["display_name"] = is_archived + df["display_name"] + " [" + version.fillna("unknown version") + "]"
# Create a dictionary mapping from that display to dataset id.
display_name_to_id_mapping = df.set_index("display_name")["id"].to_dict()
# Create a column to display the dataset by its dataset id followed by its ETL step.
df["display_step"] = "[" + df["id"].astype(str) + "] " + df["step"]
df["display_step"] = is_archived + "[" + df["id"].astype(str) + "] " + df["step"]
# Create a dictionary mapping from that display to dataset id.
display_step_to_id_mapping = df.set_index("display_step")["id"].to_dict()

@@ -69,9 +70,15 @@
# View options
with st.popover("View options"):
st.markdown("Change the default dataset view.")
# st.toggle(
# "Show archived datasets",
# help="By default, archived datasets are not shown. Change this by checking this box.",
# on_change=set_states_if_form_is_modified,
# key="show_archived_datasets",
# )
st.toggle(
"Show all datasets (manual mapping)",
help="Show all datasets, including those not detected by the grapher.",
"Show all datasets",
help="Show all datasets. By default, Indicator Upgrader will try to present only those datasets that are new. You can disable this by ckecking this box. You can also check this box to show archived datasets.",
on_change=set_states_if_form_is_modified,
key="show_all_datasets",
)
@@ -88,6 +95,8 @@ def build_dataset_form(df: pd.DataFrame, similarity_names: Dict[str, Any]) -> "S
# the dropdown of new datasets should only show the detected new datasets.
options = df[df["migration_new"]].reset_index(drop=True)
else:
if not st.session_state.show_all_datasets:
df = df.loc[df["isArchived"] == 0, :]
# Otherwise, show all datasets in grapher.
options = df.reset_index(drop=True)

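
The display-name logic above can be exercised in isolation. A toy sketch with made-up rows (ids, names, and step paths are illustrative only):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "id": [101, 202],
        "name": ["World Development Indicators", "PIP"],
        "step": ["data://grapher/wb/2022-05-26/wdi", "data://grapher/wb/2024-10-07/pip"],
        "isArchived": [1, 0],
    }
)

# Same construction as in the diff: version comes from the second-to-last
# path segment, and archived datasets get an " (ARCHIVED) " prefix.
version = df["step"].str.split("/").str[-2]
is_archived = df["isArchived"].replace({0: "", 1: " (ARCHIVED) "}).fillna("")
df["display_name"] = is_archived + "[" + df["id"].astype(str) + "] " + df["name"] + " [" + version.fillna("unknown version") + "]"

print(df["display_name"].tolist())
# [' (ARCHIVED) [101] World Development Indicators [2022-05-26]', '[202] PIP [2024-10-07]']
```
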