Merge branch 'master' into wizard-anomalist

owid · Oct 15, 2024 · 09184fc · 09184fc
2 parents 8a6569b + 190f4ac
commit 09184fc
Show file tree

Hide file tree

Showing 81 changed files with 2,612 additions and 968 deletions.
diff --git a/dag/covid.yml b/dag/covid.yml
@@ -250,6 +250,14 @@ steps:
   data-private://grapher/excess_mortality/latest/excess_mortality_economist:
     - data://garden/excess_mortality/latest/excess_mortality_economist
 
+  # Excess Mortality (WHO)
+  data://meadow/covid/latest/xm_who:
+    - snapshot://covid/latest/xm_who.zip
+  data://garden/covid/latest/xm_who:
+    - data://meadow/covid/latest/xm_who
+  data://grapher/covid/latest/xm_who:
+    - data://garden/covid/latest/xm_who
+
   # COVAX
   data://meadow/covid/latest/covax:
     - snapshot://covid/latest/covax.csv
@@ -291,3 +299,5 @@ steps:
     - grapher://grapher/covid/latest/google_mobility
     - grapher://grapher/excess_mortality/latest/excess_mortality
     - grapher://grapher/covid/latest/covax
+    - grapher://grapher/covid/latest/infections_model
+    - grapher://grapher/covid/latest/xm_who
diff --git a/dag/health.yml b/dag/health.yml
@@ -842,3 +842,17 @@ steps:
     - data://meadow/antibiotics/2024-10-09/gram_children
   data://grapher/antibiotics/2024-10-09/gram_children:
     - data://garden/antibiotics/2024-10-09/gram_children
+
+
+  # Cervical cancer incidence rates GCO - Cancer Today (2022)
+  data://meadow/cancer/2024-10-13/gco_cancer_today_cervical:
+    - snapshot://cancer/2024-10-13/gco_cancer_today_cervical.csv
+  # Cervical cancer incidence rates GCO - Cancer Over Time
+  data://meadow/cancer/2024-10-13/gco_cancer_over_time_cervical:
+    - snapshot://cancer/2024-10-13/gco_cancer_over_time_cervical.csv
+  data://garden/cancer/2024-10-13/gco_cervical_cancer:
+    - data://meadow/cancer/2024-10-13/gco_cancer_today_cervical
+    - data://meadow/cancer/2024-10-13/gco_cancer_over_time_cervical
+  data://grapher/cancer/2024-10-13/gco_cervical_cancer:
+    - data://garden/cancer/2024-10-13/gco_cervical_cancer
+
diff --git a/dag/poverty_inequality.yml b/dag/poverty_inequality.yml
@@ -15,8 +15,9 @@ steps:
 
   # World Bank Poverty and Inequality Platform
   data://meadow/wb/2024-10-07/world_bank_pip:
-    - snapshot://wb/2024-10-07/world_bank_pip_percentiles.csv
     - snapshot://wb/2024-10-07/world_bank_pip.csv
+    - snapshot://wb/2024-10-07/world_bank_pip_percentiles.csv
+    - snapshot://wb/2024-10-07/world_bank_pip_regions.csv
   data://garden/wb/2024-10-07/world_bank_pip:
     - data://meadow/wb/2024-10-07/world_bank_pip
   data://grapher/wb/2024-10-07/world_bank_pip_2011ppp:

diff --git a/etl/helpers.py b/etl/helpers.py
@@ -33,9 +33,12 @@
 from owid.datautils.common import ExceptionFromDocstring, ExceptionFromDocstringWithKwargs
 from owid.walden import Catalog as WaldenCatalog
 from owid.walden import Dataset as WaldenDataset
+from sqlalchemy.orm import Session
 
+import etl.grapher_model as gm
 from etl import paths
 from etl.config import DEFAULT_GRAPHER_SCHEMA, TLS_VERIFY
+from etl.db import get_engine
 from etl.explorer import Explorer
 from etl.explorer_helpers import Explorer as ExplorerOld
 from etl.snapshot import Snapshot, SnapshotMeta
@@ -1186,6 +1189,19 @@ def create_explorer(
     return explorer
 
 
+def map_indicator_path_to_id(catalog_path: str) -> str | int:
+    """Resolve an indicator reference to its variable ID.
+
+    If `catalog_path` is made up only of digits (after `str()` conversion) it is
+    treated as an ID already and returned unchanged. It is not cast, so a numeric
+    string comes back as a string. Otherwise the value is looked up with
+    `gm.Variable.from_id_or_path` in a session opened on the engine returned by
+    `get_engine()`, and the `id` of the indicator found is returned.
+
+    Raises `AssertionError` if the indicator found has no `id`.
+    """
+    # Check if given path is actually an ID
+    # (`str()` first, so a non-string value such as an int does not break `.isdigit()`.)
+    if str(catalog_path).isdigit():
+        return catalog_path
+
+    # Get ID, assuming given path is a catalog path
+    engine = get_engine()
+    with Session(engine) as session:
+        db_indicator = gm.Variable.from_id_or_path(session, catalog_path)
+        # NOTE(review): presumably this narrows an optional `id` for the type checker — confirm.
+        assert db_indicator.id is not None
+        return db_indicator.id
+
+
 @cache
 def get_schema_from_url(schema_url: str) -> dict:
     """Get the schema of a chart configuration. Schema URL is saved in config["$schema"] and looks like:

diff --git a/etl/multidim.py b/etl/multidim.py
@@ -7,11 +7,30 @@
 
 from etl import grapher_model as gm
 from etl.db import read_sql
+from etl.helpers import map_indicator_path_to_id
 
 
 def upsert_multidim_data_page(slug: str, config: dict, engine: Engine) -> None:
     validate_multidim_config(config, engine)
 
+    # TODO: Improve this. Could also go into etl.helpers.load_mdim_config
+    # Change catalogPaths into variable IDs
+    if "views" in config:
+        views = config["views"]
+        for view in views:
+            if "config" in view:
+                if "sortColumnSlug" in view["config"]:
+                    # Check if catalogPath
+                    # Map to variable ID
+                    view["config"]["sortColumnSlug"] = str(map_indicator_path_to_id(view["config"]["sortColumnSlug"]))
+                if "dimensions" in view["config"]:
+                    dimensions = view["config"]["dimensions"]
+                    for dim in dimensions:
+                        if "variableId" in dim:
+                            # Check if catalogPath
+                            # Map to variable ID
+                            dim["variableId"] = map_indicator_path_to_id(dim["variableId"])
+
     with Session(engine) as session:
         mdd_page = gm.MultiDimDataPage(
             slug=slug,

diff --git a/etl/steps/data/explorers/wb/latest/world_bank_pip.py b/etl/steps/data/explorers/wb/latest/world_bank_pip.py
@@ -1,7 +1,7 @@
 """
 World Bank PIP explorer data step.
 
-Loads the latest PIP data from garden and stores multiple tables as csv diles.
+Loads the latest PIP data from garden and stores multiple tables as csv files.
 
 """
 
@@ -33,6 +33,7 @@ def run(dest_dir: str) -> None:
             "surveys_past_decade",
             "reporting_level",
             "welfare_type",
+            "region_name",
         ],
     )
 

diff --git a/etl/steps/data/garden/cancer/2024-10-13/gco_cervical_cancer.countries.json b/etl/steps/data/garden/cancer/2024-10-13/gco_cervical_cancer.countries.json
@@ -0,0 +1,194 @@
+{
+  "Afghanistan": "Afghanistan",
+  "Albania": "Albania",
+  "Algeria": "Algeria",
+  "Angola": "Angola",
+  "Argentina": "Argentina",
+  "Armenia": "Armenia",
+  "Australia": "Australia",
+  "Austria": "Austria",
+  "Azerbaijan": "Azerbaijan",
+  "Bahamas": "Bahamas",
+  "Bahrain": "Bahrain",
+  "Bangladesh": "Bangladesh",
+  "Barbados": "Barbados",
+  "Belarus": "Belarus",
+  "Belgium": "Belgium",
+  "Belize": "Belize",
+  "Benin": "Benin",
+  "Bhutan": "Bhutan",
+  "Bolivia": "Bolivia",
+  "Bosnia Herzegovina": "Bosnia and Herzegovina",
+  "Botswana": "Botswana",
+  "Brazil": "Brazil",
+  "Brunei Darussalam": "Brunei",
+  "Bulgaria": "Bulgaria",
+  "Burkina Faso": "Burkina Faso",
+  "Burundi": "Burundi",
+  "Cambodia": "Cambodia",
+  "Cameroon": "Cameroon",
+  "Canada": "Canada",
+  "Cape Verde": "Cape Verde",
+  "Central African Republic": "Central African Republic",
+  "Chad": "Chad",
+  "Chile": "Chile",
+  "China": "China",
+  "Colombia": "Colombia",
+  "Comoros": "Comoros",
+  "Costa Rica": "Costa Rica",
+  "Croatia": "Croatia",
+  "Cuba": "Cuba",
+  "Cyprus": "Cyprus",
+  "Czechia": "Czechia",
+  "C\u00f4te d'Ivoire": "Cote d'Ivoire",
+  "Denmark": "Denmark",
+  "Djibouti": "Djibouti",
+  "Dominican Republic": "Dominican Republic",
+  "Ecuador": "Ecuador",
+  "Egypt": "Egypt",
+  "El Salvador": "El Salvador",
+  "Equatorial Guinea": "Equatorial Guinea",
+  "Eritrea": "Eritrea",
+  "Estonia": "Estonia",
+  "Eswatini": "Eswatini",
+  "Ethiopia": "Ethiopia",
+  "Fiji": "Fiji",
+  "Finland": "Finland",
+  "French Guyana": "French Guiana",
+  "French Polynesia": "French Polynesia",
+  "Gabon": "Gabon",
+  "Georgia": "Georgia",
+  "Germany": "Germany",
+  "Ghana": "Ghana",
+  "Greece": "Greece",
+  "Guam": "Guam",
+  "Guatemala": "Guatemala",
+  "Guinea": "Guinea",
+  "Guinea-Bissau": "Guinea-Bissau",
+  "Guyana": "Guyana",
+  "Haiti": "Haiti",
+  "Honduras": "Honduras",
+  "Hungary": "Hungary",
+  "Iceland": "Iceland",
+  "India": "India",
+  "Indonesia": "Indonesia",
+  "Iran, Islamic Republic of": "Iran",
+  "Iraq": "Iraq",
+  "Ireland": "Ireland",
+  "Israel": "Israel",
+  "Italy": "Italy",
+  "Jamaica": "Jamaica",
+  "Japan": "Japan",
+  "Jordan": "Jordan",
+  "Kazakhstan": "Kazakhstan",
+  "Kenya": "Kenya",
+  "Korea, Republic of": "South Korea",
+  "Kuwait": "Kuwait",
+  "Kyrgyzstan": "Kyrgyzstan",
+  "Lao People's Democratic Republic": "Laos",
+  "Latvia": "Latvia",
+  "Lebanon": "Lebanon",
+  "Lesotho": "Lesotho",
+  "Liberia": "Liberia",
+  "Libya": "Libya",
+  "Lithuania": "Lithuania",
+  "Luxembourg": "Luxembourg",
+  "Madagascar": "Madagascar",
+  "Malawi": "Malawi",
+  "Malaysia": "Malaysia",
+  "Maldives": "Maldives",
+  "Mali": "Mali",
+  "Malta": "Malta",
+  "Mauritania": "Mauritania",
+  "Mauritius": "Mauritius",
+  "Mexico": "Mexico",
+  "Moldova": "Moldova",
+  "Mongolia": "Mongolia",
+  "Montenegro": "Montenegro",
+  "Morocco": "Morocco",
+  "Mozambique": "Mozambique",
+  "Myanmar": "Myanmar",
+  "Namibia": "Namibia",
+  "Nepal": "Nepal",
+  "New Caledonia": "New Caledonia",
+  "New Zealand": "New Zealand",
+  "Nicaragua": "Nicaragua",
+  "Niger": "Niger",
+  "Nigeria": "Nigeria",
+  "North Macedonia": "North Macedonia",
+  "Norway": "Norway",
+  "Oman": "Oman",
+  "Pakistan": "Pakistan",
+  "Panama": "Panama",
+  "Papua New Guinea": "Papua New Guinea",
+  "Paraguay": "Paraguay",
+  "Peru": "Peru",
+  "Philippines": "Philippines",
+  "Poland": "Poland",
+  "Portugal": "Portugal",
+  "Puerto Rico": "Puerto Rico",
+  "Qatar": "Qatar",
+  "Romania": "Romania",
+  "Russian Federation": "Russia",
+  "Rwanda": "Rwanda",
+  "Saint Lucia": "Saint Lucia",
+  "Samoa": "Samoa",
+  "Sao Tome and Principe": "Sao Tome and Principe",
+  "Saudi Arabia": "Saudi Arabia",
+  "Senegal": "Senegal",
+  "Serbia": "Serbia",
+  "Sierra Leone": "Sierra Leone",
+  "Singapore": "Singapore",
+  "Slovakia": "Slovakia",
+  "Slovenia": "Slovenia",
+  "Solomon Islands": "Solomon Islands",
+  "Somalia": "Somalia",
+  "South Africa": "South Africa",
+  "South Sudan": "South Sudan",
+  "Spain": "Spain",
+  "Sri Lanka": "Sri Lanka",
+  "Sudan": "Sudan",
+  "Suriname": "Suriname",
+  "Sweden": "Sweden",
+  "Switzerland": "Switzerland",
+  "Syrian Arab Republic": "Syria",
+  "Tajikistan": "Tajikistan",
+  "Tanzania, United Republic of": "Tanzania",
+  "Thailand": "Thailand",
+  "Timor-Leste": "East Timor",
+  "Togo": "Togo",
+  "Trinidad and Tobago": "Trinidad and Tobago",
+  "Tunisia": "Tunisia",
+  "Turkmenistan": "Turkmenistan",
+  "USA": "United States",
+  "Uganda": "Uganda",
+  "Ukraine": "Ukraine",
+  "United Arab Emirates": "United Arab Emirates",
+  "United Kingdom": "United Kingdom",
+  "United States of America": "United States",
+  "Uruguay": "Uruguay",
+  "Uzbekistan": "Uzbekistan",
+  "Vanuatu": "Vanuatu",
+  "Venezuela": "Venezuela",
+  "Viet Nam": "Vietnam",
+  "Yemen": "Yemen",
+  "Zambia": "Zambia",
+  "Zimbabwe": "Zimbabwe",
+  "Congo, Democratic Republic of": "Democratic Republic of Congo",
+  "Congo, Republic of": "Congo",
+  "France (metropolitan)": "France",
+  "France, Guadeloupe": "Guadeloupe",
+  "France, La R\u00e9union": "Reunion",
+  "France, Martinique": "Martinique",
+  "Gaza Strip and West Bank": "Palestine",
+  "Korea, Democratic People Republic of": "North Korea",
+  "The Netherlands": "Netherlands",
+  "The Republic of the Gambia": "Gambia",
+  "T\u00fcrkiye": "Turkey",
+  "UK, England": "England",
+  "UK, Northern Ireland": "Northern Ireland",
+  "UK, Scotland": "Scotland",
+  "UK, Wales": "Wales",
+  "USA: Black": "United States (Black)",
+  "USA: White": "United States (White)"
+}
diff --git a/etl/steps/data/garden/cancer/2024-10-13/gco_cervical_cancer.meta.yml b/etl/steps/data/garden/cancer/2024-10-13/gco_cervical_cancer.meta.yml
@@ -0,0 +1,36 @@
+# NOTE: To learn more about the fields, hover over their names.
+definitions:
+  common:
+    presentation:
+      topic_tags:
+        - Cancer
+
+
+# Learn more about the available fields:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+dataset:
+  update_period_days: 365
+
+
+tables:
+  gco_cancer_today_cervical:
+    variables:
+      asr:
+        title: Age-standardized cervical cancer incidence rate per 100,000 women
+        unit: 'per 100,000 women'
+        description_short: |-
+          Estimated number of new cervical [cancer](#dod:cancer) cases per 100,000 women.
+        description_from_producer: |-
+          An age-standardized rate (ASR) is a summary measure of the rate that would have been observed if the population had a standard age structure. Standardization is necessary when comparing several populations that differ with respect to age, because age has a strong influence on the risk of cancer. An ASR is a weighted mean of the age-specific rates; the weighting is based on the population distribution of a standard population. The most frequently used standard population is the World (W) Standard Population. The calculated incidence rate is then called the age-standardized incidence or mortality rate (W), and is expressed per 100 000 person-years. The World Standard Population used in GLOBOCAN was first proposed by Segi (1960) and later modified by Doll et al. (1966).
+        presentation:
+          grapher_config:
+            note: To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).
+
+      crude_rate:
+        title: Crude cervical cancer incidence rate per 100,000 women
+        unit: 'per 100,000 women'
+        description_from_producer: |-
+          For a specific tumour in a given population, crude rates are calculated simply by dividing the number of new cancers or cancer deaths observed during a given time period by the corresponding number of individuals in the population at risk. For cancer, the result is commonly expressed as an annual rate per 100 000 individuals at risk.
+      total:
+        title: Total number of cervical cancer cases
+        unit: 'cases'