Skip to content

Commit

Permalink
Merge branch 'master' into wizard-anomalist
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasrodes committed Oct 15, 2024
2 parents 8a6569b + 190f4ac commit 09184fc
Show file tree
Hide file tree
Showing 81 changed files with 2,612 additions and 968 deletions.
10 changes: 10 additions & 0 deletions dag/covid.yml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,14 @@ steps:
data-private://grapher/excess_mortality/latest/excess_mortality_economist:
- data://garden/excess_mortality/latest/excess_mortality_economist

# Excess Mortality (WHO)
data://meadow/covid/latest/xm_who:
- snapshot://covid/latest/xm_who.zip
data://garden/covid/latest/xm_who:
- data://meadow/covid/latest/xm_who
data://grapher/covid/latest/xm_who:
- data://garden/covid/latest/xm_who

# COVAX
data://meadow/covid/latest/covax:
- snapshot://covid/latest/covax.csv
Expand Down Expand Up @@ -291,3 +299,5 @@ steps:
- grapher://grapher/covid/latest/google_mobility
- grapher://grapher/excess_mortality/latest/excess_mortality
- grapher://grapher/covid/latest/covax
- grapher://grapher/covid/latest/infections_model
- grapher://grapher/covid/latest/xm_who
14 changes: 14 additions & 0 deletions dag/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -842,3 +842,17 @@ steps:
- data://meadow/antibiotics/2024-10-09/gram_children
data://grapher/antibiotics/2024-10-09/gram_children:
- data://garden/antibiotics/2024-10-09/gram_children


# Cervical cancer incidence rates GCO - Cancer Today (2022)
data://meadow/cancer/2024-10-13/gco_cancer_today_cervical:
- snapshot://cancer/2024-10-13/gco_cancer_today_cervical.csv
# Cervical cancer incidence rates GCO - Cancer Over Time
data://meadow/cancer/2024-10-13/gco_cancer_over_time_cervical:
- snapshot://cancer/2024-10-13/gco_cancer_over_time_cervical.csv
data://garden/cancer/2024-10-13/gco_cervical_cancer:
- data://meadow/cancer/2024-10-13/gco_cancer_today_cervical
- data://meadow/cancer/2024-10-13/gco_cancer_over_time_cervical
data://grapher/cancer/2024-10-13/gco_cervical_cancer:
- data://garden/cancer/2024-10-13/gco_cervical_cancer

3 changes: 2 additions & 1 deletion dag/poverty_inequality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ steps:

# World Bank Poverty and Inequality Platform
data://meadow/wb/2024-10-07/world_bank_pip:
- snapshot://wb/2024-10-07/world_bank_pip_percentiles.csv
- snapshot://wb/2024-10-07/world_bank_pip.csv
- snapshot://wb/2024-10-07/world_bank_pip_percentiles.csv
- snapshot://wb/2024-10-07/world_bank_pip_regions.csv
data://garden/wb/2024-10-07/world_bank_pip:
- data://meadow/wb/2024-10-07/world_bank_pip
data://grapher/wb/2024-10-07/world_bank_pip_2011ppp:
Expand Down
16 changes: 16 additions & 0 deletions etl/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@
from owid.datautils.common import ExceptionFromDocstring, ExceptionFromDocstringWithKwargs
from owid.walden import Catalog as WaldenCatalog
from owid.walden import Dataset as WaldenDataset
from sqlalchemy.orm import Session

import etl.grapher_model as gm
from etl import paths
from etl.config import DEFAULT_GRAPHER_SCHEMA, TLS_VERIFY
from etl.db import get_engine
from etl.explorer import Explorer
from etl.explorer_helpers import Explorer as ExplorerOld
from etl.snapshot import Snapshot, SnapshotMeta
Expand Down Expand Up @@ -1186,6 +1189,19 @@ def create_explorer(
return explorer


def map_indicator_path_to_id(catalog_path: str) -> str | int:
# Check if given path is actually an ID
if str(catalog_path).isdigit():
return catalog_path

# Get ID, assuming given path is a catalog path
engine = get_engine()
with Session(engine) as session:
db_indicator = gm.Variable.from_id_or_path(session, catalog_path)
assert db_indicator.id is not None
return db_indicator.id


@cache
def get_schema_from_url(schema_url: str) -> dict:
"""Get the schema of a chart configuration. Schema URL is saved in config["$schema"] and looks like:
Expand Down
19 changes: 19 additions & 0 deletions etl/multidim.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,30 @@

from etl import grapher_model as gm
from etl.db import read_sql
from etl.helpers import map_indicator_path_to_id


def upsert_multidim_data_page(slug: str, config: dict, engine: Engine) -> None:
validate_multidim_config(config, engine)

# TODO: Improve this. Could also go into etl.helpers.load_mdim_config
# Change catalogPaths into variable IDs
if "views" in config:
views = config["views"]
for view in views:
if "config" in view:
if "sortColumnSlug" in view["config"]:
# Check if catalogPath
# Map to variable ID
view["config"]["sortColumnSlug"] = str(map_indicator_path_to_id(view["config"]["sortColumnSlug"]))
if "dimensions" in view["config"]:
dimensions = view["config"]["dimensions"]
for dim in dimensions:
if "variableId" in dim:
# Check if catalogPath
# Map to variable ID
dim["variableId"] = map_indicator_path_to_id(dim["variableId"])

with Session(engine) as session:
mdd_page = gm.MultiDimDataPage(
slug=slug,
Expand Down
3 changes: 2 additions & 1 deletion etl/steps/data/explorers/wb/latest/world_bank_pip.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
World Bank PIP explorer data step.
Loads the latest PIP data from garden and stores multiple tables as csv diles.
Loads the latest PIP data from garden and stores multiple tables as csv files.
"""

Expand Down Expand Up @@ -33,6 +33,7 @@ def run(dest_dir: str) -> None:
"surveys_past_decade",
"reporting_level",
"welfare_type",
"region_name",
],
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
{
"Afghanistan": "Afghanistan",
"Albania": "Albania",
"Algeria": "Algeria",
"Angola": "Angola",
"Argentina": "Argentina",
"Armenia": "Armenia",
"Australia": "Australia",
"Austria": "Austria",
"Azerbaijan": "Azerbaijan",
"Bahamas": "Bahamas",
"Bahrain": "Bahrain",
"Bangladesh": "Bangladesh",
"Barbados": "Barbados",
"Belarus": "Belarus",
"Belgium": "Belgium",
"Belize": "Belize",
"Benin": "Benin",
"Bhutan": "Bhutan",
"Bolivia": "Bolivia",
"Bosnia Herzegovina": "Bosnia and Herzegovina",
"Botswana": "Botswana",
"Brazil": "Brazil",
"Brunei Darussalam": "Brunei",
"Bulgaria": "Bulgaria",
"Burkina Faso": "Burkina Faso",
"Burundi": "Burundi",
"Cambodia": "Cambodia",
"Cameroon": "Cameroon",
"Canada": "Canada",
"Cape Verde": "Cape Verde",
"Central African Republic": "Central African Republic",
"Chad": "Chad",
"Chile": "Chile",
"China": "China",
"Colombia": "Colombia",
"Comoros": "Comoros",
"Costa Rica": "Costa Rica",
"Croatia": "Croatia",
"Cuba": "Cuba",
"Cyprus": "Cyprus",
"Czechia": "Czechia",
"C\u00f4te d'Ivoire": "Cote d'Ivoire",
"Denmark": "Denmark",
"Djibouti": "Djibouti",
"Dominican Republic": "Dominican Republic",
"Ecuador": "Ecuador",
"Egypt": "Egypt",
"El Salvador": "El Salvador",
"Equatorial Guinea": "Equatorial Guinea",
"Eritrea": "Eritrea",
"Estonia": "Estonia",
"Eswatini": "Eswatini",
"Ethiopia": "Ethiopia",
"Fiji": "Fiji",
"Finland": "Finland",
"French Guyana": "French Guiana",
"French Polynesia": "French Polynesia",
"Gabon": "Gabon",
"Georgia": "Georgia",
"Germany": "Germany",
"Ghana": "Ghana",
"Greece": "Greece",
"Guam": "Guam",
"Guatemala": "Guatemala",
"Guinea": "Guinea",
"Guinea-Bissau": "Guinea-Bissau",
"Guyana": "Guyana",
"Haiti": "Haiti",
"Honduras": "Honduras",
"Hungary": "Hungary",
"Iceland": "Iceland",
"India": "India",
"Indonesia": "Indonesia",
"Iran, Islamic Republic of": "Iran",
"Iraq": "Iraq",
"Ireland": "Ireland",
"Israel": "Israel",
"Italy": "Italy",
"Jamaica": "Jamaica",
"Japan": "Japan",
"Jordan": "Jordan",
"Kazakhstan": "Kazakhstan",
"Kenya": "Kenya",
"Korea, Republic of": "South Korea",
"Kuwait": "Kuwait",
"Kyrgyzstan": "Kyrgyzstan",
"Lao People's Democratic Republic": "Laos",
"Latvia": "Latvia",
"Lebanon": "Lebanon",
"Lesotho": "Lesotho",
"Liberia": "Liberia",
"Libya": "Libya",
"Lithuania": "Lithuania",
"Luxembourg": "Luxembourg",
"Madagascar": "Madagascar",
"Malawi": "Malawi",
"Malaysia": "Malaysia",
"Maldives": "Maldives",
"Mali": "Mali",
"Malta": "Malta",
"Mauritania": "Mauritania",
"Mauritius": "Mauritius",
"Mexico": "Mexico",
"Moldova": "Moldova",
"Mongolia": "Mongolia",
"Montenegro": "Montenegro",
"Morocco": "Morocco",
"Mozambique": "Mozambique",
"Myanmar": "Myanmar",
"Namibia": "Namibia",
"Nepal": "Nepal",
"New Caledonia": "New Caledonia",
"New Zealand": "New Zealand",
"Nicaragua": "Nicaragua",
"Niger": "Niger",
"Nigeria": "Nigeria",
"North Macedonia": "North Macedonia",
"Norway": "Norway",
"Oman": "Oman",
"Pakistan": "Pakistan",
"Panama": "Panama",
"Papua New Guinea": "Papua New Guinea",
"Paraguay": "Paraguay",
"Peru": "Peru",
"Philippines": "Philippines",
"Poland": "Poland",
"Portugal": "Portugal",
"Puerto Rico": "Puerto Rico",
"Qatar": "Qatar",
"Romania": "Romania",
"Russian Federation": "Russia",
"Rwanda": "Rwanda",
"Saint Lucia": "Saint Lucia",
"Samoa": "Samoa",
"Sao Tome and Principe": "Sao Tome and Principe",
"Saudi Arabia": "Saudi Arabia",
"Senegal": "Senegal",
"Serbia": "Serbia",
"Sierra Leone": "Sierra Leone",
"Singapore": "Singapore",
"Slovakia": "Slovakia",
"Slovenia": "Slovenia",
"Solomon Islands": "Solomon Islands",
"Somalia": "Somalia",
"South Africa": "South Africa",
"South Sudan": "South Sudan",
"Spain": "Spain",
"Sri Lanka": "Sri Lanka",
"Sudan": "Sudan",
"Suriname": "Suriname",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Syrian Arab Republic": "Syria",
"Tajikistan": "Tajikistan",
"Tanzania, United Republic of": "Tanzania",
"Thailand": "Thailand",
"Timor-Leste": "East Timor",
"Togo": "Togo",
"Trinidad and Tobago": "Trinidad and Tobago",
"Tunisia": "Tunisia",
"Turkmenistan": "Turkmenistan",
"USA": "United States",
"Uganda": "Uganda",
"Ukraine": "Ukraine",
"United Arab Emirates": "United Arab Emirates",
"United Kingdom": "United Kingdom",
"United States of America": "United States",
"Uruguay": "Uruguay",
"Uzbekistan": "Uzbekistan",
"Vanuatu": "Vanuatu",
"Venezuela": "Venezuela",
"Viet Nam": "Vietnam",
"Yemen": "Yemen",
"Zambia": "Zambia",
"Zimbabwe": "Zimbabwe",
"Congo, Democratic Republic of": "Democratic Republic of Congo",
"Congo, Republic of": "Congo",
"France (metropolitan)": "France",
"France, Guadeloupe": "Guadeloupe",
"France, La R\u00e9union": "Reunion",
"France, Martinique": "Martinique",
"Gaza Strip and West Bank": "Palestine",
"Korea, Democratic People Republic of": "North Korea",
"The Netherlands": "Netherlands",
"The Republic of the Gambia": "Gambia",
"T\u00fcrkiye": "Turkey",
"UK, England": "England",
"UK, Northern Ireland": "Northern Ireland",
"UK, Scotland": "Scotland",
"UK, Wales": "Wales",
"USA: Black": "United States (Black)",
"USA: White": "United States (White)"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- Cancer


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365


tables:
gco_cancer_today_cervical:
variables:
asr:
title: Age-standardized cervical cancer incidence rate per 100,000 women
unit: 'per 100,000 women'
description_short: |-
Estimated number of new cervical [cancer](#dod:cancer) cases per 100,000 women.
description_from_producer: |-
An age-standardized rate (ASR) is a summary measure of the rate that would have been observed if the population had a standard age structure. Standardization is necessary when comparing several populations that differ with respect to age, because age has a strong influence on the risk of cancer. An ASR is a weighted mean of the age-specific rates; the weighting is based on the population distribution of a standard population. The most frequently used standard population is the World (W) Standard Population. The calculated incidence rate is then called the age-standardized incidence or mortality rate (W), and is expressed per 100 000 person-years. The World Standard Population used in GLOBOCAN was first proposed by Segi (1960)a and later modified by Doll et al. (1966)b.
presentation:
grapher_config:
note: To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).

crude_rate:
title: Crude cervical cancer incidence rate per 100,000 women
unit: 'per 100,000 women'
description_from_producer: |-
For a specific tumour in a given population, crude rates are calculated simply by dividing the number of new cancers or cancer deaths observed during a given time period by the corresponding number of individuals in the population at risk. For cancer, the result is commonly expressed as an annual rate per 100 000 individuals at risk.
total:
title: Total number of cervical cancer cases
unit: 'cases'
Loading

0 comments on commit 09184fc

Please sign in to comment.