Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 covid: mdim #3321

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions dag/covid.yml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,14 @@ steps:
data-private://grapher/excess_mortality/latest/excess_mortality_economist:
- data://garden/excess_mortality/latest/excess_mortality_economist

# Excess Mortality (WHO)
data://meadow/covid/latest/xm_who:
- snapshot://covid/latest/xm_who.zip
data://garden/covid/latest/xm_who:
- data://meadow/covid/latest/xm_who
data://grapher/covid/latest/xm_who:
- data://garden/covid/latest/xm_who

# COVAX
data://meadow/covid/latest/covax:
- snapshot://covid/latest/covax.csv
Expand Down Expand Up @@ -291,3 +299,5 @@ steps:
- grapher://grapher/covid/latest/google_mobility
- grapher://grapher/excess_mortality/latest/excess_mortality
- grapher://grapher/covid/latest/covax
- grapher://grapher/covid/latest/infections_model
- grapher://grapher/covid/latest/xm_who
16 changes: 16 additions & 0 deletions etl/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@
from owid.datautils.common import ExceptionFromDocstring, ExceptionFromDocstringWithKwargs
from owid.walden import Catalog as WaldenCatalog
from owid.walden import Dataset as WaldenDataset
from sqlalchemy.orm import Session

import etl.grapher_model as gm
from etl import paths
from etl.config import DEFAULT_GRAPHER_SCHEMA, TLS_VERIFY
from etl.db import get_engine
from etl.explorer import Explorer
from etl.explorer_helpers import Explorer as ExplorerOld
from etl.snapshot import Snapshot, SnapshotMeta
Expand Down Expand Up @@ -1186,6 +1189,19 @@ def create_explorer(
return explorer


def map_indicator_path_to_id(catalog_path: str) -> str | int:
# Check if given path is actually an ID
if str(catalog_path).isdigit():
return catalog_path

# Get ID, assuming given path is a catalog path
engine = get_engine()
with Session(engine) as session:
db_indicator = gm.Variable.load_from_catalog_path(session, catalog_path)
assert db_indicator.id is not None
return db_indicator.id


@cache
def get_schema_from_url(schema_url: str) -> dict:
"""Get the schema of a chart configuration. Schema URL is saved in config["$schema"] and looks like:
Expand Down
19 changes: 19 additions & 0 deletions etl/multidim.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,30 @@

from etl import grapher_model as gm
from etl.db import read_sql
from etl.helpers import map_indicator_path_to_id


def upsert_multidim_data_page(slug: str, config: dict, engine: Engine) -> None:
validate_multidim_config(config, engine)

# TODO: Improve this. Could also go into etl.helpers.load_mdim_config
# Change catalogPaths into variable IDs
if "views" in config:
views = config["views"]
for view in views:
if "config" in view:
if "sortColumnSlug" in view["config"]:
# Check if catalogPath
# Map to variable ID
view["config"]["sortColumnSlug"] = str(map_indicator_path_to_id(view["config"]["sortColumnSlug"]))
if "dimensions" in view["config"]:
dimensions = view["config"]["dimensions"]
for dim in dimensions:
if "variableId" in dim:
# Check if catalogPath
# Map to variable ID
dim["variableId"] = map_indicator_path_to_id(dim["variableId"])

with Session(engine) as session:
mdd_page = gm.MultiDimDataPage(
slug=slug,
Expand Down
207 changes: 207 additions & 0 deletions etl/steps/data/garden/covid/latest/xm_who.countries.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
{
"Afghanistan": "Afghanistan",
"Albania": "Albania",
"Algeria": "Algeria",
"Andorra": "Andorra",
"Angola": "Angola",
"Antigua and Barbuda": "Antigua and Barbuda",
"Argentina": "Argentina",
"Armenia": "Armenia",
"Australia": "Australia",
"Austria": "Austria",
"Azerbaijan": "Azerbaijan",
"Bahamas": "Bahamas",
"Bahrain": "Bahrain",
"Bangladesh": "Bangladesh",
"Barbados": "Barbados",
"Belarus": "Belarus",
"Belgium": "Belgium",
"Belize": "Belize",
"Benin": "Benin",
"Bhutan": "Bhutan",
"Bolivia (Plurinational State of)": "Bolivia",
"Bosnia and Herzegovina": "Bosnia and Herzegovina",
"Botswana": "Botswana",
"Brazil": "Brazil",
"Brunei Darussalam": "Brunei",
"Bulgaria": "Bulgaria",
"Burkina Faso": "Burkina Faso",
"Burundi": "Burundi",
"Cabo Verde": "Cape Verde",
"Cambodia": "Cambodia",
"Cameroon": "Cameroon",
"Canada": "Canada",
"Central African Republic": "Central African Republic",
"Chad": "Chad",
"Chile": "Chile",
"China": "China",
"Colombia": "Colombia",
"Comoros": "Comoros",
"Congo": "Congo",
"Cook Islands": "Cook Islands",
"Costa Rica": "Costa Rica",
"Croatia": "Croatia",
"Cuba": "Cuba",
"Cyprus": "Cyprus",
"Czechia": "Czechia",
"C\u00c3\u00b4te d'Ivoire": "Cote d'Ivoire",
"Democratic People's Republic of Korea": "North Korea",
"Democratic Republic of the Congo": "Democratic Republic of Congo",
"Denmark": "Denmark",
"Djibouti": "Djibouti",
"Dominica": "Dominica",
"Dominican Republic": "Dominican Republic",
"Ecuador": "Ecuador",
"Egypt": "Egypt",
"El Salvador": "El Salvador",
"Equatorial Guinea": "Equatorial Guinea",
"Eritrea": "Eritrea",
"Estonia": "Estonia",
"Eswatini": "Eswatini",
"Ethiopia": "Ethiopia",
"Fiji": "Fiji",
"Finland": "Finland",
"France": "France",
"Gabon": "Gabon",
"Gambia": "Gambia",
"Georgia": "Georgia",
"Germany": "Germany",
"Ghana": "Ghana",
"Greece": "Greece",
"Grenada": "Grenada",
"Guatemala": "Guatemala",
"Guinea": "Guinea",
"Guinea-Bissau": "Guinea-Bissau",
"Guyana": "Guyana",
"Haiti": "Haiti",
"Honduras": "Honduras",
"Hungary": "Hungary",
"Iceland": "Iceland",
"India": "India",
"Indonesia": "Indonesia",
"Iran (Islamic Republic of)": "Iran",
"Iraq": "Iraq",
"Ireland": "Ireland",
"Israel": "Israel",
"Italy": "Italy",
"Jamaica": "Jamaica",
"Japan": "Japan",
"Jordan": "Jordan",
"Kazakhstan": "Kazakhstan",
"Kenya": "Kenya",
"Kiribati": "Kiribati",
"Kuwait": "Kuwait",
"Kyrgyzstan": "Kyrgyzstan",
"Lao People's Democratic Republic": "Laos",
"Latvia": "Latvia",
"Lebanon": "Lebanon",
"Lesotho": "Lesotho",
"Liberia": "Liberia",
"Libya": "Libya",
"Lithuania": "Lithuania",
"Luxembourg": "Luxembourg",
"Madagascar": "Madagascar",
"Malawi": "Malawi",
"Malaysia": "Malaysia",
"Maldives": "Maldives",
"Mali": "Mali",
"Malta": "Malta",
"Marshall Islands": "Marshall Islands",
"Mauritania": "Mauritania",
"Mauritius": "Mauritius",
"Mexico": "Mexico",
"Micronesia (Federated States of)": "Micronesia (country)",
"Monaco": "Monaco",
"Mongolia": "Mongolia",
"Montenegro": "Montenegro",
"Morocco": "Morocco",
"Mozambique": "Mozambique",
"Myanmar": "Myanmar",
"Namibia": "Namibia",
"Nauru": "Nauru",
"Nepal": "Nepal",
"Netherlands": "Netherlands",
"New Zealand": "New Zealand",
"Nicaragua": "Nicaragua",
"Niger": "Niger",
"Nigeria": "Nigeria",
"Niue": "Niue",
"North Macedonia": "North Macedonia",
"Norway": "Norway",
"Oman": "Oman",
"Pakistan": "Pakistan",
"Palau": "Palau",
"Panama": "Panama",
"Papua New Guinea": "Papua New Guinea",
"Paraguay": "Paraguay",
"Peru": "Peru",
"Philippines": "Philippines",
"Poland": "Poland",
"Portugal": "Portugal",
"Qatar": "Qatar",
"Republic of Korea": "South Korea",
"Republic of Moldova": "Moldova",
"Romania": "Romania",
"Russian Federation": "Russia",
"Rwanda": "Rwanda",
"Saint Kitts and Nevis": "Saint Kitts and Nevis",
"Saint Lucia": "Saint Lucia",
"Saint Vincent and the Grenadines": "Saint Vincent and the Grenadines",
"Samoa": "Samoa",
"San Marino": "San Marino",
"Sao Tome and Principe": "Sao Tome and Principe",
"Saudi Arabia": "Saudi Arabia",
"Senegal": "Senegal",
"Serbia": "Serbia",
"Seychelles": "Seychelles",
"Sierra Leone": "Sierra Leone",
"Singapore": "Singapore",
"Slovakia": "Slovakia",
"Slovenia": "Slovenia",
"Solomon Islands": "Solomon Islands",
"Somalia": "Somalia",
"South Africa": "South Africa",
"South Sudan": "South Sudan",
"Spain": "Spain",
"Sri Lanka": "Sri Lanka",
"Sudan": "Sudan",
"Suriname": "Suriname",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Syrian Arab Republic": "Syria",
"Tajikistan": "Tajikistan",
"Thailand": "Thailand",
"Timor-Leste": "East Timor",
"Togo": "Togo",
"Tonga": "Tonga",
"Trinidad and Tobago": "Trinidad and Tobago",
"Tunisia": "Tunisia",
"Turkmenistan": "Turkmenistan",
"Tuvalu": "Tuvalu",
"Uganda": "Uganda",
"Ukraine": "Ukraine",
"United Arab Emirates": "United Arab Emirates",
"United Republic of Tanzania": "Tanzania",
"United States of America": "United States",
"Uruguay": "Uruguay",
"Uzbekistan": "Uzbekistan",
"Vanuatu": "Vanuatu",
"Venezuela (Bolivarian Republic of)": "Venezuela",
"Viet Nam": "Vietnam",
"Yemen": "Yemen",
"Zambia": "Zambia",
"Zimbabwe": "Zimbabwe",
"The United Kingdom": "United Kingdom",
"T\u00c3\u00bcrkiye": "Turkey",
"Global": "World",
"AFRO": "Africa (WHO)",
"AMRO": "Americas (WHO)",
"EMRO": "Eastern Mediterranean (WHO)",
"EURO": "Europe (WHO)",
"HIC": "High-income countries",
"LIC": "Low-income countries",
"LMIC": "Lower-middle-income countries",
"SEARO": "South-East Asia (WHO)",
"UMIC": "Upper-middle-income countries",
"WPRO": "Western Pacific (WHO)"
}
43 changes: 43 additions & 0 deletions etl/steps/data/garden/covid/latest/xm_who.meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- COVID-19
display:
numDecimalPlaces: 2


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365
title: Global excess deaths associated with COVID-19 (modelled estimates)

tables:
xm_who:
variables:
expected_mean:
title: Expected deaths from all-causes by location, year and month (mean)
unit: deaths
acm_mean:
title: Estimated deaths from all-causes by location, year and month (mean)
unit: deaths
excess_mean:
title: Excess deaths associated with COVID-19 pandemic from all-causes by location, year and month (mean)
unit: deaths
cumul_excess_mean:
title: Cumulative excess deaths associated with COVID-19 pandemic (mean)
unit: deaths
description_short: |-
Cumulative difference (mean estimate) between the number of reported or estimated deaths in 2020–2021 and the projected number of deaths for the same period based on previous years.
cumul_excess_low:
title: Cumulative excess deaths associated with COVID-19 pandemic (lower 95% CI)
unit: deaths
description_short: |-
Cumulative difference (lower 95% confidence interval) between the number of reported or estimated deaths in 2020–2021 and the projected number of deaths for the same period based on previous years.
cumul_excess_high:
title: Cumulative excess deaths associated with COVID-19 pandemic (upper 95% CI)
unit: deaths
description_short: |-
Cumulative difference (upper 95% confidence interval) between the number of reported or estimated deaths in 2020–2021 and the projected number of deaths for the same period based on previous years.
49 changes: 49 additions & 0 deletions etl/steps/data/garden/covid/latest/xm_who.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Load a meadow dataset and create a garden dataset."""

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step, derived from this file's location.
# `run` uses this object to load the meadow dataset and to locate the country mapping file.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the garden `xm_who` dataset from the meadow `xm_who` dataset.

    Steps: load the meadow table, harmonize country names with this step's
    country mapping file, drop the `iso3` and `type` columns, format the table
    on `country` and `date`, then create and save the garden dataset.

    Args:
        dest_dir: Destination passed through to `create_dataset`.
    """
    # Inputs: meadow dataset and its single table, with the index flattened into columns.
    meadow_dataset = paths.load_dataset("xm_who")
    table = meadow_dataset["xm_who"].reset_index()

    # Processing: harmonize countries -> drop unused columns -> format on (country, date).
    unused_columns = ["iso3", "type"]
    table = (
        geo.harmonize_countries(df=table, countries_file=paths.country_mapping_path)
        .drop(columns=unused_columns)
        .format(["country", "date"])
    )

    # Outputs: new garden dataset, defaulting to the meadow dataset's metadata.
    garden_dataset = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=meadow_dataset.metadata,
    )
    garden_dataset.save()
Loading
Loading