-
-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
📊 Adding global health data for mpox (#3229)
* adding global health data * using global health for explorer * update explorer * auto-update script * combine mpox scripts * adding metadata * update snapshot
- Loading branch information
Showing
9 changed files
with
197 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
etl/steps/data/garden/health/latest/global_health_mpox.countries.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"Burundi": "Burundi", | ||
"Cameroon": "Cameroon", | ||
"Central African Republic": "Central African Republic", | ||
"Cote d'Ivoire": "Cote d'Ivoire", | ||
"Democratic Republic of the Congo": "Democratic Republic of Congo", | ||
"Gabon": "Gabon", | ||
"Ghana": "Ghana", | ||
"Kenya": "Kenya", | ||
"Liberia": "Liberia", | ||
"Nigeria": "Nigeria", | ||
"Rwanda": "Rwanda", | ||
"South Africa": "South Africa", | ||
"Sweden": "Sweden", | ||
"Thailand": "Thailand", | ||
"Uganda": "Uganda", | ||
"Burundi ": "Burundi", | ||
"Republic of the Congo": "Congo" | ||
} |
25 changes: 25 additions & 0 deletions
25
etl/steps/data/garden/health/latest/global_health_mpox.meta.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# NOTE: To learn more about the fields, hover over their names. | ||
definitions: | ||
common: | ||
presentation: | ||
topic_tags: | ||
- Global Health | ||
- Mpox (monkeypox) | ||
|
||
|
||
# Learn more about the available fields: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
dataset: | ||
update_period_days: 1 | ||
|
||
|
||
tables: | ||
global_health_mpox: | ||
variables: | ||
reported_cases: | ||
title: Reported suspected mpox cases | ||
unit: cases | ||
suspected_cases_cumulative: | ||
title: Cumulative suspected mpox cases | ||
unit: cases | ||
description_processing: Data for 2024 is taken from Global.health; data for 2023 is taken from Africa CDC. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
"""Load a meadow dataset and create a garden dataset.""" | ||
|
||
from owid.catalog import Table | ||
from owid.catalog import processing as pr | ||
|
||
from etl.data_helpers import geo | ||
from etl.helpers import PathFinder, create_dataset | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run(dest_dir: str) -> None:
    """Create the garden mpox dataset of suspected cases from the meadow dataset.

    Loads the ``global_health_mpox`` meadow table, harmonizes country names,
    keeps only rows whose ``case_status`` is ``"suspected"``, counts those rows
    per country and date, appends hand-entered 2023 figures, derives a
    per-country cumulative total, and saves the result under ``dest_dir``.
    """
    #
    # Load inputs.
    #
    meadow_ds = paths.load_dataset("global_health_mpox")

    # Move the index levels back into regular columns before processing.
    records = meadow_ds["global_health_mpox"].reset_index()

    #
    # Process data.
    #
    records = geo.harmonize_countries(df=records, countries_file=paths.country_mapping_path)

    # Keep suspected cases only.
    suspected_mask = records["case_status"] == "suspected"
    suspected = records[suspected_mask]

    # Count rows per country and report date. After count() the "case_status"
    # column holds that count, so the equally-counted "id" column is dropped.
    per_day = suspected.groupby(["country", "date"], observed=True).count()
    per_day = per_day.reset_index()
    per_day = per_day.drop(columns=["id"])

    # Suspected cases for 2023, entered by hand as one row per country.
    countries_2023 = ["Cameroon", "Congo", "Democratic Republic of Congo"]
    counts_2023 = ["113", "74", "12985"]
    history = Table(
        {
            "country": countries_2023,
            "date": ["2023-12-24"] * len(countries_2023),
            "case_status": counts_2023,
        }
    )
    combined = pr.concat([per_day, history])
    combined = combined.sort_values(["country", "date"])

    # The hand-entered figures are strings, so cast before accumulating.
    combined["case_status"] = combined["case_status"].astype("int")
    combined["suspected_cases_cumulative"] = combined.groupby(["country"])["case_status"].cumsum()
    combined = combined.rename(columns={"case_status": "reported_cases"})
    tb = combined.format(["country", "date"])

    #
    # Save outputs.
    #
    # The garden dataset starts from the meadow dataset's metadata.
    ds_garden = create_dataset(
        dest_dir,
        tables=[tb],
        check_variables_metadata=True,
        default_metadata=meadow_ds.metadata,
    )
    ds_garden.save()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
"""Load a snapshot and create a meadow dataset.""" | ||
|
||
from etl.helpers import PathFinder, create_dataset | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run(dest_dir: str) -> None:
    """Create the meadow mpox dataset from the Global.health snapshot.

    Reads the ``global_health_mpox.csv`` snapshot, keeps the identifier, case
    status, country and report-date columns, checks that no report date is
    missing, and saves the table indexed by ``id``, ``country`` and ``date``
    under ``dest_dir``.
    """
    #
    # Load inputs.
    #
    snap = paths.load_snapshot("global_health_mpox.csv")

    # Read the snapshot and keep only the columns used downstream.
    wanted_columns = ["ID", "Case_status", "Location_Admin0", "Date_report_source_I"]
    raw = snap.read(low_memory=False)
    raw = raw[wanted_columns]

    # The report date becomes part of the index, so it must never be missing.
    assert raw["Date_report_source_I"].notna().all()

    column_names = {"Date_report_source_I": "date", "Location_Admin0": "country"}
    renamed = raw.rename(columns=column_names)

    #
    # Process data.
    #
    # Snake-case the columns, set the index and sort. Each row is one
    # individual; aggregation happens in the garden step, so ID stays in the
    # index for now.
    tb = renamed.format(["id", "country", "date"])

    #
    # Save outputs.
    #
    # The meadow dataset starts from the snapshot's metadata.
    ds_meadow = create_dataset(
        dest_dir,
        tables=[tb],
        check_variables_metadata=True,
        default_metadata=snap.metadata,
    )
    ds_meadow.save()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Learn more at: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
meta: | ||
origin: | ||
# Data product / Snapshot | ||
title: Mpox - 2024 | ||
date_published: "2024-09-02" | ||
|
||
# Citation | ||
producer: Global.health | ||
citation_full: |- | ||
Global.health Mpox (accessed on 2024-09-02) | ||
|
||
# Files | ||
url_main: https://global.health/ | ||
url_download: https://mpox-2024.s3.eu-central-1.amazonaws.com/latest.csv | ||
date_accessed: 2024-09-02 | ||
|
||
# License | ||
license: | ||
name: CC BY 4.0 | ||
url: https://global.health/terms-of-use/ | ||
|
||
outs: | ||
- md5: cf3c0ac7af89613fc2aa7e6dcdf954d0 | ||
size: 7902134 | ||
path: global_health_mpox.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"""Script to create a snapshot of dataset.""" | ||
|
||
from pathlib import Path | ||
|
||
import click | ||
|
||
from etl.snapshot import Snapshot | ||
|
||
# Version for current snapshot dataset. | ||
SNAPSHOT_VERSION = Path(__file__).parent.name | ||
|
||
|
||
@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
def main(upload: bool) -> None:
    # NOTE: deliberately no docstring here; click would show it as the
    # command's --help text.
    # The snapshot is addressed by namespace, version (the name of this
    # script's parent directory) and file name.
    snapshot_uri = f"health/{SNAPSHOT_VERSION}/global_health_mpox.csv"
    snap = Snapshot(snapshot_uri)

    # Download data from source, add file to DVC and upload to S3.
    snap.create_snapshot(upload=upload)


if __name__ == "__main__":
    main()