Skip to content

Commit

Permalink
📊 Improving GBD metadata (#1380)
Browse files Browse the repository at this point in the history
* fixing the child mortality percent data

* tidying metadata

* improving metadata

* prepping for improving metadata

* refactor(gbd): trying to improve metadata

* remove accidental index

* trying to sort metadata

* update

* fixing mental health issue

* fix missing MH step

* fix issue with prevalence_dalys_world

* removing metadata table

* trying to fix metadata

* fix metadata

* adding in Lucas's comments

* sorting out variable names

* incorporating risk factors into metadata

* typing fix

* small adjustment

* experiment with removing empty variables

* test

* removing excess dataset.save()

* fixing error in add_metadata function

* remove print

* fixing prevalence_dalys_world

* format
  • Loading branch information
spoonerf authored Jul 28, 2023
1 parent 897f058 commit 616b016
Show file tree
Hide file tree
Showing 32 changed files with 372 additions and 339 deletions.
44 changes: 30 additions & 14 deletions dag/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,46 +47,62 @@ steps:
data://grapher/who/2023-03-09/gho_suicides:
- data://garden/who/2023-03-09/gho_suicides

# IHME Global Burden of Disease
# IHME Global Burden of Disease - Child mortality long run
data://meadow/ihme_gbd/2020-12-19/child_mortality:
- walden://ihme_gbd/2020-12-19/child_mortality
data://garden/ihme_gbd/2020-12-19/child_mortality:
- data://meadow/ihme_gbd/2020-12-19/child_mortality
data://grapher/ihme_gbd/2020-12-19/child_mortality:
- data://garden/ihme_gbd/2020-12-19/child_mortality

# IHME Global Burden of Disease - Deaths and DALYs
data://meadow/ihme_gbd/2019/gbd_cause:
- walden://ihme_gbd/2019/gbd_cause
data://garden/ihme_gbd/2019/gbd_cause:
- data://meadow/ihme_gbd/2019/gbd_cause
data://grapher/ihme_gbd/2019/gbd_cause:
- data://garden/ihme_gbd/2019/gbd_cause

# IHME Global Burden of Disease - Child mortality
data://meadow/ihme_gbd/2019/gbd_child_mortality:
- walden://ihme_gbd/2019/gbd_child_mortality
data://garden/ihme_gbd/2019/gbd_child_mortality:
- data://meadow/ihme_gbd/2019/gbd_child_mortality
data://grapher/ihme_gbd/2019/gbd_child_mortality:
- data://garden/ihme_gbd/2019/gbd_child_mortality

# IHME Global Burden of Disease - Mental health
data://meadow/ihme_gbd/2019/gbd_mental_health:
- walden://ihme_gbd/2019/gbd_mental_health
data://garden/ihme_gbd/2019/gbd_mental_health:
- data://meadow/ihme_gbd/2019/gbd_mental_health
data://grapher/ihme_gbd/2019/gbd_mental_health:
- data://garden/ihme_gbd/2019/gbd_mental_health

# IHME Global Burden of Disease - Risk factors
data://meadow/ihme_gbd/2019/gbd_risk:
- walden://ihme_gbd/2019/gbd_risk
data://meadow/ihme_gbd/2019/gbd_prevalence:
- walden://ihme_gbd/2019/gbd_prevalence
data://garden/ihme_gbd/2019/gbd_cause:
- data://meadow/ihme_gbd/2019/gbd_cause
data://garden/ihme_gbd/2019/gbd_risk:
- data://meadow/ihme_gbd/2019/gbd_risk
data://garden/ihme_gbd/2019/gbd_child_mortality:
- data://meadow/ihme_gbd/2019/gbd_child_mortality
data://garden/ihme_gbd/2019/gbd_mental_health:
- data://meadow/ihme_gbd/2019/gbd_mental_health
data://grapher/ihme_gbd/2019/gbd_risk:
- data://garden/ihme_gbd/2019/gbd_risk

# IHME Global Burden of Disease - Prevalence and incidence
data://meadow/ihme_gbd/2019/gbd_prevalence:
- walden://ihme_gbd/2019/gbd_prevalence
data://garden/ihme_gbd/2019/gbd_prevalence:
- data://meadow/ihme_gbd/2019/gbd_prevalence
data://grapher/ihme_gbd/2019/gbd_cause:
- data://garden/ihme_gbd/2019/gbd_cause
data://grapher/ihme_gbd/2019/gbd_prevalence:
- data://garden/ihme_gbd/2019/gbd_prevalence
data://grapher/ihme_gbd/2019/gbd_child_mortality:
- data://garden/ihme_gbd/2019/gbd_child_mortality

# IHME GBD Mental Health Prevalence
data://meadow/ihme_gbd/2023-05-15/gbd_mental_health_prevalence_rate:
- snapshot://ihme_gbd/2023-05-15/gbd_mental_health_prevalence_rate.zip
data://garden/ihme_gbd/2023-05-15/gbd_mental_health_prevalence_rate:
- data://meadow/ihme_gbd/2023-05-15/gbd_mental_health_prevalence_rate
data://grapher/ihme_gbd/2023-05-15/gbd_mental_health_prevalence_rate:
- data://garden/ihme_gbd/2023-05-15/gbd_mental_health_prevalence_rate
# DALYs vs Prevalence
# IHME GBD Mental Health DALYs vs Prevalence
data://garden/ihme_gbd/2023-06-14/prevalence_dalys_world:
- data://garden/ihme_gbd/2023-05-15/gbd_mental_health_prevalence_rate
- data://garden/ihme_gbd/2019/gbd_cause
Expand Down
35 changes: 0 additions & 35 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_cause.meta.yml

This file was deleted.

13 changes: 6 additions & 7 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@
from etl.helpers import PathFinder

# naming conventions
N = PathFinder(__file__)
paths = PathFinder(__file__)
log = get_logger()


def run(dest_dir: str) -> None:

# Name the dimensions we keep and pivot by (gbd_risk additionally uses "rei")
dims = ["sex", "age", "cause"]

# Get dataset level variables

dataset = N.short_name
dataset = paths.short_name
log.info(f"{dataset}.start")
country_mapping_path = N.directory / "gbd.countries.json"
excluded_countries_path = N.directory / "gbd.excluded_countries.json"
metadata_path = N.directory / f"{dataset}.meta.yml"
country_mapping_path = paths.directory / "gbd.countries.json"
excluded_countries_path = paths.directory / "gbd.excluded_countries.json"
# metadata_path is no longer built here: the per-dataset .meta.yml file was deleted and run_wrapper no longer takes it

# Run the function to produce garden dataset
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, metadata_path, dims)
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, dims)
log.info(f"{dataset}.end")
35 changes: 0 additions & 35 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_child_mortality.meta.yml

This file was deleted.

12 changes: 5 additions & 7 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_child_mortality.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,21 @@
from etl.helpers import PathFinder

# naming conventions
N = PathFinder(__file__)
paths = PathFinder(__file__)
log = get_logger()


def run(dest_dir: str) -> None:

# Name the dimensions we keep and pivot by (gbd_risk additionally uses "rei")
dims = ["sex", "age", "cause"]

# Get dataset level variables

dataset = N.short_name
dataset = paths.short_name
log.info(f"{dataset}.start")
country_mapping_path = N.directory / "gbd.countries.json"
excluded_countries_path = N.directory / "gbd.excluded_countries.json"
metadata_path = N.directory / f"{dataset}.meta.yml"
country_mapping_path = paths.directory / "gbd.countries.json"
excluded_countries_path = paths.directory / "gbd.excluded_countries.json"

# Run the function to produce garden dataset
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, metadata_path, dims)
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, dims)
log.info(f"{dataset}.end")
19 changes: 0 additions & 19 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_mental_health.meta.yml

This file was deleted.

11 changes: 5 additions & 6 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_mental_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@
from etl.helpers import PathFinder

# naming conventions
N = PathFinder(__file__)
paths = PathFinder(__file__)
log = get_logger()


def run(dest_dir: str) -> None:
dims = ["sex", "age", "cause"]
# Get dataset level variables
dataset = N.short_name
dataset = paths.short_name
log.info(f"{dataset}.start")
country_mapping_path = N.directory / "gbd.countries.json"
excluded_countries_path = N.directory / "gbd.excluded_countries.json"
metadata_path = N.directory / f"{dataset}.meta.yml"
country_mapping_path = paths.directory / "gbd.countries.json"
excluded_countries_path = paths.directory / "gbd.excluded_countries.json"
# Run the function to produce garden dataset
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, metadata_path, dims)
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, dims)
log.info(f"{dataset}.end")
35 changes: 0 additions & 35 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_prevalence.meta.yml

This file was deleted.

12 changes: 5 additions & 7 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_prevalence.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,21 @@
from etl.helpers import PathFinder

# naming conventions
N = PathFinder(__file__)
paths = PathFinder(__file__)
log = get_logger()


def run(dest_dir: str) -> None:

# Name the dimensions we keep and pivot by (gbd_risk additionally uses "rei")
dims = ["sex", "age", "cause"]

# Get dataset level variables

dataset = N.short_name
dataset = paths.short_name
log.info(f"{dataset}.start")
country_mapping_path = N.directory / "gbd.countries.json"
excluded_countries_path = N.directory / "gbd.excluded_countries.json"
metadata_path = N.directory / f"{dataset}.meta.yml"
country_mapping_path = paths.directory / "gbd.countries.json"
excluded_countries_path = paths.directory / "gbd.excluded_countries.json"

# Run the function to produce garden dataset
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, metadata_path, dims)
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, dims)
log.info(f"{dataset}.end")
36 changes: 0 additions & 36 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_risk.meta.yml

This file was deleted.

12 changes: 5 additions & 7 deletions etl/steps/data/garden/ihme_gbd/2019/gbd_risk.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,17 @@
from etl.helpers import PathFinder

# naming conventions
N = PathFinder(__file__)
paths = PathFinder(__file__)
log = get_logger()


def run(dest_dir: str) -> None:

dims = ["sex", "age", "cause", "rei"]
# Get dataset level variables
dataset = N.short_name
dataset = paths.short_name
log.info(f"{dataset}.start")
country_mapping_path = N.directory / "gbd.countries.json"
excluded_countries_path = N.directory / "gbd.excluded_countries.json"
metadata_path = N.directory / f"{dataset}.meta.yml"
country_mapping_path = paths.directory / "gbd.countries.json"
excluded_countries_path = paths.directory / "gbd.excluded_countries.json"
# Run the function to produce garden dataset
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, metadata_path, dims)
run_wrapper(dataset, country_mapping_path, excluded_countries_path, dest_dir, dims)
log.info(f"{dataset}.end")
Loading

0 comments on commit 616b016

Please sign in to comment.