Skip to content

Commit

Permalink
📊 famines: minor edits to metadata and by factor plots (#3471)
Browse files Browse the repository at this point in the history
  • Loading branch information
veronikasamborska1994 authored Oct 30, 2024
1 parent 3395f80 commit 3e7d8e4
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 22 deletions.
28 changes: 23 additions & 5 deletions etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ definitions:
common:
display:
numDecimalPlaces: 0
description_short: Famines that are estimated to have killed 100,000 people or more.
description_processing: The deaths were assumed to have been distributed evenly over the duration of the famine.
presentation:
topic_tags:
- Famines
Expand All @@ -18,14 +18,32 @@ dataset:
tables:
famines_by_factor:
variables:
sum_conflict_mortality:
title: Deaths from famines where conflict was not a factor
sum_conflict_mortality_factor:
title: Deaths from famines where conflict was a factor
description_short: Famines estimated to have killed 100,000 or more, where factors like "war," "blockade," "counterinsurgency," "occupation," "siege," "forced starvation," and "genocide" contributed to disrupting food supply and access.
unit: 'deaths'

sum_government_policy_overall_mortality:
sum_government_policy_overall_mortality_factor:
title: Deaths from famines where government policy was a factor
description_short: Famines estimated to have killed 100,000 or more, where government actions like "economic policy," "colonial policy," "taxation," "state capacity," and "forced labor" contributed to limiting food availability and access.
unit: 'deaths'

sum_external_factors_mortality_factor:
title: Deaths from famines where external factors were a factor
description_short: Famines estimated to have killed 100,000 or more, where factors like "climate," "environment," and "disease" contributed to reduced food production and access.
unit: 'deaths'

sum_conflict_mortality_not_a_factor:
title: Deaths from famines where conflict was not a factor
description_short: Famines estimated to have killed 100,000 or more, where factors like "war," "blockade," "counterinsurgency," "occupation," "siege," "forced starvation," and "genocide" did not contribute to disrupting food supply and access.
unit: 'deaths'

sum_government_policy_overall_mortality_not_a_factor:
title: Deaths from famines where government policy was not a factor
description_short: Famines estimated to have killed 100,000 or more, where government actions like "economic policy," "colonial policy," "taxation," "state capacity," and "forced labor" did not contribute to limiting food availability and access.
unit: 'deaths'

sum_external_factors_mortality:
sum_external_factors_mortality_not_a_factor:
title: Deaths from famines where external factors were not a factor
description_short: Famines estimated to have killed 100,000 or more, where factors like "climate," "environment," and "disease" did not contribute to reduced food production and access.
unit: 'deaths'
41 changes: 31 additions & 10 deletions etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Load a meadow dataset and create a garden dataset."""

import owid.catalog.processing as pr
import pandas as pd
from owid.catalog import Table

Expand Down Expand Up @@ -49,9 +50,16 @@ def run(dest_dir: str) -> None:
tb["year"] = tb["year"].astype(int)
tb["region"] = tb["region"].astype("category")

# Create new columns for the sum of mortality estimates for where each cause was not (0) a factor.
# Create new columns for the sum of mortality estimates where each cause was a factor.
for factor in ["conflict", "government_policy_overall", "external_factors"]:
new_column_name = f"sum_{factor}_mortality"
new_column_name = f"sum_{factor}_mortality_factor"
tb[new_column_name] = tb.apply(
lambda row: row["wpf_authoritative_mortality_estimate"] if row[factor] == 1 else 0, axis=1
)

# Create new columns for the sum of mortality estimates where each cause was not a factor.
for factor in ["conflict", "government_policy_overall", "external_factors"]:
new_column_name = f"sum_{factor}_mortality_not_a_factor"
tb[new_column_name] = tb.apply(
lambda row: row["wpf_authoritative_mortality_estimate"] if row[factor] == 0 else 0, axis=1
)
Expand All @@ -60,24 +68,37 @@ def run(dest_dir: str) -> None:
grouped_tb = tb.groupby(["year", "region"]).sum().reset_index()

# Keep only the relevant columns
relevant_columns = ["year", "region"] + [
f"sum_{factor}_mortality" for factor in ["conflict", "government_policy_overall", "external_factors"]
]
relevant_columns = (
["year", "region"]
+ [f"sum_{factor}_mortality_factor" for factor in ["conflict", "government_policy_overall", "external_factors"]]
+ [
f"sum_{factor}_mortality_not_a_factor"
for factor in ["conflict", "government_policy_overall", "external_factors"]
]
)
grouped_tb = grouped_tb[relevant_columns]

# Rename and format columns
grouped_tb = Table(grouped_tb, short_name=paths.short_name)
grouped_tb = grouped_tb.rename({"region": "country"}, axis=1)
grouped_tb = grouped_tb.format(["year", "country"])
# Creating a 'World' row by summing mortality estimates across all regions for each group
world_agg = tb.groupby(["year"])[relevant_columns[2:]].sum().reset_index()
world_agg["region"] = "World"

# Concatenating the world row data with the regional data
tb = pr.concat([grouped_tb, world_agg], ignore_index=True)

tb = tb.rename({"region": "country"}, axis=1)
tb = tb.format(["year", "country"])

for col in ["sum_conflict_mortality", "sum_government_policy_overall_mortality", "sum_external_factors_mortality"]:
grouped_tb[col].metadata.origins = origins
for col in relevant_columns[2:]:
tb[col].metadata.origins = origins

#
# Save outputs.
#
# Create a new garden dataset with the same metadata as the meadow dataset.
ds_garden = create_dataset(
dest_dir, tables=[grouped_tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
)

# Save changes in the new garden dataset.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,40 +20,41 @@ tables:
famine_deaths:
title: Deaths from famines
unit: 'deaths'
short_unit: ''
description_short: Deaths in famines that are estimated to have killed 100,000 people or more.
display:
numDecimalPlaces: 0

decadal_famine_deaths:
title: Deaths from famines by decade
unit: 'deaths'
short_unit: ''
description_processing: The deaths were assumed to have been distributed evenly over the duration of the famine.
description_short: Deaths in famines that are estimated to have killed 100,000 people or more.
display:
numDecimalPlaces: 0

famine_deaths_per_rate:
title: Death rates from famines
unit: 'deaths per 100,000 people'
description_short: Deaths in famines that are estimated to have killed 100,000 people or more, per 100,000 people.
display:
numDecimalPlaces: 0

decadal_famine_deaths_rate:
title: Death rates from famines by decade
unit: 'deaths per 100,000 people'
description_short: Deaths in famines that are estimated to have killed 100,000 people or more, per 100,000 people.
display:
numDecimalPlaces: 1

famine_count:
title: Number of famines
unit: ''
short_unit: ''
unit: 'famines'
description_short: Famines that are estimated to have killed 100,000 people or more.
display:
numDecimalPlaces: 0

decadal_famine_count:
title: Number of famines by decade
unit: ''
short_unit: ''
unit: 'famines'
description_short: Famines that are estimated to have killed 100,000 people or more.
display:
numDecimalPlaces: 0

0 comments on commit 3e7d8e4

Please sign in to comment.