diff --git a/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.meta.yml b/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.meta.yml index 810c10275e0..2e6930a2f16 100644 --- a/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.meta.yml +++ b/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.meta.yml @@ -3,7 +3,7 @@ definitions: common: display: numDecimalPlaces: 0 - description_short: Famines that are estimated to have killed 100,000 people or more. + description_processing: The deaths were assumed to have been distributed evenly over the duration of the famine. presentation: topic_tags: - Famines @@ -18,14 +18,32 @@ dataset: tables: famines_by_factor: variables: - sum_conflict_mortality: - title: Deaths from famines where conflict was not a factor + sum_conflict_mortality_factor: + title: Deaths from famines where conflict was a factor + description_short: Famines estimated to have killed 100,000 or more, where factors like "war," "blockade," "counterinsurgency," "occupation," "siege," "forced starvation," and "genocide" contributed to disrupting food supply and access. unit: 'deaths' - sum_government_policy_overall_mortality: + sum_government_policy_overall_mortality_factor: + title: Deaths from famines where government policy was a factor + description_short: Famines estimated to have killed 100,000 or more, where government actions like "economic policy," "colonial policy," "taxation," "state capacity," and "forced labor" contributed to limiting food availability and access. + unit: 'deaths' + + sum_external_factors_mortality_factor: + title: Deaths from famines where external factors was a factor + description_short: Famines estimated to have killed 100,000 or more, where factors like "climate," "environment," and "disease" contributed to reduced food production and access. 
+ unit: 'deaths' + + sum_conflict_mortality_not_a_factor: + title: Deaths from famines where conflict was not a factor + description_short: Famines estimated to have killed 100,000 or more, where factors like "war," "blockade," "counterinsurgency," "occupation," "siege," "forced starvation," and "genocide" did not contribute to disrupting food supply and access. + unit: 'deaths' + + sum_government_policy_overall_mortality_not_a_factor: title: Deaths from famines where government policy was not a factor + description_short: Famines estimated to have killed 100,000 or more, where government actions like "economic policy," "colonial policy," "taxation," "state capacity," and "forced labor" did not contribute to limiting food availability and access. unit: 'deaths' - sum_external_factors_mortality: + sum_external_factors_mortality_not_a_factor: title: Deaths from famines where external factors was not a factor + description_short: Famines estimated to have killed 100,000 or more, where factors like "climate," "environment," and "disease" did not contribute to reduced food production and access. unit: 'deaths' \ No newline at end of file diff --git a/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.py b/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.py index 8d6c3e225c4..096713af190 100644 --- a/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.py +++ b/etl/steps/data/garden/wpf/2024-10-03/famines_by_factor.py @@ -1,5 +1,6 @@ """Load a meadow dataset and create a garden dataset.""" +import owid.catalog.processing as pr import pandas as pd from owid.catalog import Table @@ -49,9 +50,16 @@ def run(dest_dir: str) -> None: tb["year"] = tb["year"].astype(int) tb["region"] = tb["region"].astype("category") - # Create new columns for the sum of mortality estimates for where each cause was not (0) a factor. + # Create new columns for the sum of mortality estimates where each cause was a factor. 
for factor in ["conflict", "government_policy_overall", "external_factors"]: - new_column_name = f"sum_{factor}_mortality" + new_column_name = f"sum_{factor}_mortality_factor" + tb[new_column_name] = tb.apply( + lambda row: row["wpf_authoritative_mortality_estimate"] if row[factor] == 1 else 0, axis=1 + ) + + # Create new columns for the sum of mortality estimates where each cause was not a factor. + for factor in ["conflict", "government_policy_overall", "external_factors"]: + new_column_name = f"sum_{factor}_mortality_not_a_factor" tb[new_column_name] = tb.apply( lambda row: row["wpf_authoritative_mortality_estimate"] if row[factor] == 0 else 0, axis=1 ) @@ -60,24 +68,37 @@ def run(dest_dir: str) -> None: grouped_tb = tb.groupby(["year", "region"]).sum().reset_index() # Keep only the relevant columns - relevant_columns = ["year", "region"] + [ - f"sum_{factor}_mortality" for factor in ["conflict", "government_policy_overall", "external_factors"] - ] + relevant_columns = ( + ["year", "region"] + + [f"sum_{factor}_mortality_factor" for factor in ["conflict", "government_policy_overall", "external_factors"]] + + [ + f"sum_{factor}_mortality_not_a_factor" + for factor in ["conflict", "government_policy_overall", "external_factors"] + ] + ) grouped_tb = grouped_tb[relevant_columns] + # Rename and format columns grouped_tb = Table(grouped_tb, short_name=paths.short_name) - grouped_tb = grouped_tb.rename({"region": "country"}, axis=1) - grouped_tb = grouped_tb.format(["year", "country"]) + # Creating a 'World' row by summing mortality estimates across all regions for each group + world_agg = tb.groupby(["year"])[relevant_columns[2:]].sum().reset_index() + world_agg["region"] = "World" + + # Concatenating the world row data with the regional data + tb = pr.concat([grouped_tb, world_agg], ignore_index=True) + + tb = tb.rename({"region": "country"}, axis=1) + tb = tb.format(["year", "country"]) - for col in ["sum_conflict_mortality", 
"sum_government_policy_overall_mortality", "sum_external_factors_mortality"]: - grouped_tb[col].metadata.origins = origins + for col in relevant_columns[2:]: + tb[col].metadata.origins = origins # # Save outputs. # # Create a new garden dataset with the same metadata as the meadow dataset. ds_garden = create_dataset( - dest_dir, tables=[grouped_tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata ) # Save changes in the new garden dataset. diff --git a/etl/steps/data/garden/wpf/2024-10-03/total_famines_by_year_decade.meta.yml b/etl/steps/data/garden/wpf/2024-10-03/total_famines_by_year_decade.meta.yml index 30e3de73c6f..52c22674679 100644 --- a/etl/steps/data/garden/wpf/2024-10-03/total_famines_by_year_decade.meta.yml +++ b/etl/steps/data/garden/wpf/2024-10-03/total_famines_by_year_decade.meta.yml @@ -20,40 +20,41 @@ tables: famine_deaths: title: Deaths from famines unit: 'deaths' - short_unit: '' + description_short: Deaths in famines that are estimated to have killed 100,000 people or more. display: numDecimalPlaces: 0 decadal_famine_deaths: title: Deaths from famines by decade unit: 'deaths' - short_unit: '' - description_processing: The deaths were assumed to have been distributed evenly over the duration of the famine. + description_short: Deaths in famines that are estimated to have killed 100,000 people or more. display: numDecimalPlaces: 0 famine_deaths_per_rate: title: Death rates from famines unit: 'deaths per 100,000 people' + description_short: Deaths in famines that are estimated to have killed 100,000 people or more, per 100,000 people. display: numDecimalPlaces: 0 decadal_famine_deaths_rate: title: Death rates from famines by decade unit: 'deaths per 100,000 people' + description_short: Deaths in famines that are estimated to have killed 100,000 people or more, per 100,000 people. 
display: numDecimalPlaces: 1 famine_count: title: Number of famines - unit: '' - short_unit: '' + unit: 'famines' + description_short: Famines that are estimated to have killed 100,000 people or more. display: numDecimalPlaces: 0 decadal_famine_count: title: Number of famines by decade - unit: '' - short_unit: '' + unit: 'famines' + description_short: Famines that are estimated to have killed 100,000 people or more. display: numDecimalPlaces: 0