From a54453f704aa1edc448deefea1259606d4cf65c5 Mon Sep 17 00:00:00 2001 From: Pablo Rosado Date: Tue, 25 Jul 2023 15:19:19 +0200 Subject: [PATCH] feat(wb): Add snapshot, meadow, garden and grapher steps for food affordability dataset --- dag/agriculture.yml | 9 + .../food_prices_for_nutrition.countries.json | 188 +++++++++++++++ .../food_prices_for_nutrition.meta.yml | 218 ++++++++++++++++++ .../2023-07-24/food_prices_for_nutrition.py | 70 ++++++ .../2023-07-24/food_prices_for_nutrition.py | 33 +++ .../2023-07-24/food_prices_for_nutrition.py | 70 ++++++ .../food_prices_for_nutrition.csv.dvc | 7 +- .../2023-07-24/food_prices_for_nutrition.py | 63 +++++ 8 files changed, 656 insertions(+), 2 deletions(-) create mode 100644 etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.countries.json create mode 100644 etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.meta.yml create mode 100644 etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.py create mode 100644 etl/steps/data/grapher/wb/2023-07-24/food_prices_for_nutrition.py create mode 100644 etl/steps/data/meadow/wb/2023-07-24/food_prices_for_nutrition.py create mode 100644 snapshots/wb/2023-07-24/food_prices_for_nutrition.py diff --git a/dag/agriculture.yml b/dag/agriculture.yml index d4b606149eb..7d1673122ce 100644 --- a/dag/agriculture.yml +++ b/dag/agriculture.yml @@ -63,3 +63,12 @@ steps: # data://explorers/agriculture/2023-06-12/crop_yields: - data://grapher/agriculture/2023-06-12/attainable_yields + # + # Food prices for nutrition - World Bank based on FAO. 
+ # + data://meadow/wb/2023-07-24/food_prices_for_nutrition: + - snapshot://wb/2023-07-24/food_prices_for_nutrition.csv + data://garden/wb/2023-07-24/food_prices_for_nutrition: + - data://meadow/wb/2023-07-24/food_prices_for_nutrition + data://grapher/wb/2023-07-24/food_prices_for_nutrition: + - data://garden/wb/2023-07-24/food_prices_for_nutrition diff --git a/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.countries.json b/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.countries.json new file mode 100644 index 00000000000..b61727ddc72 --- /dev/null +++ b/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.countries.json @@ -0,0 +1,188 @@ +{ + "ABW": "Aruba", + "AGO": "Angola", + "AIA": "Anguilla", + "ALB": "Albania", + "ARE": "United Arab Emirates", + "ARG": "Argentina", + "ARM": "Armenia", + "ATG": "Antigua and Barbuda", + "AUS": "Australia", + "AUT": "Austria", + "AZE": "Azerbaijan", + "BDI": "Burundi", + "BEL": "Belgium", + "BEN": "Benin", + "BFA": "Burkina Faso", + "BGD": "Bangladesh", + "BGR": "Bulgaria", + "BHR": "Bahrain", + "BHS": "Bahamas", + "BIH": "Bosnia and Herzegovina", + "BLR": "Belarus", + "BLZ": "Belize", + "BMU": "Bermuda", + "BOL": "Bolivia", + "BRA": "Brazil", + "BRB": "Barbados", + "BRN": "Brunei", + "BTN": "Bhutan", + "BWA": "Botswana", + "CAF": "Central African Republic", + "CAN": "Canada", + "CHE": "Switzerland", + "CHL": "Chile", + "CHN": "China", + "CIV": "Cote d'Ivoire", + "CMR": "Cameroon", + "COD": "Democratic Republic of Congo", + "COG": "Congo", + "COL": "Colombia", + "COM": "Comoros", + "CPV": "Cape Verde", + "CRI": "Costa Rica", + "CUW": "Curacao", + "CYM": "Cayman Islands", + "CYP": "Cyprus", + "CZE": "Czechia", + "DEU": "Germany", + "DJI": "Djibouti", + "DMA": "Dominica", + "DNK": "Denmark", + "DOM": "Dominican Republic", + "DZA": "Algeria", + "ECU": "Ecuador", + "EGY": "Egypt", + "ESP": "Spain", + "EST": "Estonia", + "ETH": "Ethiopia", + "FIN": "Finland", + "FJI": "Fiji", + "FRA": 
"France", + "GAB": "Gabon", + "GBR": "United Kingdom", + "GHA": "Ghana", + "GIN": "Guinea", + "GMB": "Gambia", + "GNB": "Guinea-Bissau", + "GNQ": "Equatorial Guinea", + "GRC": "Greece", + "GRD": "Grenada", + "GUY": "Guyana", + "HKG": "Hong Kong", + "HND": "Honduras", + "HRV": "Croatia", + "HTI": "Haiti", + "HUN": "Hungary", + "IDN": "Indonesia", + "IND": "India", + "IRL": "Ireland", + "IRN": "Iran", + "IRQ": "Iraq", + "ISL": "Iceland", + "ISR": "Israel", + "ITA": "Italy", + "JAM": "Jamaica", + "JOR": "Jordan", + "JPN": "Japan", + "KAZ": "Kazakhstan", + "KEN": "Kenya", + "KGZ": "Kyrgyzstan", + "KHM": "Cambodia", + "KNA": "Saint Kitts and Nevis", + "KOR": "South Korea", + "KWT": "Kuwait", + "LAO": "Laos", + "LBR": "Liberia", + "LCA": "Saint Lucia", + "LKA": "Sri Lanka", + "LSO": "Lesotho", + "LTU": "Lithuania", + "LUX": "Luxembourg", + "LVA": "Latvia", + "MAR": "Morocco", + "MDA": "Moldova", + "MDG": "Madagascar", + "MDV": "Maldives", + "MEX": "Mexico", + "MKD": "North Macedonia", + "MLI": "Mali", + "MLT": "Malta", + "MMR": "Myanmar", + "MNE": "Montenegro", + "MNG": "Mongolia", + "MOZ": "Mozambique", + "MRT": "Mauritania", + "MSR": "Montserrat", + "MUS": "Mauritius", + "MWI": "Malawi", + "MYS": "Malaysia", + "NAM": "Namibia", + "NER": "Niger", + "NGA": "Nigeria", + "NIC": "Nicaragua", + "NLD": "Netherlands", + "NOR": "Norway", + "NPL": "Nepal", + "NZL": "New Zealand", + "OMN": "Oman", + "PAK": "Pakistan", + "PAN": "Panama", + "PER": "Peru", + "PHL": "Philippines", + "POL": "Poland", + "PRT": "Portugal", + "PRY": "Paraguay", + "PSE": "Palestine", + "QAT": "Qatar", + "ROU": "Romania", + "RUS": "Russia", + "RWA": "Rwanda", + "SAU": "Saudi Arabia", + "SDN": "Sudan", + "SEN": "Senegal", + "SGP": "Singapore", + "SLE": "Sierra Leone", + "SLV": "El Salvador", + "SRB": "Serbia", + "STP": "Sao Tome and Principe", + "SUR": "Suriname", + "SVK": "Slovakia", + "SVN": "Slovenia", + "SWE": "Sweden", + "SWZ": "Eswatini", + "SXM": "Sint Maarten (Dutch part)", + "SYC": "Seychelles", + 
"TCA": "Turks and Caicos Islands", + "TCD": "Chad", + "TGO": "Togo", + "THA": "Thailand", + "TJK": "Tajikistan", + "TTO": "Trinidad and Tobago", + "TUN": "Tunisia", + "TUR": "Turkey", + "TWN": "Taiwan", + "TZA": "Tanzania", + "UGA": "Uganda", + "URY": "Uruguay", + "USA": "United States", + "VCT": "Saint Vincent and the Grenadines", + "VGB": "British Virgin Islands", + "VNM": "Vietnam", + "ZAF": "South Africa", + "ZMB": "Zambia", + "ZWE": "Zimbabwe", + "BON": "Bonaire (WB)", + "EAS": "East Asia & Pacific (WB)", + "ECS": "Europe & Central Asia (WB)", + "HIC": "High-income countries", + "LCN": "Latin America & Caribbean (WB)", + "LIC": "Low-income countries (WB)", + "LMC": "Lower-middle-income countries", + "MEA": "Middle East & North Africa (WB)", + "NAC": "North America (WB)", + "SAS": "South Asia (WB)", + "SSF": "Sub-Saharan Africa (WB)", + "UMC": "Upper-middle-income countries", + "WLD": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.meta.yml b/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.meta.yml new file mode 100644 index 00000000000..5e7c894701b --- /dev/null +++ b/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.meta.yml @@ -0,0 +1,218 @@ +dataset: + title: "Food prices for nutrition (World Bank, 2023)" + description: | + This dataset is based on the methodology developed in Herforth et al. (2022), which is a background paper for the UN agencies' annual report on The State of Food Security and Nutrition in the World 2022 (https://www.fao.org/publications/sofi). It is based on data on prices for locally available food items from the World Bank's International Comparison Program (ICP) (https://icp.worldbank.org/) matched to other data on food composition and dietary requirements. + + The nutritional requirements used in this study are in line with the WHO's recommendations for the median woman of reproductive age. 
The authors note two key reasons for this: + (1) Requirements fall roughly at the median of the entire population distribution, in the sense that least-cost diets to meet energy and nutrient requirements for people in this reference group approximate the median level of least costs for all sex-age groups over the entire life cycle. This reference group is therefore a good representation of the population as a whole. + (2) Women of reproductive age are typically a nutritionally vulnerable population group, as seen in their increased risk of dietary inadequacies (due to social practices and norms that often disadvantage them in terms of access to food), which have important consequences for themselves and their children. Previous studies have also based their analyses on this reference group. + +tables: + food_prices_for_nutrition: + variables: + cost_of_an_energy_sufficient_diet: + title: Cost of an energy sufficient diet + unit: 2017 PPP$/person/day + short_unit: $ + description: | + Cost of the least expensive starchy staple for energy balance for a representative person at 2330 kcal/day. Data available for 2017. + affordability_of_an_energy_sufficient_diet__ratio_of_cost_to_food_expenditures: + title: "Affordability of an energy sufficient diet: ratio of cost to food expenditures" + unit: "" + short_unit: "" + description: | + The ratio of the cost of an energy sufficient diet to total food expenditure per capita per day from national accounts. + percent_of_the_population_who_cannot_afford_sufficient_calories: + title: Percent of the population who cannot afford sufficient calories + unit: "%" + short_unit: "%" + description: | + The indicator expresses the percentage of the total population unable to afford an energy sufficient diet. A healthy diet is considered unaffordable in a country when its cost exceeds 52 percent of income per capita per day. 
This percentage accounts for a portion of income that can be credibly reserved for food, based on observations that the population in low-income countries spend, on average, 52 percent of their income on food, as derived from the 2017 national accounts household expenditure data of the World Bank's International Comparison Programme (ICP). Income data are provided by the World Bank's Poverty and Inequality Platform. A value of zero indicates a null or a small number rounded down at the current precision level. + affordability_of_an_energy_sufficient_diet__ratio_of_cost_to_the_food_poverty_line: + title: "Affordability of an energy sufficient diet: ratio of cost to the food poverty line" + unit: "" + short_unit: "" + description: | + The ratio of the cost of an energy sufficient diet to the $1.12 food poverty line (52% of the international poverty line of 2.15/day in 2017 PPP$). + people_who_cannot_afford_sufficient_calories: + title: Number of people who cannot afford sufficient calories + unit: people + short_unit: "" + description: | + The indicator expresses the total number of people who cannot afford an energy-sufficient diet in a given country and year. The indicator is computed by multiplying the percentage of the population in a country unable to afford a healthy diet by population data taken from the World Development Indicators (WDI) of the World Bank. A value of zero indicates a null or a small number rounded down at the current precision level. + cost_of_a_healthy_diet: + title: Cost of a healthy diet + unit: current PPP$/person/day + short_unit: $ + description: | + The cost of a healthy diet indicator is the cost of purchasing the least expensive locally available foods to meet requirements for energy and food-based dietary guidelines (FBDGs) for a representative person within energy balance at 2330 kcal/day. 
PPPs in 2018, and/or 2019, and/or 2020, and/or 2021 are imputed by the Food and Agriculture Organization of the United Nations for the given countries of Argentina, Aruba, Bermuda, British Virgin Islands, Cayman Islands, Curaçao, Democratic Republic of the Congo, Dominica, Djibouti, Equatorial Guinea, Eswatini, Gabon, Kazakhstan, Liberia, Malawi, Myanmar, Sao Tome and Principe, Senegal, Seychelles, Sint Maarten (Dutch part), Suriname, Tajikistan, United Arab Emirates and Zimbabwe. + cost_of_a_healthy_diet_relative_to_the_cost_of_sufficient_energy_from_starchy_staples: + title: Cost of a healthy diet relative to the cost of sufficient energy from starchy staples + unit: "" + short_unit: "" + description: | + Ratio between the cost of a healthy diet (CoHD) that meets requirements for energy and food-based dietary guidelines (FBDGs) and the cost of caloric adequacy (CoCA) that uses only starchy staples to meet energy requirements. + cost_of_animal_source_foods: + title: Cost of animal-source foods + unit: 2017 PPP$/person/day + short_unit: $ + description: | + The cost of purchasing the least expensive locally available animal-source foods to meet daily intake levels recommended in food-based dietary guidelines (FBDGs). Animal-source foods are one of the six food groups within the Healthy Diet Basket. + cost_share_for_animal_sourced_foods_in_a_least_cost_healthy_diet: + title: Cost share for animal-sourced foods in a least-cost healthy diet + unit: "%" + short_unit: "%" + description: | + Share of costs for the least expensive animal-source foods to meet daily recommendations in food-based dietary guidelines (FBDGs), as a proportion of the total cost of a healthy diet. 
+ cost_of_animal_sourced_foods_relative_to_the_starchy_staples_in_a_least_cost_healthy_diet: + title: Cost of animal-sourced foods relative to the starchy staples in a least-cost healthy diet + unit: "" + short_unit: "" + description: | + Cost of the least expensive animal-source foods as a multiple of the least expensive starchy staples to meet daily recommendations in food-based dietary guidelines (FBDGs). + cost_of_fruits: + title: Cost of fruits + unit: 2017 PPP$/person/day + short_unit: $ + description: | + The cost of purchasing the least expensive locally available fruits to meet daily intake levels recommended in food-based dietary guidelines (FBDGs). Fruits are one of the six food groups within the Healthy Diet Basket. Data available for 2017. + cost_share_for_fruits_in_a_least_cost_healthy_diet: + title: Cost share for fruits in a least-cost healthy diet + unit: "%" + short_unit: "%" + description: | + Share of costs for the least expensive fruits to meet daily recommendations in food-based dietary guidelines (FBDGs), as a proportion of the total cost of a healthy diet. + cost_of_fruits_relative_to_the_starchy_staples_in_a_least_cost_healthy_diet: + title: Cost of fruits relative to the starchy staples in a least-cost healthy diet + unit: "" + short_unit: "" + description: | + Cost of the least expensive fruits as a multiple of the least expensive starchy staples to meet daily recommendations in food-based dietary guidelines (FBDGs). + affordability_of_a_healthy_diet__ratio_of_cost_to_food_expenditures: + title: "Affordability of a healthy diet: ratio of cost to food expenditures" + unit: "" + short_unit: "" + description: | + The ratio of the cost of a healthy diet to total food expenditure per capita per day from national accounts. 
+ percent_of_the_population_who_cannot_afford_a_healthy_diet: + title: Percent of the population who cannot afford a healthy diet + unit: "%" + short_unit: "%" + description: | + The indicator expresses the percentage of the total population unable to afford a healthy diet. A healthy diet is considered unaffordable in a country when its cost exceeds 52 percent of income per capita per day. This percentage accounts for a portion of income that can be credibly reserved for food, based on observations that the population in low-income countries spend, on average, 52 percent of their income on food, as derived from the 2017 national accounts household expenditure data of the World Bank's International Comparison Programme (ICP). Income data are provided by the World Bank's Poverty and Inequality Platform. A value of zero indicates a null or a small number rounded down at the current precision level. + cost_of_legumes__nuts_and_seeds: + title: Cost of legumes, nuts and seeds + unit: 2017 PPP$/person/day + short_unit: $ + description: | + The cost of purchasing the least expensive locally available legumes, nuts and seeds to meet daily intake levels recommended in food-based dietary guidelines (FBDGs). Legumes, nuts and seeds are one of the six food groups within the Healthy Diet Basket. + cost_share_for_legumes__nuts_and_seeds_in_a_least_cost_healthy_diet: + title: Cost share for legumes, nuts and seeds in a least-cost healthy diet + unit: "%" + short_unit: "%" + description: | + Share of costs for the least expensive legumes, nuts or seeds to meet daily recommendations in food-based dietary guidelines (FBDGs), as a proportion of the total cost of a healthy diet. 
+ cost_of_legumes__nuts_and_seeds_relative_to_the_starchy_staples_in_a_least_cost_healthy_diet: + title: Cost of legumes, nuts and seeds relative to the starchy staples in a least-cost healthy diet + unit: "" + short_unit: "" + description: | + Cost of the least expensive legumes, nuts and seeds as a multiple of the least expensive starchy staples to meet daily recommendations in food-based dietary guidelines (FBDGs). + cost_of_oils_and_fats: + title: Cost of oils and fats + unit: 2017 PPP$/person/day + short_unit: $ + description: | + The cost of purchasing the least expensive locally available fats or oils to meet daily intake levels recommended in food-based dietary guidelines (FBDGs). Fats and oils are one of the six food groups within the Healthy Diet Basket. + cost_share_for_oils_and_fats_in_a_least_cost_healthy_diet: + title: Cost share for oils and fats in a least-cost healthy diet + unit: "%" + short_unit: "%" + description: | + Share of costs for the least expensive oils or fats to meet daily recommendations in food-based dietary guidelines (FBDGs), as a proportion of the total cost of a healthy diet. + cost_of_oils_and_fats_relative_to_the_starchy_staples_in_a_least_cost_healthy_diet: + title: Cost of oils and fats relative to the starchy staples in a least-cost healthy diet + unit: "" + short_unit: "" + description: | + Cost of the least expensive oils and fats as a multiple of the least expensive starchy staples to meet daily recommendations in food-based dietary guidelines (FBDGs). + affordability_of_a_healthy_diet__ratio_of_cost_to_the_food_poverty_line: + title: "Affordability of a healthy diet: ratio of cost to the food poverty line" + unit: "" + short_unit: "" + description: | + The ratio of the cost of a healthy diet to the $1.12 food poverty line (52% of the international poverty line of 2.15/day in 2017 PPP$). 
+ cost_of_starchy_staples: + title: Cost of starchy staples + unit: 2017 PPP$/person/day + short_unit: $ + description: | + The cost of purchasing the least expensive locally available starchy staples to meet daily intake levels recommended in food-based dietary guidelines (FBDGs). Starchy staples are one of the six food groups within the Healthy Diet Basket. + cost_share_for_starchy_staples_in_a_least_cost_healthy_diet: + title: Cost share for starchy staples in a least-cost healthy diet + unit: "%" + short_unit: "%" + description: | + Share of costs for the least expensive starchy staples to meet daily recommendations in food-based dietary guidelines (FBDGs), as a proportion of the total cost of a healthy diet. + people_who_cannot_afford_a_healthy_diet: + title: Number of people who cannot afford a healthy diet + unit: people + short_unit: "" + description: | + The indicator expresses the total number of people who cannot afford a healthy diet in a given country and year. The indicator is computed by multiplying the percentage of the population in a country unable to afford a healthy diet by population data taken from the World Development Indicators (WDI) of the World Bank. A value of zero indicates a null or a small number rounded down at the current precision level. Data are currently available for 2017, 2018, 2019, 2020 and 2021. + cost_of_vegetables: + title: Cost of vegetables + unit: 2017 PPP$/person/day + short_unit: $ + description: | + The cost of purchasing the least expensive locally available vegetables to meet daily intake levels recommended in food-based dietary guidelines (FBDGs), in 2017 PPP$/person/day. Vegetables are one of the six food groups within the Healthy Diet Basket. 
+ cost_share_for_vegetables_in_a_least_cost_healthy_diet: + title: Cost share for vegetables in a least-cost healthy diet + unit: "%" + short_unit: "%" + description: | + Share of costs for the least expensive vegetables to meet daily recommendations in food-based dietary guidelines (FBDGs), as a proportion of the total cost of a healthy diet. Data available for 2017. + cost_of_vegetables_relative_to_the_starchy_staples_in_a_least_cost_healthy_diet: + title: Cost of vegetables relative to the starchy staples in a least-cost healthy diet + unit: "" + short_unit: "" + description: | + Cost of the least expensive vegetables as a multiple of the least expensive starchy staples to meet daily recommendations in food-based dietary guidelines (FBDGs). + cost_of_a_nutrient_adequate_diet: + title: Cost of a nutrient adequate diet + unit: 2017 PPP$/person/day + short_unit: $ + description: | + Cost of the least expensive locally-available foods for nutrient adequacy for a representative person within upper and lower bounds for 23 essential macro- and micronutrients plus energy balance at 2330 kcal/day. Data available for 2017. + affordability_of_a_nutrient_adequate_diet__ratio_of_cost_to_food_expenditures: + title: "Affordability of a nutrient adequate diet: ratio of cost to food expenditures" + unit: "" + short_unit: "" + description: | + The ratio of the cost of a nutrient adequate diet to total food expenditure per capita per day from national accounts. + percent_of_the_population_who_cannot_afford_nutrient_adequacy: + title: Percent of the population who cannot afford nutrient adequacy + unit: "%" + short_unit: "%" + description: | + The indicator expresses the percentage of the total population unable to afford a nutrient adequate diet. A healthy diet is considered unaffordable in a country when its cost exceeds 52 percent of income per capita per day. 
This percentage accounts for a portion of income that can be credibly reserved for food, based on observations that the population in low-income countries spend, on average, 52 percent of their income on food, as derived from the 2017 national accounts household expenditure data of the World Bank's International Comparison Programme (ICP). Income data are provided by the World Bank's Poverty and Inequality Platform. A value of zero indicates a null or a small number rounded down at the current precision level. + affordability_of_a_nutrient_adequate_diet__ratio_of_cost_to_the_food_poverty_line: + title: "Affordability of a nutrient adequate diet: ratio of cost to the food poverty line" + unit: "" + short_unit: "" + description: | + The ratio of the cost of a nutrient adequate diet to the $1.12 food poverty line (52% of the international poverty line of 2.15/day in 2017 PPP$). + people_who_cannot_afford_nutrient_adequacy: + title: Number of people who cannot afford nutrient adequacy + unit: people + short_unit: "" + description: | + The indicator expresses the total number of people who cannot afford a nutrient adequate diet in a given country and year. The indicator is computed by multiplying the percentage of the population in a country unable to afford a healthy diet by population data taken from the World Development Indicators (WDI) of the World Bank. A value of zero indicates a null or a small number rounded down at the current precision level. + population: + title: Population according to the World Bank + unit: people + short_unit: people + description: | + Total population is based on the de facto definition of population, which counts all residents regardless of legal status or citizenship. The values shown are midyear estimates. + + Sources: (1) United Nations Population Division. World Population Prospects: 2022 Revision. 
(2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme. diff --git a/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.py b/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.py new file mode 100644 index 00000000000..93b9821dcca --- /dev/null +++ b/etl/steps/data/garden/wb/2023-07-24/food_prices_for_nutrition.py @@ -0,0 +1,70 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from typing import cast + +from owid.catalog import Dataset, Table + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +# Expected classifications, sorted from oldest to newest. +EXPECTED_CLASSIFICATIONS = ["FPN 1.0", "FPN 1.1", "FPN 2.0"] +# Classification to adopt (by default, the latest one). +CLASSIFICATION = EXPECTED_CLASSIFICATIONS[-1] + + +def adapt_units(tb: Table) -> Table: + # Change units from million people to people. + for column in [column for column in tb.columns if column.startswith("millions_of_people")]: + tb[column] *= 1e6 + tb = tb.rename(columns={column: column.replace("millions_of_people", "people")}) + + # Convert units expressed as fractions to percentages. + for column in [column for column in tb.columns if "cost_share" in column]: + tb[column] *= 100 + + return tb + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset and read its main table. + ds_meadow = cast(Dataset, paths.load_dependency("food_prices_for_nutrition")) + tb = ds_meadow["food_prices_for_nutrition"].reset_index() + + # + # Process data. + # + # Sanity check. + error = "Expected classifications have changed." 
+ assert set(tb["classification"]) == set(EXPECTED_CLASSIFICATIONS), error + + # Select the latest classification. + tb = tb[tb["classification"] == CLASSIFICATION].drop(columns=["classification"]).reset_index(drop=True) + + # Rename columns conveniently. + tb = tb.rename(columns={"economy": "country"}) + + # Harmonize country names. + tb: Table = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + + # Adapt units. + tb = adapt_units(tb=tb) + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["country", "year"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], default_metadata=ds_meadow.metadata, check_variables_metadata=True + ) + ds_garden.save() diff --git a/etl/steps/data/grapher/wb/2023-07-24/food_prices_for_nutrition.py b/etl/steps/data/grapher/wb/2023-07-24/food_prices_for_nutrition.py new file mode 100644 index 00000000000..7290dc59c4d --- /dev/null +++ b/etl/steps/data/grapher/wb/2023-07-24/food_prices_for_nutrition.py @@ -0,0 +1,33 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from typing import cast + +from owid.catalog import Dataset + +from etl.helpers import PathFinder, create_dataset, grapher_checks + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset and read its main table. + ds_garden = cast(Dataset, paths.load_dependency("food_prices_for_nutrition")) + tb = ds_garden["food_prices_for_nutrition"] + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset(dest_dir, tables=[tb], default_metadata=ds_garden.metadata) + + # + # Checks. + # + grapher_checks(ds_grapher) + + # Save changes in the new grapher dataset. 
+ ds_grapher.save() diff --git a/etl/steps/data/meadow/wb/2023-07-24/food_prices_for_nutrition.py b/etl/steps/data/meadow/wb/2023-07-24/food_prices_for_nutrition.py new file mode 100644 index 00000000000..6f623b851e1 --- /dev/null +++ b/etl/steps/data/meadow/wb/2023-07-24/food_prices_for_nutrition.py @@ -0,0 +1,70 @@ +"""Load a snapshot and create a meadow dataset.""" + +from typing import cast + +import owid.catalog.processing as pr +from owid.catalog import Table + +from etl.helpers import PathFinder, create_dataset +from etl.snapshot import Snapshot + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def prepare_data(tb: Table) -> Table: + # Years are given in columns, like "YR2017". Make them integers. + tb = tb.rename(columns={column: int(column.replace("YR", "")) for column in tb.columns if column.startswith("YR")}) + + # Create a column for years. + tb = tb.melt(id_vars=["classification", "economy", "id", "variable_title"], var_name="year") + + # Gather a mapping of variable ids and titles. + variable_id_to_title = tb[["id", "variable_title"]].drop_duplicates().set_index(["id"])["variable_title"].to_dict() + + # Transpose the dataframe to have a column per variable. + tb = tb.drop(columns=["variable_title"]).pivot( + index=["classification", "economy", "year"], columns="id", join_column_levels_with="_" + ) + + # Columns now start with "value_", remove that prefix to recover the original names. + tb = tb.rename(columns={column: column.replace("value_", "") for column in tb.columns}) + + # Add titles to each variable metadata. + for variable_id, variable_title in variable_id_to_title.items(): + tb[variable_id].metadata.title = variable_title + # tb[variable_id].metadata.description = f"{variable_title}.\n"\ + # "This indicator corresponds to the World Bank's '{variable_id}'." + tb = tb.rename(columns={variable_id: variable_title}) + + # Ensure all column names are snake-case. 
+ tb = tb.underscore() + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["classification", "economy", "year"], verify_integrity=True).sort_index() + + return tb + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = cast(Snapshot, paths.load_dependency("food_prices_for_nutrition.csv")) + + # Load data from snapshot. + tb = pr.read_csv(snap.path, metadata=snap.to_table_metadata()) + + # + # Process data. + # + # Create a new table and ensure all columns are snake-case. + tb = prepare_data(tb=tb) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], default_metadata=snap.metadata, check_variables_metadata=True) + ds_meadow.save() diff --git a/snapshots/wb/2023-07-24/food_prices_for_nutrition.csv.dvc b/snapshots/wb/2023-07-24/food_prices_for_nutrition.csv.dvc index 21bda360d87..057016cc690 100644 --- a/snapshots/wb/2023-07-24/food_prices_for_nutrition.csv.dvc +++ b/snapshots/wb/2023-07-24/food_prices_for_nutrition.csv.dvc @@ -3,8 +3,9 @@ meta: publication_year: 2023 publication_date: '2023-07-01' - source_name: World Bank - source_published_by: World Bank + source_name: World Bank based on different sources + source_published_by: | + World Bank, adapted from Herforth, A., Venkat, A., Bai, Y., Costlow, L., Holleman, C. & Masters, W.A. 2022. Methods and options to monitor globally the cost and affordability of a healthy diet. Background paper for The State of Food Security and Nutrition in the World 2022. Rome, FAO. 
url: https://databank.worldbank.org/source/food-prices-for-nutrition source_data_url: license_url: https://www.worldbank.org/en/about/legal/terms-of-use-for-datasets @@ -13,7 +14,9 @@ meta: date_accessed: 2023-07-24 is_public: true description: | + Food Prices for Nutrition provides indicators on the cost and affordability of healthy diets in each country, showing the population's physical and economic access to sufficient quantities of locally available items for an active and healthy life. It also provides indicators on the cost and affordability of an energy-sufficient diet and of a nutrient-adequate diet. These indicators are explained in detail in the Food Prices for Nutrition DataHub here: https://www.worldbank.org/foodpricesfornutrition. + Version 2.0 uses income and international poverty line data from the World Bank's Poverty and Inequality Platform (PIP) that are based on 2017 purchasing power parities (PPPs), published by the International Comparison Program, for the following diet affordability indicators: (i) ratio of the cost of the diet to the food poverty line, set at 52 percent of the 2017 PPP-based international poverty line of $2.15 per person per day; and (ii) the share and volume of the population that cannot afford the diet, based on national income distributions expressed in 2017 PPP dollars. Note that data on the Cost and Affordability of a Healthy Diet indicators reported in the State of Food Security and Nutrition in the World 2023 correspond to those data in Version 2.0. 
wdir: ../../../data/snapshots/wb/2023-07-24 outs: - md5: 7f34bb7f7b08cacc0481af3c9d6eb579 diff --git a/snapshots/wb/2023-07-24/food_prices_for_nutrition.py b/snapshots/wb/2023-07-24/food_prices_for_nutrition.py new file mode 100644 index 00000000000..ec758b020ef --- /dev/null +++ b/snapshots/wb/2023-07-24/food_prices_for_nutrition.py @@ -0,0 +1,63 @@ +"""Script to create a snapshot of dataset 'Food Prices for Nutrition'.""" + +from pathlib import Path + +import click +import pandas as pd +import wbgapi as wb +from owid.datautils.dataframes import map_series +from tqdm.auto import tqdm + +from etl.snapshot import Snapshot, add_snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + +# Dataset id of the World Bank's Food Prices for Nutrition dataset. +WB_FOOD_PRICES_DATASET_ID = 88 + + +@click.command() +@click.option( + "--upload/--skip-upload", + default=True, + type=bool, + help="Upload dataset to Snapshot", +) +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"wb/{SNAPSHOT_VERSION}/food_prices_for_nutrition.csv") + + # List all variable ids and titles in the food prices dataset + variables = wb.series.info(db=WB_FOOD_PRICES_DATASET_ID) + + # Load data for each variable. + # Get data for all variables one by one. + data = [] + # Note: This takes a few minutes and could possibly be parallelized. + for variable in tqdm(variables.items): + # Load data for current variable and add it to the list of all dataframes. + variable_df = wb.data.DataFrame(db=WB_FOOD_PRICES_DATASET_ID, series=variable["id"]) + variable_df["id"] = variable["id"] + data.append(variable_df) + + # Note: In theory, metadata can also be fetched with the API, but it fails with JSONDecodeError. + # variable_metadata = wb.series.metadata.get(variable["id"]) + + # Combine all dataframes into one. + df = pd.concat(data) + + # Add variable titles to the dataframe as a new column. 
+ df["variable_title"] = map_series( + series=df["id"], + mapping={variable["id"]: variable["value"] for variable in variables.items}, + warn_on_missing_mappings=True, + warn_on_unused_mappings=True, + ) + + # Add file to DVC and upload to S3. + add_snapshot(uri=snap.uri, dataframe=df, upload=upload) # type: ignore + + +if __name__ == "__main__": + main()