Skip to content

Commit

Permalink
edstats from a spreadsheet test
Browse files Browse the repository at this point in the history
  • Loading branch information
veronikasamborska1994 committed Nov 5, 2024
1 parent dc6cee9 commit 038e139
Show file tree
Hide file tree
Showing 6 changed files with 420 additions and 0 deletions.
8 changes: 8 additions & 0 deletions dag/education.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,11 @@ steps:
- snapshot://wb/2024-06-18/edstats_metadata.xls
data://grapher/unesco/2024-06-25/education_sdgs:
- data://garden/unesco/2024-06-25/education_sdgs

# World Bank EdStats
# Each step key is followed by the single step it depends on:
# snapshot (edstats.csv) -> meadow -> garden -> grapher, all under wb/2024-11-04.
data://meadow/wb/2024-11-04/edstats:
- snapshot://wb/2024-11-04/edstats.csv
data://garden/wb/2024-11-04/edstats:
- data://meadow/wb/2024-11-04/edstats
data://grapher/wb/2024-11-04/edstats:
- data://garden/wb/2024-11-04/edstats
222 changes: 222 additions & 0 deletions etl/steps/data/garden/wb/2024-11-04/edstats.countries.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
{
"Afghanistan": "Afghanistan",
"Albania": "Albania",
"Algeria": "Algeria",
"American Samoa": "American Samoa",
"Andorra": "Andorra",
"Angola": "Angola",
"Antigua and Barbuda": "Antigua and Barbuda",
"Argentina": "Argentina",
"Armenia": "Armenia",
"Aruba": "Aruba",
"Australia": "Australia",
"Austria": "Austria",
"Azerbaijan": "Azerbaijan",
"Bahamas, The": "Bahamas",
"Bahrain": "Bahrain",
"Bangladesh": "Bangladesh",
"Barbados": "Barbados",
"Belarus": "Belarus",
"Belgium": "Belgium",
"Belize": "Belize",
"Benin": "Benin",
"Bermuda": "Bermuda",
"Bhutan": "Bhutan",
"Bolivia": "Bolivia",
"Bosnia and Herzegovina": "Bosnia and Herzegovina",
"Botswana": "Botswana",
"Brazil": "Brazil",
"British Virgin Islands": "British Virgin Islands",
"Brunei Darussalam": "Brunei",
"Bulgaria": "Bulgaria",
"Burkina Faso": "Burkina Faso",
"Burundi": "Burundi",
"Cabo Verde": "Cape Verde",
"Cambodia": "Cambodia",
"Cameroon": "Cameroon",
"Canada": "Canada",
"Cayman Islands": "Cayman Islands",
"Central African Republic": "Central African Republic",
"Chad": "Chad",
"Channel Islands": "Channel Islands",
"Chile": "Chile",
"China": "China",
"Colombia": "Colombia",
"Comoros": "Comoros",
"Costa Rica": "Costa Rica",
"Cote d'Ivoire": "Cote d'Ivoire",
"Croatia": "Croatia",
"Cuba": "Cuba",
"Curacao": "Curacao",
"Cyprus": "Cyprus",
"Czech Republic": "Czechia",
"Czechia": "Czechia",
"Denmark": "Denmark",
"Djibouti": "Djibouti",
"Dominica": "Dominica",
"Dominican Republic": "Dominican Republic",
"Ecuador": "Ecuador",
"El Salvador": "El Salvador",
"Equatorial Guinea": "Equatorial Guinea",
"Eritrea": "Eritrea",
"Estonia": "Estonia",
"Eswatini": "Eswatini",
"Ethiopia": "Ethiopia",
"Faroe Islands": "Faroe Islands",
"Fiji": "Fiji",
"Finland": "Finland",
"France": "France",
"French Polynesia": "French Polynesia",
"Gabon": "Gabon",
"Gambia, The": "Gambia",
"Georgia": "Georgia",
"Germany": "Germany",
"Ghana": "Ghana",
"Gibraltar": "Gibraltar",
"Greece": "Greece",
"Greenland": "Greenland",
"Grenada": "Grenada",
"Guam": "Guam",
"Guatemala": "Guatemala",
"Guinea": "Guinea",
"Guinea-Bissau": "Guinea-Bissau",
"Guyana": "Guyana",
"Haiti": "Haiti",
"Honduras": "Honduras",
"Hong Kong SAR, China": "Hong Kong",
"Hungary": "Hungary",
"Iceland": "Iceland",
"India": "India",
"Indonesia": "Indonesia",
"Iraq": "Iraq",
"Ireland": "Ireland",
"Isle of Man": "Isle of Man",
"Israel": "Israel",
"Italy": "Italy",
"Jamaica": "Jamaica",
"Japan": "Japan",
"Jordan": "Jordan",
"Kazakhstan": "Kazakhstan",
"Kenya": "Kenya",
"Kiribati": "Kiribati",
"Kosovo": "Kosovo",
"Kuwait": "Kuwait",
"Kyrgyz Republic": "Kyrgyzstan",
"Lao PDR": "Laos",
"Latvia": "Latvia",
"Lebanon": "Lebanon",
"Lesotho": "Lesotho",
"Liberia": "Liberia",
"Libya": "Libya",
"Liechtenstein": "Liechtenstein",
"Lithuania": "Lithuania",
"Luxembourg": "Luxembourg",
"Macao SAR, China": "Macao",
"Madagascar": "Madagascar",
"Malawi": "Malawi",
"Malaysia": "Malaysia",
"Maldives": "Maldives",
"Mali": "Mali",
"Malta": "Malta",
"Marshall Islands": "Marshall Islands",
"Mauritania": "Mauritania",
"Mauritius": "Mauritius",
"Mexico": "Mexico",
"Moldova": "Moldova",
"Monaco": "Monaco",
"Mongolia": "Mongolia",
"Montenegro": "Montenegro",
"Morocco": "Morocco",
"Mozambique": "Mozambique",
"Myanmar": "Myanmar",
"Namibia": "Namibia",
"Nauru": "Nauru",
"Nepal": "Nepal",
"Netherlands": "Netherlands",
"New Caledonia": "New Caledonia",
"New Zealand": "New Zealand",
"Nicaragua": "Nicaragua",
"Niger": "Niger",
"Nigeria": "Nigeria",
"North Macedonia": "North Macedonia",
"Northern Mariana Islands": "Northern Mariana Islands",
"Norway": "Norway",
"Oman": "Oman",
"Pakistan": "Pakistan",
"Palau": "Palau",
"Panama": "Panama",
"Papua New Guinea": "Papua New Guinea",
"Paraguay": "Paraguay",
"Peru": "Peru",
"Philippines": "Philippines",
"Poland": "Poland",
"Portugal": "Portugal",
"Puerto Rico": "Puerto Rico",
"Qatar": "Qatar",
"Romania": "Romania",
"Russian Federation": "Russia",
"Rwanda": "Rwanda",
"Samoa": "Samoa",
"San Marino": "San Marino",
"Sao Tome and Principe": "Sao Tome and Principe",
"Saudi Arabia": "Saudi Arabia",
"Senegal": "Senegal",
"Serbia": "Serbia",
"Seychelles": "Seychelles",
"Sierra Leone": "Sierra Leone",
"Singapore": "Singapore",
"Sint Maarten (Dutch part)": "Sint Maarten (Dutch part)",
"Slovak Republic": "Slovakia",
"Slovenia": "Slovenia",
"Solomon Islands": "Solomon Islands",
"Somalia": "Somalia",
"South Africa": "South Africa",
"South Sudan": "South Sudan",
"Spain": "Spain",
"Sri Lanka": "Sri Lanka",
"Sudan": "Sudan",
"Suriname": "Suriname",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Syrian Arab Republic": "Syria",
"Tajikistan": "Tajikistan",
"Tanzania": "Tanzania",
"Thailand": "Thailand",
"Timor-Leste": "East Timor",
"Togo": "Togo",
"Tonga": "Tonga",
"Trinidad and Tobago": "Trinidad and Tobago",
"Tunisia": "Tunisia",
"Turkey": "Turkey",
"Turkmenistan": "Turkmenistan",
"Turks and Caicos Islands": "Turks and Caicos Islands",
"Tuvalu": "Tuvalu",
"Uganda": "Uganda",
"Ukraine": "Ukraine",
"United Arab Emirates": "United Arab Emirates",
"United Kingdom": "United Kingdom",
"United States": "United States",
"Uruguay": "Uruguay",
"Uzbekistan": "Uzbekistan",
"Vanuatu": "Vanuatu",
"Venezuela, RB": "Venezuela",
"Viet Nam": "Vietnam",
"Vietnam": "Vietnam",
"West Bank and Gaza": "Palestine",
"Zambia": "Zambia",
"Zimbabwe": "Zimbabwe",
"Congo, Dem Rep": "Democratic Republic of Congo",
"Congo, Rep": "Congo",
"Egypt, Arab Rep": "Egypt",
"Iran, Islamic Rep": "Iran",
"Korea, Dem People\u2019s Rep": "North Korea",
"Korea, Rep": "South Korea",
"Micronesia, Fed Sts": "Micronesia (country)",
"St Kitts and Nevis": "Saint Kitts and Nevis",
"St Lucia": "Saint Lucia",
"St Martin (French part)": "Saint Martin (French part)",
"St Vincent and the Grenadines": "Saint Vincent and the Grenadines",
"Turkiye": "Turkey",
"Virgin Islands (US)": "United States Virgin Islands",
"Yemen, Rep": "Yemen"
}
55 changes: 55 additions & 0 deletions etl/steps/data/garden/wb/2024-11-04/edstats.meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
  common:
    presentation:
      topic_tags:
        - Global Education


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
  update_period_days: 364


tables:
  edstats:
    variables:
      value:
        # The templates below read indicator_name, source_note and unit_measure,
        # which the garden step places in the table index.
        title: << indicator_name >>

        # Missing source notes arrive as the string 'nan'; skip them.
        description_from_producer: |-
          <% if source_note != 'nan' %>
          << source_note >>
          <%- endif -%>
        # unit_measure codes are compared as quoted strings (a bare name would be
        # looked up as a template variable). Codes not listed here, including a
        # missing unit_measure ('nan'), fall through and render an empty unit.
        unit: |-
          <% if unit_measure == 'NUMBER' %>
          number
          <% elif unit_measure == 'SHARE' %>
          %
          <% elif unit_measure == 'USD' %>
          US dollars
          <% elif unit_measure == 'IX_0T1' %>
          index
          <% elif unit_measure == 'USD_CONST' %>
          constant US dollars
          <% elif unit_measure == 'YR' %>
          years
          <%- endif -%>
        # Short units are only defined for shares and dollar amounts.
        short_unit: |-
          <% if unit_measure == 'SHARE' %>
          %
          <% elif unit_measure == 'USD' %>
          $
          <% elif unit_measure == 'USD_CONST' %>
          constant $
          <%- endif -%>
40 changes: 40 additions & 0 deletions etl/steps/data/garden/wb/2024-11-04/edstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Load a meadow dataset and create a garden dataset."""

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Load the meadow ``edstats`` dataset and create the garden dataset.

    The meadow table is read, its country names are harmonized with this
    step's country mapping file, the table is indexed by country, year and
    the indicator descriptor columns, and the result is saved.

    Args:
        dest_dir: Destination directory handed to ``create_dataset``.
    """
    #
    # Load inputs.
    #
    # Load meadow dataset.
    ds_meadow = paths.load_dataset("edstats")

    # Read table from meadow dataset.
    tb = ds_meadow["edstats"].reset_index()

    #
    # Process data.
    #
    tb = geo.harmonize_countries(
        df=tb,
        countries_file=paths.country_mapping_path,
    )

    # NOTE(review): indicator_name, source_note and unit_measure are indexed
    # alongside country and year; they match the names used by the templates
    # in edstats.meta.yml -- confirm that is the reason they are index keys.
    tb = tb.format(["country", "year", "indicator_name", "source_note", "unit_measure"])

    #
    # Save outputs.
    #
    # Create a new garden dataset with the same metadata as the meadow dataset.
    ds_garden = create_dataset(
        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
    )

    # Save changes in the new garden dataset.
    ds_garden.save()
28 changes: 28 additions & 0 deletions etl/steps/data/grapher/wb/2024-11-04/edstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Load a garden dataset and create a grapher dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Create the grapher ``edstats`` dataset from the garden ``edstats`` dataset.

    The garden table is passed through without modification; the new dataset
    takes the garden dataset's metadata as its default metadata.

    Args:
        dest_dir: Destination directory handed to ``create_dataset``.
    """
    # Inputs: the garden dataset and its single "edstats" table.
    garden_dataset = paths.load_dataset("edstats")
    edstats_table = garden_dataset["edstats"]

    # Outputs: build the grapher dataset, then persist it.
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[edstats_table],
        check_variables_metadata=True,
        default_metadata=garden_dataset.metadata,
    )
    grapher_dataset.save()
Loading

0 comments on commit 038e139

Please sign in to comment.