📊 Data on share of births by decadal age of mother (#3350)

* adding data * births by decade grapher step * clean up * rename grapher step * add short name * fix * fix * updating display names * update dp * update age
owid · Oct 1, 2024 · 0043ee7 · 0043ee7
1 parent 12271c7
commit 0043ee7
Show file tree

Hide file tree

Showing 4 changed files with 125 additions and 0 deletions.
diff --git a/dag/demography.yml b/dag/demography.yml
@@ -263,3 +263,9 @@ steps:
     - data://garden/demography/2024-07-15/population
   data://grapher/unicef/2024-07-30/child_migration:
     - data://garden/unicef/2024-07-30/child_migration
+
+  # Births by decadal age group of the mother
+  data://garden/un/2024-10-01/births_by_age:
+    - data://garden/un/2024-07-12/un_wpp
+  data://grapher/un/2024-10-01/births_by_age:
+    - data://garden/un/2024-10-01/births_by_age
diff --git a/etl/steps/data/garden/un/2024-10-01/births_by_age.meta.yml b/etl/steps/data/garden/un/2024-10-01/births_by_age.meta.yml
@@ -0,0 +1,37 @@
+definitions:
+   common:
+    presentation:
+      attribution_short: UN WPP
+      grapher_config:
+        originUrl: "https://ourworldindata.org/population-growth"
+
+
+# this metadata file is not used in garden step, but in grapher step
+tables:
+  births_by_age:
+    variables:
+      births:
+        title: Number of births by mothers in their <<decadal_age.lower()>>
+        unit: births
+        description_processing: Values calculated by Our World in Data based on UN WPP data.
+        display:
+          name: <<decadal_age>>
+          numDecimalPlaces: 0
+        presentation:
+          title_public: |-
+            Births by mothers in their <<decadal_age.lower()>>
+          grapher_config:
+            note: "Values as of 1 July of the indicated year."
+      share:
+        title: Share of births by mothers in their <<decadal_age.lower()>>
+        unit: "%"
+        short_unit: "%"
+        description_processing: Values calculated by Our World in Data based on UN WPP data.
+        display:
+          name: <<decadal_age>>
+          numDecimalPlaces: 1
+        presentation:
+          title_public: |-
+            Share of births by mothers in their <<decadal_age.lower()>>
+          grapher_config:
+            note: "Values as of 1 July of the indicated year."
diff --git a/etl/steps/data/garden/un/2024-10-01/births_by_age.py b/etl/steps/data/garden/un/2024-10-01/births_by_age.py
@@ -0,0 +1,53 @@
+import owid.catalog.processing as pr
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+# NOTE(review): YEAR_SPLIT and COLUMNS_INDEX are not referenced by `run` in this
+# module — confirm whether they are still needed.
+YEAR_SPLIT = 2024
+COLUMNS_INDEX = ["country", "year", "sex", "age", "variant"]
+
+
+def run(dest_dir: str) -> None:
+    """Build the garden dataset of births by decadal age group of the mother.
+
+    Reads the `births` table of the `un_wpp` dataset, keeps the rows with
+    variant "estimates" and sex "all", sums the 5-year age groups into decadal
+    groups per country-year, and adds `share`: each group's births as a
+    percentage of the "all"-ages births of the same country-year.
+
+    Args:
+        dest_dir: Destination directory handed to `create_dataset`.
+    """
+    #
+    # Load inputs.
+    #
+    # Load the UN WPP garden dataset (the only dependency of this step).
+    ds_wpp = paths.load_dataset("un_wpp")
+    #
+    # Process data.
+    #
+    tb = ds_wpp["births"].reset_index()
+
+    # Get only the estimates data
+    tb = tb[(tb["variant"] == "estimates") & (tb["sex"] == "all")]
+    tb = tb.drop(columns=["variant", "birth_rate", "sex"])
+    # Get a separate table for all births, so it can be merged as a column
+    is_total = tb["age"] == "all"
+    tb_all = tb[is_total]
+    # Explicit copy: a column is assigned into this filtered slice below, which
+    # would otherwise raise pandas' SettingWithCopyWarning.
+    tb = tb[~is_total].copy()
+    # Move each age-group to a decade
+    age_to_decade = {
+        "10-14": "Teenage",
+        "15-19": "Teenage",
+        "20-24": "Twenties",
+        "25-29": "Twenties",
+        "30-34": "Thirties",
+        "35-39": "Thirties",
+        "40-44": "Forties",
+        "45-49": "Forties",
+        "50-54": "Fifties",
+    }
+    tb["decadal_age"] = tb["age"].map(age_to_decade)
+    # NOTE(review): age groups missing from the mapping become NaN and, with the
+    # pandas default `dropna=True`, are left out of the groupby without any error —
+    # confirm the mapping covers every age group present in the source table.
+    tb = tb.groupby(["country", "year", "decadal_age"])["births"].sum().reset_index()
+
+    tb_all = tb_all.rename(columns={"births": "all_births"}).drop(columns=["age"])
+    # Attach the total births of each country-year and express each decadal
+    # group as a percentage of that total.
+    tb = pr.merge(tb, tb_all, on=["country", "year"])
+    tb["share"] = (tb["births"] / tb["all_births"]) * 100
+    tb = tb.drop(columns=["all_births"])
+    tb = tb.format(["country", "year", "decadal_age"], short_name=paths.short_name)
+    #
+    # Save outputs.
+    #
+    ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+    # Save changes in the new garden dataset.
+    ds_garden.save()
diff --git a/etl/steps/data/grapher/un/2024-10-01/births_by_age.py b/etl/steps/data/grapher/un/2024-10-01/births_by_age.py
@@ -0,0 +1,29 @@
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    """Create the grapher dataset from the `births_by_age` garden dataset.
+
+    The garden table is passed through unchanged, and the garden dataset's
+    metadata is used as the default metadata of the grapher dataset.
+    """
+    #
+    # Load inputs.
+    #
+    # Read the garden dataset and take its `births_by_age` table as-is.
+    garden = paths.load_dataset("births_by_age")
+    table = garden["births_by_age"]
+
+    #
+    # Save outputs.
+    #
+    # Build the grapher dataset on top of the garden metadata and write it out.
+    create_dataset(
+        dest_dir,
+        tables=[table],
+        check_variables_metadata=True,
+        default_metadata=garden.metadata,
+    ).save()