annual averages

owid · Sep 12, 2024 · 561aee3 · 561aee3
1 parent 3cd7851
commit 561aee3
Show file tree

Hide file tree

Showing 4 changed files with 131 additions and 4 deletions.
diff --git a/etl/steps/data/garden/climate/2024-09-11/total_precipitation.meta.yml b/etl/steps/data/garden/climate/2024-09-11/total_precipitation.meta.yml
@@ -1,13 +1,13 @@
 
 definitions:
   common:
-    unit: meters
-    short_unit: m
+    unit: centimeters
+    short_unit: cm
     presentation:
       topic_tags:
       - Climate Change
     display:
-      numDecimalPlaces: 2
+      numDecimalPlaces: 1
     description_from_producer:
       This parameter is the accumulated liquid and frozen water, comprising rain and snow, that falls to the Earth's surface. It is the sum of large-scale precipitation and convective precipitation. Large-scale precipitation is generated by the cloud scheme in the ECMWF Integrated Forecasting System (IFS). The cloud scheme represents the formation and dissipation of clouds and large-scale precipitation due to changes in atmospheric quantities (such as pressure, temperature and moisture) predicted directly by the IFS at spatial scales of the grid box or larger. Convective precipitation is generated by the convection scheme in the IFS, which represents convection at spatial scales smaller than the grid box. This parameter does not include fog, dew or the precipitation that evaporates in the atmosphere before it lands at the surface of the Earth. This parameter is accumulated over a particular time period which depends on the data extracted. For the monthly averaged reanalysis and the monthly averaged ensemble members, the accumulation period is 1 day. For the monthly averaged reanalysis by hour of day, the accumulation period is 1 hour and for the monthly averaged ensemble members by hour of day, the accumulation period is 3 hours. The units of this parameter are depth in metres of water equivalent. It is the depth the water would have if it were spread evenly over the grid box. Care should be taken when comparing model parameters with observations, because observations are often local to a particular point in space and time, rather than representing averages over a model grid box.
     processing_level: major
@@ -38,7 +38,7 @@ tables:
       precipitation_anomaly:
         title: Precipitation anomaly
         description_short: |-
-          The difference between the total precipitation in a specific month and the average total precipitation for that month, in meters.
+          The difference between the total precipitation in a specific month and the average total precipitation for that month, in centimeters.
         description_processing: |-
             {definitions.common_processing}
             {definitions.precipitation_anomaly}
diff --git a/etl/steps/data/garden/climate/2024-09-11/total_precipitation.py b/etl/steps/data/garden/climate/2024-09-11/total_precipitation.py
@@ -27,6 +27,7 @@ def run(dest_dir: str) -> None:
 
     tb["year"] = tb["time"].astype(str).str[0:4]
     tb["month"] = tb["time"].astype(str).str[5:7]
+    tb["total_precipitation"] = tb["total_precipitation"] * 100  # Convert from m to cm
     # Use the baseline from the Copernicus Climate Service https://climate.copernicus.eu/surface-air-temperature-january-2024
     tb_baseline = tb[(tb["year"].astype(int) > 1990) & (tb["year"].astype(int) < 2021)]
     tb_baseline = tb_baseline.groupby(["country", "month"], as_index=False)["total_precipitation"].mean()

diff --git a/etl/steps/data/grapher/climate/2024-09-11/total_precipitation_annual_average.meta.yml b/etl/steps/data/grapher/climate/2024-09-11/total_precipitation_annual_average.meta.yml
@@ -0,0 +1,64 @@
+definitions:
+  common:
+    unit: centimeters
+    short_unit: cm
+    presentation:
+      topic_tags:
+      - Climate Change
+    display:
+      numDecimalPlaces: 1
+    description_from_producer:
+      This parameter is the accumulated liquid and frozen water, comprising rain and snow, that falls to the Earth's surface. It is the sum of large-scale precipitation and convective precipitation. Large-scale precipitation is generated by the cloud scheme in the ECMWF Integrated Forecasting System (IFS). The cloud scheme represents the formation and dissipation of clouds and large-scale precipitation due to changes in atmospheric quantities (such as pressure, temperature and moisture) predicted directly by the IFS at spatial scales of the grid box or larger. Convective precipitation is generated by the convection scheme in the IFS, which represents convection at spatial scales smaller than the grid box. This parameter does not include fog, dew or the precipitation that evaporates in the atmosphere before it lands at the surface of the Earth. This parameter is accumulated over a particular time period which depends on the data extracted. For the monthly averaged reanalysis and the monthly averaged ensemble members, the accumulation period is 1 day. For the monthly averaged reanalysis by hour of day, the accumulation period is 1 hour and for the monthly averaged ensemble members by hour of day, the accumulation period is 3 hours. The units of this parameter are depth in metres of water equivalent. It is the depth the water would have if it were spread evenly over the grid box. Care should be taken when comparing model parameters with observations, because observations are often local to a particular point in space and time, rather than representing averages over a model grid box.
+    processing_level: major
+  common_processing: |-
+    - Initially, the dataset is provided with specific coordinates in terms of longitude and latitude. To tailor this data to each country, we use geographical boundaries as defined by the World Bank. The method involves trimming the precipitation dataset to match the exact geographical shape of each country. To correct for potential distortions caused by projecting the Earth's curved surface onto a flat map, we apply a latitude-based weighting. This step is essential for maintaining accuracy, particularly in high-latitude regions where distortion is more pronounced. The result of this process is a latitude-weighted average precipitation for each nation.
+    - It’s important to note, however, that due to the resolution constraints of the Copernicus dataset, this methodology might not be as effective for countries with very small landmasses. In such cases, the process may not yield reliable data.
+    - The derived precipitation for each country is calculated based on administrative borders, encompassing all land surface types within these areas. As a result, precipitation over oceans and seas is not included in these averages, keeping the data focused on terrestrial environments.
+    - Global precipitation averages and anomalies, however, are calculated over both land and ocean surfaces.
+  precipitation_anomaly: |-
+    - The precipitation anomaly is calculated by comparing the average precipitation of a specific time period (e.g., a particular year or month) to the mean precipitation of the same period from 1991 to 2020.
+    - When calculating anomalies for each country, the average precipitation of a given year or month is compared to the 1991-2020 mean precipitation for that specific country.
+    - The reason for using the 1991-2020 period as the reference mean is that it is the standard reference period used by our data source, the Copernicus Climate Change Service. This period is also adopted by the UK Met Office. This approach ensures consistency in identifying climate variations over time.
+
+  desc_update: The 2024 data is incomplete and was last updated {TODAY}.
+
+dataset:
+  title: Annual precipitation and anomalies by country
+
+tables:
+  total_precipitation:
+    variables:
+      total_precipitation:
+        title: Total precipitation
+        description_short: Total annual amount of rain and snow that falls to the Earth's surface, reported as the depth of water over a given time period, excluding fog, dew, and evaporated precipitation. {definitions.desc_update}
+        description_processing: |-
+            {definitions.common_processing}
+            {definitions.precipitation_anomaly}
+
+
+      precipitation_anomaly:
+        title: Precipitation anomaly
+        description_short: |-
+          The deviation of a specific year's average amount of rain and snow from the 1991-2020 mean, in centimeters. {definitions.desc_update}
+        description_processing: |-
+            {definitions.common_processing}
+            {definitions.precipitation_anomaly}
+
+      total_precipitation_anomaly_decadal:
+        title: Decadal average precipitation anomaly
+        description_short: |-
+          The deviation of a specific decade's average precipitation from the 1991-2020 mean, in centimeters. {definitions.desc_update}
+
+        description_processing: |-
+            {definitions.common_processing}
+            {definitions.precipitation_anomaly}
+            - Decadal average precipitation anomaly is calculated by averaging the annual precipitation anomaly values for each decade.
+
+      total_precipitation_decadal:
+        title: Decadal average total precipitation
+        description_short: |-
+          Average decadal amount of rain and snow that falls to the Earth's surface, reported as the depth of water over a given time period, excluding fog, dew, and evaporated precipitation.
+        description_processing: |-
+            {definitions.common_processing}
+            {definitions.precipitation_anomaly}
+            - Decadal average total precipitation is calculated by averaging the annual total precipitation values for each decade.
diff --git a/etl/steps/data/grapher/climate/2024-09-11/total_precipitation_annual_average.py b/etl/steps/data/grapher/climate/2024-09-11/total_precipitation_annual_average.py
@@ -0,0 +1,62 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+import numpy as np
+import owid.catalog.processing as pr
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    """Build the grapher dataset of annual and decadal precipitation averages.
+
+    Loads the garden `total_precipitation` table, averages total precipitation and
+    its anomaly per country and year, attaches decadal means (kept only on the
+    first year of each decade) and saves the result as a grapher dataset.
+
+    Args:
+        dest_dir: Destination directory passed through to `create_dataset`.
+    """
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("total_precipitation")
+    tb = ds_garden["total_precipitation"].reset_index()
+
+    #
+    # Process data.
+    #
+    # Columns to average. The garden table holds precipitation variables, so the
+    # temperature column names used by the sibling temperature step do not exist here.
+    columns = ["total_precipitation", "precipitation_anomaly"]
+
+    # Get the year (first four characters of the time string).
+    tb["year"] = tb["time"].astype(str).str[0:4]
+
+    # Group by year and country and calculate the mean of the precipitation columns.
+    tb_annual_average = tb.groupby(["year", "country"]).agg({column: "mean" for column in columns}).reset_index()
+
+    # Convert the 'year' column to integer type.
+    tb_annual_average["year"] = tb_annual_average["year"].astype(int)
+
+    # Create a new column for the decade.
+    tb_annual_average["decade"] = (tb_annual_average["year"] // 10) * 10
+
+    # Group by decade and country, then calculate the mean for the same columns.
+    tb_decadal_average = tb_annual_average.groupby(["decade", "country"])[columns].mean().reset_index()
+    # Set the decadal values for 2020 to NaN (presumably because the 2020s are still incomplete).
+    tb_decadal_average.loc[tb_decadal_average["decade"] == 2020, columns] = np.nan
+    # Merge the decadal averages back onto the annual Table; decadal columns get a "_decadal" suffix.
+    combined = pr.merge(
+        tb_annual_average, tb_decadal_average, on=["decade", "country"], how="left", suffixes=("", "_decadal")
+    )
+
+    # Replace the decadal values with NaN for all years except the start of each decade.
+    decadal_columns = [f"{column}_decadal" for column in columns]
+    combined.loc[combined["year"] % 10 != 0, decadal_columns] = np.nan
+    combined = combined.drop(columns=["decade"])
+    # Match the variable name declared in the accompanying .meta.yml
+    # (NOTE(review): confirm the metadata key is the intended name).
+    combined = combined.rename(columns={"precipitation_anomaly_decadal": "total_precipitation_anomaly_decadal"})
+    # Index by year and country; verify_integrity raises if any (year, country) pair is duplicated.
+    combined = combined.set_index(["year", "country"], verify_integrity=True)
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset with the same metadata as the garden dataset.
+    ds_grapher = create_dataset(
+        dest_dir, tables=[combined], default_metadata=ds_garden.metadata, check_variables_metadata=True
+    )
+
+    ds_grapher.save()