From ce3cae1d196a955df05bf2093fba06bec1c929fc Mon Sep 17 00:00:00 2001 From: Pablo Rosado Date: Sun, 22 Sep 2024 19:16:59 +0200 Subject: [PATCH] Fix inconsistencies in Saudi Arabia's wind generation and capacity --- .../statistical_review_of_world_energy.py | 24 ------------------- .../renewable_electricity_capacity.py | 17 +++++++++++++ 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/etl/steps/data/garden/energy_institute/2024-06-20/statistical_review_of_world_energy.py b/etl/steps/data/garden/energy_institute/2024-06-20/statistical_review_of_world_energy.py index 09a144ea8d1..f352584c1b2 100644 --- a/etl/steps/data/garden/energy_institute/2024-06-20/statistical_review_of_world_energy.py +++ b/etl/steps/data/garden/energy_institute/2024-06-20/statistical_review_of_world_energy.py @@ -630,30 +630,6 @@ def run(dest_dir: str) -> None: (tb["country"] == "Saudi Arabia") & (tb["year"] >= 2000), ["hydro_consumption_equivalent_ej", "hydro_electricity_generation_twh"], ] = 0 - - # Wind generation (and consumption) for Saudi Arabia in 2022 and 2023 is possibly wrong. - # It goes from 0.005678 TWh in 2021 to 1.45 TWh in 2022 and 2023. - # According to IRENA, Saudi Arabia's wind capacity was 3MW in 2022: - # https://www.irena.org/Publications/2023/Jul/Renewable-energy-statistics-2023 - # Either generation or capacity must be wrong. - # For now, first assert that the (possibly spurious) jump is in the data, and then remove those points. - error = "Data for Saudi Arabia may have changed (possibly fixing a data issue). Remove this part of the code." 
- assert ( - tb[(tb["country"] == "Saudi Arabia") & (tb["year"] == 2021)]["wind_electricity_generation_twh"].item() < 0.006 - ), error - assert ( - tb[(tb["country"] == "Saudi Arabia") & (tb["year"] == 2022)]["wind_electricity_generation_twh"].item() > 1.45 - ), error - assert ( - tb[(tb["country"] == "Saudi Arabia") & (tb["year"] == 2021)]["wind_consumption_equivalent_ej"].item() < 0.00006 - ), error - assert ( - tb[(tb["country"] == "Saudi Arabia") & (tb["year"] == 2022)]["wind_consumption_equivalent_ej"].item() > 0.01 - ), error - tb.loc[ - (tb["country"] == "Saudi Arabia") & (tb["year"].isin([2022, 2023])), - ["wind_consumption_equivalent_ej", "wind_electricity_generation_twh"], - ] = None #################################################################################################################### # Create additional variables, like primary energy consumption in TWh (both direct and in input-equivalents). diff --git a/etl/steps/data/garden/irena/2023-12-12/renewable_electricity_capacity.py b/etl/steps/data/garden/irena/2023-12-12/renewable_electricity_capacity.py index 07d8dbefc5d..eb8997276af 100644 --- a/etl/steps/data/garden/irena/2023-12-12/renewable_electricity_capacity.py +++ b/etl/steps/data/garden/irena/2023-12-12/renewable_electricity_capacity.py @@ -50,6 +50,23 @@ def run(dest_dir: str) -> None: # For convenience, remove parentheses from column names. tb = tb.rename(columns={column: column.replace("(", "").replace(")", "") for column in tb.columns}, errors="raise") + #################################################################################################################### + # The latest Statistical Review reports a very high wind generation in Saudi Arabia in 2022. + # The reported value would be physically impossible, given the reported capacity from IRENA for 2022. + # However, IRENA has recently updated their data, reporting a much higher capacity for Saudi Arabia, of 403MW in 2022. 
+ # https://www.irena.org/Publications/2024/Mar/Renewable-capacity-statistics-2024 + # That would be consistent with the reported generation. + # For now, I will remove the old data point for 2022 (which was an estimate by IRENA). + # Next time capacity data is updated, the following assertion will fail, and we will remove this part of the code. + wind_columns = ["Onshore wind energy", "Wind energy"] + error = ( + "IRENA's reported wind capacity for Saudi Arabia used to be 3MW (incompatible with generation reported by " + "the Statistical Review). This may have changed now. Remove this code." + ) + assert tb.loc[(tb["country"] == "Saudi Arabia") & (tb["year"] == 2022), "Onshore wind energy"].item() == 3.25, error + tb.loc[(tb["country"] == "Saudi Arabia") & (tb["year"] == 2022), wind_columns] = None + #################################################################################################################### + # Add region aggregates. tb = geo.add_regions_to_table( tb, regions=REGIONS, ds_regions=ds_regions, ds_income_groups=ds_income_groups, min_num_values_per_year=1