diff --git a/dag/archive/climate.yml b/dag/archive/climate.yml index 1b9a3fce7bb..711fb854c45 100644 --- a/dag/archive/climate.yml +++ b/dag/archive/climate.yml @@ -450,3 +450,175 @@ steps: # data://grapher/climate/2024-05-20/climate_change_impacts_monthly: - data://garden/climate/2024-05-20/climate_change_impacts + # + # GISS - Surface temperature analysis. + # + data://meadow/climate/2024-07-23/surface_temperature_analysis: + - snapshot://climate/2024-07-23/surface_temperature_analysis_world.csv + - snapshot://climate/2024-07-23/surface_temperature_analysis_northern_hemisphere.csv + - snapshot://climate/2024-07-23/surface_temperature_analysis_southern_hemisphere.csv + # + # NOAA National Centers for Environmental Information - Ocean Heat Content. + # + data://meadow/climate/2024-07-23/ocean_heat_content: + - snapshot://climate/2024-07-23/ocean_heat_content_annual_world_2000m.csv + - snapshot://climate/2024-07-23/ocean_heat_content_monthly_world_2000m.csv + - snapshot://climate/2024-07-23/ocean_heat_content_annual_world_700m.csv + - snapshot://climate/2024-07-23/ocean_heat_content_monthly_world_700m.csv + # + # GISS - Surface temperature analysis. + # + data://garden/climate/2024-07-23/surface_temperature_analysis: + - data://meadow/climate/2024-07-23/surface_temperature_analysis + # + # NOAA Global Monitoring Laboratory - GHG concentration. + # + data://meadow/climate/2024-07-23/ghg_concentration: + - snapshot://climate/2024-07-23/n2o_concentration_monthly.csv + - snapshot://climate/2024-07-23/ch4_concentration_monthly.csv + - snapshot://climate/2024-07-23/co2_concentration_monthly.csv + # + # Rutgers University Global Snow Lab - Snow Cover Extent. + # + data://meadow/climate/2024-07-23/snow_cover_extent: + - snapshot://climate/2024-07-23/snow_cover_extent_northern_hemisphere.csv + - snapshot://climate/2024-07-23/snow_cover_extent_north_america.csv + # + # NSIDC - Arctic sea ice extent. + # + data://meadow/climate/2024-05-20/sea_ice_index: + - snapshot://climate/2024-05-20/sea_ice_index.xlsx + # + # GISS - Surface temperature analysis. + # + data://meadow/climate/2024-05-20/surface_temperature_analysis: + - snapshot://climate/2024-05-20/surface_temperature_analysis_southern_hemisphere.csv + - snapshot://climate/2024-05-20/surface_temperature_analysis_northern_hemisphere.csv + - snapshot://climate/2024-05-20/surface_temperature_analysis_world.csv + # + # Met Office Hadley Centre - Sea surface temperature. + # + data://meadow/climate/2024-07-23/sea_surface_temperature: + - snapshot://climate/2024-07-23/sea_surface_temperature_southern_hemisphere.csv + - snapshot://climate/2024-07-23/sea_surface_temperature_northern_hemisphere.csv + - snapshot://climate/2024-07-23/sea_surface_temperature_world.csv + # + # NSIDC - Arctic sea ice extent. + # + data://meadow/climate/2024-07-23/sea_ice_index: + - snapshot://climate/2024-07-23/sea_ice_index.xlsx + # + # NOAA National Centers for Environmental Information - Ocean Heat Content. + # + data://garden/climate/2024-07-23/ocean_heat_content: + - data://meadow/climate/2024-07-23/ocean_heat_content + # + # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series (HOT). + # + data://meadow/climate/2024-05-20/hawaii_ocean_time_series: + - snapshot://climate/2024-05-20/hawaii_ocean_time_series.csv + # + # Met Office Hadley Centre - Sea surface temperature. 
+ # + data://meadow/climate/2024-05-20/sea_surface_temperature: + - snapshot://climate/2024-05-20/sea_surface_temperature_southern_hemisphere.csv + - snapshot://climate/2024-05-20/sea_surface_temperature_world.csv + - snapshot://climate/2024-05-20/sea_surface_temperature_northern_hemisphere.csv + # + # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series (HOT). + # + data://meadow/climate/2024-07-23/hawaii_ocean_time_series: + - snapshot://climate/2024-07-23/hawaii_ocean_time_series.csv + # + # NOAA Global Monitoring Laboratory - GHG concentration. + # + data://meadow/climate/2024-05-20/ghg_concentration: + - snapshot://climate/2024-05-20/n2o_concentration_monthly.csv + - snapshot://climate/2024-05-20/ch4_concentration_monthly.csv + - snapshot://climate/2024-05-20/co2_concentration_monthly.csv + # + # NOAA National Centers for Environmental Information - Ocean Heat Content. + # + data://meadow/climate/2024-05-20/ocean_heat_content: + - snapshot://climate/2024-05-20/ocean_heat_content_annual_world_2000m.csv + - snapshot://climate/2024-05-20/ocean_heat_content_annual_world_700m.csv + - snapshot://climate/2024-05-20/ocean_heat_content_monthly_world_2000m.csv + - snapshot://climate/2024-05-20/ocean_heat_content_monthly_world_700m.csv + # + # Rutgers University Global Snow Lab - Snow Cover Extent. + # + data://meadow/climate/2024-05-20/snow_cover_extent: + - snapshot://climate/2024-05-20/snow_cover_extent_northern_hemisphere.csv + - snapshot://climate/2024-05-20/snow_cover_extent_north_america.csv + # + # NOAA Global Monitoring Laboratory - GHG concentration. + # + data://garden/climate/2024-07-23/ghg_concentration: + - data://meadow/climate/2024-07-23/ghg_concentration + # + # Rutgers University Global Snow Lab - Snow Cover Extent. + # + data://garden/climate/2024-07-23/snow_cover_extent: + - data://meadow/climate/2024-07-23/snow_cover_extent + # + # Met Office Hadley Centre - Sea surface temperature. + # + data://garden/climate/2024-07-23/sea_surface_temperature: + - data://meadow/climate/2024-07-23/sea_surface_temperature + # + # NSIDC - Arctic sea ice extent. + # + data://garden/climate/2024-07-23/sea_ice_index: + - data://meadow/climate/2024-07-23/sea_ice_index + # + # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series (HOT). + # + data://garden/climate/2024-07-23/ocean_ph_levels: + - data://meadow/climate/2024-07-23/hawaii_ocean_time_series + # + # Various sources - Long-run greenhouse gas concentration. + # + data://garden/climate/2024-07-23/long_run_ghg_concentration: + - data://garden/climate/2024-07-23/ghg_concentration + - data://garden/epa/2024-04-17/ghg_concentration + # + # NSIDC - Monthly sea ice extent ("country" for decades and latest year, "year" for month number, one indicator per hemisphere). + # + data://grapher/climate/2024-07-23/sea_ice_extent_by_decade: + - data://garden/climate/2024-07-23/sea_ice_index + # + # NSIDC - Monthly sea ice extent ("country" for years, "year" for month number, one indicator per hemisphere). + # + data://grapher/climate/2024-07-23/sea_ice_extent_by_year: + - data://garden/climate/2024-07-23/sea_ice_index + # + # NSIDC - Monthly sea ice anomalies ("country" for month names, "year" for years, one indicator per hemisphere). + # + data://grapher/climate/2024-07-23/sea_ice_anomalies_by_month: + - data://garden/climate/2024-07-23/sea_ice_index + # + # Various sources - Climate change impacts. 
+ # + data://garden/climate/2024-07-23/climate_change_impacts: + - data://garden/climate/2024-07-23/surface_temperature_analysis + - data://garden/climate/2024-07-23/snow_cover_extent + - data://garden/climate/2024-01-28/global_sea_level + - data://garden/epa/2024-04-17/ocean_heat_content + - data://garden/climate/2024-07-23/long_run_ghg_concentration + - data://garden/epa/2024-04-17/ice_sheet_mass_balance + - data://garden/epa/2024-04-17/mass_balance_us_glaciers + - data://garden/climate/2024-07-23/sea_ice_index + - data://garden/climate/2024-07-23/ghg_concentration + - data://garden/climate/2024-07-23/ocean_ph_levels + - data://garden/climate/2024-07-23/sea_surface_temperature + - data://garden/climate/2024-07-23/ocean_heat_content + # + # Various sources - Climate change impacts (monthly). + # + data://grapher/climate/2024-07-23/climate_change_impacts_monthly: + - data://garden/climate/2024-07-23/climate_change_impacts + # + # Various sources - Climate change impacts (annual). + # + data://grapher/climate/2024-07-23/climate_change_impacts_annual: + - data://garden/climate/2024-07-23/climate_change_impacts diff --git a/dag/climate.yml b/dag/climate.yml index c4e17e14a03..ba4b5e141ea 100644 --- a/dag/climate.yml +++ b/dag/climate.yml @@ -54,7 +54,7 @@ steps: # Climate change impacts data explorer. # data://explorers/climate/latest/climate_change_impacts: - - data://garden/climate/2024-07-23/climate_change_impacts + - data://garden/climate/2024-09-30/climate_change_impacts # # Global Wildfire Information System - Monthly burned area. # @@ -122,51 +122,6 @@ steps: data://garden/epa/2024-04-17/mass_balance_us_glaciers: - data://meadow/epa/2024-04-17/mass_balance_us_glaciers # - # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series (HOT). - # - data://meadow/climate/2024-05-20/hawaii_ocean_time_series: - - snapshot://climate/2024-05-20/hawaii_ocean_time_series.csv - # - # NOAA National Centers for Environmental Information - Ocean Heat Content. - # - data://meadow/climate/2024-05-20/ocean_heat_content: - - snapshot://climate/2024-05-20/ocean_heat_content_annual_world_2000m.csv - - snapshot://climate/2024-05-20/ocean_heat_content_monthly_world_700m.csv - - snapshot://climate/2024-05-20/ocean_heat_content_monthly_world_2000m.csv - - snapshot://climate/2024-05-20/ocean_heat_content_annual_world_700m.csv - # - # GISS - Surface temperature analysis. - # - data://meadow/climate/2024-05-20/surface_temperature_analysis: - - snapshot://climate/2024-05-20/surface_temperature_analysis_northern_hemisphere.csv - - snapshot://climate/2024-05-20/surface_temperature_analysis_world.csv - - snapshot://climate/2024-05-20/surface_temperature_analysis_southern_hemisphere.csv - # - # Rutgers University Global Snow Lab - Snow Cover Extent. - # - data://meadow/climate/2024-05-20/snow_cover_extent: - - snapshot://climate/2024-05-20/snow_cover_extent_north_america.csv - - snapshot://climate/2024-05-20/snow_cover_extent_northern_hemisphere.csv - # - # Met Office Hadley Centre - Sea surface temperature. - # - data://meadow/climate/2024-05-20/sea_surface_temperature: - - snapshot://climate/2024-05-20/sea_surface_temperature_northern_hemisphere.csv - - snapshot://climate/2024-05-20/sea_surface_temperature_southern_hemisphere.csv - - snapshot://climate/2024-05-20/sea_surface_temperature_world.csv - # - # NOAA Global Monitoring Laboratory - GHG concentration. 
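Each entry under `steps` in these DAG files maps a step URI to the list of steps or snapshots it depends on; moving the 2024-07-23 steps into `dag/archive/climate.yml` keeps them resolvable without cluttering the active DAG. As a rough illustration (a sketch assuming PyYAML and paths relative to the repo root; not part of this PR), the graph can be queried directly:

```python
# Minimal sketch: inspect a DAG file as a plain dependency mapping.
import yaml  # assumption: PyYAML is available

with open("dag/archive/climate.yml") as f:
    dag = yaml.safe_load(f)["steps"]

step = "data://garden/climate/2024-07-23/climate_change_impacts"
print(f"{step} depends on:")
for dependency in dag.get(step) or []:
    print(f"  - {dependency}")

# Reverse lookup: which archived steps consume this dataset?
dependents = [uri for uri, deps in dag.items() if step in (deps or [])]
print(dependents)
```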
- # - data://meadow/climate/2024-05-20/ghg_concentration: - - snapshot://climate/2024-05-20/ch4_concentration_monthly.csv - - snapshot://climate/2024-05-20/co2_concentration_monthly.csv - - snapshot://climate/2024-05-20/n2o_concentration_monthly.csv - # - # NSIDC - Arctic sea ice extent. - # - data://meadow/climate/2024-05-20/sea_ice_index: - - snapshot://climate/2024-05-20/sea_ice_index.xlsx - # # Met Office Hadley Centre - Near surface temperature anomaly. # data://meadow/met_office_hadley_centre/2024-05-20/near_surface_temperature: @@ -184,129 +139,129 @@ steps: data://grapher/met_office_hadley_centre/2024-05-20/near_surface_temperature: - data://garden/met_office_hadley_centre/2024-05-20/near_surface_temperature # - # NOAA Global Monitoring Laboratory - GHG concentration. - # - data://meadow/climate/2024-07-23/ghg_concentration: - - snapshot://climate/2024-07-23/ch4_concentration_monthly.csv - - snapshot://climate/2024-07-23/n2o_concentration_monthly.csv - - snapshot://climate/2024-07-23/co2_concentration_monthly.csv - # # Met Office Hadley Centre - Sea surface temperature. # - data://meadow/climate/2024-07-23/sea_surface_temperature: - - snapshot://climate/2024-07-23/sea_surface_temperature_southern_hemisphere.csv - - snapshot://climate/2024-07-23/sea_surface_temperature_world.csv - - snapshot://climate/2024-07-23/sea_surface_temperature_northern_hemisphere.csv + data://meadow/climate/2024-09-30/sea_surface_temperature: + - snapshot://climate/2024-09-30/sea_surface_temperature_world.csv + - snapshot://climate/2024-09-30/sea_surface_temperature_northern_hemisphere.csv + - snapshot://climate/2024-09-30/sea_surface_temperature_southern_hemisphere.csv # - # GISS - Surface temperature analysis. + # NOAA National Centers for Environmental Information - Ocean Heat Content. # - data://meadow/climate/2024-07-23/surface_temperature_analysis: - - snapshot://climate/2024-07-23/surface_temperature_analysis_northern_hemisphere.csv - - snapshot://climate/2024-07-23/surface_temperature_analysis_southern_hemisphere.csv - - snapshot://climate/2024-07-23/surface_temperature_analysis_world.csv + data://meadow/climate/2024-09-30/ocean_heat_content: + - snapshot://climate/2024-07-23/ocean_heat_content_monthly_world_700m.csv + - snapshot://climate/2024-07-23/ocean_heat_content_monthly_world_2000m.csv + - snapshot://climate/2024-07-23/ocean_heat_content_annual_world_700m.csv + - snapshot://climate/2024-07-23/ocean_heat_content_annual_world_2000m.csv # # NSIDC - Arctic sea ice extent. # - data://meadow/climate/2024-07-23/sea_ice_index: - - snapshot://climate/2024-07-23/sea_ice_index.xlsx + data://meadow/climate/2024-09-30/sea_ice_index: + - snapshot://climate/2024-09-30/sea_ice_index.xlsx # # Rutgers University Global Snow Lab - Snow Cover Extent. # - data://meadow/climate/2024-07-23/snow_cover_extent: - - snapshot://climate/2024-07-23/snow_cover_extent_north_america.csv - - snapshot://climate/2024-07-23/snow_cover_extent_northern_hemisphere.csv + data://meadow/climate/2024-09-30/snow_cover_extent: + - snapshot://climate/2024-09-30/snow_cover_extent_north_america.csv + - snapshot://climate/2024-09-30/snow_cover_extent_northern_hemisphere.csv # - # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series (HOT). + # GISS - Surface temperature analysis. 
# - data://meadow/climate/2024-07-23/hawaii_ocean_time_series: - - snapshot://climate/2024-07-23/hawaii_ocean_time_series.csv + data://meadow/climate/2024-09-30/surface_temperature_analysis: + - snapshot://climate/2024-07-23/surface_temperature_analysis_world.csv + - snapshot://climate/2024-07-23/surface_temperature_analysis_southern_hemisphere.csv + - snapshot://climate/2024-07-23/surface_temperature_analysis_northern_hemisphere.csv # - # NOAA National Centers for Environmental Information - Ocean Heat Content. + # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series (HOT). # - data://meadow/climate/2024-07-23/ocean_heat_content: - - snapshot://climate/2024-07-23/ocean_heat_content_monthly_world_2000m.csv - - snapshot://climate/2024-07-23/ocean_heat_content_annual_world_700m.csv - - snapshot://climate/2024-07-23/ocean_heat_content_annual_world_2000m.csv - - snapshot://climate/2024-07-23/ocean_heat_content_monthly_world_700m.csv + data://meadow/climate/2024-09-30/hawaii_ocean_time_series: + - snapshot://climate/2024-09-30/hawaii_ocean_time_series.csv # # NOAA Global Monitoring Laboratory - GHG concentration. # - data://garden/climate/2024-07-23/ghg_concentration: - - data://meadow/climate/2024-07-23/ghg_concentration + data://meadow/climate/2024-09-30/ghg_concentration: + - snapshot://climate/2024-09-30/n2o_concentration_monthly.csv + - snapshot://climate/2024-09-30/co2_concentration_monthly.csv + - snapshot://climate/2024-09-30/ch4_concentration_monthly.csv # # Met Office Hadley Centre - Sea surface temperature. # - data://garden/climate/2024-07-23/sea_surface_temperature: - - data://meadow/climate/2024-07-23/sea_surface_temperature + data://garden/climate/2024-09-30/sea_surface_temperature: + - data://meadow/climate/2024-09-30/sea_surface_temperature # - # GISS - Surface temperature analysis. + # NOAA National Centers for Environmental Information - Ocean Heat Content. # - data://garden/climate/2024-07-23/surface_temperature_analysis: - - data://meadow/climate/2024-07-23/surface_temperature_analysis + data://garden/climate/2024-09-30/ocean_heat_content: + - data://meadow/climate/2024-09-30/ocean_heat_content # # NSIDC - Arctic sea ice extent. # - data://garden/climate/2024-07-23/sea_ice_index: - - data://meadow/climate/2024-07-23/sea_ice_index + data://garden/climate/2024-09-30/sea_ice_index: + - data://meadow/climate/2024-09-30/sea_ice_index # # Rutgers University Global Snow Lab - Snow Cover Extent. # - data://garden/climate/2024-07-23/snow_cover_extent: - - data://meadow/climate/2024-07-23/snow_cover_extent + data://garden/climate/2024-09-30/snow_cover_extent: + - data://meadow/climate/2024-09-30/snow_cover_extent + # + # GISS - Surface temperature analysis. + # + data://garden/climate/2024-09-30/surface_temperature_analysis: + - data://meadow/climate/2024-09-30/surface_temperature_analysis # # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series (HOT). # - data://garden/climate/2024-07-23/ocean_ph_levels: - - data://meadow/climate/2024-07-23/hawaii_ocean_time_series + data://garden/climate/2024-09-30/ocean_ph_levels: + - data://meadow/climate/2024-09-30/hawaii_ocean_time_series # - # NOAA National Centers for Environmental Information - Ocean Heat Content. + # NOAA Global Monitoring Laboratory - GHG concentration. 
# - data://garden/climate/2024-07-23/ocean_heat_content: - - data://meadow/climate/2024-07-23/ocean_heat_content + data://garden/climate/2024-09-30/ghg_concentration: + - data://meadow/climate/2024-09-30/ghg_concentration # - # Various sources - Long-run greenhouse gas concentration. + # NSIDC - Monthly sea ice anomalies ("country" for month names, "year" for years, one indicator per hemisphere). # - data://garden/climate/2024-07-23/long_run_ghg_concentration: - - data://garden/climate/2024-07-23/ghg_concentration - - data://garden/epa/2024-04-17/ghg_concentration + data://grapher/climate/2024-09-30/sea_ice_anomalies_by_month: + - data://garden/climate/2024-09-30/sea_ice_index # - # Various sources - Climate change impacts. + # NSIDC - Monthly sea ice extent ("country" for years, "year" for month number, one indicator per hemisphere). # - data://garden/climate/2024-07-23/climate_change_impacts: - - data://garden/climate/2024-07-23/surface_temperature_analysis - - data://garden/climate/2024-07-23/ocean_ph_levels - - data://garden/climate/2024-07-23/sea_ice_index - - data://garden/climate/2024-07-23/long_run_ghg_concentration - - data://garden/climate/2024-07-23/snow_cover_extent - - data://garden/climate/2024-07-23/sea_surface_temperature - - data://garden/climate/2024-01-28/global_sea_level - - data://garden/epa/2024-04-17/mass_balance_us_glaciers - - data://garden/epa/2024-04-17/ice_sheet_mass_balance - - data://garden/climate/2024-07-23/ghg_concentration - - data://garden/climate/2024-07-23/ocean_heat_content - - data://garden/epa/2024-04-17/ocean_heat_content + data://grapher/climate/2024-09-30/sea_ice_extent_by_year: + - data://garden/climate/2024-09-30/sea_ice_index # - # Various sources - Climate change impacts (annual). + # NSIDC - Monthly sea ice extent ("country" for decades and latest year, "year" for month number, one indicator per hemisphere). # - data://grapher/climate/2024-07-23/climate_change_impacts_annual: - - data://garden/climate/2024-07-23/climate_change_impacts + data://grapher/climate/2024-09-30/sea_ice_extent_by_decade: + - data://garden/climate/2024-09-30/sea_ice_index # - # Various sources - Climate change impacts (monthly). + # Various sources - Long-run greenhouse gas concentration. # - data://grapher/climate/2024-07-23/climate_change_impacts_monthly: - - data://garden/climate/2024-07-23/climate_change_impacts + data://garden/climate/2024-09-30/long_run_ghg_concentration: + - data://garden/epa/2024-04-17/ghg_concentration + - data://garden/climate/2024-09-30/ghg_concentration # - # NSIDC - Monthly sea ice anomalies ("country" for month names, "year" for years, one indicator per hemisphere). + # Various sources - Climate change impacts. 
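The active-DAG changes in `dag/climate.yml` are mostly mechanical: the date component of each step URI moves from 2024-07-23 to 2024-09-30, while a few snapshot dependencies (ocean heat content and surface temperature analysis) deliberately stay at 2024-07-23. A hedged sketch of that rewrite (`bump_step_version` is a hypothetical helper, not an etl utility):

```python
import re

OLD_VERSION, NEW_VERSION = "2024-07-23", "2024-09-30"


def bump_step_version(uri: str) -> str:
    # Replace only the date segment of the step URI, keeping channel, namespace and short name.
    return re.sub(rf"/{OLD_VERSION}/", f"/{NEW_VERSION}/", uri)


assert (
    bump_step_version("data://garden/climate/2024-07-23/sea_ice_index")
    == "data://garden/climate/2024-09-30/sea_ice_index"
)
```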
# - data://grapher/climate/2024-07-23/sea_ice_anomalies_by_month: - - data://garden/climate/2024-07-23/sea_ice_index + data://garden/climate/2024-09-30/climate_change_impacts: + - data://garden/epa/2024-04-17/ocean_heat_content + - data://garden/climate/2024-09-30/long_run_ghg_concentration + - data://garden/epa/2024-04-17/ice_sheet_mass_balance + - data://garden/climate/2024-09-30/sea_surface_temperature + - data://garden/epa/2024-04-17/mass_balance_us_glaciers + - data://garden/climate/2024-01-28/global_sea_level + - data://garden/climate/2024-09-30/ocean_heat_content + - data://garden/climate/2024-09-30/ocean_ph_levels + - data://garden/climate/2024-09-30/snow_cover_extent + - data://garden/climate/2024-09-30/surface_temperature_analysis + - data://garden/climate/2024-09-30/sea_ice_index + - data://garden/climate/2024-09-30/ghg_concentration # - # NSIDC - Monthly sea ice extent ("country" for years, "year" for month number, one indicator per hemisphere). + # Various sources - Climate change impacts (annual). # - data://grapher/climate/2024-07-23/sea_ice_extent_by_year: - - data://garden/climate/2024-07-23/sea_ice_index + data://grapher/climate/2024-09-30/climate_change_impacts_annual: + - data://garden/climate/2024-09-30/climate_change_impacts # - # NSIDC - Monthly sea ice extent ("country" for decades and latest year, "year" for month number, one indicator per hemisphere). + # Various sources - Climate change impacts (monthly). # - data://grapher/climate/2024-07-23/sea_ice_extent_by_decade: - - data://garden/climate/2024-07-23/sea_ice_index + data://grapher/climate/2024-09-30/climate_change_impacts_monthly: + - data://garden/climate/2024-09-30/climate_change_impacts diff --git a/etl/steps/data/garden/climate/2024-09-30/climate_change_impacts.meta.yml b/etl/steps/data/garden/climate/2024-09-30/climate_change_impacts.meta.yml new file mode 100644 index 00000000000..2a5bbd540b2 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/climate_change_impacts.meta.yml @@ -0,0 +1,24 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + +dataset: + title: Climate Change Impacts + update_period_days: 60 + +tables: + climate_change_impacts_annual: + title: Climate Change Impacts - Annual + variables: + arctic_sea_ice_extent_min: + title: Minimum Arctic sea ice extent + arctic_sea_ice_extent_max: + title: Maximum Arctic sea ice extent + antarctic_sea_ice_extent_min: + title: Minimum Antarctic sea ice extent + antarctic_sea_ice_extent_max: + title: Maximum Antarctic sea ice extent + climate_change_impacts_monthly: + title: Climate Change Impacts - Monthly diff --git a/etl/steps/data/garden/climate/2024-09-30/climate_change_impacts.py b/etl/steps/data/garden/climate/2024-09-30/climate_change_impacts.py new file mode 100644 index 00000000000..38f00ffd808 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/climate_change_impacts.py @@ -0,0 +1,174 @@ +"""Create a garden dataset with all climate change impacts data. + +""" + +from owid.catalog import Table +from owid.datautils.dataframes import combine_two_overlapping_dataframes + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def prepare_sea_ice_extent(tb_nsidc: Table) -> Table: + tb_nsidc = tb_nsidc.copy() + # Create a table with the minimum and maximum Arctic sea ice extent. + # Assume minimum and maximum occur in September and February every year. 
+ tb_nsidc["month"] = tb_nsidc["date"].astype(str).str[5:7] + tb_nsidc["year"] = tb_nsidc["date"].astype(str).str[0:4].astype(int) + arctic_sea_ice_extent = ( + tb_nsidc[(tb_nsidc["location"] == "Northern Hemisphere") & (tb_nsidc["month"].isin(["02", "09"]))] + .pivot(index=["location", "year"], columns=["month"], values="sea_ice_extent", join_column_levels_with=" ") + .rename(columns={"02": "arctic_sea_ice_extent_max", "09": "arctic_sea_ice_extent_min"}, errors="raise") + ) + # Instead of calling the location a generic "Northern Hemisphere", call it "Arctic Ocean". + arctic_sea_ice_extent["location"] = "Arctic Ocean" + + # Idem for the Antarctic sea ice extent. + # Assume maximum and minimum occur in September and February every year. + antarctic_sea_ice_extent = ( + tb_nsidc[(tb_nsidc["location"] == "Southern Hemisphere") & (tb_nsidc["month"].isin(["02", "09"]))] + .pivot(index=["location", "year"], columns=["month"], values="sea_ice_extent", join_column_levels_with=" ") + .rename(columns={"02": "antarctic_sea_ice_extent_min", "09": "antarctic_sea_ice_extent_max"}, errors="raise") + ) + # Instead of calling the location a generic "Southern Hemisphere", call it "Antarctica". + antarctic_sea_ice_extent["location"] = "Antarctica" + + return arctic_sea_ice_extent, antarctic_sea_ice_extent + + +def prepare_ocean_heat_content(tb_ocean_heat_annual: Table, tb_ocean_heat_annual_epa: Table) -> Table: + # Combine NOAA's annual data on ocean heat content (which is more up-to-date) with the analogous EPA's data based on + # NOAA (which, for some reason, spans a longer time range for 2000m). Prioritize NOAA's data on common years. + tb_ocean_heat_annual = combine_two_overlapping_dataframes( + tb_ocean_heat_annual.rename( + columns={ + "ocean_heat_content_700m": "ocean_heat_content_noaa_700m", + "ocean_heat_content_2000m": "ocean_heat_content_noaa_2000m", + }, + errors="raise", + ), + tb_ocean_heat_annual_epa, + index_columns=["location", "year"], + ) + # Recover the original indicator titles (they are empty because of combining two columns with different titles). + tb_ocean_heat_annual["ocean_heat_content_noaa_700m"].metadata.title = tb_ocean_heat_annual_epa[ + "ocean_heat_content_noaa_700m" + ].metadata.title + tb_ocean_heat_annual["ocean_heat_content_noaa_2000m"].metadata.title = tb_ocean_heat_annual_epa[ + "ocean_heat_content_noaa_2000m" + ].metadata.title + + return tb_ocean_heat_annual + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load GISS dataset surface temperature analysis, and read monthly data. + ds_giss = paths.load_dataset("surface_temperature_analysis") + tb_giss = ds_giss["surface_temperature_analysis"].reset_index() + + # Load NSIDC dataset of sea ice index. + ds_nsidc = paths.load_dataset("sea_ice_index") + tb_nsidc = ds_nsidc["sea_ice_index"].reset_index() + + # Load Met Office dataset on sea surface temperature. + ds_met_office = paths.load_dataset("sea_surface_temperature") + tb_met_office = ds_met_office["sea_surface_temperature"].reset_index() + + # Load NOAA/NCIE dataset on ocean heat content. + ds_ocean_heat = paths.load_dataset("ocean_heat_content", namespace="climate") + tb_ocean_heat_monthly = ds_ocean_heat["ocean_heat_content_monthly"].reset_index() + tb_ocean_heat_annual = ds_ocean_heat["ocean_heat_content_annual"].reset_index() + + # Load EPA's compilation of data on ocean heat content. 
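Before the EPA table is loaded below: `prepare_ocean_heat_content` above relies on `combine_two_overlapping_dataframes` from `owid.datautils`. A rough pandas analogue of the behavior the comments describe (rows aligned on location and year, with the first table winning where both have values) would be:

```python
import pandas as pd

# Two overlapping tables keyed on ("location", "year"); df1 (NOAA) should win on overlap.
df1 = pd.DataFrame({"location": ["World", "World"], "year": [2000, 2001], "ohc": [1.0, None]})
df2 = pd.DataFrame({"location": ["World"] * 3, "year": [1999, 2000, 2001], "ohc": [0.5, 9.9, 2.0]})

combined = (
    df1.set_index(["location", "year"])
    .combine_first(df2.set_index(["location", "year"]))
    .reset_index()
)
print(combined)
# 1999 -> 0.5 (df2 only), 2000 -> 1.0 (df1 wins), 2001 -> 2.0 (df1 was NaN, filled from df2)
```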
+ ds_epa = paths.load_dataset("ocean_heat_content", namespace="epa") + tb_ocean_heat_annual_epa = ds_epa["ocean_heat_content"].reset_index() + + # Load ocean pH data from the School of Ocean and Earth Science and Technology. + ds_ocean_ph = paths.load_dataset("ocean_ph_levels") + tb_ocean_ph = ds_ocean_ph["ocean_ph_levels"].reset_index() + + # Load snow cover extent from Rutgers University Global Snow Lab. + ds_snow = paths.load_dataset("snow_cover_extent") + tb_snow = ds_snow["snow_cover_extent"].reset_index() + + # Load ice sheet mass balance data from EPA. + ds_ice_sheet = paths.load_dataset("ice_sheet_mass_balance") + tb_ice_sheet = ds_ice_sheet["ice_sheet_mass_balance"].reset_index() + + # Load annual data on mass balance of US glaciers from EPA. + ds_us_glaciers = paths.load_dataset("mass_balance_us_glaciers") + tb_us_glaciers = ds_us_glaciers["mass_balance_us_glaciers"].reset_index() + + # Load monthly greenhouse gas concentration data from NOAA/GML. + ds_gml = paths.load_dataset("ghg_concentration") + tb_gml = ds_gml["ghg_concentration"].reset_index() + + # Load long-run yearly greenhouse gas concentration data. + ds_ghg = paths.load_dataset("long_run_ghg_concentration") + tb_ghg = ds_ghg["long_run_ghg_concentration"].reset_index() + + # Load global sea level. + ds_sea_level = paths.load_dataset("global_sea_level") + tb_sea_level = ds_sea_level["global_sea_level"].reset_index() + + # + # Process data. + # + # Prepare sea ice extent data. + arctic_sea_ice_extent, antarctic_sea_ice_extent = prepare_sea_ice_extent(tb_nsidc=tb_nsidc) + + # Prepare ocean heat content data. + tb_ocean_heat_annual = prepare_ocean_heat_content( + tb_ocean_heat_annual=tb_ocean_heat_annual, tb_ocean_heat_annual_epa=tb_ocean_heat_annual_epa + ) + + # Gather monthly data from different tables. + tb_monthly = tb_giss.astype({"date": str}).copy() + # NOTE: The values in tb_ocean_ph are monthly, but the dates are not consistently on the middle of the month. + # Instead, they are on different days of the month. When merging with other tables, this will create many nans. + # We could reindex linearly, but it's not a big deal. + for table in [ + tb_nsidc, + tb_met_office, + tb_ocean_heat_monthly, + tb_ocean_ph, + tb_snow, + tb_ice_sheet, + tb_gml, + tb_sea_level, + ]: + tb_monthly = tb_monthly.merge( + table.astype({"date": str}), + how="outer", + on=["location", "date"], + validate="one_to_one", + short_name="climate_change_impacts_monthly", + ) + + # Gather annual data from different tables. + tb_annual = tb_ocean_heat_annual.copy() + for table in [arctic_sea_ice_extent, antarctic_sea_ice_extent, tb_ghg, tb_us_glaciers.astype({"year": int})]: + tb_annual = tb_annual.merge( + table, + how="outer", + on=["location", "year"], + validate="one_to_one", + short_name="climate_change_impacts_annual", + ) + tb_annual.metadata.short_name = "climate_change_impacts_annual" + + # Set an appropriate index to monthly and annual tables, and sort conveniently. + tb_monthly = tb_monthly.set_index(["location", "date"], verify_integrity=True).sort_index() + tb_annual = tb_annual.set_index(["location", "year"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create explorer dataset with combined table in csv format. 
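The merge loops above fold every source into one wide table per frequency via outer joins on the shared keys. A toy pandas version of that accumulation pattern (illustrative data only; the real code merges owid `Table` objects and also sets `short_name`):

```python
import pandas as pd

tb_monthly = pd.DataFrame({"location": ["World"], "date": ["2000-01-15"], "temperature_anomaly": [0.3]})
other_tables = [
    pd.DataFrame({"location": ["World"], "date": ["2000-01-15"], "sea_ice_extent": [14.2]}),
    pd.DataFrame({"location": ["World"], "date": ["2000-02-15"], "ocean_ph": [8.1]}),
]
for table in other_tables:
    tb_monthly = tb_monthly.merge(table, how="outer", on=["location", "date"], validate="one_to_one")
print(tb_monthly)
# Two rows result; indicators missing for a given (location, date) stay NaN, which is why
# the inconsistent ocean-pH sampling dates mentioned above produce sparse rows.
```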
+ ds_explorer = create_dataset(dest_dir, tables=[tb_annual, tb_monthly]) + ds_explorer.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/ghg_concentration.meta.yml b/etl/steps/data/garden/climate/2024-09-30/ghg_concentration.meta.yml new file mode 100644 index 00000000000..ca5e6073998 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/ghg_concentration.meta.yml @@ -0,0 +1,44 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + description_short: Measured in parts per million. + +dataset: + update_period_days: 60 + +tables: + ghg_concentration: + title: Monthly greenhouse gas concentration + variables: + co2_concentration: + title: Monthly concentration of atmospheric carbon dioxide + processing_level: minor + unit: parts per million + short_unit: ppm + ch4_concentration: + title: Monthly concentration of atmospheric methane + processing_level: minor + unit: parts per billion + short_unit: ppb + n2o_concentration: + title: Monthly concentration of atmospheric nitrous oxide + processing_level: minor + unit: parts per billion + short_unit: ppb + co2_concentration_yearly_average: + title: Rolling yearly average of the concentration of atmospheric carbon dioxide + processing_level: major + unit: parts per million + short_unit: ppm + ch4_concentration_yearly_average: + title: Rolling yearly average of the concentration of atmospheric methane + processing_level: major + unit: parts per billion + short_unit: ppb + n2o_concentration_yearly_average: + title: Rolling yearly average of the concentration of atmospheric nitrous oxide + processing_level: major + unit: parts per billion + short_unit: ppb diff --git a/etl/steps/data/garden/climate/2024-09-30/ghg_concentration.py b/etl/steps/data/garden/climate/2024-09-30/ghg_concentration.py new file mode 100644 index 00000000000..914ee6e8776 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/ghg_concentration.py @@ -0,0 +1,139 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from typing import List + +import pandas as pd +from owid.catalog import Table + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Columns to select from the data, and how to rename them. +COLUMNS = { + "year": "year", + "month": "month", + "average": "concentration", + # The following column is loaded only to perform a sanity check. + "decimal": "decimal", +} + + +def add_rolling_average(tb: Table, original_column_names: List[str]) -> Table: + tb_with_average = tb.copy() + + # Create a date range of each month (on the 15th). + # NOTE: The minimum date in the data is "2001-01-15", however, when passing this date to pd.date_range with + # freq="MS", the first point is dismissed because it is not the start of a month. For that reason, we shift the + # first point to be at the beginning of the month. + date_range = pd.date_range( + start=tb_with_average["date"].min() - pd.tseries.offsets.MonthBegin(1), + end=tb_with_average["date"].max(), + freq="MS", + ) + pd.DateOffset(days=14) + + # Get unique locations. + unique_locations = tb_with_average["location"].unique() + + # Set date as index and sort. + tb_with_average = tb_with_average.set_index(["location", "date"]).sort_index() + + # Create a MultiIndex with all possible combinations of date and location. + multi_index = pd.MultiIndex.from_product([unique_locations, date_range], names=["location", "date"]) + + # Reindex using the MultiIndex. 
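A worked example of the date grid built just above, showing why the first date is shifted to the start of the month before `freq="MS"` is applied and then moved back to the 15th:

```python
import pandas as pd

start = pd.Timestamp("2001-01-15")  # earliest date in the data, per the NOTE above
grid = pd.date_range(
    start=start - pd.tseries.offsets.MonthBegin(1),  # roll back to 2001-01-01 so freq="MS" keeps it
    end=pd.Timestamp("2001-04-15"),
    freq="MS",
) + pd.DateOffset(days=14)
print(list(grid))  # [2001-01-15, 2001-02-15, 2001-03-15, 2001-04-15]
```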
+ tb_with_average = tb_with_average.reindex(multi_index) + + for original_column_name in original_column_names: + # Create a rolling average with a window of one year, linearly interpolating missing values. + # NOTE: Currently no interpolation is needed, as no data points are missing (and in fact date_range is identical + # to the dates in the data). However, we need to interpolate in case there are missing points. Otherwise all + # points after the missing one will be nan. + tb_with_average[f"{original_column_name}_yearly_average"] = ( + tb_with_average[original_column_name].interpolate("linear").rolling(12).mean() + ) + + # Drop empty rows. + tb_with_average = tb_with_average.dropna(subset=original_column_names, how="all").reset_index() + + # Sort conveniently. + tb_with_average = tb_with_average.sort_values(["location", "date"]).reset_index(drop=True) + + for original_column_name in original_column_names: + # Check that the values of the original column have not been altered. + error = f"The values of the original {original_column_name} column have been altered." + assert tb_with_average[original_column_name].astype(float).equals(tb[original_column_name].astype(float)), error + + return tb_with_average + + +def prepare_gas_data(tb: Table) -> Table: + tb = tb.copy() + + # Extract gas name from table's short name. + gas = tb.metadata.short_name.split("_")[0] + + # Columns to select from the data, and how to rename them. + columns = { + "year": "year", + "month": "month", + "average": f"{gas}_concentration", + # The following column is loaded only to perform a sanity check. + "decimal": "decimal", + } + + # Select necessary columns and rename them. + tb = tb[list(columns)].rename(columns=columns, errors="raise") + + # There is a "decimal" column for the year as a decimal number, that only has 12 possible values, corresponding to + # the middle of each month, so we will assume the 15th of each month. + error = "Date format has changed." + assert len(set(tb["decimal"].astype(str).str.split(".").str[1])) == 12, error + assert set(tb["month"]) == set(range(1, 13)), error + tb["date"] = pd.to_datetime(tb[["year", "month"]].assign(day=15)) + + # Remove unnecessary columns. + tb = tb.drop(columns=["year", "month", "decimal"], errors="raise") + + # Add a location column. + tb["location"] = "World" + + # Add a column with a rolling average for each gas. + tb = add_rolling_average(tb=tb, original_column_names=[f"{gas}_concentration"]) + + return tb + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset and read its main table. + ds_meadow = paths.load_dataset("ghg_concentration") + tb_co2 = ds_meadow["co2_concentration_monthly"].reset_index() + tb_ch4 = ds_meadow["ch4_concentration_monthly"].reset_index() + tb_n2o = ds_meadow["n2o_concentration_monthly"].reset_index() + + # + # Process data. + # + # Prepare data for each gas. + tb_co2 = prepare_gas_data(tb=tb_co2) + tb_ch4 = prepare_gas_data(tb=tb_ch4) + tb_n2o = prepare_gas_data(tb=tb_n2o) + + # Combine data for different gases. + tb = tb_co2.merge(tb_ch4, how="outer", on=["location", "date"]).merge( + tb_n2o, how="outer", on=["location", "date"], short_name=paths.short_name + ) + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["location", "date"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset. 
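The step finishes below by writing the dataset; as a small numeric check of the interpolate-then-roll logic in `add_rolling_average` above (toy data, not the real series):

```python
import pandas as pd

# Thirteen monthly values with one gap; the gap is filled before the 12-month window is applied.
s = pd.Series([1.0, None, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0])
rolled = s.interpolate("linear").rolling(12).mean()
print(rolled.iloc[11], rolled.iloc[12])  # 6.5 7.5; without interpolation both would be NaN
```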
+ ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_garden.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/long_run_ghg_concentration.meta.yml b/etl/steps/data/garden/climate/2024-09-30/long_run_ghg_concentration.meta.yml new file mode 100644 index 00000000000..b02cba814ea --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/long_run_ghg_concentration.meta.yml @@ -0,0 +1,27 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + - CO2 & Greenhouse Gas Emissions + description_processing: |- + - Long-run data from ice core studies has been merged with recent measurements of atmospheric concentration of greenhouse gases. + +dataset: + update_period_days: 0 + +tables: + long_run_ghg_concentration: + variables: + co2_concentration: + title: Long-run CO₂ concentration + unit: parts per million volume + short_unit: ppmv + ch4_concentration: + title: Long-run CH₄ concentration + unit: parts per billion volume + short_unit: ppbv + n2o_concentration: + title: Long-run N₂O concentration + unit: parts per billion volume + short_unit: ppbv diff --git a/etl/steps/data/garden/climate/2024-09-30/long_run_ghg_concentration.py b/etl/steps/data/garden/climate/2024-09-30/long_run_ghg_concentration.py new file mode 100644 index 00000000000..0e07095b425 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/long_run_ghg_concentration.py @@ -0,0 +1,84 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from owid.catalog import Table +from owid.datautils.dataframes import combine_two_overlapping_dataframes + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def convert_monthly_to_annual(tb_new: Table) -> Table: + tb_new = tb_new.copy() + + # Create a year column. + tb_new["year"] = tb_new["date"].dt.year + + # Create a table with the number of observations per year. + tb_counts = tb_new.groupby("year", as_index=False).agg( + { + "co2_concentration": "count", + "ch4_concentration": "count", + "n2o_concentration": "count", + } + ) + # Create a table with the average annual values. + tb_new = tb_new.groupby("year", as_index=False).agg( + { + "co2_concentration": "mean", + "ch4_concentration": "mean", + "n2o_concentration": "mean", + } + ) + # Make nan all data points based on less than 12 observations per year. + for gas in ["co2", "ch4", "n2o"]: + tb_new.loc[tb_counts[f"{gas}_concentration"] < 12, f"{gas}_concentration"] = None + + # Drop empty rows. + tb_new = tb_new.dropna( + subset=["co2_concentration", "ch4_concentration", "n2o_concentration"], how="all" + ).reset_index(drop=True) + + return tb_new + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset on long-run GHG concentrations from EPA, and read its main table. + ds_old = paths.load_dataset("ghg_concentration", namespace="epa") + tb_old = ds_old["ghg_concentration"].reset_index() + + # Load garden dataset of up-to-date GHG concentrations, and read its main table. + ds_new = paths.load_dataset("ghg_concentration", namespace="climate") + tb_new = ds_new["ghg_concentration"].reset_index() + + # + # Process data. + # + # Select columns. + tb_new = tb_new[["date", "co2_concentration", "ch4_concentration", "n2o_concentration"]].copy() + + # Calculate average annual values. + tb_new = convert_monthly_to_annual(tb_new=tb_new) + + # Combine old and new data, prioritizing the latter. 
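Before the combination below, note the completeness rule in `convert_monthly_to_annual` above: annual means are kept only when all 12 months are present. A toy check:

```python
import pandas as pd

df = pd.DataFrame({"year": [2000] * 12 + [2001] * 3, "co2_concentration": list(range(15))})
counts = df.groupby("year")["co2_concentration"].count()
means = df.groupby("year")["co2_concentration"].mean()
means[counts < 12] = None
print(means)  # 2000 -> 5.5 (complete year), 2001 -> NaN (only 3 months observed)
```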
+ tb = combine_two_overlapping_dataframes(df1=tb_new, df2=tb_old, index_columns=["year"]) + + # Rename table. + tb.metadata.short_name = paths.short_name + + # Add location column. + tb["location"] = "World" + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["location", "year"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset. + ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_garden.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/ocean_heat_content.meta.yml b/etl/steps/data/garden/climate/2024-09-30/ocean_heat_content.meta.yml new file mode 100644 index 00000000000..c7f6fb474ea --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/ocean_heat_content.meta.yml @@ -0,0 +1,29 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + processing_level: minor + description_short: Measured in 10²² Joules. + unit: 10²² Joules + short_unit: 10²² J + +dataset: + title: Ocean Heat Content + update_period_days: 60 + +tables: + ocean_heat_content_monthly: + title: Ocean Heat Content - Monthly average + variables: + ocean_heat_content_700m: + title: Monthly average ocean heat content for the 0-700 meters layer + ocean_heat_content_2000m: + title: Monthly average ocean heat content for the 0-2000 meters layer + ocean_heat_content_annual: + title: Ocean Heat Content - Annual average + variables: + ocean_heat_content_700m: + title: Annual average ocean heat content for the 0-700 meters layer + ocean_heat_content_2000m: + title: Annual average ocean heat content for the 0-2000 meters layer diff --git a/etl/steps/data/garden/climate/2024-09-30/ocean_heat_content.py b/etl/steps/data/garden/climate/2024-09-30/ocean_heat_content.py new file mode 100644 index 00000000000..dcbafe0d14c --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/ocean_heat_content.py @@ -0,0 +1,45 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset and read its tables. + ds_meadow = paths.load_dataset("ocean_heat_content") + tb_monthly = ds_meadow["ocean_heat_content_monthly"].reset_index() + tb_annual = ds_meadow["ocean_heat_content_annual"].reset_index() + + # + # Process data. + # + # Improve the format of the date column in monthly date (assume the middle of the month for each data point). + tb_monthly["date"] = ( + tb_monthly["date"].str.split("-").str[0] + "-" + tb_monthly["date"].str.split("-").str[1].str.zfill(2) + "-15" + ) + + # Replace date column (where all years are given as, e.g. 1955.5, 2000.5) by year column in annual data. + tb_annual["year"] = tb_annual["date"].astype(int) + tb_annual = tb_annual.drop(columns=["date"], errors="raise") + + # Instead of having a column for depth, create columns of heat content for each depth. + tb_monthly["depth"] = tb_monthly["depth"].astype(str) + "m" + tb_monthly = tb_monthly.pivot(index=["location", "date"], columns="depth", join_column_levels_with="_") + tb_annual["depth"] = tb_annual["depth"].astype(str) + "m" + tb_annual = tb_annual.pivot(index=["location", "year"], columns="depth", join_column_levels_with="_") + + # Set an appropriate index to each table and sort conveniently. 
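A toy pandas sketch of the depth pivot above (the real code uses `Table.pivot` with `join_column_levels_with`; this analogue uses `add_prefix` to arrive at equivalent column names):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "location": ["World"] * 4,
        "year": [2000, 2000, 2001, 2001],
        "depth": ["700m", "2000m", "700m", "2000m"],
        "ohc": [1.0, 2.0, 3.0, 4.0],
    }
)
wide = (
    df.pivot(index=["location", "year"], columns="depth", values="ohc")
    .add_prefix("ocean_heat_content_")
    .reset_index()
)
print(wide)  # columns: location, year, ocean_heat_content_2000m, ocean_heat_content_700m
```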
+ tb_monthly = tb_monthly.set_index(["location", "date"], verify_integrity=True).sort_index() + tb_annual = tb_annual.set_index(["location", "year"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset. + ds_garden = create_dataset(dest_dir, tables=[tb_annual, tb_monthly], check_variables_metadata=True) + ds_garden.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/ocean_ph_levels.meta.yml b/etl/steps/data/garden/climate/2024-09-30/ocean_ph_levels.meta.yml new file mode 100644 index 00000000000..d9364bd3280 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/ocean_ph_levels.meta.yml @@ -0,0 +1,22 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + unit: pH + short_unit: pH + +dataset: + title: Ocean pH Levels + update_period_days: 60 + +tables: + ocean_ph_levels: + title: Ocean pH levels + variables: + ocean_ph: + title: Monthly measurement of ocean pH levels + processing_level: minor + ocean_ph_yearly_average: + title: Rolling yearly average of ocean pH levels + processing_level: major diff --git a/etl/steps/data/garden/climate/2024-09-30/ocean_ph_levels.py b/etl/steps/data/garden/climate/2024-09-30/ocean_ph_levels.py new file mode 100644 index 00000000000..204ec6bc0c5 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/ocean_ph_levels.py @@ -0,0 +1,82 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import pandas as pd +from owid.catalog import Table + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Columns to select from the data, and how to rename them. +COLUMNS = { + "date": "date", + "phcalc_insitu": "ocean_ph", +} + + +def add_rolling_average(tb: Table) -> Table: + tb_with_average = tb.copy() + + # Set date as index and sort. + tb_with_average = tb_with_average.set_index("date").sort_index() + + # Since values are given at different days of the month, reindex to have a value for each day. + tb_with_average = tb_with_average.reindex( + pd.date_range(start=tb_with_average.index.min(), end=tb_with_average.index.max(), freq="1D") + ) + + # Create a rolling average with a window of one year, linearly interpolating missing values. + tb_with_average["ocean_ph_yearly_average"] = ( + tb_with_average["ocean_ph"].interpolate(method="time").rolling(365).mean() + ) + + # Drop empty rows. + tb_with_average = ( + tb_with_average.dropna(subset=["ocean_ph"]).reset_index().rename(columns={"index": "date"}, errors="raise") + ) + + # Check that the values of the original ocean ph column have not been altered. + error = "The values of the original ocean_ph column have been altered." + assert tb_with_average["ocean_ph"].equals( + tb.dropna(subset=["ocean_ph"]).sort_values("date").reset_index(drop=True)["ocean_ph"] + ), error + + return tb_with_average + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset and read its tables. + ds_meadow = paths.load_dataset("hawaii_ocean_time_series") + tb_meadow = ds_meadow["hawaii_ocean_time_series"].reset_index() + + # + # Process data. + # + # Select and rename columns. + tb = tb_meadow[list(COLUMNS)].rename(columns=COLUMNS, errors="raise") + + # Add location column. + tb["location"] = "Hawaii" + + # Improve format of date column. + tb["date"] = pd.to_datetime(tb["date"], format="%d-%b-%y") + + # Add a column with a rolling average. 
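The call below uses `add_rolling_average` defined above, which regularizes the irregular sampling dates onto a daily grid so that a 365-row window spans exactly one year. A small example of the time-weighted interpolation involved:

```python
import pandas as pd

s = pd.Series([8.11, 8.09], index=pd.to_datetime(["2000-01-03", "2000-02-20"]))
daily = s.reindex(pd.date_range(s.index.min(), s.index.max(), freq="1D"))
filled = daily.interpolate(method="time")  # time-weighted, so uneven gaps are handled correctly
print(filled.loc["2000-01-27"])  # halfway between the two measurements in time: ~8.10
```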
+ tb = add_rolling_average(tb=tb) + + # Set an appropriate index to each table and sort conveniently. + tb = tb.set_index(["location", "date"], verify_integrity=True).sort_index() + + # Rename table. + tb.metadata.short_name = paths.short_name + + # + # Save outputs. + # + # Create a new garden dataset. + ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_garden.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/sea_ice_index.meta.yml b/etl/steps/data/garden/climate/2024-09-30/sea_ice_index.meta.yml new file mode 100644 index 00000000000..7facebf9240 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/sea_ice_index.meta.yml @@ -0,0 +1,19 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + +dataset: + title: Sea Ice Index + update_period_days: 60 + +tables: + sea_ice_index: + variables: + sea_ice_extent: + title: Sea ice extent + # description_short: TODO + unit: million square kilometers + short_unit: million km² + processing_level: minor diff --git a/etl/steps/data/garden/climate/2024-09-30/sea_ice_index.py b/etl/steps/data/garden/climate/2024-09-30/sea_ice_index.py new file mode 100644 index 00000000000..3f8247e42b5 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/sea_ice_index.py @@ -0,0 +1,44 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import pandas as pd + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("sea_ice_index") + + # Read table from meadow dataset. + tb = ds_meadow["sea_ice_index"].reset_index() + + # + # Process data. + # + # Remove column with annual average. + tb = tb.drop(columns=["annual"]) + + # Convert table to long format. + tb = tb.melt(id_vars=["location", "year"], var_name="month", value_name="sea_ice_extent") + + # Create column of date, assuming each measurement is taken mid month. + tb["date"] = pd.to_datetime(tb["year"].astype(str) + tb["month"].str[0:3] + "15", format="%Y%b%d") + + # Drop empty rows and unnecessary columns. + tb = tb.dropna().drop(columns=["year", "month"]) + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["location", "date"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset with the combined table. + ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_garden.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/sea_surface_temperature.meta.yml b/etl/steps/data/garden/climate/2024-09-30/sea_surface_temperature.meta.yml new file mode 100644 index 00000000000..bf9ee9d13dc --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/sea_surface_temperature.meta.yml @@ -0,0 +1,29 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + processing_level: minor + +dataset: + title: Sea surface temperature + update_period_days: 60 + +tables: + sea_surface_temperature: + variables: + sea_temperature_anomaly: + title: "Monthly sea surface temperature anomaly" + description_short: Measured in degrees Celsius. + unit: °C + short_unit: °C + sea_temperature_anomaly_low: + title: "Monthly sea surface temperature anomaly (lower bound)" + description_short: Measured in degrees Celsius. 
+ unit: °C + short_unit: °C + sea_temperature_anomaly_high: + title: "Monthly sea surface temperature anomaly (upper bound)" + description_short: Measured in degrees Celsius. + unit: °C + short_unit: °C diff --git a/etl/steps/data/garden/climate/2024-09-30/sea_surface_temperature.py b/etl/steps/data/garden/climate/2024-09-30/sea_surface_temperature.py new file mode 100644 index 00000000000..2c2fb56098e --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/sea_surface_temperature.py @@ -0,0 +1,48 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +# Columns to select from data, and how to rename them. +COLUMNS = { + "year": "year", + "month": "month", + "location": "location", + "anomaly": "sea_temperature_anomaly", + "lower_bound_95pct_bias_uncertainty_range": "sea_temperature_anomaly_low", + "upper_bound_95pct_bias_uncertainty_range": "sea_temperature_anomaly_high", +} + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset and read its main table. + ds_meadow = paths.load_dataset("sea_surface_temperature") + tb = ds_meadow["sea_surface_temperature"].reset_index() + + # + # Process data. + # + # Select and rename columns. + tb = tb[list(COLUMNS)].rename(columns=COLUMNS, errors="raise") + + # Create a date column (assume the middle of the month for each monthly data point). + tb["date"] = tb["year"].astype(str) + "-" + tb["month"].astype(str).str.zfill(2) + "-15" + + # Remove unnecessary columns. + tb = tb.drop(columns=["year", "month"], errors="raise") + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["location", "date"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset with the combined table. + ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_garden.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/snow_cover_extent.meta.yml b/etl/steps/data/garden/climate/2024-09-30/snow_cover_extent.meta.yml new file mode 100644 index 00000000000..698ad73c63f --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/snow_cover_extent.meta.yml @@ -0,0 +1,23 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + unit: "square kilometers" + short_unit: "km²" + description_short: Measured in square kilometers. + +dataset: + title: Snow Cover Extent + update_period_days: 60 + +tables: + snow_cover_extent: + title: Snow Cover Extent + variables: + snow_cover_extent: + title: Monthly measurement of the area covered by snow + processing_level: minor + snow_cover_extent_yearly_average: + title: Rolling yearly average of the area covered by snow + processing_level: major diff --git a/etl/steps/data/garden/climate/2024-09-30/snow_cover_extent.py b/etl/steps/data/garden/climate/2024-09-30/snow_cover_extent.py new file mode 100644 index 00000000000..618e62cce08 --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/snow_cover_extent.py @@ -0,0 +1,93 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import pandas as pd +from owid.catalog import Table + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Columns to select from the data, and how to rename them. 
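As a side note on the sea-surface-temperature step above: the mid-month date is built with plain string operations rather than datetime arithmetic, for example:

```python
import pandas as pd

tb = pd.DataFrame({"year": [1850, 1850], "month": [3, 12]})
tb["date"] = tb["year"].astype(str) + "-" + tb["month"].astype(str).str.zfill(2) + "-15"
print(tb["date"].tolist())  # ['1850-03-15', '1850-12-15']
```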
+COLUMNS = { + "date": "date", + "phcalc_insitu": "ocean_ph", +} + + +def add_rolling_average(tb: Table, original_column_name: str) -> Table: + tb_with_average = tb.copy() + + # Create a date range. + date_range = pd.date_range(start=tb_with_average["date"].min(), end=tb_with_average["date"].max(), freq="1D") + + # Get unique locations. + unique_locations = tb_with_average["location"].unique() + + # Set date as index and sort. + tb_with_average = tb_with_average.set_index(["location", "date"]).sort_index() + + # Create a MultiIndex with all possible combinations of date and location. + multi_index = pd.MultiIndex.from_product([unique_locations, date_range], names=["location", "date"]) + + # Reindex using the MultiIndex. + tb_with_average = tb_with_average.reindex(multi_index) + + # Create a rolling average with a window of one year, linearly interpolating missing values. + tb_with_average[f"{original_column_name}_yearly_average"] = ( + tb_with_average[original_column_name].interpolate(method="linear").rolling(365).mean() + ) + + # Drop empty rows. + tb_with_average = tb_with_average.dropna(subset=[original_column_name]).reset_index() + + # Remove rolling average for the first year, given that it is based on incomplete data. + tb_with_average.loc[ + tb_with_average["date"] < tb_with_average["date"].min() + pd.Timedelta(days=365), + f"{original_column_name}_yearly_average", + ] = None + + # Sort conveniently. + tb_with_average = tb_with_average.sort_values(["location", "date"]).reset_index(drop=True) + + # Check that the values of the original column have not been altered. + error = f"The values of the original {original_column_name} column have been altered." + assert tb_with_average[original_column_name].astype(int).equals(tb[original_column_name].astype(int)), error + + return tb_with_average + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset and read its tables. + ds_meadow = paths.load_dataset("snow_cover_extent") + tb = ds_meadow["snow_cover_extent"].reset_index() + + # + # Process data. + # + # Create a date column. + # NOTE: Assign the middle of the month. + tb["date"] = pd.to_datetime(tb[["year", "month"]].assign(day=15)) + tb = tb.drop(columns=["year", "month"], errors="raise") + + # Data starts in 1966, but, as mentioned on their website + # https://climate.rutgers.edu/snowcover/table_area.php?ui_set=1&ui_sort=0 + # there is missing data between 1968 and 1971. + # So, for simplicity, select data from 1972 onwards, where data is complete. + tb = tb[tb["date"] >= "1972-01-01"].reset_index(drop=True) + + # Add a column with a rolling average. + tb = add_rolling_average(tb=tb, original_column_name="snow_cover_extent") + + # Set an appropriate index to each table and sort conveniently. + tb = tb.set_index(["location", "date"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset. 
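Before the dataset is written below, a toy check of the first-year masking above: a 365-day rolling window only becomes meaningful after one full year of observations has accumulated, so the first year's averages are dropped.

```python
import pandas as pd

df = pd.DataFrame({"date": pd.date_range("1972-01-01", periods=500, freq="D"), "value": 1.0})
df["value_yearly_average"] = df["value"].rolling(365).mean()
# Mask the first year: those windows cover fewer than 365 real observations.
df.loc[df["date"] < df["date"].min() + pd.Timedelta(days=365), "value_yearly_average"] = None
print(df["value_yearly_average"].notna().sum())  # 135 of 500 rows keep an average
```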
+ ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_garden.save() diff --git a/etl/steps/data/garden/climate/2024-09-30/surface_temperature_analysis.meta.yml b/etl/steps/data/garden/climate/2024-09-30/surface_temperature_analysis.meta.yml new file mode 100644 index 00000000000..eda07f5ae5a --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/surface_temperature_analysis.meta.yml @@ -0,0 +1,20 @@ +definitions: + common: + presentation: + topic_tags: + - Climate Change + +dataset: + title: GISS surface temperature analysis + update_period_days: 60 + +tables: + surface_temperature_analysis: + variables: + temperature_anomaly: + title: "Global warming: monthly temperature anomaly" + description_short: |- + Combined land-surface air and sea-surface water temperature anomaly, given as the deviation from the 1951-1980 mean, in degrees Celsius. + unit: °C + short_unit: °C + processing_level: minor diff --git a/etl/steps/data/garden/climate/2024-09-30/surface_temperature_analysis.py b/etl/steps/data/garden/climate/2024-09-30/surface_temperature_analysis.py new file mode 100644 index 00000000000..43d328abbde --- /dev/null +++ b/etl/steps/data/garden/climate/2024-09-30/surface_temperature_analysis.py @@ -0,0 +1,56 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import owid.catalog.processing as pr +import pandas as pd + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset and read its main table. + ds_meadow = paths.load_dataset("surface_temperature_analysis") + tb = ds_meadow["surface_temperature_analysis_world"] + + # + # Process data. + # + # Initialize dictionary to store processed tables. + tables = {} + for table_name in ds_meadow.table_names: + # Read table. + tb = ds_meadow[table_name].reset_index() + # Get location from table name. + location = table_name.split("surface_temperature_analysis_")[-1].replace("_", " ").title() + # Add column for location. + tb["location"] = location + # Convert table to long format. + tb = tb.melt(id_vars=["year", "location"], var_name="month", value_name="temperature_anomaly") + # Create column of date, assuming each measurement is taken mid month. + tb["date"] = pd.to_datetime(tb["year"].astype(str) + tb["month"] + "15", format="%Y%b%d") + # Copy metadata from any other previous column. + tb["date"] = tb["date"].copy_metadata(tb["location"]) + # Select necessary columns. + tb = tb[["location", "date", "temperature_anomaly"]] + # Remove rows with missing values. + tb = tb.dropna(subset=["temperature_anomaly"]).reset_index(drop=True) + # Update table. + tables[location] = tb + + # Concatenate all tables. + tb = pr.concat(list(tables.values()), ignore_index=True, short_name=paths.short_name) + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["location", "date"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new garden dataset with the combined table. 
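Before saving below: the date parsing above concatenates the year, a month abbreviation (assumed here to be English, as in the GISS column headers), and the day, then parses with `%Y%b%d`. For example:

```python
import pandas as pd

tb = pd.DataFrame({"year": [1880, 1880], "month": ["Jan", "Sep"]})
tb["date"] = pd.to_datetime(tb["year"].astype(str) + tb["month"] + "15", format="%Y%b%d")
print(tb["date"].tolist())  # [Timestamp('1880-01-15 00:00:00'), Timestamp('1880-09-15 00:00:00')]
```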
+    #
+    # Save outputs.
+    #
+    # Create a new garden dataset with the combined table.
+    ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+    ds_garden.save()
diff --git a/etl/steps/data/grapher/climate/2024-09-30/climate_change_impacts_annual.py b/etl/steps/data/grapher/climate/2024-09-30/climate_change_impacts_annual.py
new file mode 100644
index 00000000000..d2ce85e4a2d
--- /dev/null
+++ b/etl/steps/data/grapher/climate/2024-09-30/climate_change_impacts_annual.py
@@ -0,0 +1,34 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset and read its annual table.
+    ds_garden = paths.load_dataset("climate_change_impacts")
+    tb_annual = ds_garden["climate_change_impacts_annual"].reset_index()
+
+    #
+    # Process data.
+    #
+    # Create a country column (required by grapher).
+    tb_annual = tb_annual.rename(columns={"location": "country"}, errors="raise")
+
+    # Set an appropriate index and sort conveniently.
+    tb_annual = tb_annual.set_index(["country", "year"], verify_integrity=True).sort_index()
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset.
+    ds_grapher = create_dataset(dest_dir, tables=[tb_annual], check_variables_metadata=True)
+    ds_grapher.save()
diff --git a/etl/steps/data/grapher/climate/2024-09-30/climate_change_impacts_monthly.py b/etl/steps/data/grapher/climate/2024-09-30/climate_change_impacts_monthly.py
new file mode 100644
index 00000000000..c69428bae1b
--- /dev/null
+++ b/etl/steps/data/grapher/climate/2024-09-30/climate_change_impacts_monthly.py
@@ -0,0 +1,37 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+from etl.grapher_helpers import adapt_table_with_dates_to_grapher
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset and read its monthly table.
+    ds_garden = paths.load_dataset("climate_change_impacts")
+    tb = ds_garden["climate_change_impacts_monthly"].reset_index()
+
+    #
+    # Process data.
+    #
+    # Create a country column (required by grapher).
+    tb = tb.rename(columns={"location": "country"}, errors="raise")
+
+    # Adapt table with dates to grapher requirements.
+    tb = adapt_table_with_dates_to_grapher(tb)
+
+    # Set an appropriate index and sort conveniently.
+    tb = tb.set_index(["country", "year"], verify_integrity=True).sort_index()
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset.
+    ds_grapher = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+    ds_grapher.save()
diff --git a/etl/steps/data/grapher/climate/2024-09-30/sea_ice_anomalies_by_month.py b/etl/steps/data/grapher/climate/2024-09-30/sea_ice_anomalies_by_month.py
new file mode 100644
index 00000000000..9f04bb5edfc
--- /dev/null
+++ b/etl/steps/data/grapher/climate/2024-09-30/sea_ice_anomalies_by_month.py
@@ -0,0 +1,171 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+
+from owid.catalog import Table
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+# Minimum year to consider.
+# This is chosen because the earliest year in the data is 1978 (with only 2 months reported).
+# NOTE: We could include 1979, but for consistency between the yearly and decadal data we ignore it.
+YEAR_MIN = 1980
+
+# For each month's sea ice extent, subtract a certain baseline sea ice extent, calculated as an average value (for that month) between two reference years (defined below as REFERENCE_YEAR_MIN and REFERENCE_YEAR_MAX).
+# NOTE: Both min and max years are included.
+REFERENCE_YEAR_MIN = 1981
+REFERENCE_YEAR_MAX = 2010
+
+
+def improve_metadata(tb: Table) -> Table:
+    tb = tb.copy()
+
+    # Rename table.
+    tb.metadata.title = "Sea ice anomaly in the northern and southern hemispheres"
+    for column in tb.drop(columns=["country", "year"]).columns:
+        location = column.split("sea_ice_extent_")[-1].title()
+        title = f"Sea ice anomaly in the {location} by month"
+        description_short_yearly = f"Each point represents the monthly average sea ice extent relative to a baseline, which is the average sea ice extent for the same month over the {REFERENCE_YEAR_MAX-REFERENCE_YEAR_MIN+1}-year period from {REFERENCE_YEAR_MIN} to {REFERENCE_YEAR_MAX}."
+        footnote = (
+            "All years have data for all 12 months, except 1987 and 1988 (each missing one month) and the current year."
+        )
+
+        # Set metadata for the (only) data column.
+        tb[column].metadata.title = title
+        tb[column].metadata.description_short = description_short_yearly
+        tb[column].metadata.presentation.title_public = title
+        # Configure the grapher chart (one entity per month).
+        tb[column].metadata.presentation.grapher_config = {
+            "selectedEntityNames": [
+                "January",
+                "February",
+                "March",
+                "April",
+                "May",
+                "June",
+                "July",
+                "August",
+                "September",
+                "October",
+                "November",
+                "December",
+            ],
+            # "selectedEntityColors": colors,
+            "originUrl": "https://ourworldindata.org/climate-change",
+            "note": footnote,
+            # "hideAnnotationFieldsInTitle": {"time": True},
+            "entityType": "month",
+            "entityTypePlural": "months",
+        }
+
+    return tb
+
+
+def sanity_check_inputs(tb: Table) -> None:
+    error = "Expected 1978 to be the first year in the data. Data may have changed. Consider editing YEAR_MIN."
+    assert tb["year"].min() == 1978, error
+
+    # All years should have 12 months except:
+    # * The very first year in the data (1978).
+    # * Years 1987 and 1988, which have 11 months (because 1987-12 and 1988-01 are missing).
+    # * The very last year in the data (since it's the ongoing year).
+    error = "Expected 12 months per year."
+    assert (
+        tb[~tb["year"].isin([tb["year"].min(), 1987, 1988, tb["year"].max()])]
+        .groupby(["location", "year"])
+        .count()["sea_ice_extent"]
+        == 12
+    ).all(), error
+    # Each month-year should appear only once in the data.
+    error = "Repeated months."
+    assert (tb.groupby(["location", "year", "month"]).count()["sea_ice_extent"] == 1).all(), error
+    # Each month-decade should appear 10 times (once per year in the decade), except:
+    # * The very first decade (1970s), since it starts in 1978. This decade will be ignored in the decadal data.
+    # * January and December 1980s, which appear 9 times (because 1987-12 and 1988-01 are missing).
+    # * The very last decade (since it's the ongoing decade).
+    error = "Expected 10 instances of each month per decade (except in specific cases)."
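+    # NOTE (illustrative): e.g. the 1990s should contribute exactly 10 Januaries
+    # (1990-1999). The mask below excludes the first decade, the ongoing decade,
+    # and January/December of the 1980s, which are then checked separately.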
+    exceptions = tb[
+        (tb["decade"] == tb["decade"].min())
+        | (tb["decade"] == tb["decade"].max())
+        | ((tb["decade"] == 1980) & (tb["month"].isin([1, 12])))
+    ].index
+    assert (tb.drop(exceptions).groupby(["location", "decade", "month"]).count()["sea_ice_extent"] == 10).all(), error
+    assert (
+        tb[(tb["decade"] == 1980) & (tb["month"].isin([1, 12]))]
+        .groupby(["location", "decade", "month"])
+        .count()["sea_ice_extent"]
+        == 9
+    ).all(), error
+    assert (
+        tb[(tb["decade"] == tb["decade"].max())].groupby(["location", "decade", "month"]).count()["sea_ice_extent"]
+        <= 10
+    ).all(), error
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("sea_ice_index")
+    tb = ds_garden.read_table("sea_ice_index")
+
+    #
+    # Process data.
+    #
+    # Rename locations conveniently.
+    tb = tb.astype({"location": "string"})
+    tb.loc[tb["location"] == "Northern Hemisphere", "location"] = "Arctic"
+    tb.loc[tb["location"] == "Southern Hemisphere", "location"] = "Antarctic"
+    assert set(tb["location"]) == {"Arctic", "Antarctic"}, "Unexpected locations."
+
+    # Create columns for month, year, and decade.
+    tb["year"] = tb["date"].dt.year
+    tb["month"] = tb["date"].dt.month
+    tb["month_name"] = tb["date"].dt.strftime("%B")
+    tb["decade"] = (tb["year"] // 10) * 10
+
+    # Sanity checks.
+    sanity_check_inputs(tb=tb)
+
+    # Select years from YEAR_MIN onwards (see explanation above, where YEAR_MIN is defined).
+    tb = (
+        tb[(tb["year"] >= YEAR_MIN)]
+        .sort_values(["year", "month"], ascending=(False, True))
+        .drop(columns=["date", "month", "decade"], errors="raise")
+        .reset_index(drop=True)
+    )
+
+    # For each month's sea ice extent, subtract a certain baseline sea ice extent, calculated as an average value (for that month) between two reference years (defined above as REFERENCE_YEAR_MIN and REFERENCE_YEAR_MAX).
+    tb_reference = (
+        tb[(tb["year"] >= REFERENCE_YEAR_MIN) & (tb["year"] <= REFERENCE_YEAR_MAX)]
+        .groupby(["location", "month_name"], as_index=False)
+        .agg({"sea_ice_extent": "mean"})
+        .rename(columns={"sea_ice_extent": "sea_ice_extent_reference"}, errors="raise")
+    )
+    tb = tb.merge(tb_reference, on=["location", "month_name"], how="left")
+    tb["sea_ice_extent"] -= tb["sea_ice_extent_reference"]
+    tb = tb.drop(columns=["sea_ice_extent_reference"], errors="raise")
+
+    # Create one column for each hemisphere.
+    tb = tb.pivot(
+        index=["year", "month_name"], columns=["location"], values=["sea_ice_extent"], join_column_levels_with="_"
+    ).underscore()
+
+    # Adapt column names to grapher.
+    tb = tb.rename(columns={"month_name": "country"}, errors="raise")
+
+    # Improve metadata.
+    tb = improve_metadata(tb=tb)
+
+    # Improve format.
+    tb = tb.format()
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset.
+    ds_grapher = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+    ds_grapher.save()
diff --git a/etl/steps/data/grapher/climate/2024-09-30/sea_ice_extent_by_decade.py b/etl/steps/data/grapher/climate/2024-09-30/sea_ice_extent_by_decade.py
new file mode 100644
index 00000000000..ebb8e2f063f
--- /dev/null
+++ b/etl/steps/data/grapher/climate/2024-09-30/sea_ice_extent_by_decade.py
@@ -0,0 +1,203 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+import re
+
+import owid.catalog.processing as pr
+from owid.catalog import Table
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
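+# NOTE: PathFinder infers this step's channel, namespace, version and short name
+# from the file's path; that is how paths.load_dataset("sea_ice_index") below
+# resolves the dependency declared in the dag (a brief summary of the behavior of
+# etl.helpers.PathFinder, not a full specification).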
+paths = PathFinder(__file__)
+
+# Minimum year to consider.
+# This is chosen because the earliest year in the data is 1978 (with only 2 months reported).
+# NOTE: We could include 1979, but for consistency between the yearly and decadal data we ignore it.
+YEAR_MIN = 1980
+
+
+def create_yearly_table(tb: Table) -> Table:
+    tb_yearly = tb.copy()
+
+    # Select data for the latest year, and adapt column names to grapher ("country" holds the year, "year" holds the month number).
+    tb_yearly = tb_yearly[tb_yearly["year"] == tb_yearly["year"].max()].reset_index(drop=True)
+    tb_yearly = tb_yearly.drop(columns=["decade"], errors="raise").rename(
+        columns={"year": "country", "month": "year"}, errors="raise"
+    )
+
+    return tb_yearly
+
+
+def create_decadal_table(tb: Table) -> Table:
+    tb_decadal = tb.copy()
+
+    # Calculate the sea ice extent of each month, averaged over the 10 instances of that month within each decade.
+    # For example, January 1990 will be the average sea ice extent of the 10 Januaries between 1990 and 1999.
+    tb_decadal["decade"] = tb_decadal["decade"].astype("string") + "s"
+    tb_decadal = tb_decadal.groupby(["month", "decade"], observed=True, as_index=False).agg(
+        {"sea_ice_extent_arctic": "mean", "sea_ice_extent_antarctic": "mean"}
+    )
+    tb_decadal = tb_decadal.rename(columns={"decade": "country", "month": "year"}, errors="raise")
+
+    return tb_decadal
+
+
+def improve_metadata(tb: Table) -> Table:
+    tb = tb.astype({"country": "string"}).copy()
+
+    # Gather the years/decades in the data (sorted, so the chart's entity order is deterministic), and assign colors to them.
+    colors = {}
+    columns = [str(year) for year in sorted(set(tb["country"]))]
+    years = [int(re.findall(r"\d{4}", column)[0]) for column in columns]
+    for year, column in zip(years, columns):
+        if 1980 <= year < 1990:
+            # Light blue.
+            color = "#CCE5FF"
+        elif 1990 <= year < 2000:
+            # Medium light blue.
+            color = "#99CCFF"
+        elif 2000 <= year < 2010:
+            # Medium blue.
+            color = "#6699FF"
+        elif 2010 <= year < 2020:
+            # Darker blue.
+            color = "#3366FF"
+        elif year == max(years):
+            # Black.
+            color = "#000000"
+        else:
+            # Red.
+            color = "#F89B9B"
+        colors[column] = color
+
+    # Rename table.
+    tb.metadata.title = "Sea ice extent in the northern and southern hemispheres by decade"
+
+    for column in tb.drop(columns=["country", "year"]).columns:
+        location = column.split("sea_ice_extent_")[-1].title()
+        title = f"Monthly sea ice extent in the {location}, decadal average"
+        description_short = (
+            "Each point represents the monthly average sea ice extent, averaged across all years within the decade."
+        )
+        subtitle = (
+            description_short
+            + " The current decade is highlighted in red, with the current year shown in black for comparison."
+        )
+        footnote = "The horizontal axis shows months from January (1) to December (12). All years have data for all 12 months, except 1987 and 1988 (each missing one month) and the current year."
+
+        tb[column].metadata.title = title
+        tb[column].metadata.description_short = description_short
+        tb[column].metadata.presentation.title_public = title
+        tb[column].metadata.presentation.grapher_config = {
+            "subtitle": subtitle,
+            "note": footnote,
+            "selectedEntityNames": columns,
+            "selectedEntityColors": colors,
+            "originUrl": "https://ourworldindata.org/climate-change",
+            "hideAnnotationFieldsInTitle": {"time": True},
+            "entityType": "year",
+            "entityTypePlural": "years",
+        }
+
+    return tb
+
+
+def sanity_check_inputs(tb: Table) -> None:
+    error = "Expected 1978 to be the first year in the data. Data may have changed. Consider editing YEAR_MIN."
+    assert tb["year"].min() == 1978, error
+
+    # All years should have 12 months except:
+    # * The very first year in the data (1978).
+    # * Years 1987 and 1988, which have 11 months (because 1987-12 and 1988-01 are missing).
+    # * The very last year in the data (since it's the ongoing year).
+    error = "Expected 12 months per year."
+    assert (
+        tb[~tb["year"].isin([tb["year"].min(), 1987, 1988, tb["year"].max()])]
+        .groupby(["location", "year"])
+        .count()["sea_ice_extent"]
+        == 12
+    ).all(), error
+    # Each month-year should appear only once in the data.
+    error = "Repeated months."
+    assert (tb.groupby(["location", "year", "month"]).count()["sea_ice_extent"] == 1).all(), error
+    # Each month-decade should appear 10 times (once per year in the decade), except:
+    # * The very first decade (1970s), since it starts in 1978. This decade will be ignored in the decadal data.
+    # * January and December 1980s, which appear 9 times (because 1987-12 and 1988-01 are missing).
+    # * The very last decade (since it's the ongoing decade).
+    error = "Expected 10 instances of each month per decade (except in specific cases)."
+    exceptions = tb[
+        (tb["decade"] == tb["decade"].min())
+        | (tb["decade"] == tb["decade"].max())
+        | ((tb["decade"] == 1980) & (tb["month"].isin([1, 12])))
+    ].index
+    assert (tb.drop(exceptions).groupby(["location", "decade", "month"]).count()["sea_ice_extent"] == 10).all(), error
+    assert (
+        tb[(tb["decade"] == 1980) & (tb["month"].isin([1, 12]))]
+        .groupby(["location", "decade", "month"])
+        .count()["sea_ice_extent"]
+        == 9
+    ).all(), error
+    assert (
+        tb[(tb["decade"] == tb["decade"].max())].groupby(["location", "decade", "month"]).count()["sea_ice_extent"]
+        <= 10
+    ).all(), error
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("sea_ice_index")
+    tb = ds_garden.read_table("sea_ice_index")
+
+    #
+    # Process data.
+    #
+    # Rename locations conveniently.
+    tb = tb.astype({"location": "string"})
+    tb.loc[tb["location"] == "Northern Hemisphere", "location"] = "Arctic"
+    tb.loc[tb["location"] == "Southern Hemisphere", "location"] = "Antarctic"
+    assert set(tb["location"]) == {"Arctic", "Antarctic"}, "Unexpected locations."
+
+    # Create columns for month, year, and decade.
+    tb["year"] = tb["date"].dt.year
+    tb["month"] = tb["date"].dt.month
+    tb["decade"] = (tb["year"] // 10) * 10
+
+    # Sanity checks.
+    sanity_check_inputs(tb=tb)
+
+    # Select years from YEAR_MIN onwards (see explanation above, where YEAR_MIN is defined).
+    tb = (
+        tb[(tb["year"] >= YEAR_MIN)]
+        .sort_values(["year", "month"], ascending=(False, True))
+        .drop(columns=["date"], errors="raise")
+        .reset_index(drop=True)
+    )
+
+    # Create one column for each hemisphere.
+    tb = tb.pivot(
+        index=["year", "decade", "month"], columns=["location"], values=["sea_ice_extent"], join_column_levels_with="_"
+    ).underscore()
+
+    # Create yearly table, adapted to grapher.
+    tb_yearly = create_yearly_table(tb=tb)
+
+    # Create decadal table, adapted to grapher.
+    tb_decadal = create_decadal_table(tb=tb)
+
+    # Combine both tables (decadal averages for all decades, including the ongoing one, plus the latest year as an individual series).
+    tb_combined = pr.concat([tb_decadal, tb_yearly], ignore_index=True)
+
+    # Improve metadata.
+    tb_combined = improve_metadata(tb=tb_combined)
+
+    # Improve format.
+    tb_combined = tb_combined.format(sort_rows=False)
+
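+    # NOTE (illustrative): after concatenating the decadal and yearly tables above,
+    # the grapher "country" entities are decade labels plus the latest year, e.g.
+    # "1980s", "1990s", "2000s", "2010s", "2020s" and (as of this version) "2024",
+    # each plotted against month number 1-12.
+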
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset.
+    ds_grapher = create_dataset(dest_dir, tables=[tb_combined], check_variables_metadata=True)
+    ds_grapher.save()
diff --git a/etl/steps/data/grapher/climate/2024-09-30/sea_ice_extent_by_year.py b/etl/steps/data/grapher/climate/2024-09-30/sea_ice_extent_by_year.py
new file mode 100644
index 00000000000..940f0eb32f8
--- /dev/null
+++ b/etl/steps/data/grapher/climate/2024-09-30/sea_ice_extent_by_year.py
@@ -0,0 +1,170 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+import re
+
+from owid.catalog import Table
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+# Minimum year to consider.
+# This is chosen because the earliest year in the data is 1978 (with only 2 months reported).
+# NOTE: We could include 1979, but for consistency between the yearly and decadal data we ignore it.
+YEAR_MIN = 1980
+
+
+def improve_metadata(tb: Table) -> Table:
+    tb = tb.copy()
+
+    # Gather years in the data, and assign colors to them.
+    colors = {}
+    columns = [str(year) for year in sorted(set(tb["country"]), reverse=True)]
+    years = [int(re.findall(r"\d{4}", column)[0]) for column in columns]
+    for year, column in zip(years, columns):
+        if 1980 <= year < 1990:
+            # Light blue.
+            color = "#CCE5FF"
+        elif 1990 <= year < 2000:
+            # Medium light blue.
+            color = "#99CCFF"
+        elif 2000 <= year < 2010:
+            # Medium blue.
+            color = "#6699FF"
+        elif 2010 <= year < 2020:
+            # Darker blue.
+            color = "#3366FF"
+        elif year == max(years):
+            # Black.
+            color = "#000000"
+        else:
+            # Red.
+            color = "#F89B9B"
+        colors[column] = color
+
+    # Rename table.
+    tb.metadata.title = "Sea ice extent in the northern and southern hemispheres by year"
+    for column in tb.drop(columns=["country", "year"]).columns:
+        location = column.split("sea_ice_extent_")[-1].title()
+        title = f"Monthly sea ice extent in the {location}"
+        description_short = "Each point represents the monthly average sea ice extent."
+        subtitle = (
+            description_short
+            + " Years in the current decade are highlighted in red, with the current year highlighted in black."
+        )
+        footnote = (
+            "All years have data for all 12 months, except 1987 and 1988 (each missing one month) and the current year."
+        )
+
+        tb[column].metadata.title = title
+        tb[column].metadata.description_short = description_short
+        tb[column].metadata.presentation.title_public = title
+        tb[column].metadata.presentation.grapher_config = {
+            "subtitle": subtitle,
+            "note": footnote,
+            "selectedEntityNames": columns,
+            "selectedEntityColors": colors,
+            "originUrl": "https://ourworldindata.org/climate-change",
+            "hideAnnotationFieldsInTitle": {"time": True},
+            "entityType": "year",
+            "entityTypePlural": "years",
+        }
+
+    return tb
+
+
+def sanity_check_inputs(tb: Table) -> None:
+    error = "Expected 1978 to be the first year in the data. Data may have changed. Consider editing YEAR_MIN."
+    assert tb["year"].min() == 1978, error
+
+    # All years should have 12 months except:
+    # * The very first year in the data (1978).
+    # * Years 1987 and 1988, which have 11 months (because 1987-12 and 1988-01 are missing).
+    # * The very last year in the data (since it's the ongoing year).
+    error = "Expected 12 months per year."
+    assert (
+        tb[~tb["year"].isin([tb["year"].min(), 1987, 1988, tb["year"].max()])]
+        .groupby(["location", "year"])
+        .count()["sea_ice_extent"]
+        == 12
+    ).all(), error
+    # Each month-year should appear only once in the data.
+    error = "Repeated months."
+    assert (tb.groupby(["location", "year", "month"]).count()["sea_ice_extent"] == 1).all(), error
+    # Each month-decade should appear 10 times (once per year in the decade), except:
+    # * The very first decade (1970s), since it starts in 1978. This decade will be ignored in the decadal data.
+    # * January and December 1980s, which appear 9 times (because 1987-12 and 1988-01 are missing).
+    # * The very last decade (since it's the ongoing decade).
+    error = "Expected 10 instances of each month per decade (except in specific cases)."
+    exceptions = tb[
+        (tb["decade"] == tb["decade"].min())
+        | (tb["decade"] == tb["decade"].max())
+        | ((tb["decade"] == 1980) & (tb["month"].isin([1, 12])))
+    ].index
+    assert (tb.drop(exceptions).groupby(["location", "decade", "month"]).count()["sea_ice_extent"] == 10).all(), error
+    assert (
+        tb[(tb["decade"] == 1980) & (tb["month"].isin([1, 12]))]
+        .groupby(["location", "decade", "month"])
+        .count()["sea_ice_extent"]
+        == 9
+    ).all(), error
+    assert (
+        tb[(tb["decade"] == tb["decade"].max())].groupby(["location", "decade", "month"]).count()["sea_ice_extent"]
+        <= 10
+    ).all(), error
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("sea_ice_index")
+    tb = ds_garden.read_table("sea_ice_index")
+
+    #
+    # Process data.
+    #
+    # Rename locations conveniently.
+    tb = tb.astype({"location": "string"})
+    tb.loc[tb["location"] == "Northern Hemisphere", "location"] = "Arctic"
+    tb.loc[tb["location"] == "Southern Hemisphere", "location"] = "Antarctic"
+    assert set(tb["location"]) == {"Arctic", "Antarctic"}, "Unexpected locations."
+
+    # Create columns for month, year, and decade.
+    tb["year"] = tb["date"].dt.year
+    tb["month"] = tb["date"].dt.month
+    tb["decade"] = (tb["year"] // 10) * 10
+
+    # Sanity checks.
+    sanity_check_inputs(tb=tb)
+
+    # Select years from YEAR_MIN onwards (see explanation above, where YEAR_MIN is defined).
+    tb = (
+        tb[(tb["year"] >= YEAR_MIN)]
+        .sort_values(["year", "month"], ascending=(False, True))
+        .drop(columns=["date"], errors="raise")
+        .reset_index(drop=True)
+    )
+
+    # Create one column for each hemisphere.
+    tb = tb.pivot(
+        index=["year", "month"], columns=["location"], values=["sea_ice_extent"], join_column_levels_with="_"
+    ).underscore()
+
+    # Create yearly table, adapting column names to grapher ("country" holds the year, "year" holds the month number).
+    tb = tb.rename(columns={"year": "country", "month": "year"}, errors="raise")
+
+    # Improve metadata.
+    tb = improve_metadata(tb=tb)
+
+    # Improve format.
+    tb = tb.format()
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset.
+    ds_grapher = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+    ds_grapher.save()
diff --git a/etl/steps/data/meadow/climate/2024-09-30/ghg_concentration.py b/etl/steps/data/meadow/climate/2024-09-30/ghg_concentration.py
new file mode 100644
index 00000000000..1ca24557052
--- /dev/null
+++ b/etl/steps/data/meadow/climate/2024-09-30/ghg_concentration.py
@@ -0,0 +1,42 @@
+"""Load a snapshot and create a meadow dataset."""
+
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+# Names of snapshot files to load and process.
+FILES = [
+    "co2_concentration_monthly",
+    "ch4_concentration_monthly",
+    "n2o_concentration_monthly",
+]
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Initialize dictionary to store raw tables.
+    tables = {}
+    for file_name in FILES:
+        # Retrieve snapshot.
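+        # NOTE: The NOAA GML monthly-mean files are CSVs whose header lines start
+        # with "#" and which use -9.99 as a missing-value marker; the read below
+        # accounts for both (based on the file layout at the time of writing).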
+ snap = paths.load_snapshot(f"{file_name}.csv") + + # Load data from snapshot. + tables[file_name] = snap.read(comment="#", na_values="-9.99") + + # + # Process data. + # + for file_name, tb in tables.items(): + # Set an appropriate index and sort conveniently. + tables[file_name] = tb.set_index(["year", "month"], verify_integrity=True).sort_index() + + # + # Save outputs. + # + # Create a new meadow dataset with one table for each gas. + ds_meadow = create_dataset(dest_dir, tables=tables.values(), check_variables_metadata=True) + ds_meadow.save() diff --git a/etl/steps/data/meadow/climate/2024-09-30/hawaii_ocean_time_series.py b/etl/steps/data/meadow/climate/2024-09-30/hawaii_ocean_time_series.py new file mode 100644 index 00000000000..0544b0cb638 --- /dev/null +++ b/etl/steps/data/meadow/climate/2024-09-30/hawaii_ocean_time_series.py @@ -0,0 +1,29 @@ +"""Load a snapshot and create a meadow dataset.""" + + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load snapshot and read its data. + tb = paths.load_snapshot("hawaii_ocean_time_series.csv").read(skiprows=8, sep="\t", na_values=[-999]) + + # + # Process data. + # + + # Set an appropriate index and sort conveniently. + tb = tb.set_index(["date"], verify_integrity=True).sort_index().sort_index(axis=1) + + # + # Save outputs. + # + # Create a new meadow dataset. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_meadow.save() diff --git a/etl/steps/data/meadow/climate/2024-09-30/ocean_heat_content.py b/etl/steps/data/meadow/climate/2024-09-30/ocean_heat_content.py new file mode 100644 index 00000000000..844f5d34220 --- /dev/null +++ b/etl/steps/data/meadow/climate/2024-09-30/ocean_heat_content.py @@ -0,0 +1,75 @@ +"""Load a snapshot and create a meadow dataset.""" + +import owid.catalog.processing as pr + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Names of snapshot files to load and process. +FILES = [ + "ocean_heat_content_monthly_world_700m", + "ocean_heat_content_monthly_world_2000m", + "ocean_heat_content_annual_world_700m", + "ocean_heat_content_annual_world_2000m", +] + +# Columns to select from annual data, and how to rename them. +COLUMNS_ANNUAL = { + "YEAR": "date", + "WO": "ocean_heat_content", +} + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load data from snapshots. + tables_monthly = [] + tables_annual = [] + for file_name in FILES: + # Extract depth and location from file name. + depth = int(file_name.split("_")[-1].replace("m", "")) + location = file_name.split("_")[-2].title() + if "monthly" in file_name: + # Read data. + new_table = paths.load_snapshot(f"{file_name}.csv").read(names=["date", "ocean_heat_content"]) + # Add columns for location and depth. + new_table = new_table.assign(**{"depth": depth, "location": location}) + # Add monthly table to list. + tables_monthly.append(new_table) + elif "annual" in file_name: + # Read data, select and rename columns. + new_table = ( + paths.load_snapshot(f"{file_name}.csv") + .read_fwf()[list(COLUMNS_ANNUAL)] + .rename(columns=COLUMNS_ANNUAL, errors="raise") + ) + # Add columns for location and depth. + new_table = new_table.assign(**{"depth": depth, "location": location}) + # Add annual table to list. 
+            tables_annual.append(new_table)
+        else:
+            raise ValueError(f"Unexpected file name: {file_name}")
+
+    #
+    # Process data.
+    #
+    # Combine monthly data into a single table.
+    tb_monthly = pr.concat(tables_monthly, short_name="ocean_heat_content_monthly")
+
+    # Combine annual data.
+    tb_annual = pr.concat(tables_annual, short_name="ocean_heat_content_annual")
+
+    # Set an appropriate index and sort conveniently.
+    tb_monthly = tb_monthly.set_index(["location", "depth", "date"], verify_integrity=True).sort_index()
+    tb_annual = tb_annual.set_index(["location", "depth", "date"], verify_integrity=True).sort_index()
+
+    #
+    # Save outputs.
+    #
+    # Create a new meadow dataset.
+    ds_meadow = create_dataset(dest_dir, tables=[tb_annual, tb_monthly], check_variables_metadata=True)
+    ds_meadow.save()
diff --git a/etl/steps/data/meadow/climate/2024-09-30/sea_ice_index.py b/etl/steps/data/meadow/climate/2024-09-30/sea_ice_index.py
new file mode 100644
index 00000000000..d4ded1a7859
--- /dev/null
+++ b/etl/steps/data/meadow/climate/2024-09-30/sea_ice_index.py
@@ -0,0 +1,51 @@
+"""Load a snapshot and create a meadow dataset."""
+
+import owid.catalog.processing as pr
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Retrieve snapshot.
+    snap = paths.load_snapshot("sea_ice_index.xlsx")
+
+    # Read data from snapshot.
+    data = snap.ExcelFile()
+
+    #
+    # Process data.
+    #
+    # Load sheets of northern and southern hemisphere sea ice extent.
+    tb_nh = data.parse("NH-Extent").assign(**{"location": "Northern Hemisphere"})
+    tb_sh = data.parse("SH-Extent").assign(**{"location": "Southern Hemisphere"})
+
+    # Sanity check.
+    assert tb_nh.iloc[0, 0] == 1978, "First cell in NH spreadsheet was expected to be 1978. Data has changed."
+    assert tb_sh.iloc[0, 0] == 1978, "First cell in SH spreadsheet was expected to be 1978. Data has changed."
+
+    # Concatenate both tables.
+    tb = pr.concat([tb_sh, tb_nh], ignore_index=True, short_name=paths.short_name)
+
+    # Fix column names.
+    tb = tb.rename(columns={tb.columns[0]: "year"})
+
+    # Drop empty rows and columns.
+    tb = tb.dropna(how="all").dropna(axis=1, how="all").reset_index(drop=True)
+
+    # Set an appropriate index and sort conveniently.
+    tb = tb.set_index(["location", "year"], verify_integrity=True).sort_index()
+
+    #
+    # Save outputs.
+    #
+    # Create a new meadow dataset with the same metadata as the snapshot.
+    ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+
+    # Save changes in the new meadow dataset.
+    ds_meadow.save()
diff --git a/etl/steps/data/meadow/climate/2024-09-30/sea_surface_temperature.py b/etl/steps/data/meadow/climate/2024-09-30/sea_surface_temperature.py
new file mode 100644
index 00000000000..50623be8b7a
--- /dev/null
+++ b/etl/steps/data/meadow/climate/2024-09-30/sea_surface_temperature.py
@@ -0,0 +1,49 @@
+"""Load a snapshot and create a meadow dataset."""
+
+import owid.catalog.processing as pr
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+# Names of snapshot files to load and process.
+FILES = [
+    "sea_surface_temperature_world",
+    "sea_surface_temperature_northern_hemisphere",
+    "sea_surface_temperature_southern_hemisphere",
+]
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load data from each of the snapshots, and add a column with the region name.
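+    # NOTE (illustrative): the region name is derived from the file name, e.g.
+    # "sea_surface_temperature_northern_hemisphere" -> "Northern Hemisphere".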
+    tables = [
+        paths.load_snapshot(f"{file_name}.csv")
+        .read()
+        .assign(**{"location": file_name.split("sea_surface_temperature_")[-1].replace("_", " ").title()})
+        for file_name in FILES
+    ]
+
+    #
+    # Process data.
+    #
+    # Concatenate all tables.
+    tb = pr.concat(tables)
+
+    # Set an appropriate index and sort conveniently.
+    tb = tb.set_index(["location", "year", "month"], verify_integrity=True).sort_index().sort_index(axis=1)
+
+    # Rename table.
+    tb.metadata.short_name = paths.short_name
+
+    #
+    # Save outputs.
+    #
+    # Create a new meadow dataset with the same metadata as the snapshot.
+    ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+
+    # Save changes in the new meadow dataset.
+    ds_meadow.save()
diff --git a/etl/steps/data/meadow/climate/2024-09-30/snow_cover_extent.py b/etl/steps/data/meadow/climate/2024-09-30/snow_cover_extent.py
new file mode 100644
index 00000000000..86e0d707a8b
--- /dev/null
+++ b/etl/steps/data/meadow/climate/2024-09-30/snow_cover_extent.py
@@ -0,0 +1,50 @@
+"""Load a snapshot and create a meadow dataset."""
+
+import owid.catalog.processing as pr
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+# Names of snapshot files to load and process.
+FILES = [
+    "snow_cover_extent_north_america",
+    "snow_cover_extent_northern_hemisphere",
+]
+
+# Names of columns in the data.
+COLUMNS = ["year", "month", "snow_cover_extent"]
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load snapshots and read their data.
+    tables = []
+    for file_name in FILES:
+        tb = paths.load_snapshot(f"{file_name}.csv").read_fwf(names=COLUMNS)
+        # Add a column for location.
+        tb["location"] = file_name.split("snow_cover_extent_")[-1].replace("_", " ").title()
+        # Add table to list.
+        tables.append(tb)
+
+    #
+    # Process data.
+    #
+    # Combine data from all tables.
+    tb = pr.concat(tables)
+
+    # Set an appropriate index and sort conveniently.
+    tb = tb.set_index(["location", "year", "month"], verify_integrity=True).sort_index().sort_index(axis=1)
+
+    # Update table name.
+    tb.metadata.short_name = paths.short_name
+
+    #
+    # Save outputs.
+    #
+    # Create a new meadow dataset.
+    ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+    ds_meadow.save()
diff --git a/etl/steps/data/meadow/climate/2024-09-30/surface_temperature_analysis.py b/etl/steps/data/meadow/climate/2024-09-30/surface_temperature_analysis.py
new file mode 100644
index 00000000000..88791a644b7
--- /dev/null
+++ b/etl/steps/data/meadow/climate/2024-09-30/surface_temperature_analysis.py
@@ -0,0 +1,62 @@
+"""Load a snapshot and create a meadow dataset."""
+
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+# Names of snapshot files to load and process.
+FILES = [
+    "surface_temperature_analysis_world",
+    "surface_temperature_analysis_northern_hemisphere",
+    "surface_temperature_analysis_southern_hemisphere",
+]
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Initialize dictionary to store raw tables.
+    tables = {}
+    for file_name in FILES:
+        # Retrieve snapshot.
+        snap = paths.load_snapshot(f"{file_name}.csv")
+
+        # Load data from snapshot.
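+        # NOTE: The GISTEMP CSVs begin with a one-line title followed by a header row
+        # containing "Year", the twelve months, and extra seasonal/annual aggregate
+        # columns, and they use "***" for missing values; hence skiprows=1,
+        # na_values="***" and the explicit usecols below (based on the file layout at
+        # the time of writing).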
+        tables[file_name] = snap.read(
+            skiprows=1,
+            na_values="***",
+            usecols=[
+                "Year",
+                "Jan",
+                "Feb",
+                "Mar",
+                "Apr",
+                "May",
+                "Jun",
+                "Jul",
+                "Aug",
+                "Sep",
+                "Oct",
+                "Nov",
+                "Dec",
+            ],
+        )
+
+    #
+    # Process data.
+    #
+    for file_name, tb in tables.items():
+        # Set an appropriate index and sort conveniently.
+        tables[file_name] = tb.set_index(["Year"], verify_integrity=True).sort_index()
+
+    #
+    # Save outputs.
+    #
+    # Create a new meadow dataset with the same metadata as the snapshot.
+    ds_meadow = create_dataset(dest_dir, tables=tables.values(), check_variables_metadata=True)
+
+    # Save changes in the new meadow dataset.
+    ds_meadow.save()
diff --git a/snapshots/climate/2024-09-30/ch4_concentration_monthly.csv.dvc b/snapshots/climate/2024-09-30/ch4_concentration_monthly.csv.dvc
new file mode 100644
index 00000000000..9498d652264
--- /dev/null
+++ b/snapshots/climate/2024-09-30/ch4_concentration_monthly.csv.dvc
@@ -0,0 +1,23 @@
+meta:
+  origin:
+    producer: NOAA Global Monitoring Laboratory
+    title: Trends in Atmospheric Methane
+    description: |-
+      The Carbon Cycle Greenhouse Gases (CCGG) research area operates the Global Greenhouse Gas Reference Network, measuring the atmospheric distribution and trends of the three main long-term drivers of climate change, carbon dioxide (CO2), methane (CH4), and nitrous oxide (N2O), as well as carbon monoxide (CO) which is an important indicator of air pollution.
+    citation_full: |-
+      National Oceanic and Atmospheric Administration (NOAA) Global Monitoring Laboratory, Boulder, Colorado, USA (https://gml.noaa.gov) - Trends in Atmospheric Methane.
+
+      Lan, X., K.W. Thoning, and E.J. Dlugokencky: Trends in globally-averaged CH4, N2O, and SF6 determined from NOAA Global Monitoring Laboratory measurements. https://doi.org/10.15138/P8XG-AA10
+    attribution: NOAA Global Monitoring Laboratory - Trends in Atmospheric Methane (2024)
+    attribution_short: NOAA/GML
+    url_main: https://gml.noaa.gov/ccgg/trends_ch4/
+    url_download: https://gml.noaa.gov/webdata/ccgg/trends/ch4/ch4_mm_gl.csv
+    date_accessed: '2024-09-30'
+    date_published: '2024-08-05'
+    license:
+      name: CC BY 4.0
+      url: https://gml.noaa.gov/about/disclaimer.html
+outs:
+  - md5: deec430f91f5b7fad34f4b326d8f4dac
+    size: 22903
+    path: ch4_concentration_monthly.csv
diff --git a/snapshots/climate/2024-09-30/climate_change_impacts.py b/snapshots/climate/2024-09-30/climate_change_impacts.py
new file mode 100644
index 00000000000..f3aa5e5f526
--- /dev/null
+++ b/snapshots/climate/2024-09-30/climate_change_impacts.py
@@ -0,0 +1,213 @@
+"""Script to create a snapshot for each of the climate change datasets that have regular updates.
+
+The publication date will be automatically extracted from the source website, if possible, and otherwise it will be
+assumed to be the same as the access date. These dates will be written to the metadata dvc files.
+
+NOTE: If any of the snapshots fails, first try to fix the issue. But, if that's not possible (e.g. because the data provider server is down, which happens relatively often), follow these steps:
+1. Remove the new .dvc file of that failing snapshot.
+2. Edit the climate.yml dag file, so that the affected meadow steps use the latest working snapshot.
+3. Comment out the file names of the failing snapshots in the "FILES" list below.
+4. Execute this script.
+   * If another snapshot fails, go back to step 1.
+5. Uncomment the file names of the failing snapshots (so that on the next update all snapshots will be executed).
+6. Commit the changes in the dag.
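+
+For example, if sea_ice_index.xlsx were the failing snapshot: remove
+snapshots/climate/2024-09-30/sea_ice_index.xlsx.dvc, edit the dag so that
+data://meadow/climate/2024-09-30/sea_ice_index points to the latest working
+snapshot version instead, and comment out "sea_ice_index.xlsx" in the "FILES"
+list below before re-running this script.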
+
+If a certain snapshot has been failing multiple times (which you can see by looking at the date of the latest working snapshot), consider changing the data provider.
+
+"""
+
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+import click
+import requests
+from bs4 import BeautifulSoup
+from structlog import get_logger
+
+from etl.snapshot import Snapshot
+
+log = get_logger()
+
+# Version for current snapshot dataset.
+SNAPSHOT_VERSION = Path(__file__).parent.name
+
+# Names of data files.
+FILES = [
+    # NASA Goddard Institute for Space Studies - GISS Surface Temperature Analysis.
+    # NOTE: Publication date cannot be automatically extracted.
+    "surface_temperature_analysis_world.csv",
+    "surface_temperature_analysis_northern_hemisphere.csv",
+    "surface_temperature_analysis_southern_hemisphere.csv",
+    # National Snow and Ice Data Center - Sea Ice Index.
+    "sea_ice_index.xlsx",
+    # Met Office Hadley Centre - HadSST.
+    "sea_surface_temperature_world.csv",
+    "sea_surface_temperature_northern_hemisphere.csv",
+    "sea_surface_temperature_southern_hemisphere.csv",
+    # NOAA National Centers for Environmental Information - Ocean Heat Content.
+    # NOTE: Publication date cannot be automatically extracted.
+    "ocean_heat_content_monthly_world_700m.csv",
+    "ocean_heat_content_monthly_world_2000m.csv",
+    "ocean_heat_content_annual_world_700m.csv",
+    "ocean_heat_content_annual_world_2000m.csv",
+    # School of Ocean and Earth Science and Technology - Hawaii Ocean Time-series.
+    "hawaii_ocean_time_series.csv",
+    # Rutgers University Global Snow Lab - Snow Cover Extent.
+    # NOTE: Publication date cannot be automatically extracted. But they seem to have regular updates (even daily).
+    "snow_cover_extent_north_america.csv",
+    "snow_cover_extent_northern_hemisphere.csv",
+    # NOAA Global Monitoring Laboratory.
+    "co2_concentration_monthly.csv",
+    "ch4_concentration_monthly.csv",
+    "n2o_concentration_monthly.csv",
+]
+
+########################################################################################################################
+# Other possible datasets to include:
+# * Ocean heat content data from MRI/JMA. We have this data as part of the EPA ocean heat content compilation.
+# But in the following link, they claim the data is updated every year, so it could be added to our yearly data.
+# https://www.data.jma.go.jp/gmd/kaiyou/english/ohc/ohc_global_en.html
+# * Rutgers University Global Snow Lab also includes snow cover extent for:
+# * Eurasia: https://climate.rutgers.edu/snowcover/files/moncov.eurasia.txt
+# * North America (excluding Greenland): https://climate.rutgers.edu/snowcover/files/moncov.nam.txt
+# * Ice sheet mass balance from NASA EarthData. This is regularly updated, but to access it one has to manually log in.
+# The data can be manually accessed from:
+# https://climate.nasa.gov/vital-signs/ice-sheets/
+# By clicking on the HTTP link. This leads to a manual log-in page.
+# Once logged in, the data is accessible via the following link:
+# https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ANTARCTICA_MASS_TELLUS_MASCON_CRI_TIME_SERIES_RL06.1_V3/antarctica_mass_200204_202310.txt
+# So, one could use this link, trying with different dates (e.g. ..._202401.txt, ..._202312.txt, ..._202311.txt),
+# until the most recent file is downloaded.
+# I contacted EarthData to ask if there is any way to access the latest data programmatically.
+# * Global sea level from NASA.
+# We could get more up-to-date data on sea levels from https://sealevel.jpl.nasa.gov/ +# but we would need to use a special library with credentials to fetch the data (and the baseline and format would +# probably be different). +######################################################################################################################## + + +def find_date_published(snap: Snapshot) -> Optional[str]: + # Extract publication date for each individual origin, if possible. + # Otherwise, assign the current access date as publication date. + if snap.path.name == "sea_ice_index.xlsx": + # * For sea_ice_index, the date_published can be found on: + # https://noaadata.apps.nsidc.org/NOAA/G02135/seaice_analysis/ + # Next to the file name (Sea_Ice_Index_Monthly_Data_by_Year_G02135_v3.0.xlsx). + + # Extract all the text in the web page. + url = "/".join(snap.metadata.origin.url_download.split("/")[:-1]) # type: ignore + response = requests.get(url) + # Parse HTML content. + soup = BeautifulSoup(response.text, "html.parser") + + # Fetch the date that is written next to the title. + for line in soup.text.split("\n"): + if "Sea_Ice_Index_Monthly_Data_by_Year" in line: + dates = re.findall(r"\d{2}-\w{3}-\d{4}", line) + if len(dates) == 1: + # Format date conveniently. + date = datetime.strptime(dates[0], "%d-%b-%Y").strftime("%Y-%m-%d") + return date + else: + log.warn(f"Failed to extract date_published for: {snap.path.name}") + + elif snap.path.name.startswith("sea_surface_temperature_"): + # * For sea_surface_temperature_* the date_published can be found on: + # https://www.metoffice.gov.uk/hadobs/hadsst4/data/download.html + + # Extract all the text in the web page. + url = snap.metadata.origin.url_download.split("/data/")[0] + "/data/download.html" # type: ignore + response = requests.get(url) + # Parse HTML content. + soup = BeautifulSoup(response.text, "html.parser") + + for line in soup.text.split("\n"): + # At the bottom of the page, there is a line like "Last updated: 09/01/2024 Expires: 09/01/2025". + if "Last updated" in line: + dates = re.findall(r"\d{2}/\d{2}/\d{4}", line) + if len(dates) == 2: + # Format date conveniently. + date = datetime.strptime(dates[0], "%d/%m/%Y").strftime("%Y-%m-%d") + return date + else: + log.warn(f"Failed to extract date_published for: {snap.path.name}") + + elif snap.path.name == "hawaii_ocean_time_series.csv": + # * For the Hawaii Ocean Time-Series, the date_published can be found written on the header of the data itself: + # https://hahana.soest.hawaii.edu/hot/hotco2/HOT_surface_CO2.txt + + # Extract text from data file. + url = snap.metadata.origin.url_download # type: ignore + response = requests.get(url) # type: ignore[reportArgumentType] + for line in response.text.split("\n"): + # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". + if "Last updated" in line: + # Regular expression to extract the date + dates = re.findall(r"\d{1,2}\s+\w+\s+\d{4}", line) + if len(dates) == 1: + # Format date conveniently. + date = datetime.strptime(dates[0], "%d %B %Y").strftime("%Y-%m-%d") + return date + else: + log.warn(f"Failed to extract date_published for: {snap.path.name}") + + elif "_concentration" in snap.path.name: + # * For NOAA GML concentration data, the date_published can be found in the header of each data file. + # The date is in a line like "# File Creation: Fri Jan 5 03:55:24 2024". + + # Extract text from data file. 
+        url = snap.metadata.origin.url_download  # type: ignore
+        response = requests.get(url)  # type: ignore[reportArgumentType]
+        for line in response.text.split("\n"):
+            # In the header, there is a line like "# File Creation: Fri Jan 5 03:55:24 2024".
+            if "File Creation" in line:
+                # Regular expression to extract the date
+                dates = re.findall(r"\w{3}\s\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s\d{4}", line)
+                if len(dates) == 1:
+                    # Format date conveniently.
+                    date = datetime.strptime(dates[0], "%a %b %d %H:%M:%S %Y").strftime("%Y-%m-%d")
+                    return date
+                else:
+                    log.warn(f"Failed to extract date_published for: {snap.path.name}")
+
+    # In all other cases, assume date_published is the same as date_accessed.
+    return snap.metadata.origin.date_accessed  # type: ignore
+
+
+@click.command()
+@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
+def main(upload: bool) -> None:
+    # Create a new snapshot metadata dvc file for each of the data files.
+    for file_name in FILES:
+        snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/{file_name}")
+
+        # To ease the recurrent update task, fetch the access date from the version, and write it to the dvc files.
+        snap.metadata.origin.date_accessed = SNAPSHOT_VERSION  # type: ignore
+
+        # Extract publication date, if possible, and otherwise assume it is the same as the access date.
+        snap.metadata.origin.date_published = find_date_published(snap=snap)  # type: ignore
+
+        # Extract publication year from date_published (which will be used in the custom attribution).
+        year_published = snap.metadata.origin.date_published.split("-")[0]  # type: ignore
+
+        # Assign a custom attribution.
+        snap.metadata.origin.attribution = (  # type: ignore
+            f"{snap.metadata.origin.producer} - {snap.metadata.origin.title} ({year_published})"  # type: ignore
+        )
+
+        # Rewrite metadata to dvc file.
+        snap.metadata_path.write_text(snap.metadata.to_yaml())
+
+    # Create the actual snapshots, download the data and upload them to S3.
+    # NOTE: This cannot be done as part of the previous loop because, if the folder of dvc files has been manually
+    # duplicated (without manually removing the "outs" section), `create_snapshot` will fail because there are multiple
+    # files with the same "outs". Therefore, we first clean the dvc files, and then run `create_snapshot`.
+    for file_name in FILES:
+        snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/{file_name}")
+        snap.create_snapshot(upload=upload)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/snapshots/climate/2024-09-30/co2_concentration_monthly.csv.dvc b/snapshots/climate/2024-09-30/co2_concentration_monthly.csv.dvc
new file mode 100644
index 00000000000..b8b7d4cf9e4
--- /dev/null
+++ b/snapshots/climate/2024-09-30/co2_concentration_monthly.csv.dvc
@@ -0,0 +1,23 @@
+meta:
+  origin:
+    producer: NOAA Global Monitoring Laboratory
+    title: Trends in Atmospheric Carbon Dioxide
+    description: |-
+      The Carbon Cycle Greenhouse Gases (CCGG) research area operates the Global Greenhouse Gas Reference Network, measuring the atmospheric distribution and trends of the three main long-term drivers of climate change, carbon dioxide (CO2), methane (CH4), and nitrous oxide (N2O), as well as carbon monoxide (CO) which is an important indicator of air pollution.
+    citation_full: |-
+      National Oceanic and Atmospheric Administration (NOAA) Global Monitoring Laboratory, Boulder, Colorado, USA (https://gml.noaa.gov) - Trends in Atmospheric Carbon Dioxide.
+
+      Lan, X., Tans, P. and K.W.
Thoning: Trends in globally-averaged CO2 determined from NOAA Global Monitoring Laboratory measurements. https://doi.org/10.15138/9N0H-ZH07 + attribution: NOAA Global Monitoring Laboratory - Trends in Atmospheric Carbon Dioxide (2024) + attribution_short: NOAA/GML + url_main: https://gml.noaa.gov/ccgg/trends/gl_data.html + url_download: https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_gl.csv + date_accessed: '2024-09-30' + date_published: '2024-08-05' + license: + name: CC BY 4.0 + url: https://gml.noaa.gov/about/disclaimer.html +outs: + - md5: f3075e37532c18e3d00608c812ea5269 + size: 23717 + path: co2_concentration_monthly.csv diff --git a/snapshots/climate/2024-09-30/hawaii_ocean_time_series.csv.dvc b/snapshots/climate/2024-09-30/hawaii_ocean_time_series.csv.dvc new file mode 100644 index 00000000000..d8d3ac5f69b --- /dev/null +++ b/snapshots/climate/2024-09-30/hawaii_ocean_time_series.csv.dvc @@ -0,0 +1,25 @@ +meta: + origin: + producer: School of Ocean & Earth Science & Technology + title: Hawaii Ocean Time-series + citation_full: |- + School of Ocean and Earth Science and Technology at the University of Hawai'i at Manoa - Hawaii Ocean Time-series (HOT). + + Dore, J.E., R. Lukas, D.W. Sadler, M.J. Church, and D.M. Karl. 2009. Physical and biogeochemical modulation of ocean acidification in the central North Pacific. Proc Natl Acad Sci USA 106:12235-12240. + + HOT observations are supported by the U.S. National Science Foundation under Award #1756517. + + More details can be found at [the HOT Carbon Dioxide page](https://hahana.soest.hawaii.edu/hot/hotco2/hotco2.html), specifically in [this technical document](https://hahana.soest.hawaii.edu/hot/hotco2/HOT_surface_CO2_readme.pdf). + attribution: School of Ocean & Earth Science & Technology - Hawaii Ocean Time-series (2023) + attribution_short: SOEST/Hawaii + url_main: https://hahana.soest.hawaii.edu/hot/ + url_download: https://hahana.soest.hawaii.edu/hot/hotco2/HOT_surface_CO2.txt + date_accessed: '2024-09-30' + date_published: '2023-12-11' + license: + name: Public domain + url: https://hahana.soest.hawaii.edu/hot/dataaccess.html +outs: + - md5: fd502d28aa85a6f241e9507d85b8ca8b + size: 44820 + path: hawaii_ocean_time_series.csv diff --git a/snapshots/climate/2024-09-30/n2o_concentration_monthly.csv.dvc b/snapshots/climate/2024-09-30/n2o_concentration_monthly.csv.dvc new file mode 100644 index 00000000000..a578b5479c6 --- /dev/null +++ b/snapshots/climate/2024-09-30/n2o_concentration_monthly.csv.dvc @@ -0,0 +1,23 @@ +meta: + origin: + producer: NOAA Global Monitoring Laboratory + title: Trends in Atmospheric Nitrous Oxide + description: |- + The Carbon Cycle Greenhouse Gases (CCGG) research area operates the Global Greenhouse Gas Reference Network, measuring the atmospheric distribution and trends of the three main long-term drivers of climate change, carbon dioxide (CO2), methane (CH4), and nitrous oxide (N2O), as well as carbon monoxide (CO) which is an important indicator of air pollution. + citation_full: |- + National Oceanic and Atmospheric Administration (NOAA) Global Monitoring Laboratory, Boulder, Colorado, USA (https://gml.noaa.gov) - Trends in Atmospheric Nitrous Oxide. + + Lan, X., K.W. Thoning, and E.J. Dlugokencky: Trends in globally-averaged CH4, N2O, and SF6 determined from NOAA Global Monitoring Laboratory measurements. 
https://doi.org/10.15138/P8XG-AA10 + attribution: NOAA Global Monitoring Laboratory - Trends in Atmospheric Nitrous Oxide (2024) + attribution_short: NOAA/GML + url_main: https://gml.noaa.gov/ccgg/trends_n2o/ + url_download: https://gml.noaa.gov/webdata/ccgg/trends/n2o/n2o_mm_gl.csv + date_accessed: '2024-09-30' + date_published: '2024-08-05' + license: + name: CC BY 4.0 + url: https://gml.noaa.gov/about/disclaimer.html +outs: + - md5: a4960dfe2e0bfab995592893e9a50a40 + size: 13469 + path: n2o_concentration_monthly.csv diff --git a/snapshots/climate/2024-09-30/sea_ice_index.xlsx.dvc b/snapshots/climate/2024-09-30/sea_ice_index.xlsx.dvc new file mode 100644 index 00000000000..9b611063a15 --- /dev/null +++ b/snapshots/climate/2024-09-30/sea_ice_index.xlsx.dvc @@ -0,0 +1,19 @@ +meta: + origin: + producer: National Snow and Ice Data Center + title: Sea Ice Index + citation_full: |- + Fetterer, F., K. Knowles, W. N. Meier, M. Savoie, and A. K. Windnagel. (2017). Sea Ice Index, Version 3 [Data Set]. Boulder, Colorado USA. National Snow and Ice Data Center. https://doi.org/10.7265/N5K072F8. + attribution: National Snow and Ice Data Center - Sea Ice Index (2024) + attribution_short: NSIDC + version_producer: Version 3 + url_main: https://nsidc.org/data/g02135/ + url_download: https://noaadata.apps.nsidc.org/NOAA/G02135/seaice_analysis/Sea_Ice_Index_Monthly_Data_by_Year_G02135_v3.0.xlsx + date_accessed: '2024-09-30' + date_published: '2024-09-29' + license: + name: CC BY 4.0 +outs: + - md5: 09225afbd376e58037d3b3bfcbe7df96 + size: 25316 + path: sea_ice_index.xlsx diff --git a/snapshots/climate/2024-09-30/sea_surface_temperature_northern_hemisphere.csv.dvc b/snapshots/climate/2024-09-30/sea_surface_temperature_northern_hemisphere.csv.dvc new file mode 100644 index 00000000000..0113b45e878 --- /dev/null +++ b/snapshots/climate/2024-09-30/sea_surface_temperature_northern_hemisphere.csv.dvc @@ -0,0 +1,26 @@ +meta: + origin: + producer: Met Office Hadley Centre + title: Hadley Centre's Sea Surface Temperature (HadSST) + title_snapshot: Hadley Centre's Sea Surface Temperature (HadSST) - Northern hemisphere + citation_full: |- + Met Office Hadley Centre - Hadley Centre's Sea Surface Temperature (HadSST). + + Kennedy, J. J., Rayner, N. A., Atkinson, C. P., & Killick, R. + E. (2019). An ensemble data set of sea-surface temperature change from 1850: + the Met Office Hadley Centre HadSST.4.0.0.0 data set. Journal of Geophysical + Research: Atmospheres, 124. 
https://doi.org/10.1029/2018JD029867 + attribution: Met Office Hadley Centre - Hadley Centre's Sea Surface Temperature (HadSST) (2024) + attribution_short: Met Office + version_producer: 4.0.1.0 + url_main: https://www.metoffice.gov.uk/hadobs/hadsst4/ + url_download: https://www.metoffice.gov.uk/hadobs/hadsst4/data/csv/HadSST.4.0.1.0_monthly_NHEM.csv + date_accessed: '2024-09-30' + date_published: '2024-09-26' + license: + name: Open Government Licence v3 + url: https://www.metoffice.gov.uk/hadobs/hadsst4/data/download.html +outs: + - md5: 58e4bd0d122b2eaf2a4e27cf5cac2eed + size: 153174 + path: sea_surface_temperature_northern_hemisphere.csv diff --git a/snapshots/climate/2024-09-30/sea_surface_temperature_southern_hemisphere.csv.dvc b/snapshots/climate/2024-09-30/sea_surface_temperature_southern_hemisphere.csv.dvc new file mode 100644 index 00000000000..1c266ffe5d0 --- /dev/null +++ b/snapshots/climate/2024-09-30/sea_surface_temperature_southern_hemisphere.csv.dvc @@ -0,0 +1,26 @@ +meta: + origin: + producer: Met Office Hadley Centre + title: Hadley Centre's Sea Surface Temperature (HadSST) + title_snapshot: Hadley Centre's Sea Surface Temperature (HadSST) - Southern hemisphere + citation_full: |- + Met Office Hadley Centre - Hadley Centre's Sea Surface Temperature (HadSST). + + Kennedy, J. J., Rayner, N. A., Atkinson, C. P., & Killick, R. + E. (2019). An ensemble data set of sea-surface temperature change from 1850: + the Met Office Hadley Centre HadSST.4.0.0.0 data set. Journal of Geophysical + Research: Atmospheres, 124. https://doi.org/10.1029/2018JD029867 + attribution: Met Office Hadley Centre - Hadley Centre's Sea Surface Temperature (HadSST) (2024) + attribution_short: Met Office + version_producer: 4.0.1.0 + url_main: https://www.metoffice.gov.uk/hadobs/hadsst4/ + url_download: https://www.metoffice.gov.uk/hadobs/hadsst4/data/csv/HadSST.4.0.1.0_monthly_SHEM.csv + date_accessed: '2024-09-30' + date_published: '2024-09-26' + license: + name: Open Government Licence v3 + url: https://www.metoffice.gov.uk/hadobs/hadsst4/data/download.html +outs: + - md5: fe1c77ed38b002ce86cb062d4f33f0f9 + size: 153770 + path: sea_surface_temperature_southern_hemisphere.csv diff --git a/snapshots/climate/2024-09-30/sea_surface_temperature_world.csv.dvc b/snapshots/climate/2024-09-30/sea_surface_temperature_world.csv.dvc new file mode 100644 index 00000000000..ba7b08650b7 --- /dev/null +++ b/snapshots/climate/2024-09-30/sea_surface_temperature_world.csv.dvc @@ -0,0 +1,26 @@ +meta: + origin: + producer: Met Office Hadley Centre + title: Hadley Centre's Sea Surface Temperature (HadSST) + title_snapshot: Hadley Centre's Sea Surface Temperature (HadSST) - World + citation_full: |- + Met Office Hadley Centre - Hadley Centre's Sea Surface Temperature (HadSST). + + Kennedy, J. J., Rayner, N. A., Atkinson, C. P., & Killick, R. + E. (2019). An ensemble data set of sea-surface temperature change from 1850: + the Met Office Hadley Centre HadSST.4.0.0.0 data set. Journal of Geophysical + Research: Atmospheres, 124. 
https://doi.org/10.1029/2018JD029867 + attribution: Met Office Hadley Centre - Hadley Centre's Sea Surface Temperature (HadSST) (2024) + attribution_short: Met Office + version_producer: 4.0.1.0 + url_main: https://www.metoffice.gov.uk/hadobs/hadsst4/ + url_download: https://www.metoffice.gov.uk/hadobs/hadsst4/data/csv/HadSST.4.0.1.0_monthly_GLOBE.csv + date_accessed: '2024-09-30' + date_published: '2024-09-26' + license: + name: Open Government Licence v3 + url: https://www.metoffice.gov.uk/hadobs/hadsst4/data/download.html +outs: + - md5: 5d5dd43671fb8c72a6561fe3c4c3a295 + size: 153663 + path: sea_surface_temperature_world.csv diff --git a/snapshots/climate/2024-09-30/snow_cover_extent_north_america.csv.dvc b/snapshots/climate/2024-09-30/snow_cover_extent_north_america.csv.dvc new file mode 100644 index 00000000000..aeb1da811d8 --- /dev/null +++ b/snapshots/climate/2024-09-30/snow_cover_extent_north_america.csv.dvc @@ -0,0 +1,22 @@ +meta: + origin: + producer: Rutgers University Global Snow Lab + title: Snow Cover Extent + title_snapshot: Area of Snow Extent - North America (including Greenland) + citation_full: |- + Rutgers University Global Snow Lab - Area of Snow Extent. + + Robinson, David A., Estilow, Thomas W., and NOAA CDR Program (2012): NOAA Climate Data Record (CDR) of Northern Hemisphere (NH) Snow Cover Extent (SCE), Version 1. NOAA National Centers for Environmental Information. doi: 10.7289/V5N014G9 + attribution: Rutgers University Global Snow Lab - Snow Cover Extent (2024) + attribution_short: Rutgers + version_producer: Version 1 + url_main: https://climate.rutgers.edu/snowcover/table_area.php?ui_set=1&ui_sort=0 + url_download: https://climate.rutgers.edu/snowcover/files/moncov.namgnld.txt + date_accessed: '2024-09-30' + date_published: '2024-09-30' + license: + name: CC BY 4.0 +outs: + - md5: f36b06680b32bea93622889843190ba5 + size: 12690 + path: snow_cover_extent_north_america.csv diff --git a/snapshots/climate/2024-09-30/snow_cover_extent_northern_hemisphere.csv.dvc b/snapshots/climate/2024-09-30/snow_cover_extent_northern_hemisphere.csv.dvc new file mode 100644 index 00000000000..0a812aec87b --- /dev/null +++ b/snapshots/climate/2024-09-30/snow_cover_extent_northern_hemisphere.csv.dvc @@ -0,0 +1,22 @@ +meta: + origin: + producer: Rutgers University Global Snow Lab + title: Snow Cover Extent + title_snapshot: Area of Snow Extent - Northern Hemisphere + citation_full: |- + Rutgers University Global Snow Lab - Area of Snow Extent. + + Robinson, David A., Estilow, Thomas W., and NOAA CDR Program (2012): NOAA Climate Data Record (CDR) of Northern Hemisphere (NH) Snow Cover Extent (SCE), Version 1. NOAA National Centers for Environmental Information. doi: 10.7289/V5N014G9 + attribution: Rutgers University Global Snow Lab - Snow Cover Extent (2024) + attribution_short: Rutgers + version_producer: Version 1 + url_main: https://climate.rutgers.edu/snowcover/table_area.php?ui_set=1&ui_sort=0 + url_download: https://climate.rutgers.edu/snowcover/files/moncov.nhland.txt + date_accessed: '2024-09-30' + date_published: '2024-09-30' + license: + name: CC BY 4.0 +outs: + - md5: 2db9a4b2f070fad779235b809bd398e9 + size: 12813 + path: snow_cover_extent_northern_hemisphere.csv