Commit

First stab at event-set flood damages

thomas-fred committed Feb 7, 2024
1 parent a8a60b9 commit 8bdfe19
Showing 4 changed files with 199 additions and 3 deletions.
5 changes: 4 additions & 1 deletion config/config.yaml
@@ -10,11 +10,13 @@ hazard_datasets:
#aqueduct-coast: 'config/hazard_resource_locations/aqueduct-coast_wtsub_perc95.txt'
#aqueduct-river: 'config/hazard_resource_locations/aqueduct-river_rcp4p5_MIROC-ESM-CHEM_2030_tifs.txt' # subset of aqueduct_tifs.txt
aqueduct-river: 'config/hazard_resource_locations/aqueduct-river.txt'
jba-event: 'config/hazard_resource_locations/jba-events.txt'

# TODO: have hazards as instances of some kind of struct rather than several dicts?
hazard_types:
aqueduct-coast: 'flood'
aqueduct-river: 'flood'
jba-event: 'flood'

# OSM datasets in PBF format, principally from: https://download.geofabrik.de/ #
infrastructure_datasets:
@@ -35,6 +37,7 @@ infrastructure_datasets:
jamaica-latest: 'http://download.geofabrik.de/central-america/jamaica-latest.osm.pbf'
kenya-latest: 'http://download.geofabrik.de/africa/kenya-latest.osm.pbf'
tanzania-latest: 'https://download.geofabrik.de/africa/tanzania-latest.osm.pbf'
thailand-latest: 'https://download.geofabrik.de/asia/thailand-latest.osm.pbf'
wales-latest: 'https://download.geofabrik.de/europe/great-britain/wales-latest.osm.pbf'
# small extract for testing purposes
tanzania-mini: 'https://raw.githubusercontent.com/mjaquiery/aqueduct/main/tanzania-mini.osm.pbf'
@@ -52,7 +55,7 @@ keep_tags:
rail: ['railway', 'bridge', 'name']

# Number of slices to cut dataset into -- must be a square number
-slice_count: 1
+slice_count: 64

# CRS OSM uses
osm_epsg: 4326
172 changes: 172 additions & 0 deletions workflow/transport-flood/event_set_direct_damages.py
@@ -0,0 +1,172 @@
"""
Given an exposure estimate and some damage curves, calculate the damage
fraction for exposed assets.
"""

import logging
import sys
import warnings

import geopandas as gpd
import pandas as pd
from scipy.interpolate import interp1d

from open_gira import fields
from open_gira.io import write_empty_frames, read_damage_curves
from open_gira.utils import natural_sort


if __name__ == "__main__":

try:
unsplit_path: str = snakemake.input["unsplit"]
exposure_path: str = snakemake.input["exposure"]
damage_fraction_path: str = snakemake.output["damage_fraction"]
damage_cost_path: str = snakemake.output["damage_cost"]
damage_curves_dir: str = snakemake.config["direct_damages"]["curves_dir"]
network_type: str = snakemake.params["network_type"]
hazard_type: str = snakemake.params["hazard_type"]
asset_types: set[str] = set(snakemake.config["direct_damages"]["asset_types"])
except NameError:
raise ValueError("Must be run via snakemake.")

    OUTPUT_FILE_PATHS: tuple[str, ...] = (
damage_fraction_path,
damage_cost_path,
)

logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO)

# Ignore geopandas parquet implementation warnings
    # Note, however, that .geoparquet is not the format to use for archiving.
warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*")

    # load curves first so that, if we fail, we fail early
    # and don't try to load the (potentially large) exposure file
damage_curves = read_damage_curves(damage_curves_dir, hazard_type, asset_types)
logging.info(f"Available damage curves: {damage_curves.keys()}")

logging.info("Reading exposure (network/raster intersection) data")
exposure: gpd.GeoDataFrame = gpd.read_parquet(exposure_path)
logging.info(f"{exposure.shape=}")

if exposure.empty:
logging.info("No data in geometry column, writing empty files.")

# snakemake requires that output files exist, even if empty
for path in OUTPUT_FILE_PATHS:
write_empty_frames(path)
sys.exit(0) # exit gracefully so snakemake will continue

# column groupings for data selection
hazard_columns = [col for col in exposure.columns if col.startswith(fields.HAZARD_PREFIX)]
non_hazard_columns = list(set(exposure.columns) - set(hazard_columns))
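    # each hazard column is assumed to hold the intensities sampled from one
    # hazard raster (for an event set, one event per raster)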

##########################################################
# DAMAGE FRACTIONS (per split geometry, for all rasters) #
##########################################################

# calculate damages for assets we have damage curves for
damage_fraction_by_asset_type = []
logging.info(f"Exposed assets {set(exposure.asset_type)}")
for asset_type in natural_sort(set(exposure.asset_type) & set(damage_curves.keys())):

logging.info(f"Processing {asset_type=}")
damage_curve: pd.DataFrame = damage_curves[asset_type]

# pick out rows of asset type and columns of hazard intensity
        asset_type_mask: pd.Series = exposure.asset_type == asset_type
asset_exposure: pd.DataFrame = pd.DataFrame(exposure.loc[asset_type_mask, hazard_columns])

# create interpolated damage curve for given asset type
hazard_intensity, damage_fraction = damage_curve.iloc[:, 0], damage_curve.iloc[:, 1]
        # outside the curve's domain, clamp to its minimum and maximum damage
        # fractions (y_0 and y_n for a monotonically increasing curve)
        bounds = tuple(f(damage_fraction) for f in (min, max))
interpolated_damage_curve = interp1d(
hazard_intensity,
damage_fraction,
kind='linear',
fill_value=bounds,
bounds_error=False
)
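        # e.g. for an illustrative curve [(0.5, 0.0), (2.0, 0.6)]: an intensity
        # of 1.25 interpolates linearly to a damage fraction of 0.3, while 0.2
        # and 3.0 fall outside the domain and clamp to 0.0 and 0.6 respectively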

# apply damage_curve function to exposure table
# the return value of interpolated_damage_curve is a numpy array
logging.info("Calculating damage fractions")
damage_fraction_for_asset_type = pd.DataFrame(
interpolated_damage_curve(asset_exposure),
index=asset_exposure.index,
columns=asset_exposure.columns
)

# store the computed direct damages and any columns we started with
# (other than exposure)
damage_fraction_by_asset_type.append(
pd.concat(
[
damage_fraction_for_asset_type,
exposure.loc[asset_type_mask, non_hazard_columns]
],
axis="columns"
)
)

# concatenate damage fractions for different asset types into single dataframe
damage_fraction: gpd.GeoDataFrame = gpd.GeoDataFrame(pd.concat(damage_fraction_by_asset_type))

###################################################################
# DAMAGE COST (for split, then grouped geometry, for all rasters) #
###################################################################

# multiply the damage fraction estimates by a cost to rebuild the asset
# units are: 1 * USD/km * km = USD
logging.info("Calculating direct damage costs")
direct_damages_only = damage_fraction[hazard_columns] \
.multiply(damage_fraction[fields.REHAB_COST], axis="index") \
.multiply(damage_fraction[fields.SPLIT_LENGTH], axis="index")
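    # illustrative numbers: a 0.05 km split segment with a rehabilitation cost
    # of 1.2e6 USD/km and a damage fraction of 0.3 contributes
    # 0.3 * 1.2e6 * 0.05 = 18,000 USD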

logging.info("Reading raw network data for unsplit geometry")
unsplit: gpd.GeoDataFrame = gpd.read_parquet(unsplit_path)
logging.info(f"{unsplit.shape=}")

# join the other fields with the direct damage estimates
logging.info("Unifying rasterised segments and summing damage costs")

# grouping on edge_id, sum all direct damage estimates to give a total dollar cost per edge
direct_damages = pd.concat(
[direct_damages_only, damage_fraction["edge_id"]],
axis="columns"
).set_index("edge_id")
grouped_direct_damages = direct_damages.groupby(direct_damages.index).sum()
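    # e.g. an edge split into three rastered segments with damages of 5,000,
    # 0 and 18,000 USD collapses to a single edge_id row totalling 23,000 USD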

#########################################
# JOINING, VALIDATION AND SERIALIZATION #
#########################################

# lose columns like "cell_indicies" or rastered length measures that are specific to _rastered_ edges
non_hazard_output_columns = list(set(non_hazard_columns) & set(unsplit.columns))
unsplit_subset = unsplit[non_hazard_output_columns].set_index("edge_id", drop=False)

# rejoin direct damage cost estimates with geometry and metadata columns and write to disk
# join on 'right' / grouped_direct_damages index to only keep rows we have damages for
direct_damages = unsplit_subset.join(grouped_direct_damages, validate="one_to_one", how="right")
direct_damages["edge_id"] = direct_damages.index
# we may not have calculated damages for every possible asset_type
assert len(direct_damages) <= len(unsplit_subset)
assert "edge_id" in direct_damages.columns

# damage_fraction is on the split geometries, will have more rows
assert len(damage_fraction) >= len(direct_damages)

for dataframe in (damage_fraction, direct_damages):
assert "edge_id" in dataframe

logging.info(f"Writing out {damage_fraction.shape=} (per split geometry, hazard RP map)")
damage_fraction.to_parquet(damage_fraction_path)

logging.info(f"Writing out {direct_damages.shape=} (per unified geometry, hazard RP map)")
direct_damages.to_parquet(damage_cost_path)

logging.info("Done calculating direct damages")
25 changes: 23 additions & 2 deletions workflow/transport-flood/flood_damages.smk
@@ -4,7 +4,7 @@ Damage fraction is a function of hazard intensity (expressed as damage curves)
"""


-rule direct_damages:
+rule return_period_direct_damages:
input:
unsplit = rules.create_transport_network.output.edges, # for pre-intersection geometry
exposure = rules.rasterise_osm_network.output.geoparquet
@@ -19,7 +19,7 @@ rule direct_damages:
# determine the hazard type from the hazard slug, e.g. flood, earthquake, storm
hazard_type=lambda wildcards: config["hazard_types"][wildcards.HAZARD_SLUG.replace('hazard-', '')]
script:
"./direct_damages.py"
"./return_period_direct_damages.py"

"""
Test with:
@@ -39,3 +39,24 @@ rule plot_damage_distributions:
Test with:
snakemake --cores 1 results/egypt-latest_filter-road/hazard-aqueduct-river/damage_fraction_plots
"""


rule event_set_direct_damages:
input:
unsplit = rules.create_transport_network.output.edges, # for pre-intersection geometry
exposure = rules.rasterise_osm_network.output.geoparquet
output:
damage_fraction = "{OUTPUT_DIR}/direct_damages/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/fraction/{SLICE_SLUG}.geoparquet",
damage_cost = "{OUTPUT_DIR}/direct_damages/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/cost/{SLICE_SLUG}.geoparquet",
params:
# determine the network type from the filter, e.g. road, rail
network_type=lambda wildcards: wildcards.FILTER_SLUG.replace('filter-', ''),
# determine the hazard type from the hazard slug, e.g. flood, earthquake, storm
hazard_type=lambda wildcards: config["hazard_types"][wildcards.HAZARD_SLUG.replace('hazard-', '')]
script:
"./event_set_direct_damages.py"

"""
Test with:
snakemake --cores 1 results/direct_damages/thailand-latest_filter-road/hazard-jba-event/cost/slice-5.geoparquet
"""
workflow/transport-flood/direct_damages.py → workflow/transport-flood/return_period_direct_damages.py
File renamed without changes.
