fix geopandas without fiona

CIROH-UA · Sep 5, 2024 · 910cfb3 · 910cfb3
1 parent 25948fa
commit 910cfb3
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 40 deletions.
diff --git a/modules/data_processing/gpkg_utils.py b/modules/data_processing/gpkg_utils.py
@@ -8,6 +8,8 @@
 import struct
 import geopandas as gpd
 from pathlib import Path
+import pyproj
+from shapely.ops import transform
 
 logger = logging.getLogger(__name__)
 
@@ -38,7 +40,6 @@ def verify_indices(gpkg: str = file_paths.conus_hydrofabric()) -> None:
             con.commit()
     con.close()
 
-
 def create_empty_gpkg(gpkg: str) -> None:
     """
     Create an empty geopackage with the necessary tables and indices.
@@ -121,6 +122,19 @@ def blob_to_centre_point(blob: bytes) -> Point:
     return Point(x, y)
 
 
+def convert_to_5070(shapely_geometry):
+    # convert to web mercator
+    if shapely_geometry.is_empty:
+        return shapely_geometry
+    source_crs = pyproj.CRS("EPSG:4326")
+    target_crs = pyproj.CRS("EPSG:5070")
+    project = pyproj.Transformer.from_crs(source_crs, target_crs, always_xy=True).transform
+    new_geometry = transform(project, shapely_geometry)
+    logger.debug(f" new geometry: {new_geometry}")
+    logger.debug(f"old geometry: {shapely_geometry}")
+    return new_geometry
+
+
 def get_catid_from_point(coords):
     """
     Retrieves the watershed boundary ID (catid) of the watershed that contains the given point.
@@ -138,12 +152,24 @@ def get_catid_from_point(coords):
     """
     logger.info(f"Getting catid for {coords}")
     q = file_paths.conus_hydrofabric()
-    d = {"col1": ["point"], "geometry": [Point(coords["lng"], coords["lat"])]}
-    point = gpd.GeoDataFrame(d, crs="EPSG:4326")
-    df = gpd.read_file(q, format="GPKG", layer="divides", mask=point)
-    if df.empty:
+    point = Point(coords["lng"], coords["lat"])
+    point = convert_to_5070(point)
+    with sqlite3.connect(q) as con:
+        sql = f"""SELECT DISTINCT d.divide_id, d.geom
+                FROM divides d
+                JOIN rtree_divides_geom r ON d.fid = r.id
+                WHERE r.minx <= {point.x} AND r.maxx >= {point.x}
+                AND r.miny <= {point.y} AND r.maxy >= {point.y}"""
+        results = con.execute(sql).fetchall()
+    if len(results) == 0:
         raise IndexError(f"No watershed boundary found for {coords}")
-    return df["id"].values[0]
+    if len(results) > 1:
+        # check the geometries to see which one contains the point
+        for result in results:
+            geom = blob_to_geometry(result[1])
+            if geom.contains(point):
+                return result[0]
+    return results[0][0]
 
 
 def create_rTree_table(table: str, con: sqlite3.Connection) -> None:

diff --git a/modules/map_app/views.py b/modules/map_app/views.py
@@ -39,19 +39,6 @@ def index():
     return render_template("index.html")
 
 
-def get_catid_from_point(coords):
-    # inpute coords are EPSG:4326
-    logger.info(coords)
-    # takes a point and returns the catid of the watershed it is in
-    # create a geometry mask for the point
-    # load the watershed boundaries
-    q = Path(__file__).parent.parent / "data_sources" / "conus.gpkg"
-    d = {"col1": ["point"], "geometry": [Point(coords["lng"], coords["lat"])]}
-    point = gpd.GeoDataFrame(d, crs="EPSG:4326")
-    df = gpd.read_file(q, format="GPKG", layer="divides", mask=point)
-    return df["divide_id"].values[0]
-
-
 @main.route("/handle_map_interaction", methods=["POST"])
 def handle_map_interaction():
     data = request.get_json()
@@ -99,28 +86,20 @@ def get_map_data():
 
 
 def catids_to_geojson(cat_dict):
+    # if the cat id is added to the dict by clicking on the map, it will have coodinates
+    # if it was added using the select by cat_id box, the coordinates are 0 0
+    # if we just use the name this doesn't matter
     for k, v in cat_dict.items():
-        if v[0] != 0 and v[1] != 0:
-            # assuming this was called by clicking on the map
-            cat_dict[k] = Point(v[1], v[0])
-        else:
-            # this was called without knowing the coordinates
-            # use sql to get the geometry
-            conn = sqlite3.connect(file_paths.conus_hydrofabric())
-            c = conn.cursor()
-            c.execute(f"SELECT geom FROM divides WHERE id = '{k}'")
-            result = c.fetchone()
-            if result is not None:
-                cat_dict[k] = convert_to_4326(blob_to_geometry(result[0])).buffer(-0.0001)
-
-    d = {"col1": cat_dict.keys(), "geometry": cat_dict.values()}
-    points = gpd.GeoDataFrame(d, crs="EPSG:4326")
-    logger.debug(points)
-    q = Path(__file__).parent.parent / "data_sources" / "conus.gpkg"
-    df = gpd.read_file(q, format="GPKG", layer="divides", mask=points)
-    # convert crs to 4326
-    df = df.to_crs(epsg=4326)
-    return df.to_json()
+        # use sql to get the geometry
+        conn = sqlite3.connect(file_paths.conus_hydrofabric())
+        c = conn.cursor()
+        c.execute(f"SELECT geom FROM divides WHERE divide_id = '{k}'")
+        result = c.fetchone()
+        if result is not None:
+            cat_dict[k] = convert_to_4326(blob_to_geometry(result[0])).buffer(-0.0001)
+    df = {"col1": cat_dict.keys(), "geometry": cat_dict.values()}
+    gdf = gpd.GeoDataFrame(df, crs="EPSG:4326")
+    return gdf.to_json()
 
 
 @main.route("/get_geojson_from_catids", methods=["POST"])