From 5d52aeedb3101b598887cea25a6e2f50a354151b Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Sun, 4 Feb 2024 18:02:15 +1000
Subject: [PATCH 01/32] OSM Geo-processing using GeoPandas

---
 aequilibrae/parameters.yml                    |  24 -
 aequilibrae/project/network/network.py        |   8 +-
 .../network/osm/model_area_gridding.py        |  29 ++
 .../project/network/osm/osm_builder.py        | 419 ++++++++----------
 .../project/network/osm/osm_downloader.py     |  39 +-
 aequilibrae/utils/db_utils.py                 |   4 +
 pyproject.toml                                |   2 +-
 requirements.txt                              |   3 +-
 8 files changed, 265 insertions(+), 263 deletions(-)
 create mode 100644 aequilibrae/project/network/osm/model_area_gridding.py

diff --git a/aequilibrae/parameters.yml b/aequilibrae/parameters.yml
index d7fe1be3e..9529aa360 100644
--- a/aequilibrae/parameters.yml
+++ b/aequilibrae/parameters.yml
@@ -42,30 +42,6 @@ network:
           description: name
           osm_source: name
           type: text
-      - cycleway:
-          description: cycleway, both way
-          osm_source: cycleway
-          type: text
-      - cycleway_right:
-          description: cycleway, right
-          osm_source: cycleway:right
-          type: text
-      - cycleway_left:
-          description: cycleway, left
-          osm_source: cycleway:left
-          type: text
-      - busway:
-          description: busway
-          osm_source: busway
-          type: text
-      - busway_right:
-          description: busway, right
-          osm_source: busway:right
-          type: text
-      - busway_left:
-          description: busway, left
-          osm_source: busway:left
-          type: text            
       two-way:
       - lanes:
           description: lanes
diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py
index b9d5f8115..07e03ee87 100644
--- a/aequilibrae/project/network/network.py
+++ b/aequilibrae/project/network/network.py
@@ -229,14 +229,14 @@ def create_from_osm(
                     if subarea.intersects(model_area):
                         polygons.append(subarea)
         self.logger.info("Downloading data")
-        self.downloader = OSMDownloader(polygons, modes, logger=self.logger)
+        dwnloader = OSMDownloader(polygons, modes, logger=self.logger)
         if pyqt:
-            self.downloader.downloading.connect(self.signal_handler)
+            dwnloader.downloading.connect(self.signal_handler)
 
-        self.downloader.doWork()
+        dwnloader.doWork()
 
         self.logger.info("Building Network")
-        self.builder = OSMBuilder(self.downloader.json, project=self.project, model_area=model_area)
+        self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area)
 
         if pyqt:
             self.builder.building.connect(self.signal_handler)
diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py
new file mode 100644
index 000000000..3f58a21b0
--- /dev/null
+++ b/aequilibrae/project/network/osm/model_area_gridding.py
@@ -0,0 +1,29 @@
+# Inspired by https://www.matecdev.com/posts/shapely-polygon-gridding.html
+import numpy as np
+from shapely.geometry import Polygon
+import geopandas as gpd
+
+
+def geometry_grid(model_area, srid) -> gpd.GeoDataFrame:
+    minx, miny, maxx, maxy = model_area.bounds
+    subd = min(0.01, abs(maxy - miny) / 3, abs(maxx - minx) / 3)
+    space_x = int((maxx - minx) / subd)
+    space_y = int((maxy - miny) / subd)
+    combx, comby = np.linspace(minx, maxx, space_x), np.linspace(miny, maxy, space_y)
+    elements = []
+    for i in range(len(combx) - 1):
+        for j in range(len(comby) - 1):
+            elements.append(
+                Polygon(
+                    [
+                        [combx[i], comby[j]],
+                        [combx[i], comby[j + 1]],
+                        [combx[i + 1], comby[j + 1]],
+                        [combx[i + 1], comby[j]],
+                    ]
+                )
+            )
+
+    gdf = gpd.GeoDataFrame({"id": np.arange(len(elements))}, geometry=elements, crs=srid)
+
+    return gdf.clip(model_area)
diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index ecc10c222..103c8e755 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -1,20 +1,21 @@
 import gc
 import importlib.util as iutil
 import string
-from typing import List
+from math import floor
+from pathlib import Path
 
+import geopandas as gpd
 import numpy as np
 import pandas as pd
-from shapely import Point
-from shapely.geometry import Polygon
+import shapely.wkb
+from shapely.geometry import Polygon, LineString
 
 from aequilibrae.context import get_active_project
 from aequilibrae.parameters import Parameters
-from aequilibrae.project.network.haversine import haversine
-from aequilibrae.project.network.link_types import LinkTypes
 from aequilibrae.utils import WorkerThread
-from aequilibrae.utils.db_utils import commit_and_close
+from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns
 from aequilibrae.utils.spatialite_utils import connect_spatialite
+from .model_area_gridding import geometry_grid
 
 pyqt = iutil.find_spec("PyQt5") is not None
 if pyqt:
@@ -28,23 +29,37 @@ class OSMBuilder(WorkerThread):
     if pyqt:
         building = pyqtSignal(object)
 
-    def __init__(self, osm_items: List, project, model_area: Polygon) -> None:
+    def __init__(self, data, project, model_area: Polygon) -> None:
         WorkerThread.__init__(self, None)
+
         self.project = project or get_active_project()
         self.logger = self.project.logger
-        self.osm_items = osm_items
-        self.model_area = model_area
+        # Log via self.logger: `project` may be None when falling back to the active project
+        self.logger.info("Preparing OSM builder")
+        self.__emit_all(["text", "Preparing OSM builder"])
+        self.model_area = geometry_grid(model_area, 4326)
         self.path = self.project.path_to_file
         self.node_start = 10000
-        self.__link_types = None  # type: LinkTypes
         self.report = []
-        self.__model_link_types = []
-        self.__model_link_type_ids = []
-        self.__link_type_quick_reference = {}
-        self.nodes = {}
-        self.node_df = []
-        self.links = {}
-        self.insert_qry = """INSERT INTO {} ({}, geometry) VALUES({}, GeomFromText(?, 4326))"""
+        self.__all_ltp = pd.DataFrame([])
+        self.__link_id = 1
+        self.__valid_links = {"link_id": [], "nodes": []}
+
+        nodes = (
+            data["nodes"]
+            .assign(
+                is_centroid=0,
+                modes="",
+                link_types="",
+                node_id=np.arange(data["nodes"].shape[0]) + self.node_start,
+            )
+            .reset_index(drop=True)
+        )
+        self.node_df = gpd.GeoDataFrame(nodes, geometry=gpd.points_from_xy(nodes.lon, nodes.lat), crs=4326)
+        del nodes
+        del data["nodes"]
+        gc.collect()
+        self.links_df = data["links"]
 
     def __emit_all(self, *args):
         if pyqt:
@@ -52,169 +67,128 @@ def __emit_all(self, *args):
 
     def doWork(self):
         with commit_and_close(connect_spatialite(self.path)) as conn:
-            self.__worksetup()
             node_count = self.data_structures()
-            self.importing_links(node_count, conn)
+            self.importing_network(node_count, conn)
             conn.execute(
                 "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)"
             )
         self.__emit_all(["finished_threaded_procedure", 0])
 
     def data_structures(self):
-        self.logger.info("Separating nodes and links")
-        self.__emit_all(["text", "Separating nodes and links"])
-        self.__emit_all(["maxValue", len(self.osm_items)])
-
-        alinks = []
-        n = []
-        tot_items = len(self.osm_items)
-        # When downloading data for entire countries, memory consumption can be quite intensive
-        # So we get rid of everything we don't need
-        for i in range(tot_items, 0, -1):
-            item = self.osm_items.pop(-1)
-            if item["type"] == "way":
-                alinks.append(item)
-            elif item["type"] == "node":
-                n.append(item)
-            self.__emit_all(["Value", tot_items - i])
-        gc.collect()
-
         self.logger.info("Setting data structures for nodes")
-        self.__emit_all(["text", "Setting data structures for nodes"])
-        self.__emit_all(["maxValue", len(n)])
-
-        self.node_df = []
-        for i, node in enumerate(n):
-            nid = node.pop("id")
-            _ = node.pop("type")
-            node["node_id"] = i + self.node_start
-            node["inside_model"] = self.model_area.contains(Point(node["lon"], node["lat"]))
-            self.nodes[nid] = node
-            self.node_df.append([node["node_id"], nid, node["lon"], node["lat"]])
-            self.__emit_all(["Value", i])
-        del n
-        self.node_df = (
-            pd.DataFrame(self.node_df, columns=["A", "B", "C", "D"])
-            .drop_duplicates(subset=["C", "D"])
-            .to_records(index=False)
-        )
-
-        self.logger.info("Setting data structures for links")
-        self.__emit_all(["text", "Setting data structures for links"])
-        self.__emit_all(["maxValue", len(alinks)])
 
-        all_nodes = []
-        for i, link in enumerate(alinks):
-            osm_id = link.pop("id")
-            _ = link.pop("type")
-            all_nodes.extend(link["nodes"])
-            self.links[osm_id] = link
-            self.__emit_all(["Value", i])
-        del alinks
+        aux = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count()
 
         self.logger.info("Finalizing data structures")
         self.__emit_all(["text", "Finalizing data structures"])
+        return aux
 
-        node_count = self.unique_count(np.array(all_nodes))
-
-        return node_count
-
-    def importing_links(self, node_count, conn):
-        node_ids = {}
-
-        vars = {}
-        vars["link_id"] = 1
-        table = "links"
-        fields = self.get_link_fields()
+    def importing_network(self, node_count, conn):
         self.__update_table_structure(conn)
-        field_names = ",".join(fields)
 
         self.logger.info("Adding network nodes")
-        self.__emit_all(["text", "Adding network nodes"])
-        sql = "insert into nodes(node_id, is_centroid, osm_id, geometry) Values(?, 0, ?, MakePoint(?,?, 4326))"
-        conn.executemany(sql, self.node_df)
         conn.commit()
-        del self.node_df
+        self.__emit_all(["text", "Adding network nodes"])
+        self.node_df.osm_id = self.node_df.osm_id.astype(np.int64)
+        self.node_df.set_index(["osm_id"], inplace=True)
 
-        self.logger.info("Adding network links")
+        self.logger.info("Creating necessary link types")
+        self.__emit_all(["text", "Creating necessary link types"])
+        self.__build_link_types()
+        shape_ = self.links_df.shape[0]
+        message_step = max(1, floor(shape_ / 100))  # floor() gives 0 below 100 links; `counter % 0` would raise
+        self.__emit_all(["maxValue", shape_])
+
+        self.establish_modes_for_all_links(conn)
+        self.process_link_attributes()
+
+        final_links = []
+        self.logger.info("Geo-procesing links")
         self.__emit_all(["text", "Adding network links"])
-        L = len(list(self.links.keys()))
-        self.__emit_all(["maxValue", L])
-
-        counter = 0
-        mode_codes, not_found_tags = self.modes_per_link_type(conn)
-        owf, twf = self.field_osm_source()
-        all_attrs = []
-        all_osm_ids = list(self.links.keys())
-        for osm_id in all_osm_ids:
-            link = self.links.pop(osm_id)
+        for counter, (idx, link) in enumerate(self.links_df.iterrows()):
             self.__emit_all(["Value", counter])
-            counter += 1
-            if counter % 1000 == 0:
-                self.logger.info(f"Creating segments from {counter:,} out of {L:,} OSM link objects")
-            vars["osm_id"] = osm_id
-            vars["link_type"] = "default"
-            linknodes = link["nodes"]
-            linktags = link["tags"]
-
-            indices = np.searchsorted(node_count[:, 0], linknodes)
-            nodedegree = node_count[indices, 1]
-
-            # Makes sure that beginning and end are end nodes for a link
-            nodedegree[0] = 2
-            nodedegree[-1] = 2
-
-            intersections = np.where(nodedegree > 1)[0]
-            segments = intersections.shape[0] - 1
-
-            # Attributes that are common to all individual links/segments
-            vars["direction"] = (linktags.get("oneway") == "yes") * 1
-
-            for k, v in owf.items():
-                vars[k] = linktags.get(v)
-
-            for k, v in twf.items():
-                val = linktags.get(v["osm_source"])
-                if vars["direction"] == 0:
-                    for d1, d2 in [("ab", "forward"), ("ba", "backward")]:
-                        vars[f"{k}_{d1}"] = self.__get_link_property(d2, val, linktags, v)
-                elif vars["direction"] == -1:
-                    vars[f"{k}_ba"] = linktags.get(f"{v['osm_source']}:{'backward'}", val)
-                elif vars["direction"] == 1:
-                    vars[f"{k}_ab"] = linktags.get(f"{v['osm_source']}:{'forward'}", val)
-
-            vars["modes"] = mode_codes.get(linktags.get("highway"), not_found_tags)
-
-            vars["link_type"] = self.__link_type_quick_reference.get(
-                vars["link_type"].lower(), self.__repair_link_type(vars["link_type"])
-            )
+            if counter % message_step == 0:
+                self.logger.info(f"Creating segments from {counter:,} out of {shape_ :,} OSM link objects")
+
+            # How can a link have fewer than two points?
+            if len(link["nodes"]) < 2:
+                self.logger.error(f"Link {idx} has less than two nodes. {link}")
+                continue
+
+            # The link is a straight line between two points
+            # Or all midpoints are only part of a single link
+            node_indices = node_count.loc[link["nodes"], "counter"]
+            if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1:
+                self.__set_geometry(link)
+                final_links.append(link)
+                continue
+
+            intersecs = np.unique(np.r_[0, np.where(node_indices > 1)[0], len(node_indices) - 1])  # keep way ends
+            for i, j in zip(intersecs[:-1], intersecs[1:]):
+                rec = link.copy(deep=True)
+                rec["nodes"] = link["nodes"][i : j + 1]
+                self.__set_geometry(rec)
+                final_links.append(rec)
+
+        self.links_df = pd.concat(final_links, axis=1).transpose()
+        self.links_df = self.links_df.assign(link_id=np.arange(self.links_df.shape[0]) + 1).drop(columns=["nodes"])
+
+        del final_links
+        gc.collect()
 
-            if len(vars["modes"]) > 0:
-                for i in range(segments):
-                    attributes = self.__build_link_data(vars, intersections, i, linknodes, node_ids, fields)
-                    if attributes is None:
-                        continue
-                    all_attrs.append(attributes)
-                    vars["link_id"] += 1
-
-            self.__emit_all(["text", f"{counter:,} of {L:,} super links added"])
-            self.links[osm_id] = []
-        sql = self.insert_qry.format(table, field_names, ",".join(["?"] * (len(all_attrs[0]) - 1)))
-        self.logger.info("Adding network links")
-        self.__emit_all(["text", "Adding network links"])
-        try:
-            conn.executemany(sql, all_attrs)
-        except Exception as e:
-            self.logger.error("error when inserting link {}. Error {}".format(all_attrs[0], e.args))
-            self.logger.error(sql)
-            raise e
-
-    def __worksetup(self):
-        self.__link_types = self.project.network.link_types
-        lts = self.__link_types.all_types()
-        for lt_id, lt in lts.items():
-            self.__model_link_types.append(lt.link_type)
-            self.__model_link_type_ids.append(lt_id)
+        # Gets ONLY the nodes that are needed
+        self.links_df = gpd.GeoDataFrame(self.links_df, geometry=self.links_df.geometry, crs=4326)
+        self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False)
+        self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1
+
+        clip_nodes = pd.DataFrame(self.__valid_links)
+        clip_nodes = clip_nodes[clip_nodes.link_id.isin(self.links_df.link_id)]
+
+        self.node_df.reset_index(inplace=True)
+        self.node_df = self.node_df[self.node_df.osm_id.isin(clip_nodes.nodes)]
+        del clip_nodes
+        gc.collect()
+
+        cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types"]
+        self.node_df = gpd.GeoDataFrame(self.node_df[cols], geometry=self.node_df.geometry, crs=self.node_df.crs)
+
+        # Saves the data to disk in case of issues loading it to the database
+        osm_data_path = Path(self.project.project_base_path) / "osm_data"
+        osm_data_path.mkdir(exist_ok=True)
+        self.links_df.to_parquet(osm_data_path / "links.parquet")
+        self.node_df.to_parquet(osm_data_path / "nodes.parquet")
+
+        self.logger.info("Adding nodes to file")
+        self.__emit_all(["text", "Adding nodes to file"])
+        self.node_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="nodes", mode="a")
+        del self.node_df
+        gc.collect()
+
+        # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a")
+
+        # I could not get the above line to work, so I used the following code instead
+        insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromWKB(?, 4326))"
+        cols_no_geo = self.links_df.columns.tolist()
+        cols_no_geo.remove("geometry")
+        insert_qry = insert_qry.format(", ".join(cols_no_geo), ", ".join(["?"] * len(cols_no_geo)))
+
+        geos = self.links_df.geometry.to_wkb()
+        cols = cols_no_geo + ["geometry"]
+        links_df = pd.DataFrame(self.links_df[cols_no_geo]).assign(geometry=geos)[cols].to_records(index=False)
+
+        del self.links_df
+        gc.collect()
+        self.logger.info("Adding links to file")
+        self.__emit_all(["text", "Adding links to file"])
+        conn.executemany(insert_qry, links_df)
+
+    def __set_geometry(self, rec: pd.Series) -> LineString:
+        rec.geometry = LineString(self.node_df.loc[rec.nodes, "geometry"])
+        rec.link_id = self.__link_id
+
+        self.__valid_links["link_id"].extend([self.__link_id] * len(rec.nodes))
+        self.__valid_links["nodes"].extend(rec.nodes)
+        self.__link_id += 1
 
     def __update_table_structure(self, conn):
         structure = conn.execute("pragma table_info(Links)").fetchall()
@@ -225,39 +199,24 @@ def __update_table_structure(self, conn):
             conn.execute(f"Alter table Links add column {field} {ltype}")
         conn.commit()
 
-    def __build_link_data(self, vars, intersections, i, linknodes, node_ids, fields):
-        ii = intersections[i]
-        jj = intersections[i + 1]
-        all_nodes = [linknodes[x] for x in range(ii, jj + 1)]
-
-        vars["a_node"] = node_ids.get(linknodes[ii], self.node_start)
-        if vars["a_node"] == self.node_start:
-            node_ids[linknodes[ii]] = vars["a_node"]
-            self.node_start += 1
-
-        vars["b_node"] = node_ids.get(linknodes[jj], self.node_start)
-        if vars["b_node"] == self.node_start:
-            node_ids[linknodes[jj]] = vars["b_node"]
-            self.node_start += 1
-
-        vars["distance"] = sum(
-            [
-                haversine(self.nodes[x]["lon"], self.nodes[x]["lat"], self.nodes[y]["lon"], self.nodes[y]["lat"])
-                for x, y in zip(all_nodes[1:], all_nodes[:-1])
-            ]
-        )
-
-        geometry = ["{} {}".format(self.nodes[x]["lon"], self.nodes[x]["lat"]) for x in all_nodes]
-        inside_area = sum([self.nodes[x]["inside_model"] for x in all_nodes])
-        if inside_area == 0:
-            return None
-        geometry = "LINESTRING ({})".format(", ".join(geometry))
-
-        attributes = [vars.get(x) for x in fields]
-        attributes.append(geometry)
-        return attributes
+    def __build_link_types(self):
+        data = []
+        with read_and_close(self.project.path_to_file) as conn:
+            self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn)
+
+        for i, lt in enumerate(self.links_df.highway.unique()):
+            if str(lt).lower() in self.__all_ltp.link_type.values:
+                continue
+            data.append([*self.__define_link_type(str(lt)), str(lt)])
+            self.__all_ltp = pd.concat(
+                [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])]
+            )
+        self.__all_ltp.drop_duplicates(inplace=True)
+        self.links_df = self.links_df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left")
+        self.links_df.drop(columns=["highway"], inplace=True)
 
-    def __repair_link_type(self, link_type: str) -> str:
+    def __define_link_type(self, link_type: str) -> str:
+        proj_link_types = self.project.network.link_types
         original_link_type = link_type
         link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower()
 
@@ -266,35 +225,31 @@ def __repair_link_type(self, link_type: str) -> str:
             if piece in ["link", "segment", "stretch"]:
                 link_type = "_".join(split[0 : i + 1])
 
+        if self.__all_ltp.shape[0] >= 51:
+            link_type = "aggregate_link_type"
+
         if len(link_type) == 0:
             link_type = "empty"
 
-        if len(self.__model_link_type_ids) >= 51 and link_type not in self.__model_link_types:
-            link_type = "aggregate_link_type"
-
-        if link_type in self.__model_link_types:
-            lt = self.__link_types.get_by_name(link_type)
+        if link_type in self.__all_ltp.link_type.values:
+            lt = proj_link_types.get_by_name(link_type)
             if original_link_type not in lt.description:
                 lt.description += f", {original_link_type}"
                 lt.save()
-            self.__link_type_quick_reference[original_link_type.lower()] = link_type
-            return link_type
+            return [lt.link_type_id, link_type]
 
         letter = link_type[0]
-        if letter in self.__model_link_type_ids:
+        if letter in self.__all_ltp.link_type_id.values:
             letter = letter.upper()
-            if letter in self.__model_link_type_ids:
+            if letter in self.__all_ltp.link_type_id.values:
                 for letter in string.ascii_letters:
-                    if letter not in self.__model_link_type_ids:
+                    if letter not in self.__all_ltp.link_type_id.values:
                         break
-        lt = self.__link_types.new(letter)
+        lt = proj_link_types.new(letter)
         lt.link_type = link_type
         lt.description = f"Link types from Open Street Maps: {original_link_type}"
         lt.save()
-        self.__model_link_types.append(link_type)
-        self.__model_link_type_ids.append(letter)
-        self.__link_type_quick_reference[original_link_type.lower()] = link_type
-        return link_type
+        return [letter, link_type]
 
     def __get_link_property(self, d2, val, linktags, v):
         vald = linktags.get(f'{v["osm_source"]}:{d2}', val)
@@ -340,27 +295,41 @@ def get_link_field_type(field_name):
                 if field_name in tp:
                     return tp[field_name]["type"]
 
-    @staticmethod
-    def field_osm_source():
+    def process_link_attributes(self):
         p = Parameters()
         fields = p.parameters["network"]["links"]["fields"]
 
-        owf = {
-            list(x.keys())[0]: x[list(x.keys())[0]]["osm_source"]
-            for x in fields["one-way"]
-            if "osm_source" in x[list(x.keys())[0]]
-        }
+        for x in fields["one-way"]:
+            keys_ = list(x.values())[0]
+            field = list(x.keys())[0]
+            osm_name = keys_.get("osm_source", field).replace(":", "_")
+            self.links_df.rename(columns={osm_name: field}, inplace=True, errors="ignore")
 
-        twf = {}
         for x in fields["two-way"]:
-            if "osm_source" in x[list(x.keys())[0]]:
-                twf[list(x.keys())[0]] = {
-                    "osm_source": x[list(x.keys())[0]]["osm_source"],
-                    "osm_behaviour": x[list(x.keys())[0]]["osm_behaviour"],
-                }
-        return owf, twf
-
-    def modes_per_link_type(self, conn):
+            keys_ = list(x.values())[0]
+            field = list(x.keys())[0]
+            if "osm_source" not in keys_:
+                continue
+            osm_name = keys_.get("osm_source", field).replace(":", "_")
+            self.links_df[f"{field}_ba"] = self.links_df[osm_name].copy()
+            self.links_df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore")
+            if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide":
+                self.links_df[f"{field}_ab"] = pd.to_numeric(self.links_df[f"{field}_ab"], errors="coerce") / 2
+                self.links_df[f"{field}_ba"] = pd.to_numeric(self.links_df[f"{field}_ba"], errors="coerce") / 2
+
+                if f"{field}_forward" in self.links_df:
+                    fld = pd.to_numeric(self.links_df[f"{field}_forward"], errors="coerce")
+                    self.links_df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0]
+                if f"{field}_backward" in self.links_df:
+                    fld = pd.to_numeric(self.links_df[f"{field}_backward"], errors="coerce")
+                    self.links_df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0]
+        cols = list_columns(self.project.conn, "links") + ["nodes"]
+        self.links_df = self.links_df[[x for x in cols if x in self.links_df.columns]]
+        gc.collect()
+        self.links_df["geometry"] = 0
+        self.links_df["link_id"] = 0
+
+    def establish_modes_for_all_links(self, conn):
         p = Parameters()
         modes = p.parameters["network"]["osm"]["modes"]
 
@@ -373,13 +342,15 @@ def modes_per_link_type(self, conn):
             all_types = val["link_types"]
             md = mode_codes[mode]
             for tp in all_types:
-                type_list[tp] = "{}{}".format(type_list.get(tp, ""), md)
+                type_list[tp] = "".join(sorted("{}{}".format(type_list.get(tp, ""), md)))
             if val["unknown_tags"]:
                 notfound += md
 
         type_list = {k: "".join(set(v)) for k, v in type_list.items()}
 
-        return type_list, "{}".format(notfound)
+        df = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"])
+        self.links_df = self.links_df.merge(df, on="link_type", how="left")
+        self.links_df["modes"] = self.links_df["modes"].fillna("".join(sorted(notfound)))  # no chained inplace
 
     @staticmethod
     def get_node_fields():
diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index 76c810b1e..cfb52f685 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -10,20 +10,22 @@
 For the original work, please see https://github.com/gboeing/osmnx
 """
 
+import gc
+import importlib.util as iutil
 import logging
-import time
 import re
-from typing import List
+import time
+from typing import List, Dict
 
+import pandas as pd
 import requests
+from pandas import json_normalize
 from shapely import Polygon
 
-from .osm_params import http_headers, memory
-from aequilibrae.parameters import Parameters
 from aequilibrae.context import get_logger
+from aequilibrae.parameters import Parameters
 from aequilibrae.utils import WorkerThread
-import gc
-import importlib.util as iutil
+from .osm_params import http_headers, memory
 
 spec = iutil.find_spec("PyQt5")
 pyqt = spec is not None
@@ -50,6 +52,9 @@ def __init__(self, polygons: List[Polygon], modes, logger: logging.Logger = None
         self.overpass_endpoint = par["overpass_endpoint"]
         self.timeout = par["timeout"]
         self.sleeptime = par["sleeptime"]
+        self._nodes = []
+        self._links = []
+        self.data: Dict[str, pd.DataFrame] = {"nodes": pd.DataFrame([]), "links": pd.DataFrame([])}
 
     def doWork(self):
         infrastructure = 'way["highway"]'
@@ -80,10 +85,26 @@ def doWork(self):
             )
             json = self.overpass_request(data={"data": query_str}, timeout=self.timeout)
             if json["elements"]:
-                self.json.extend(json["elements"])
-            del json
-            gc.collect()
+                for tag, lst in [("node", self._nodes), ("way", self._links)]:
+                    df = pd.DataFrame([item for item in json["elements"] if item["type"] == tag])
+                    lst.append(df)
+                del json
+                gc.collect()
+
         self.__emit_all(["Value", len(self.polygons)])
+        self.__emit_all(["text", "Downloading finished. Processing data"])
+        for lst, table in [(self._links, "links"), (self._nodes, "nodes")]:
+            df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"])
+            df = df.reset_index(drop=True)  # json_normalize may return a fresh RangeIndex; keep rows aligned
+            if table == "links" and "tags" in df.columns:
+                df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
+                df.columns = [x.replace(":", "_") for x in df.columns]
+            else:
+                df = df.drop(columns=["tags"], errors="ignore")  # a download may contain no tagged elements
+            self.data[table] = df.rename(columns={"id": "osm_id"})
+            lst.clear()
+            gc.collect()
+
         self.__emit_all(["FinishedDownloading", 0])
 
     def overpass_request(self, data, pause_duration=None, timeout=180, error_pause_duration=None):
diff --git a/aequilibrae/utils/db_utils.py b/aequilibrae/utils/db_utils.py
index 23534138f..83efb49a2 100644
--- a/aequilibrae/utils/db_utils.py
+++ b/aequilibrae/utils/db_utils.py
@@ -79,6 +79,10 @@ def get_schema(conn, table_name):
     return {e.name: e for e in rv}
 
 
+def list_columns(conn, table_name):
+    return list(get_schema(conn, table_name).keys())
+
+
 def has_column(conn, table_name, col_name):
     return col_name in get_schema(conn, table_name)
 
diff --git a/pyproject.toml b/pyproject.toml
index ff2ab1ff1..509bb0922 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ extend-exclude = '''docs/*'''
 
 
 [build-system]
-requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"]
+requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel", "geopandas"]
 
 
 [tool.ruff]
diff --git a/requirements.txt b/requirements.txt
index d22ed0b9d..04296117f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ shapely
 pandas
 pyproj
 rtree
-openmatrix
\ No newline at end of file
+openmatrix
+geopandas
\ No newline at end of file

From 2a2f8ea702d846ab50f791a2fe478b50ff473879 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Sun, 4 Feb 2024 18:45:44 +1000
Subject: [PATCH 02/32] OSM Geo-processing using GeoPandas

---
 .../project/network/osm/osm_builder.py        | 21 ++++++-------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 103c8e755..66e9804ea 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -43,18 +43,10 @@ def __init__(self, data, project, model_area: Polygon) -> None:
         self.report = []
         self.__all_ltp = pd.DataFrame([])
         self.__link_id = 1
-        self.__valid_links = {"link_id": [], "nodes": []}
-
-        nodes = (
-            data["nodes"]
-            .assign(
-                is_centroid=0,
-                modes="",
-                link_types="",
-                node_id=np.arange(data["nodes"].shape[0]) + self.node_start,
-            )
-            .reset_index(drop=True)
-        )
+        self.__valid_links = []
+
+        nids = np.arange(data["nodes"].shape[0]) + self.node_start
+        nodes = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True)
         self.node_df = gpd.GeoDataFrame(nodes, geometry=gpd.points_from_xy(nodes.lon, nodes.lat), crs=4326)
         del nodes
         del data["nodes"]
@@ -141,7 +133,7 @@ def importing_network(self, node_count, conn):
         self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False)
         self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1
 
-        clip_nodes = pd.DataFrame(self.__valid_links)
+        clip_nodes = pd.concat(self.__valid_links)
         clip_nodes = clip_nodes[clip_nodes.link_id.isin(self.links_df.link_id)]
 
         self.node_df.reset_index(inplace=True)
@@ -186,8 +178,7 @@ def __set_geometry(self, rec: pd.Series) -> LineString:
         rec.geometry = LineString(self.node_df.loc[rec.nodes, "geometry"])
         rec.link_id = self.__link_id
 
-        self.__valid_links["link_id"].extend([self.__link_id] * len(rec.nodes))
-        self.__valid_links["nodes"].extend(rec.nodes)
+        self.__valid_links.append({"link_id": [self.__link_id] * len(rec.nodes), "nodes": rec.nodes})
         self.__link_id += 1
 
     def __update_table_structure(self, conn):

From 27dc952c43fea9c3a68ab61eac5d6cced8872be3 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Sun, 4 Feb 2024 22:41:01 +1000
Subject: [PATCH 03/32] OSM Geo-processing using GeoPandas

---
 .../project/network/osm/osm_builder.py        | 82 ++++++++++---------
 1 file changed, 44 insertions(+), 38 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 66e9804ea..3ef54e492 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -3,11 +3,13 @@
 import string
 from math import floor
 from pathlib import Path
+from typing import List
 
 import geopandas as gpd
 import numpy as np
 import pandas as pd
 import shapely.wkb
+from shapely import MultiLineString
 from shapely.geometry import Polygon, LineString
 
 from aequilibrae.context import get_active_project
@@ -16,6 +18,7 @@
 from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns
 from aequilibrae.utils.spatialite_utils import connect_spatialite
 from .model_area_gridding import geometry_grid
+from aequilibrae.project.project_creation import remove_triggers, add_triggers
 
 pyqt = iutil.find_spec("PyQt5") is not None
 if pyqt:
@@ -94,53 +97,50 @@ def importing_network(self, node_count, conn):
         self.establish_modes_for_all_links(conn)
         self.process_link_attributes()
 
-        final_links = []
         self.logger.info("Geo-procesing links")
         self.__emit_all(["text", "Adding network links"])
+        geometries = []
         for counter, (idx, link) in enumerate(self.links_df.iterrows()):
             self.__emit_all(["Value", counter])
             if counter % message_step == 0:
                 self.logger.info(f"Creating segments from {counter:,} out of {shape_ :,} OSM link objects")
 
             # How can I link have less than two points?
+            if not isinstance(link["nodes"], list):
+                geometries.append(LineString())
+                self.logger.error(f"OSM link {idx} does not have a list of nodes.")
+                continue
+
             if len(link["nodes"]) < 2:
-                self.logger.error(f"Link {idx} has less than two nodes. {link}")
+                self.logger.error(f"Link {idx} has less than two nodes. {link.nodes}")
+                geometries.append(LineString())
                 continue
 
             # The link is a straight line between two points
             # Or all midpoints are only part of a single link
             node_indices = node_count.loc[link["nodes"], "counter"]
             if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1:
-                self.__set_geometry(link)
-                final_links.append(link)
-                continue
-
-            intersecs = np.where(node_indices > 1)[0]
-            for i, j in zip(intersecs[:-1], intersecs[1:]):
-                rec = link.copy(deep=True)
-                rec["nodes"] = link["nodes"][i : j + 1]
-                self.__set_geometry(rec)
-                final_links.append(rec)
-
-        self.links_df = pd.concat(final_links, axis=1).transpose()
-        self.links_df = self.links_df.assign(link_id=np.arange(self.links_df.shape[0]) + 1).drop(columns=["nodes"])
-
-        del final_links
-        gc.collect()
-
-        # Gets ONLY the nodes that are needed
-        self.links_df = gpd.GeoDataFrame(self.links_df, geometry=self.links_df.geometry, crs=4326)
+                # The link has no intersections
+                geo = self.__build_geometry(link.nodes)
+            else:
+                # The link has intersections
+                intersecs = np.where(node_indices > 1)[0]
+                geos = []
+                for i, j in zip(intersecs[:-1], intersecs[1:]):
+                    geos.append(self.__build_geometry(link.nodes[i : j + 1]))
+                geo = MultiLineString(geos)
+
+            geometries.append(geo)
+
+        # Builds the link Geo dataframe
+        self.links_df.drop(columns=["nodes"], inplace=True)
+        self.links_df = gpd.GeoDataFrame(self.links_df, geometry=geometries, crs=4326)
         self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False)
-        self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1
+        self.links_df = self.links_df[self.links_df.geometry.length > 0]
 
-        clip_nodes = pd.concat(self.__valid_links)
-        clip_nodes = clip_nodes[clip_nodes.link_id.isin(self.links_df.link_id)]
+        self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1
 
         self.node_df.reset_index(inplace=True)
-        self.node_df = self.node_df[self.node_df.osm_id.isin(clip_nodes.nodes)]
-        del clip_nodes
-        gc.collect()
-
         cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types"]
         self.node_df = gpd.GeoDataFrame(self.node_df[cols], geometry=self.node_df.geometry, crs=self.node_df.crs)
 
@@ -152,10 +152,17 @@ def importing_network(self, node_count, conn):
 
         self.logger.info("Adding nodes to file")
         self.__emit_all(["text", "Adding nodes to file"])
+
+        # Removing the triggers before adding all nodes makes things a LOT faster
+        remove_triggers(conn, self.logger, "network")
+
         self.node_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="nodes", mode="a")
         del self.node_df
         gc.collect()
 
+        # But we need to add them back to add the links
+        add_triggers(conn, self.logger, "network")
+
         # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a")
 
         # I could not get the above line to work, so I used the following code instead
@@ -174,12 +181,8 @@ def importing_network(self, node_count, conn):
         self.__emit_all(["text", "Adding links to file"])
         conn.executemany(insert_qry, links_df)
 
-    def __set_geometry(self, rec: pd.Series) -> LineString:
-        rec.geometry = LineString(self.node_df.loc[rec.nodes, "geometry"])
-        rec.link_id = self.__link_id
-
-        self.__valid_links.append({"link_id": [self.__link_id] * len(rec.nodes), "nodes": rec.nodes})
-        self.__link_id += 1
+    def __build_geometry(self, nodes: List[int]) -> LineString:
+        return LineString(self.node_df.loc[nodes, "geometry"])
 
     def __update_table_structure(self, conn):
         structure = conn.execute("pragma table_info(Links)").fetchall()
@@ -195,14 +198,16 @@ def __build_link_types(self):
         with read_and_close(self.project.path_to_file) as conn:
             self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn)
 
+        self.links_df.highway.fillna("missing", inplace=True)
+        self.links_df.highway = self.links_df.highway.str.lower()
         for i, lt in enumerate(self.links_df.highway.unique()):
-            if str(lt).lower() in self.__all_ltp.link_type.values:
+            if str(lt) in self.__all_ltp.highway.values:
                 continue
             data.append([*self.__define_link_type(str(lt)), str(lt)])
             self.__all_ltp = pd.concat(
                 [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])]
             )
-        self.__all_ltp.drop_duplicates(inplace=True)
+            self.__all_ltp.drop_duplicates(inplace=True)
         self.links_df = self.links_df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left")
         self.links_df.drop(columns=["highway"], inplace=True)
 
@@ -287,6 +292,9 @@ def get_link_field_type(field_name):
                     return tp[field_name]["type"]
 
     def process_link_attributes(self):
+        self.links_df = self.links_df.assign(direction=0, link_id=0)
+        self.links_df.loc[self.links_df.oneway == "yes", "direction"] = 1
+        self.links_df.loc[self.links_df.oneway == "backward", "direction"] = -1
         p = Parameters()
         fields = p.parameters["network"]["links"]["fields"]
 
@@ -317,8 +325,6 @@ def process_link_attributes(self):
         cols = list_columns(self.project.conn, "links") + ["nodes"]
         self.links_df = self.links_df[[x for x in cols if x in self.links_df.columns]]
         gc.collect()
-        self.links_df["geometry"] = 0
-        self.links_df["link_id"] = 0
 
     def establish_modes_for_all_links(self, conn):
         p = Parameters()

From ea33710febef723a53c55860543110b39179be01 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 5 Feb 2024 11:04:21 +1000
Subject: [PATCH 04/32] OSM Geo-processing using GeoPandas

---
 .../project/network/osm/osm_builder.py        | 123 +++++++-----------
 .../project/network/osm/osm_downloader.py     |   7 +-
 2 files changed, 49 insertions(+), 81 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 3ef54e492..a350c0beb 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -8,17 +8,17 @@
 import geopandas as gpd
 import numpy as np
 import pandas as pd
-import shapely.wkb
+from pandas import json_normalize
 from shapely import MultiLineString
 from shapely.geometry import Polygon, LineString
 
 from aequilibrae.context import get_active_project
 from aequilibrae.parameters import Parameters
+from aequilibrae.project.project_creation import remove_triggers, add_triggers
 from aequilibrae.utils import WorkerThread
 from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns
 from aequilibrae.utils.spatialite_utils import connect_spatialite
 from .model_area_gridding import geometry_grid
-from aequilibrae.project.project_creation import remove_triggers, add_triggers
 
 pyqt = iutil.find_spec("PyQt5") is not None
 if pyqt:
@@ -62,28 +62,16 @@ def __emit_all(self, *args):
 
     def doWork(self):
         with commit_and_close(connect_spatialite(self.path)) as conn:
-            node_count = self.data_structures()
-            self.importing_network(node_count, conn)
+            self.__update_table_structure(conn)
+            self.importing_network(conn)
             conn.execute(
                 "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)"
             )
         self.__emit_all(["finished_threaded_procedure", 0])
 
-    def data_structures(self):
-        self.logger.info("Setting data structures for nodes")
-
-        aux = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count()
-
-        self.logger.info("Finalizing data structures")
-        self.__emit_all(["text", "Finalizing data structures"])
-        return aux
-
-    def importing_network(self, node_count, conn):
-        self.__update_table_structure(conn)
+    def importing_network(self, conn):
+        node_count = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count()
 
-        self.logger.info("Adding network nodes")
-        conn.commit()
-        self.__emit_all(["text", "Adding network nodes"])
         self.node_df.osm_id = self.node_df.osm_id.astype(np.int64)
         self.node_df.set_index(["osm_id"], inplace=True)
 
@@ -127,7 +115,7 @@ def importing_network(self, node_count, conn):
                 intersecs = np.where(node_indices > 1)[0]
                 geos = []
                 for i, j in zip(intersecs[:-1], intersecs[1:]):
-                    geos.append(self.__build_geometry(link.nodes[i : j + 1]))
+                    geos.append(self.__build_geometry(link.nodes[i: j + 1]))
                 geo = MultiLineString(geos)
 
             geometries.append(geo)
@@ -184,14 +172,13 @@ def importing_network(self, node_count, conn):
     def __build_geometry(self, nodes: List[int]) -> LineString:
         return LineString(self.node_df.loc[nodes, "geometry"])
 
-    def __update_table_structure(self, conn):
-        structure = conn.execute("pragma table_info(Links)").fetchall()
-        has_fields = [x[1].lower() for x in structure]
-        fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"]
-        for field in [f for f in fields if f not in has_fields]:
-            ltype = self.get_link_field_type(field).upper()
-            conn.execute(f"Alter table Links add column {field} {ltype}")
-        conn.commit()
+
+
+    def __process_link_chunk(self):
+
+        if "tags" in df.columns:
+            df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
+            df.columns = [x.replace(":", "_") for x in df.columns]
 
     def __build_link_types(self):
         data = []
@@ -219,7 +206,7 @@ def __define_link_type(self, link_type: str) -> str:
         split = link_type.split("_")
         for i, piece in enumerate(split[1:]):
             if piece in ["link", "segment", "stretch"]:
-                link_type = "_".join(split[0 : i + 1])
+                link_type = "_".join(split[0: i + 1])
 
         if self.__all_ltp.shape[0] >= 51:
             link_type = "aggregate_link_type"
@@ -247,49 +234,6 @@ def __define_link_type(self, link_type: str) -> str:
         lt.save()
         return [letter, link_type]
 
-    def __get_link_property(self, d2, val, linktags, v):
-        vald = linktags.get(f'{v["osm_source"]}:{d2}', val)
-        if vald is None:
-            return vald
-
-        if vald.isdigit():
-            if vald == val and v["osm_behaviour"] == "divide":
-                vald = float(val) / 2
-        return vald
-
-    @staticmethod
-    def unique_count(a):
-        # From: https://stackoverflow.com/a/21124789/1480643
-        unique, inverse = np.unique(a, return_inverse=True)
-        count = np.zeros(len(unique), int)
-        np.add.at(count, inverse, 1)
-        return np.vstack((unique, count)).T
-
-    @staticmethod
-    def get_link_fields():
-        p = Parameters()
-        fields = p.parameters["network"]["links"]["fields"]
-        owf = [list(x.keys())[0] for x in fields["one-way"]]
-
-        twf1 = ["{}_ab".format(list(x.keys())[0]) for x in fields["two-way"]]
-        twf2 = ["{}_ba".format(list(x.keys())[0]) for x in fields["two-way"]]
-
-        return owf + twf1 + twf2 + ["osm_id"]
-
-    @staticmethod
-    def get_link_field_type(field_name):
-        p = Parameters()
-        fields = p.parameters["network"]["links"]["fields"]
-
-        if field_name[-3:].lower() in ["_ab", "_ba"]:
-            field_name = field_name[:-3]
-            for tp in fields["two-way"]:
-                if field_name in tp:
-                    return tp[field_name]["type"]
-        else:
-            for tp in fields["one-way"]:
-                if field_name in tp:
-                    return tp[field_name]["type"]
 
     def process_link_attributes(self):
         self.links_df = self.links_df.assign(direction=0, link_id=0)
@@ -349,9 +293,38 @@ def establish_modes_for_all_links(self, conn):
         self.links_df = self.links_df.merge(df, on="link_type", how="left")
         self.links_df.modes.fillna("".join(sorted(notfound)), inplace=True)
 
+    ######## TABLE STRUCTURE UPDATING ########
+    def __update_table_structure(self, conn):
+        structure = conn.execute("pragma table_info(Links)").fetchall()
+        has_fields = [x[1].lower() for x in structure]
+        fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"]
+        for field in [f for f in fields if f not in has_fields]:
+            ltype = self.get_link_field_type(field).upper()
+            conn.execute(f"Alter table Links add column {field} {ltype}")
+        conn.commit()
+
+    @staticmethod
+    def get_link_fields():
+        p = Parameters()
+        fields = p.parameters["network"]["links"]["fields"]
+        owf = [list(x.keys())[0] for x in fields["one-way"]]
+
+        twf1 = ["{}_ab".format(list(x.keys())[0]) for x in fields["two-way"]]
+        twf2 = ["{}_ba".format(list(x.keys())[0]) for x in fields["two-way"]]
+
+        return owf + twf1 + twf2 + ["osm_id"]
+
     @staticmethod
-    def get_node_fields():
+    def get_link_field_type(field_name):
         p = Parameters()
-        fields = p.parameters["network"]["nodes"]["fields"]
-        fields = [list(x.keys())[0] for x in fields]
-        return fields + ["osm_id"]
+        fields = p.parameters["network"]["links"]["fields"]
+
+        if field_name[-3:].lower() in ["_ab", "_ba"]:
+            field_name = field_name[:-3]
+            for tp in fields["two-way"]:
+                if field_name in tp:
+                    return tp[field_name]["type"]
+        else:
+            for tp in fields["one-way"]:
+                if field_name in tp:
+                    return tp[field_name]["type"]
\ No newline at end of file
diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index cfb52f685..e93421d6f 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -19,7 +19,6 @@
 
 import pandas as pd
 import requests
-from pandas import json_normalize
 from shapely import Polygon
 
 from aequilibrae.context import get_logger
@@ -95,11 +94,7 @@ def doWork(self):
         self.__emit_all(["text", "Downloading finished. Processing data"])
         for lst, table in [(self._links, "links"), (self._nodes, "nodes")]:
             df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"])
-            if table == "links":
-                if "tags" in df.columns:
-                    df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
-                    df.columns = [x.replace(":", "_") for x in df.columns]
-            else:
+            if table != "links":
                 df = df.drop(columns=["tags"])
             self.data[table] = df.rename(columns={"id": "osm_id"})
             lst.clear()

From f026dc8c6cd5b4223e94e6d566de1d3c787d3848 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 5 Feb 2024 13:41:26 +1000
Subject: [PATCH 05/32] Processing with chunking

---
 .../project/network/osm/osm_builder.py        | 141 ++++++++++--------
 1 file changed, 79 insertions(+), 62 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index a350c0beb..1da2d8ce7 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -75,16 +75,11 @@ def importing_network(self, conn):
         self.node_df.osm_id = self.node_df.osm_id.astype(np.int64)
         self.node_df.set_index(["osm_id"], inplace=True)
 
-        self.logger.info("Creating necessary link types")
-        self.__emit_all(["text", "Creating necessary link types"])
-        self.__build_link_types()
+        self.__process_link_chunk()
         shape_ = self.links_df.shape[0]
         message_step = floor(shape_ / 100)
         self.__emit_all(["maxValue", shape_])
 
-        self.establish_modes_for_all_links(conn)
-        self.process_link_attributes()
-
         self.logger.info("Geo-procesing links")
         self.__emit_all(["text", "Adding network links"])
         geometries = []
@@ -115,7 +110,7 @@ def importing_network(self, conn):
                 intersecs = np.where(node_indices > 1)[0]
                 geos = []
                 for i, j in zip(intersecs[:-1], intersecs[1:]):
-                    geos.append(self.__build_geometry(link.nodes[i: j + 1]))
+                    geos.append(self.__build_geometry(link.nodes[i : j + 1]))
                 geo = MultiLineString(geos)
 
             geometries.append(geo)
@@ -172,22 +167,38 @@ def importing_network(self, conn):
     def __build_geometry(self, nodes: List[int]) -> LineString:
         return LineString(self.node_df.loc[nodes, "geometry"])
 
-
-
     def __process_link_chunk(self):
+        self.logger.info("Creating necessary link types")
+        self.__emit_all(["text", "Creating necessary link types"])
 
-        if "tags" in df.columns:
-            df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
-            df.columns = [x.replace(":", "_") for x in df.columns]
-
-    def __build_link_types(self):
-        data = []
+        # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk
+        # And let's also assume that each row will be 100 fields at 8 bytes each
+        # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so.
+        chunk_size = 100_000_000
+        list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
+        self.links_df = pd.DataFrame([])
+        # Initialize link types
         with read_and_close(self.project.path_to_file) as conn:
             self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn)
 
-        self.links_df.highway.fillna("missing", inplace=True)
-        self.links_df.highway = self.links_df.highway.str.lower()
-        for i, lt in enumerate(self.links_df.highway.unique()):
+            for i, df in enumerate(list_dfs):
+                if "tags" in df.columns:
+                    df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
+                    df.columns = [x.replace(":", "_") for x in df.columns]
+                    df = self.__build_link_types(df)
+                    df = self.__establish_modes_for_all_links(conn, df)
+                    df = self.__process_link_attributes(df)
+                else:
+                    self.logger.error("OSM link data does not have tags. Skipping an entire data chunk")
+                    df = pd.DataFrame([])
+                list_dfs[i] = df
+        self.links_df = pd.concat(list_dfs)
+
+    def __build_link_types(self, df):
+        data = []
+        df.highway.fillna("missing", inplace=True)
+        df.highway = df.highway.str.lower()
+        for i, lt in enumerate(df.highway.unique()):
             if str(lt) in self.__all_ltp.highway.values:
                 continue
             data.append([*self.__define_link_type(str(lt)), str(lt)])
@@ -195,8 +206,8 @@ def __build_link_types(self):
                 [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])]
             )
             self.__all_ltp.drop_duplicates(inplace=True)
-        self.links_df = self.links_df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left")
-        self.links_df.drop(columns=["highway"], inplace=True)
+        df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left")
+        return df.drop(columns=["highway"])
 
     def __define_link_type(self, link_type: str) -> str:
         proj_link_types = self.project.network.link_types
@@ -206,7 +217,7 @@ def __define_link_type(self, link_type: str) -> str:
         split = link_type.split("_")
         for i, piece in enumerate(split[1:]):
             if piece in ["link", "segment", "stretch"]:
-                link_type = "_".join(split[0: i + 1])
+                link_type = "_".join(split[0 : i + 1])
 
         if self.__all_ltp.shape[0] >= 51:
             link_type = "aggregate_link_type"
@@ -234,43 +245,7 @@ def __define_link_type(self, link_type: str) -> str:
         lt.save()
         return [letter, link_type]
 
-
-    def process_link_attributes(self):
-        self.links_df = self.links_df.assign(direction=0, link_id=0)
-        self.links_df.loc[self.links_df.oneway == "yes", "direction"] = 1
-        self.links_df.loc[self.links_df.oneway == "backward", "direction"] = -1
-        p = Parameters()
-        fields = p.parameters["network"]["links"]["fields"]
-
-        for x in fields["one-way"]:
-            keys_ = list(x.values())[0]
-            field = list(x.keys())[0]
-            osm_name = keys_.get("osm_source", field).replace(":", "_")
-            self.links_df.rename(columns={osm_name: field}, inplace=True, errors="ignore")
-
-        for x in fields["two-way"]:
-            keys_ = list(x.values())[0]
-            field = list(x.keys())[0]
-            if "osm_source" not in keys_:
-                continue
-            osm_name = keys_.get("osm_source", field).replace(":", "_")
-            self.links_df[f"{field}_ba"] = self.links_df[osm_name].copy()
-            self.links_df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore")
-            if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide":
-                self.links_df[f"{field}_ab"] = pd.to_numeric(self.links_df[f"{field}_ab"], errors="coerce") / 2
-                self.links_df[f"{field}_ba"] = pd.to_numeric(self.links_df[f"{field}_ba"], errors="coerce") / 2
-
-                if f"{field}_forward" in self.links_df:
-                    fld = pd.to_numeric(self.links_df[f"{field}_forward"], errors="coerce")
-                    self.links_df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0]
-                if f"{field}_backward" in self.links_df:
-                    fld = pd.to_numeric(self.links_df[f"{field}_backward"], errors="coerce")
-                    self.links_df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0]
-        cols = list_columns(self.project.conn, "links") + ["nodes"]
-        self.links_df = self.links_df[[x for x in cols if x in self.links_df.columns]]
-        gc.collect()
-
-    def establish_modes_for_all_links(self, conn):
+    def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFrame:
         p = Parameters()
         modes = p.parameters["network"]["osm"]["modes"]
 
@@ -289,9 +264,51 @@ def establish_modes_for_all_links(self, conn):
 
         type_list = {k: "".join(set(v)) for k, v in type_list.items()}
 
-        df = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"])
-        self.links_df = self.links_df.merge(df, on="link_type", how="left")
-        self.links_df.modes.fillna("".join(sorted(notfound)), inplace=True)
+        df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"])
+        df = df.merge(df_aux, on="link_type", how="left")
+        df.modes.fillna("".join(sorted(notfound)))
+        return df
+
+    def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = df.assign(direction=0, link_id=0)
+        df.loc[df.oneway == "yes", "direction"] = 1
+        df.loc[df.oneway == "backward", "direction"] = -1
+        p = Parameters()
+        fields = p.parameters["network"]["links"]["fields"]
+
+        for x in fields["one-way"]:
+            keys_ = list(x.values())[0]
+            field = list(x.keys())[0]
+            osm_name = keys_.get("osm_source", field).replace(":", "_")
+            df.rename(columns={osm_name: field}, inplace=True, errors="ignore")
+
+        for x in fields["two-way"]:
+            keys_ = list(x.values())[0]
+            field = list(x.keys())[0]
+            if "osm_source" not in keys_:
+                continue
+            osm_name = keys_.get("osm_source", field).replace(":", "_")
+            df[f"{field}_ba"] = df[osm_name].copy()
+            df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore")
+            if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide":
+                df[f"{field}_ab"] = pd.to_numeric(df[f"{field}_ab"], errors="coerce")
+                df[f"{field}_ba"] = pd.to_numeric(df[f"{field}_ba"], errors="coerce")
+
+                # Divides the values by 2 or zero them depending on the link direction
+                df.loc[df.direction == 0, f"{field}_ab"] /= 2
+                df.loc[df.direction == -1, f"{field}_ab"] = 0
+
+                df.loc[df.direction == 0, f"{field}_ba"] /= 2
+                df.loc[df.direction == 1, f"{field}_ba"] = 0
+
+                if f"{field}_forward" in df:
+                    fld = pd.to_numeric(df[f"{field}_forward"], errors="coerce")
+                    df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0]
+                if f"{field}_backward" in df:
+                    fld = pd.to_numeric(df[f"{field}_backward"], errors="coerce")
+                    df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0]
+        cols = list_columns(self.project.conn, "links") + ["nodes"]
+        return df[[x for x in cols if x in df.columns]]
 
     ######## TABLE STRUCTURE UPDATING ########
     def __update_table_structure(self, conn):
@@ -327,4 +344,4 @@ def get_link_field_type(field_name):
         else:
             for tp in fields["one-way"]:
                 if field_name in tp:
-                    return tp[field_name]["type"]
\ No newline at end of file
+                    return tp[field_name]["type"]

From 85f623f4f3755121ad97cbab055a7dcaced833ae Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 5 Feb 2024 13:52:44 +1000
Subject: [PATCH 06/32] Processing with chunking

---
 aequilibrae/project/network/osm/osm_downloader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index e93421d6f..6a75b6b06 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -95,7 +95,7 @@ def doWork(self):
         for lst, table in [(self._links, "links"), (self._nodes, "nodes")]:
             df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"])
             if table != "links":
-                df = df.drop(columns=["tags"])
+                df = df.drop(columns=["tags"], errors="ignore")
             self.data[table] = df.rename(columns={"id": "osm_id"})
             lst.clear()
             gc.collect()

From d434de59acabcb0493249fc2f289537ea0abb6c5 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Tue, 6 Feb 2024 07:07:14 +1000
Subject: [PATCH 07/32] Processing with chunking

---
 aequilibrae/project/network/osm/osm_builder.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 1da2d8ce7..20d71f3d4 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -110,7 +110,7 @@ def importing_network(self, conn):
                 intersecs = np.where(node_indices > 1)[0]
                 geos = []
                 for i, j in zip(intersecs[:-1], intersecs[1:]):
-                    geos.append(self.__build_geometry(link.nodes[i : j + 1]))
+                    geos.append(self.__build_geometry(link.nodes[i: j + 1]))
                 geo = MultiLineString(geos)
 
             geometries.append(geo)
@@ -168,20 +168,21 @@ def __build_geometry(self, nodes: List[int]) -> LineString:
         return LineString(self.node_df.loc[nodes, "geometry"])
 
     def __process_link_chunk(self):
-        self.logger.info("Creating necessary link types")
-        self.__emit_all(["text", "Creating necessary link types"])
+        self.logger.info("Processing link modes, types and fields")
+        self.__emit_all(["text", "Processing link modes, types and fields"])
 
         # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk
         # And let's also assume that each row will be 100 fields at 8 bytes each
         # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so.
         chunk_size = 100_000_000
-        list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
+        list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
         self.links_df = pd.DataFrame([])
         # Initialize link types
         with read_and_close(self.project.path_to_file) as conn:
             self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn)
-
+            self.__emit_all(["maxValue", len(list_dfs)])
             for i, df in enumerate(list_dfs):
+                self.__emit_all(["Value", i])
                 if "tags" in df.columns:
                     df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
                     df.columns = [x.replace(":", "_") for x in df.columns]
@@ -217,7 +218,7 @@ def __define_link_type(self, link_type: str) -> str:
         split = link_type.split("_")
         for i, piece in enumerate(split[1:]):
             if piece in ["link", "segment", "stretch"]:
-                link_type = "_".join(split[0 : i + 1])
+                link_type = "_".join(split[0: i + 1])
 
         if self.__all_ltp.shape[0] >= 51:
             link_type = "aggregate_link_type"

From fcc6db47e59dbf5f66096abebfe65b17371b6915 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Tue, 6 Feb 2024 18:44:45 +1000
Subject: [PATCH 08/32] Remove no-op qgis find_spec check from osm_builder

---
 aequilibrae/project/network/osm/osm_builder.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 20d71f3d4..caccc277f 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -24,9 +24,6 @@
 if pyqt:
     from PyQt5.QtCore import pyqtSignal
 
-if iutil.find_spec("qgis") is not None:
-    pass
-
 
 class OSMBuilder(WorkerThread):
     if pyqt:

From 7a8606ed37d63a0e86ab7ae69ecf4741ac472139 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Tue, 6 Feb 2024 22:01:31 +1000
Subject: [PATCH 09/32] Build OSM link geometries as WKT text instead of GeoPandas objects

---
 aequilibrae/project/network/link_types.py     |  6 +-
 .../project/network/osm/osm_builder.py        | 57 +++++++++----------
 2 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/aequilibrae/project/network/link_types.py b/aequilibrae/project/network/link_types.py
index 05b3524ae..df3f91140 100644
--- a/aequilibrae/project/network/link_types.py
+++ b/aequilibrae/project/network/link_types.py
@@ -1,6 +1,7 @@
-from sqlite3 import IntegrityError, Connection
-from aequilibrae.project.network.link_type import LinkType
+from sqlite3 import IntegrityError
+
 from aequilibrae.project.field_editor import FieldEditor
+from aequilibrae.project.network.link_type import LinkType
 from aequilibrae.project.table_loader import TableLoader
 from aequilibrae.utils.db_utils import commit_and_close
 from aequilibrae.utils.spatialite_utils import connect_spatialite
@@ -84,7 +85,6 @@ def new(self, link_type_id: str) -> LinkType:
         tp["link_type_id"] = link_type_id
         lt = LinkType(tp, self.project)
         self.__items[link_type_id] = lt
-        self.logger.warning("Link type has not yet been saved to the database. Do so explicitly")
         return lt
 
     def delete(self, link_type_id: str) -> None:
diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index caccc277f..87d74e2d3 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -5,11 +5,9 @@
 from pathlib import Path
 from typing import List
 
-import geopandas as gpd
 import numpy as np
 import pandas as pd
 from pandas import json_normalize
-from shapely import MultiLineString
 from shapely.geometry import Polygon, LineString
 
 from aequilibrae.context import get_active_project
@@ -45,11 +43,9 @@ def __init__(self, data, project, model_area: Polygon) -> None:
         self.__link_id = 1
         self.__valid_links = []
 
+        # Building shapely geometries makes the code surprisingly slower.
         nids = np.arange(data["nodes"].shape[0]) + self.node_start
-        nodes = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True)
-        self.node_df = gpd.GeoDataFrame(nodes, geometry=gpd.points_from_xy(nodes.lon, nodes.lat), crs=4326)
-        del nodes
-        del data["nodes"]
+        self.node_df = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True)
         gc.collect()
         self.links_df = data["links"]
 
@@ -67,6 +63,7 @@ def doWork(self):
         self.__emit_all(["finished_threaded_procedure", 0])
 
     def importing_network(self, conn):
+        self.logger.info("Importing the network")
         node_count = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count()
 
         self.node_df.osm_id = self.node_df.osm_id.astype(np.int64)
@@ -93,7 +90,6 @@ def importing_network(self, conn):
 
             if len(link["nodes"]) < 2:
                 self.logger.error(f"Link {idx} has less than two nodes. {link.nodes}")
-                geometries.append(LineString())
                 continue
 
             # The link is a straight line between two points
@@ -101,28 +97,26 @@ def importing_network(self, conn):
             node_indices = node_count.loc[link["nodes"], "counter"]
             if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1:
                 # The link has no intersections
-                geo = self.__build_geometry(link.nodes)
+                geometries.append([idx, self.__build_geometry(link.nodes)])
             else:
                 # The link has intersections
+                # We build repeated records for links when they have intersections
+                # This is because it is faster to do this way and then have all the data repeated
+                # when doing the join with the link fields below
                 intersecs = np.where(node_indices > 1)[0]
-                geos = []
                 for i, j in zip(intersecs[:-1], intersecs[1:]):
-                    geos.append(self.__build_geometry(link.nodes[i: j + 1]))
-                geo = MultiLineString(geos)
-
-            geometries.append(geo)
+                    geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])])
 
         # Builds the link Geo dataframe
         self.links_df.drop(columns=["nodes"], inplace=True)
-        self.links_df = gpd.GeoDataFrame(self.links_df, geometry=geometries, crs=4326)
-        self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False)
-        self.links_df = self.links_df[self.links_df.geometry.length > 0]
+        # We build a dataframe with the geometries created above
+        # and join with the database
+        geo_df = pd.DataFrame(geometries, columns=["link_id", "geometry"]).set_index("link_id")
+        self.links_df = self.links_df.join(geo_df, how="inner")
 
         self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1
 
-        self.node_df.reset_index(inplace=True)
-        cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types"]
-        self.node_df = gpd.GeoDataFrame(self.node_df[cols], geometry=self.node_df.geometry, crs=self.node_df.crs)
+        self.node_df = self.node_df.reset_index()
 
         # Saves the data to disk in case of issues loading it to the database
         osm_data_path = Path(self.project.project_base_path) / "osm_data"
@@ -136,7 +130,10 @@ def importing_network(self, conn):
         # Removing the triggers before adding all nodes makes things a LOT faster
         remove_triggers(conn, self.logger, "network")
 
-        self.node_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="nodes", mode="a")
+        cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types", "lon", "lat"]
+        insert_qry = f"INSERT INTO nodes ({','.join(cols[:-2])}, geometry) VALUES(?,?,?,?,?, MakePoint(?,?, 4326))"
+        conn.executemany(insert_qry, self.node_df[cols].to_records(index=False))
+
         del self.node_df
         gc.collect()
 
@@ -146,14 +143,13 @@ def importing_network(self, conn):
         # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a")
 
         # I could not get the above line to work, so I used the following code instead
-        insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromWKB(?, 4326))"
+        insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))"
         cols_no_geo = self.links_df.columns.tolist()
         cols_no_geo.remove("geometry")
         insert_qry = insert_qry.format(", ".join(cols_no_geo), ", ".join(["?"] * len(cols_no_geo)))
 
-        geos = self.links_df.geometry.to_wkb()
         cols = cols_no_geo + ["geometry"]
-        links_df = pd.DataFrame(self.links_df[cols_no_geo]).assign(geometry=geos)[cols].to_records(index=False)
+        links_df = self.links_df[cols].to_records(index=False)
 
         del self.links_df
         gc.collect()
@@ -161,8 +157,10 @@ def importing_network(self, conn):
         self.__emit_all(["text", "Adding links to file"])
         conn.executemany(insert_qry, links_df)
 
-    def __build_geometry(self, nodes: List[int]) -> LineString:
-        return LineString(self.node_df.loc[nodes, "geometry"])
+    def __build_geometry(self, nodes: List[int]) -> str:
+        slice = self.node_df.loc[nodes, :]
+        txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist())
+        return f"LINESTRING({txt})"
 
     def __process_link_chunk(self):
         self.logger.info("Processing link modes, types and fields")
@@ -171,14 +169,15 @@ def __process_link_chunk(self):
         # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk
         # And let's also assume that each row will be 100 fields at 8 bytes each
         # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so.
-        chunk_size = 100_000_000
-        list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
+        chunk_size = 1_000_000
+        list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
         self.links_df = pd.DataFrame([])
         # Initialize link types
         with read_and_close(self.project.path_to_file) as conn:
             self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn)
             self.__emit_all(["maxValue", len(list_dfs)])
             for i, df in enumerate(list_dfs):
+                self.logger.info(f"Processing chunk {i + 1}/{len(list_dfs)}")
                 self.__emit_all(["Value", i])
                 if "tags" in df.columns:
                     df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
@@ -215,7 +214,7 @@ def __define_link_type(self, link_type: str) -> str:
         split = link_type.split("_")
         for i, piece in enumerate(split[1:]):
             if piece in ["link", "segment", "stretch"]:
-                link_type = "_".join(split[0: i + 1])
+                link_type = "_".join(split[0 : i + 1])
 
         if self.__all_ltp.shape[0] >= 51:
             link_type = "aggregate_link_type"
@@ -264,7 +263,7 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram
 
         df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"])
         df = df.merge(df_aux, on="link_type", how="left")
-        df.modes.fillna("".join(sorted(notfound)))
+        df.modes.fillna("".join(sorted(notfound)), inplace=True)
         return df
 
     def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:

From bf9aca40f18b06929fefdcc05347a6fb16bf6b1a Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Tue, 6 Feb 2024 22:11:15 +1000
Subject: [PATCH 10/32] Only set link direction when the oneway column exists

---
 aequilibrae/project/network/osm/osm_builder.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 87d74e2d3..ba1c0cba8 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -268,8 +268,9 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram
 
     def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:
         df = df.assign(direction=0, link_id=0)
-        df.loc[df.oneway == "yes", "direction"] = 1
-        df.loc[df.oneway == "backward", "direction"] = -1
+        if "oneway" in df.columns:
+            df.loc[df.oneway == "yes", "direction"] = 1
+            df.loc[df.oneway == "backward", "direction"] = -1
         p = Parameters()
         fields = p.parameters["network"]["links"]["fields"]
 

From f32619650b8178d8454dd3efaa3cb5e04edf7cca Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Thu, 8 Feb 2024 20:04:58 +1000
Subject: [PATCH 11/32] Assign node defaults at download time; log malformed links at debug

---
 aequilibrae/project/network/osm/osm_builder.py    | 9 ++++-----
 aequilibrae/project/network/osm/osm_downloader.py | 2 ++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index ba1c0cba8..334dcde57 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -44,8 +44,8 @@ def __init__(self, data, project, model_area: Polygon) -> None:
         self.__valid_links = []
 
         # Building shapely geometries makes the code surprisingly slower.
-        nids = np.arange(data["nodes"].shape[0]) + self.node_start
-        self.node_df = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True)
+        self.node_df = data["nodes"]
+        self.node_df.loc[:, "node_id"] = np.arange(data["nodes"].shape[0]) + self.node_start
         gc.collect()
         self.links_df = data["links"]
 
@@ -84,12 +84,11 @@ def importing_network(self, conn):
 
             # How can I link have less than two points?
             if not isinstance(link["nodes"], list):
-                geometries.append(LineString())
-                self.logger.error(f"OSM link {idx} does not have a list of nodes.")
+                self.logger.debug(f"OSM link/feature {idx} does not have a list of nodes.")
                 continue
 
             if len(link["nodes"]) < 2:
-                self.logger.error(f"Link {idx} has less than two nodes. {link.nodes}")
+                self.logger.debug(f"Link {idx} has less than two nodes. {link.nodes}")
                 continue
 
             # The link is a straight line between two points
diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index 6a75b6b06..1e4586d57 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -86,6 +86,8 @@ def doWork(self):
             if json["elements"]:
                 for tag, lst in [("node", self._nodes), ("way", self._links)]:
                     df = pd.DataFrame([item for item in json["elements"] if item["type"] == tag])
+                    if tag == "node":
+                        df = df.assign(is_centroid=0, modes="", link_types="", node_id=0)
                     lst.append(df)
                 del json
                 gc.collect()

From d7f78fabb16a7a192e44af2e335d91a1111a82ac Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Thu, 8 Feb 2024 20:15:58 +1000
Subject: [PATCH 12/32] Skip link fields whose OSM source column is absent

---
 aequilibrae/project/network/osm/osm_builder.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 334dcde57..5bb763a3a 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -285,6 +285,8 @@ def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:
             if "osm_source" not in keys_:
                 continue
             osm_name = keys_.get("osm_source", field).replace(":", "_")
+            if osm_name not in df.columns:
+                continue
             df[f"{field}_ba"] = df[osm_name].copy()
             df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore")
             if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide":

From 121e41d8059da035a60d7eb6333d9e503afe4eae Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Thu, 8 Feb 2024 20:47:47 +1000
Subject: [PATCH 13/32] .

---
 aequilibrae/project/network/osm/osm_builder.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 5bb763a3a..b95598643 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -71,12 +71,13 @@ def importing_network(self, conn):
 
         self.__process_link_chunk()
         shape_ = self.links_df.shape[0]
-        message_step = floor(shape_ / 100)
+        message_step = max(1, floor(shape_ / 100))
         self.__emit_all(["maxValue", shape_])
 
         self.logger.info("Geo-procesing links")
         self.__emit_all(["text", "Adding network links"])
         geometries = []
+        self.links_df.set_index(["osm_id"], inplace=True)
         for counter, (idx, link) in enumerate(self.links_df.iterrows()):
             self.__emit_all(["Value", counter])
             if counter % message_step == 0:

From 979b1576d199c689861a49d5120ba371373c60df Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Thu, 8 Feb 2024 21:10:53 +1000
Subject: [PATCH 14/32] Handle empty OSM downloads in downloader and its test

---
 aequilibrae/project/network/osm/osm_downloader.py | 6 ++++--
 tests/aequilibrae/project/test_osm_downloader.py  | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index 1e4586d57..97acd944d 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -95,10 +95,12 @@ def doWork(self):
         self.__emit_all(["Value", len(self.polygons)])
         self.__emit_all(["text", "Downloading finished. Processing data"])
         for lst, table in [(self._links, "links"), (self._nodes, "nodes")]:
-            df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"])
+            df = pd.DataFrame([])
+            if len(lst) > 0:
+                df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"])
             if table != "links":
                 df = df.drop(columns=["tags"], errors="ignore")
-            self.data[table] = df.rename(columns={"id": "osm_id"})
+            self.data[table] = df.rename(columns={"id": "osm_id"}, errors="ignore")
             lst.clear()
             gc.collect()
 
diff --git a/tests/aequilibrae/project/test_osm_downloader.py b/tests/aequilibrae/project/test_osm_downloader.py
index 706c04989..7c5fe3bb7 100644
--- a/tests/aequilibrae/project/test_osm_downloader.py
+++ b/tests/aequilibrae/project/test_osm_downloader.py
@@ -32,7 +32,7 @@ def test_do_work2(self):
         o = OSMDownloader([box(-112.185, 36.59, -112.179, 36.60)], ["car"])
         o.doWork()
 
-        if "elements" not in o.json[0]:
+        if len(o.json) == 0 or "elements" not in o.json[0]:
             return
 
         if len(o.json[0]["elements"]) > 1000:

From 630fbd8216457aa93eb68db6ef83f0d9a4255eff Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Fri, 9 Feb 2024 09:54:10 +1000
Subject: [PATCH 15/32] Compute node_id range directly from node_start; reformat slices

---
 aequilibrae/project/network/osm/osm_builder.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index b95598643..3ca88f802 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -45,7 +45,7 @@ def __init__(self, data, project, model_area: Polygon) -> None:
 
         # Building shapely geometries makes the code surprisingly slower.
         self.node_df = data["nodes"]
-        self.node_df.loc[:, "node_id"] = np.arange(data["nodes"].shape[0]) + self.node_start
+        self.node_df.loc[:, "node_id"] = np.arange(self.node_start, self.node_start + self.node_df.shape[0])
         gc.collect()
         self.links_df = data["links"]
 
@@ -105,7 +105,7 @@ def importing_network(self, conn):
                 # when doing the join with the link fields below
                 intersecs = np.where(node_indices > 1)[0]
                 for i, j in zip(intersecs[:-1], intersecs[1:]):
-                    geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])])
+                    geometries.append([idx, self.__build_geometry(link.nodes[i: j + 1])])
 
         # Builds the link Geo dataframe
         self.links_df.drop(columns=["nodes"], inplace=True)
@@ -170,7 +170,7 @@ def __process_link_chunk(self):
         # And let's also assume that each row will be 100 fields at 8 bytes each
         # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so.
         chunk_size = 1_000_000
-        list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
+        list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
         self.links_df = pd.DataFrame([])
         # Initialize link types
         with read_and_close(self.project.path_to_file) as conn:
@@ -214,7 +214,7 @@ def __define_link_type(self, link_type: str) -> str:
         split = link_type.split("_")
         for i, piece in enumerate(split[1:]):
             if piece in ["link", "segment", "stretch"]:
-                link_type = "_".join(split[0 : i + 1])
+                link_type = "_".join(split[0: i + 1])
 
         if self.__all_ltp.shape[0] >= 51:
             link_type = "aggregate_link_type"

From 5f78189934d0a5a82582911fbed565d7360db810 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Fri, 9 Feb 2024 20:32:34 +1000
Subject: [PATCH 16/32] Restore black-style slice spacing in osm_builder

---
 aequilibrae/project/network/osm/osm_builder.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 3ca88f802..ee60ca224 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -105,7 +105,7 @@ def importing_network(self, conn):
                 # when doing the join with the link fields below
                 intersecs = np.where(node_indices > 1)[0]
                 for i, j in zip(intersecs[:-1], intersecs[1:]):
-                    geometries.append([idx, self.__build_geometry(link.nodes[i: j + 1])])
+                    geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])])
 
         # Builds the link Geo dataframe
         self.links_df.drop(columns=["nodes"], inplace=True)
@@ -170,7 +170,7 @@ def __process_link_chunk(self):
         # And let's also assume that each row will be 100 fields at 8 bytes each
         # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so.
         chunk_size = 1_000_000
-        list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
+        list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
         self.links_df = pd.DataFrame([])
         # Initialize link types
         with read_and_close(self.project.path_to_file) as conn:
@@ -214,7 +214,7 @@ def __define_link_type(self, link_type: str) -> str:
         split = link_type.split("_")
         for i, piece in enumerate(split[1:]):
             if piece in ["link", "segment", "stretch"]:
-                link_type = "_".join(split[0: i + 1])
+                link_type = "_".join(split[0 : i + 1])
 
         if self.__all_ltp.shape[0] >= 51:
             link_type = "aggregate_link_type"

From 4b4371de572f38afcc4df717defa9ee99e3e8f93 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 12 Feb 2024 17:41:33 +1000
Subject: [PATCH 17/32] Restore osm_id column before link insert; skip link_type field

---
 aequilibrae/project/network/osm/osm_builder.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index ee60ca224..c2743e905 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -143,6 +143,8 @@ def importing_network(self, conn):
         # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a")
 
         # I could not get the above line to work, so I used the following code instead
+        self.links_df.index.name="osm_id"
+        self.links_df.reset_index(inplace=True)
         insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))"
         cols_no_geo = self.links_df.columns.tolist()
         cols_no_geo.remove("geometry")
@@ -275,6 +277,8 @@ def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:
         fields = p.parameters["network"]["links"]["fields"]
 
         for x in fields["one-way"]:
+            if "link_type" in x.keys():
+                continue
             keys_ = list(x.values())[0]
             field = list(x.keys())[0]
             osm_name = keys_.get("osm_source", field).replace(":", "_")

From 8fb82a8bfd437ffd9bcf230cc4d4f0045669895b Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 12 Feb 2024 17:53:41 +1000
Subject: [PATCH 18/32] .

---
 aequilibrae/project/network/osm/osm_builder.py    | 2 +-
 aequilibrae/project/network/osm/osm_downloader.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index c2743e905..f20f52646 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -208,7 +208,7 @@ def __build_link_types(self, df):
         df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left")
         return df.drop(columns=["highway"])
 
-    def __define_link_type(self, link_type: str) -> str:
+    def __define_link_type(self, link_type: str) -> [str, str]:
         proj_link_types = self.project.network.link_types
         original_link_type = link_type
         link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower()
diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index 97acd944d..58b3fd6c9 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -68,7 +68,7 @@ def doWork(self):
             m = f"[maxsize: {memory}]"
         for counter, poly in enumerate(self.polygons):
             msg = f"Downloading polygon {counter + 1} of {len(self.polygons)}"
-            self.logger.debug(msg)
+            self.logger.info(msg)
             self.__emit_all(["Value", counter])
             self.__emit_all(["text", msg])
             west, south, east, north = poly.bounds

From afa381fe6f737ae0acd06a33cc63e17749b283fe Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 12 Feb 2024 21:18:25 +1000
Subject: [PATCH 19/32] Raise link chunk size to 5M rows; reset chunk index before tag concat

---
 aequilibrae/project/network/osm/osm_builder.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index f20f52646..18ec3c7c8 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -169,9 +169,9 @@ def __process_link_chunk(self):
         self.__emit_all(["text", "Processing link modes, types and fields"])
 
         # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk
-        # And let's also assume that each row will be 100 fields at 8 bytes each
-        # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so.
-        chunk_size = 1_000_000
+        # And let's also assume that each row will be 200 fields at 8 bytes each
+        # This makes 8Gb roughly equal to 5.3 million rows, so 5 million would so.
+        chunk_size = 5_000_000
         list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
         self.links_df = pd.DataFrame([])
         # Initialize link types
@@ -182,6 +182,8 @@ def __process_link_chunk(self):
                 self.logger.info(f"Processing chunk {i + 1}/{len(list_dfs)}")
                 self.__emit_all(["Value", i])
                 if "tags" in df.columns:
+                    # It is critical to reset the index for the concat below to work
+                    df.reset_index(drop=True, inplace=True)
                     df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"])
                     df.columns = [x.replace(":", "_") for x in df.columns]
                     df = self.__build_link_types(df)
@@ -191,7 +193,7 @@ def __process_link_chunk(self):
                     self.logger.error("OSM link data does not have tags. Skipping an entire data chunk")
                     df = pd.DataFrame([])
                 list_dfs[i] = df
-        self.links_df = pd.concat(list_dfs)
+        self.links_df = pd.concat(list_dfs, ignore_index=True)
 
     def __build_link_types(self, df):
         data = []

From 9a0d09b6d561ae2fd705325e3dcd45949c3f500a Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 12 Feb 2024 21:18:41 +1000
Subject: [PATCH 20/32] .

---
 aequilibrae/project/network/osm/osm_builder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 18ec3c7c8..31da60b56 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -143,7 +143,7 @@ def importing_network(self, conn):
         # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a")
 
         # I could not get the above line to work, so I used the following code instead
-        self.links_df.index.name="osm_id"
+        self.links_df.index.name = "osm_id"
         self.links_df.reset_index(inplace=True)
         insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))"
         cols_no_geo = self.links_df.columns.tolist()

From 86571d95ac13f2239eb4d083be9be6e85b28e594 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 12 Feb 2024 21:21:18 +1000
Subject: [PATCH 21/32] .

---
 aequilibrae/project/network/osm/osm_builder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 31da60b56..e5dd8471b 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -170,8 +170,8 @@ def __process_link_chunk(self):
 
         # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk
         # And let's also assume that each row will be 200 fields at 8 bytes each
-        # This makes 8Gb roughly equal to 5.3 million rows, so 5 million would so.
-        chunk_size = 5_000_000
+        # This makes 2Gb roughly equal to 1.3 million rows, so 1 million would do.
+        chunk_size = 1_000_000
         list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)]
         self.links_df = pd.DataFrame([])
         # Initialize link types

From 224e5fbe8a2cbacb57bcb75d146502e0e58935f8 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 12 Feb 2024 22:02:24 +1000
Subject: [PATCH 22/32] .

---
 aequilibrae/project/network/osm/osm_builder.py | 11 +++++++----
 aequilibrae/transit/map_matching_graph.py      |  3 +--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index e5dd8471b..f0d3f0f4d 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -94,18 +94,21 @@ def importing_network(self, conn):
 
             # The link is a straight line between two points
             # Or all midpoints are only part of a single link
-            node_indices = node_count.loc[link["nodes"], "counter"]
+            node_indices = node_count.loc[link["nodes"], "counter"].to_numpy()
             if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1:
                 # The link has no intersections
-                geometries.append([idx, self.__build_geometry(link.nodes)])
+                geometries.append([idx, self._build_geometry(link.nodes)])
             else:
+                # Make sure we get the first and last nodes, as they are certainly the extremities of the sublinks
+                node_indices[0] = 2
+                node_indices[-1] = 2
                 # The link has intersections
                 # We build repeated records for links when they have intersections
                 # This is because it is faster to do this way and then have all the data repeated
                 # when doing the join with the link fields below
                 intersecs = np.where(node_indices > 1)[0]
                 for i, j in zip(intersecs[:-1], intersecs[1:]):
-                    geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])])
+                    geometries.append([idx, self._build_geometry(link.nodes[i : j + 1])])
 
         # Builds the link Geo dataframe
         self.links_df.drop(columns=["nodes"], inplace=True)
@@ -159,7 +162,7 @@ def importing_network(self, conn):
         self.__emit_all(["text", "Adding links to file"])
         conn.executemany(insert_qry, links_df)
 
-    def __build_geometry(self, nodes: List[int]) -> str:
+    def _build_geometry(self, nodes: List[int]) -> str:
         slice = self.node_df.loc[nodes, :]
         txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist())
         return f"LINESTRING({txt})"
diff --git a/aequilibrae/transit/map_matching_graph.py b/aequilibrae/transit/map_matching_graph.py
index a1ab70418..56e5610d8 100644
--- a/aequilibrae/transit/map_matching_graph.py
+++ b/aequilibrae/transit/map_matching_graph.py
@@ -118,8 +118,7 @@ def __build_graph_from_cache(self):
 
     def __build_graph_from_scratch(self):
         self.logger.info(f"Creating map-matching graph from scratch for mode_id={self.mode_id}")
-        self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=np.nan)
-        self.df.loc[:, "geo"] = self.df.wkt.apply(shapely.wkt.loads)
+        self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=self.df.wkt.apply(shapely.wkt.loads))
         self.df.loc[self.df.link_id < 0, "link_id"] = self.df.link_id * -1 + self.df.link_id.max() + 1
         # We make sure all link IDs are in proper order
 

From eea6ad58a74f6f74906780a6854ed7abb7b7377a Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 12 Feb 2024 22:25:57 +1000
Subject: [PATCH 23/32] pandas deprecations

---
 aequilibrae/project/network/osm/osm_builder.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index f0d3f0f4d..c78a4993c 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -200,7 +200,7 @@ def __process_link_chunk(self):
 
     def __build_link_types(self, df):
         data = []
-        df.highway.fillna("missing", inplace=True)
+        df = df.fillna(value={"highway": "missing"})
         df.highway = df.highway.str.lower()
         for i, lt in enumerate(df.highway.unique()):
             if str(lt) in self.__all_ltp.highway.values:
@@ -270,8 +270,7 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram
 
         df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"])
         df = df.merge(df_aux, on="link_type", how="left")
-        df.modes.fillna("".join(sorted(notfound)), inplace=True)
-        return df
+        return df.fillna(value={"modes": "".join(sorted(notfound))})
 
     def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:
         df = df.assign(direction=0, link_id=0)

From b7df86e0cf6201f4668fe8566c68bc6710297abc Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Fri, 16 Feb 2024 09:53:18 +1000
Subject: [PATCH 24/32] improves griding function

---
 .../network/osm/model_area_gridding.py        |  31 ++++++++----------
 .../project/network/osm/osm_builder.py        |   9 +++--
 .../project/data/porto_rico.parquet           | Bin 0 -> 11961 bytes
 tests/aequilibrae/project/data/wynnum.parquet | Bin 0 -> 1626 bytes
 .../project/test_polygon_gridding.py          |  22 +++++++++++++
 5 files changed, 42 insertions(+), 20 deletions(-)
 create mode 100644 tests/aequilibrae/project/data/porto_rico.parquet
 create mode 100644 tests/aequilibrae/project/data/wynnum.parquet
 create mode 100644 tests/aequilibrae/project/test_polygon_gridding.py

diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py
index 3f58a21b0..3a6b636d5 100644
--- a/aequilibrae/project/network/osm/model_area_gridding.py
+++ b/aequilibrae/project/network/osm/model_area_gridding.py
@@ -1,28 +1,25 @@
 # Inspired by https://www.matecdev.com/posts/shapely-polygon-gridding.html
+from math import ceil
+
+import geopandas as gpd
 import numpy as np
 from shapely.geometry import Polygon
-import geopandas as gpd
 
 
 def geometry_grid(model_area, srid) -> gpd.GeoDataFrame:
     minx, miny, maxx, maxy = model_area.bounds
-    subd = min(0.01, abs(maxy - miny) / 3, abs(maxx - minx) / 3)
-    space_x = int((maxx - minx) / subd)
-    space_y = int((maxy - miny) / subd)
-    combx, comby = np.linspace(minx, maxx, space_x), np.linspace(miny, maxy, space_y)
+    # Some rough heuristic to get the number of points per sub-polygon in the 2 digits range
+    subd = ceil((len(model_area.boundary.coords) / 32) ** 0.5)
+    dx = (maxx - minx) / subd
+    dy = (maxy - miny) / subd
     elements = []
-    for i in range(len(combx) - 1):
-        for j in range(len(comby) - 1):
-            elements.append(
-                Polygon(
-                    [
-                        [combx[i], comby[j]],
-                        [combx[i], comby[j + 1]],
-                        [combx[i + 1], comby[j + 1]],
-                        [combx[i + 1], comby[j]],
-                    ]
-                )
-            )
+    x1 = minx
+    for i in range(subd):
+        j1 = miny
+        for j in range(subd):
+            elements.append(Polygon([[x1, j1], [x1, j1 + dy], [x1 + dx, j1 + dy], [x1 + dx, j1]]))
+            j1 += dy
+        x1 += dx
 
     gdf = gpd.GeoDataFrame({"id": np.arange(len(elements))}, geometry=elements, crs=srid)
 
diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index c78a4993c..06d586a32 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -8,7 +8,7 @@
 import numpy as np
 import pandas as pd
 from pandas import json_normalize
-from shapely.geometry import Polygon, LineString
+from shapely.geometry import Polygon
 
 from aequilibrae.context import get_active_project
 from aequilibrae.parameters import Parameters
@@ -57,9 +57,12 @@ def doWork(self):
         with commit_and_close(connect_spatialite(self.path)) as conn:
             self.__update_table_structure(conn)
             self.importing_network(conn)
+
+            self.logger.info("Cleaning things up")
             conn.execute(
                 "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)"
             )
+            conn.execute("VACUUM;")
         self.__emit_all(["finished_threaded_procedure", 0])
 
     def importing_network(self, conn):
@@ -269,8 +272,8 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram
         type_list = {k: "".join(set(v)) for k, v in type_list.items()}
 
         df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"])
-        df = df.merge(df_aux, on="link_type", how="left")
-        return df.fillna(value={"modes": "".join(sorted(notfound))})
+        df = df.merge(df_aux, on="link_type", how="left").fillna(value={"modes": "".join(sorted(notfound))})
+        return df
 
     def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:
         df = df.assign(direction=0, link_id=0)
diff --git a/tests/aequilibrae/project/data/porto_rico.parquet b/tests/aequilibrae/project/data/porto_rico.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..97dfe198c23b20bad43fe123f8dadf8c091a0ee2
GIT binary patch
literal 11961
zcmchd2V7Iv`~Lw^QL#=CE!7k&YDGwH2t(R>!$=^Ia0!q=>dGPsWSId3SE;k^y7%6z
zTCEGmb?=3CSZ5v8YTbMPJ|}>-e*L}v|JVQj|NVKH<i5{%&U2pUInO!wUKydB7vvw*
zDyVfvkYA90Qy-s(+<HDfK3pD`{N-i``2;l$3XUL^L3M)agvR)EsQ0!q<lQCu)Z_T!
z+t1h6&nJA9Z)i|pOM$;not7a1LXk+El9`qq<QME0Jj*wDk#Bfnc)))zs$~QQ^AGa#
z!QkY_hx`NvHLg*yPIFTBLEZdvvqSKCPn(0a-f2_&ohq*2zTwaP!jJk9Rcd;EaPY98
z@S4G!&<)ApR+CGw<7B(kF1_;uHfs2<1|k2(_p}>Q=bd)`GNClZYp3<T!P|Vphu03@
z?@!eGw}kfveo*PgLCtHF4hY4V-oN*C4GE~Jd3P1;)8PZ9X!lPK-wy~4@~`%Alb$_I
zR-L}Ir&(XDH}~ZB?CG7tM&Dr8H~doF@E_}jzLWm$>c1K1gOdpe3apuoe_%W4@*flN
zC#50YZwUSey{Gt<2BATIA6A@Kt@v2q;3>Z059@~?t{++@@q=<dsOUUp&<bUZswX-<
zJRWo;$j8UG7PRoiryIVmJU-OE(+P&X{A%N?``sy8cu#w{JJ_kaI(bJoiuP+5<8BQu
znc47}K}FFmQX2IQJjSqZ{uU44pFz<_Gkb@Y9%a}EKEcl(rBn2}Idyz3;Mc)3+TR2J
zFnLjc{RqQO?KH4(AozHLlE{&V8MdA3eyK`A(TxH*eLH|XKd7eO6;t%YWe-E^fCD}o
z|9e(4MF%++&Z-SweDKN0WU#pR=q_CjF>GnO1I~V3DViBI+1eC5Awza6Fah%J#IFuL
z$gr=9JXgzNDSG;tqNF_s7`D!nTCGE(Df;6_jYr)EKltJ5#G5?Sx4k@LEc$Pv`aNve
ziK4A7?UTlVuigq+TpK*3HLuJH_Fp!j@qrE$J^6LJr5WIlcs(903#aG{&sU#c0&b!0
z^~t`EDLSy$q;7k_Yr}(|4sS)#A;;#-9RU67jTjy`GlZh&UoRiC1KeZqgxNVQC_1W0
zQ~m>ZZ{dSC%bHPi?;PvANnlsm`BAl-Q1p(NO9!m5-wvOhLz~v4=#CTb4(fsWWxaCb
z?E@%!Ky&w>tufxh$d>z3>rnL2maQ`V!EuFy|LRklqAdxQQ32q&10(-9UW=k_k+0Xc
z2RGA1F8$q)qF<%Ur)t302Pd=;flul7o{?a@UI%g``oH1P3zqPoj0Hz@2s{}5nn!Qr
z`TOhxw><PPefJ9<o!PeY#0}v1{!5}Yk9l;(jHZppgO9f86SeIjk6x$hHRd~TU(L+V
zFFfGUygR)Y)dT-=>~2BB`#ie-gVu&=n9o<Gw<?F+;nBg|@~=yXJ<dpeIC7gucR6a=
zxfS{eI@Uhd16=UD-KCXa_Zri}#&>yi(fa%2-heaq^2U7$dF<hmyUozQ>6vr2To|v<
ze8rS)sNb}-{7)zJ>pmiQjs=|5d*Aez;9b)W&RdOmX}xsYgAw4o{4>qR>}S|(_LYxU
zqkq}*RY?Ny+G(rWZ-)FY1`c^$Zy&>Eb{nnafDh^CJNxft*!$~Vvbo?!C%0|dzK3D8
zCncYbMgN97&)&Yen_({+_ch%9ghvOi+BE;oE{3Jzf8u96<IzEWn|6P{gJGYQFaM(a
zDUW{PJTdkj%dk)5P7b<-_T3I2YbXWxF}CUS^bwEt>mZtRcss*#J|8vZH0m!7O7sr}
zKNLU{@UJgM6`$S4uqmC7tZa<=>~IVUyaGOJ^s$x0A3G!Hp$EV(`drNW75cT3M($<7
zd!-GdXM^>luXca5m0=yN=G7m7`B+9fr`y5xJKuc02KGMnz#7p3{AoAe!DaAAezOLB
zQ?@YdklwMwegPjT9{g2a1^ipqe3keXk9KARKWPK5^_S+P1bj40yzl4D4BP&<0W+tg
zy=vo%odw{to4A!DFy88%7I!J|`8IcQJ3*hjXZvitvI*lqIk+Vk%sb-xVLW*A*<MR_
zq5bsNNJJYM_VFagw%<^H&^$Ix1eS05PBjfI*Crlr2VQrz+wkRR->m#bo8%3!&kWg*
zKi}igwI<%X!v~LTTCsiu+K1f>9OnyeMroVLV6R6{<fX^gWBp%AUvd%LxclQPBfzio
zceUvQ-n=Eg+wz|n_L6Jd%1HFz{?QEnymgS*_-X?c>gNQ7{J427!<Jom`a2)~`Sgq5
z<1{}qY@^ETD~;j*LgIMv#FMkeIiK_B`@Ke-`wFbKjGTEF>y3%4_$gxz!y3EFlZW_X
zeeYRPbZa%kE;X)rH52^sT922Dz~xQXe?AfGHKb0&rs3f2Q>R4kM*m+=j~!3|cI+H<
zV#P}yJ?X^$YeC?;Z6DX018%qS>5!d2Fl_Fe;DJM5As+t-pMP)_!-kwF&Nv4?eSOC4
zODhqt2l-K?-gr>fbqV-7C8&2E{TIZZ6<fi@AD`-9gnY2#V2*U#3Woh;jrp1o^XZUk
zE@`x!VYfv^=9(~nEBBOo@G^$o^^vyn0=U7nNqH^7a|cabKLGZS8pq|ym%?A;cVyj1
z|H}BTdwMTnSe59;mlGlX!1ZrF`+G6+-PRo~!olYd9vc@iY`5IU-J4+jB+}ak)d!zD
zK4E5m@B!ELhg%jhY%)J{RY&lOFUnVEg9khr-KrVZN7%)a9jDAgz7?)Guo2^j$xlWu
zn!~W!`OS;7Fu%olf&k}ihTT|NdaNAt@AmzaIYVYL?Cj9m!@dRI`+4-GP%snAT%3va
zlB`*kl`|OjrTyTFb<j^fVC&9r!Sl3rCvL=iwk}(%>NSmF$#QCp^}P4lbY|F8>^Jw@
zRN4?fFUCJTTMt}%X9@39#Onj!@}mo;p#AaNV>MWRUE0iQw|_Fj8amuA9gp_GbAPKp
z4ZOYb+1&`p>+Sa0-5uO3?0lCum|y2^3N;@b^=$UbbFlaR%At`<CNk`wH}-`;!+h^A
z-7Q`Kp0XiV^#VM()|aQ&Phi->M?dG5L*HwkEWVx!4j;4lYF%)YB`%`jc*F;ZGmQ7D
z@y8J}#xbno(ftG|^sPO#Me>608TLY}-#*^~E^I#T@QtwyD>>ERt0P!%>%J`8)en4M
zc&y-O<a=k@&&AEa_4t-I2aykc_YaxW72Nxi%fm<9<k3UM4D0y)7>4cI^75{~|Kib~
zkqve<&JVSN*$Lq7p|@K5fknN${A{|xqlbqruo^}oU!Du&{(}As_O#qJV<h6SP3NN_
zkpFb2Zdos|KI^xX6BvKsOY@b@BN+DB@0J$R$o>=Ec?|{Lcwh-N5<J~8xMb6C$dh3O
zqyJ3Z@<ScL-hBX^d6YSO?>mOwzb<CfE#$Xr%dcolz@t~Zq(acY=SHV`!7zqpd#HE3
zg8v$3e|>`k?s2;Iz#`bkIpEPo+fatRCmDC`IheVURw)<^dtX}lzy^QySaR#TvOx^n
zraU-G3vTtw7^esS{k(RC8a!tAp_7{i!hg-%%pMDWM4gd+-4@)hUQlQc<e!)47j=E{
zEyEt}TeoX(@RGghvEP+5Y;eZQtW)s!qph_$G`Qk?_X0QktL;~7{Kw$68zwy11D;g>
z^JV?NVc3tg?YH<No}09`gsvF?d7s^=Jsb5yV|+et44&1u%zOyzYr_ej^M2sR>aAIw
z!2MACO@G9r>Qwtp(0AgmlIxLukUs-$zg)!l;x<xS-!g_>(`wLuKggfot=0Cs9_-Jb
zC^lXN=RW0c$p;sIcVi`4f70^9N9UC?>`tR%eIxLqZ`Qr>1&2&9MQ`%K`A#?P`?Vzu
zYp2tdtC1f+f3rvBF2?yoJkin!=bzd;E}yAzWB<Du6nqxk?^x;}5%_x4!(k`Dhdq0@
z>A)3tLR!DTd8@^@p%b{^g+UjXr-2lG`P}zwes?i!<;S<%Zup3zB@;XJNe6S&^uyyD
zQ*`;7ZRH1?4EwzD%<K`UKT-CS$^xIh-FnUoaLgS&^U8sIeqYn}51j8Vo^HMM5Ae^4
z2d<oIO3`cEEe|^jp8UnENo|@_^e{==g2&(i&FVL?gOikD-u@MxDtg(2D7u|z)tz0v
z8TPXtvyKLRO3{;UTKGZWI`_&ydDD)fkIldIX0)AQ^9LXCX@T?W#KtR@O}1hGBM0`5
z6z!9qIWqygaKj->od}9Hq?}5gZG}IEK3?-}6h+rs?c3y%1^V7_k6^ht&vqTv=Yg4F
z=j`43tRd=SuGYWf0Q<kb```k2X;gZG0Nfxj|J)6nuixYi`>4E-Vf{{;&eai6^bvdC
zmIm<nuWX8IIFB2T=?j~huzmv*MyBy8+Pp6G`dlN!YJM3pcLwU4=NEPO34Eogxb?6&
zioUtBd}UMc!tIlH4(vkF4Q}qOXlr2DgvI%D&c#zS6({L2vjF2YxpeR}?gxJSNUZz|
z+{%*M^fc}lym5s71GqR@;C|xQjQxjo;Ly5F6!%i$uVbff1`{qDG4x3eMSF^${W281
zwr9!a2YD1-aIS0cJdBqawsU(x53~=6Sv3Is*Dpn7OfQP||1_WVNBfTBe(;xTD0<1*
zPLComzZL_Bnl7NeW7|ldQJ8<hW6#L@xWDRH@Jn<hxVz!pqK|R^W$Sot#8%kvo0Ao#
zT_ImRy7l%Auz#!OM{4DvzeKoUay0y7JN@ZvwU(mijOrG<5S(hcbfrI-JKbm3eel&z
z<r98WQ*`S^{hR~f-@2uhJN4km*C+1!As#+G({t%K=(lvJ(N!0$=ur7;1~~kJ*{MbR
zJox10ZeJrsf0>&3eG*tc<w@L0@R)^-x-;NP!Fbzx6YP2J>+h$4%iAf}JOyh^7sUd^
ztFmv>A8WxIgts;ngGV2@^tx>kRh2)$^IOk5@rjwDIg^frj>G!6kR5-kBij4E%wCp^
z_0((S?}AUkgBJ|3*2en#D<wf`LHka>hhB>TkIA|4EX6|6(m@R~e*pJM)2<tBrD##x
z*~%}F4+2_WI4`hMwEe^Z`Ay`H!8iWAWOGvV*{InoLXdCP9@9=RI4C;tv!u~0!Ne_<
zy(t=}v(8^3Kh2ymcjIO7xE`l&EBoR8<L1aaBhWwah`W^u`OU{&tM+*}ML(0O7k&$_
z+pAXCZg8gf>fhgi*B_@MMBv`FzUn#>JX5^q%_rcZj~X970ZyAYIV%G4-rVSYp%D31
z(`@P{7UP9qz3uFa{M&ROdvGo|{m;Dfxyav(dSXLI`*jCGpDV!58I~SG@P+U4qlSRn
zKR*7I!bQ=kg1?{a25(yScSRVu2}z*XFSr?t>Jxo79G@7FgZ(8t04u7PqBocO-MIoT
zvzPp0EkXOm<IQgDN7JuA$P$%O^uB?ei(i0;aO$=@3~u?;qNORra30sRp85>z-ABN6
ze(C*^@lbT=%}X(@u>X1QFCqWcptWBg0`o~ihCY|B{?#pOIQ+9OqEA!s>@n0OKkT>N
z$-xl%ZOID>TMXXt)8K0h(ZAsADf$7p>{9&KG{#%8`-@JdkqrB!_QRbsp>NY`Uy3Tg
zM|y-^E(T8>)WGA9{eS9)8(nh1*PG107CajJ)1Ae8)4|FUZKio}p4gP#CGQQ`d;Y|E
zW6XCG3LZdyhmCnz1HqpqKl?Vg4EeNY9c}XWcs_Eu*^dvvNB2q&uLb{={O!^Lu=4My
ziuU8+@1~p6FQNbLHIdf&;Lz?1D{g}8cl2?0#QAB`<!K8B!9LH7z4vm#-t#tCv*pj+
znG<pTPenlV#q)vT%VzbOg!9kKxuxHL&s7$tp2hiXrO`R#3|Mrmv#Z?{<R8E3&b#1*
zF~i>^;JjEr^+lUr{V2L_-9F8daK7}~3A`_3UXQ!rUq%KxT7Y9e-Z?M`=h5d)49k`1
z|7uR8yaSj#_DKiVE}9_~gFlZ)L}7kgO1oOOgHQjo_1rkj=gX9jn#AI~yUEV^tO5Aj
zD>IW?&Vqf0{q(dsc*D;t_}g%PuIPC6r>5v1<SV;#X)gA&SDR1H>;vwTy7;qscs|ft
z{G|+hyWOZ0UBE%DhUxj>`?tCe+%}(Ko0n`kV1vHOAGK{8FTnjyI&Sd6lBuVT*8y`Y
z+Ba$dzN46QJp#Pi_0<<de^cm<5;e~EJ1$O3cn*2%KS|j=3cO8{H>w@#N6yB^wHW)~
zh8`ye_J=>ux3uIg#d*7)tRDyR?svcgmSs54t$7sq2im)dqn0ylwC)QD>Hk|tMZ=w7
z?|Ov%ma&54eOEH<Z<|jAyu$qX*1|c@R>7W)zAlS~e`7-@)#?8O@?pU8HN^jcc>MJc
z?0CAMSv}aN(MPv^AFjs!d2Q$6Taf3y?^}cQakXFQ5$IF4vuUFaYmtA`W_}Y6eG?BA
zn@9YF{dx1LyQiVg`p&rH1<#t=EAChDsminU)wn;sv*KLwT=XBxeoqhFfc?0~!=MG=
z5!#-0)@(xiM?+>eK>Id@XSnq@GwkpC-Ps2*{=>ziZn(BEY!e(4E};GT-4zeTTN$=-
z`(Jaqfce>~0qJ1JkV~Uu!QOlbUR<~KLL+#{zW%}K+i|~P`{>ug0oaepbB-N|*DotC
zuKyO#&*HlWH`{~rBROXc#Ph&D+5n%uxZhue2Ze*Mf6hpZnGLRJ)%M5+)CXQouBg8s
z_cx!n`1}p}d-rkhq&0J=zbZ%njR?d8xIdg#x5NPU-nSfN*n#|~FZN)5<niJOaO%*Z
z-2SjPo<cSwJjMLQZyKzB?|nLW%yS$Fk&l*D{Meul*fVxk*ih^rIQ<X11NpnOT}Ld$
z{xjt2u?@9J{RByABKEVvNH?)y*NuY)Pa#FWD1LdQEBMSb`j!?Po_j`m_b{FhIJWqm
z!2Vn>dqVo<BiR23hn+iy{Wz2)nxhP>J2vakDeUj(MGjhe4Cj5D`gn7&k8jb>tBy15
zp=JRIdvG3j|9S2EkDlLqz`RKR?s<NIL2t9@T@H`;f%tP}@UXn_8@ljW4LknlI}X2=
zeFZ)tb(~tG-l7f)^a%=V=lAaANjskm@(|x!SoIF$pC#V+1~mo#==YzG<b8tt{<9oU
zysO8lQPd~cuVrXU^Z)BNIN|?y?_k3JM_!VI|Nnf65&l2<;vxM1FzAAZ_Xze24+v&^
zH23jo84*$oVGwe%VV|&Ky~Am;S;M-7@gljA+|C-kOC8AziwLtjOcu4n(^G3RyDg-?
zCU8(!Rpt!q(&t?%uJu-}O=q$i&{&zCNZL8PZ9c0Cbrc(2F1xc!x2SGWc89Gn(&jL9
zi>j)0c8e<JM#ffE6gq9z$g1*4at%=~k6jM|V!h4aP}_|rZIV12#a6Y26w8FkaeSao
z?Q&a)TJM_c^e&TDt}oC#pq^gjARXTIvpHNwj?kian6zpuCz*8T@Z!C)^k%ck?zEY7
zL|*kcVtF>H=0bu~Z!z_>s0(e5p6XH)<{86}jpOlSBf?BpVpM(40<+rX(p$X-q2l8r
zDM2hnQ9LfMUq50(n*+AOYsH>UkJF|1YVCAusuh1bxY~@dss{M_of>+ZcUl~bU~;*2
zWJYR@#-T4Zp}_0S47Ce|I+H`MtuC>WxuVRCSDG-HPH%AN^<n)YJ|NX*HT-vCz0UZj
zSiRawMtV<bcTCZ#wbl4cr+0cHiINVznz-wqX6JO~*sM;Y&7tS0tvZgapuh`VI7w=&
zTBk;}(5%rrT<Qc)qQm4eIgOko5|bJeN~CJc#-_oQOkaX}x7Ar<(iVBKz~;8<%tR*n
zWT-U`wa(^16&bxm?{sl;^bSb0IZS#dDK1uXvb8oBkt=gs)x~aC0w>ezQaenx1P<dc
zA$~Y1x)QZR$4PS%!3mr!w_c;yCUCOV&LXu78NEbpP2l9aix9vZ#tpS?9Jxtri{$+4
zMKPyCZnm(acWP>l+JXUvW+DN<4pW)U>b)pZTb*hsmZUbD3TzH5(F-%Tk!g@CtCjdx
zOSDYZo7K=upTH5J-lRiItQ3cVbV)^x$&|oJ)2WT<mxi#m+Dwp}X4RQsVqDlt@SR>g
zT)N)sa%+n`37ia@+vKcjB2}BLkS;|=sG7J`?R2WOMz>S%azU2Vq&1ohWbRTE^m5vW
zyQC(kll-&WF+2gA+d-~uuqhc=riV+svzF<L;c8M+0(~uZrxAf$g%YxspbV*!>PunE
zs_T54gSgonNl9vl4YMKwyqv5q@+y#NB%aGGGGpe%$e9kk!3G@}z18aUn2QlSD3u$r
zn%;Ue+vY|}cqg%nYb55Nr{3w*L%AG%De+FOOKnWxDD(~sriBX=tnL-7RAUxagNqEP
zB+>SdDihX*)oCI%9-9TN5Fgpq4XQP9M!pUAs*Z2Vv5qtvTPaCPu&B$}^KBOB!i$cK
zry{~gaQ8I7UBz8^F|m;`0%Sd6oqsfsi;0XzF<HX@Y~DE<nV63iT$TIO7z07&@?hnr
zFxg^^k35gG8%@Q;lU!D%&DWhV3Kh%aN5|Lv#YOO9Bjb4#8b)-E5k$st`Fz3KYJLPS
zE>ge~#0p}){|N<OyzUWUWH};0s<W0Bi`b=iXw5clQHjY(GF{C^)RW{&B;K#X7@OGx
z$A)$9*RP*XNT5(I&s27)+CM`64vr#!(S|gkP?&`e`AZiHqlJkEp-3n+2$Kxnvc$xH
z)t8XMly~)6sa1?`uM}@RWR?ig55Ha^WDH?4`4UR;NwlhzaYAlMzEGIzH0dk~kCrkQ
zYfNb|X@x0nX;O4ahEihIiE*8s=1j9DdelmuQEN39<WuoZos#EHE3}zZl%iOpRB*M{
zB2z|^q)<bZ@-#|8Sz4hzHqD|lYbc&smnsodvBHhMMQWwMO8P3euB0?mi7B5iscLVM
zm*rDNvqqU>$}~xAx>R|Iw#=^0;FT0>QzgY}lb~2@(P=aIvO=|3;mXgG;X1!0qmYy(
zdNh>1x0Z@`XIQK2vjtuzW9aZYN|lYd6%n0up8UK-n_4ML&NWMk-tK%Q&rEd6DH5e*
zCVR_@^7G_84f^Ok60SZk(OWOad{trv7k#r;N|jk_A#+lQwHA>J@||fGfl;H(CHiM-
z_!6!<DKQcDf$d~QjVUpuN~W+hGsjRy^inBHo$9>AG7V)es<P4B{;AN-Ak8U?mL?JV
z%U~CCfkr9n4Y>vRc}Ba|lNKW@l=3r^Q_5tX5)<r7sq!RcDn-0SBNlOWVuicf)*g)K
zmKNsnrG;r8X^yb8Add@OWyO$1{9#1f0xjQMhF_Og537uqZ<PzQV((nc8gZ#zn<^_-
zE9FKF(Z?c+R_94gnPwOK{7z?;GQLQaC%5N&5}lZ<U6-mTs<Mm7ly5~mS<D!#I?jqp
zjXJL^xgB;ad|PgTpRID6*KbOJ9c^OLEON7oFEOjKCGi=ES51|@tNkfQoSF;M#HGa=
zi|&6lCW&$A|NfW?qn4jVVu7czn7Q8A7AvgK%baHA=?b#3byKq9YI~$s%~K}G@X8W-
zR8=wiA3je@;u<uRpiC`}_o%WHjT%c9SY^>tM#OS%Oj-)!!Xhfl&lbAUQbf*ttV681
z*t8UhM-BUFDT5)aYQ4d~q@0KLX0C>k7&ASInW{XkA<GJ#tuj7h%u!XQvuOAVw@O^<
z9WP%gC`wByHNyTz74#r&ys??8Gh@yMg&1p}$mC1xsyr#i5ESbyDTZ9k7k2fIqa!h5
zRdCUl(OBh1v@gbW7Jd=`xdypX&d<-2U>tK9V$ZEgb{cZ?6lTn)5bbiXRvm~3vTphL
z%IMfMk(s9^Yt@vPrNjEriQ^2p7IUG-6KzlspNkbmX(AaHeau>q(56tx64PsJq?BNd
z5nI4sL~f4Lkd-H~>n-u_>T3t)T$b;h3+yX4!yXc>1=0sG4;$nw<wXBv_{ObL%E)@=
z>7cV*tbj}%=9i1MDyP>bUVk8O@FmtN8AhXy64})lHyi6gEsjH5vlHXEv5w0iSE50D
zK?h<xiH5HuzZvgza+zU6UOwd#pj--H!Ov#+s>;7uk7kt1U{67fEpwEjGA$)4RHrJ+
zu)et3GN&O&X@)(`R<Au%cy^6NU`GDq5*wfn?Y*B8t+)vK@$6bY{0Tb|*_b!CTE9${
zQi6O~Wyc(gf`V<{##Xji<ifhQsq-X6CvUuEd-n<j(cQ~AN<}p6N9^fkr83^5Ci+?8
z;a3vBBIM>$=tt&Py|z<ew;Ek3qg_MVQ!Zz|(un<pw8MH)>WVdmh$G}N#J50`sv@yZ
zbj!wiLS9LfXen=uBc90IkTbBJw3c`$$!Tbp1KW}uoRY6}A(kOmBsY8Q#Fv>>uoGfG
z+8ZY#1?*u!J;~|3VvRM6Tq7^(ptpC-JlF_(hQO*5$GagrPn9R*zTZZjC&M0y_3n*v
zodEkE`fJ^3B1O*Iwx$}r<z<NfB(nC<&-USZlB*;p4c}nO#HkGa<>|!60+Qd6A9yN~
zp9@pC8HGh2<SstZDUbSB9*mLZ<nuEM^Lb>SLkx4(qWFL2=9tXnd^dJNj6-su9J1rx
zD$2~&Ddl9(bIFp0ZdsWu9{ueq<YMfHMq+ESHgu^)WUmwDie#zYy&U^H*=J?RX;fw&
zkMuF+TjHa!r)V;~dZh8CIZ|$BPLbg4C#j@V=7|<abJDod!h9#%@t|*sv`osACBJJY
z^^}xmCPnk4Ik|jUj<!UaEUf;BbQlNu)J)dZyZr_G8}b$xdtxExRH)|bu+DNxj>p+2
z-i3AK(JSJ;dpp@nl^#hJ;#8nQUP?3b;;~1PbC^(=l!aqdvW=Xaipf{1_mIUUm#DyB
z4i0f~_)ST{p$vsJ=Q9H2j|d^>t?Dto-IKXd#ZggG23<+>tTb<_gfxG96of=lDvB>a
z++klOepgU9GZaV_@dc!1X-$7&iuaJGso`pQ8Klwwjy#M(S|)i9hXyhQ?|G4w33JI;
zQVBV#;vz9oPFRS8W6k`@6*-T>5=A1J+&hybEoo1^)hE|mDkhEop-&<yP0@*RxMpvE
z?|e!zW$(E)pXfmjt3<fBKQ3#=_s+M<{-i~fzFD~{v7D=5va5z8{;BEurB4V?oGi@X
z@FQ_A5W#Wd9>>73s~x@FdRJ$y-QHR4aM(&X)lZ!`vE-RkOEVsLzW2{39RHb?!GBu+
EFK+URrvLx|

literal 0
HcmV?d00001

diff --git a/tests/aequilibrae/project/data/wynnum.parquet b/tests/aequilibrae/project/data/wynnum.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9c9eeb46c11259cc0352d909f48fb7536c569a0c
GIT binary patch
literal 1626
zcmbVN%WvaE7`NTDDo`P`s-r}zP_VQSNNH+2w)5CsX(xRo&c<n8&1+SyV#lOT9Xp#i
zah<A2NPsvX&IoRB1_Tn1W5t037dRrpAAmS<0CC{LH%^-_%WX#*&wS6{_x)xvDWfV!
z2k7VN7mGAS2W}C>>PsX+5FAa=x9DIDE9o_QZA&6vCAkpEf$g~n1z(CJDI)sD7QGP`
z1H@W5L>e>|q=Hs(7UiSsHv!=oqzKRy0V(l9;N?4#{+_&s>~0=|mRuoM|7gHw73TYI
zZoK{NE86|vzRLg8I=gfEsqofk-+=t%m(f3e@a|lG@%87A1B`Zm=bcYJ`UT`k`SRs8
zkpEuQe@%h^)A3KY0}xNP@m}DjkM+K84|Qkk`R#m21wW>uKdnT6Ctv%2BUJdVNQBlV
z+KE2w(gA|rxJ>~Ow~68wAo$WU9TB9$TjAKVQ5Jp13WM*i1gYqH@Pn02f(YLYz0mJ<
z&CH_S(9Sud+2(X;O_<C%W8jbDjdUi%)SR(l^_dvs8a?NP$?&3hadAO}R)Cw(U*tJ6
z)5jCTw!oE(vvGD?)tzpfgMiW47<R|rqeWt{U$Y6eC%~3kz|<|x?qRqL(|nKxKUKBQ
zn9M^?OvG6+AtiY>mEc%W5M$eXGA<+~mIWOrrc<oM$9y*_!3#+?CGsqvmKJVOTH=xt
zpGqcJDajqj7<+8!meX|>b!+z2a`drg+S;jWOmx7#F6t3x0XTWclx@@NLz~Bb_Zz5M
zJ#1dH_w(uw9%ubsgir-09uE)_P_~Z_5bC4dzOTz>|6_Nta(`*hyI{sL4D7#)mU60`
z+}UycB#Ljxp}C%^x_ca!b+5p8T7`MHnV!l6+fZtPTO1&#J#4V;ns6o?uA$`%Bei+p
z>5cS2<<B^^DHw-F!Rj>APN!A%^qM@8hozaCZ#b%@7>Cxx=<to1+HA0zb-Iw7BE(il
z>QKY6tr{yE#odC@9;O9(FjAHJ(Hw9&W!_FG10*Q(eYaGvaK&BLD|rH2nzvnLPxF+$
zeeST{cS{3#auvfXa~bSNDAi?G8JzOV7}zUyjaLT!gfd5bX@1ZH%$YVcP1UNN^m67*
zHDn3b-fN01@Gwww=|MNq8)^IzuJ5MDMw-=ISyS_5sp%Cez=PP~fisg!zusaol`=vu
zCLUIwZ&(pNL>Lp8S*w#fm>gQAK{wZM+7&dz7U2JiQ*jPFB<10PuU_7CJ2j}yEU{4U
zwBrHCR&UhFS8X^auT*a=a52=re|7xx=gM%O@EKPszA}$NmRAd34H)?RdCu(R3nCVg
zGKZj41!XJPo<m4Odsq+6EG`x7D@T4lFk->~F1NW;yaiWC;VbXmBSKtm4;3Sccrwn%
uBGU={I3uI(`0-SCwzbh{yE`7+Zsh6>h$Qg~5RSo%habI7c)rW<uktStqq98#

literal 0
HcmV?d00001

diff --git a/tests/aequilibrae/project/test_polygon_gridding.py b/tests/aequilibrae/project/test_polygon_gridding.py
new file mode 100644
index 000000000..21dd40562
--- /dev/null
+++ b/tests/aequilibrae/project/test_polygon_gridding.py
@@ -0,0 +1,22 @@
+from pathlib import Path
+
+import geopandas as gpd
+import pytest
+
+from aequilibrae.project.network.osm.model_area_gridding import geometry_grid
+
+
+def test_geometry_grid():
+    pth = Path(__file__).parent / "data"
+
+    # Small simple polygon
+    polygon = gpd.read_parquet(pth / "wynnum.parquet").geometry[0]
+    grid = geometry_grid(polygon, 4326)
+    assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001)
+    assert grid.shape[0] == 1
+
+    # Bigger polygon
+    polygon = gpd.read_parquet(pth / "porto_rico.parquet").geometry[0]
+    grid = geometry_grid(polygon, 4326)
+    assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001)
+    assert grid.shape[0] == 16

From d593fd0b964390787306082df78f8f678ceb868c Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Fri, 16 Feb 2024 10:25:47 +1000
Subject: [PATCH 25/32] adds network cleaning

---
 aequilibrae/project/network/network.py        |  7 ++++++-
 .../project/network/osm/osm_builder.py        | 19 +++++++++++++++++--
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py
index 07e03ee87..68c0a45fa 100644
--- a/aequilibrae/project/network/network.py
+++ b/aequilibrae/project/network/network.py
@@ -126,6 +126,7 @@ def create_from_osm(
         model_area: Optional[Polygon] = None,
         place_name: Optional[str] = None,
         modes=("car", "transit", "bicycle", "walk"),
+        clean=True,
     ) -> None:
         """
         Downloads the network from Open-Street Maps
@@ -141,6 +142,9 @@ def create_from_osm(
             **modes** (:obj:`tuple`, Optional): List of all modes to be downloaded. Defaults to the modes in the parameter
             file
 
+            **clean** (:obj:`bool`, Optional): Keeps only the links that intersect the model area polygon. Defaults to
+            True. Does not apply to networks downloaded with a place name
+
         .. code-block:: python
 
             >>> from aequilibrae import Project
@@ -191,6 +195,7 @@ def create_from_osm(
                 raise ValueError("Coordinates out of bounds. Polygon must be in WGS84")
             west, south, east, north = model_area.bounds
         else:
+            clean = False
             bbox, report = placegetter(place_name)
             if bbox is None:
                 msg = f'We could not find a reference for place name "{place_name}"'
@@ -236,7 +241,7 @@ def create_from_osm(
         dwnloader.doWork()
 
         self.logger.info("Building Network")
-        self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area)
+        self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area, clean=clean)
 
         if pyqt:
             self.builder.building.connect(self.signal_handler)
diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 06d586a32..959275210 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -1,3 +1,4 @@
+import geopandas as gpd
 import gc
 import importlib.util as iutil
 import string
@@ -27,7 +28,7 @@ class OSMBuilder(WorkerThread):
     if pyqt:
         building = pyqtSignal(object)
 
-    def __init__(self, data, project, model_area: Polygon) -> None:
+    def __init__(self, data, project, model_area: Polygon, clean: bool) -> None:
         WorkerThread.__init__(self, None)
 
         project.logger.info("Preparing OSM builder")
@@ -38,6 +39,7 @@ def __init__(self, data, project, model_area: Polygon) -> None:
         self.model_area = geometry_grid(model_area, 4326)
         self.path = self.project.path_to_file
         self.node_start = 10000
+        self.clean = clean
         self.report = []
         self.__all_ltp = pd.DataFrame([])
         self.__link_id = 1
@@ -62,7 +64,9 @@ def doWork(self):
             conn.execute(
                 "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)"
             )
-            conn.execute("VACUUM;")
+            conn.commit()
+            self.__do_clean(conn)
+
         self.__emit_all(["finished_threaded_procedure", 0])
 
     def importing_network(self, conn):
@@ -170,6 +174,17 @@ def _build_geometry(self, nodes: List[int]) -> str:
         txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist())
         return f"LINESTRING({txt})"
 
+    def __do_clean(self, conn):
+        if not self.clean:
+            conn.execute("VACUUM;")
+            return
+        self.logger("Cleaning up the network down to the selected area")
+        links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326)
+        links_left = [[x] for x in links[~links.link_id.isin(links.clip(self.model_area).link_id)].link_id]
+        conn.executemany("DELETE FROM links WHERE link_id = ?", links_left)
+        conn.commit()
+        conn.execute("VACUUM;")
+
     def __process_link_chunk(self):
         self.logger.info("Processing link modes, types and fields")
         self.__emit_all(["text", "Processing link modes, types and fields"])

From 3580c14dc4e8c1dc160771f9e1fe93edcc032af3 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Fri, 16 Feb 2024 10:50:03 +1000
Subject: [PATCH 26/32] adds network cleaning

---
 aequilibrae/project/network/osm/osm_builder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index 959275210..a71422683 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -178,7 +178,7 @@ def __do_clean(self, conn):
         if not self.clean:
             conn.execute("VACUUM;")
             return
-        self.logger("Cleaning up the network down to the selected area")
+        self.logger.info("Cleaning up the network down to the selected area")
         links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326)
         links_left = [[x] for x in links[~links.link_id.isin(links.clip(self.model_area).link_id)].link_id]
         conn.executemany("DELETE FROM links WHERE link_id = ?", links_left)

From 12e40284505a312629bf9f387d13cb03f2cc4f91 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 19 Feb 2024 00:15:01 +1000
Subject: [PATCH 27/32] Allows for use of polygons with rings inside

---
 aequilibrae/project/network/osm/model_area_gridding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py
index 3a6b636d5..bcaa1a6a3 100644
--- a/aequilibrae/project/network/osm/model_area_gridding.py
+++ b/aequilibrae/project/network/osm/model_area_gridding.py
@@ -9,7 +9,7 @@
 def geometry_grid(model_area, srid) -> gpd.GeoDataFrame:
     minx, miny, maxx, maxy = model_area.bounds
     # Some rough heuristic to get the number of points per sub-polygon in the 2 digits range
-    subd = ceil((len(model_area.boundary.coords) / 32) ** 0.5)
+    subd = ceil((len(model_area.exterior.coords) / 32) ** 0.5)
     dx = (maxx - minx) / subd
     dy = (maxy - miny) / subd
     elements = []

From c05bb9fd50ba54e040c5ae8b31ec32629a41b792 Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 19 Feb 2024 00:39:49 +1000
Subject: [PATCH 28/32] Allows for use of polygons with rings inside

---
 aequilibrae/project/network/osm/osm_builder.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index a71422683..c4e919963 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -180,8 +180,9 @@ def __do_clean(self, conn):
             return
         self.logger.info("Cleaning up the network down to the selected area")
         links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326)
-        links_left = [[x] for x in links[~links.link_id.isin(links.clip(self.model_area).link_id)].link_id]
-        conn.executemany("DELETE FROM links WHERE link_id = ?", links_left)
+        existing_link_ids = gpd.sjoin(links, self.model_area, how="left").dropna().link_id.to_numpy()
+        to_delete = [[x] for x in links[~links.link_id.isin(existing_link_ids)].link_id]
+        conn.executemany("DELETE FROM links WHERE link_id = ?", to_delete)
         conn.commit()
         conn.execute("VACUUM;")
 

From 40c128eba65c08d456202ba015fd143e893c773f Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Mon, 19 Feb 2024 01:47:17 +1000
Subject: [PATCH 29/32] adjusts types

---
 aequilibrae/paths/graph.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py
index 26e37d13c..2a75572c1 100644
--- a/aequilibrae/paths/graph.py
+++ b/aequilibrae/paths/graph.py
@@ -237,9 +237,9 @@ def _build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray):
         if nans:
             self.logger.warning(f"Field(s) {nans} has(ve) at least one NaN value. Check your computations")
 
-        df.loc[:, "b_node"] = df.b_node.values.astype(self.__integer_type)
-        df.loc[:, "id"] = df.id.values.astype(self.__integer_type)
-        df.loc[:, "link_id"] = df.link_id.values.astype(self.__integer_type)
+        df["link_id"] = df["link_id"].astype(self.__integer_type)
+        df["b_node"] = df.b_node.values.astype(self.__integer_type)
+        df["id"] = df.id.values.astype(self.__integer_type)
         df["direction"] = df.direction.values.astype(np.int8)
 
         return all_nodes, num_nodes, nodes_to_indices, fs, df

From aacdfcb1328f33a772371e701c3553eee03c712c Mon Sep 17 00:00:00 2001
From: Pedro Camargo <c@margo.co>
Date: Fri, 23 Feb 2024 11:50:14 +1000
Subject: [PATCH 30/32] Update pyproject.toml

Co-authored-by: Jake Moss <jake.moss@uqconnect.edu.au>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 509bb0922..ff2ab1ff1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ extend-exclude = '''docs/*'''
 
 
 [build-system]
-requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel", "geopandas"]
+requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"]
 
 
 [tool.ruff]

From 6e897e187bef619bab8f7d13076c47e1c506ecdc Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Fri, 23 Feb 2024 11:52:52 +1000
Subject: [PATCH 31/32] fixes return type annotation of __define_link_type

---
 aequilibrae/project/network/osm/osm_builder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index c4e919963..f4aece221 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -232,7 +232,7 @@ def __build_link_types(self, df):
         df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left")
         return df.drop(columns=["highway"])
 
-    def __define_link_type(self, link_type: str) -> [str, str]:
+    def __define_link_type(self, link_type: str) -> tuple[str, str]:
         proj_link_types = self.project.network.link_types
         original_link_type = link_type
         link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower()

From 1b8268e122acc49bf800f488077b5450b017aa3b Mon Sep 17 00:00:00 2001
From: pveigadecamargo <pveigadecamargo@anl.gov>
Date: Fri, 23 Feb 2024 12:16:26 +1000
Subject: [PATCH 32/32] uses typing.Tuple in the __define_link_type return annotation

---
 aequilibrae/project/network/osm/osm_builder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index f4aece221..af14827cb 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -4,7 +4,7 @@
 import string
 from math import floor
 from pathlib import Path
-from typing import List
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -232,7 +232,7 @@ def __build_link_types(self, df):
         df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left")
         return df.drop(columns=["highway"])
 
-    def __define_link_type(self, link_type: str) -> tuple[str, str]:
+    def __define_link_type(self, link_type: str) -> Tuple[str, str]:
         proj_link_types = self.project.network.link_types
         original_link_type = link_type
         link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower()