From 5d52aeedb3101b598887cea25a6e2f50a354151b Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Sun, 4 Feb 2024 18:02:15 +1000 Subject: [PATCH 01/32] OSM Geo-processing using GeoPandas --- aequilibrae/parameters.yml | 24 - aequilibrae/project/network/network.py | 8 +- .../network/osm/model_area_gridding.py | 29 ++ .../project/network/osm/osm_builder.py | 419 ++++++++---------- .../project/network/osm/osm_downloader.py | 39 +- aequilibrae/utils/db_utils.py | 4 + pyproject.toml | 2 +- requirements.txt | 3 +- 8 files changed, 265 insertions(+), 263 deletions(-) create mode 100644 aequilibrae/project/network/osm/model_area_gridding.py diff --git a/aequilibrae/parameters.yml b/aequilibrae/parameters.yml index d7fe1be3e..9529aa360 100644 --- a/aequilibrae/parameters.yml +++ b/aequilibrae/parameters.yml @@ -42,30 +42,6 @@ network: description: name osm_source: name type: text - - cycleway: - description: cycleway, both way - osm_source: cycleway - type: text - - cycleway_right: - description: cycleway, right - osm_source: cycleway:right - type: text - - cycleway_left: - description: cycleway, left - osm_source: cycleway:left - type: text - - busway: - description: busway - osm_source: busway - type: text - - busway_right: - description: busway, right - osm_source: busway:right - type: text - - busway_left: - description: busway, left - osm_source: busway:left - type: text two-way: - lanes: description: lanes diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py index b9d5f8115..07e03ee87 100644 --- a/aequilibrae/project/network/network.py +++ b/aequilibrae/project/network/network.py @@ -229,14 +229,14 @@ def create_from_osm( if subarea.intersects(model_area): polygons.append(subarea) self.logger.info("Downloading data") - self.downloader = OSMDownloader(polygons, modes, logger=self.logger) + dwnloader = OSMDownloader(polygons, modes, logger=self.logger) if pyqt: - self.downloader.downloading.connect(self.signal_handler) + dwnloader.downloading.connect(self.signal_handler) - self.downloader.doWork() + dwnloader.doWork() self.logger.info("Building Network") - self.builder = OSMBuilder(self.downloader.json, project=self.project, model_area=model_area) + self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area) if pyqt: self.builder.building.connect(self.signal_handler) diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py new file mode 100644 index 000000000..3f58a21b0 --- /dev/null +++ b/aequilibrae/project/network/osm/model_area_gridding.py @@ -0,0 +1,29 @@ +# Inspired by https://www.matecdev.com/posts/shapely-polygon-gridding.html +import numpy as np +from shapely.geometry import Polygon +import geopandas as gpd + + +def geometry_grid(model_area, srid) -> gpd.GeoDataFrame: + minx, miny, maxx, maxy = model_area.bounds + subd = min(0.01, abs(maxy - miny) / 3, abs(maxx - minx) / 3) + space_x = int((maxx - minx) / subd) + space_y = int((maxy - miny) / subd) + combx, comby = np.linspace(minx, maxx, space_x), np.linspace(miny, maxy, space_y) + elements = [] + for i in range(len(combx) - 1): + for j in range(len(comby) - 1): + elements.append( + Polygon( + [ + [combx[i], comby[j]], + [combx[i], comby[j + 1]], + [combx[i + 1], comby[j + 1]], + [combx[i + 1], comby[j]], + ] + ) + ) + + gdf = gpd.GeoDataFrame({"id": np.arange(len(elements))}, geometry=elements, crs=srid) + + return gdf.clip(model_area) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index ecc10c222..103c8e755 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -1,20 +1,21 @@ import gc import importlib.util as iutil import string -from typing import List +from math import floor +from pathlib import Path +import geopandas as gpd import numpy as np import pandas as pd -from shapely import Point -from shapely.geometry import Polygon +import shapely.wkb +from shapely.geometry import Polygon, LineString from aequilibrae.context import get_active_project from aequilibrae.parameters import Parameters -from aequilibrae.project.network.haversine import haversine -from aequilibrae.project.network.link_types import LinkTypes from aequilibrae.utils import WorkerThread -from aequilibrae.utils.db_utils import commit_and_close +from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns from aequilibrae.utils.spatialite_utils import connect_spatialite +from .model_area_gridding import geometry_grid pyqt = iutil.find_spec("PyQt5") is not None if pyqt: @@ -28,23 +29,37 @@ class OSMBuilder(WorkerThread): if pyqt: building = pyqtSignal(object) - def __init__(self, osm_items: List, project, model_area: Polygon) -> None: + def __init__(self, data, project, model_area: Polygon) -> None: WorkerThread.__init__(self, None) + + project.logger.info("Preparing OSM builder") + self.__emit_all(["text", "Preparing OSM builder"]) + self.project = project or get_active_project() self.logger = self.project.logger - self.osm_items = osm_items - self.model_area = model_area + self.model_area = geometry_grid(model_area, 4326) self.path = self.project.path_to_file self.node_start = 10000 - self.__link_types = None # type: LinkTypes self.report = [] - self.__model_link_types = [] - self.__model_link_type_ids = [] - self.__link_type_quick_reference = {} - self.nodes = {} - self.node_df = [] - self.links = {} - self.insert_qry = """INSERT INTO {} ({}, geometry) VALUES({}, GeomFromText(?, 4326))""" + self.__all_ltp = pd.DataFrame([]) + self.__link_id = 1 + self.__valid_links = {"link_id": [], "nodes": []} + + nodes = ( + data["nodes"] + .assign( + is_centroid=0, + modes="", + link_types="", + node_id=np.arange(data["nodes"].shape[0]) + self.node_start, + ) + .reset_index(drop=True) + ) + self.node_df = gpd.GeoDataFrame(nodes, geometry=gpd.points_from_xy(nodes.lon, nodes.lat), crs=4326) + del nodes + del data["nodes"] + gc.collect() + self.links_df = data["links"] def __emit_all(self, *args): if pyqt: @@ -52,169 +67,128 @@ def __emit_all(self, *args): def doWork(self): with commit_and_close(connect_spatialite(self.path)) as conn: - self.__worksetup() node_count = self.data_structures() - self.importing_links(node_count, conn) + self.importing_network(node_count, conn) conn.execute( "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)" ) self.__emit_all(["finished_threaded_procedure", 0]) def data_structures(self): - self.logger.info("Separating nodes and links") - self.__emit_all(["text", "Separating nodes and links"]) - self.__emit_all(["maxValue", len(self.osm_items)]) - - alinks = [] - n = [] - tot_items = len(self.osm_items) - # When downloading data for entire countries, memory consumption can be quite intensive - # So we get rid of everything we don't need - for i in range(tot_items, 0, -1): - item = self.osm_items.pop(-1) - if item["type"] == "way": - alinks.append(item) - elif item["type"] == "node": - n.append(item) - self.__emit_all(["Value", tot_items - i]) - gc.collect() - self.logger.info("Setting data structures for nodes") - self.__emit_all(["text", "Setting data structures for nodes"]) - self.__emit_all(["maxValue", len(n)]) - - self.node_df = [] - for i, node in enumerate(n): - nid = node.pop("id") - _ = node.pop("type") - node["node_id"] = i + self.node_start - node["inside_model"] = self.model_area.contains(Point(node["lon"], node["lat"])) - self.nodes[nid] = node - self.node_df.append([node["node_id"], nid, node["lon"], node["lat"]]) - self.__emit_all(["Value", i]) - del n - self.node_df = ( - pd.DataFrame(self.node_df, columns=["A", "B", "C", "D"]) - .drop_duplicates(subset=["C", "D"]) - .to_records(index=False) - ) - - self.logger.info("Setting data structures for links") - self.__emit_all(["text", "Setting data structures for links"]) - self.__emit_all(["maxValue", len(alinks)]) - all_nodes = [] - for i, link in enumerate(alinks): - osm_id = link.pop("id") - _ = link.pop("type") - all_nodes.extend(link["nodes"]) - self.links[osm_id] = link - self.__emit_all(["Value", i]) - del alinks + aux = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count() self.logger.info("Finalizing data structures") self.__emit_all(["text", "Finalizing data structures"]) + return aux - node_count = self.unique_count(np.array(all_nodes)) - - return node_count - - def importing_links(self, node_count, conn): - node_ids = {} - - vars = {} - vars["link_id"] = 1 - table = "links" - fields = self.get_link_fields() + def importing_network(self, node_count, conn): self.__update_table_structure(conn) - field_names = ",".join(fields) self.logger.info("Adding network nodes") - self.__emit_all(["text", "Adding network nodes"]) - sql = "insert into nodes(node_id, is_centroid, osm_id, geometry) Values(?, 0, ?, MakePoint(?,?, 4326))" - conn.executemany(sql, self.node_df) conn.commit() - del self.node_df + self.__emit_all(["text", "Adding network nodes"]) + self.node_df.osm_id = self.node_df.osm_id.astype(np.int64) + self.node_df.set_index(["osm_id"], inplace=True) - self.logger.info("Adding network links") + self.logger.info("Creating necessary link types") + self.__emit_all(["text", "Creating necessary link types"]) + self.__build_link_types() + shape_ = self.links_df.shape[0] + message_step = floor(shape_ / 100) + self.__emit_all(["maxValue", shape_]) + + self.establish_modes_for_all_links(conn) + self.process_link_attributes() + + final_links = [] + self.logger.info("Geo-procesing links") self.__emit_all(["text", "Adding network links"]) - L = len(list(self.links.keys())) - self.__emit_all(["maxValue", L]) - - counter = 0 - mode_codes, not_found_tags = self.modes_per_link_type(conn) - owf, twf = self.field_osm_source() - all_attrs = [] - all_osm_ids = list(self.links.keys()) - for osm_id in all_osm_ids: - link = self.links.pop(osm_id) + for counter, (idx, link) in enumerate(self.links_df.iterrows()): self.__emit_all(["Value", counter]) - counter += 1 - if counter % 1000 == 0: - self.logger.info(f"Creating segments from {counter:,} out of {L:,} OSM link objects") - vars["osm_id"] = osm_id - vars["link_type"] = "default" - linknodes = link["nodes"] - linktags = link["tags"] - - indices = np.searchsorted(node_count[:, 0], linknodes) - nodedegree = node_count[indices, 1] - - # Makes sure that beginning and end are end nodes for a link - nodedegree[0] = 2 - nodedegree[-1] = 2 - - intersections = np.where(nodedegree > 1)[0] - segments = intersections.shape[0] - 1 - - # Attributes that are common to all individual links/segments - vars["direction"] = (linktags.get("oneway") == "yes") * 1 - - for k, v in owf.items(): - vars[k] = linktags.get(v) - - for k, v in twf.items(): - val = linktags.get(v["osm_source"]) - if vars["direction"] == 0: - for d1, d2 in [("ab", "forward"), ("ba", "backward")]: - vars[f"{k}_{d1}"] = self.__get_link_property(d2, val, linktags, v) - elif vars["direction"] == -1: - vars[f"{k}_ba"] = linktags.get(f"{v['osm_source']}:{'backward'}", val) - elif vars["direction"] == 1: - vars[f"{k}_ab"] = linktags.get(f"{v['osm_source']}:{'forward'}", val) - - vars["modes"] = mode_codes.get(linktags.get("highway"), not_found_tags) - - vars["link_type"] = self.__link_type_quick_reference.get( - vars["link_type"].lower(), self.__repair_link_type(vars["link_type"]) - ) + if counter % message_step == 0: + self.logger.info(f"Creating segments from {counter:,} out of {shape_ :,} OSM link objects") + + # How can I link have less than two points? + if len(link["nodes"]) < 2: + self.logger.error(f"Link {idx} has less than two nodes. {link}") + continue + + # The link is a straight line between two points + # Or all midpoints are only part of a single link + node_indices = node_count.loc[link["nodes"], "counter"] + if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1: + self.__set_geometry(link) + final_links.append(link) + continue + + intersecs = np.where(node_indices > 1)[0] + for i, j in zip(intersecs[:-1], intersecs[1:]): + rec = link.copy(deep=True) + rec["nodes"] = link["nodes"][i : j + 1] + self.__set_geometry(rec) + final_links.append(rec) + + self.links_df = pd.concat(final_links, axis=1).transpose() + self.links_df = self.links_df.assign(link_id=np.arange(self.links_df.shape[0]) + 1).drop(columns=["nodes"]) + + del final_links + gc.collect() - if len(vars["modes"]) > 0: - for i in range(segments): - attributes = self.__build_link_data(vars, intersections, i, linknodes, node_ids, fields) - if attributes is None: - continue - all_attrs.append(attributes) - vars["link_id"] += 1 - - self.__emit_all(["text", f"{counter:,} of {L:,} super links added"]) - self.links[osm_id] = [] - sql = self.insert_qry.format(table, field_names, ",".join(["?"] * (len(all_attrs[0]) - 1))) - self.logger.info("Adding network links") - self.__emit_all(["text", "Adding network links"]) - try: - conn.executemany(sql, all_attrs) - except Exception as e: - self.logger.error("error when inserting link {}. Error {}".format(all_attrs[0], e.args)) - self.logger.error(sql) - raise e - - def __worksetup(self): - self.__link_types = self.project.network.link_types - lts = self.__link_types.all_types() - for lt_id, lt in lts.items(): - self.__model_link_types.append(lt.link_type) - self.__model_link_type_ids.append(lt_id) + # Gets ONLY the nodes that are needed + self.links_df = gpd.GeoDataFrame(self.links_df, geometry=self.links_df.geometry, crs=4326) + self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False) + self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1 + + clip_nodes = pd.DataFrame(self.__valid_links) + clip_nodes = clip_nodes[clip_nodes.link_id.isin(self.links_df.link_id)] + + self.node_df.reset_index(inplace=True) + self.node_df = self.node_df[self.node_df.osm_id.isin(clip_nodes.nodes)] + del clip_nodes + gc.collect() + + cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types"] + self.node_df = gpd.GeoDataFrame(self.node_df[cols], geometry=self.node_df.geometry, crs=self.node_df.crs) + + # Saves the data to disk in case of issues loading it to the database + osm_data_path = Path(self.project.project_base_path) / "osm_data" + osm_data_path.mkdir(exist_ok=True) + self.links_df.to_parquet(osm_data_path / "links.parquet") + self.node_df.to_parquet(osm_data_path / "nodes.parquet") + + self.logger.info("Adding nodes to file") + self.__emit_all(["text", "Adding nodes to file"]) + self.node_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="nodes", mode="a") + del self.node_df + gc.collect() + + # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a") + + # I could not get the above line to work, so I used the following code instead + insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromWKB(?, 4326))" + cols_no_geo = self.links_df.columns.tolist() + cols_no_geo.remove("geometry") + insert_qry = insert_qry.format(", ".join(cols_no_geo), ", ".join(["?"] * len(cols_no_geo))) + + geos = self.links_df.geometry.to_wkb() + cols = cols_no_geo + ["geometry"] + links_df = pd.DataFrame(self.links_df[cols_no_geo]).assign(geometry=geos)[cols].to_records(index=False) + + del self.links_df + gc.collect() + self.logger.info("Adding links to file") + self.__emit_all(["text", "Adding links to file"]) + conn.executemany(insert_qry, links_df) + + def __set_geometry(self, rec: pd.Series) -> LineString: + rec.geometry = LineString(self.node_df.loc[rec.nodes, "geometry"]) + rec.link_id = self.__link_id + + self.__valid_links["link_id"].extend([self.__link_id] * len(rec.nodes)) + self.__valid_links["nodes"].extend(rec.nodes) + self.__link_id += 1 def __update_table_structure(self, conn): structure = conn.execute("pragma table_info(Links)").fetchall() @@ -225,39 +199,24 @@ def __update_table_structure(self, conn): conn.execute(f"Alter table Links add column {field} {ltype}") conn.commit() - def __build_link_data(self, vars, intersections, i, linknodes, node_ids, fields): - ii = intersections[i] - jj = intersections[i + 1] - all_nodes = [linknodes[x] for x in range(ii, jj + 1)] - - vars["a_node"] = node_ids.get(linknodes[ii], self.node_start) - if vars["a_node"] == self.node_start: - node_ids[linknodes[ii]] = vars["a_node"] - self.node_start += 1 - - vars["b_node"] = node_ids.get(linknodes[jj], self.node_start) - if vars["b_node"] == self.node_start: - node_ids[linknodes[jj]] = vars["b_node"] - self.node_start += 1 - - vars["distance"] = sum( - [ - haversine(self.nodes[x]["lon"], self.nodes[x]["lat"], self.nodes[y]["lon"], self.nodes[y]["lat"]) - for x, y in zip(all_nodes[1:], all_nodes[:-1]) - ] - ) - - geometry = ["{} {}".format(self.nodes[x]["lon"], self.nodes[x]["lat"]) for x in all_nodes] - inside_area = sum([self.nodes[x]["inside_model"] for x in all_nodes]) - if inside_area == 0: - return None - geometry = "LINESTRING ({})".format(", ".join(geometry)) - - attributes = [vars.get(x) for x in fields] - attributes.append(geometry) - return attributes + def __build_link_types(self): + data = [] + with read_and_close(self.project.path_to_file) as conn: + self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) + + for i, lt in enumerate(self.links_df.highway.unique()): + if str(lt).lower() in self.__all_ltp.link_type.values: + continue + data.append([*self.__define_link_type(str(lt)), str(lt)]) + self.__all_ltp = pd.concat( + [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])] + ) + self.__all_ltp.drop_duplicates(inplace=True) + self.links_df = self.links_df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") + self.links_df.drop(columns=["highway"], inplace=True) - def __repair_link_type(self, link_type: str) -> str: + def __define_link_type(self, link_type: str) -> str: + proj_link_types = self.project.network.link_types original_link_type = link_type link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower() @@ -266,35 +225,31 @@ def __repair_link_type(self, link_type: str) -> str: if piece in ["link", "segment", "stretch"]: link_type = "_".join(split[0 : i + 1]) + if self.__all_ltp.shape[0] >= 51: + link_type = "aggregate_link_type" + if len(link_type) == 0: link_type = "empty" - if len(self.__model_link_type_ids) >= 51 and link_type not in self.__model_link_types: - link_type = "aggregate_link_type" - - if link_type in self.__model_link_types: - lt = self.__link_types.get_by_name(link_type) + if link_type in self.__all_ltp.link_type.values: + lt = proj_link_types.get_by_name(link_type) if original_link_type not in lt.description: lt.description += f", {original_link_type}" lt.save() - self.__link_type_quick_reference[original_link_type.lower()] = link_type - return link_type + return [lt.link_type_id, link_type] letter = link_type[0] - if letter in self.__model_link_type_ids: + if letter in self.__all_ltp.link_type_id.values: letter = letter.upper() - if letter in self.__model_link_type_ids: + if letter in self.__all_ltp.link_type_id.values: for letter in string.ascii_letters: - if letter not in self.__model_link_type_ids: + if letter not in self.__all_ltp.link_type_id.values: break - lt = self.__link_types.new(letter) + lt = proj_link_types.new(letter) lt.link_type = link_type lt.description = f"Link types from Open Street Maps: {original_link_type}" lt.save() - self.__model_link_types.append(link_type) - self.__model_link_type_ids.append(letter) - self.__link_type_quick_reference[original_link_type.lower()] = link_type - return link_type + return [letter, link_type] def __get_link_property(self, d2, val, linktags, v): vald = linktags.get(f'{v["osm_source"]}:{d2}', val) @@ -340,27 +295,41 @@ def get_link_field_type(field_name): if field_name in tp: return tp[field_name]["type"] - @staticmethod - def field_osm_source(): + def process_link_attributes(self): p = Parameters() fields = p.parameters["network"]["links"]["fields"] - owf = { - list(x.keys())[0]: x[list(x.keys())[0]]["osm_source"] - for x in fields["one-way"] - if "osm_source" in x[list(x.keys())[0]] - } + for x in fields["one-way"]: + keys_ = list(x.values())[0] + field = list(x.keys())[0] + osm_name = keys_.get("osm_source", field).replace(":", "_") + self.links_df.rename(columns={osm_name: field}, inplace=True, errors="ignore") - twf = {} for x in fields["two-way"]: - if "osm_source" in x[list(x.keys())[0]]: - twf[list(x.keys())[0]] = { - "osm_source": x[list(x.keys())[0]]["osm_source"], - "osm_behaviour": x[list(x.keys())[0]]["osm_behaviour"], - } - return owf, twf - - def modes_per_link_type(self, conn): + keys_ = list(x.values())[0] + field = list(x.keys())[0] + if "osm_source" not in keys_: + continue + osm_name = keys_.get("osm_source", field).replace(":", "_") + self.links_df[f"{field}_ba"] = self.links_df[osm_name].copy() + self.links_df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore") + if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide": + self.links_df[f"{field}_ab"] = pd.to_numeric(self.links_df[f"{field}_ab"], errors="coerce") / 2 + self.links_df[f"{field}_ba"] = pd.to_numeric(self.links_df[f"{field}_ba"], errors="coerce") / 2 + + if f"{field}_forward" in self.links_df: + fld = pd.to_numeric(self.links_df[f"{field}_forward"], errors="coerce") + self.links_df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0] + if f"{field}_backward" in self.links_df: + fld = pd.to_numeric(self.links_df[f"{field}_backward"], errors="coerce") + self.links_df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0] + cols = list_columns(self.project.conn, "links") + ["nodes"] + self.links_df = self.links_df[[x for x in cols if x in self.links_df.columns]] + gc.collect() + self.links_df["geometry"] = 0 + self.links_df["link_id"] = 0 + + def establish_modes_for_all_links(self, conn): p = Parameters() modes = p.parameters["network"]["osm"]["modes"] @@ -373,13 +342,15 @@ def modes_per_link_type(self, conn): all_types = val["link_types"] md = mode_codes[mode] for tp in all_types: - type_list[tp] = "{}{}".format(type_list.get(tp, ""), md) + type_list[tp] = "".join(sorted("{}{}".format(type_list.get(tp, ""), md))) if val["unknown_tags"]: notfound += md type_list = {k: "".join(set(v)) for k, v in type_list.items()} - return type_list, "{}".format(notfound) + df = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) + self.links_df = self.links_df.merge(df, on="link_type", how="left") + self.links_df.modes.fillna("".join(sorted(notfound)), inplace=True) @staticmethod def get_node_fields(): diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py index 76c810b1e..cfb52f685 100644 --- a/aequilibrae/project/network/osm/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -10,20 +10,22 @@ For the original work, please see https://github.com/gboeing/osmnx """ +import gc +import importlib.util as iutil import logging -import time import re -from typing import List +import time +from typing import List, Dict +import pandas as pd import requests +from pandas import json_normalize from shapely import Polygon -from .osm_params import http_headers, memory -from aequilibrae.parameters import Parameters from aequilibrae.context import get_logger +from aequilibrae.parameters import Parameters from aequilibrae.utils import WorkerThread -import gc -import importlib.util as iutil +from .osm_params import http_headers, memory spec = iutil.find_spec("PyQt5") pyqt = spec is not None @@ -50,6 +52,9 @@ def __init__(self, polygons: List[Polygon], modes, logger: logging.Logger = None self.overpass_endpoint = par["overpass_endpoint"] self.timeout = par["timeout"] self.sleeptime = par["sleeptime"] + self._nodes = [] + self._links = [] + self.data: Dict[str, pd.DataFrame] = {"nodes": pd.DataFrame([]), "links": pd.DataFrame([])} def doWork(self): infrastructure = 'way["highway"]' @@ -80,10 +85,26 @@ def doWork(self): ) json = self.overpass_request(data={"data": query_str}, timeout=self.timeout) if json["elements"]: - self.json.extend(json["elements"]) - del json - gc.collect() + for tag, lst in [("node", self._nodes), ("way", self._links)]: + df = pd.DataFrame([item for item in json["elements"] if item["type"] == tag]) + lst.append(df) + del json + gc.collect() + self.__emit_all(["Value", len(self.polygons)]) + self.__emit_all(["text", "Downloading finished. Processing data"]) + for lst, table in [(self._links, "links"), (self._nodes, "nodes")]: + df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"]) + if table == "links": + if "tags" in df.columns: + df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) + df.columns = [x.replace(":", "_") for x in df.columns] + else: + df = df.drop(columns=["tags"]) + self.data[table] = df.rename(columns={"id": "osm_id"}) + lst.clear() + gc.collect() + self.__emit_all(["FinishedDownloading", 0]) def overpass_request(self, data, pause_duration=None, timeout=180, error_pause_duration=None): diff --git a/aequilibrae/utils/db_utils.py b/aequilibrae/utils/db_utils.py index 23534138f..83efb49a2 100644 --- a/aequilibrae/utils/db_utils.py +++ b/aequilibrae/utils/db_utils.py @@ -79,6 +79,10 @@ def get_schema(conn, table_name): return {e.name: e for e in rv} +def list_columns(conn, table_name): + return list(get_schema(conn, table_name).keys()) + + def has_column(conn, table_name, col_name): return col_name in get_schema(conn, table_name) diff --git a/pyproject.toml b/pyproject.toml index ff2ab1ff1..509bb0922 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ extend-exclude = '''docs/*''' [build-system] -requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"] +requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel", "geopandas"] [tool.ruff] diff --git a/requirements.txt b/requirements.txt index d22ed0b9d..04296117f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ shapely pandas pyproj rtree -openmatrix \ No newline at end of file +openmatrix +geopandas \ No newline at end of file From 2a2f8ea702d846ab50f791a2fe478b50ff473879 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Sun, 4 Feb 2024 18:45:44 +1000 Subject: [PATCH 02/32] OSM Geo-processing using GeoPandas --- .../project/network/osm/osm_builder.py | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 103c8e755..66e9804ea 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -43,18 +43,10 @@ def __init__(self, data, project, model_area: Polygon) -> None: self.report = [] self.__all_ltp = pd.DataFrame([]) self.__link_id = 1 - self.__valid_links = {"link_id": [], "nodes": []} - - nodes = ( - data["nodes"] - .assign( - is_centroid=0, - modes="", - link_types="", - node_id=np.arange(data["nodes"].shape[0]) + self.node_start, - ) - .reset_index(drop=True) - ) + self.__valid_links = [] + + nids = np.arange(data["nodes"].shape[0]) + self.node_start + nodes = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True) self.node_df = gpd.GeoDataFrame(nodes, geometry=gpd.points_from_xy(nodes.lon, nodes.lat), crs=4326) del nodes del data["nodes"] @@ -141,7 +133,7 @@ def importing_network(self, node_count, conn): self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False) self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1 - clip_nodes = pd.DataFrame(self.__valid_links) + clip_nodes = pd.concat(self.__valid_links) clip_nodes = clip_nodes[clip_nodes.link_id.isin(self.links_df.link_id)] self.node_df.reset_index(inplace=True) @@ -186,8 +178,7 @@ def __set_geometry(self, rec: pd.Series) -> LineString: rec.geometry = LineString(self.node_df.loc[rec.nodes, "geometry"]) rec.link_id = self.__link_id - self.__valid_links["link_id"].extend([self.__link_id] * len(rec.nodes)) - self.__valid_links["nodes"].extend(rec.nodes) + self.__valid_links.append({"link_id": [self.__link_id] * len(rec.nodes), "nodes": rec.nodes}) self.__link_id += 1 def __update_table_structure(self, conn): From 27dc952c43fea9c3a68ab61eac5d6cced8872be3 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Sun, 4 Feb 2024 22:41:01 +1000 Subject: [PATCH 03/32] OSM Geo-processing using GeoPandas --- .../project/network/osm/osm_builder.py | 82 ++++++++++--------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 66e9804ea..3ef54e492 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -3,11 +3,13 @@ import string from math import floor from pathlib import Path +from typing import List import geopandas as gpd import numpy as np import pandas as pd import shapely.wkb +from shapely import MultiLineString from shapely.geometry import Polygon, LineString from aequilibrae.context import get_active_project @@ -16,6 +18,7 @@ from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns from aequilibrae.utils.spatialite_utils import connect_spatialite from .model_area_gridding import geometry_grid +from aequilibrae.project.project_creation import remove_triggers, add_triggers pyqt = iutil.find_spec("PyQt5") is not None if pyqt: @@ -94,53 +97,50 @@ def importing_network(self, node_count, conn): self.establish_modes_for_all_links(conn) self.process_link_attributes() - final_links = [] self.logger.info("Geo-procesing links") self.__emit_all(["text", "Adding network links"]) + geometries = [] for counter, (idx, link) in enumerate(self.links_df.iterrows()): self.__emit_all(["Value", counter]) if counter % message_step == 0: self.logger.info(f"Creating segments from {counter:,} out of {shape_ :,} OSM link objects") # How can I link have less than two points? + if not isinstance(link["nodes"], list): + geometries.append(LineString()) + self.logger.error(f"OSM link {idx} does not have a list of nodes.") + continue + if len(link["nodes"]) < 2: - self.logger.error(f"Link {idx} has less than two nodes. {link}") + self.logger.error(f"Link {idx} has less than two nodes. {link.nodes}") + geometries.append(LineString()) continue # The link is a straight line between two points # Or all midpoints are only part of a single link node_indices = node_count.loc[link["nodes"], "counter"] if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1: - self.__set_geometry(link) - final_links.append(link) - continue - - intersecs = np.where(node_indices > 1)[0] - for i, j in zip(intersecs[:-1], intersecs[1:]): - rec = link.copy(deep=True) - rec["nodes"] = link["nodes"][i : j + 1] - self.__set_geometry(rec) - final_links.append(rec) - - self.links_df = pd.concat(final_links, axis=1).transpose() - self.links_df = self.links_df.assign(link_id=np.arange(self.links_df.shape[0]) + 1).drop(columns=["nodes"]) - - del final_links - gc.collect() - - # Gets ONLY the nodes that are needed - self.links_df = gpd.GeoDataFrame(self.links_df, geometry=self.links_df.geometry, crs=4326) + # The link has no intersections + geo = self.__build_geometry(link.nodes) + else: + # The link has intersections + intersecs = np.where(node_indices > 1)[0] + geos = [] + for i, j in zip(intersecs[:-1], intersecs[1:]): + geos.append(self.__build_geometry(link.nodes[i : j + 1])) + geo = MultiLineString(geos) + + geometries.append(geo) + + # Builds the link Geo dataframe + self.links_df.drop(columns=["nodes"], inplace=True) + self.links_df = gpd.GeoDataFrame(self.links_df, geometry=geometries, crs=4326) self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False) - self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1 + self.links_df = self.links_df[self.links_df.geometry.length > 0] - clip_nodes = pd.concat(self.__valid_links) - clip_nodes = clip_nodes[clip_nodes.link_id.isin(self.links_df.link_id)] + self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1 self.node_df.reset_index(inplace=True) - self.node_df = self.node_df[self.node_df.osm_id.isin(clip_nodes.nodes)] - del clip_nodes - gc.collect() - cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types"] self.node_df = gpd.GeoDataFrame(self.node_df[cols], geometry=self.node_df.geometry, crs=self.node_df.crs) @@ -152,10 +152,17 @@ def importing_network(self, node_count, conn): self.logger.info("Adding nodes to file") self.__emit_all(["text", "Adding nodes to file"]) + + # Removing the triggers before adding all nodes makes things a LOT faster + remove_triggers(conn, self.logger, "network") + self.node_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="nodes", mode="a") del self.node_df gc.collect() + # But we need to add them back to add the links + add_triggers(conn, self.logger, "network") + # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a") # I could not get the above line to work, so I used the following code instead @@ -174,12 +181,8 @@ def importing_network(self, node_count, conn): self.__emit_all(["text", "Adding links to file"]) conn.executemany(insert_qry, links_df) - def __set_geometry(self, rec: pd.Series) -> LineString: - rec.geometry = LineString(self.node_df.loc[rec.nodes, "geometry"]) - rec.link_id = self.__link_id - - self.__valid_links.append({"link_id": [self.__link_id] * len(rec.nodes), "nodes": rec.nodes}) - self.__link_id += 1 + def __build_geometry(self, nodes: List[int]) -> LineString: + return LineString(self.node_df.loc[nodes, "geometry"]) def __update_table_structure(self, conn): structure = conn.execute("pragma table_info(Links)").fetchall() @@ -195,14 +198,16 @@ def __build_link_types(self): with read_and_close(self.project.path_to_file) as conn: self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) + self.links_df.highway.fillna("missing", inplace=True) + self.links_df.highway = self.links_df.highway.str.lower() for i, lt in enumerate(self.links_df.highway.unique()): - if str(lt).lower() in self.__all_ltp.link_type.values: + if str(lt) in self.__all_ltp.highway.values: continue data.append([*self.__define_link_type(str(lt)), str(lt)]) self.__all_ltp = pd.concat( [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])] ) - self.__all_ltp.drop_duplicates(inplace=True) + self.__all_ltp.drop_duplicates(inplace=True) self.links_df = self.links_df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") self.links_df.drop(columns=["highway"], inplace=True) @@ -287,6 +292,9 @@ def get_link_field_type(field_name): return tp[field_name]["type"] def process_link_attributes(self): + self.links_df = self.links_df.assign(direction=0, link_id=0) + self.links_df.loc[self.links_df.oneway == "yes", "direction"] = 1 + self.links_df.loc[self.links_df.oneway == "backward", "direction"] = -1 p = Parameters() fields = p.parameters["network"]["links"]["fields"] @@ -317,8 +325,6 @@ def process_link_attributes(self): cols = list_columns(self.project.conn, "links") + ["nodes"] self.links_df = self.links_df[[x for x in cols if x in self.links_df.columns]] gc.collect() - self.links_df["geometry"] = 0 - self.links_df["link_id"] = 0 def establish_modes_for_all_links(self, conn): p = Parameters() From ea33710febef723a53c55860543110b39179be01 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 5 Feb 2024 11:04:21 +1000 Subject: [PATCH 04/32] OSM Geo-processing using GeoPandas --- .../project/network/osm/osm_builder.py | 123 +++++++----------- .../project/network/osm/osm_downloader.py | 7 +- 2 files changed, 49 insertions(+), 81 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 3ef54e492..a350c0beb 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -8,17 +8,17 @@ import geopandas as gpd import numpy as np import pandas as pd -import shapely.wkb +from pandas import json_normalize from shapely import MultiLineString from shapely.geometry import Polygon, LineString from aequilibrae.context import get_active_project from aequilibrae.parameters import Parameters +from aequilibrae.project.project_creation import remove_triggers, add_triggers from aequilibrae.utils import WorkerThread from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns from aequilibrae.utils.spatialite_utils import connect_spatialite from .model_area_gridding import geometry_grid -from aequilibrae.project.project_creation import remove_triggers, add_triggers pyqt = iutil.find_spec("PyQt5") is not None if pyqt: @@ -62,28 +62,16 @@ def __emit_all(self, *args): def doWork(self): with commit_and_close(connect_spatialite(self.path)) as conn: - node_count = self.data_structures() - self.importing_network(node_count, conn) + self.__update_table_structure(conn) + self.importing_network(conn) conn.execute( "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)" ) self.__emit_all(["finished_threaded_procedure", 0]) - def data_structures(self): - self.logger.info("Setting data structures for nodes") - - aux = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count() - - self.logger.info("Finalizing data structures") - self.__emit_all(["text", "Finalizing data structures"]) - return aux - - def importing_network(self, node_count, conn): - self.__update_table_structure(conn) + def importing_network(self, conn): + node_count = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count() - self.logger.info("Adding network nodes") - conn.commit() - self.__emit_all(["text", "Adding network nodes"]) self.node_df.osm_id = self.node_df.osm_id.astype(np.int64) self.node_df.set_index(["osm_id"], inplace=True) @@ -127,7 +115,7 @@ def importing_network(self, node_count, conn): intersecs = np.where(node_indices > 1)[0] geos = [] for i, j in zip(intersecs[:-1], intersecs[1:]): - geos.append(self.__build_geometry(link.nodes[i : j + 1])) + geos.append(self.__build_geometry(link.nodes[i: j + 1])) geo = MultiLineString(geos) geometries.append(geo) @@ -184,14 +172,13 @@ def importing_network(self, node_count, conn): def __build_geometry(self, nodes: List[int]) -> LineString: return LineString(self.node_df.loc[nodes, "geometry"]) - def __update_table_structure(self, conn): - structure = conn.execute("pragma table_info(Links)").fetchall() - has_fields = [x[1].lower() for x in structure] - fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"] - for field in [f for f in fields if f not in has_fields]: - ltype = self.get_link_field_type(field).upper() - conn.execute(f"Alter table Links add column {field} {ltype}") - conn.commit() + + + def __process_link_chunk(self): + + if "tags" in df.columns: + df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) + df.columns = [x.replace(":", "_") for x in df.columns] def __build_link_types(self): data = [] @@ -219,7 +206,7 @@ def __define_link_type(self, link_type: str) -> str: split = link_type.split("_") for i, piece in enumerate(split[1:]): if piece in ["link", "segment", "stretch"]: - link_type = "_".join(split[0 : i + 1]) + link_type = "_".join(split[0: i + 1]) if self.__all_ltp.shape[0] >= 51: link_type = "aggregate_link_type" @@ -247,49 +234,6 @@ def __define_link_type(self, link_type: str) -> str: lt.save() return [letter, link_type] - def __get_link_property(self, d2, val, linktags, v): - vald = linktags.get(f'{v["osm_source"]}:{d2}', val) - if vald is None: - return vald - - if vald.isdigit(): - if vald == val and v["osm_behaviour"] == "divide": - vald = float(val) / 2 - return vald - - @staticmethod - def unique_count(a): - # From: https://stackoverflow.com/a/21124789/1480643 - unique, inverse = np.unique(a, return_inverse=True) - count = np.zeros(len(unique), int) - np.add.at(count, inverse, 1) - return np.vstack((unique, count)).T - - @staticmethod - def get_link_fields(): - p = Parameters() - fields = p.parameters["network"]["links"]["fields"] - owf = [list(x.keys())[0] for x in fields["one-way"]] - - twf1 = ["{}_ab".format(list(x.keys())[0]) for x in fields["two-way"]] - twf2 = ["{}_ba".format(list(x.keys())[0]) for x in fields["two-way"]] - - return owf + twf1 + twf2 + ["osm_id"] - - @staticmethod - def get_link_field_type(field_name): - p = Parameters() - fields = p.parameters["network"]["links"]["fields"] - - if field_name[-3:].lower() in ["_ab", "_ba"]: - field_name = field_name[:-3] - for tp in fields["two-way"]: - if field_name in tp: - return tp[field_name]["type"] - else: - for tp in fields["one-way"]: - if field_name in tp: - return tp[field_name]["type"] def process_link_attributes(self): self.links_df = self.links_df.assign(direction=0, link_id=0) @@ -349,9 +293,38 @@ def establish_modes_for_all_links(self, conn): self.links_df = self.links_df.merge(df, on="link_type", how="left") self.links_df.modes.fillna("".join(sorted(notfound)), inplace=True) + ######## TABLE STRUCTURE UPDATING ######## + def __update_table_structure(self, conn): + structure = conn.execute("pragma table_info(Links)").fetchall() + has_fields = [x[1].lower() for x in structure] + fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"] + for field in [f for f in fields if f not in has_fields]: + ltype = self.get_link_field_type(field).upper() + conn.execute(f"Alter table Links add column {field} {ltype}") + conn.commit() + + @staticmethod + def get_link_fields(): + p = Parameters() + fields = p.parameters["network"]["links"]["fields"] + owf = [list(x.keys())[0] for x in fields["one-way"]] + + twf1 = ["{}_ab".format(list(x.keys())[0]) for x in fields["two-way"]] + twf2 = ["{}_ba".format(list(x.keys())[0]) for x in fields["two-way"]] + + return owf + twf1 + twf2 + ["osm_id"] + @staticmethod - def get_node_fields(): + def get_link_field_type(field_name): p = Parameters() - fields = p.parameters["network"]["nodes"]["fields"] - fields = [list(x.keys())[0] for x in fields] - return fields + ["osm_id"] + fields = p.parameters["network"]["links"]["fields"] + + if field_name[-3:].lower() in ["_ab", "_ba"]: + field_name = field_name[:-3] + for tp in fields["two-way"]: + if field_name in tp: + return tp[field_name]["type"] + else: + for tp in fields["one-way"]: + if field_name in tp: + return tp[field_name]["type"] \ No newline at end of file diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py index cfb52f685..e93421d6f 100644 --- a/aequilibrae/project/network/osm/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -19,7 +19,6 @@ import pandas as pd import requests -from pandas import json_normalize from shapely import Polygon from aequilibrae.context import get_logger @@ -95,11 +94,7 @@ def doWork(self): self.__emit_all(["text", "Downloading finished. Processing data"]) for lst, table in [(self._links, "links"), (self._nodes, "nodes")]: df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"]) - if table == "links": - if "tags" in df.columns: - df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) - df.columns = [x.replace(":", "_") for x in df.columns] - else: + if table != "links": df = df.drop(columns=["tags"]) self.data[table] = df.rename(columns={"id": "osm_id"}) lst.clear() From f026dc8c6cd5b4223e94e6d566de1d3c787d3848 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 5 Feb 2024 13:41:26 +1000 Subject: [PATCH 05/32] Processing with chunking --- .../project/network/osm/osm_builder.py | 141 ++++++++++-------- 1 file changed, 79 insertions(+), 62 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index a350c0beb..1da2d8ce7 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -75,16 +75,11 @@ def importing_network(self, conn): self.node_df.osm_id = self.node_df.osm_id.astype(np.int64) self.node_df.set_index(["osm_id"], inplace=True) - self.logger.info("Creating necessary link types") - self.__emit_all(["text", "Creating necessary link types"]) - self.__build_link_types() + self.__process_link_chunk() shape_ = self.links_df.shape[0] message_step = floor(shape_ / 100) self.__emit_all(["maxValue", shape_]) - self.establish_modes_for_all_links(conn) - self.process_link_attributes() - self.logger.info("Geo-procesing links") self.__emit_all(["text", "Adding network links"]) geometries = [] @@ -115,7 +110,7 @@ def importing_network(self, conn): intersecs = np.where(node_indices > 1)[0] geos = [] for i, j in zip(intersecs[:-1], intersecs[1:]): - geos.append(self.__build_geometry(link.nodes[i: j + 1])) + geos.append(self.__build_geometry(link.nodes[i : j + 1])) geo = MultiLineString(geos) geometries.append(geo) @@ -172,22 +167,38 @@ def importing_network(self, conn): def __build_geometry(self, nodes: List[int]) -> LineString: return LineString(self.node_df.loc[nodes, "geometry"]) - - def __process_link_chunk(self): + self.logger.info("Creating necessary link types") + self.__emit_all(["text", "Creating necessary link types"]) - if "tags" in df.columns: - df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) - df.columns = [x.replace(":", "_") for x in df.columns] - - def __build_link_types(self): - data = [] + # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk + # And let's also assume that each row will be 100 fields at 8 bytes each + # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so. + chunk_size = 100_000_000 + list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + self.links_df = pd.DataFrame([]) + # Initialize link types with read_and_close(self.project.path_to_file) as conn: self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) - self.links_df.highway.fillna("missing", inplace=True) - self.links_df.highway = self.links_df.highway.str.lower() - for i, lt in enumerate(self.links_df.highway.unique()): + for i, df in enumerate(list_dfs): + if "tags" in df.columns: + df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) + df.columns = [x.replace(":", "_") for x in df.columns] + df = self.__build_link_types(df) + df = self.__establish_modes_for_all_links(conn, df) + df = self.__process_link_attributes(df) + else: + self.logger.error("OSM link data does not have tags. Skipping an entire data chunk") + df = pd.DataFrame([]) + list_dfs[i] = df + self.links_df = pd.concat(list_dfs) + + def __build_link_types(self, df): + data = [] + df.highway.fillna("missing", inplace=True) + df.highway = df.highway.str.lower() + for i, lt in enumerate(df.highway.unique()): if str(lt) in self.__all_ltp.highway.values: continue data.append([*self.__define_link_type(str(lt)), str(lt)]) @@ -195,8 +206,8 @@ def __build_link_types(self): [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])] ) self.__all_ltp.drop_duplicates(inplace=True) - self.links_df = self.links_df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") - self.links_df.drop(columns=["highway"], inplace=True) + df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") + return df.drop(columns=["highway"]) def __define_link_type(self, link_type: str) -> str: proj_link_types = self.project.network.link_types @@ -206,7 +217,7 @@ def __define_link_type(self, link_type: str) -> str: split = link_type.split("_") for i, piece in enumerate(split[1:]): if piece in ["link", "segment", "stretch"]: - link_type = "_".join(split[0: i + 1]) + link_type = "_".join(split[0 : i + 1]) if self.__all_ltp.shape[0] >= 51: link_type = "aggregate_link_type" @@ -234,43 +245,7 @@ def __define_link_type(self, link_type: str) -> str: lt.save() return [letter, link_type] - - def process_link_attributes(self): - self.links_df = self.links_df.assign(direction=0, link_id=0) - self.links_df.loc[self.links_df.oneway == "yes", "direction"] = 1 - self.links_df.loc[self.links_df.oneway == "backward", "direction"] = -1 - p = Parameters() - fields = p.parameters["network"]["links"]["fields"] - - for x in fields["one-way"]: - keys_ = list(x.values())[0] - field = list(x.keys())[0] - osm_name = keys_.get("osm_source", field).replace(":", "_") - self.links_df.rename(columns={osm_name: field}, inplace=True, errors="ignore") - - for x in fields["two-way"]: - keys_ = list(x.values())[0] - field = list(x.keys())[0] - if "osm_source" not in keys_: - continue - osm_name = keys_.get("osm_source", field).replace(":", "_") - self.links_df[f"{field}_ba"] = self.links_df[osm_name].copy() - self.links_df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore") - if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide": - self.links_df[f"{field}_ab"] = pd.to_numeric(self.links_df[f"{field}_ab"], errors="coerce") / 2 - self.links_df[f"{field}_ba"] = pd.to_numeric(self.links_df[f"{field}_ba"], errors="coerce") / 2 - - if f"{field}_forward" in self.links_df: - fld = pd.to_numeric(self.links_df[f"{field}_forward"], errors="coerce") - self.links_df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0] - if f"{field}_backward" in self.links_df: - fld = pd.to_numeric(self.links_df[f"{field}_backward"], errors="coerce") - self.links_df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0] - cols = list_columns(self.project.conn, "links") + ["nodes"] - self.links_df = self.links_df[[x for x in cols if x in self.links_df.columns]] - gc.collect() - - def establish_modes_for_all_links(self, conn): + def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFrame: p = Parameters() modes = p.parameters["network"]["osm"]["modes"] @@ -289,9 +264,51 @@ def establish_modes_for_all_links(self, conn): type_list = {k: "".join(set(v)) for k, v in type_list.items()} - df = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) - self.links_df = self.links_df.merge(df, on="link_type", how="left") - self.links_df.modes.fillna("".join(sorted(notfound)), inplace=True) + df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) + df = df.merge(df_aux, on="link_type", how="left") + df.modes.fillna("".join(sorted(notfound))) + return df + + def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: + df = df.assign(direction=0, link_id=0) + df.loc[df.oneway == "yes", "direction"] = 1 + df.loc[df.oneway == "backward", "direction"] = -1 + p = Parameters() + fields = p.parameters["network"]["links"]["fields"] + + for x in fields["one-way"]: + keys_ = list(x.values())[0] + field = list(x.keys())[0] + osm_name = keys_.get("osm_source", field).replace(":", "_") + df.rename(columns={osm_name: field}, inplace=True, errors="ignore") + + for x in fields["two-way"]: + keys_ = list(x.values())[0] + field = list(x.keys())[0] + if "osm_source" not in keys_: + continue + osm_name = keys_.get("osm_source", field).replace(":", "_") + df[f"{field}_ba"] = df[osm_name].copy() + df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore") + if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide": + df[f"{field}_ab"] = pd.to_numeric(df[f"{field}_ab"], errors="coerce") + df[f"{field}_ba"] = pd.to_numeric(df[f"{field}_ba"], errors="coerce") + + # Divides the values by 2 or zero them depending on the link direction + df.loc[df.direction == 0, f"{field}_ab"] /= 2 + df.loc[df.direction == -1, f"{field}_ab"] = 0 + + df.loc[df.direction == 0, f"{field}_ba"] /= 2 + df.loc[df.direction == 1, f"{field}_ba"] = 0 + + if f"{field}_forward" in df: + fld = pd.to_numeric(df[f"{field}_forward"], errors="coerce") + df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0] + if f"{field}_backward" in df: + fld = pd.to_numeric(df[f"{field}_backward"], errors="coerce") + df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0] + cols = list_columns(self.project.conn, "links") + ["nodes"] + return df[[x for x in cols if x in df.columns]] ######## TABLE STRUCTURE UPDATING ######## def __update_table_structure(self, conn): @@ -327,4 +344,4 @@ def get_link_field_type(field_name): else: for tp in fields["one-way"]: if field_name in tp: - return tp[field_name]["type"] \ No newline at end of file + return tp[field_name]["type"] From 85f623f4f3755121ad97cbab055a7dcaced833ae Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 5 Feb 2024 13:52:44 +1000 Subject: [PATCH 06/32] Processing with chunking --- aequilibrae/project/network/osm/osm_downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py index e93421d6f..6a75b6b06 100644 --- a/aequilibrae/project/network/osm/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -95,7 +95,7 @@ def doWork(self): for lst, table in [(self._links, "links"), (self._nodes, "nodes")]: df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"]) if table != "links": - df = df.drop(columns=["tags"]) + df = df.drop(columns=["tags"], errors="ignore") self.data[table] = df.rename(columns={"id": "osm_id"}) lst.clear() gc.collect() From d434de59acabcb0493249fc2f289537ea0abb6c5 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 07:07:14 +1000 Subject: [PATCH 07/32] Processing with chunking --- aequilibrae/project/network/osm/osm_builder.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 1da2d8ce7..20d71f3d4 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -110,7 +110,7 @@ def importing_network(self, conn): intersecs = np.where(node_indices > 1)[0] geos = [] for i, j in zip(intersecs[:-1], intersecs[1:]): - geos.append(self.__build_geometry(link.nodes[i : j + 1])) + geos.append(self.__build_geometry(link.nodes[i: j + 1])) geo = MultiLineString(geos) geometries.append(geo) @@ -168,20 +168,21 @@ def __build_geometry(self, nodes: List[int]) -> LineString: return LineString(self.node_df.loc[nodes, "geometry"]) def __process_link_chunk(self): - self.logger.info("Creating necessary link types") - self.__emit_all(["text", "Creating necessary link types"]) + self.logger.info("Processing link modes, types and fields") + self.__emit_all(["text", "Processing link modes, types and fields"]) # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk # And let's also assume that each row will be 100 fields at 8 bytes each # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so. chunk_size = 100_000_000 - list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] self.links_df = pd.DataFrame([]) # Initialize link types with read_and_close(self.project.path_to_file) as conn: self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) - + self.__emit_all(["maxValue", len(list_dfs)]) for i, df in enumerate(list_dfs): + self.__emit_all(["Value", i]) if "tags" in df.columns: df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) df.columns = [x.replace(":", "_") for x in df.columns] @@ -217,7 +218,7 @@ def __define_link_type(self, link_type: str) -> str: split = link_type.split("_") for i, piece in enumerate(split[1:]): if piece in ["link", "segment", "stretch"]: - link_type = "_".join(split[0 : i + 1]) + link_type = "_".join(split[0: i + 1]) if self.__all_ltp.shape[0] >= 51: link_type = "aggregate_link_type" From fcc6db47e59dbf5f66096abebfe65b17371b6915 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 18:44:45 +1000 Subject: [PATCH 08/32] . --- aequilibrae/project/network/osm/osm_builder.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 20d71f3d4..caccc277f 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -24,9 +24,6 @@ if pyqt: from PyQt5.QtCore import pyqtSignal -if iutil.find_spec("qgis") is not None: - pass - class OSMBuilder(WorkerThread): if pyqt: From 7a8606ed37d63a0e86ab7ae69ecf4741ac472139 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 22:01:31 +1000 Subject: [PATCH 09/32] . --- aequilibrae/project/network/link_types.py | 6 +- .../project/network/osm/osm_builder.py | 57 +++++++++---------- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/aequilibrae/project/network/link_types.py b/aequilibrae/project/network/link_types.py index 05b3524ae..df3f91140 100644 --- a/aequilibrae/project/network/link_types.py +++ b/aequilibrae/project/network/link_types.py @@ -1,6 +1,7 @@ -from sqlite3 import IntegrityError, Connection -from aequilibrae.project.network.link_type import LinkType +from sqlite3 import IntegrityError + from aequilibrae.project.field_editor import FieldEditor +from aequilibrae.project.network.link_type import LinkType from aequilibrae.project.table_loader import TableLoader from aequilibrae.utils.db_utils import commit_and_close from aequilibrae.utils.spatialite_utils import connect_spatialite @@ -84,7 +85,6 @@ def new(self, link_type_id: str) -> LinkType: tp["link_type_id"] = link_type_id lt = LinkType(tp, self.project) self.__items[link_type_id] = lt - self.logger.warning("Link type has not yet been saved to the database. Do so explicitly") return lt def delete(self, link_type_id: str) -> None: diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index caccc277f..87d74e2d3 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -5,11 +5,9 @@ from pathlib import Path from typing import List -import geopandas as gpd import numpy as np import pandas as pd from pandas import json_normalize -from shapely import MultiLineString from shapely.geometry import Polygon, LineString from aequilibrae.context import get_active_project @@ -45,11 +43,9 @@ def __init__(self, data, project, model_area: Polygon) -> None: self.__link_id = 1 self.__valid_links = [] + # Building shapely geometries makes the code surprisingly slower. nids = np.arange(data["nodes"].shape[0]) + self.node_start - nodes = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True) - self.node_df = gpd.GeoDataFrame(nodes, geometry=gpd.points_from_xy(nodes.lon, nodes.lat), crs=4326) - del nodes - del data["nodes"] + self.node_df = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True) gc.collect() self.links_df = data["links"] @@ -67,6 +63,7 @@ def doWork(self): self.__emit_all(["finished_threaded_procedure", 0]) def importing_network(self, conn): + self.logger.info("Importing the network") node_count = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count() self.node_df.osm_id = self.node_df.osm_id.astype(np.int64) @@ -93,7 +90,6 @@ def importing_network(self, conn): if len(link["nodes"]) < 2: self.logger.error(f"Link {idx} has less than two nodes. {link.nodes}") - geometries.append(LineString()) continue # The link is a straight line between two points @@ -101,28 +97,26 @@ def importing_network(self, conn): node_indices = node_count.loc[link["nodes"], "counter"] if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1: # The link has no intersections - geo = self.__build_geometry(link.nodes) + geometries.append([idx, self.__build_geometry(link.nodes)]) else: # The link has intersections + # We build repeated records for links when they have intersections + # This is because it is faster to do this way and then have all the data repeated + # when doing the join with the link fields below intersecs = np.where(node_indices > 1)[0] - geos = [] for i, j in zip(intersecs[:-1], intersecs[1:]): - geos.append(self.__build_geometry(link.nodes[i: j + 1])) - geo = MultiLineString(geos) - - geometries.append(geo) + geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])]) # Builds the link Geo dataframe self.links_df.drop(columns=["nodes"], inplace=True) - self.links_df = gpd.GeoDataFrame(self.links_df, geometry=geometries, crs=4326) - self.links_df = self.links_df.clip(self.model_area).explode(index_parts=False) - self.links_df = self.links_df[self.links_df.geometry.length > 0] + # We build a dataframe with the geometries created above + # and join with the database + geo_df = pd.DataFrame(geometries, columns=["link_id", "geometry"]).set_index("link_id") + self.links_df = self.links_df.join(geo_df, how="inner") self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1 - self.node_df.reset_index(inplace=True) - cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types"] - self.node_df = gpd.GeoDataFrame(self.node_df[cols], geometry=self.node_df.geometry, crs=self.node_df.crs) + self.node_df = self.node_df.reset_index() # Saves the data to disk in case of issues loading it to the database osm_data_path = Path(self.project.project_base_path) / "osm_data" @@ -136,7 +130,10 @@ def importing_network(self, conn): # Removing the triggers before adding all nodes makes things a LOT faster remove_triggers(conn, self.logger, "network") - self.node_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="nodes", mode="a") + cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types", "lon", "lat"] + insert_qry = f"INSERT INTO nodes ({','.join(cols[:-2])}, geometry) VALUES(?,?,?,?,?, MakePoint(?,?, 4326))" + conn.executemany(insert_qry, self.node_df[cols].to_records(index=False)) + del self.node_df gc.collect() @@ -146,14 +143,13 @@ def importing_network(self, conn): # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a") # I could not get the above line to work, so I used the following code instead - insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromWKB(?, 4326))" + insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))" cols_no_geo = self.links_df.columns.tolist() cols_no_geo.remove("geometry") insert_qry = insert_qry.format(", ".join(cols_no_geo), ", ".join(["?"] * len(cols_no_geo))) - geos = self.links_df.geometry.to_wkb() cols = cols_no_geo + ["geometry"] - links_df = pd.DataFrame(self.links_df[cols_no_geo]).assign(geometry=geos)[cols].to_records(index=False) + links_df = self.links_df[cols].to_records(index=False) del self.links_df gc.collect() @@ -161,8 +157,10 @@ def importing_network(self, conn): self.__emit_all(["text", "Adding links to file"]) conn.executemany(insert_qry, links_df) - def __build_geometry(self, nodes: List[int]) -> LineString: - return LineString(self.node_df.loc[nodes, "geometry"]) + def __build_geometry(self, nodes: List[int]) -> str: + slice = self.node_df.loc[nodes, :] + txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist()) + return f"LINESTRING({txt})" def __process_link_chunk(self): self.logger.info("Processing link modes, types and fields") @@ -171,14 +169,15 @@ def __process_link_chunk(self): # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk # And let's also assume that each row will be 100 fields at 8 bytes each # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so. - chunk_size = 100_000_000 - list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + chunk_size = 1_000_000 + list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] self.links_df = pd.DataFrame([]) # Initialize link types with read_and_close(self.project.path_to_file) as conn: self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) self.__emit_all(["maxValue", len(list_dfs)]) for i, df in enumerate(list_dfs): + self.logger.info(f"Processing chunk {i + 1}/{len(list_dfs)}") self.__emit_all(["Value", i]) if "tags" in df.columns: df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) @@ -215,7 +214,7 @@ def __define_link_type(self, link_type: str) -> str: split = link_type.split("_") for i, piece in enumerate(split[1:]): if piece in ["link", "segment", "stretch"]: - link_type = "_".join(split[0: i + 1]) + link_type = "_".join(split[0 : i + 1]) if self.__all_ltp.shape[0] >= 51: link_type = "aggregate_link_type" @@ -264,7 +263,7 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) df = df.merge(df_aux, on="link_type", how="left") - df.modes.fillna("".join(sorted(notfound))) + df.modes.fillna("".join(sorted(notfound)), inplace=True) return df def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: From bf9aca40f18b06929fefdcc05347a6fb16bf6b1a Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 22:11:15 +1000 Subject: [PATCH 10/32] . --- aequilibrae/project/network/osm/osm_builder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 87d74e2d3..ba1c0cba8 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -268,8 +268,9 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: df = df.assign(direction=0, link_id=0) - df.loc[df.oneway == "yes", "direction"] = 1 - df.loc[df.oneway == "backward", "direction"] = -1 + if "oneway" in df.columns: + df.loc[df.oneway == "yes", "direction"] = 1 + df.loc[df.oneway == "backward", "direction"] = -1 p = Parameters() fields = p.parameters["network"]["links"]["fields"] From f32619650b8178d8454dd3efaa3cb5e04edf7cca Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Thu, 8 Feb 2024 20:04:58 +1000 Subject: [PATCH 11/32] . --- aequilibrae/project/network/osm/osm_builder.py | 9 ++++----- aequilibrae/project/network/osm/osm_downloader.py | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index ba1c0cba8..334dcde57 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -44,8 +44,8 @@ def __init__(self, data, project, model_area: Polygon) -> None: self.__valid_links = [] # Building shapely geometries makes the code surprisingly slower. - nids = np.arange(data["nodes"].shape[0]) + self.node_start - self.node_df = data["nodes"].assign(is_centroid=0, modes="", link_types="", node_id=nids).reset_index(drop=True) + self.node_df = data["nodes"] + self.node_df.loc[:, "node_id"] = np.arange(data["nodes"].shape[0]) + self.node_start gc.collect() self.links_df = data["links"] @@ -84,12 +84,11 @@ def importing_network(self, conn): # How can I link have less than two points? if not isinstance(link["nodes"], list): - geometries.append(LineString()) - self.logger.error(f"OSM link {idx} does not have a list of nodes.") + self.logger.debug(f"OSM link/feature {idx} does not have a list of nodes.") continue if len(link["nodes"]) < 2: - self.logger.error(f"Link {idx} has less than two nodes. {link.nodes}") + self.logger.debug(f"Link {idx} has less than two nodes. {link.nodes}") continue # The link is a straight line between two points diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py index 6a75b6b06..1e4586d57 100644 --- a/aequilibrae/project/network/osm/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -86,6 +86,8 @@ def doWork(self): if json["elements"]: for tag, lst in [("node", self._nodes), ("way", self._links)]: df = pd.DataFrame([item for item in json["elements"] if item["type"] == tag]) + if tag == "node": + df = df.assign(is_centroid=0, modes="", link_types="", node_id=0) lst.append(df) del json gc.collect() From d7f78fabb16a7a192e44af2e335d91a1111a82ac Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Thu, 8 Feb 2024 20:15:58 +1000 Subject: [PATCH 12/32] . --- aequilibrae/project/network/osm/osm_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 334dcde57..5bb763a3a 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -285,6 +285,8 @@ def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: if "osm_source" not in keys_: continue osm_name = keys_.get("osm_source", field).replace(":", "_") + if osm_name not in df.columns: + continue df[f"{field}_ba"] = df[osm_name].copy() df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore") if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide": From 121e41d8059da035a60d7eb6333d9e503afe4eae Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Thu, 8 Feb 2024 20:47:47 +1000 Subject: [PATCH 13/32] . --- aequilibrae/project/network/osm/osm_builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 5bb763a3a..b95598643 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -71,12 +71,13 @@ def importing_network(self, conn): self.__process_link_chunk() shape_ = self.links_df.shape[0] - message_step = floor(shape_ / 100) + message_step = max(1, floor(shape_ / 100)) self.__emit_all(["maxValue", shape_]) self.logger.info("Geo-procesing links") self.__emit_all(["text", "Adding network links"]) geometries = [] + self.links_df.set_index(["osm_id"], inplace=True) for counter, (idx, link) in enumerate(self.links_df.iterrows()): self.__emit_all(["Value", counter]) if counter % message_step == 0: From 979b1576d199c689861a49d5120ba371373c60df Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Thu, 8 Feb 2024 21:10:53 +1000 Subject: [PATCH 14/32] . --- aequilibrae/project/network/osm/osm_downloader.py | 6 ++++-- tests/aequilibrae/project/test_osm_downloader.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py index 1e4586d57..97acd944d 100644 --- a/aequilibrae/project/network/osm/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -95,10 +95,12 @@ def doWork(self): self.__emit_all(["Value", len(self.polygons)]) self.__emit_all(["text", "Downloading finished. Processing data"]) for lst, table in [(self._links, "links"), (self._nodes, "nodes")]: - df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"]) + df = pd.DataFrame([]) + if len(lst) > 0: + df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"]) if table != "links": df = df.drop(columns=["tags"], errors="ignore") - self.data[table] = df.rename(columns={"id": "osm_id"}) + self.data[table] = df.rename(columns={"id": "osm_id"}, errors="ignore") lst.clear() gc.collect() diff --git a/tests/aequilibrae/project/test_osm_downloader.py b/tests/aequilibrae/project/test_osm_downloader.py index 706c04989..7c5fe3bb7 100644 --- a/tests/aequilibrae/project/test_osm_downloader.py +++ b/tests/aequilibrae/project/test_osm_downloader.py @@ -32,7 +32,7 @@ def test_do_work2(self): o = OSMDownloader([box(-112.185, 36.59, -112.179, 36.60)], ["car"]) o.doWork() - if "elements" not in o.json[0]: + if len(o.json) == 0 or "elements" not in o.json[0]: return if len(o.json[0]["elements"]) > 1000: From 630fbd8216457aa93eb68db6ef83f0d9a4255eff Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Fri, 9 Feb 2024 09:54:10 +1000 Subject: [PATCH 15/32] . --- aequilibrae/project/network/osm/osm_builder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index b95598643..3ca88f802 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -45,7 +45,7 @@ def __init__(self, data, project, model_area: Polygon) -> None: # Building shapely geometries makes the code surprisingly slower. self.node_df = data["nodes"] - self.node_df.loc[:, "node_id"] = np.arange(data["nodes"].shape[0]) + self.node_start + self.node_df.loc[:, "node_id"] = np.arange(self.node_start, self.node_start + self.node_df.shape[0]) gc.collect() self.links_df = data["links"] @@ -105,7 +105,7 @@ def importing_network(self, conn): # when doing the join with the link fields below intersecs = np.where(node_indices > 1)[0] for i, j in zip(intersecs[:-1], intersecs[1:]): - geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])]) + geometries.append([idx, self.__build_geometry(link.nodes[i: j + 1])]) # Builds the link Geo dataframe self.links_df.drop(columns=["nodes"], inplace=True) @@ -170,7 +170,7 @@ def __process_link_chunk(self): # And let's also assume that each row will be 100 fields at 8 bytes each # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so. chunk_size = 1_000_000 - list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] self.links_df = pd.DataFrame([]) # Initialize link types with read_and_close(self.project.path_to_file) as conn: @@ -214,7 +214,7 @@ def __define_link_type(self, link_type: str) -> str: split = link_type.split("_") for i, piece in enumerate(split[1:]): if piece in ["link", "segment", "stretch"]: - link_type = "_".join(split[0 : i + 1]) + link_type = "_".join(split[0: i + 1]) if self.__all_ltp.shape[0] >= 51: link_type = "aggregate_link_type" From 5f78189934d0a5a82582911fbed565d7360db810 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Fri, 9 Feb 2024 20:32:34 +1000 Subject: [PATCH 16/32] . --- aequilibrae/project/network/osm/osm_builder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 3ca88f802..ee60ca224 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -105,7 +105,7 @@ def importing_network(self, conn): # when doing the join with the link fields below intersecs = np.where(node_indices > 1)[0] for i, j in zip(intersecs[:-1], intersecs[1:]): - geometries.append([idx, self.__build_geometry(link.nodes[i: j + 1])]) + geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])]) # Builds the link Geo dataframe self.links_df.drop(columns=["nodes"], inplace=True) @@ -170,7 +170,7 @@ def __process_link_chunk(self): # And let's also assume that each row will be 100 fields at 8 bytes each # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so. chunk_size = 1_000_000 - list_dfs = [self.links_df.iloc[i: i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] self.links_df = pd.DataFrame([]) # Initialize link types with read_and_close(self.project.path_to_file) as conn: @@ -214,7 +214,7 @@ def __define_link_type(self, link_type: str) -> str: split = link_type.split("_") for i, piece in enumerate(split[1:]): if piece in ["link", "segment", "stretch"]: - link_type = "_".join(split[0: i + 1]) + link_type = "_".join(split[0 : i + 1]) if self.__all_ltp.shape[0] >= 51: link_type = "aggregate_link_type" From 4b4371de572f38afcc4df717defa9ee99e3e8f93 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 12 Feb 2024 17:41:33 +1000 Subject: [PATCH 17/32] . --- aequilibrae/project/network/osm/osm_builder.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index ee60ca224..c2743e905 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -143,6 +143,8 @@ def importing_network(self, conn): # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a") # I could not get the above line to work, so I used the following code instead + self.links_df.index.name="osm_id" + self.links_df.reset_index(inplace=True) insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))" cols_no_geo = self.links_df.columns.tolist() cols_no_geo.remove("geometry") @@ -275,6 +277,8 @@ def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: fields = p.parameters["network"]["links"]["fields"] for x in fields["one-way"]: + if "link_type" in x.keys(): + continue keys_ = list(x.values())[0] field = list(x.keys())[0] osm_name = keys_.get("osm_source", field).replace(":", "_") From 8fb82a8bfd437ffd9bcf230cc4d4f0045669895b Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 12 Feb 2024 17:53:41 +1000 Subject: [PATCH 18/32] . --- aequilibrae/project/network/osm/osm_builder.py | 2 +- aequilibrae/project/network/osm/osm_downloader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index c2743e905..f20f52646 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -208,7 +208,7 @@ def __build_link_types(self, df): df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") return df.drop(columns=["highway"]) - def __define_link_type(self, link_type: str) -> str: + def __define_link_type(self, link_type: str) -> [str, str]: proj_link_types = self.project.network.link_types original_link_type = link_type link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower() diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py index 97acd944d..58b3fd6c9 100644 --- a/aequilibrae/project/network/osm/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -68,7 +68,7 @@ def doWork(self): m = f"[maxsize: {memory}]" for counter, poly in enumerate(self.polygons): msg = f"Downloading polygon {counter + 1} of {len(self.polygons)}" - self.logger.debug(msg) + self.logger.info(msg) self.__emit_all(["Value", counter]) self.__emit_all(["text", msg]) west, south, east, north = poly.bounds From afa381fe6f737ae0acd06a33cc63e17749b283fe Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 12 Feb 2024 21:18:25 +1000 Subject: [PATCH 19/32] . --- aequilibrae/project/network/osm/osm_builder.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index f20f52646..18ec3c7c8 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -169,9 +169,9 @@ def __process_link_chunk(self): self.__emit_all(["text", "Processing link modes, types and fields"]) # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk - # And let's also assume that each row will be 100 fields at 8 bytes each - # This makes 1Gb roughly equal to 1.34 million rows, so 1 million would so. - chunk_size = 1_000_000 + # And let's also assume that each row will be 200 fields at 8 bytes each + # This makes 8Gb roughly equal to 5.3 million rows, so 5 million would so. + chunk_size = 5_000_000 list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] self.links_df = pd.DataFrame([]) # Initialize link types @@ -182,6 +182,8 @@ def __process_link_chunk(self): self.logger.info(f"Processing chunk {i + 1}/{len(list_dfs)}") self.__emit_all(["Value", i]) if "tags" in df.columns: + # It is critical to reset the index for the concat below to work + df.reset_index(drop=True, inplace=True) df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) df.columns = [x.replace(":", "_") for x in df.columns] df = self.__build_link_types(df) @@ -191,7 +193,7 @@ def __process_link_chunk(self): self.logger.error("OSM link data does not have tags. Skipping an entire data chunk") df = pd.DataFrame([]) list_dfs[i] = df - self.links_df = pd.concat(list_dfs) + self.links_df = pd.concat(list_dfs, ignore_index=True) def __build_link_types(self, df): data = [] From 9a0d09b6d561ae2fd705325e3dcd45949c3f500a Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 12 Feb 2024 21:18:41 +1000 Subject: [PATCH 20/32] . --- aequilibrae/project/network/osm/osm_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 18ec3c7c8..31da60b56 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -143,7 +143,7 @@ def importing_network(self, conn): # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a") # I could not get the above line to work, so I used the following code instead - self.links_df.index.name="osm_id" + self.links_df.index.name = "osm_id" self.links_df.reset_index(inplace=True) insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))" cols_no_geo = self.links_df.columns.tolist() From 86571d95ac13f2239eb4d083be9be6e85b28e594 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 12 Feb 2024 21:21:18 +1000 Subject: [PATCH 21/32] . --- aequilibrae/project/network/osm/osm_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 31da60b56..e5dd8471b 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -170,8 +170,8 @@ def __process_link_chunk(self): # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk # And let's also assume that each row will be 200 fields at 8 bytes each - # This makes 8Gb roughly equal to 5.3 million rows, so 5 million would so. - chunk_size = 5_000_000 + # This makes 2Gb roughly equal to 2.6 million rows, so 2 million would so. + chunk_size = 1_000_000 list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] self.links_df = pd.DataFrame([]) # Initialize link types From 224e5fbe8a2cbacb57bcb75d146502e0e58935f8 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 12 Feb 2024 22:02:24 +1000 Subject: [PATCH 22/32] . --- aequilibrae/project/network/osm/osm_builder.py | 11 +++++++---- aequilibrae/transit/map_matching_graph.py | 3 +-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index e5dd8471b..f0d3f0f4d 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -94,18 +94,21 @@ def importing_network(self, conn): # The link is a straight line between two points # Or all midpoints are only part of a single link - node_indices = node_count.loc[link["nodes"], "counter"] + node_indices = node_count.loc[link["nodes"], "counter"].to_numpy() if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1: # The link has no intersections - geometries.append([idx, self.__build_geometry(link.nodes)]) + geometries.append([idx, self._build_geometry(link.nodes)]) else: + # Make sure we get the first and last nodes, as they are certainly the extremities of the sublinks + node_indices[0] = 2 + node_indices[-1] = 2 # The link has intersections # We build repeated records for links when they have intersections # This is because it is faster to do this way and then have all the data repeated # when doing the join with the link fields below intersecs = np.where(node_indices > 1)[0] for i, j in zip(intersecs[:-1], intersecs[1:]): - geometries.append([idx, self.__build_geometry(link.nodes[i : j + 1])]) + geometries.append([idx, self._build_geometry(link.nodes[i : j + 1])]) # Builds the link Geo dataframe self.links_df.drop(columns=["nodes"], inplace=True) @@ -159,7 +162,7 @@ def importing_network(self, conn): self.__emit_all(["text", "Adding links to file"]) conn.executemany(insert_qry, links_df) - def __build_geometry(self, nodes: List[int]) -> str: + def _build_geometry(self, nodes: List[int]) -> str: slice = self.node_df.loc[nodes, :] txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist()) return f"LINESTRING({txt})" diff --git a/aequilibrae/transit/map_matching_graph.py b/aequilibrae/transit/map_matching_graph.py index a1ab70418..56e5610d8 100644 --- a/aequilibrae/transit/map_matching_graph.py +++ b/aequilibrae/transit/map_matching_graph.py @@ -118,8 +118,7 @@ def __build_graph_from_cache(self): def __build_graph_from_scratch(self): self.logger.info(f"Creating map-matching graph from scratch for mode_id={self.mode_id}") - self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=np.nan) - self.df.loc[:, "geo"] = self.df.wkt.apply(shapely.wkt.loads) + self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=self.df.wkt.apply(shapely.wkt.loads)) self.df.loc[self.df.link_id < 0, "link_id"] = self.df.link_id * -1 + self.df.link_id.max() + 1 # We make sure all link IDs are in proper order From eea6ad58a74f6f74906780a6854ed7abb7b7377a Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 12 Feb 2024 22:25:57 +1000 Subject: [PATCH 23/32] pandas deprecations --- aequilibrae/project/network/osm/osm_builder.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index f0d3f0f4d..c78a4993c 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -200,7 +200,7 @@ def __process_link_chunk(self): def __build_link_types(self, df): data = [] - df.highway.fillna("missing", inplace=True) + df = df.fillna(value={"highway": "missing"}) df.highway = df.highway.str.lower() for i, lt in enumerate(df.highway.unique()): if str(lt) in self.__all_ltp.highway.values: @@ -270,8 +270,7 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) df = df.merge(df_aux, on="link_type", how="left") - df.modes.fillna("".join(sorted(notfound)), inplace=True) - return df + return df.fillna(value={"modes": "".join(sorted(notfound))}) def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: df = df.assign(direction=0, link_id=0) From b7df86e0cf6201f4668fe8566c68bc6710297abc Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Fri, 16 Feb 2024 09:53:18 +1000 Subject: [PATCH 24/32] improves griding function --- .../network/osm/model_area_gridding.py | 31 ++++++++---------- .../project/network/osm/osm_builder.py | 9 +++-- .../project/data/porto_rico.parquet | Bin 0 -> 11961 bytes tests/aequilibrae/project/data/wynnum.parquet | Bin 0 -> 1626 bytes .../project/test_polygon_gridding.py | 22 +++++++++++++ 5 files changed, 42 insertions(+), 20 deletions(-) create mode 100644 tests/aequilibrae/project/data/porto_rico.parquet create mode 100644 tests/aequilibrae/project/data/wynnum.parquet create mode 100644 tests/aequilibrae/project/test_polygon_gridding.py diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py index 3f58a21b0..3a6b636d5 100644 --- a/aequilibrae/project/network/osm/model_area_gridding.py +++ b/aequilibrae/project/network/osm/model_area_gridding.py @@ -1,28 +1,25 @@ # Inspired by https://www.matecdev.com/posts/shapely-polygon-gridding.html +from math import ceil + +import geopandas as gpd import numpy as np from shapely.geometry import Polygon -import geopandas as gpd def geometry_grid(model_area, srid) -> gpd.GeoDataFrame: minx, miny, maxx, maxy = model_area.bounds - subd = min(0.01, abs(maxy - miny) / 3, abs(maxx - minx) / 3) - space_x = int((maxx - minx) / subd) - space_y = int((maxy - miny) / subd) - combx, comby = np.linspace(minx, maxx, space_x), np.linspace(miny, maxy, space_y) + # Some rough heuristic to get the number of points per sub-polygon in the 2 digits range + subd = ceil((len(model_area.boundary.coords) / 32) ** 0.5) + dx = (maxx - minx) / subd + dy = (maxy - miny) / subd elements = [] - for i in range(len(combx) - 1): - for j in range(len(comby) - 1): - elements.append( - Polygon( - [ - [combx[i], comby[j]], - [combx[i], comby[j + 1]], - [combx[i + 1], comby[j + 1]], - [combx[i + 1], comby[j]], - ] - ) - ) + x1 = minx + for i in range(subd): + j1 = miny + for j in range(subd): + elements.append(Polygon([[x1, j1], [x1, j1 + dy], [x1 + dx, j1 + dy], [x1 + dx, j1]])) + j1 += dy + x1 += dx gdf = gpd.GeoDataFrame({"id": np.arange(len(elements))}, geometry=elements, crs=srid) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index c78a4993c..06d586a32 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd from pandas import json_normalize -from shapely.geometry import Polygon, LineString +from shapely.geometry import Polygon from aequilibrae.context import get_active_project from aequilibrae.parameters import Parameters @@ -57,9 +57,12 @@ def doWork(self): with commit_and_close(connect_spatialite(self.path)) as conn: self.__update_table_structure(conn) self.importing_network(conn) + + self.logger.info("Cleaning things up") conn.execute( "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)" ) + conn.execute("VACUUM;") self.__emit_all(["finished_threaded_procedure", 0]) def importing_network(self, conn): @@ -269,8 +272,8 @@ def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFram type_list = {k: "".join(set(v)) for k, v in type_list.items()} df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) - df = df.merge(df_aux, on="link_type", how="left") - return df.fillna(value={"modes": "".join(sorted(notfound))}) + df = df.merge(df_aux, on="link_type", how="left").fillna(value={"modes": "".join(sorted(notfound))}) + return df def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: df = df.assign(direction=0, link_id=0) diff --git a/tests/aequilibrae/project/data/porto_rico.parquet b/tests/aequilibrae/project/data/porto_rico.parquet new file mode 100644 index 0000000000000000000000000000000000000000..97dfe198c23b20bad43fe123f8dadf8c091a0ee2 GIT binary patch literal 11961 zcmchd2V7Iv`~Lw^QL#=CE!7k&YDGwH2t(R>!$=^Ia0!q=>dGPsWSId3SE;k^y7%6z zTCEGmb?=3CSZ5v8YTbMPJ|}>-e*L}v|JVQj|NVKHQ=bd)`GNClZYp3c1K1gOdpe3apuoe_%W4@*flN zC#50YZwUSey{Gt<2BATIA6A@Kt@v2q;3>Z059@~?t{++@@q=GnO1I~V3DViBI+1eC5Awza6Fah%J#IFuL z$gr=9JXgzNDSG;tqNF_s7`D!nTCGE(Df;6_jYr)EKltJ5#G5?Sx4k@LEc$Pv`aNve ziK4A7?UTlVuigq+TpK*3HLuJH_Fp!j@qrE$J^6LJr5WIlcs(903#aG{&sU#c0&b!0 z^~t`EDLSy$q;7k_Yr}(|4sS)#A;;#-9RU67jTjy`GlZh&UoRiC1KeZqgxNVQC_1W0 zQ~m>ZZ{dSC%bHPi?;PvANnlsm`BAl-Q1p(NO9!m5-wvOhLz~v4=#CTb4(fsWWxaCb z?E@%!Ky&w>tufxh$d>z3>rnL2maQ`V!EuFy|LRklqAdxQQ32q&10(-9UW=k_k+0Xc z2RGA1F8$q)qF<%Ur)t302Pd=;flul7o{?a@UI%g``oH1P3zqPoj0Hz@2s{}5nn!Qr z`TOhxw>~2BB`#ie-gVu&=n9oyi(fa%2-heaq^2U7$dF6vr2To|v< ze8rS)sNb}-{7)zJ>pmiQjs=|5d*Aez;9b)W&RdOmX}xsYgAw4o{4>qR>}S|(_LYxU zqkq}*RY?Ny+G(rWZ-)FY1`c^$Zy&>Eb{nnafDh^CJNxft*!$~Vvbo?!C%0|dzK3D8 zCncYbMgN97&)&Yen_({+_ch%9ghvOi+BE;oE{3Jzf8u96mZtRcss*#J|8vZH0m!7O7sr} zKNLU{@UJgM6`$S4uqmC7tZa<=>~IVUyaGOJ^s$x0A3G!Hp$EV(`drNW75cT3M($<7 zd!-GdXM^>luXca5m0=yN=G7m7`B+9fr`y5xJKuc02KGMnz#7p3{AoAe!DaAAezOLB zQ?@YdklwMwegPjT9{g2a1^ipqe3keXk9KARKWPK5^_S+P1bj40yzl4D4BP&<0W+tg zy=vo%odw{to4A!DFy88%7I!J|`8IcQJ3*hjXZvitvI*lqIk+Vk%sb-xVLW*A*m#bo8%3!&kWg* zKi}igwI<%X!v~LTTCsiu+K1f>9OnyeMroVLV6R6{gNQ7{J427!y3%4_$gxz!y3EFlZW_X zeeYRPbZa%kE;X)rH52^sT922Dz~xQXe?AfGHKb0&rs3f2Q>R4kM*m+=j~!3|cI+H< zV#P}yJ?X^$YeC?;Z6DX018%qS>5!d2Fl_Fe;DJM5As+t-pMP)_!-kwF&Nv4?eSOC4 zODhqt2l-K?-gr>fbqV-7C8&2E{TIZZ6NSmF$#QCp^}P4lbY|F8>^Jw@ zRN4?fFUCJTTMt}%X9@39#Onj!@}mo;p#AaNV>MWRUE0iQw|_Fj8amuA9gp_GbAPKp z4ZOYb+1&`p>+Sa0-5uO3?0lCum|y2^3N;@b^=$UbbFlaR%At`M?dG5L*HwkEWVx!4j;4lYF%)YB`%`jc*F;ZGmQ7D z@y8J}#xbno(ftG|^sPO#Me>608TLY}-#*^~E^I#T@QtwyD>>ERt0P!%>%J`8)en4M zc&y-O4cI^75{~|Kib~ zkqve<&JVSN*$Lq7p|@K5fknN${A{|xqlbqruo^}oU!Du&{(}As_O#qJV=Ec?|{Lcwh-N5mOwzb`k?s2;Iz#`bkIpEPo+fatRCmDC`IheVURw)<^dtX}lzy^QySaR#TvOx^n zraU-G3vTtw7^esS{k(RC8a!tAp_7{i!hg-%%pMDWM4gd+-4@)hUQlQc z^JV?NVc3tg?YHaAIw z!2MACO@G9r>Qwtp(0AgmlIxLukUs-$zg)!l;x(`wLuKggfot=0Cs9_-Jb zC^lXN=RW0c$p;sIcVi`4f70^9N9UC?>`tR%eIxLqZ`Qr>1&2&9MQ`%K`A#?P`?Vzu zYp2tdtC1f+f3rvBF2?yoJkin!=bzd;E}yAzWBA)3tLR!DTd8@^@p%b{^g+UjXr-2lG`P}zwes?i!<;S<%Zup3zB@;XJNe6S&^uyyD zQ*`;7ZRH1?4EwzD%od}9Hq?}5gZG}IEK3?-}6h+rs?c3y%1^V7_k6^ht&vqTv=Yg4F z=j`43tRd=SuGYWf0Q4E-Vf{{;&eai6^bvdC zmIm&3eG*tc+h$4%iAf}JOyh^7sUd^ ztFmv>A8WxIgts;ngGV2@^tx>kRh2)$^IOk5@rjwDIg^frj>G!6kR5-kBij4E%wCp^ z_0((S?}AUkgBJ|3*2en#DhhB>TkIA|4EX6|6(m@R~e*pJM)2fhgi*B_@MMBv`FzUn#>JX5^q%_rcZj~X970ZyAYIV%G4-rVSYp%D31 z(`@P{7UP9qz3uFa{M&ROdvGo|{m;Dfxyav(dSXLI`*jCGpDV!58I~SG@P+U4qlSRn zKR*7I!bQ=kg1?{a25(yScSRVu2}z*XFSr?t>Jxo79G@7FgZ(8t04u7PqBocO-MIoT zvzPp0EkXOmz-ABN6 ze(C*^@lbT=%}X(@u>X1QFCqWcptWBg0`o~ihCY|B{?#pOIQ+9OqEA!s>@n0OKkT>N z$-xl%ZOID>TMXXt)8K0h(ZAsADf$7p>{9&KG{#%8`-@JdkqrB!_QRbsp>NY`Uy3Tg zM|y-^E(T8>)WGA9{eS9)8(nh1*PG107CajJ)1Ae8)4|FUZKio}p4gP#CGQQ`d;Y|E zW6XCG3LZdyhmCnz1HqpqKl?Vg4EeNY9c}XWcs_Eu*^dvvNB2q&uLb{={O!^Lu=4My ziuU8+@1~p6FQNbLHIdf&;Lz?1D{g}8cl2?0#QAB`^;JjEr^+lUr{V2L_-9F8daK7}~3A`_3UXQ!rUq%KxT7Y9e-Z?M`=h5d)49k`1 z|7uR8yaSj#_DKiVE}9_~gFlZ)L}7kgO1oOOgHQjo_1rkj=gX9jn#AI~yUEV^tO5Aj zD>IW?&Vqf0{q(dsc*D;t_}g%PuIPC6r>5v1;vwTy7;qscs|ft z{G|+hyWOZ0UBE%DhUxj>`?tCe+%}(Ko0n`kV1vHOAGK{8FTnjyI&Sd6lBuVT*8y`Y z+Ba$dzN46QJp#Pi_0<w@#N6yB^wHW)~ zh8`ye_J=>ux3uIg#d*7)tRDyR?svcgmSs54t$7sq2im)dqn0ylwC)QD>Hk|tMZ=w7 z?|Ov%ma&54eOEH7W)zAlS~e`7-@)#?8O@?pU8HN^jcc>MJc z?0CAMSv}aN(MPv^AFjs!d2Q$6Taf3y?^}cQakXFQ5$IF4vuUFaYmtA`W_}Y6eG?BA zn@9YF{dx1LyQiVg`p&rH1<#t=EAChDsminU)wn;sv*KLwT=XBxeoqhFfc?0~!=MG= z5!#-0)@(xiM?+>eK>Id@XSnq@GwkpC-Ps2*{=>ziZn(BEY!e(4E};GT-4zeTTN$=- z`(Jaqfce>~0qJ1JkV~Uu!QOlbUR<~KLL+#{zW%}K+i|~P`{>ug0oaepbB-N|*DotC zuKyO#&*HlWH`{~rBROXc#Ph&D+5n%uxZhue2Ze*Mf6hpZnGLRJ)%M5+)CXQouBg8s z_cx!n`1}p}d-rkhq&0J=zbZ%njR?d8xIdg#x5NPU-nSfN*n#|~FZN)5dqVoJ2vakDeUj(MGjhe4Cj5D`gn7&k8jb>tBy15 zp=JRIdvG3j|9S2EkDlLqz`RKR?sj)0c8eH=0bu~Z!z_>s0(e5p6XH)<{86}jpOlSBf?BpVpM(40<+rX(p$X-q2l8r zDM2hnQ9LfMUq50(n*+AOYsH>UkJF|1YVCAusuh1bxY~@dss{M_of>+ZcUl~bU~;*2 zWJYR@#-T4Zp}_0S47Ce|I+H`MtuC>WxuVRCSDG-HPH%AN^ezQaenx1Pl+JXUvW+DN<4pW)U>b)pZTb*hsmZUbD3TzH5(F-%Tk!g@CtCjdx zOSDYZo7K=upTH5J-lRiItQ3cVbV)^x$&|oJ)2WTg zT)N)sa%+n`37ia@+vKcjB2}BLkS;|=sG7J`?R2WOMz>S%azU2Vq&1ohWbRTE^m5vW zyQC(kll-&WF+2gA+d-~uuqhc=riV+svzFPunE zs_T54gSgonNl9vl4YMKwyqv5q@+y#NB%aGGGGpe%$e9kk!3G@}z18aUn2QlSD3u$r zn%;Ue+vY|}cqg%nYb55Nr{3w*L%AG%De+FOOKnWxDD(~sriBX=tnL-7RAUxagNqEP zB+>SdDihX*)oCI%9-9TN5Fgpq4XQP9M!pUAs*Z2Vv5qtvTPaCPu&B$}^KBOB!i$cK zry{~gaQ8I7UBz8^F|m;`0%Sd6oqsfsi;0XzFge~#0p}){|N$}~xAx>R|Iw#=^0;FT0>QzgY}lb~2@(P=aIvO=|3;mXgG;X1!0qmYy( zdNh>1x0Z@`XIQK2vjtuzW9aZYN|lYd6%n0up8UK-n_4ML&NWMk-tK%Q&rEd6DH5e* zCVR_@^7G_84f^Ok60SZk(OWOad{trv7k#r;N|jk_A#+lQwHA>J@||fGfl;H(CHiM- z_!6!AG7V)esvRc}Ba|lNKW@l=3r^Q_5tX5)tM#OS%Oj-)!!Xhfl&lbAUQbf*ttV681 z*t8UhM-BUFDT5)aYQ4d~q@0KLX0C>k7&ASInW{XkAtK9V$ZEgb{cZ?6lTn)5bbiXRvm~3vTphL z%IMfMk(s9^Yt@vPrNjEriQ^2p7IUG-6KzlspNkbmX(AaHeau>q(56tx64PsJq?BNd z5nI4sL~f4Lkd-H~>n-u_>T3t)T$b;h3+yX4!yXc>1=0sG4;$nw7cV*tbj}%=9i1MDyP>bUVk8O@FmtN8AhXy64})lHyi6gEsjH5vlHXEv5w0iSE50D zK?h;7uk7kt1U{67fEpwEjGA$)4RHrJ+ zu)et3GN&O&X@)(`R$=tt&Py|zWVdmh$G}N#J50`sv@yZ zbj!wiLS9LfXen=uBc90IkTbBJw3c`$$!Tbp1KW}uoRY6}A(kOmBsY8Q#Fv>>uoGfG z+8ZY#1?*u!J;~|3VvRM6Tq7^(ptpC-JlF_(hQO*5$GagrPn9R*zTZZjC&M0y_3n*v zodEkE`fJ^3B1O*Iwx$}r|!60+Qd6A9yN~ zp9@pC8HGh2SLkx4(qWFL2=9tXnd^dJNj6-su9J1rx zD$2~&Ddl9(bIFp0ZdsWu9{ueqp+2 z-i3AK(JSJ;dpp@nl^#hJ;#8nQUP?3b;;~1PbC^(=l!aqdvW=Xaipf{1_mIUUm#DyB z4i0f~_)ST{p$vsJ=Q9H2j|d^>t?Dto-IKXd#ZggG23<+>tTb<_gfxG96of=lDvB>a z++klOepgU9GZaV_@dc!1X-$7&iuaJGso`pQ8Klwwjy#M(S|)i9hXyhQ?|G4w33JI; zQVBV#;vz9oPFRS8W6k`@6*-T>5=A1J+&hybEoo1^)hE|mDkhEop-&>73s~x@FdRJ$y-QHR4aM(&X)lZ!`vE-RkOEVsLzW2{39RHb?!GBu+ EFK+URrvLx| literal 0 HcmV?d00001 diff --git a/tests/aequilibrae/project/data/wynnum.parquet b/tests/aequilibrae/project/data/wynnum.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9c9eeb46c11259cc0352d909f48fb7536c569a0c GIT binary patch literal 1626 zcmbVN%WvaE7`NTDDo`P`s-r}zP_VQSNNH+2w)5CsX(xRo&c>PsX+5FAa=x9DIDE9o_QZA&6vCAkpEf$g~n1z(CJDI)sD7QGP` z1H@W5L>e>|q=Hs(7UiSsHv!=oqzKRy0V(l9;N?4#{+_&s>~0=|mRuoM|7gHw73TYI zZoK{NE86|vzRLg8I=gfEsqofk-+=t%m(f3e@a|lG@%87A1B`Zm=bcYJ`UT`k`SRs8 zkpEuQe@%h^)A3KY0}xNP@m}DjkM+K84|Qkk`R#m21wW>uKdnT6Ctv%2BUJdVNQBlV z+KE2w(gA|rxJ>~Ow~68wAo$WU9TB9$TjAKVQ5Jp13WM*i1gYqH@Pn02f(YLYz0mJ< z&CH_S(9Sud+2(X;O_mEc%W5M$eXGA<+~mIWOrrcb!+z2a`drg+S;jWOmx7#F6t3x0XTWclx@@NLz~Bb_Zz5M zJ#1dH_w(uw9%ubsgir-09uE)_P_~Z_5bC4dzOTz>|6_Nta(`*hyI{sL4D7#)mU60` z+}UycB#Ljxp}C%^x_ca!b+5p8T7`MHnV!l6+fZtPTO1&#J#4V;ns6o?uA$`%Bei+p z>5cS2<APN!A%^qM@8hozaCZ#b%@7>Cxx=QKY6tr{yE#odC@9;O9(FjAHJ(Hw9&W!_FG10*Q(eYaGvaK&BLD|rH2nzvnLPxF+$ zeeST{cS{3#auvfXa~bSNDAi?G8JzOV7}zUyjaLT!gfd5bX@1ZH%$YVcP1UNN^m67* zHDn3b-fN01@Gwww=|MNq8)^IzuJ5MDMw-=ISyS_5sp%Cez=PP~fisg!zusaol`=vu zCLUIwZ&(pNL>Lp8S*w#fm>gQAK{wZM+7&dz7U2JiQ*jPFB<10PuU_7CJ2j}yEU{4U zwBrHCR&UhFS8X^auT*a=a52=re|7xx=gM%O@EKPszA}$NmRAd34H)?RdCu(R3nCVg zGKZj41!XJPo~F1NW;yaiWC;VbXmBSKtm4;3Sccrwn% uBGU={I3uI(`0-SCwzbh{yE`7+Zsh6>h$Qg~5RSo%habI7c)rW Date: Fri, 16 Feb 2024 10:25:47 +1000 Subject: [PATCH 25/32] adds network cleaning --- aequilibrae/project/network/network.py | 7 ++++++- .../project/network/osm/osm_builder.py | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py index 07e03ee87..68c0a45fa 100644 --- a/aequilibrae/project/network/network.py +++ b/aequilibrae/project/network/network.py @@ -126,6 +126,7 @@ def create_from_osm( model_area: Optional[Polygon] = None, place_name: Optional[str] = None, modes=("car", "transit", "bicycle", "walk"), + clean=True, ) -> None: """ Downloads the network from Open-Street Maps @@ -141,6 +142,9 @@ def create_from_osm( **modes** (:obj:`tuple`, Optional): List of all modes to be downloaded. Defaults to the modes in the parameter file + **clean** (:obj:`bool`, Optional): Keeps only the links that intersects the model area polygon. Defaults to + True. Does not apply to networks downloaded with a place name + .. code-block:: python >>> from aequilibrae import Project @@ -191,6 +195,7 @@ def create_from_osm( raise ValueError("Coordinates out of bounds. Polygon must be in WGS84") west, south, east, north = model_area.bounds else: + clean = False bbox, report = placegetter(place_name) if bbox is None: msg = f'We could not find a reference for place name "{place_name}"' @@ -236,7 +241,7 @@ def create_from_osm( dwnloader.doWork() self.logger.info("Building Network") - self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area) + self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area, clean=clean) if pyqt: self.builder.building.connect(self.signal_handler) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 06d586a32..959275210 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -1,3 +1,4 @@ +import geopandas as gpd import gc import importlib.util as iutil import string @@ -27,7 +28,7 @@ class OSMBuilder(WorkerThread): if pyqt: building = pyqtSignal(object) - def __init__(self, data, project, model_area: Polygon) -> None: + def __init__(self, data, project, model_area: Polygon, clean: bool) -> None: WorkerThread.__init__(self, None) project.logger.info("Preparing OSM builder") @@ -38,6 +39,7 @@ def __init__(self, data, project, model_area: Polygon) -> None: self.model_area = geometry_grid(model_area, 4326) self.path = self.project.path_to_file self.node_start = 10000 + self.clean = clean self.report = [] self.__all_ltp = pd.DataFrame([]) self.__link_id = 1 @@ -62,7 +64,9 @@ def doWork(self): conn.execute( "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)" ) - conn.execute("VACUUM;") + conn.commit() + self.__do_clean(conn) + self.__emit_all(["finished_threaded_procedure", 0]) def importing_network(self, conn): @@ -170,6 +174,17 @@ def _build_geometry(self, nodes: List[int]) -> str: txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist()) return f"LINESTRING({txt})" + def __do_clean(self, conn): + if not self.clean: + conn.execute("VACUUM;") + return + self.logger("Cleaning up the network down to the selected area") + links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326) + links_left = [[x] for x in links[~links.link_id.isin(links.clip(self.model_area).link_id)].link_id] + conn.executemany("DELETE FROM links WHERE link_id = ?", links_left) + conn.commit() + conn.execute("VACUUM;") + def __process_link_chunk(self): self.logger.info("Processing link modes, types and fields") self.__emit_all(["text", "Processing link modes, types and fields"]) From 3580c14dc4e8c1dc160771f9e1fe93edcc032af3 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Fri, 16 Feb 2024 10:50:03 +1000 Subject: [PATCH 26/32] adds network cleaning --- aequilibrae/project/network/osm/osm_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index 959275210..a71422683 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -178,7 +178,7 @@ def __do_clean(self, conn): if not self.clean: conn.execute("VACUUM;") return - self.logger("Cleaning up the network down to the selected area") + self.logger.info("Cleaning up the network down to the selected area") links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326) links_left = [[x] for x in links[~links.link_id.isin(links.clip(self.model_area).link_id)].link_id] conn.executemany("DELETE FROM links WHERE link_id = ?", links_left) From 12e40284505a312629bf9f387d13cb03f2cc4f91 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 19 Feb 2024 00:15:01 +1000 Subject: [PATCH 27/32] Allows for use of polygons with rings inside --- aequilibrae/project/network/osm/model_area_gridding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py index 3a6b636d5..bcaa1a6a3 100644 --- a/aequilibrae/project/network/osm/model_area_gridding.py +++ b/aequilibrae/project/network/osm/model_area_gridding.py @@ -9,7 +9,7 @@ def geometry_grid(model_area, srid) -> gpd.GeoDataFrame: minx, miny, maxx, maxy = model_area.bounds # Some rough heuristic to get the number of points per sub-polygon in the 2 digits range - subd = ceil((len(model_area.boundary.coords) / 32) ** 0.5) + subd = ceil((len(model_area.exterior.coords) / 32) ** 0.5) dx = (maxx - minx) / subd dy = (maxy - miny) / subd elements = [] From c05bb9fd50ba54e040c5ae8b31ec32629a41b792 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 19 Feb 2024 00:39:49 +1000 Subject: [PATCH 28/32] Allows for use of polygons with rings inside --- aequilibrae/project/network/osm/osm_builder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index a71422683..c4e919963 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -180,8 +180,9 @@ def __do_clean(self, conn): return self.logger.info("Cleaning up the network down to the selected area") links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326) - links_left = [[x] for x in links[~links.link_id.isin(links.clip(self.model_area).link_id)].link_id] - conn.executemany("DELETE FROM links WHERE link_id = ?", links_left) + existing_link_ids = gpd.sjoin(links, self.model_area, how="left").dropna().link_id.to_numpy() + to_delete = [[x] for x in links[~links.link_id.isin(existing_link_ids)].link_id] + conn.executemany("DELETE FROM links WHERE link_id = ?", to_delete) conn.commit() conn.execute("VACUUM;") From 40c128eba65c08d456202ba015fd143e893c773f Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Mon, 19 Feb 2024 01:47:17 +1000 Subject: [PATCH 29/32] adjusts types --- aequilibrae/paths/graph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 26e37d13c..2a75572c1 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -237,9 +237,9 @@ def _build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray): if nans: self.logger.warning(f"Field(s) {nans} has(ve) at least one NaN value. Check your computations") - df.loc[:, "b_node"] = df.b_node.values.astype(self.__integer_type) - df.loc[:, "id"] = df.id.values.astype(self.__integer_type) - df.loc[:, "link_id"] = df.link_id.values.astype(self.__integer_type) + df["link_id"] = df["link_id"].astype(self.__integer_type) + df["b_node"] = df.b_node.values.astype(self.__integer_type) + df["id"] = df.id.values.astype(self.__integer_type) df["direction"] = df.direction.values.astype(np.int8) return all_nodes, num_nodes, nodes_to_indices, fs, df From aacdfcb1328f33a772371e701c3553eee03c712c Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Fri, 23 Feb 2024 11:50:14 +1000 Subject: [PATCH 30/32] Update pyproject.toml Co-authored-by: Jake Moss --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 509bb0922..ff2ab1ff1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ extend-exclude = '''docs/*''' [build-system] -requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel", "geopandas"] +requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"] [tool.ruff] From 6e897e187bef619bab8f7d13076c47e1c506ecdc Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Fri, 23 Feb 2024 11:52:52 +1000 Subject: [PATCH 31/32] . --- aequilibrae/project/network/osm/osm_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index c4e919963..f4aece221 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -232,7 +232,7 @@ def __build_link_types(self, df): df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") return df.drop(columns=["highway"]) - def __define_link_type(self, link_type: str) -> [str, str]: + def __define_link_type(self, link_type: str) -> tuple[str, str]: proj_link_types = self.project.network.link_types original_link_type = link_type link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower() From 1b8268e122acc49bf800f488077b5450b017aa3b Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Fri, 23 Feb 2024 12:16:26 +1000 Subject: [PATCH 32/32] . --- aequilibrae/project/network/osm/osm_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index f4aece221..af14827cb 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -4,7 +4,7 @@ import string from math import floor from pathlib import Path -from typing import List +from typing import List, Tuple import numpy as np import pandas as pd @@ -232,7 +232,7 @@ def __build_link_types(self, df): df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") return df.drop(columns=["highway"]) - def __define_link_type(self, link_type: str) -> tuple[str, str]: + def __define_link_type(self, link_type: str) -> Tuple[str, str]: proj_link_types = self.project.network.link_types original_link_type = link_type link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower()