diff --git a/aequilibrae/parameters.yml b/aequilibrae/parameters.yml
index d7fe1be3e..9529aa360 100644
--- a/aequilibrae/parameters.yml
+++ b/aequilibrae/parameters.yml
@@ -42,30 +42,6 @@ network:
           description: name
           osm_source: name
           type: text
-      - cycleway:
-          description: cycleway, both way
-          osm_source: cycleway
-          type: text
-      - cycleway_right:
-          description: cycleway, right
-          osm_source: cycleway:right
-          type: text
-      - cycleway_left:
-          description: cycleway, left
-          osm_source: cycleway:left
-          type: text
-      - busway:
-          description: busway
-          osm_source: busway
-          type: text
-      - busway_right:
-          description: busway, right
-          osm_source: busway:right
-          type: text
-      - busway_left:
-          description: busway, left
-          osm_source: busway:left
-          type: text
     two-way:
       - lanes:
           description: lanes
diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py
index 26e37d13c..2a75572c1 100644
--- a/aequilibrae/paths/graph.py
+++ b/aequilibrae/paths/graph.py
@@ -237,9 +237,9 @@ def _build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray):
         if nans:
             self.logger.warning(f"Field(s) {nans} has(ve) at least one NaN value. Check your computations")

-        df.loc[:, "b_node"] = df.b_node.values.astype(self.__integer_type)
-        df.loc[:, "id"] = df.id.values.astype(self.__integer_type)
-        df.loc[:, "link_id"] = df.link_id.values.astype(self.__integer_type)
+        df["link_id"] = df["link_id"].astype(self.__integer_type)
+        df["b_node"] = df.b_node.values.astype(self.__integer_type)
+        df["id"] = df.id.values.astype(self.__integer_type)
         df["direction"] = df.direction.values.astype(np.int8)

         return all_nodes, num_nodes, nodes_to_indices, fs, df
diff --git a/aequilibrae/project/network/link_types.py b/aequilibrae/project/network/link_types.py
index 05b3524ae..df3f91140 100644
--- a/aequilibrae/project/network/link_types.py
+++ b/aequilibrae/project/network/link_types.py
@@ -1,6 +1,7 @@
-from sqlite3 import IntegrityError, Connection
-from aequilibrae.project.network.link_type import LinkType
+from sqlite3 import IntegrityError
+
 from aequilibrae.project.field_editor import FieldEditor
+from aequilibrae.project.network.link_type import LinkType
 from aequilibrae.project.table_loader import TableLoader
 from aequilibrae.utils.db_utils import commit_and_close
 from aequilibrae.utils.spatialite_utils import connect_spatialite
@@ -84,7 +85,6 @@ def new(self, link_type_id: str) -> LinkType:
         tp["link_type_id"] = link_type_id
         lt = LinkType(tp, self.project)
         self.__items[link_type_id] = lt
-        self.logger.warning("Link type has not yet been saved to the database. Do so explicitly")
         return lt

     def delete(self, link_type_id: str) -> None:
diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py
index b9d5f8115..68c0a45fa 100644
--- a/aequilibrae/project/network/network.py
+++ b/aequilibrae/project/network/network.py
@@ -126,6 +126,7 @@ def create_from_osm(
         model_area: Optional[Polygon] = None,
         place_name: Optional[str] = None,
         modes=("car", "transit", "bicycle", "walk"),
+        clean=True,
     ) -> None:
         """
         Downloads the network from Open-Street Maps
@@ -141,6 +142,9 @@ def create_from_osm(
             **modes** (:obj:`tuple`, Optional): List of all modes to be downloaded. Defaults to the modes in the
             parameter file

+            **clean** (:obj:`bool`, Optional): Keeps only the links that intersect the model area polygon. Defaults to
+            True. Does not apply to networks downloaded with a place name.
+
         .. code-block:: python
            >>> from aequilibrae import Project
@@ -191,6 +195,7 @@ def create_from_osm(
                 raise ValueError("Coordinates out of bounds. Polygon must be in WGS84")
             west, south, east, north = model_area.bounds
         else:
+            clean = False
             bbox, report = placegetter(place_name)
             if bbox is None:
                 msg = f'We could not find a reference for place name "{place_name}"'
@@ -229,14 +234,14 @@ def create_from_osm(
                 if subarea.intersects(model_area):
                     polygons.append(subarea)
         self.logger.info("Downloading data")
-        self.downloader = OSMDownloader(polygons, modes, logger=self.logger)
+        downloader = OSMDownloader(polygons, modes, logger=self.logger)
         if pyqt:
-            self.downloader.downloading.connect(self.signal_handler)
+            downloader.downloading.connect(self.signal_handler)

-        self.downloader.doWork()
+        downloader.doWork()

         self.logger.info("Building Network")
-        self.builder = OSMBuilder(self.downloader.json, project=self.project, model_area=model_area)
+        self.builder = OSMBuilder(downloader.data, project=self.project, model_area=model_area, clean=clean)

         if pyqt:
             self.builder.building.connect(self.signal_handler)
diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py
new file mode 100644
index 000000000..bcaa1a6a3
--- /dev/null
+++ b/aequilibrae/project/network/osm/model_area_gridding.py
@@ -0,0 +1,26 @@
+# Inspired by https://www.matecdev.com/posts/shapely-polygon-gridding.html
+from math import ceil
+
+import geopandas as gpd
+import numpy as np
+from shapely.geometry import Polygon
+
+
+def geometry_grid(model_area, srid) -> gpd.GeoDataFrame:
+    minx, miny, maxx, maxy = model_area.bounds
+    # A rough heuristic to keep the number of points per sub-polygon in the two-digit range
+    subd = ceil((len(model_area.exterior.coords) / 32) ** 0.5)
+    dx = (maxx - minx) / subd
+    dy = (maxy - miny) / subd
+    elements = []
+    x1 = minx
+    for _ in range(subd):
+        j1 = miny
+        for _ in range(subd):
+            elements.append(Polygon([[x1, j1], [x1, j1 + dy], [x1 + dx, j1 + dy], [x1 + dx, j1]]))
+            j1 += dy
+        x1 += dx
+
+    gdf = gpd.GeoDataFrame({"id": np.arange(len(elements))}, geometry=elements, crs=srid)
+
+    return gdf.clip(model_area)
diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py
index ecc10c222..af14827cb 100644
--- a/aequilibrae/project/network/osm/osm_builder.py
+++ b/aequilibrae/project/network/osm/osm_builder.py
@@ -1,50 +1,55 @@
+import geopandas as gpd
 import gc
 import importlib.util as iutil
 import string
-from typing import List
+from math import floor
+from pathlib import Path
+from typing import List, Tuple

 import numpy as np
 import pandas as pd
-from shapely import Point
+from pandas import json_normalize
 from shapely.geometry import Polygon

 from aequilibrae.context import get_active_project
 from aequilibrae.parameters import Parameters
-from aequilibrae.project.network.haversine import haversine
-from aequilibrae.project.network.link_types import LinkTypes
+from aequilibrae.project.project_creation import remove_triggers, add_triggers
 from aequilibrae.utils import WorkerThread
-from aequilibrae.utils.db_utils import commit_and_close
+from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns
 from aequilibrae.utils.spatialite_utils import connect_spatialite
+from .model_area_gridding import geometry_grid

 pyqt = iutil.find_spec("PyQt5") is not None
 if pyqt:
     from PyQt5.QtCore import pyqtSignal

-if iutil.find_spec("qgis") is not None:
-    pass
-

 class OSMBuilder(WorkerThread):
     if pyqt:
         building = pyqtSignal(object)

-    def __init__(self, osm_items: List, project, model_area: Polygon) -> None:
+    def __init__(self, data, project, model_area: Polygon, clean: bool) -> None:
         WorkerThread.__init__(self, None)
+
+        project.logger.info("Preparing OSM builder")
+        self.__emit_all(["text", "Preparing OSM builder"])
+
         self.project = project or get_active_project()
         self.logger = self.project.logger
-        self.osm_items = osm_items
-        self.model_area = model_area
+        self.model_area = geometry_grid(model_area, 4326)
         self.path = self.project.path_to_file
         self.node_start = 10000
-        self.__link_types = None  # type: LinkTypes
+        self.clean = clean
         self.report = []
-        self.__model_link_types = []
-        self.__model_link_type_ids = []
-        self.__link_type_quick_reference = {}
-        self.nodes = {}
-        self.node_df = []
-        self.links = {}
-        self.insert_qry = """INSERT INTO {} ({}, geometry) VALUES({}, GeomFromText(?, 4326))"""
+        self.__all_ltp = pd.DataFrame([])
+        self.__link_id = 1
+        self.__valid_links = []
+
+        # Building shapely geometries here makes the code surprisingly slow.
+        self.node_df = data["nodes"]
+        self.node_df.loc[:, "node_id"] = np.arange(self.node_start, self.node_start + self.node_df.shape[0])
+        gc.collect()
+        self.links_df = data["links"]

     def __emit_all(self, *args):
         if pyqt:
@@ -52,212 +57,183 @@ def doWork(self):
         with commit_and_close(connect_spatialite(self.path)) as conn:
-            self.__worksetup()
-            node_count = self.data_structures()
-            self.importing_links(node_count, conn)
+            self.__update_table_structure(conn)
+            self.importing_network(conn)
+
+            self.logger.info("Cleaning things up")
             conn.execute(
                 "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)"
             )
+            conn.commit()
+            self.__do_clean(conn)
+
         self.__emit_all(["finished_threaded_procedure", 0])

-    def data_structures(self):
-        self.logger.info("Separating nodes and links")
-        self.__emit_all(["text", "Separating nodes and links"])
-        self.__emit_all(["maxValue", len(self.osm_items)])
-
-        alinks = []
-        n = []
-        tot_items = len(self.osm_items)
-        # When downloading data for entire countries, memory consumption can be quite intensive
-        # So we get rid of everything we don't need
-        for i in range(tot_items, 0, -1):
-            item = self.osm_items.pop(-1)
-            if item["type"] == "way":
-                alinks.append(item)
-            elif item["type"] == "node":
-                n.append(item)
-            self.__emit_all(["Value", tot_items - i])
-        gc.collect()
+    def importing_network(self, conn):
+        self.logger.info("Importing the network")
+        node_count = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count()

-        self.logger.info("Setting data structures for nodes")
-        self.__emit_all(["text", "Setting data structures for nodes"])
-        self.__emit_all(["maxValue", len(n)])
-
-        self.node_df = []
-        for i, node in enumerate(n):
-            nid = node.pop("id")
-            _ = node.pop("type")
-            node["node_id"] = i + self.node_start
-            node["inside_model"] = self.model_area.contains(Point(node["lon"], node["lat"]))
-            self.nodes[nid] = node
-            self.node_df.append([node["node_id"], nid, node["lon"], node["lat"]])
-            self.__emit_all(["Value", i])
-        del n
-        self.node_df = (
-            pd.DataFrame(self.node_df, columns=["A", "B", "C", "D"])
-            .drop_duplicates(subset=["C", "D"])
-            .to_records(index=False)
-        )
-
-        self.logger.info("Setting data structures for links")
-        self.__emit_all(["text", "Setting data structures for links"])
-        self.__emit_all(["maxValue", len(alinks)])
-
-        all_nodes = []
-        for i, link in enumerate(alinks):
-            osm_id = link.pop("id")
-            _ = link.pop("type")
-            all_nodes.extend(link["nodes"])
-            self.links[osm_id] = link
-            self.__emit_all(["Value", i])
-        del alinks
-
-        self.logger.info("Finalizing data structures")
-        self.__emit_all(["text", "Finalizing data structures"])
-
-        node_count = self.unique_count(np.array(all_nodes))
-
-        return node_count
-
-    def importing_links(self, node_count, conn):
-        node_ids = {}
-
-        vars = {}
-        vars["link_id"] = 1
-        table = "links"
-        fields = self.get_link_fields()
-        self.__update_table_structure(conn)
-        field_names = ",".join(fields)
-
-        self.logger.info("Adding network nodes")
-        self.__emit_all(["text", "Adding network nodes"])
-        sql = "insert into nodes(node_id, is_centroid, osm_id, geometry) Values(?, 0, ?, MakePoint(?,?, 4326))"
-        conn.executemany(sql, self.node_df)
-        conn.commit()
-        del self.node_df
+        self.node_df.osm_id = self.node_df.osm_id.astype(np.int64)
+        self.node_df.set_index(["osm_id"], inplace=True)
+
+        self.__process_link_chunk()
+        shape_ = self.links_df.shape[0]
+        message_step = max(1, floor(shape_ / 100))
+        self.__emit_all(["maxValue", shape_])

-        self.logger.info("Adding network links")
+        self.logger.info("Geo-processing links")
         self.__emit_all(["text", "Adding network links"])
-        L = len(list(self.links.keys()))
-        self.__emit_all(["maxValue", L])
-
-        counter = 0
-        mode_codes, not_found_tags = self.modes_per_link_type(conn)
-        owf, twf = self.field_osm_source()
-        all_attrs = []
-        all_osm_ids = list(self.links.keys())
-        for osm_id in all_osm_ids:
-            link = self.links.pop(osm_id)
+        geometries = []
+        self.links_df.set_index(["osm_id"], inplace=True)
+        for counter, (idx, link) in enumerate(self.links_df.iterrows()):
             self.__emit_all(["Value", counter])
-            counter += 1
-            if counter % 1000 == 0:
-                self.logger.info(f"Creating segments from {counter:,} out of {L:,} OSM link objects")
-            vars["osm_id"] = osm_id
-            vars["link_type"] = "default"
-            linknodes = link["nodes"]
-            linktags = link["tags"]
-
-            indices = np.searchsorted(node_count[:, 0], linknodes)
-            nodedegree = node_count[indices, 1]
-
-            # Makes sure that beginning and end are end nodes for a link
-            nodedegree[0] = 2
-            nodedegree[-1] = 2
-
-            intersections = np.where(nodedegree > 1)[0]
-            segments = intersections.shape[0] - 1
-
-            # Attributes that are common to all individual links/segments
-            vars["direction"] = (linktags.get("oneway") == "yes") * 1
-
-            for k, v in owf.items():
-                vars[k] = linktags.get(v)
-
-            for k, v in twf.items():
-                val = linktags.get(v["osm_source"])
-                if vars["direction"] == 0:
-                    for d1, d2 in [("ab", "forward"), ("ba", "backward")]:
-                        vars[f"{k}_{d1}"] = self.__get_link_property(d2, val, linktags, v)
-                elif vars["direction"] == -1:
-                    vars[f"{k}_ba"] = linktags.get(f"{v['osm_source']}:{'backward'}", val)
-                elif vars["direction"] == 1:
-                    vars[f"{k}_ab"] = linktags.get(f"{v['osm_source']}:{'forward'}", val)
-
-            vars["modes"] = mode_codes.get(linktags.get("highway"), not_found_tags)
-
-            vars["link_type"] = self.__link_type_quick_reference.get(
-                vars["link_type"].lower(), self.__repair_link_type(vars["link_type"])
-            )
+            if counter % message_step == 0:
+                self.logger.info(f"Creating segments from {counter:,} out of {shape_:,} OSM link objects")
+
+            # How can a link have fewer than two points?
+            if not isinstance(link["nodes"], list):
+                self.logger.debug(f"OSM link/feature {idx} does not have a list of nodes.")
+                continue
+
+            if len(link["nodes"]) < 2:
+                self.logger.debug(f"Link {idx} has fewer than two nodes. {link.nodes}")
+                continue
+
+            # Either the link is a straight line between two points,
+            # or all of its midpoints belong to this link alone
+            node_indices = node_count.loc[link["nodes"], "counter"].to_numpy()
+            if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1:
+                # The link has no intersections
+                geometries.append([idx, self._build_geometry(link.nodes)])
+            else:
+                # Make sure we get the first and last nodes, as they are certainly the extremities of the sublinks
+                node_indices[0] = 2
+                node_indices[-1] = 2
+                # The link has intersections, so we build one record per segment between them.
+                # Repeating the link data for each segment is faster than the alternatives, and
+                # the repetition is resolved in the join with the link fields below
+                intersecs = np.where(node_indices > 1)[0]
+                for i, j in zip(intersecs[:-1], intersecs[1:]):
+                    geometries.append([idx, self._build_geometry(link.nodes[i : j + 1])])
+
+        # Builds the links dataframe with its geometries
+        self.links_df.drop(columns=["nodes"], inplace=True)
+        # We build a dataframe with the geometries created above
+        # and join it with the link data
+        geo_df = pd.DataFrame(geometries, columns=["link_id", "geometry"]).set_index("link_id")
+        self.links_df = self.links_df.join(geo_df, how="inner")
+
+        self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1
+
+        self.node_df = self.node_df.reset_index()
+
+        # Saves the data to disk in case there are issues loading it into the database
+        osm_data_path = Path(self.project.project_base_path) / "osm_data"
+        osm_data_path.mkdir(exist_ok=True)
+        self.links_df.to_parquet(osm_data_path / "links.parquet")
+        self.node_df.to_parquet(osm_data_path / "nodes.parquet")
+
+        self.logger.info("Adding nodes to file")
+        self.__emit_all(["text", "Adding nodes to file"])
+
+        # Removing the triggers before adding all nodes makes things a LOT faster
+        remove_triggers(conn, self.logger, "network")
+
+        cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types", "lon", "lat"]
+        insert_qry = f"INSERT INTO nodes ({','.join(cols[:-2])}, geometry) VALUES(?,?,?,?,?, MakePoint(?,?, 4326))"
+        conn.executemany(insert_qry, self.node_df[cols].to_records(index=False))

-        if len(vars["modes"]) > 0:
-            for i in range(segments):
-                attributes = self.__build_link_data(vars, intersections, i, linknodes, node_ids, fields)
-                if attributes is None:
-                    continue
-                all_attrs.append(attributes)
-                vars["link_id"] += 1
-
-            self.__emit_all(["text", f"{counter:,} of {L:,} super links added"])
-            self.links[osm_id] = []
-        sql = self.insert_qry.format(table, field_names, ",".join(["?"] * (len(all_attrs[0]) - 1)))
-        self.logger.info("Adding network links")
-        self.__emit_all(["text", "Adding network links"])
-        try:
-            conn.executemany(sql, all_attrs)
-        except Exception as e:
-            self.logger.error("error when inserting link {}. Error {}".format(all_attrs[0], e.args))
-            self.logger.error(sql)
-            raise e
-
-    def __worksetup(self):
-        self.__link_types = self.project.network.link_types
-        lts = self.__link_types.all_types()
-        for lt_id, lt in lts.items():
-            self.__model_link_types.append(lt.link_type)
-            self.__model_link_type_ids.append(lt_id)
+        del self.node_df
+        gc.collect()

-    def __update_table_structure(self, conn):
-        structure = conn.execute("pragma table_info(Links)").fetchall()
-        has_fields = [x[1].lower() for x in structure]
-        fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"]
-        for field in [f for f in fields if f not in has_fields]:
-            ltype = self.get_link_field_type(field).upper()
-            conn.execute(f"Alter table Links add column {field} {ltype}")
+        # But we need to add them back before adding the links
+        add_triggers(conn, self.logger, "network")
+
+        # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a")
+
+        # I could not get the above line to work, so I used the following code instead
+        self.links_df.index.name = "osm_id"
+        self.links_df.reset_index(inplace=True)
+        insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))"
+        cols_no_geo = self.links_df.columns.tolist()
+        cols_no_geo.remove("geometry")
+        insert_qry = insert_qry.format(", ".join(cols_no_geo), ", ".join(["?"] * len(cols_no_geo)))
+
+        cols = cols_no_geo + ["geometry"]
+        links_df = self.links_df[cols].to_records(index=False)
+
+        del self.links_df
+        gc.collect()
+        self.logger.info("Adding links to file")
+        self.__emit_all(["text", "Adding links to file"])
+        conn.executemany(insert_qry, links_df)
+
+    def _build_geometry(self, nodes: List[int]) -> str:
+        slice = self.node_df.loc[nodes, :]
+        txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist())
+        return f"LINESTRING({txt})"
+
+    def __do_clean(self, conn):
+        if not self.clean:
+            conn.execute("VACUUM;")
+            return
+        self.logger.info("Cleaning the network down to the selected area")
+        links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326)
+        existing_link_ids = gpd.sjoin(links, self.model_area, how="left").dropna().link_id.to_numpy()
+        to_delete = [[x] for x in links[~links.link_id.isin(existing_link_ids)].link_id]
+        conn.executemany("DELETE FROM links WHERE link_id = ?", to_delete)
         conn.commit()
+        conn.execute("VACUUM;")
+
+    def __process_link_chunk(self):
+        self.logger.info("Processing link modes, types and fields")
+        self.__emit_all(["text", "Processing link modes, types and fields"])
+
+        # It is hard to define an optimal chunk_size, so let's assume that 1-2 GB is a good size per chunk
+        # and let's also assume that each row will be 200 fields at 8 bytes each,
+        # so a chunk size of 1 million rows will do.
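+        # Sanity check on that assumption: 1_000_000 rows * 200 fields * 8 bytes/field
+        # = 1.6e9 bytes, i.e. roughly 1.6 GB per chunk.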
+ chunk_size = 1_000_000 + list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + self.links_df = pd.DataFrame([]) + # Initialize link types + with read_and_close(self.project.path_to_file) as conn: + self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) + self.__emit_all(["maxValue", len(list_dfs)]) + for i, df in enumerate(list_dfs): + self.logger.info(f"Processing chunk {i + 1}/{len(list_dfs)}") + self.__emit_all(["Value", i]) + if "tags" in df.columns: + # It is critical to reset the index for the concat below to work + df.reset_index(drop=True, inplace=True) + df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) + df.columns = [x.replace(":", "_") for x in df.columns] + df = self.__build_link_types(df) + df = self.__establish_modes_for_all_links(conn, df) + df = self.__process_link_attributes(df) + else: + self.logger.error("OSM link data does not have tags. Skipping an entire data chunk") + df = pd.DataFrame([]) + list_dfs[i] = df + self.links_df = pd.concat(list_dfs, ignore_index=True) + + def __build_link_types(self, df): + data = [] + df = df.fillna(value={"highway": "missing"}) + df.highway = df.highway.str.lower() + for i, lt in enumerate(df.highway.unique()): + if str(lt) in self.__all_ltp.highway.values: + continue + data.append([*self.__define_link_type(str(lt)), str(lt)]) + self.__all_ltp = pd.concat( + [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])] + ) + self.__all_ltp.drop_duplicates(inplace=True) + df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") + return df.drop(columns=["highway"]) - def __build_link_data(self, vars, intersections, i, linknodes, node_ids, fields): - ii = intersections[i] - jj = intersections[i + 1] - all_nodes = [linknodes[x] for x in range(ii, jj + 1)] - - vars["a_node"] = node_ids.get(linknodes[ii], self.node_start) - if vars["a_node"] == self.node_start: - node_ids[linknodes[ii]] = vars["a_node"] - self.node_start += 1 - - vars["b_node"] = node_ids.get(linknodes[jj], self.node_start) - if vars["b_node"] == self.node_start: - node_ids[linknodes[jj]] = vars["b_node"] - self.node_start += 1 - - vars["distance"] = sum( - [ - haversine(self.nodes[x]["lon"], self.nodes[x]["lat"], self.nodes[y]["lon"], self.nodes[y]["lat"]) - for x, y in zip(all_nodes[1:], all_nodes[:-1]) - ] - ) - - geometry = ["{} {}".format(self.nodes[x]["lon"], self.nodes[x]["lat"]) for x in all_nodes] - inside_area = sum([self.nodes[x]["inside_model"] for x in all_nodes]) - if inside_area == 0: - return None - geometry = "LINESTRING ({})".format(", ".join(geometry)) - - attributes = [vars.get(x) for x in fields] - attributes.append(geometry) - return attributes - - def __repair_link_type(self, link_type: str) -> str: + def __define_link_type(self, link_type: str) -> Tuple[str, str]: + proj_link_types = self.project.network.link_types original_link_type = link_type link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower() @@ -266,53 +242,110 @@ def __repair_link_type(self, link_type: str) -> str: if piece in ["link", "segment", "stretch"]: link_type = "_".join(split[0 : i + 1]) + if self.__all_ltp.shape[0] >= 51: + link_type = "aggregate_link_type" + if len(link_type) == 0: link_type = "empty" - if len(self.__model_link_type_ids) >= 51 and link_type not in self.__model_link_types: - link_type = "aggregate_link_type" - - if link_type in self.__model_link_types: - 
lt = self.__link_types.get_by_name(link_type) + if link_type in self.__all_ltp.link_type.values: + lt = proj_link_types.get_by_name(link_type) if original_link_type not in lt.description: lt.description += f", {original_link_type}" lt.save() - self.__link_type_quick_reference[original_link_type.lower()] = link_type - return link_type + return [lt.link_type_id, link_type] letter = link_type[0] - if letter in self.__model_link_type_ids: + if letter in self.__all_ltp.link_type_id.values: letter = letter.upper() - if letter in self.__model_link_type_ids: + if letter in self.__all_ltp.link_type_id.values: for letter in string.ascii_letters: - if letter not in self.__model_link_type_ids: + if letter not in self.__all_ltp.link_type_id.values: break - lt = self.__link_types.new(letter) + lt = proj_link_types.new(letter) lt.link_type = link_type lt.description = f"Link types from Open Street Maps: {original_link_type}" lt.save() - self.__model_link_types.append(link_type) - self.__model_link_type_ids.append(letter) - self.__link_type_quick_reference[original_link_type.lower()] = link_type - return link_type + return [letter, link_type] - def __get_link_property(self, d2, val, linktags, v): - vald = linktags.get(f'{v["osm_source"]}:{d2}', val) - if vald is None: - return vald + def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFrame: + p = Parameters() + modes = p.parameters["network"]["osm"]["modes"] - if vald.isdigit(): - if vald == val and v["osm_behaviour"] == "divide": - vald = float(val) / 2 - return vald + mode_codes = conn.execute("SELECT mode_name, mode_id from modes").fetchall() + mode_codes = {p[0]: p[1] for p in mode_codes} - @staticmethod - def unique_count(a): - # From: https://stackoverflow.com/a/21124789/1480643 - unique, inverse = np.unique(a, return_inverse=True) - count = np.zeros(len(unique), int) - np.add.at(count, inverse, 1) - return np.vstack((unique, count)).T + type_list = {} + notfound = "" + for mode, val in modes.items(): + all_types = val["link_types"] + md = mode_codes[mode] + for tp in all_types: + type_list[tp] = "".join(sorted("{}{}".format(type_list.get(tp, ""), md))) + if val["unknown_tags"]: + notfound += md + + type_list = {k: "".join(set(v)) for k, v in type_list.items()} + + df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) + df = df.merge(df_aux, on="link_type", how="left").fillna(value={"modes": "".join(sorted(notfound))}) + return df + + def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: + df = df.assign(direction=0, link_id=0) + if "oneway" in df.columns: + df.loc[df.oneway == "yes", "direction"] = 1 + df.loc[df.oneway == "backward", "direction"] = -1 + p = Parameters() + fields = p.parameters["network"]["links"]["fields"] + + for x in fields["one-way"]: + if "link_type" in x.keys(): + continue + keys_ = list(x.values())[0] + field = list(x.keys())[0] + osm_name = keys_.get("osm_source", field).replace(":", "_") + df.rename(columns={osm_name: field}, inplace=True, errors="ignore") + + for x in fields["two-way"]: + keys_ = list(x.values())[0] + field = list(x.keys())[0] + if "osm_source" not in keys_: + continue + osm_name = keys_.get("osm_source", field).replace(":", "_") + if osm_name not in df.columns: + continue + df[f"{field}_ba"] = df[osm_name].copy() + df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore") + if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide": + df[f"{field}_ab"] = pd.to_numeric(df[f"{field}_ab"], 
errors="coerce")
+                df[f"{field}_ba"] = pd.to_numeric(df[f"{field}_ba"], errors="coerce")
+
+                # Divides the values by 2 or zeroes them, depending on the link direction
+                df.loc[df.direction == 0, f"{field}_ab"] /= 2
+                df.loc[df.direction == -1, f"{field}_ab"] = 0
+
+                df.loc[df.direction == 0, f"{field}_ba"] /= 2
+                df.loc[df.direction == 1, f"{field}_ba"] = 0
+
+            if f"{field}_forward" in df:
+                fld = pd.to_numeric(df[f"{field}_forward"], errors="coerce")
+                df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0]
+            if f"{field}_backward" in df:
+                fld = pd.to_numeric(df[f"{field}_backward"], errors="coerce")
+                df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0]
+        cols = list_columns(self.project.conn, "links") + ["nodes"]
+        return df[[x for x in cols if x in df.columns]]
+
+    ######## TABLE STRUCTURE UPDATING ########
+    def __update_table_structure(self, conn):
+        structure = conn.execute("pragma table_info(Links)").fetchall()
+        has_fields = [x[1].lower() for x in structure]
+        fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"]
+        for field in [f for f in fields if f not in has_fields]:
+            ltype = self.get_link_field_type(field).upper()
+            conn.execute(f"Alter table Links add column {field} {ltype}")
+        conn.commit()

     @staticmethod
     def get_link_fields():
@@ -339,51 +372,3 @@ def get_link_field_type(field_name):
         for tp in fields["one-way"]:
             if field_name in tp:
                 return tp[field_name]["type"]
-
-    @staticmethod
-    def field_osm_source():
-        p = Parameters()
-        fields = p.parameters["network"]["links"]["fields"]
-
-        owf = {
-            list(x.keys())[0]: x[list(x.keys())[0]]["osm_source"]
-            for x in fields["one-way"]
-            if "osm_source" in x[list(x.keys())[0]]
-        }
-
-        twf = {}
-        for x in fields["two-way"]:
-            if "osm_source" in x[list(x.keys())[0]]:
-                twf[list(x.keys())[0]] = {
-                    "osm_source": x[list(x.keys())[0]]["osm_source"],
-                    "osm_behaviour": x[list(x.keys())[0]]["osm_behaviour"],
-                }
-        return owf, twf
-
-    def modes_per_link_type(self, conn):
-        p = Parameters()
-        modes = p.parameters["network"]["osm"]["modes"]
-
-        mode_codes = conn.execute("SELECT mode_name, mode_id from modes").fetchall()
-        mode_codes = {p[0]: p[1] for p in mode_codes}
-
-        type_list = {}
-        notfound = ""
-        for mode, val in modes.items():
-            all_types = val["link_types"]
-            md = mode_codes[mode]
-            for tp in all_types:
-                type_list[tp] = "{}{}".format(type_list.get(tp, ""), md)
-            if val["unknown_tags"]:
-                notfound += md
-
-        type_list = {k: "".join(set(v)) for k, v in type_list.items()}
-
-        return type_list, "{}".format(notfound)
-
-    @staticmethod
-    def get_node_fields():
-        p = Parameters()
-        fields = p.parameters["network"]["nodes"]["fields"]
-        fields = [list(x.keys())[0] for x in fields]
-        return fields + ["osm_id"]
diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index 76c810b1e..58b3fd6c9 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -10,20 +10,21 @@
 For the original work, please see https://github.com/gboeing/osmnx
 """

+import gc
+import importlib.util as iutil
 import logging
-import time
 import re
-from typing import List
+import time
+from typing import List, Dict

+import pandas as pd
 import requests
 from shapely import Polygon

-from .osm_params import http_headers, memory
-from aequilibrae.parameters import Parameters
 from aequilibrae.context import get_logger
+from aequilibrae.parameters import Parameters
 from aequilibrae.utils import WorkerThread
-import gc
-import importlib.util as iutil
+from 
.osm_params import http_headers, memory spec = iutil.find_spec("PyQt5") pyqt = spec is not None @@ -50,6 +51,9 @@ def __init__(self, polygons: List[Polygon], modes, logger: logging.Logger = None self.overpass_endpoint = par["overpass_endpoint"] self.timeout = par["timeout"] self.sleeptime = par["sleeptime"] + self._nodes = [] + self._links = [] + self.data: Dict[str, pd.DataFrame] = {"nodes": pd.DataFrame([]), "links": pd.DataFrame([])} def doWork(self): infrastructure = 'way["highway"]' @@ -64,7 +68,7 @@ def doWork(self): m = f"[maxsize: {memory}]" for counter, poly in enumerate(self.polygons): msg = f"Downloading polygon {counter + 1} of {len(self.polygons)}" - self.logger.debug(msg) + self.logger.info(msg) self.__emit_all(["Value", counter]) self.__emit_all(["text", msg]) west, south, east, north = poly.bounds @@ -80,10 +84,26 @@ def doWork(self): ) json = self.overpass_request(data={"data": query_str}, timeout=self.timeout) if json["elements"]: - self.json.extend(json["elements"]) - del json - gc.collect() + for tag, lst in [("node", self._nodes), ("way", self._links)]: + df = pd.DataFrame([item for item in json["elements"] if item["type"] == tag]) + if tag == "node": + df = df.assign(is_centroid=0, modes="", link_types="", node_id=0) + lst.append(df) + del json + gc.collect() + self.__emit_all(["Value", len(self.polygons)]) + self.__emit_all(["text", "Downloading finished. Processing data"]) + for lst, table in [(self._links, "links"), (self._nodes, "nodes")]: + df = pd.DataFrame([]) + if len(lst) > 0: + df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"]) + if table != "links": + df = df.drop(columns=["tags"], errors="ignore") + self.data[table] = df.rename(columns={"id": "osm_id"}, errors="ignore") + lst.clear() + gc.collect() + self.__emit_all(["FinishedDownloading", 0]) def overpass_request(self, data, pause_duration=None, timeout=180, error_pause_duration=None): diff --git a/aequilibrae/transit/map_matching_graph.py b/aequilibrae/transit/map_matching_graph.py index a1ab70418..56e5610d8 100644 --- a/aequilibrae/transit/map_matching_graph.py +++ b/aequilibrae/transit/map_matching_graph.py @@ -118,8 +118,7 @@ def __build_graph_from_cache(self): def __build_graph_from_scratch(self): self.logger.info(f"Creating map-matching graph from scratch for mode_id={self.mode_id}") - self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=np.nan) - self.df.loc[:, "geo"] = self.df.wkt.apply(shapely.wkt.loads) + self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=self.df.wkt.apply(shapely.wkt.loads)) self.df.loc[self.df.link_id < 0, "link_id"] = self.df.link_id * -1 + self.df.link_id.max() + 1 # We make sure all link IDs are in proper order diff --git a/aequilibrae/utils/db_utils.py b/aequilibrae/utils/db_utils.py index 23534138f..83efb49a2 100644 --- a/aequilibrae/utils/db_utils.py +++ b/aequilibrae/utils/db_utils.py @@ -79,6 +79,10 @@ def get_schema(conn, table_name): return {e.name: e for e in rv} +def list_columns(conn, table_name): + return list(get_schema(conn, table_name).keys()) + + def has_column(conn, table_name, col_name): return col_name in get_schema(conn, table_name) diff --git a/requirements.txt b/requirements.txt index d22ed0b9d..04296117f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ shapely pandas pyproj rtree -openmatrix \ No newline at end of file +openmatrix +geopandas \ No newline at end of file diff --git a/tests/aequilibrae/project/data/porto_rico.parquet 
b/tests/aequilibrae/project/data/porto_rico.parquet new file mode 100644 index 000000000..97dfe198c Binary files /dev/null and b/tests/aequilibrae/project/data/porto_rico.parquet differ diff --git a/tests/aequilibrae/project/data/wynnum.parquet b/tests/aequilibrae/project/data/wynnum.parquet new file mode 100644 index 000000000..9c9eeb46c Binary files /dev/null and b/tests/aequilibrae/project/data/wynnum.parquet differ diff --git a/tests/aequilibrae/project/test_osm_downloader.py b/tests/aequilibrae/project/test_osm_downloader.py index 706c04989..7c5fe3bb7 100644 --- a/tests/aequilibrae/project/test_osm_downloader.py +++ b/tests/aequilibrae/project/test_osm_downloader.py @@ -32,7 +32,7 @@ def test_do_work2(self): o = OSMDownloader([box(-112.185, 36.59, -112.179, 36.60)], ["car"]) o.doWork() - if "elements" not in o.json[0]: + if len(o.json) == 0 or "elements" not in o.json[0]: return if len(o.json[0]["elements"]) > 1000: diff --git a/tests/aequilibrae/project/test_polygon_gridding.py b/tests/aequilibrae/project/test_polygon_gridding.py new file mode 100644 index 000000000..21dd40562 --- /dev/null +++ b/tests/aequilibrae/project/test_polygon_gridding.py @@ -0,0 +1,22 @@ +from pathlib import Path + +import geopandas as gpd +import pytest + +from aequilibrae.project.network.osm.model_area_gridding import geometry_grid + + +def test_geometry_grid(): + pth = Path(__file__).parent / "data" + + # Small simple polygon + polygon = gpd.read_parquet(pth / "wynnum.parquet").geometry[0] + grid = geometry_grid(polygon, 4326) + assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001) + assert grid.shape[0] == 1 + + # Bigger polygon + polygon = gpd.read_parquet(pth / "porto_rico.parquet").geometry[0] + grid = geometry_grid(polygon, 4326) + assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001) + assert grid.shape[0] == 16
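
Usage sketch for the new `clean` flag. This is a minimal, illustrative example only: the project path and polygon coordinates below are made up, and it assumes the usual AequilibraE project workflow shown in the `create_from_osm` docstring.

>>> from shapely.geometry import Polygon
>>> from aequilibrae import Project

>>> project = Project()
>>> project.new("/tmp/osm_clean_example")  # hypothetical path

>>> # The model area must be a polygon in WGS84, as create_from_osm enforces
>>> area = Polygon([(153.15, -27.50), (153.15, -27.42), (153.25, -27.42), (153.25, -27.50)])

>>> # clean=True (the default) drops links that do not intersect the model area;
>>> # it is ignored (forced to False) when downloading by place_name
>>> project.network.create_from_osm(model_area=area, modes=("car",), clean=True)
>>> project.close()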