From 3ffc565e23e06d9d374ec98847533bb10ea77e6b Mon Sep 17 00:00:00 2001 From: Elizabeth Sall Date: Tue, 1 Aug 2023 17:20:45 -0700 Subject: [PATCH] transit tests working + linting --- .github/.github/pull_request_template.md | 18 +-- .github/ISSUE_TEMPLATE/bug_report.md | 7 +- .github/ISSUE_TEMPLATE/feature_request.md | 5 + CONTRIBUTING.md | 3 - README.md | 37 +++-- .../project_cards/transit_route_change.yml | 30 ++-- network_wrangler/roadwaynetwork.py | 2 +- network_wrangler/scenario.py | 2 +- network_wrangler/transit/feed.py | 86 +++++++++-- network_wrangler/transit/schemas.py | 12 ++ network_wrangler/transit/selection.py | 49 ++++--- network_wrangler/transitnetwork.py | 137 ++++++++++-------- network_wrangler/utils/utils.py | 4 +- ...dation - Transit Roadway Consistency.ipynb | 8 +- tests/test_transit/test_selections.py | 9 +- tests/test_transit/test_transit.py | 10 +- tests/test_transit_with_roadnet.py | 2 +- tests/test_utils.py | 4 +- 18 files changed, 267 insertions(+), 158 deletions(-) diff --git a/.github/.github/pull_request_template.md b/.github/.github/pull_request_template.md index 7fed4d0e..565bbdac 100644 --- a/.github/.github/pull_request_template.md +++ b/.github/.github/pull_request_template.md @@ -1,25 +1,25 @@ -## What existing problem does the pull request solve and why should we include it? +# Pull Request +## What existing problem does the pull request solve and why should we include it? ## What is the testing plan? *Demonstrate the code is solid by discussing how results are verified and covered by tests* - - [ ] Code for this PR is covered in tests - - [ ] Code passes all existing tests +- [ ] Code for this PR is covered in tests +- [ ] Code passes all existing tests ## Code formatting + *Code should be PEP8 compliant before merging by running a package like [`black`](https://pypi.org/project/black/)* - - [ ] Code linted +- [ ] Code linted ## Applicable Issues + *Please do not create a Pull Request without creating an issue first.* *Put `closes #XXXX` in your comment to auto-close the issue that your PR fixes.* - -#### Issues List - - - closes... - - closes... +- closes... +- closes... diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 6cc2459a..4d3e7e11 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -8,30 +8,33 @@ assignees: '' --- ## Describe the bug + A clear and concise description of what the bug is or the error code you got. e.g. + ```python KeyError: 'Passing list-likes to .loc or [] with any missing labels is no longer supported, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike' ``` ## To Reproduce + Steps to reproduce the behavior: + 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error ### Failing tests + - [ ] No applicable test failed, need to create. - [ ] ### Triggering line of code - ### Thoughts on resolution ### Full stack trace - ### Environment Operating system: diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 34ce645e..5cda9343 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -8,6 +8,7 @@ assignees: '' --- ### User Story + *As a ...insert type of user... 
I'd like to ...insert desired feature or behavior...* ### Priority @@ -17,16 +18,20 @@ assignees: '' ### Resolution Ideas ### Project + *Is there a funder or project associated with this feature?* ### Who should be involved? + Implementer: Commenters: Users: Reviewers: ### Risk + *Will this potentially break anything?* #### Tests + *What are relevant tests or what tests need to be created in order to determine that this issue is complete?* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9ff1977a..5eacb203 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,9 @@ # Contributing to Network Wrangler - ## Roles - ## How to Contribute - ### Setup 1. Make sure you have a [GitHub](https://github.com/) account. diff --git a/README.md b/README.md index d40b3881..89a990f1 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Network Wrangler is a Python library for managing travel model network scenarios. ## System Requirements + Network Wrangler should be operating system agonistic and has been tested on Ubuntu and Mac OS. In order to assist in installation, its helpful to have either [miniconda](https://docs.conda.io/en/latest/miniconda.html), [anaconda](https://docs.conda.io/projects/continuumio-conda/en/latest/user-guide/install/index.html#regular-installation) or [Docker CE](https://docs.docker.com/install/) installed. If you don't have any of these already, we reommend starting with Miniconda for Python 3.7 as it has the smallest footprint. `conda` is the environment manager that is contained within both the Anaconda and mini-conda applications. All commands described below should be entered into the `Ananconda Prompt` command window. @@ -10,6 +11,7 @@ In order to assist in installation, its helpful to have either [miniconda](https Network Wrangler does require Python 3.7+. If you have a different version of Python installed, `conda` will take care of installing it for you in the installation instructions below. ## Installation + Network Wrangler uses Python 3.6 and above. Requirements are stored in `requirements.txt` but are automatically installed when using `pip`. If you are managing multiple python versions, we suggest using [`virtualenv`](https://virtualenv.pypa.io/en/latest/) or [`conda`](https://conda.io/en/latest/) virtual environments. `conda` is the environment manager that is contained within both the Anaconda and mini-conda applications. Do not add Anaconda to the system path during installation. This may cause problems with other programs that require python 2.7 to be placed in the system path. @@ -27,6 +29,7 @@ pytest Network wrangler can be installed in several ways depending on the user's needs. Installing from github is the simplest method and is appropriate when the user does not anticipate needing to update network wrangler. An update will require rebuilding the network wrangler environment. Installing from clone is slightly more involved and requires the user to have a git manager on their machine, but permits the user to install network wrangler with the `-e`, edit, option so that their network wrangler installation can be updated through pulling new commits from the network wrangler repo without a full reinstallation. ### From GitHub + Use the package manager [pip](https://pip.pypa.io/en/stable/) to install Network Wrangler from the source on GitHub. 
```bash @@ -39,6 +42,7 @@ pip install git+https://github.com/wsp-sag/network_wrangler.git@master#egg=netwo Note: if you wanted to install from a specific tag/version number or branch, replace `@master` with `@` or `@tag` ### From Clone + If you are going to be working on Network Wrangler locally, you might want to clone it to your local machine and install it from the clone. The -e will install it in [editable mode](https://pip.pypa.io/en/stable/reference/pip_install/?highlight=editable#editable-installs). This is also useful if you want to continue to update your Network Wrangler as it is developed on GitHub. @@ -131,16 +135,17 @@ Note: if you are not part of the project team and want to contribute code back t 4. Command to exit container: `exit` Containers: - - `wrangler-jupyter` started by running `docker-compose run wrangler-jupyter --build` is appropriate for running and testing wrangler. - - Default action is to start [jupyter notebook](https://jupyter.org/) which can be found at http://127.0.0.1:8888 - - Safe: It creates an empty folder to store jupyter notebooks within the container but wont overwrite the source files on your actual machine. - - Starting Bash: You can also start the container with a command line using `docker-compose run wrangler-jupyter /bin/bash --build`. - - Doesn't install development dependencies (although they can be installed from within the container) - - `wrangler-ci` is a small image without extras meant for running tests and deploying to continuous integration server. - - default command is to run [pytest](https://docs.pytest.org/en/latest/). - - contains development dependencies so that it can run tests and build docs. - - `wrangler-dev` is the most powerful but dangerous container `docker-compose run wrangler-dev /bin/bash --build` - - Warning: It will synchronize code edited from the container to your wrangler clone. This is great for developing within an IDE, but please take this into account. + +- `wrangler-jupyter` started by running `docker-compose run wrangler-jupyter --build` is appropriate for running and testing wrangler. + - Default action is to start [jupyter notebook](https://jupyter.org/) which can be found at + - Safe: It creates an empty folder to store jupyter notebooks within the container but wont overwrite the source files on your actual machine. + - Starting Bash: You can also start the container with a command line using `docker-compose run wrangler-jupyter /bin/bash --build`. + - Doesn't install development dependencies (although they can be installed from within the container) +- `wrangler-ci` is a small image without extras meant for running tests and deploying to continuous integration server. + - default command is to run [pytest](https://docs.pytest.org/en/latest/). + - contains development dependencies so that it can run tests and build docs. +- `wrangler-dev` is the most powerful but dangerous container `docker-compose run wrangler-dev /bin/bash --build` + - Warning: It will synchronize code edited from the container to your wrangler clone. This is great for developing within an IDE, but please take this into account. 
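For example, the `wrangler-ci` image described above can be used to run the test suite without setting up a local Python environment. This is a minimal sketch that assumes the repository's `docker-compose.yml` defines the service names listed above and that the test directories are available inside the container; the pytest arguments shown are illustrative only:

```bash
# Run the test suite inside the wrangler-ci container (its default command is pytest).
docker-compose run --rm wrangler-ci

# Or pass pytest arguments through explicitly, e.g. to run only the transit tests.
docker-compose run --rm wrangler-ci pytest tests/test_transit
```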
### Common Installation Issues @@ -153,12 +158,14 @@ Try installing rtree on its own from the Anaconda cloud ```bash conda install rtree ``` + **Issue: Shapely, a pre-requisite, doesn't install propertly because it is missing GEOS module** Try installing shapely on its own from the Anaconda cloud ```bash conda install shapely ``` + **Issue: Conda is unable to install a library or to update to a specific library version** Try installing libraries from conda-forge @@ -177,11 +184,12 @@ To start the notebook, open a command line in the network_wrangler top-level dir `jupyter notebook` - ## Documentation + Documentation can be built from the `/docs` folder using the command: `make html` ## Usage + ```python import network_wrangler @@ -210,15 +218,18 @@ my_scenario.apply_all_projects() my_scenario.write("my_project/build", "baseline") ``` -## Attribution + +## Attribution + This project is built upon the ideas and concepts implemented in the [network wrangler project](https://github.com/sfcta/networkwrangler) by the [San Francisco County Transportation Authority](http://github.com/sfcta) and expanded upon by the [Metropolitan Transportation Commission](https://github.com/BayAreaMetro/NetworkWrangler). While Network Wrangler as written here is based on these concepts, the code is distinct and builds upon other packages such as `geopandas` and `partridge` which hadn't been implemented when networkwrangler 1.0 was developed. ## Contributing + Pull requests are welcome. Please open an issue first to discuss what you would like to change. Please make sure to update tests as appropriate. - ## License + [Apache-2.0](https://choosealicense.com/licenses/apache-2.0/) diff --git a/examples/stpaul/project_cards/transit_route_change.yml b/examples/stpaul/project_cards/transit_route_change.yml index 7fc8d64e..309f0e44 100644 --- a/examples/stpaul/project_cards/transit_route_change.yml +++ b/examples/stpaul/project_cards/transit_route_change.yml @@ -1,17 +1,17 @@ project: 365 Bus Reroute transit_property_change: -facility: - route_id: [53-111] - time: ['09:00', '15:00'] -properties: - routing: - existing: - - 62145 #N Robert St - 39430 #Jackson St - set: - - 62145 #N Robert St - -62146 #Robert/6th - -41625 #Robert/7th - 39426 #7th/Jackson with a stop (open to other ways of expressing this) - -39425 #6th Jackson - 39430 #5th Jackson + service: + route_id: [53-111] + time: ['09:00', '15:00'] + property_changes: + routing: + existing: + - 62145 #N Robert St + - 39430 #Jackson St + set: + - 62145 #N Robert St + - -62146 #Robert/6th + - -41625 #Robert/7th + - 39426 #7th/Jackson with a stop (open to other ways of expressing this) + - -39425 #6th Jackson + - 39430 #5th Jackson diff --git a/network_wrangler/roadwaynetwork.py b/network_wrangler/roadwaynetwork.py index 4eb1b2d2..4a632a53 100644 --- a/network_wrangler/roadwaynetwork.py +++ b/network_wrangler/roadwaynetwork.py @@ -9,7 +9,7 @@ import copy from collections import defaultdict -from typing import Collection, List, Union, Mapping, Any +from typing import Collection, List, Union import geopandas as gpd import networkx as nx diff --git a/network_wrangler/scenario.py b/network_wrangler/scenario.py index 80c2c64a..30ab085a 100644 --- a/network_wrangler/scenario.py +++ b/network_wrangler/scenario.py @@ -752,7 +752,7 @@ def create_base_scenario( "No transit directory specified, base scenario will have empty transit network." 
) - transit_net.set_roadnet(road_net, validate_consistency=validate) + transit_net.road_net = road_net base_scenario = {"road_net": road_net, "transit_net": transit_net} return base_scenario diff --git a/network_wrangler/transit/feed.py b/network_wrangler/transit/feed.py index 9a39a683..823570c2 100644 --- a/network_wrangler/transit/feed.py +++ b/network_wrangler/transit/feed.py @@ -10,20 +10,35 @@ from networkx import DiGraph from partridge.config import default_config -from .schemas import FrequenciesSchema, StopsSchema, RoutesSchema, TripsSchema +from .schemas import ( + FrequenciesSchema, + StopsSchema, + RoutesSchema, + TripsSchema, + ShapesSchema, +) from ..utils import fk_in_pk from ..logger import WranglerLogger +# Raised when there is an issue reading the GTFS feed. class FeedReadError(Exception): pass +# Raised when there is an issue with the validation of the GTFS data. class FeedValidationError(Exception): pass class Feed: + """ + Wrapper class around GTFS feed to allow abstraction from partridge. + + TODO: Replace usage of partridge + """ + + # A list of GTFS files that are required to be present in the feed. REQUIRED_FILES = [ "agency.txt", "frequencies.txt", @@ -34,13 +49,16 @@ class Feed: "trips.txt", ] + # Dictionary mapping table names to their corresponding schema classes for validation purposes. SCHEMAS = { "frequencies": FrequenciesSchema, "routes": RoutesSchema, "trips": TripsSchema, "stops": StopsSchema, + "shapes": ShapesSchema, } + # List of table names used for calculating a hash representing the content of the entire feed. TABLES_IN_FEED_HASH = [ "frequencies", "routes", @@ -75,7 +93,6 @@ class Feed: def __init__(self, feed_path: Union[Path, str]): """Constructor for GTFS Feed. - TODO: Replace usage of partridge Updates partridge config based on which files are available. Validates each table to schemas in SCHEMAS as they are read. Validates foreign keys after all tables read in. @@ -101,9 +118,35 @@ def __init__(self, feed_path: Union[Path, str]): WranglerLogger.info(msg) @property - def config(self): + def config(self) -> DiGraph: + """ + Internal configuration of the GTFS feed. + """ return self._config + def __deepcopy__(self, memo): + """Custom implementation of __deepcopy__ method. + + This method is called by copy.deepcopy() to create a deep copy of the object. + + Args: + memo (dict): Dictionary to track objects already copied during deepcopy. + + Returns: + Feed: A deep copy of the Feed object. + """ + # First, create a new instance of the Feed class + new_feed = Feed(feed_path=self.feed_path) + + # Now, use the deepcopy method to create deep copies of each DataFrame + # and assign them to the corresponding attributes in the new Feed object. + for table_name, df in self.__dict__.items(): + if isinstance(df, pd.DataFrame): + new_feed.__dict__[table_name] = copy.deepcopy(df, memo) + + # Return the newly created deep copy of the Feed object + return new_feed + def _read_from_file(self, node: str) -> pd.DataFrame: """Read node from file + validate to schema if table name in SCHEMAS and return dataframe. 
@@ -115,7 +158,7 @@ def _read_from_file(self, node: str) -> pd.DataFrame: """ _table = node.replace(".txt", "") df = self._ptg_feed.get(node) - self.validate_df_as_table(_table, df) + df = self.validate_df_as_table(_table, df) return df.copy() def get(self, table: str, validate: bool = True) -> pd.DataFrame: @@ -128,7 +171,7 @@ def get(self, table: str, validate: bool = True) -> pd.DataFrame: """ df = self.__dict__.get(table, None) if validate: - self.validate_df_as_table(table, df) + df = self.validate_df_as_table(table, df) return df def validate_df_as_table(self, table: str, df: pd.DataFrame) -> bool: @@ -140,16 +183,16 @@ def validate_df_as_table(self, table: str, df: pd.DataFrame) -> bool: """ if self.SCHEMAS.get(table): # set to lazy so that all errors found before returning - self.SCHEMAS[table].validate(df, lazy=True) + df = self.SCHEMAS[table].validate(df, lazy=True) else: WranglerLogger.debug( f"{table} requested but no schema available to validate." ) - return True + return df @property def foreign_keys_valid(self) -> bool: - """Validate all foreign keys exist in primary key table.""" + """Boolean indiciating if all foreign keys exist in primary key table.""" return self.validate_fks() @classmethod @@ -184,14 +227,26 @@ def _config_from_files(cls, ptg_feed: ptg.gtfs.Feed) -> DiGraph: def validate_table_fks( self, table: str, df: pd.DataFrame = None, _raise_error: bool = True - ) -> Union[bool, list]: + ) -> tuple[bool, list]: + """Validates the foreign keys of a specific table. + + Args: + table (str): Table name (i.e. routes, stops, etc) + df (pd.DataFrame, optional): Dataframe to use as that table. If left to default of + None, will retrieve the table using table name using `self.feed.get()`. + + Returns: + tuple[bool, list]: A tuple where the first value is a boolean representing if foreign + keys are valid. The second value is a list of foreign keys that are missing from + the referenced table. + """ _fks = self.INTRA_FEED_FOREIGN_KEYS.get(table, {}) table_valid = True table_fk_missing = [] if not _fks: return True, [] if df is None: - node_df = self.get(table) + df = self.get(table) for _fk_field, (_pk_table, _pk_field) in _fks.items(): if _fk_field not in df.columns: continue @@ -210,7 +265,13 @@ def validate_table_fks( raise FeedValidationError("{table} missing Foreign Keys.") return table_valid, table_fk_missing - def validate_fks(self): + def validate_fks(self) -> bool: + """ + Validates the foreign keys of all the tables in the feed. + + Returns: + bool: If true, all tables in feed have valid foreign keys. 
+ """ all_valid = True missing = [] for df in self.config.nodes.keys(): @@ -225,7 +286,8 @@ def validate_fks(self): return all_valid @property - def feed_hash(self): + def feed_hash(self) -> str: + """A hash representing the contents of the talbes in self.TABLES_IN_FEED_HASH.""" _table_hashes = [ self.get(t, validate=False).df_hash() for t in self.TABLES_IN_FEED_HASH ] diff --git a/network_wrangler/transit/schemas.py b/network_wrangler/transit/schemas.py index dccd6e97..6c7c4317 100644 --- a/network_wrangler/transit/schemas.py +++ b/network_wrangler/transit/schemas.py @@ -17,6 +17,18 @@ class FrequenciesSchema(DataFrameModel): class StopsSchema(DataFrameModel): stop_id: Series[str] = pa.Field(nullable=False, unique=True) + model_node_id: Series[int] = pa.Field(coerce=True, nullable=False) + stop_lat: Series[float] = pa.Field(coerce=True, nullable=False) + stop_lon: Series[float] = pa.Field(coerce=True, nullable=False) + wheelchair_boarding: Series[float] = pa.Field(coerce=True, nullable=True) + + +class ShapesSchema(DataFrameModel): + shape_id: Series[str] = pa.Field(nullable=False) + shape_model_node_id: Series[int] = pa.Field(coerce=True, nullable=False) + shape_pt_lat: Series[float] = pa.Field(coerce=True, nullable=False) + shape_pt_lon: Series[float] = pa.Field(coerce=True, nullable=False) + shape_pt_sequence: Series[int] = pa.Field(coerce=True, nullable=False) class TripsSchema(DataFrameModel): diff --git a/network_wrangler/transit/selection.py b/network_wrangler/transit/selection.py index 75e20fd9..e9500bb1 100644 --- a/network_wrangler/transit/selection.py +++ b/network_wrangler/transit/selection.py @@ -1,7 +1,7 @@ import copy import hashlib -from typing import Collection, Union +from typing import Union import pandas as pd @@ -25,6 +25,7 @@ class TransitSelection: TRIP_QUERY = ["trip_id", "route_id"] NODE_QUERY = ["nodes", "require_all"] QUERY_KEYS = FREQ_QUERY + ROUTE_QUERY + TRIP_QUERY + NODE_QUERY + def __init__( self, net: "TransitNetwork", @@ -50,9 +51,7 @@ def __init__( # Initialize self._selected_trips_df = None - self._stored_feed_hash = copy.deepcopy(self.net.feed_hash) - - + self._stored_feed_hash = copy.deepcopy(self.net.feed.feed_hash) def __nonzero__(self): if len(self.selected_trips_df) > 0: @@ -109,7 +108,7 @@ def node_selection_dict(self): d[k] = v return d - def validate_selection_dict(self,selection_dict:dict)->None: + def validate_selection_dict(self, selection_dict: dict) -> None: """Check that selection dictionary has valid and used properties consistent with network. 
Args: @@ -119,26 +118,34 @@ def validate_selection_dict(self,selection_dict:dict)->None: TransitSelectionFormatError: If not valid """ _nonstandard_sks = list(set(self.selection_dict.keys()) - set(self.QUERY_KEYS)) - _unused_sks = [ i for i in _nonstandard_sks if i.split(".")[0] not in ('trips','routes')] + _unused_sks = [ + i for i in _nonstandard_sks if i.split(".")[0] not in ("trips", "routes") + ] if _unused_sks: WranglerLogger.error( f"Following keys in selection query are not valid: {_unused_sks}" ) - _bad_route_keys = set(self.route_selection_dict.keys()) - set(self.net.feed.routes.columns) + _bad_route_keys = set(self.route_selection_dict.keys()) - set( + self.net.feed.routes.columns + ) if _bad_route_keys: WranglerLogger.error( f"Following route keys not found in routes.txt: {_bad_route_keys}" ) - _bad_trip_keys = set(self.trip_selection_dict.keys()) - set(self.net.feed.trips.columns) + _bad_trip_keys = set(self.trip_selection_dict.keys()) - set( + self.net.feed.trips.columns + ) if _bad_trip_keys: WranglerLogger.error( f"Following trip keys not found in trips.txt: {_bad_trip_keys}" ) if _bad_route_keys or _bad_trip_keys or _unused_sks: - raise TransitSelectionFormatError("Invalid selection keys in selection dictionary.") + raise TransitSelectionFormatError( + "Invalid selection keys in selection dictionary." + ) @property def selected_trips(self) -> list: @@ -158,11 +165,11 @@ def selected_trips_df(self) -> pd.DataFrame: """ if ( self._selected_trips_df is not None - ) and self._stored_feed_hash == self.net.feed_hash: + ) and self._stored_feed_hash == self.net.feed.feed_hash: return self._selected_trips_df self._selected_trips_df = self._select_trips() - self._stored_feed_hash = copy.deepcopy(self.net.feed_hash) + self._stored_feed_hash = copy.deepcopy(self.net.feed.feed_hash) return self._selected_trips_df @staticmethod @@ -191,7 +198,7 @@ def _select_trips(self) -> pd.DataFrame: if self.node_selection_dict: trips_df = self._select_trips_by_nodes(trips_df) - + trips_df = self._select_trips_by_properties(trips_df) _sel_trips = len(trips_df) @@ -225,7 +232,7 @@ def _select_trips_by_nodes( shapes_df = self.net.feed.shapes require_all = self.node_selection_dict.get("require_all", False) node_ids = self.node_selection_dict["nodes"] - node_fk = self.net.SHAPES_FOREIGN_KEY + node_fk, rd_field = self.net.TRANSIT_FOREIGN_KEYS_TO_ROADWAY["shapes"]["links"] if require_all: shape_ids = ( shapes_df.groupby("shape_id").filter( @@ -241,9 +248,9 @@ def _select_trips_by_nodes( _sel_trips = len(trips_df) WranglerLogger.debug(f"Selected {_sel_trips}/{_tot_trips} trips.") - if _sel_trips<10: + if _sel_trips < 10: WranglerLogger.debug(f"{trips_df.trip_id}") - + return trips_df def _select_trips_by_properties( @@ -268,9 +275,9 @@ def _select_trips_by_properties( trips_df = trips_df.copy() _sel_trips = len(trips_df) - + WranglerLogger.debug(f"Selected {_sel_trips}/{_tot_trips} trips.") - if _sel_trips<10: + if _sel_trips < 10: WranglerLogger.debug(f"{trips_df.trip_id}") return trips_df @@ -286,9 +293,9 @@ def _filter_trips_by_trip(self, trips_df: pd.DataFrame) -> pd.DataFrame: f"{_filtered_trips}/{_unfiltered_trips} trips remain after \ filtering to trip selection {self.trip_selection_dict}" ) - if _filtered_trips<10: + if _filtered_trips < 10: WranglerLogger.debug(f"{trips_df.trip_id}") - + return trips_df def _filter_trips_by_route(self, trips_df: pd.DataFrame) -> pd.DataFrame: @@ -305,7 +312,7 @@ def _filter_trips_by_route(self, trips_df: pd.DataFrame) -> pd.DataFrame: 
f"{_filtered_trips}/{_unfiltered_trips} trips remain after \ filtering to route selection {self.route_selection_dict}" ) - if _filtered_trips<10: + if _filtered_trips < 10: WranglerLogger.debug(f"{trips_df.trip_id}") return trips_df @@ -336,7 +343,7 @@ def _filter_trips_by_time_period(self, trips_df: pd.DataFrame) -> pd.DataFrame: {self.freq_selection_dict['end_time']}" ) - if _filtered_trips<10: + if _filtered_trips < 10: WranglerLogger.debug(f"{trips_df.trip_id}") return trips_df diff --git a/network_wrangler/transitnetwork.py b/network_wrangler/transitnetwork.py index ffd53024..aa82439a 100644 --- a/network_wrangler/transitnetwork.py +++ b/network_wrangler/transitnetwork.py @@ -4,9 +4,7 @@ from __future__ import annotations import copy -import hashlib import os -import re from typing import Union import networkx as nx @@ -16,7 +14,7 @@ from projectcard import ProjectCard from .logger import WranglerLogger -from .utils import parse_time_spans_to_secs, fk_in_pk, get_overlapping_range +from .utils import fk_in_pk from .transit import Feed, TransitSelection @@ -43,8 +41,6 @@ class TransitNetwork(object): validated_frequencies (bool): The frequencies have been validated. validated_road_network_consistency (): The network has been validated against the road network. - SHAPES_FOREIGN_KEY (str): foreign key between shapes dataframe and roadway network nodes - STOPS_FOREIGN_KEY (str): foreign key between stops dataframe and roadway network nodes ID_SCALAR (int): scalar value added to create new IDs when necessary. REQUIRED_FILES (list[str]): list of files that the transit network requires. @@ -58,10 +54,10 @@ class TransitNetwork(object): Network. """ TRANSIT_FOREIGN_KEYS_TO_ROADWAY = { - "stops": {"nodes": {"model_node_id": "index"}}, + "stops": {"nodes": ("model_node_id", "model_node_id")}, "shapes": { - "nodes": {"shape_model_node_id": "index"}, - "links": ("shape_model_node_id", ("fks_to_nodes")), + "nodes": ("shape_model_node_id", "model_node_id"), + "links": ("shape_model_node_id", "model_node_id"), }, } @@ -112,18 +108,18 @@ def road_net(self): @road_net.setter def road_net(self, road_net: "RoadwayNetwork"): + if "RoadwayNetwork" not in str(type(road_net)): + WranglerLogger.error(f"Cannot assign to road_net - type {type(road_net)}") + raise ValueError("road_net must be a RoadwayNetwork object.") if self._evaluate_consistency_with_road_net(road_net): - self._road_net_hash = copy.deepcopy(self.road_net.network_hash) self._road_net = road_net + self._road_net_hash = copy.deepcopy(self.road_net.network_hash) + else: raise TransitRoadwayConsistencyError( "RoadwayNetwork not as TransitNetwork base." ) - @property - def feed_hash(self): - return self.feed.feed_hash - @property def consistent_with_road_net(self) -> bool: """Indicate if road_net is consistent with transit network. @@ -142,16 +138,23 @@ def consistent_with_road_net(self) -> bool: self._road_net_hash = copy.deepcopy(self.road_net.network_hash) return self._consistent_with_road_net - def _evaluate_consistency_with_road_net(self, road_net: "RoadwayNetwork") -> bool: + def _evaluate_consistency_with_road_net( + self, road_net: "RoadwayNetwork" = None + ) -> bool: """Checks foreign key and network link relationships between transit feed and a road_net. Args: - road_net (RoadwayNetwork): Roadway network to check relationship with. + road_net (RoadwayNetwork): Roadway network to check relationship with. If None, will + check self.road_net. Returns: bool: boolean indicating if road_net is consistent with transit network. 
""" - _consistency = self._nodes_in_road_net() and self._shape_links_in_road_net() + if road_net is None: + road_net = self.road_net + _consistency = self._nodes_in_road_net( + road_net.nodes_df + ) and self._shape_links_in_road_net(road_net.links_df) return _consistency @property @@ -190,8 +193,6 @@ def validate_roadway_nodes_for_table( "nodes" ) - if _fk_field not in self.feed.get(table).columns: - return True, [] if nodes_df is None: nodes_df = self.road_net.nodes_df @@ -222,6 +223,10 @@ def _nodes_in_road_net(self, nodes_df: pd.DataFrame = None) -> bool: Returns: boolean indicating if relationships are all valid """ + if self.road_net is None: + return ValueError( + "Cannot evaluate consistency because roadway network us not set." + ) all_valid = True missing = [] if nodes_df is None: @@ -242,7 +247,7 @@ def _nodes_in_road_net(self, nodes_df: pd.DataFrame = None) -> bool: return all_valid def _shape_links_in_road_net(self, links_df: pd.DataFrame = None) -> bool: - """Validate that links in transis shapes exist in referenced roadway links. + """Validate that links in transit shapes exist in referenced roadway links. FIXME Args: links_df (pd.DataFrame, optional): Links dataframe from roadway network to validate @@ -251,11 +256,12 @@ def _shape_links_in_road_net(self, links_df: pd.DataFrame = None) -> bool: Returns: boolean indicating if relationships are all valid """ + tr_field, rd_field = self.TRANSIT_FOREIGN_KEYS_TO_ROADWAY["shapes"]["links"] if links_df is None: links_df = self.road_net.links_df - shapes_df = shapes_df.astype({TransitNetwork.SHAPES_FOREIGN_KEY: int}) + shapes_df = self.feed.shapes.astype({tr_field: int}) unique_shape_ids = shapes_df.shape_id.unique().tolist() - + valid = True for id in unique_shape_ids: subset_shapes_df = shapes_df[shapes_df["shape_id"] == id] subset_shapes_df = subset_shapes_df.sort_values(by=["shape_pt_sequence"]) @@ -268,8 +274,8 @@ def _shape_links_in_road_net(self, links_df: pd.DataFrame = None) -> bool: links_df, how="left", left_on=[ - TransitNetwork.SHAPES_FOREIGN_KEY + "_1", - TransitNetwork.SHAPES_FOREIGN_KEY + "_2", + tr_field + "_1", + tr_field + "_2", ], right_on=["A", "B"], indicator=True, @@ -415,17 +421,19 @@ def apply( else: project_dictionary = project_card_dictionary - if project_dictionary["type"].lower() == "transit_property_change:": + if "transit_property_change" in project_dictionary: return self.apply_transit_feature_change( - self.get_selection(project_dictionary["service"]).selected_trips, - project_dictionary["properties"], + self.get_selection( + project_dictionary["transit_property_change"]["service"] + ).selected_trips, + project_dictionary["transit_property_change"]["property_changes"], ) elif project_dictionary.get("pycode"): return self.apply_python_calculation(project_dictionary["pycode"]) else: - msg = f"Can't apply {project_dictionary['type']} – not implemented yet." + msg = "Cannot find transit project in project_dictionary – not implemented yet." 
WranglerLogger.error(msg) raise (msg) @@ -443,7 +451,7 @@ def apply_python_calculation(self, pycode: str) -> "TransitNetwork": def apply_transit_feature_change( self, trip_ids: pd.Series, - properties: list, + property_changes: dict, ) -> "TransitNetwork": """ Changes the transit attributes for the selected features based on the @@ -469,22 +477,20 @@ def apply_transit_feature_change( # self.road_net.build_selection_key(project_dictionary["facility"]) # )["route"] - for i in properties: - if i["property"] in ["headway_secs"]: + for property, p_changes in property_changes.items(): + if property in ["headway_secs"]: net = TransitNetwork._apply_transit_feature_change_frequencies( - net, trip_ids, i + net, trip_ids, property, p_changes ) - elif i["property"] in ["routing"]: + elif property in ["routing"]: net = TransitNetwork._apply_transit_feature_change_routing( - net, trip_ids, i + net, trip_ids, p_changes ) return net def _apply_transit_feature_change_routing( - self, - trip_ids: pd.Series, - properties: dict, + self, trip_ids: pd.Series, routing_change: dict ) -> TransitNetwork: net = copy.deepcopy(self) shapes = net.feed.shapes.copy() @@ -494,20 +500,22 @@ def _apply_transit_feature_change_routing( # A negative sign in "set" indicates a traversed node without a stop # If any positive numbers, stops have changed stops_change = False - if any(x > 0 for x in properties["set"]): + if any(x > 0 for x in routing_change["set"]): # Simplify "set" and "existing" to only stops - properties["set_stops"] = [str(i) for i in properties["set"] if i > 0] - if properties.get("existing") is not None: - properties["existing_stops"] = [ - str(i) for i in properties["existing"] if i > 0 + routing_change["set_stops"] = [ + str(i) for i in routing_change["set"] if i > 0 + ] + if routing_change.get("existing") is not None: + routing_change["existing_stops"] = [ + str(i) for i in routing_change["existing"] if i > 0 ] stops_change = True # Convert ints to objects - properties["set_shapes"] = [str(abs(i)) for i in properties["set"]] - if properties.get("existing") is not None: - properties["existing_shapes"] = [ - str(abs(i)) for i in properties["existing"] + routing_change["set_shapes"] = [str(abs(i)) for i in routing_change["set"]] + if routing_change.get("existing") is not None: + routing_change["existing_shapes"] = [ + str(abs(i)) for i in routing_change["existing"] ] # Replace shapes records @@ -539,6 +547,9 @@ def _apply_transit_feature_change_routing( # Make sure they are ordered by shape_pt_sequence this_shape = this_shape.sort_values(by=["shape_pt_sequence"]) + shape_node_fk, rd_field = self.net.TRANSIT_FOREIGN_KEYS_TO_ROADWAY[ + "shapes" + ]["links"] # Build a pd.DataFrame of new shape records new_shape_rows = pd.DataFrame( { @@ -547,24 +558,24 @@ def _apply_transit_feature_change_routing( "shape_pt_lon": None, # FIXME "shape_osm_node_id": None, # FIXME "shape_pt_sequence": None, - TransitNetwork.SHAPES_FOREIGN_KEY: properties["set_shapes"], + shape_node_fk: routing_change["set_shapes"], } ) # If "existing" is specified, replace only that segment # Else, replace the whole thing - if properties.get("existing") is not None: + if routing_change.get("existing") is not None: # Match list - nodes = this_shape[TransitNetwork.SHAPES_FOREIGN_KEY].tolist() + nodes = this_shape[shape_node_fk].tolist() index_replacement_starts = [ i for i, d in enumerate(nodes) - if d == properties["existing_shapes"][0] + if d == routing_change["existing_shapes"][0] ][0] index_replacement_ends = [ i for i, d in enumerate(nodes) - if 
d == properties["existing_shapes"][-1] + if d == routing_change["existing_shapes"][-1] ][-1] this_shape = pd.concat( [ @@ -590,13 +601,13 @@ def _apply_transit_feature_change_routing( # Replace stop_times and stops records (if required) if stops_change: - # If node IDs in properties["set_stops"] are not already + # If node IDs in routing_change["set_stops"] are not already # in stops.txt, create a new stop_id for them in stops existing_fk_ids = set(stops[TransitNetwork.STOPS_FOREIGN_KEY].tolist()) nodes_df = net.road_net.nodes_df.loc[ :, [TransitNetwork.STOPS_FOREIGN_KEY, "X", "Y"] ] - for fk_i in properties["set_stops"]: + for fk_i in routing_change["set_stops"]: if fk_i not in existing_fk_ids: WranglerLogger.info( "Creating a new stop in stops.txt for node ID: {}".format(fk_i) @@ -650,7 +661,7 @@ def _apply_transit_feature_change_routing( "stop_distance": None, "timepoint": None, "stop_is_skipped": None, - TransitNetwork.STOPS_FOREIGN_KEY: properties["set_stops"], + TransitNetwork.STOPS_FOREIGN_KEY: routing_change["set_stops"], } ) @@ -667,14 +678,14 @@ def _apply_transit_feature_change_routing( # If "existing" is specified, replace only that segment # Else, replace the whole thing - if properties.get("existing") is not None: + if routing_change.get("existing") is not None: # Match list (remember stops are passed in with node IDs) nodes = this_stoptime[TransitNetwork.STOPS_FOREIGN_KEY].tolist() index_replacement_starts = nodes.index( - properties["existing_stops"][0] + routing_change["existing_stops"][0] ) index_replacement_ends = nodes.index( - properties["existing_stops"][-1] + routing_change["existing_stops"][-1] ) this_stoptime = pd.concat( [ @@ -707,7 +718,7 @@ def _apply_transit_feature_change_routing( return net def _apply_transit_feature_change_frequencies( - self, trip_ids: pd.Series, properties: dict + self, trip_ids: pd.Series, property: str, property_change: dict ) -> TransitNetwork: net = copy.deepcopy(self) freq = net.feed.frequencies.copy() @@ -716,8 +727,8 @@ def _apply_transit_feature_change_frequencies( freq = freq[freq.trip_id.isin(trip_ids)] # Check all `existing` properties if given - if properties.get("existing") is not None: - if not all(freq.headway_secs == properties["existing"]): + if property_change.get("existing") is not None: + if not all(freq.headway_secs == property_change["existing"]): WranglerLogger.error( "Existing does not match for at least " "1 trip in:\n {}".format(trip_ids.to_string()) @@ -725,14 +736,14 @@ def _apply_transit_feature_change_frequencies( raise ValueError # Calculate build value - if properties.get("set") is not None: - build_value = properties["set"] + if property_change.get("set") is not None: + build_value = property_change["set"] else: - build_value = [i + properties["change"] for i in freq.headway_secs] + build_value = [i + property_change["change"] for i in freq.headway_secs] q = net.feed.frequencies.trip_id.isin(freq["trip_id"]) - net.feed.frequencies.loc[q, properties["property"]] = build_value + net.feed.frequencies.loc[q, property] = build_value return net diff --git a/network_wrangler/utils/utils.py b/network_wrangler/utils/utils.py index 6c0cf325..bbdef3f5 100644 --- a/network_wrangler/utils/utils.py +++ b/network_wrangler/utils/utils.py @@ -229,8 +229,8 @@ def fk_in_pk( if missing_flag.any(): WranglerLogger.warning( f"Following keys referenced in {fk.name} but missing\ - in primary key table:\n{fk[~fk.isin(pk)]} " + in primary key table:\n{fk[missing_flag]} " ) - return False, fk[~fk.isin(pk)].tolist() + return False, 
fk[missing_flag].tolist() return True, [] diff --git a/notebook/Validation - Transit Roadway Consistency.ipynb b/notebook/Validation - Transit Roadway Consistency.ipynb index 75a58926..62e9cdc4 100644 --- a/notebook/Validation - Transit Roadway Consistency.ipynb +++ b/notebook/Validation - Transit Roadway Consistency.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -52,6 +53,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -103,6 +105,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -137,10 +140,11 @@ ], "source": [ "transit_net = TransitNetwork.read(STPAUL_DIR)\n", - "transit_net.set_roadnet(road_net = road_net, validate_consistency=False)" + "transit_net.road_net = road_net" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -169,6 +173,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -197,6 +202,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/tests/test_transit/test_selections.py b/tests/test_transit/test_selections.py index 7d71186c..9dfcd5fa 100644 --- a/tests/test_transit/test_selections.py +++ b/tests/test_transit/test_selections.py @@ -1,12 +1,11 @@ -import os - import pytest from network_wrangler import WranglerLogger -from network_wrangler.transit.selection import TransitSelectionFormatError, TransitSelectionError -from projectcard import read_card +from network_wrangler.transit.selection import ( + TransitSelectionFormatError, +) -""" +""" Run just the tests using `pytest tests/test_transit/test_selections.py` """ diff --git a/tests/test_transit/test_transit.py b/tests/test_transit/test_transit.py index 440db5d5..16cb5cca 100644 --- a/tests/test_transit/test_transit.py +++ b/tests/test_transit/test_transit.py @@ -1,8 +1,6 @@ import os import pytest -from network_wrangler import TransitNetwork from projectcard import read_card -from network_wrangler import RoadwayNetwork from network_wrangler import WranglerLogger """ @@ -55,7 +53,6 @@ ] -@pytest.mark.menow @pytest.mark.parametrize("test_project", TEST_PROJECT_CARDS) def test_apply_transit_feature_change_from_projectcard( request, stpaul_transit_net, stpaul_card_dir, test_project @@ -99,12 +96,11 @@ def test_wrong_existing(request, stpaul_transit_net): WranglerLogger.info(f"--Finished: {request.node.name}") -@pytest.mark.menow def test_transit_road_consistencies(request, stpaul_transit_net, stpaul_net): WranglerLogger.info(f"--Starting: {request.node.name}") - stpaul_transit_net.set_roadnet(road_net=stpaul_net) + stpaul_transit_net.road_net = stpaul_net + _consistency = stpaul_transit_net._evaluate_consistency_with_road_net() + WranglerLogger.info(f"Consistency with RoadNet: {_consistency}") - stpaul_transit_net.validate_road_network_consistencies() - print(stpaul_transit_net.validated_road_network_consistency) WranglerLogger.info(f"--Finished: {request.node.name}") diff --git a/tests/test_transit_with_roadnet.py b/tests/test_transit_with_roadnet.py index 134113bd..5bd2b800 100644 --- a/tests/test_transit_with_roadnet.py +++ b/tests/test_transit_with_roadnet.py @@ -21,7 +21,7 @@ def test_set_roadnet(request): shapes_file=os.path.join(STPAUL_DIR, "shape.geojson"), ) transit_net = TransitNetwork.read(STPAUL_DIR) - transit_net.set_roadnet(road_net) + transit_net.road_net = road_net print("--Finished:", request.node.name) diff --git a/tests/test_utils.py 
b/tests/test_utils.py
index be46c4a5..f49b9e9f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -125,10 +125,10 @@ def test_get_overlapping_range(request):
     a = range(0, 5)
     b = range(5, 10)
-    assert get_overlapping_range([a, b]) == None
+    assert get_overlapping_range([a, b]) is None
     c = range(100, 106)
-    assert get_overlapping_range([a, b, c]) == None
+    assert get_overlapping_range([a, b, c]) is None
     i = (1, 5)
     j = (2, 7)