diff --git a/.github/workflows/branch-docs.yml b/.github/workflows/branch-docs.yml
index 4b23a68b4..7a4ab1f1c 100644
--- a/.github/workflows/branch-docs.yml
+++ b/.github/workflows/branch-docs.yml
@@ -40,7 +40,7 @@ jobs:
         id: cache
       - name: Update environment
-        run: mamba env update -n docbuild -f conda-environments/docbuild.yml
+        run: mamba env update --verbose -n docbuild -f conda-environments/docbuild.yml
         if: steps.cache.outputs.cache-hit != 'true'
       - name: Install activitysim
diff --git a/.gitignore b/.gitignore
index f92dc6b1d..ff98b3c68 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,6 @@ sandbox/
 .pytest_cache
 .vagrant
-
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/activitysim/abm/misc.py b/activitysim/abm/misc.py
index 858c7358b..2dd0afdf5 100644
--- a/activitysim/abm/misc.py
+++ b/activitysim/abm/misc.py
@@ -4,6 +4,7 @@
 import logging
 
+import numpy as np
 import pandas as pd
 
 from activitysim.core import workflow
@@ -16,16 +17,16 @@
 
 @workflow.cached_object
-def households_sample_size(state: workflow.State, override_hh_ids):
+def households_sample_size(state: workflow.State, override_hh_ids) -> int:
     if override_hh_ids is None:
-        return state.settings, households_sample_size
+        return state.settings.households_sample_size
     else:
-        return 0 if override_hh_ids is None else len(override_hh_ids)
+        return len(override_hh_ids)
 
 
 @workflow.cached_object
-def override_hh_ids(state: workflow.State):
+def override_hh_ids(state: workflow.State) -> np.ndarray | None:
     hh_ids_filename = state.settings.hh_ids
 
     if hh_ids_filename is None:
@@ -63,12 +64,12 @@ def override_hh_ids(state: workflow.State):
 
 
 @workflow.cached_object
-def trace_od(state: workflow.State):
+def trace_od(state: workflow.State) -> tuple[int, int] | None:
     od = state.settings.trace_od
 
     if od and not (
-        isinstance(od, (list, tuple))
+        isinstance(od, list | tuple)
         and len(od) == 2
         and all(isinstance(x, int) for x in od)
     ):
@@ -81,12 +82,12 @@
 
 
 @workflow.cached_object
-def chunk_size(state: workflow.State):
+def chunk_size(state: workflow.State) -> int:
     _chunk_size = int(state.settings.chunk_size or 0)
     return _chunk_size
 
 
 @workflow.cached_object
-def check_for_variability(state: workflow.State):
+def check_for_variability(state: workflow.State) -> bool:
     return bool(state.settings.check_for_variability)
diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py
index dc5dc8283..d36c017a3 100644
--- a/activitysim/abm/models/accessibility.py
+++ b/activitysim/abm/models/accessibility.py
@@ -3,16 +3,36 @@
 from __future__ import annotations
 
 import logging
+from typing import Any
 
 import numba as nb
 import numpy as np
 import pandas as pd
 
 from activitysim.core import assign, chunk, los, workflow
+from activitysim.core.configuration.base import PydanticReadable
 
 logger = logging.getLogger(__name__)
 
 
+class AccessibilitySettings(PydanticReadable):
+    """
+    Settings for aggregate accessibility component.
+    """
+
+    CONSTANTS: dict[str, Any] = {}
+
+    land_use_columns: list[str] = []
+    """Only include these columns in the computational tables.
+
+    Memory usage is reduced by only listing the minimum columns needed by
+    the SPEC, and nothing extra.
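+
+    A hypothetical fragment of ``accessibility.yaml`` using this setting
+    (the column names shown are illustrative only)::
+
+        land_use_columns:
+          - TOTEMP
+          - TOTACRE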
+ """ + + SPEC: str = "accessibility.csv" + """Filename for the accessibility specification (csv) file.""" + + @nb.njit def _accumulate_accessibility(arr, orig_zone_count, dest_zone_count): assert arr.size == orig_zone_count * dest_zone_count @@ -144,6 +164,10 @@ def compute_accessibility( land_use: pd.DataFrame, accessibility: pd.DataFrame, network_los: los.Network_LOS, + model_settings: AccessibilitySettings | None = None, + model_settings_file_name: str = "accessibility.yaml", + trace_label: str = "compute_accessibility", + output_table_name: str = "accessibility", ) -> None: """ Compute accessibility for each zone in land use file using expressions from accessibility_spec @@ -160,40 +184,44 @@ def compute_accessibility( product mutes large differences. The decay function on the walk accessibility measure is steeper than automobile or transit. The minimum accessibility is zero. """ + if model_settings is None: + model_settings = AccessibilitySettings.read_settings_file( + state.filesystem, model_settings_file_name + ) - trace_label = "compute_accessibility" - model_settings = state.filesystem.read_model_settings("accessibility.yaml") assignment_spec = assign.read_assignment_spec( - state.filesystem.get_config_file_path("accessibility.csv") + state.filesystem.get_config_file_path(model_settings.SPEC) ) accessibility_df = accessibility if len(accessibility_df.columns) > 0: logger.warning( - f"accessibility table is not empty. Columns:{list(accessibility_df.columns)}" + f"accessibility table is not empty. " + f"Columns:{list(accessibility_df.columns)}" ) raise RuntimeError("accessibility table is not empty.") - constants = model_settings.get("CONSTANTS", {}) + constants = model_settings.CONSTANTS - # only include the land_use columns needed by spec, as specified by land_use_columns model_setting - land_use_columns = model_settings.get("land_use_columns", []) + # only include the land_use columns needed by spec, + # as specified by land_use_columns model_setting + land_use_columns = model_settings.land_use_columns land_use_df = land_use land_use_df = land_use_df[land_use_columns] logger.info( - f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones" + f"Running {trace_label} with {len(accessibility_df.index)} orig zones " + f"{len(land_use_df)} dest zones" ) accessibilities_list = [] for ( - i, + _i, chooser_chunk, - chunk_trace_label, + _chunk_trace_label, chunk_sizer, ) in chunk.adaptive_chunked_choosers(state, accessibility_df, trace_label): - accessibilities = compute_accessibilities_for_zones( state, chooser_chunk, @@ -211,4 +239,4 @@ def compute_accessibility( logger.info(f"{trace_label} computed accessibilities {accessibility_df.shape}") # - write table to pipeline - state.add_table("accessibility", accessibility_df) + state.add_table(output_table_name, accessibility_df) diff --git a/activitysim/abm/models/atwork_subtour_destination.py b/activitysim/abm/models/atwork_subtour_destination.py index e545e2b34..c80175297 100644 --- a/activitysim/abm/models/atwork_subtour_destination.py +++ b/activitysim/abm/models/atwork_subtour_destination.py @@ -8,6 +8,7 @@ from activitysim.abm.models.util import tour_destination from activitysim.core import config, estimation, los, tracing, workflow +from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place logger = logging.getLogger(__name__) @@ -20,25 +21,30 @@ def atwork_subtour_destination( tours: pd.DataFrame, persons_merged: 
pd.DataFrame, network_los: los.Network_LOS, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "atwork_subtour_destination.yaml", + trace_label: str = "atwork_subtour_destination", ) -> None: - trace_label = "atwork_subtour_destination" - model_settings_file_name = "atwork_subtour_destination.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) future_settings = { "SIZE_TERM_SELECTOR": "atwork", "SEGMENTS": ["atwork"], "ORIG_ZONE_ID": "workplace_zone_id", } - model_settings = config.future_model_settings( + model_settings = config.future_component_settings( model_settings_file_name, model_settings, future_settings ) destination_column_name = "destination" - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME want_logsums = logsum_column_name is not None - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) @@ -54,8 +60,8 @@ def atwork_subtour_destination( if estimator: estimator.write_coefficients(model_settings=model_settings) # estimator.write_spec(model_settings, tag='SAMPLE_SPEC') - estimator.write_spec(model_settings, tag="SPEC") - estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"]) + estimator.write_spec(file_name=model_settings.SPEC, tag="SPEC") + estimator.set_alt_id(model_settings.ALT_DEST_COL_NAME) estimator.write_table( state.get_injectable("size_terms"), "size_terms", append=False ) diff --git a/activitysim/abm/models/atwork_subtour_frequency.py b/activitysim/abm/models/atwork_subtour_frequency.py index 7bbee371f..e574ddc17 100644 --- a/activitysim/abm/models/atwork_subtour_frequency.py +++ b/activitysim/abm/models/atwork_subtour_frequency.py @@ -16,6 +16,8 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger(__name__) @@ -26,11 +28,23 @@ def add_null_results(state, trace_label, tours): state.add_table("tours", tours) +class AtworkSubtourFrequencySettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `atwork_subtour_frequency` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + @workflow.step def atwork_subtour_frequency( state: workflow.State, tours: pd.DataFrame, persons_merged: pd.DataFrame, + model_settings: AtworkSubtourFrequencySettings | None = None, + model_settings_file_name: str = "atwork_subtour_frequency.yaml", + trace_label: str = "atwork_subtour_frequency", ) -> None: """ This model predicts the frequency of making at-work subtour tours @@ -38,8 +52,6 @@ def atwork_subtour_frequency( configured by the user). 
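+
+    An illustrative sketch of such an alternatives file (the actual
+    alternative names and purpose columns are defined by the user)::
+
+        alt,eat,business,maint
+        no_subtours,0,0,0
+        eat,1,0,0
+        business1,0,1,0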
""" - trace_label = "atwork_subtour_frequency" - model_settings_file_name = "atwork_subtour_frequency.yaml" trace_hh_id = state.settings.trace_hh_id work_tours = tours[tours.tour_type == "work"] @@ -48,10 +60,15 @@ def atwork_subtour_frequency( add_null_results(state, trace_label, tours) return - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = AtworkSubtourFrequencySettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + estimator = estimation.manager.begin_estimation(state, "atwork_subtour_frequency") - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator @@ -72,7 +89,7 @@ def atwork_subtour_frequency( constants = config.get_model_constants(model_settings) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: expressions.assign_columns( state, diff --git a/activitysim/abm/models/atwork_subtour_mode_choice.py b/activitysim/abm/models/atwork_subtour_mode_choice.py index a989572c9..61972f191 100644 --- a/activitysim/abm/models/atwork_subtour_mode_choice.py +++ b/activitysim/abm/models/atwork_subtour_mode_choice.py @@ -9,6 +9,7 @@ from activitysim.abm.models.util.mode import run_tour_mode_choice_simulate from activitysim.core import config, estimation, expressions, los, tracing, workflow +from activitysim.core.configuration.logit import TourModeComponentSettings from activitysim.core.util import assign_in_place logger = logging.getLogger(__name__) @@ -20,19 +21,23 @@ def atwork_subtour_mode_choice( tours: pd.DataFrame, persons_merged: pd.DataFrame, network_los: los.Network_LOS, + model_settings: TourModeComponentSettings | None = None, + model_settings_file_name: str = "tour_mode_choice.yaml", + trace_label: str = "atwork_subtour_mode_choice", ) -> None: """ At-work subtour mode choice simulate """ - trace_label = "atwork_subtour_mode_choice" - trace_hh_id = state.settings.trace_hh_id - model_settings_file_name = "tour_mode_choice.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TourModeComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.MODE_CHOICE_LOGSUM_COLUMN_NAME mode_column_name = "tour_mode" subtours = tours[tours.tour_category == "atwork"] @@ -57,7 +62,7 @@ def atwork_subtour_mode_choice( ) constants = {} - constants.update(config.get_model_constants(model_settings)) + constants.update(model_settings.CONSTANTS) skim_dict = network_los.get_default_skim_dict() @@ -149,7 +154,7 @@ def atwork_subtour_mode_choice( # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: - tvpb_mode_path_types = model_settings.get("tvpb_mode_path_types") + tvpb_mode_path_types = model_settings.tvpb_mode_path_types for mode, path_types in tvpb_mode_path_types.items(): for direction, skim in zip( ["od", "do"], [tvpb_logsum_odt, tvpb_logsum_dot] @@ -189,12 +194,12 @@ def atwork_subtour_mode_choice( state.add_table("tours", tours) # 
- annotate tours table - if model_settings.get("annotate_tours"): + if model_settings.annotate_tours: tours = state.get_dataframe("tours") expressions.assign_columns( state, df=tours, - model_settings=model_settings.get("annotate_tours"), + model_settings=model_settings.annotate_tours, trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), ) state.add_table("tours", tours) diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py index f6c71351d..8057d2c43 100644 --- a/activitysim/abm/models/atwork_subtour_scheduling.py +++ b/activitysim/abm/models/atwork_subtour_scheduling.py @@ -8,11 +8,15 @@ import pandas as pd from activitysim.abm.models.util.vectorize_tour_scheduling import ( + TourSchedulingSettings, vectorize_subtour_scheduling, ) from activitysim.core import config, estimation, expressions, simulate from activitysim.core import timetable as tt from activitysim.core import tracing, workflow +from activitysim.core.configuration.base import PydanticReadable +from activitysim.core.skim_dataset import SkimDataset +from activitysim.core.skim_dictionary import SkimDict from activitysim.core.util import assign_in_place logger = logging.getLogger(__name__) @@ -26,14 +30,15 @@ def atwork_subtour_scheduling( tours: pd.DataFrame, persons_merged: pd.DataFrame, tdd_alts: pd.DataFrame, - skim_dict, + skim_dict: SkimDict | SkimDataset, + model_settings: TourSchedulingSettings | None = None, + model_settings_file_name: str = "tour_scheduling_atwork.yaml", + trace_label: str = "atwork_subtour_scheduling", ) -> None: """ This model predicts the departure time and duration of each activity for at work subtours tours """ - trace_label = "atwork_subtour_scheduling" - model_settings_file_name = "tour_scheduling_atwork.yaml" trace_hh_id = state.settings.trace_hh_id subtours = tours[tours.tour_category == "atwork"] @@ -42,11 +47,16 @@ def atwork_subtour_scheduling( tracing.no_results(trace_label) return - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TourSchedulingSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + estimator = estimation.manager.begin_estimation(state, "atwork_subtour_scheduling") - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) - sharrow_skip = model_settings.get("sharrow_skip") + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) + sharrow_skip = model_settings.sharrow_skip coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index ba18a09df..16b3141e9 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -5,29 +5,46 @@ import logging import pandas as pd +from pydantic import validator from activitysim.core import config, estimation, simulate, tracing, workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger(__name__) +class AutoOwnershipSettings(LogitComponentSettings): + """ + Settings for the `auto_ownership` component. 
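+
+    All fields (e.g. ``SPEC``, ``COEFFICIENTS``, ``CONSTANTS``) are
+    inherited from :class:`LogitComponentSettings`.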
+ """ + + # This model is relatively simple and has no unique settings + + @workflow.step def auto_ownership_simulate( state: workflow.State, households: pd.DataFrame, households_merged: pd.DataFrame, + model_settings: AutoOwnershipSettings | None = None, + model_settings_file_name: str = "auto_ownership.yaml", + trace_label: str = "auto_ownership_simulate", + trace_hh_id: bool = False, ) -> None: """ Auto ownership is a standard model which predicts how many cars a household with given characteristics owns """ - trace_label = "auto_ownership_simulate" - model_settings_file_name = "auto_ownership.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - trace_hh_id = state.settings.trace_hh_id + + if model_settings is None: + model_settings = AutoOwnershipSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) estimator = estimation.manager.begin_estimation(state, "auto_ownership") - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index f4d148a41..d9449f3a3 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -3,6 +3,8 @@ from __future__ import annotations import logging +from pathlib import Path +from typing import Any import pandas as pd @@ -15,17 +17,34 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.util import reindex logger = logging.getLogger(__name__) +class CdapSettings(PydanticReadable, extra="forbid"): + PERSON_TYPE_MAP: dict[str, list[int]] + INDIV_AND_HHSIZE1_SPEC: str + INTERACTION_COEFFICIENTS: str = "cdap_interaction_coefficients.csv" + FIXED_RELATIVE_PROPORTIONS_SPEC: str = "cdap_fixed_relative_proportions.csv" + ADD_JOINT_TOUR_UTILITY: bool = False + JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" + annotate_persons: PreprocessorSettings | None = None + annotate_households: PreprocessorSettings | None = None + COEFFICIENTS: Path + CONSTANTS: dict[str, Any] = {} + + @workflow.step def cdap_simulate( state: workflow.State, persons_merged: pd.DataFrame, persons: pd.DataFrame, households: pd.DataFrame, + model_settings: CdapSettings | None = None, + model_settings_file_name: str = "cdap.yaml", + trace_label: str = "cdap", ) -> None: """ CDAP stands for Coordinated Daily Activity Pattern, which is a choice of @@ -36,18 +55,16 @@ def cdap_simulate( routines in the cdap directory of activitysim for this purpose. This module simply applies those utilities using the simulation framework. 
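+
+    A hypothetical ``PERSON_TYPE_MAP`` entry in ``cdap.yaml`` (the person
+    type labels and codes shown are illustrative only)::
+
+        PERSON_TYPE_MAP:
+          WORKER: [1, 2]
+          CHILD: [6, 7, 8]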
""" - - trace_label = "cdap" - model_settings = state.filesystem.read_model_settings("cdap.yaml") + if model_settings is None: + model_settings = CdapSettings.read_settings_file( + state.filesystem, model_settings_file_name + ) trace_hh_id = state.settings.trace_hh_id - person_type_map = model_settings.get("PERSON_TYPE_MAP", None) - assert ( - person_type_map is not None - ), "Expected to find PERSON_TYPE_MAP setting in cdap.yaml" + person_type_map = model_settings.PERSON_TYPE_MAP estimator = estimation.manager.begin_estimation(state, "cdap") cdap_indiv_spec = state.filesystem.read_model_spec( - file_name=model_settings["INDIV_AND_HHSIZE1_SPEC"] + file_name=model_settings.INDIV_AND_HHSIZE1_SPEC ) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -56,9 +73,7 @@ def cdap_simulate( ) # Rules and coefficients for generating interaction specs for different household sizes - interaction_coefficients_file_name = model_settings.get( - "INTERACTION_COEFFICIENTS", "cdap_interaction_coefficients.csv" - ) + interaction_coefficients_file_name = model_settings.INTERACTION_COEFFICIENTS cdap_interaction_coefficients = pd.read_csv( state.filesystem.get_config_file_path(interaction_coefficients_file_name), comment="#", @@ -90,16 +105,14 @@ def cdap_simulate( (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0) """ cdap_fixed_relative_proportions = state.filesystem.read_model_spec( - file_name=model_settings["FIXED_RELATIVE_PROPORTIONS_SPEC"] + file_name=model_settings.FIXED_RELATIVE_PROPORTIONS_SPEC ) - add_joint_tour_utility = model_settings.get("ADD_JOINT_TOUR_UTILITY", False) + add_joint_tour_utility = model_settings.ADD_JOINT_TOUR_UTILITY if add_joint_tour_utility: # Rules and coefficients for generating cdap joint tour specs for different household sizes - joint_tour_coefficients_file_name = model_settings.get( - "JOINT_TOUR_COEFFICIENTS", "cdap_joint_tour_coefficients.csv" - ) + joint_tour_coefficients_file_name = model_settings.JOINT_TOUR_COEFFICIENTS cdap_joint_tour_coefficients = pd.read_csv( state.filesystem.get_config_file_path(joint_tour_coefficients_file_name), comment="#", @@ -211,7 +224,7 @@ def cdap_simulate( expressions.assign_columns( state, df=persons, - model_settings=model_settings.get("annotate_persons"), + model_settings=model_settings.annotate_persons, trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), ) @@ -225,7 +238,7 @@ def cdap_simulate( expressions.assign_columns( state, df=households, - model_settings=model_settings.get("annotate_households"), + model_settings=model_settings.annotate_households, trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), ) state.add_table("households", households) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 856c4e1b4..ab4f9acef 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -5,6 +5,7 @@ import logging import random from functools import reduce +from typing import Any, Literal import numpy as np import pandas as pd @@ -14,35 +15,173 @@ from activitysim.abm.models.util import tour_destination from activitysim.abm.tables import shadow_pricing from activitysim.core import estimation, los, tracing, util, workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import TourLocationComponentSettings from 
activitysim.core.expressions import assign_columns logger = logging.getLogger(__name__) -def read_disaggregate_accessibility_yaml(state: workflow.State, file_name): +class DisaggregateAccessibilitySuffixes(PydanticReadable): + SUFFIX: str = "proto_" + ROOTS: list[str] = [ + "persons", + "households", + "tours", + "persons_merged", + "person_id", + "household_id", + "tour_id", + ] + + +class DisaggregateAccessibilityTableSettings(PydanticReadable, extra="forbid"): + index_col: str | None = None + zone_col: str | None = None + rename_columns: dict[str, str] = {} + VARIABLES: dict[str, int | list[int]] + """ + Base value(s) for each variable. + + Results in the cartesian product (all non-repeating combinations) of the + fields. + """ + + mapped_fields: dict[str, dict] = {} + """ + Maps variables to the fields generated in VARIABLES. + + For non-combinatorial fields, users can map a variable to the fields generated + in VARIABLES (e.g., income category bins mapped to median dollar values). + """ + + filter_rows: list[str] = [] + """ + filter rows using pandas expressions. + + Users can also filter rows using these expressions if specific variable + combinations are not desired. + """ + + JOIN_ON: Any = None + """ + The persons variable to join the tours to (e.g., person_number). + This is required only for PROTO_TOURS + """ + + +class DisaggregateAccessibilityAnnotateSettings(PydanticReadable, extra="forbid"): + tablename: str + annotate: PreprocessorSettings + + +class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"): + suffixes: DisaggregateAccessibilitySuffixes = DisaggregateAccessibilitySuffixes() + ORIGIN_SAMPLE_SIZE: float | int = 0 + """ + The number of sampled origins where logsum is calculated. + + Setting this to zero implies sampling all zones. + + Origins without a logsum will draw from the nearest zone with a logsum. This + parameter is useful for systems with a large number of zones with similar + accessibility. Fractional values less than 1 will be interpreted as a percentage, + e.g., 0.5 = 50% sample. + """ + DESTINATION_SAMPLE_SIZE: float | int = 0 + """ + Number of destination zone alternatives sampled for calculating the destination logsum. + + Setting this to zero implies sampling all zones. + + Decimal values < 1 will be interpreted as a percentage, e.g., 0.5 = 50% sample. + """ + + BASE_RANDOM_SEED: int = 0 + add_size_tables: bool = True + zone_id_names: dict[str, str] = {"index_col": "zone_id"} + ORIGIN_SAMPLE_METHOD: Literal[ + None, "full", "uniform", "uniform-taz", "kmeans" + ] = None + """ + The method in which origins are sampled. + + Population weighted sampling can be TAZ-based or "TAZ-agnostic" using KMeans + clustering. The potential advantage of KMeans is to provide a more geographically + even spread of MAZs sampled that do not rely on TAZ hierarchies. Unweighted + sampling is also possible using 'uniform' and 'uniform-taz'. + + - None [Default] - Sample zones weighted by population, ensuring at least + one TAZ is sampled per MAZ. If n-samples > n-tazs then sample 1 MAZ from + each TAZ until n-remaining-samples < n-tazs, then sample n-remaining-samples + TAZs and sample an MAZ within each of those TAZs. If n-samples < n-tazs, then + it proceeds to the above 'then' condition. + + - "kmeans" - K-Means clustering is performed on the zone centroids (must be + provided as maz_centroids.csv), weighted by population. The clustering yields + k XY coordinates weighted by zone population for n-samples = k-clusters + specified. 
Once k new cluster centroids are found, these are then approximated
+    into the nearest available zone centroid and used to calculate
+    accessibilities. By default, the k-means method is run on 10 different
+    initial cluster seeds (n_init) using the
+    [k-means++ seeding algorithm](https://en.wikipedia.org/wiki/K-means%2B%2B).
+    The k-means method runs for max_iter iterations (default=300).
+
+    - "uniform" - Unweighted sample of N zones independent of each other.
+
+    - "uniform-taz" - Unweighted sample of 1 zone per taz up to the N samples
+      specified.
+    """
+
+    ORIGIN_WEIGHTING_COLUMN: str
+    CREATE_TABLES: dict[str, DisaggregateAccessibilityTableSettings | str] = {}
+    MERGE_ON: dict[str, list[str]]
+    """
+    Fields to merge the proto-population logsums onto the full synthetic population.
+
+    The proto-population should be designed such that the logsums are able to be
+    joined exactly on these specified variables to the full population.
+    Users specify the fields to join on using:
+
+    - by: An exact merge will be attempted using these discrete variables.
+    - asof [optional]: The model can perform an "asof" join for continuous variables,
+      which finds the nearest value. This method should not be necessary since
+      synthetic populations are all discrete.
+    - method [optional]: Optional join method can be "soft", default is None. For
+      cases where a full inner join is not possible, a Naive Bayes clustering method
+      is a fast but discretely constrained option. The proto-population is treated as
+      the "training data" to match each synthetic population value to the best
+      possible proto-population candidate. Some refinement may be necessary to make
+      this procedure work.
+    """
+
+    FROM_TEMPLATES: bool = False
+    annotate_proto_tables: list[DisaggregateAccessibilityAnnotateSettings] = []
+    """
+    Allows modification of the proto-population.
+
+    Annotation configurations are available here, if users wish to modify the
+    proto-population beyond basic generation in the YAML.
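+
+    A hypothetical configuration (the table and spec names are illustrative)::
+
+        annotate_proto_tables:
+          - tablename: proto_persons
+            annotate:
+              SPEC: annotate_proto_persons
+              DF: proto_persons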
+    """
+    NEAREST_METHOD: str = "skims"
+
+
+def read_disaggregate_accessibility_yaml(
+    state: workflow.State, file_name
+) -> DisaggregateAccessibilitySettings:
     """
-    Adds in default table suffixes 'proto_' if not defined in the settings file
+    Read disaggregate accessibility settings.
+
+    Default table suffixes ('proto_') now come from the settings class; this
+    function converts any decimal sample rates into integer sample sizes.
     """
-    model_settings = state.filesystem.read_model_settings(file_name)
-    if not model_settings.get("suffixes"):
-        model_settings["suffixes"] = {
-            "SUFFIX": "proto_",
-            "ROOTS": [
-                "persons",
-                "households",
-                "tours",
-                "persons_merged",
-                "person_id",
-                "household_id",
-                "tour_id",
-            ],
-        }
+    model_settings = DisaggregateAccessibilitySettings.read_settings_file(
+        state.filesystem, file_name
+    )
 
     # Convert decimal sample rate to integer sample size
     for sample in ["ORIGIN_SAMPLE_SIZE", "DESTINATION_SAMPLE_SIZE"]:
-        size = model_settings.get(sample, 0)
+        size = getattr(model_settings, sample)
         if size > 0 and size < 1:
-            model_settings[sample] = round(
-                size * len(state.get_dataframe("land_use").index)
+            setattr(
+                model_settings,
+                sample,
+                round(size * len(state.get_dataframe("land_use").index)),
             )
 
     return model_settings
@@ -65,19 +204,16 @@ def __init__(self, state: workflow.State, network_los, chunk_size):
         )
 
         # Random seed
-        self.seed = self.model_settings.get("BASE_RANDOM_SEED", 0) + len(
-            self.land_use.index
-        )
+        self.seed = self.model_settings.BASE_RANDOM_SEED + len(self.land_use.index)
 
         # Generation
         self.params = self.read_table_settings()
 
         self.create_proto_pop()
         logger.info(
-            "Created a proto-population with %s households across %s origin zones to %s possible destination zones"
-            % (
+            "Created a proto-population with {} households across {} origin zones to {} possible destination zones".format(
                 len(self.proto_pop["proto_households"]),
                 len(self.proto_pop["proto_households"].home_zone_id.unique()),
-                self.model_settings["DESTINATION_SAMPLE_SIZE"],
+                self.model_settings.DESTINATION_SAMPLE_SIZE,
             )
         )
         self.inject_tables(state)
@@ -87,11 +223,11 @@ def __init__(self, state: workflow.State, network_los, chunk_size):
         # - initialize shadow_pricing size tables after annotating household and person tables
         # since these are scaled to model size, they have to be created while single-process
         # this can now be called as a standalone model step instead, add_size_tables
-        add_size_tables = self.model_settings.get("add_size_tables", True)
+        add_size_tables = self.model_settings.add_size_tables
        if add_size_tables:
             # warnings.warn(f"Calling add_size_tables from initialize will be removed in the future.", FutureWarning)
             shadow_pricing.add_size_tables(
-                state, self.model_settings.get("suffixes"), scale=False
+                state, self.model_settings.suffixes.dict(), scale=False
             )
 
     def zone_sampler(self):
@@ -130,14 +266,14 @@ def zone_sampler(self):
         # default_zone_col = 'TAZ' if not (self.network_los.zone_system == los.ONE_ZONE) else 'zone_id'
         # zone_cols = self.model_settings["zone_id_names"].get("zone_group_cols", default_zone_col)
 
-        id_col = self.model_settings["zone_id_names"].get("index_col", "zone_id")
-        method = self.model_settings.get("ORIGIN_SAMPLE_METHOD")
-        n_samples = self.model_settings.get("ORIGIN_SAMPLE_SIZE", 0)
+        id_col = self.model_settings.zone_id_names.get("index_col", "zone_id")
+        method = self.model_settings.ORIGIN_SAMPLE_METHOD
+        n_samples = int(self.model_settings.ORIGIN_SAMPLE_SIZE)
 
         # Get weights, need to get households first to get persons merged.
         # Note: This will cause empty zones to be excluded. Which is intended, but just know that.
        zone_weights = self.land_use[
-            self.model_settings["ORIGIN_WEIGHTING_COLUMN"]
+            self.model_settings.ORIGIN_WEIGHTING_COLUMN
         ].to_frame("weight")
         zone_weights = zone_weights[zone_weights.weight != 0]
@@ -176,12 +312,12 @@ def zone_sampler(self):
             # Join the land_use pop on centroids,
             # this also filter only zones we need (relevant if running scaled model)
             centroids_df = centroids_df.join(
-                self.land_use[self.model_settings["ORIGIN_WEIGHTING_COLUMN"]],
+                self.land_use[self.model_settings.ORIGIN_WEIGHTING_COLUMN],
                 how="inner",
             )
             xy_list = list(centroids_df[["X", "Y"]].itertuples(index=False, name=None))
             xy_weights = np.array(
-                centroids_df[self.model_settings["ORIGIN_WEIGHTING_COLUMN"]]
+                centroids_df[self.model_settings.ORIGIN_WEIGHTING_COLUMN]
             )
 
             # Initializer k-means class
@@ -302,14 +438,9 @@ def zone_sampler(self):
 
     def read_table_settings(self):
         # Check if setup properly
-        assert "CREATE_TABLES" in self.model_settings.keys()
-        # Set zone_id name if not already specified
-        self.model_settings["zone_id_names"] = self.model_settings.get(
-            "zone_id_names", {"index_col": "zone_id"}
-        )
-        create_tables = self.model_settings.get("CREATE_TABLES")
-        from_templates = self.model_settings.get("FROM_TEMPLATES", False)
+        create_tables = self.model_settings.CREATE_TABLES
+        from_templates = self.model_settings.FROM_TEMPLATES
         zone_list = self.zone_sampler()
         params = {}
@@ -330,21 +461,23 @@ def read_table_settings(self):
                 params["proto_households"]["zone_col"] = "home_zone_id"
         else:
             assert all(
-                [True for k, v in create_tables.items() if "VARIABLES" in v.keys()]
+                isinstance(v, DisaggregateAccessibilityTableSettings)
+                for v in create_tables.values()
             )
             for name, table in create_tables.items():
+                assert isinstance(table, DisaggregateAccessibilityTableSettings)
                 # Ensure table variables are all lists
                 params[name.lower()] = {
                     "variables": {
                         k: (v if isinstance(v, list) else [v])
-                        for k, v in table["VARIABLES"].items()
+                        for k, v in table.VARIABLES.items()
                     },
-                    "mapped": table.get("mapped_fields", []),
-                    "filter": table.get("filter_rows", []),
-                    "join_on": table.get("JOIN_ON", []),
-                    "index_col": table.get("index_col", []),
-                    "zone_col": table.get("zone_col", []),
-                    "rename_columns": table.get("rename_columns", []),
+                    "mapped": table.mapped_fields,
+                    "filter": table.filter_rows,
+                    "join_on": table.JOIN_ON,
+                    "index_col": table.index_col,
+                    "zone_col": table.zone_col,
+                    "rename_columns": table.rename_columns,
                 }
 
         # Add zones to households dicts as vary_on variable
@@ -457,14 +593,12 @@ def create_proto_pop(self):
         klist = ["proto_households", "proto_persons", "proto_tours"]
 
         # Create ID columns, defaults to "%tablename%_id"
-        hhid, perid, tourid = [
-            self.params[x]["index_col"]
-            if len(self.params[x]["index_col"]) > 0
-            else x + "_id"
+        hhid, perid, tourid = (
+            self.params[x]["index_col"] or x + "_id"
             for x in klist
-        ]
+        )
 
-        if self.model_settings.get("FROM_TEMPLATES"):
+        if self.model_settings.FROM_TEMPLATES:
             table_params = {k: self.params.get(k) for k in klist}
             tables = {
                 k: pd.read_csv(
@@ -475,7 +611,7 @@ def create_proto_pop(self):
             households, persons, tours = self.expand_template_zones(tables)
             households["household_serial_no"] = households[hhid]
         else:
-            households, persons, tours = [self.generate_replicates(k) for k in klist]
+            households, persons, tours = (self.generate_replicates(k) for k in klist)
 
         # Names
         households.name, persons.name, tours.name = klist
@@ -534,17 +670,17 @@ def inject_tables(self, state: workflow.State):
 
     def annotate_tables(self, state: workflow.State):
         # Extract annotations
-        for annotations in 
self.model_settings["annotate_proto_tables"]: - tablename = annotations["tablename"] + for annot in self.model_settings.annotate_proto_tables: + tablename = annot.tablename df = self.state.get_dataframe(tablename) assert df is not None - assert annotations is not None + assert annot is not None assign_columns( state, df=df, model_settings={ - **annotations["annotate"], - **self.model_settings["suffixes"], + **annot.annotate.dict(), + **self.model_settings.suffixes.dict(), }, trace_label=tracing.extend_trace_label("ProtoPop.annotate", tablename), ) @@ -563,7 +699,7 @@ def merge_persons(self): ).merge( self.land_use, left_on=self.params["proto_households"]["zone_col"], - right_on=self.model_settings["zone_id_names"]["index_col"], + right_on=self.model_settings.zone_id_names["index_col"], ) perid = self.params["proto_persons"]["index_col"] @@ -575,7 +711,7 @@ def merge_persons(self): def get_disaggregate_logsums( - state: workflow.State, network_los, chunk_size, trace_hh_id + state: workflow.State, network_los: los.Network_LOS, chunk_size: int, trace_hh_id ): logsums = {} persons_merged = state.get_dataframe("proto_persons_merged").sort_index( @@ -591,11 +727,12 @@ def get_disaggregate_logsums( "non_mandatory_tour_destination", ]: trace_label = tracing.extend_trace_label(model_name, "accessibilities") - print("Running model {}".format(trace_label)) - model_settings = state.filesystem.read_model_settings(model_name + ".yaml") - model_settings["SAMPLE_SIZE"] = disagg_model_settings.get( - "DESTINATION_SAMPLE_SIZE" + print(f"Running model {trace_label}") + + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, model_name + ".yaml" ) + model_settings.SAMPLE_SIZE = disagg_model_settings.DESTINATION_SAMPLE_SIZE estimator = estimation.manager.begin_estimation(state, trace_label) if estimator: location_choice.write_estimation_specs( @@ -605,20 +742,20 @@ def get_disaggregate_logsums( # Append table references in settings with "proto_" # This avoids having to make duplicate copies of config files for disagg accessibilities model_settings = util.suffix_tables_in_settings(model_settings) - model_settings["CHOOSER_ID_COLUMN"] = "proto_person_id" + model_settings.CHOOSER_ID_COLUMN = "proto_person_id" # Include the suffix tags to pass onto downstream logsum models (e.g., tour mode choice) - if model_settings.get("LOGSUM_SETTINGS", None): - suffixes = util.concat_suffix_dict(disagg_model_settings.get("suffixes")) - suffixes.insert(0, model_settings.get("LOGSUM_SETTINGS")) - model_settings["LOGSUM_SETTINGS"] = " ".join(suffixes) + if model_settings.LOGSUM_SETTINGS: + suffixes = util.concat_suffix_dict(disagg_model_settings.suffixes) + suffixes.insert(0, str(model_settings.LOGSUM_SETTINGS)) + model_settings.LOGSUM_SETTINGS = " ".join(suffixes) if model_name != "non_mandatory_tour_destination": spc = shadow_pricing.load_shadow_price_calculator(state, model_settings) # explicitly turning off shadow pricing for disaggregate accessibilities spc.use_shadow_pricing = False # filter to only workers or students - chooser_filter_column = model_settings["CHOOSER_FILTER_COLUMN_NAME"] + chooser_filter_column = model_settings.CHOOSER_FILTER_COLUMN_NAME choosers = persons_merged[persons_merged[chooser_filter_column]] # run location choice and return logsums diff --git a/activitysim/abm/models/free_parking.py b/activitysim/abm/models/free_parking.py index 4db76ff20..97b70ade3 100644 --- a/activitysim/abm/models/free_parking.py +++ b/activitysim/abm/models/free_parking.py @@ -3,9 +3,9 @@ 
from __future__ import annotations
 
 import logging
-from typing import Any
 
 import pandas as pd
+from pydantic import validator
 
 from activitysim.core import (
     config,
@@ -15,17 +15,31 @@
     tracing,
     workflow,
 )
+from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable
+from activitysim.core.configuration.logit import LogitComponentSettings
 
 logger = logging.getLogger(__name__)
 
 
+class FreeParkingSettings(LogitComponentSettings, extra="forbid"):
+    """
+    Settings for the `free_parking` component.
+    """
+
+    preprocessor: PreprocessorSettings | None = None
+    """Setting for the preprocessor."""
+
+    FREE_PARKING_ALT: int
+    """The code for free parking."""
+
+
 @workflow.step
 def free_parking(
     state: workflow.State,
     persons_merged: pd.DataFrame,
     persons: pd.DataFrame,
+    model_settings: FreeParkingSettings | None = None,
     model_settings_file_name: str = "free_parking.yaml",
-    model_settings: dict[str, Any] = workflow.from_yaml("free_parking.yaml"),
     trace_label: str = "free_parking",
 ) -> None:
     """
@@ -35,17 +49,26 @@ def free_parking(
     ----------
     state : workflow.State
     persons_merged : DataFrame
+        This represents the 'choosers' table for this component.
     persons : DataFrame
-    model_settings_file_name : str
-        This filename is used to write settings files in estimation mode.
-    model_settings : dict
-        The settings used in this model component.
-    trace_label : str
-
-    Returns
-    -------
-
+        The original persons table is referenced so the free parking column
+        can be appended to it.
+    model_settings : FreeParkingSettings, optional
+        The settings used in this model component. If not provided, they are
+        loaded out of the configs directory YAML file referenced by
+        the `model_settings_file_name` argument.
+    model_settings_file_name : str, default "free_parking.yaml"
+        This is where model settings are found if `model_settings` is not
+        given explicitly. The same filename is also used to write settings
+        files to the estimation data bundle in estimation mode.
+    trace_label : str, default "free_parking"
+        This label is used for various tracing purposes.
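+
+    A hypothetical usage sketch: the settings can also be constructed up
+    front and passed in, instead of being read from the configs directory::
+
+        model_settings = FreeParkingSettings.read_settings_file(
+            state.filesystem, "free_parking.yaml"
+        )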
""" + if model_settings is None: + model_settings = FreeParkingSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) choosers = pd.DataFrame(persons_merged) choosers = choosers[choosers.workplace_zone_id > -1] @@ -53,10 +76,10 @@ def free_parking( estimator = estimation.manager.begin_estimation(state, "free_parking") - constants = config.get_model_constants(model_settings) + constants = model_settings.CONSTANTS or {} # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} if constants is not None: @@ -70,7 +93,7 @@ def free_parking( trace_label=trace_label, ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator @@ -80,8 +103,10 @@ def free_parking( if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) - estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df, model_settings) + estimator.write_spec(file_name=model_settings.SPEC) + estimator.write_coefficients( + coefficients_df, file_name=model_settings.COEFFICIENTS + ) estimator.write_choosers(choosers) choices = simulate.simple_simulate( @@ -95,7 +120,7 @@ def free_parking( estimator=estimator, ) - free_parking_alt = model_settings["FREE_PARKING_ALT"] + free_parking_alt = model_settings.FREE_PARKING_ALT choices = choices == free_parking_alt if estimator: diff --git a/activitysim/abm/models/initialize.py b/activitysim/abm/models/initialize.py index dc83fef42..8e0c758c0 100644 --- a/activitysim/abm/models/initialize.py +++ b/activitysim/abm/models/initialize.py @@ -5,9 +5,12 @@ import logging import os import warnings +from typing import Any from activitysim.abm.tables import disaggregate_accessibility, shadow_pricing from activitysim.core import chunk, expressions, tracing, workflow +from activitysim.core.configuration.base import PydanticReadable +from activitysim.core.configuration.logit import PreprocessorSettings # We are using the naming conventions in the mtc_asim.h5 example # file for our default list. 
This provides backwards compatibility
@@ -32,7 +35,7 @@ def annotate_tables(state: workflow.State, model_settings, trace_label, chunk_si
     Parameters
     ----------
     state : workflow.State
-    model_settings :
+    model_settings : InitializeTableSettings
     trace_label : str
     chunk_sizer : ChunkSizer
@@ -45,7 +48,7 @@
     chunk_sizer.log_rss(trace_label)
 
-    annotate_tables = model_settings.get("annotate_tables", [])
+    annotate_tables = model_settings.annotate_tables
 
     if not annotate_tables:
         logger.warning(
@@ -59,7 +63,7 @@
     t0 = tracing.print_elapsed_time()
 
     for table_info in annotate_tables:
-        tablename = table_info["tablename"]
+        tablename = table_info.tablename
 
         chunk_sizer.log_rss(f"{trace_label}.pre-get_table.{tablename}")
 
         chunk_sizer.log_df(trace_label, tablename, df)
 
         # - rename columns
-        column_map = table_info.get("column_map", None)
+        column_map = table_info.column_map
         if column_map:
             warnings.warn(
                 f"Setting 'column_map' has been changed to 'rename_columns'. "
@@ -79,11 +83,9 @@
             df.rename(columns=column_map, inplace=True)
 
         # - annotate
-        annotate = table_info.get("annotate", None)
+        annotate = table_info.annotate
         if annotate:
-            logger.info(
-                f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}"
-            )
+            logger.info(f"{trace_label} - annotating {tablename} SPEC {annotate.SPEC}")
             expressions.assign_columns(
                 state, df=df, model_settings=annotate, trace_label=trace_label
             )
 
         chunk_sizer.log_df(trace_label, tablename, None)
 
 
+class AnnotateTableSettings(PydanticReadable):
+    tablename: str
+    annotate: PreprocessorSettings
+    column_map: dict[str, str] | None = None
+
+
+class InitializeTableSettings(PydanticReadable):
+    """
+    Settings for the `initialize_landuse` and `initialize_households` components.
+    """
+
+    annotate_tables: list[AnnotateTableSettings] = []
+
+
 @workflow.step
-def initialize_landuse(state: workflow.State) -> None:
+def initialize_landuse(
+    state: workflow.State,
+    model_settings: InitializeTableSettings | None = None,
+    model_settings_file_name: str = "initialize_landuse.yaml",
+    trace_label: str = "initialize_landuse",
+) -> None:
     """
     Initialize the land use table.
 
     Parameters
     ----------
     state : State
-
-    Returns
-    -------
-    ?
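+    model_settings : InitializeTableSettings, optional
+        The settings used in this model component. If not provided, they
+        are read from `model_settings_file_name`.
+    model_settings_file_name : str, default "initialize_landuse.yaml"
+    trace_label : str, default "initialize_landuse"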
""" - trace_label = "initialize_landuse" - settings_filename = "initialize_landuse.yaml" - with chunk.chunk_log(state, trace_label, base=True) as chunk_sizer: - model_settings = state.filesystem.read_settings_file( - settings_filename, mandatory=True - ) + if model_settings is None: + model_settings = InitializeTableSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + mandatory=True, + ) annotate_tables(state, model_settings, trace_label, chunk_sizer) @@ -126,8 +143,11 @@ def initialize_landuse(state: workflow.State) -> None: @workflow.step -def initialize_households(state: workflow.State) -> None: - trace_label = "initialize_households" +def initialize_households( + state: workflow.State, + model_settings_file_name: str = "initialize_households.yaml", + trace_label: str = "initialize_households", +) -> None: with chunk.chunk_log(state, trace_label, base=True) as chunk_sizer: chunk_sizer.log_rss(f"{trace_label}.inside-yield") @@ -144,19 +164,19 @@ def initialize_households(state: workflow.State) -> None: del persons chunk_sizer.log_df(trace_label, "persons", None) - model_settings = state.filesystem.read_settings_file( - "initialize_households.yaml", mandatory=True + model_settings = InitializeTableSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + mandatory=True, ) annotate_tables(state, model_settings, trace_label, chunk_sizer) # - initialize shadow_pricing size tables after annotating household and person tables # since these are scaled to model size, they have to be created while single-process # this can now be called as a stand alone model step instead, add_size_tables - add_size_tables = model_settings.get("add_size_tables", True) - if add_size_tables: - # warnings.warn(f"Calling add_size_tables from initialize will be removed in the future.", FutureWarning) - suffixes = disaggregate_accessibility.disaggregate_suffixes(state) - shadow_pricing.add_size_tables(state, suffixes) + # warnings.warn(f"Calling add_size_tables from initialize will be removed in the future.", FutureWarning) + suffixes = disaggregate_accessibility.disaggregate_suffixes(state) + shadow_pricing.add_size_tables(state, suffixes) # - preload person_windows person_windows = state.get_dataframe("person_windows") diff --git a/activitysim/abm/models/initialize_tours.py b/activitysim/abm/models/initialize_tours.py index 7cd416a89..da69e8d22 100644 --- a/activitysim/abm/models/initialize_tours.py +++ b/activitysim/abm/models/initialize_tours.py @@ -8,6 +8,8 @@ from activitysim.abm.models.util import tour_frequency as tf from activitysim.core import expressions, tracing, workflow +from activitysim.core.configuration import PydanticReadable +from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.input import read_input_table logger = logging.getLogger(__name__) @@ -76,6 +78,14 @@ def set_tour_index(state: workflow.State, tours, parent_tour_num_col, is_joint): return patched_tours +class InitializeToursSettings(PydanticReadable): + annotate_tours: PreprocessorSettings | None = None + """Preprocessor settings to annotate tours""" + + skip_patch_tour_ids: bool = False + """Skip patching tour_ids""" + + @workflow.step def initialize_tours( state: workflow.State, @@ -96,17 +106,17 @@ def initialize_tours( tours = tours[tours.person_id.isin(persons.index)] # annotate before patching tour_id to allow addition of REQUIRED_TOUR_COLUMNS defined above - model_settings = state.filesystem.read_model_settings( - "initialize_tours.yaml", 
mandatory=True + model_settings = InitializeToursSettings.read_settings_file( + state.filesystem, "initialize_tours.yaml", mandatory=True ) expressions.assign_columns( state, df=tours, - model_settings=model_settings.get("annotate_tours"), + model_settings=model_settings.annotate_tours, trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), ) - skip_patch_tour_ids = model_settings.get("skip_patch_tour_ids", False) + skip_patch_tour_ids = model_settings.skip_patch_tour_ids if skip_patch_tour_ids: pass else: diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py index 50c458e7a..df0b944f4 100644 --- a/activitysim/abm/models/joint_tour_composition.py +++ b/activitysim/abm/models/joint_tour_composition.py @@ -15,6 +15,8 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger(__name__) @@ -25,27 +27,42 @@ def add_null_results(state, trace_label, tours): state.add_table("tours", tours) +class JointTourCompositionSettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `joint_tour_composition` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + @workflow.step def joint_tour_composition( state: workflow.State, tours: pd.DataFrame, households: pd.DataFrame, persons: pd.DataFrame, + model_settings: JointTourCompositionSettings | None = None, + model_settings_file_name: str = "joint_tour_composition.yaml", + trace_label: str = "joint_tour_composition", ) -> None: """ This model predicts the makeup of the travel party (adults, children, or mixed). """ - trace_label = "joint_tour_composition" - model_settings_file_name = "joint_tour_composition.yaml" joint_tours = tours[tours.tour_category == "joint"] + if model_settings is None: + model_settings = JointTourCompositionSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + # - if no joint tours if joint_tours.shape[0] == 0: add_null_results(state, trace_label, tours) return - model_settings = state.filesystem.read_model_settings(model_settings_file_name) estimator = estimation.manager.begin_estimation(state, "joint_tour_composition") # - only interested in households with joint_tours @@ -58,7 +75,7 @@ def joint_tour_composition( ) # - run preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_dict = { "persons": persons, @@ -78,7 +95,7 @@ def joint_tour_composition( ) # - simple_simulate - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py index d94658b78..cb4e2756e 100644 --- a/activitysim/abm/models/joint_tour_destination.py +++ b/activitysim/abm/models/joint_tour_destination.py @@ -8,6 +8,7 @@ from activitysim.abm.models.util import tour_destination from activitysim.core import estimation, los, tracing, workflow +from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place logger = 
logging.getLogger(__name__)
 
 
@@ -19,22 +20,27 @@ def joint_tour_destination(
     tours: pd.DataFrame,
     persons_merged: pd.DataFrame,
     network_los: los.Network_LOS,
+    model_settings: TourLocationComponentSettings | None = None,
+    model_settings_file_name: str = "joint_tour_destination.yaml",
+    trace_label: str = "joint_tour_destination",
 ) -> None:
     """
     Given the tour generation from the above, each tour needs to have a
     destination, so in this case tours are the choosers (with the associated
     person that's making the tour)
     """
+    if model_settings is None:
+        model_settings = TourLocationComponentSettings.read_settings_file(
+            state.filesystem,
+            model_settings_file_name,
+        )
 
-    trace_label = "joint_tour_destination"
-    model_settings_file_name = "joint_tour_destination.yaml"
-    model_settings = state.filesystem.read_model_settings(model_settings_file_name)
     trace_hh_id = state.settings.trace_hh_id
 
-    logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME")
+    logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME
     want_logsums = logsum_column_name is not None
 
-    sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME")
+    sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME
     want_sample_table = (
         state.settings.want_dest_choice_sample_tables and sample_table_name is not None
     )
@@ -52,7 +58,7 @@ def joint_tour_destination(
         estimator.write_coefficients(model_settings=model_settings)
         # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
         estimator.write_spec(model_settings, tag="SPEC")
-        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
+        estimator.set_alt_id(model_settings.ALT_DEST_COL_NAME)
         estimator.write_table(
             state.get_injectable("size_terms"), "size_terms", append=False
         )
diff --git a/activitysim/abm/models/joint_tour_frequency.py b/activitysim/abm/models/joint_tour_frequency.py
index 66996ce1d..656434b16 100644
--- a/activitysim/abm/models/joint_tour_frequency.py
+++ b/activitysim/abm/models/joint_tour_frequency.py
@@ -17,26 +17,44 @@
     tracing,
     workflow,
 )
+from activitysim.core.configuration.base import PreprocessorSettings
+from activitysim.core.configuration.logit import LogitComponentSettings
 
 logger = logging.getLogger(__name__)
 
 
+class JointTourFrequencySettings(LogitComponentSettings, extra="forbid"):
+    """
+    Settings for the `joint_tour_frequency` component.
+    """
+
+    preprocessor: PreprocessorSettings | None = None
+    """Setting for the preprocessor."""
+
+
 @workflow.step
 def joint_tour_frequency(
-    state: workflow.State, households: pd.DataFrame, persons: pd.DataFrame
+    state: workflow.State,
+    households: pd.DataFrame,
+    persons: pd.DataFrame,
+    model_settings: JointTourFrequencySettings | None = None,
+    model_settings_file_name: str = "joint_tour_frequency.yaml",
+    trace_label: str = "joint_tour_frequency",
 ) -> None:
     """
     This model predicts the frequency of making fully joint trips (see the
     alternatives above).
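+
+    The alternatives are read below from
+    ``joint_tour_frequency_alternatives.csv``; an illustrative fragment
+    (the actual alternatives are user-configured)::
+
+        alt,shopping,eatout
+        0_tours,0,0
+        1_Shop,1,0
+        1_Eat,0,1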
""" - trace_label = "joint_tour_frequency" - model_settings_file_name = "joint_tour_frequency.yaml" + if model_settings is None: + model_settings = JointTourFrequencySettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + trace_hh_id = state.settings.trace_hh_id estimator = estimation.manager.begin_estimation(state, "joint_tour_frequency") - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - alternatives = simulate.read_model_alts( state, "joint_tour_frequency_alternatives.csv", set_index="alt" ) @@ -55,7 +73,7 @@ def joint_tour_frequency( ) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_dict = { "persons": persons, @@ -70,7 +88,7 @@ def joint_tour_frequency( trace_label=trace_label, ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 66fcc534c..44e57d604 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -19,25 +19,41 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_simulate import interaction_simulate logger = logging.getLogger(__name__) +class JointTourFrequencyCompositionSettings(LogitComponentSettings): + """ + Settings for the `joint_tour_frequency_composition` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + ALTS_PREPROCESSOR: PreprocessorSettings | None = None + + @workflow.step def joint_tour_frequency_composition( state: workflow.State, households_merged: pd.DataFrame, persons: pd.DataFrame, + model_settings: JointTourFrequencyCompositionSettings | None = None, + model_settings_file_name: str = "joint_tour_frequency_composition.yaml", + trace_label: str = "joint_tour_frequency_composition", ) -> None: """ This model predicts the frequency and composition of fully joint tours. 
""" - - trace_label = "joint_tour_frequency_composition" - model_settings_file_name = "joint_tour_frequency_composition.yaml" - - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = JointTourFrequencyCompositionSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) alt_tdd = simulate.read_model_alts( state, "joint_tour_frequency_composition_alternatives.csv", set_index="alt" @@ -53,7 +69,7 @@ def joint_tour_frequency_composition( logger.info("Running %s with %d households", trace_label, len(choosers)) # alt preprocessor - alt_preprocessor_settings = model_settings.get("ALTS_PREPROCESSOR", None) + alt_preprocessor_settings = model_settings.ALTS_PREPROCESSOR if alt_preprocessor_settings: locals_dict = {} @@ -68,7 +84,7 @@ def joint_tour_frequency_composition( ) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_dict = { "persons": persons, @@ -87,7 +103,7 @@ def joint_tour_frequency_composition( state, "joint_tour_frequency_composition" ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index 3d55e51cb..dad275704 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd -import numpy as np from activitysim.abm.models.util.canonical_ids import MAX_PARTICIPANT_PNUM from activitysim.abm.models.util.overlap import person_time_window_overlap @@ -19,6 +18,8 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.util import assign_in_place, reindex logger = logging.getLogger(__name__) @@ -164,16 +165,16 @@ def participants_chooser( assert probs.index.equals(choosers.index) # choice is boolean (participate or not) - model_settings = state.filesystem.read_model_settings( - "joint_tour_participation.yaml" + model_settings = JointTourParticipationSettings.read_settings_file( + state.filesystem, "joint_tour_participation.yaml", mandatory=False ) - choice_col = model_settings.get("participation_choice", "participate") + choice_col = model_settings.participation_choice assert ( choice_col in spec.columns ), "couldn't find participation choice column '%s' in spec" PARTICIPATE_CHOICE = spec.columns.get_loc(choice_col) - MAX_ITERATIONS = model_settings.get("max_participation_choice_iterations", 5000) + MAX_ITERATIONS = model_settings.max_participation_choice_iterations trace_label = tracing.extend_trace_label(trace_label, "participants_chooser") @@ -205,7 +206,7 @@ def participants_chooser( ) print(unsatisfied_candidates.head(20)) - if model_settings.get("FORCE_PARTICIPATION", False): + if model_settings.FORCE_PARTICIPATION: logger.warning( f"Forcing joint tour participation for {num_tours_remaining} tours." 
) @@ -269,19 +270,27 @@ def participants_chooser( return choices, rands -def annotate_jtp(state: workflow.State, model_settings, trace_label): +def annotate_jtp( + state: workflow.State, + model_settings: JointTourParticipationSettings, + trace_label: str, +): # - annotate persons persons = state.get_dataframe("persons") expressions.assign_columns( state, df=persons, - model_settings=model_settings.get("annotate_persons"), + model_settings=model_settings.annotate_persons, trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), ) state.add_table("persons", persons) -def add_null_results(state, model_settings, trace_label): +def add_null_results( + state: workflow.State, + model_settings: JointTourParticipationSettings, + trace_label: str, +): logger.info("Skipping %s: joint tours", trace_label) # participants table is used downstream in non-joint tour expressions @@ -295,18 +304,42 @@ def add_null_results(state, model_settings, trace_label): annotate_jtp(state, model_settings, trace_label) +class JointTourParticipationSettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `joint_tour_participation` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + annotate_persons: PreprocessorSettings | None = None + """Instructions for annotating the persons table.""" + + participation_choice: str = "participate" + + max_participation_choice_iterations: int = 5000 + + FORCE_PARTICIPATION: bool = False + + @workflow.step def joint_tour_participation( state: workflow.State, tours: pd.DataFrame, persons_merged: pd.DataFrame, + model_settings: JointTourParticipationSettings | None = None, + model_settings_file_name: str = "joint_tour_participation.yaml", + trace_label: str = "joint_tour_participation", ) -> None: """ Predicts for each eligible person to participate or not participate in each joint tour. 
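The explicit fields make the old `.get(key, default)` fallbacks visible in one place; a hedged sketch restating the class defaults, so this object behaves like a minimal `joint_tour_participation.yaml`:

```python
# Hypothetical: the keyword values below simply restate the defaults
# declared on JointTourParticipationSettings.
from activitysim.abm.models.joint_tour_participation import (
    JointTourParticipationSettings,
)

settings = JointTourParticipationSettings(
    SPEC="joint_tour_participation.csv",  # illustrative file name
    COEFFICIENTS="joint_tour_participation_coefficients.csv",  # illustrative
    participation_choice="participate",
    max_participation_choice_iterations=5000,
    FORCE_PARTICIPATION=False,
)
```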
""" - trace_label = "joint_tour_participation" - model_settings_file_name = "joint_tour_participation.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + + if model_settings is None: + model_settings = JointTourParticipationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) trace_hh_id = state.settings.trace_hh_id joint_tours = tours[tours.tour_category == "joint"] @@ -327,7 +360,7 @@ def joint_tour_participation( ) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_dict = { "person_time_window_overlap": lambda x: person_time_window_overlap( @@ -348,7 +381,7 @@ def joint_tour_participation( estimator = estimation.manager.begin_estimation(state, "joint_tour_participation") - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator @@ -384,7 +417,7 @@ def joint_tour_participation( ) # choice is boolean (participate or not) - choice_col = model_settings.get("participation_choice", "participate") + choice_col = model_settings.participation_choice assert ( choice_col in model_spec.columns ), "couldn't find participation choice column '%s' in spec" diff --git a/activitysim/abm/models/joint_tour_scheduling.py b/activitysim/abm/models/joint_tour_scheduling.py index 0a488757a..465b22b96 100644 --- a/activitysim/abm/models/joint_tour_scheduling.py +++ b/activitysim/abm/models/joint_tour_scheduling.py @@ -7,6 +7,7 @@ import pandas as pd from activitysim.abm.models.util.vectorize_tour_scheduling import ( + TourSchedulingSettings, vectorize_joint_tour_scheduling, ) from activitysim.core import ( @@ -17,25 +18,45 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.util import assign_in_place, reindex logger = logging.getLogger(__name__) +# class JointTourSchedulingSettings(LogitComponentSettings, extra="forbid"): +# """ +# Settings for the `joint_tour_scheduling` component. 
+# """ +# +# preprocessor: PreprocessorSettings | None = None +# """Setting for the preprocessor.""" +# +# sharrow_skip: bool = False +# """Setting to skip sharrow""" +# + + @workflow.step def joint_tour_scheduling( state: workflow.State, tours: pd.DataFrame, persons_merged: pd.DataFrame, tdd_alts: pd.DataFrame, + model_settings: TourSchedulingSettings | None = None, + model_settings_file_name: str = "joint_tour_scheduling.yaml", + trace_label: str = "joint_tour_scheduling", ) -> None: """ This model predicts the departure time and duration of each joint tour """ - trace_label = "joint_tour_scheduling" - model_settings_file_name = "joint_tour_scheduling.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TourSchedulingSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) trace_hh_id = state.settings.trace_hh_id joint_tours = tours[tours.tour_category == "joint"] @@ -64,7 +85,7 @@ def joint_tour_scheduling( constants = config.get_model_constants(model_settings) # - run preprocessor to annotate choosers - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} if constants is not None: @@ -82,8 +103,8 @@ def joint_tour_scheduling( estimator = estimation.manager.begin_estimation(state, "joint_tour_scheduling") - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) - sharrow_skip = model_settings.get("sharrow_skip", False) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) + sharrow_skip = model_settings.sharrow_skip coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index b58a3bcb7..937721c05 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -11,6 +11,10 @@ from activitysim.abm.models.util import tour_destination from activitysim.abm.tables import shadow_pricing from activitysim.core import estimation, expressions, los, simulate, tracing, workflow +from activitysim.core.configuration.logit import ( + TourLocationComponentSettings, + TourModeComponentSettings, +) from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate @@ -75,7 +79,7 @@ def write_estimation_specs( state: workflow.State, estimator: estimation.Estimator, - model_settings, + model_settings: TourLocationComponentSettings, settings_file, ): """ @@ -91,8 +95,8 @@ def write_estimation_specs( estimator.write_model_settings(model_settings, settings_file) # estimator.write_spec(model_settings, tag='SAMPLE_SPEC') - estimator.write_spec(model_settings, tag="SPEC") - estimator.write_coefficients(model_settings=model_settings) + estimator.write_spec(file_name=model_settings.SPEC, tag="SPEC") + estimator.write_coefficients(file_name=model_settings.COEFFICIENTS) estimator.write_table( state.get_injectable("size_terms"), "size_terms", append=False @@ -107,7 +111,7 @@ def _location_sample( alternatives, skims, estimator, - model_settings, + model_settings: TourLocationComponentSettings, alt_dest_col_name, chunk_size, chunk_tag, @@ -135,10 +139,7 @@ def _location_sample( logger.info("Running %s with %d persons" % (trace_label, 
len(choosers.index))) - sample_size = model_settings["SAMPLE_SIZE"] - - if "sched" in trace_label: - print() + sample_size = model_settings.SAMPLE_SIZE if state.settings.disable_destination_sampling or ( estimator and estimator.want_unsampled_alternatives @@ -157,15 +158,16 @@ def _location_sample( "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", } - constants = model_settings.get("CONSTANTS", {}) - locals_d.update(constants) + locals_d.update(model_settings.CONSTANTS or {}) spec = simulate.spec_for_segment( state, - model_settings, + None, spec_id="SAMPLE_SPEC", segment_name=segment_name, estimator=estimator, + spec_file_name=model_settings.SAMPLE_SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) # here since presumably we want this when called for either sample or presample @@ -197,13 +199,13 @@ def location_sample( network_los, dest_size_terms, estimator, - model_settings, + model_settings: TourLocationComponentSettings, chunk_size, chunk_tag, trace_label, ): # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings["SIMULATE_CHOOSER_COLUMNS"] + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS choosers = persons_merged[chooser_columns] # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers @@ -213,7 +215,7 @@ def location_sample( skim_dict = network_los.get_default_skim_dict() skims = skim_dict.wrap("home_zone_id", "zone_id") - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME choices = _location_sample( state, @@ -239,7 +241,10 @@ def location_sample( def aggregate_size_terms( - state: workflow.State, dest_size_terms, network_los, model_settings + state: workflow.State, + dest_size_terms, + network_los: los.Network_LOS, + model_settings: TourLocationComponentSettings, ): # # aggregate MAZ_size_terms to TAZ_size_terms @@ -278,7 +283,7 @@ def aggregate_size_terms( spc = shadow_pricing.load_shadow_price_calculator(state, model_settings) if spc.use_shadow_pricing and ( - spc.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation" + spc.shadow_settings.SHADOW_PRICE_METHOD == "simulation" ): # allow TAZs with at least one underassigned MAZ in them, therefore with a shadowprice larger than -999, to be selected again TAZ_size_terms["shadow_price_utility_adjustment"] = np.where( @@ -329,7 +334,7 @@ def location_presample( network_los, dest_size_terms, estimator, - model_settings, + model_settings: TourLocationComponentSettings, chunk_size, chunk_tag, trace_label, @@ -338,7 +343,7 @@ def location_presample( logger.info(f"{trace_label} location_presample") - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME assert DEST_TAZ != alt_dest_col_name MAZ_size_terms, TAZ_size_terms = aggregate_size_terms( @@ -355,7 +360,7 @@ def location_presample( # FIXME - MEMORY HACK - only include columns actually used in spec # FIXME we don't actually require that land_use provide a TAZ crosswalk # FIXME maybe we should add it for multi-zone (from maz_taz) if missing? 
- chooser_columns = model_settings["SIMULATE_CHOOSER_COLUMNS"] + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS chooser_columns = [HOME_TAZ if c == HOME_MAZ else c for c in chooser_columns] choosers = persons_merged[chooser_columns] @@ -406,7 +411,7 @@ def run_location_sample( network_los, dest_size_terms, estimator, - model_settings, + model_settings: TourLocationComponentSettings, chunk_size, chunk_tag, trace_label, @@ -488,7 +493,7 @@ def run_location_logsums( persons_merged_df, network_los, location_sample_df, - model_settings, + model_settings: TourLocationComponentSettings, chunk_size, chunk_tag, trace_label, @@ -516,8 +521,10 @@ def run_location_logsums( assert not location_sample_df.empty - logsum_settings = state.filesystem.read_model_settings( - model_settings["LOGSUM_SETTINGS"] + logsum_settings = TourModeComponentSettings.read_settings_file( + state.filesystem, + str(model_settings.LOGSUM_SETTINGS), + mandatory=False, ) # FIXME - MEMORY HACK - only include columns actually used in spec @@ -525,17 +532,15 @@ def run_location_logsums( persons_merged_df, logsum_settings, model_settings ) - logger.info( - "Running %s with %s rows" % (trace_label, len(location_sample_df.index)) - ) + logger.info(f"Running {trace_label} with {len(location_sample_df.index)} rows") choosers = location_sample_df.join(persons_merged_df, how="left") - tour_purpose = model_settings["LOGSUM_TOUR_PURPOSE"] + tour_purpose = model_settings.LOGSUM_TOUR_PURPOSE if isinstance(tour_purpose, dict): tour_purpose = tour_purpose[segment_name] - logsums = logsum.compute_logsums( + logsums = logsum.compute_location_choice_logsums( state, choosers, tour_purpose, @@ -565,7 +570,7 @@ def run_location_simulate( dest_size_terms, want_logsums, estimator, - model_settings, + model_settings: TourLocationComponentSettings, chunk_size, chunk_tag, trace_label, @@ -586,10 +591,10 @@ def run_location_simulate( assert not persons_merged.empty # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings["SIMULATE_CHOOSER_COLUMNS"] + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS choosers = persons_merged[chooser_columns] - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME # alternatives are pre-sampled and annotated with logsums and pick_count # but we have to merge additional alt columns into alt sample list @@ -616,9 +621,7 @@ def run_location_simulate( "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", } - constants = model_settings.get("CONSTANTS", {}) - if constants is not None: - locals_d.update(constants) + locals_d.update(model_settings.CONSTANTS or {}) if estimator: # write choosers after annotation @@ -628,10 +631,12 @@ def run_location_simulate( spec = simulate.spec_for_segment( state, - model_settings, + None, spec_id="SPEC", segment_name=segment_name, estimator=estimator, + spec_file_name=model_settings.SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) log_alt_losers = state.settings.log_alt_losers @@ -649,7 +654,7 @@ def run_location_simulate( chunk_size=chunk_size, chunk_tag=chunk_tag, trace_label=trace_label, - trace_choice_name=model_settings["DEST_CHOICE_COLUMN_NAME"], + trace_choice_name=model_settings.DEST_CHOICE_COLUMN_NAME, estimator=estimator, skip_choice=skip_choice, ) @@ -672,7 +677,7 @@ def run_location_choice( want_logsums, want_sample_table, estimator, - model_settings, + model_settings: TourLocationComponentSettings, chunk_size, chunk_tag, 
trace_label, @@ -706,10 +711,10 @@ def run_location_choice( logsums optional & only returned if DEST_CHOICE_LOGSUM_COLUMN_NAME specified in model_settings """ - chooser_segment_column = model_settings["CHOOSER_SEGMENT_COLUMN_NAME"] + chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME # maps segment names to compact (integer) ids - segment_ids = model_settings["SEGMENT_IDS"] + segment_ids = model_settings.SEGMENT_IDS choices_list = [] sample_list = [] @@ -790,7 +795,7 @@ def run_location_choice( choices_df.choice = estimator.get_survey_values( choices_df.choice, "persons", - column_names=model_settings["DEST_CHOICE_COLUMN_NAME"], + column_names=model_settings.DEST_CHOICE_COLUMN_NAME, ) estimator.write_override_choices(choices_df.choice) @@ -803,7 +808,7 @@ def run_location_choice( # merge mode_choice_logsum for the overridden location # alt_logsums columns: ['person_id', 'choice', 'logsum'] - alt_dest_col = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col = model_settings.ALT_DEST_COL_NAME alt_logsums = ( location_sample_df[[alt_dest_col, ALT_LOGSUM]] .rename(columns={alt_dest_col: "choice", ALT_LOGSUM: "logsum"}) @@ -833,7 +838,7 @@ def run_location_choice( if want_sample_table: # FIXME - sample_table location_sample_df.set_index( - model_settings["ALT_DEST_COL_NAME"], append=True, inplace=True + model_settings.ALT_DEST_COL_NAME, append=True, inplace=True ) sample_list.append(location_sample_df) else: @@ -858,15 +863,15 @@ def run_location_choice( def iterate_location_choice( state: workflow.State, - model_settings, - persons_merged, - persons, - households, - network_los, - estimator, - chunk_size, + model_settings: TourLocationComponentSettings, + persons_merged: pd.DataFrame, + persons: pd.DataFrame, + households: pd.DataFrame, + network_los: los.Network_LOS, + estimator: estimation.Estimator, + chunk_size: int, locutor: bool, - trace_label, + trace_label: str, ): """ iterate run_location_choice updating shadow pricing until convergence criteria satisfied @@ -895,12 +900,12 @@ def iterate_location_choice( chunk_tag = trace_label # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student) - chooser_filter_column = model_settings["CHOOSER_FILTER_COLUMN_NAME"] + chooser_filter_column = model_settings.CHOOSER_FILTER_COLUMN_NAME - dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"] - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + dest_choice_column_name = model_settings.DEST_CHOICE_COLUMN_NAME + logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) @@ -914,8 +919,8 @@ def iterate_location_choice( ) # interaction_sample expects chooser index to be monotonic increasing # chooser segmentation allows different sets coefficients for e.g. 
different income_segments or tour_types - chooser_segment_column = model_settings["CHOOSER_SEGMENT_COLUMN_NAME"] - segment_ids = model_settings["SEGMENT_IDS"] + chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME + segment_ids = model_settings.SEGMENT_IDS assert ( chooser_segment_column in persons_merged_df @@ -925,7 +930,7 @@ def iterate_location_choice( max_iterations = spc.max_iterations assert not (spc.use_shadow_pricing and estimator) - logger.debug("%s max_iterations: %s" % (trace_label, max_iterations)) + logger.debug(f"{trace_label} max_iterations: {max_iterations}") save_sample_df = ( choices_df @@ -937,7 +942,7 @@ def iterate_location_choice( if spc.use_shadow_pricing and iteration > 1: spc.update_shadow_prices(state) - if spc.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation": + if spc.shadow_settings.SHADOW_PRICE_METHOD == "simulation": # filter from the sampled persons persons_merged_df_ = persons_merged_df_[ persons_merged_df_.index.isin(spc.sampled_persons.index) @@ -965,7 +970,7 @@ def iterate_location_choice( if spc.use_shadow_pricing: # handle simulation method if ( - spc.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation" + spc.shadow_settings.SHADOW_PRICE_METHOD == "simulation" and iteration > 1 ): # if a process ends up with no sampled workers in it, hence an empty choice_df_, then choice_df wil be what it was previously @@ -1007,10 +1012,10 @@ def iterate_location_choice( # - shadow price table if locutor: - if spc.use_shadow_pricing and "SHADOW_PRICE_TABLE" in model_settings: - state.add_table(model_settings["SHADOW_PRICE_TABLE"], spc.shadow_prices) - if "MODELED_SIZE_TABLE" in model_settings: - state.add_table(model_settings["MODELED_SIZE_TABLE"], spc.modeled_size) + if spc.use_shadow_pricing and model_settings.SHADOW_PRICE_TABLE: + state.add_table(model_settings.SHADOW_PRICE_TABLE, spc.shadow_prices) + if model_settings.MODELED_SIZE_TABLE: + state.add_table(model_settings.MODELED_SIZE_TABLE, spc.modeled_size) persons_df = persons @@ -1040,11 +1045,11 @@ def iterate_location_choice( state.extend_table(sample_table_name, save_sample_df) # - annotate persons table - if "annotate_persons" in model_settings: + if model_settings.annotate_persons: expressions.assign_columns( state, df=persons_df, - model_settings=model_settings.get("annotate_persons"), + model_settings=model_settings.annotate_persons, trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), ) @@ -1054,12 +1059,12 @@ def iterate_location_choice( state.tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True) # - annotate households table - if "annotate_households" in model_settings: + if model_settings.annotate_households: households_df = households expressions.assign_columns( state, df=households_df, - model_settings=model_settings.get("annotate_households"), + model_settings=model_settings.annotate_households, trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), ) state.add_table("households", households_df) @@ -1083,16 +1088,20 @@ def workplace_location( households: pd.DataFrame, network_los: los.Network_LOS, locutor: bool, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "workplace_location.yaml", + trace_label: str = "workplace_location", ) -> None: """ workplace location choice model iterate_location_choice adds location choice column and annotations to persons table """ - - trace_label = "workplace_location" - model_settings = 
state.filesystem.read_model_settings("workplace_location.yaml") - + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) estimator = estimation.manager.begin_estimation(state, "workplace_location") if estimator: write_estimation_specs( @@ -1133,6 +1142,9 @@ def school_location( households: pd.DataFrame, network_los: los.Network_LOS, locutor: bool, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "school_location.yaml", + trace_label: str = "school_location", ) -> None: """ School location choice model @@ -1140,8 +1152,11 @@ def school_location( iterate_location_choice adds location choice column and annotations to persons table """ - trace_label = "school_location" - model_settings = state.filesystem.read_model_settings("school_location.yaml") + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) estimator = estimation.manager.begin_estimation(state, "school_location") if estimator: diff --git a/activitysim/abm/models/mandatory_tour_frequency.py b/activitysim/abm/models/mandatory_tour_frequency.py index 166c671e0..413390b6e 100644 --- a/activitysim/abm/models/mandatory_tour_frequency.py +++ b/activitysim/abm/models/mandatory_tour_frequency.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from typing import Any import pandas as pd @@ -15,6 +16,12 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import ( + BaseLogitComponentSettings, + LogitComponentSettings, + PreprocessorSettings, +) logger = logging.getLogger(__name__) @@ -42,20 +49,37 @@ def add_null_results(state, trace_label, mandatory_tour_frequency_settings): state.add_table("persons", persons) +class MandatoryTourFrequencySettings(LogitComponentSettings): + """ + Settings for the `mandatory_tour_frequency` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + annotate_persons: PreprocessorSettings | None = None + + @workflow.step def mandatory_tour_frequency( state: workflow.State, persons_merged: pd.DataFrame, + model_settings: MandatoryTourFrequencySettings | None = None, + model_settings_file_name: str = "mandatory_tour_frequency.yaml", + trace_label: str = "mandatory_tour_frequency", ) -> None: """ This model predicts the frequency of making mandatory trips (see the alternatives above) - these trips include work and school in some combination. 
""" - trace_label = "mandatory_tour_frequency" - model_settings_file_name = "mandatory_tour_frequency.yaml" + trace_hh_id = state.settings.trace_hh_id - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = MandatoryTourFrequencySettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) choosers = persons_merged # filter based on results of CDAP @@ -68,7 +92,7 @@ def mandatory_tour_frequency( return # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_dict = {} @@ -82,7 +106,7 @@ def mandatory_tour_frequency( estimator = estimation.manager.begin_estimation(state, "mandatory_tour_frequency") - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator @@ -149,7 +173,7 @@ def mandatory_tour_frequency( expressions.assign_columns( state, df=persons, - model_settings=model_settings.get("annotate_persons"), + model_settings=model_settings.annotate_persons, trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), ) diff --git a/activitysim/abm/models/non_mandatory_destination.py b/activitysim/abm/models/non_mandatory_destination.py index 7ca5c4e90..808501197 100644 --- a/activitysim/abm/models/non_mandatory_destination.py +++ b/activitysim/abm/models/non_mandatory_destination.py @@ -8,6 +8,7 @@ from activitysim.abm.models.util import annotate, tour_destination from activitysim.core import estimation, los, tracing, workflow +from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place logger = logging.getLogger(__name__) @@ -19,6 +20,9 @@ def non_mandatory_tour_destination( tours: pd.DataFrame, persons_merged: pd.DataFrame, network_los: los.Network_LOS, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "non_mandatory_tour_destination.yaml", + trace_label: str = "non_mandatory_tour_destination", ) -> None: """ Given the tour generation from the above, each tour needs to have a @@ -26,15 +30,18 @@ def non_mandatory_tour_destination( person that's making the tour) """ - trace_label = "non_mandatory_tour_destination" - model_settings_file_name = "non_mandatory_tour_destination.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + trace_hh_id = state.settings.trace_hh_id - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME want_logsums = logsum_column_name is not None - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) @@ -65,7 +72,7 @@ def non_mandatory_tour_destination( estimator.write_coefficients(model_settings=model_settings) # estimator.write_spec(model_settings, tag='SAMPLE_SPEC') estimator.write_spec(model_settings, tag="SPEC") - 
estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"]) + estimator.set_alt_id(model_settings.ALT_DEST_COL_NAME) estimator.write_table( state.get_injectable("size_terms"), "size_terms", append=False ) @@ -112,7 +119,7 @@ def non_mandatory_tour_destination( state.add_table("tours", tours) - if model_settings.get("annotate_tours"): + if model_settings.annotate_tours: annotate.annotate_tours(state, model_settings, trace_label) if want_sample_table: diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 86a794102..f54d5b120 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -3,6 +3,8 @@ from __future__ import annotations import logging +from pathlib import Path +from typing import Any import numpy as np import pandas as pd @@ -22,6 +24,8 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_simulate import interaction_simulate logger = logging.getLogger(__name__) @@ -147,9 +151,41 @@ def extend_tour_counts( return tour_counts +class NonMandatoryTourSpecSegment(PydanticReadable): + NAME: str + PTYPE: int + COEFFICIENTS: Path + + +class NonMandatoryTourFrequencySettings(LogitComponentSettings): + """ + Settings for the `non_mandatory_tour_frequency` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + SEGMENT_COL: str = "ptype" + # TODO: remove this setting if it is confirmed to be unused + + SPEC_SEGMENTS: list[NonMandatoryTourSpecSegment] = [] + """Per-person-type spec segments, each supplying its own coefficients file.""" + + annotate_persons: PreprocessorSettings | None = None + """Preprocessor settings to annotate persons""" + + annotate_tours: PreprocessorSettings | None = None + """Preprocessor settings to annotate tours""" + + @workflow.step def non_mandatory_tour_frequency( - state: workflow.State, persons: pd.DataFrame, persons_merged: pd.DataFrame + state: workflow.State, + persons: pd.DataFrame, + persons_merged: pd.DataFrame, + model_settings: NonMandatoryTourFrequencySettings | None = None, + model_settings_file_name: str = "non_mandatory_tour_frequency.yaml", + trace_label: str = "non_mandatory_tour_frequency", ) -> None: """ This model predicts the frequency of making non-mandatory trips @@ -158,10 +194,11 @@ def non_mandatory_tour_frequency( othdiscr, eatout, and social trips in various combinations. 
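A hedged sketch of the segmented configuration, one `NonMandatoryTourSpecSegment` per person type; the segment NAME picks a column of the omnibus SPEC, and all names below are illustrative:

```python
# Hypothetical segmented setup; PTYPE values and file names are placeholders.
from activitysim.abm.models.non_mandatory_tour_frequency import (
    NonMandatoryTourFrequencySettings,
    NonMandatoryTourSpecSegment,
)

settings = NonMandatoryTourFrequencySettings(
    SPEC="non_mandatory_tour_frequency.csv",
    COEFFICIENTS="non_mandatory_tour_frequency_coefficients.csv",
    SPEC_SEGMENTS=[
        NonMandatoryTourSpecSegment(
            NAME="PTYPE_FULL",  # illustrative segment name
            PTYPE=1,
            COEFFICIENTS="non_mandatory_tour_frequency_coefficients_PTYPE_FULL.csv",
        ),
    ],
)
```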
""" - trace_label = "non_mandatory_tour_frequency" - model_settings_file_name = "non_mandatory_tour_frequency.yaml" - - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = NonMandatoryTourFrequencySettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) # FIXME kind of tacky both that we know to add this here and del it below # 'tot_tours' is used in model_spec expressions @@ -175,7 +212,7 @@ def non_mandatory_tour_frequency( choosers = choosers[choosers.cdap_activity.isin(["M", "N"])] # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_dict = {"person_max_window": lambda x: person_max_window(state, x)} @@ -191,14 +228,14 @@ def non_mandatory_tour_frequency( constants = config.get_model_constants(model_settings) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) - spec_segments = model_settings.get("SPEC_SEGMENTS", {}) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) + spec_segments = model_settings.SPEC_SEGMENTS # segment by person type and pick the right spec for each person type choices_list = [] for segment_settings in spec_segments: - segment_name = segment_settings["NAME"] - ptype = segment_settings["PTYPE"] + segment_name = segment_settings.NAME + ptype = segment_settings.PTYPE # pick the spec column for the segment segment_spec = model_spec[[segment_name]] @@ -395,13 +432,13 @@ def non_mandatory_tour_frequency( # need to re-compute tour frequency statistics to account for school escort tours recompute_tour_count_statistics(state) - if model_settings.get("annotate_tours"): + if model_settings.annotate_tours: annotate.annotate_tours(state, model_settings, trace_label) expressions.assign_columns( state, df=persons, - model_settings=model_settings.get("annotate_persons"), + model_settings=model_settings.annotate_persons, trace_label=trace_label, ) diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index dbec927be..f3fd9674d 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from pathlib import Path import numpy as np import pandas as pd @@ -16,6 +17,8 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.tracing import print_elapsed_time from activitysim.core.util import assign_in_place @@ -25,7 +28,7 @@ NO_DESTINATION = -1 -def wrap_skims(state: workflow.State, model_settings): +def wrap_skims(state: workflow.State, model_settings: ParkingLocationSettings): """ wrap skims of trip destination using origin, dest column names from model settings. 
Various of these are used by destination_sample, compute_logsums, and destination_simulate @@ -53,10 +56,10 @@ def wrap_skims(state: workflow.State, model_settings): network_los = state.get_injectable("network_los") skim_dict = network_los.get_default_skim_dict() - origin = model_settings["TRIP_ORIGIN"] - park_zone = model_settings["ALT_DEST_COL_NAME"] - destination = model_settings["TRIP_DESTINATION"] - time_period = model_settings["TRIP_DEPARTURE_PERIOD"] + origin = model_settings.TRIP_ORIGIN + park_zone = model_settings.ALT_DEST_COL_NAME + destination = model_settings.TRIP_DESTINATION + time_period = model_settings.TRIP_DEPARTURE_PERIOD skims = { "odt_skims": skim_dict.wrap_3d( @@ -80,8 +83,12 @@ def wrap_skims(state: workflow.State, model_settings): return skims -def get_spec_for_segment(state: workflow.State, model_settings, spec_name, segment): - omnibus_spec = state.filesystem.read_model_spec(file_name=model_settings[spec_name]) +def get_spec_for_segment( + state: workflow.State, model_settings: ParkingLocationSettings, segment: str +): + omnibus_spec = state.filesystem.read_model_spec( + file_name=model_settings.SPECIFICATION + ) spec = omnibus_spec[[segment]] @@ -97,7 +104,7 @@ def parking_destination_simulate( segment_name, trips, destination_sample, - model_settings, + model_settings: ParkingLocationSettings, skims, chunk_size, trace_hh_id, @@ -116,12 +123,12 @@ def parking_destination_simulate( trace_label, "parking_destination_simulate" ) - spec = get_spec_for_segment(state, model_settings, "SPECIFICATION", segment_name) + spec = get_spec_for_segment(state, model_settings, segment_name) coefficients_df = state.filesystem.read_model_coefficients(model_settings) spec = simulate.eval_coefficients(state, spec, coefficients_df, None) - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME logger.info("Running parking_destination_simulate with %d trips", len(trips)) @@ -162,7 +169,7 @@ def choose_parking_location( segment_name, trips, alternatives, - model_settings, + model_settings: ParkingLocationSettings, want_sample_table, skims, chunk_size, @@ -173,7 +180,7 @@ def choose_parking_location( t0 = print_elapsed_time() - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME destination_sample = logit.interaction_dataset( state, trips, alternatives, alt_index_id=alt_dest_col_name ) @@ -195,7 +202,7 @@ def choose_parking_location( if want_sample_table: # FIXME - sample_table destination_sample.set_index( - model_settings["ALT_DEST_COL_NAME"], append=True, inplace=True + model_settings.ALT_DEST_COL_NAME, append=True, inplace=True ) else: destination_sample = None @@ -207,7 +214,7 @@ def choose_parking_location( def run_parking_destination( state: workflow.State, - model_settings, + model_settings: ParkingLocationSettings, trips, land_use, chunk_size, @@ -215,11 +222,11 @@ def run_parking_destination( trace_label, fail_some_trips_for_testing=False, ): - chooser_filter_column = model_settings.get("CHOOSER_FILTER_COLUMN_NAME") - chooser_segment_column = model_settings.get("CHOOSER_SEGMENT_COLUMN_NAME") + chooser_filter_column = model_settings.CHOOSER_FILTER_COLUMN_NAME + chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME - parking_location_column_name = model_settings["ALT_DEST_COL_NAME"] - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + parking_location_column_name = model_settings.ALT_DEST_COL_NAME + sample_table_name = 
model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) @@ -232,7 +239,7 @@ def run_parking_destination( skims = wrap_skims(state, model_settings) - alt_column_filter_name = model_settings.get("ALTERNATIVE_FILTER_COLUMN_NAME") + alt_column_filter_name = model_settings.ALTERNATIVE_FILTER_COLUMN_NAME alternatives = land_use[land_use[alt_column_filter_name]] alternatives.index.name = parking_location_column_name @@ -281,6 +288,44 @@ def run_parking_destination( return trips[parking_location_column_name], save_sample_df +class ParkingLocationSettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `parking_location` component. + """ + + SPECIFICATION: Path | None = None + SPEC: None = None + """The parking location model does not use this setting, see `SPECIFICATION`.""" + + PREPROCESSOR: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + ALT_DEST_COL_NAME: str = "parking_zone" + """Parking destination column name.""" + + TRIP_DEPARTURE_PERIOD: str = "stop_period" + """Trip departure time period.""" + + PARKING_LOCATION_SAMPLE_TABLE_NAME: str | None = None + + TRIP_ORIGIN: str = "origin" + TRIP_DESTINATION: str = "destination" + + CHOOSER_FILTER_COLUMN_NAME: str + """A boolean column to filter choosers. + + If this column evaluates as True the row will be kept. + """ + + CHOOSER_SEGMENT_COLUMN_NAME: str + + DEST_CHOICE_SAMPLE_TABLE_NAME: str | None = None + + ALTERNATIVE_FILTER_COLUMN_NAME: str + + SEGMENTS: list[str] | None = None + + @workflow.step def parking_location( state: workflow.State, @@ -288,29 +333,34 @@ def parking_location( trips: pd.DataFrame, trips_merged: pd.DataFrame, land_use: pd.DataFrame, network_los: los.Network_LOS, + model_settings: ParkingLocationSettings | None = None, + model_settings_file_name: str = "parking_location_choice.yaml", + trace_label: str = "parking_location", ) -> None: """ Given a set of trips, each trip needs to have a parking location if it is eligible for remote parking. 
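Three of the settings above (`CHOOSER_FILTER_COLUMN_NAME`, `CHOOSER_SEGMENT_COLUMN_NAME`, `ALTERNATIVE_FILTER_COLUMN_NAME`) have no defaults, so a programmatic construction must supply them; a hedged sketch with assumed, non-canonical column names:

```python
# Hypothetical: every column name below is a placeholder, not a value
# taken from any shipped parking_location_choice.yaml.
from activitysim.abm.models.parking_location_choice import (
    ParkingLocationSettings,
)

settings = ParkingLocationSettings(
    SPECIFICATION="parking_location_choice.csv",  # illustrative
    COEFFICIENTS="parking_location_choice_coefficients.csv",  # illustrative
    CHOOSER_FILTER_COLUMN_NAME="eligible_for_parking_choice",
    CHOOSER_SEGMENT_COLUMN_NAME="tour_purpose",
    ALTERNATIVE_FILTER_COLUMN_NAME="is_parking_zone",
)
```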
""" - trace_label = "parking_location" - model_settings = state.filesystem.read_model_settings( - "parking_location_choice.yaml" - ) + if model_settings is None: + model_settings = ParkingLocationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + trace_hh_id = state.settings.trace_hh_id - alt_destination_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_destination_col_name = model_settings.ALT_DEST_COL_NAME - preprocessor_settings = model_settings.get("PREPROCESSOR", None) + preprocessor_settings = model_settings.PREPROCESSOR trips_df = trips trips_merged_df = trips_merged land_use_df = land_use - proposed_trip_departure_period = model_settings["TRIP_DEPARTURE_PERIOD"] + proposed_trip_departure_period = model_settings.TRIP_DEPARTURE_PERIOD # TODO: the number of skim time periods should be more readily available than this n_skim_time_periods = np.unique( - network_los.los_settings.skim_time_periods["labels"] + network_los.los_settings.skim_time_periods.labels ).size if trips_merged_df[proposed_trip_departure_period].max() > n_skim_time_periods: # max proposed_trip_departure_period is out of range, @@ -320,7 +370,7 @@ def parking_location( trips_merged_df["trip_period"] = network_los.skim_time_period_label( trips_merged_df[proposed_trip_departure_period], as_cat=True ) - model_settings["TRIP_DEPARTURE_PERIOD"] = "trip_period" + model_settings.TRIP_DEPARTURE_PERIOD = "trip_period" locals_dict = {"network_los": network_los} @@ -366,12 +416,10 @@ def parking_location( trips_df[trips_df.trip_num < trips_df.trip_count] ) - sample_table_name = model_settings.get("PARKING_LOCATION_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.PARKING_LOCATION_SAMPLE_TABLE_NAME assert sample_table_name is not None - logger.info( - "adding %s samples to %s" % (len(save_sample_df), sample_table_name) - ) + logger.info(f"adding {len(save_sample_df)} samples to {sample_table_name}") # lest they try to put tour samples into the same table if state.is_table(sample_table_name): diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 9c1e438dc..88bf42617 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from typing import Any import numpy as np import pandas as pd @@ -16,6 +17,8 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import BaseLogitComponentSettings from activitysim.core.interaction_simulate import interaction_simulate from activitysim.core.util import reindex @@ -26,7 +29,9 @@ NUM_CHAPERONES = 2 -def determine_escorting_participants(choosers, persons, model_settings): +def determine_escorting_participants( + choosers: pd.DataFrame, persons: pd.DataFrame, model_settings: SchoolEscortSettings +): """ Determining which persons correspond to chauffer 1..n and escortee 1..n. 
Chauffeurs are those with the highest weight given by: @@ -35,15 +40,15 @@ def determine_escorting_participants(choosers, persons, model_settings): """ global NUM_ESCORTEES global NUM_CHAPERONES - NUM_ESCORTEES = model_settings.get("NUM_ESCORTEES", NUM_ESCORTEES) - NUM_CHAPERONES = model_settings.get("NUM_CHAPERONES", NUM_CHAPERONES) + NUM_ESCORTEES = model_settings.NUM_ESCORTEES + NUM_CHAPERONES = model_settings.NUM_CHAPERONES - ptype_col = model_settings.get("PERSONTYPE_COLUMN", "ptype") - sex_col = model_settings.get("GENDER_COLUMN", "sex") - age_col = model_settings.get("AGE_COLUMN", "age") + ptype_col = model_settings.PERSONTYPE_COLUMN + sex_col = model_settings.GENDER_COLUMN + age_col = model_settings.AGE_COLUMN - escortee_age_cutoff = model_settings.get("ESCORTEE_AGE_CUTOFF", 16) - chaperone_age_cutoff = model_settings.get("CHAPERONE_AGE_CUTOFF", 18) + escortee_age_cutoff = model_settings.ESCORTEE_AGE_CUTOFF + chaperone_age_cutoff = model_settings.CHAPERONE_AGE_CUTOFF escortees = persons[ persons.is_student & (persons[age_col] < escortee_age_cutoff) ] households_with_escortees = escortees["household_id"] # can specify different weights to determine chaperones - persontype_weight = model_settings.get("PERSON_WEIGHT", 100) - gender_weight = model_settings.get("PERSON_WEIGHT", 10) - age_weight = model_settings.get("AGE_WEIGHT", 1) + persontype_weight = model_settings.PERSON_WEIGHT + gender_weight = model_settings.GENDER_WEIGHT + age_weight = model_settings.AGE_WEIGHT # can we move all of these to a config file? chaperones = persons[ @@ -101,7 +106,7 @@ def determine_escorting_participants(choosers, persons, model_settings): return choosers, participant_columns -def check_alts_consistency(alts): +def check_alts_consistency(alts: pd.DataFrame): """ Checking to ensure that the alternatives file is consistent with the number of chaperones and escortees set in the model settings. @@ -116,7 +121,9 @@ def check_alts_consistency(alts): return -def add_prev_choices_to_choosers(choosers, choices, alts, stage): +def add_prev_choices_to_choosers( + choosers: pd.DataFrame, choices: pd.Series, alts: pd.DataFrame, stage: str +) -> pd.DataFrame: # adding choice details to chooser table escorting_choice = "school_escorting_" + stage choosers[escorting_choice] = choices @@ -334,6 +341,62 @@ def create_school_escorting_bundles_table(choosers, tours, stage): return bundles +class SchoolEscortSettings(BaseLogitComponentSettings): + """ + Settings for the `school_escorting` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + ALTS: Any + + NUM_ESCORTEES: int = 3 + NUM_CHAPERONES: int = 2 + + PERSONTYPE_COLUMN: str = "ptype" + GENDER_COLUMN: str = "sex" + AGE_COLUMN: str = "age" + + ESCORTEE_AGE_CUTOFF: int = 16 + CHAPERONE_AGE_CUTOFF: int = 18 + + PERSON_WEIGHT: float = 100.0 + GENDER_WEIGHT: float = 10.0 + AGE_WEIGHT: float = 1.0 + + sharrow_skip: bool | dict[str, bool] = False + """Setting to skip sharrow. + + Sharrow can be skipped (or not) for all school escorting stages by giving + simply true or false. Alternatively, it can be skipped only for particular + stages by giving a mapping of stage name to skipping. 
For example: + + ```yaml + sharrow_skip: + OUTBOUND: true + INBOUND: false + OUTBOUND_COND: true + ``` + """ + + SIMULATE_CHOOSER_COLUMNS: list[str] | None = None + + SPEC: None = None + """The school escort model does not use this setting.""" + + OUTBOUND_SPEC: str = "school_escorting_outbound.csv" + OUTBOUND_COEFFICIENTS: str = "school_escorting_coefficients_outbound.csv" + INBOUND_SPEC: str = "school_escorting_inbound.csv" + INBOUND_COEFFICIENTS: str = "school_escorting_coefficients_inbound.csv" + OUTBOUND_COND_SPEC: str = "school_escorting_outbound_cond.csv" + OUTBOUND_COND_COEFFICIENTS: str = "school_escorting_coefficients_outbound_cond.csv" + + preprocessor_outbound: PreprocessorSettings | None = None + preprocessor_inbound: PreprocessorSettings | None = None + preprocessor_outbound_cond: PreprocessorSettings | None = None + + @workflow.step def school_escorting( state: workflow.State, @@ -341,6 +404,9 @@ def school_escorting( households_merged: pd.DataFrame, persons: pd.DataFrame, tours: pd.DataFrame, + model_settings: SchoolEscortSettings | None = None, + model_settings_file_name: str = "school_escorting.yaml", + trace_label: str = "school_escorting_simulate", ) -> None: """ school escorting model @@ -365,12 +431,15 @@ def school_escorting( - timetable to avoid joint tours scheduled over school escort tours """ - trace_label = "school_escorting_simulate" - model_settings_file_name = "school_escorting.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = SchoolEscortSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + trace_hh_id = state.settings.trace_hh_id - alts = simulate.read_model_alts(state, model_settings["ALTS"], set_index="Alt") + alts = simulate.read_model_alts(state, model_settings.ALTS, set_index="Alt") households_merged, participant_columns = determine_escorting_participants( households_merged, persons, model_settings @@ -392,10 +461,10 @@ def school_escorting( ) model_spec_raw = state.filesystem.read_model_spec( - file_name=model_settings[stage.upper() + "_SPEC"] + file_name=getattr(model_settings, stage.upper() + "_SPEC") ) coefficients_df = state.filesystem.read_model_coefficients( - file_name=model_settings[stage.upper() + "_COEFFICIENTS"] + file_name=getattr(model_settings, stage.upper() + "_COEFFICIENTS") ) model_spec = simulate.eval_coefficients( state, model_spec_raw, coefficients_df, estimator @@ -403,7 +472,7 @@ def school_escorting( # allow for skipping sharrow entirely in this model with `sharrow_skip: true` # or skipping stages selectively with a mapping of the stages to skip - sharrow_skip = model_settings.get("sharrow_skip", False) + sharrow_skip = model_settings.sharrow_skip stage_sharrow_skip = False # default is false unless set below if sharrow_skip: if isinstance(sharrow_skip, dict): @@ -416,7 +485,7 @@ def school_escorting( locals_dict.pop("_sharrow_skip", None) # reduce memory by limiting columns if selected columns are supplied - chooser_columns = model_settings.get("SIMULATE_CHOOSER_COLUMNS", None) + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS if chooser_columns is not None: chooser_columns = chooser_columns + participant_columns choosers = households_merged[chooser_columns] @@ -433,7 +502,7 @@ def school_escorting( logger.info("Running %s with %d households", stage_trace_label, len(choosers)) - preprocessor_settings = model_settings.get("preprocessor_" + stage, None) + preprocessor_settings = getattr(model_settings, 
"preprocessor_" + stage, None) if preprocessor_settings: expressions.assign_columns( state, @@ -545,7 +614,7 @@ def school_escorting( # including mandatory tours because their start / end times may have # changed to match the school escort times for tour_category in tours.tour_category.unique(): - for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( + for _tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( "tour_num", sort=True ): timetable.assign( diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py index 484a93cb1..7e27ae08a 100644 --- a/activitysim/abm/models/stop_frequency.py +++ b/activitysim/abm/models/stop_frequency.py @@ -3,6 +3,8 @@ from __future__ import annotations import logging +from pathlib import Path +from typing import Any, Literal import pandas as pd @@ -16,18 +18,63 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.util import assign_in_place logger = logging.getLogger(__name__) +class StopFrequencySpecSegmentSettings(LogitComponentSettings, extra="allow"): + # this class specifically allows "extra" settings because ActivitySim + # is set up to have the name of the segment column be identified with + # an arbitrary key. + SPEC: Path + COEFFICIENTS: Path + + +class StopFrequencySettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the stop frequency component. + """ + + LOGIT_TYPE: Literal["MNL"] = "MNL" + """Logit model mathematical form. + + * "MNL" + Multinomial logit model. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + SPEC_SEGMENTS: list[StopFrequencySpecSegmentSettings] = {} + + SPEC: Path | None = None + """Utility specification filename. + + This is sometimes alternatively called the utility expressions calculator + (UEC). It is a CSV file giving all the functions for the terms of a + linear-in-parameters utility expression. If SPEC_SEGMENTS is given, then + this unsegmented SPEC should be omitted. 
+ """ + + SEGMENT_COL: str = "primary_purpose" + + CONSTANTS: dict[str, Any] = {} + """Named constants usable in the utility expressions.""" + + @workflow.step def stop_frequency( state: workflow.State, tours: pd.DataFrame, tours_merged: pd.DataFrame, - stop_frequency_alts, + stop_frequency_alts: pd.DataFrame, network_los: los.Network_LOS, + model_settings: StopFrequencySettings | None = None, + model_settings_file_name: str = "stop_frequency.yaml", + trace_label: str = "stop_frequency", ) -> None: """ stop frequency model @@ -55,11 +102,13 @@ def stop_frequency( """ - trace_label = "stop_frequency" - model_settings_file_name = "stop_frequency.yaml" trace_hh_id = state.settings.trace_hh_id - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = StopFrequencySettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) assert not tours_merged.household_id.isnull().any() assert not (tours_merged.origin == -1).any() @@ -69,7 +118,7 @@ def stop_frequency( constants = config.get_model_constants(model_settings) # - run preprocessor to annotate tours_merged - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: # hack: preprocessor adds origin column in place if it does not exist already assert "origin" in tours_merged @@ -99,11 +148,11 @@ def stop_frequency( "stop_frequency segments", tours_merged.primary_purpose, value_counts=True ) - spec_segments = model_settings.get("SPEC_SEGMENTS") + spec_segments = model_settings.SPEC_SEGMENTS assert ( spec_segments is not None ), f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}" - segment_col = model_settings.get("SEGMENT_COL") + segment_col = model_settings.SEGMENT_COL assert ( segment_col is not None ), f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}" @@ -112,8 +161,7 @@ def stop_frequency( choices_list = [] for segment_settings in spec_segments: - segment_name = segment_settings[segment_col] - segment_value = segment_settings[segment_col] + segment_name = segment_value = getattr(segment_settings, segment_col) chooser_segment = tours_merged[tours_merged[segment_col] == segment_value] @@ -129,16 +177,14 @@ def stop_frequency( state, model_name=segment_name, bundle_name="stop_frequency" ) - segment_spec = state.filesystem.read_model_spec( - file_name=segment_settings["SPEC"] - ) + segment_spec = state.filesystem.read_model_spec(file_name=segment_settings.SPEC) assert segment_spec is not None, ( "spec for segment_type %s not found" % segment_name ) - coefficients_file_name = segment_settings["COEFFICIENTS"] + coefficients_file_name = segment_settings.COEFFICIENTS coefficients_df = state.filesystem.read_model_coefficients( - file_name=coefficients_file_name + file_name=str(coefficients_file_name) ) segment_spec = simulate.eval_coefficients( state, segment_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/summarize.py b/activitysim/abm/models/summarize.py index ed553ecbf..e842393fd 100644 --- a/activitysim/abm/models/summarize.py +++ b/activitysim/abm/models/summarize.py @@ -9,6 +9,7 @@ import pandas as pd from activitysim.core import expressions, workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.los import Network_LOS logger = logging.getLogger(__name__) @@ -200,6 +201,23 @@ def manual_breaks( return bins +class 
SummarizeSettings(PydanticReadable, extra="allow"): + """ + Settings for the `summarize` component. + """ + + SPECIFICATION: str = "summarize.csv" + """Filename for the summarize specification (csv) file.""" + + OUTPUT: str = "summaries" + """Output folder name.""" + + EXPORT_PIPELINE_TABLES: bool = True + """To export pipeline tables for expression development.""" + + preprocessor: PreprocessorSettings | None = None + + @workflow.step def summarize( state: workflow.State, @@ -211,6 +229,9 @@ def summarize( trips: pd.DataFrame, tours_merged: pd.DataFrame, land_use: pd.DataFrame, + model_settings: SummarizeSettings | None = None, + model_settings_file_name: str = "summarize.yaml", + trace_label: str = "summarize", ) -> None: """ A standard model that uses expression files to summarize pipeline tables for visualization. @@ -224,17 +245,18 @@ def summarize( Outputs a separate csv summary file for each expression; outputs starting with '_' are saved as temporary local variables. """ - trace_label = "summarize" - model_settings_file_name = "summarize.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - output_location = ( - model_settings["OUTPUT"] if "OUTPUT" in model_settings else "summaries" - ) + if model_settings is None: + model_settings = SummarizeSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + + output_location = model_settings.OUTPUT os.makedirs(state.get_output_file_path(output_location), exist_ok=True) spec = pd.read_csv( - state.filesystem.get_config_file_path(model_settings["SPECIFICATION"]), + state.filesystem.get_config_file_path(model_settings.SPECIFICATION), comment="#", ) @@ -247,7 +269,7 @@ def summarize( tours_merged.drop(columns=["person_id", "household_id"]), left_on="tour_id", right_index=True, - suffixes=["_trip", "_tour"], + suffixes=("_trip", "_tour"), how="left", ) @@ -272,8 +294,8 @@ def summarize( ) for table_name, df in locals_d.items(): - if table_name in model_settings: - meta = model_settings[table_name] + if hasattr(model_settings, table_name): + meta = getattr(model_settings, table_name) df = eval(table_name) if "AGGREGATE" in meta and meta["AGGREGATE"]: @@ -311,7 +333,7 @@ def summarize( ) # Output pipeline tables for expression development - if model_settings["EXPORT_PIPELINE_TABLES"] is True: + if model_settings.EXPORT_PIPELINE_TABLES: pipeline_table_dir = os.path.join(output_location, "pipeline_tables") os.makedirs(state.get_output_file_path(pipeline_table_dir), exist_ok=True) for name, df in locals_d.items(): diff --git a/activitysim/abm/models/telecommute_frequency.py b/activitysim/abm/models/telecommute_frequency.py index 8629909aa..300c81454 100755 --- a/activitysim/abm/models/telecommute_frequency.py +++ b/activitysim/abm/models/telecommute_frequency.py @@ -14,15 +14,29 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger("activitysim") +class TelecommuteFrequencySettings(LogitComponentSettings): + """ + Settings for the `telecommute_frequency` component.
+ """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + @workflow.step def telecommute_frequency( state: workflow.State, persons_merged: pd.DataFrame, persons: pd.DataFrame, + model_settings: TelecommuteFrequencySettings | None = None, + model_settings_file_name: str = "telecommute_frequency.yaml", + trace_label: str = "telecommute_frequency", ) -> None: """ This model predicts the frequency of telecommute for a person (worker) who @@ -32,21 +46,23 @@ def telecommute_frequency( office during a week. """ - trace_label = "telecommute_frequency" - model_settings_file_name = "telecommute_frequency.yaml" + if model_settings is None: + model_settings = TelecommuteFrequencySettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) choosers = persons_merged choosers = choosers[choosers.workplace_zone_id > -1] logger.info("Running %s with %d persons", trace_label, len(choosers)) - model_settings = state.filesystem.read_model_settings(model_settings_file_name) estimator = estimation.manager.begin_estimation(state, "telecommute_frequency") constants = config.get_model_constants(model_settings) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} if constants is not None: @@ -60,7 +76,7 @@ def telecommute_frequency( trace_label=trace_label, ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/tour_mode_choice.py b/activitysim/abm/models/tour_mode_choice.py index 36e678617..bc117be5a 100644 --- a/activitysim/abm/models/tour_mode_choice.py +++ b/activitysim/abm/models/tour_mode_choice.py @@ -10,6 +10,7 @@ from activitysim.abm.models.util import annotate, school_escort_tours_trips, trip from activitysim.abm.models.util.mode import run_tour_mode_choice_simulate from activitysim.core import config, estimation, logit, los, simulate, tracing, workflow +from activitysim.core.configuration.logit import TourModeComponentSettings from activitysim.core.util import assign_in_place, reindex logger = logging.getLogger(__name__) @@ -21,7 +22,10 @@ def get_alts_from_segmented_nested_logit( - state: workflow.State, model_settings, segment_name, trace_label + state: workflow.State, + model_settings: TourModeComponentSettings, + segment_name: str, + trace_label: str, ): """Infer alts from logit spec @@ -50,7 +54,11 @@ def get_alts_from_segmented_nested_logit( def create_logsum_trips( - state: workflow.State, tours, segment_column_name, model_settings, trace_label + state: workflow.State, + tours: pd.DataFrame, + segment_column_name: str, + model_settings: TourModeComponentSettings, + trace_label: str, ): """ Construct table of trips from half-tours (1 inbound, 1 outbound) for each tour-mode. 
@@ -60,7 +68,7 @@ def create_logsum_trips( tours : pandas.DataFrame segment_column_name : str column in tours table used for segmenting model spec - model_settings : dict + model_settings : TourModeComponentSettings trace_label : str Returns @@ -126,7 +134,11 @@ def append_tour_leg_trip_mode_choice_logsums(state: workflow.State, tours): def get_trip_mc_logsums_for_all_modes( - state: workflow.State, tours, segment_column_name, model_settings, trace_label + state: workflow.State, + tours: pd.DataFrame, + segment_column_name: str, + model_settings: TourModeComponentSettings, + trace_label: str, ): """Creates pseudo-trips from tours and runs trip mode choice to get logsums @@ -135,7 +147,7 @@ def get_trip_mc_logsums_for_all_modes( tours : pandas.DataFrame segment_column_name : str column in tours table used for segmenting model spec - model_settings : dict + model_settings : TourModeComponentSettings trace_label : str Returns @@ -176,15 +188,20 @@ def tour_mode_choice_simulate( tours: pd.DataFrame, persons_merged: pd.DataFrame, network_los: los.Network_LOS, + model_settings: TourModeComponentSettings | None = None, + model_settings_file_name: str = "tour_mode_choice.yaml", + trace_label: str = "tour_mode_choice", ) -> None: """ Tour mode choice simulate """ - trace_label = "tour_mode_choice" - model_settings_file_name = "tour_mode_choice.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TourModeComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.MODE_CHOICE_LOGSUM_COLUMN_NAME mode_column_name = "tour_mode" segment_column_name = "tour_purpose" @@ -206,7 +223,7 @@ def tour_mode_choice_simulate( constants = {} # model_constants can appear in expressions - constants.update(config.get_model_constants(model_settings)) + constants.update(model_settings.CONSTANTS) skim_dict = network_los.get_default_skim_dict() @@ -271,7 +288,7 @@ def tour_mode_choice_simulate( ) # TVPB constants can appear in expressions - if model_settings.get("use_TVPB_constants", True): + if model_settings.use_TVPB_constants: constants.update( network_los.setting("TVPB_SETTINGS.tour_mode_choice.CONSTANTS") ) @@ -302,7 +319,7 @@ def tour_mode_choice_simulate( ) # if trip logsums are used, run trip mode choice and append the logsums - if model_settings.get("COMPUTE_TRIP_MODE_CHOICE_LOGSUMS", False): + if model_settings.COMPUTE_TRIP_MODE_CHOICE_LOGSUMS: primary_tours_merged = get_trip_mc_logsums_for_all_modes( state, primary_tours_merged, @@ -357,7 +374,7 @@ def tour_mode_choice_simulate( # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: - tvpb_mode_path_types = model_settings.get("tvpb_mode_path_types") + tvpb_mode_path_types = model_settings.tvpb_mode_path_types if tvpb_mode_path_types is not None: for mode, path_types in tvpb_mode_path_types.items(): for direction, skim in zip( @@ -402,8 +419,9 @@ def tour_mode_choice_simulate( all_tours = tours assign_in_place(all_tours, choices_df) - if state.is_table("school_escort_tours") & model_settings.get( - "FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH", True + if ( + state.is_table("school_escort_tours") + & model_settings.FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH ): all_tours = ( school_escort_tours_trips.force_escortee_tour_modes_to_match_chauffeur( @@ -414,7 +432,7 @@ def 
tour_mode_choice_simulate( state.add_table("tours", all_tours) # - annotate tours table - if model_settings.get("annotate_tours"): + if model_settings.annotate_tours: annotate.annotate_tours(state, model_settings, trace_label) if state.settings.trace_hh_id: diff --git a/activitysim/abm/models/tour_od_choice.py b/activitysim/abm/models/tour_od_choice.py index c55af9af0..b518b36f9 100644 --- a/activitysim/abm/models/tour_od_choice.py +++ b/activitysim/abm/models/tour_od_choice.py @@ -20,7 +20,9 @@ def tour_od_choice( households: pd.DataFrame, land_use: pd.DataFrame, network_los: los.Network_LOS, - chunk_size, + model_settings: tour_od.TourODSettings | None = None, + model_settings_file_name: str = "tour_od_choice.yaml", + trace_label: str = "tour_od_choice", ) -> None: """Simulates joint origin/destination choice for all tours. @@ -43,24 +45,24 @@ def tour_od_choice( lazy-loaded land use data table network_los : los.Network_LOS lazy-loaded activitysim.los.Network_LOS object - chunk_size - simulation chunk size, set in main settings.yaml """ - - trace_label = "tour_od_choice" - model_settings_file_name = "tour_od_choice.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - origin_col_name = model_settings["ORIG_COL_NAME"] - dest_col_name = model_settings["DEST_COL_NAME"] + if model_settings is None: + model_settings = tour_od.TourODSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + origin_col_name = model_settings.ORIG_COL_NAME + dest_col_name = model_settings.DEST_COL_NAME alt_id_col = tour_od.get_od_id_col(origin_col_name, dest_col_name) trace_hh_id = state.settings.trace_hh_id + chunk_size = state.settings.chunk_size - sample_table_name = model_settings.get("OD_CHOICE_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.OD_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) - logsum_column_name = model_settings.get("OD_CHOICE_LOGSUM_COLUMN_NAME", None) + logsum_column_name = model_settings.OD_CHOICE_LOGSUM_COLUMN_NAME want_logsums = logsum_column_name is not None # interaction_sample_simulate insists choosers appear in same order as alts @@ -69,8 +71,8 @@ def tour_od_choice( estimator = estimation.manager.begin_estimation(state, "tour_od_choice") if estimator: estimator.write_coefficients(model_settings=model_settings) - estimator.write_spec(model_settings, tag="SAMPLE_SPEC") - estimator.write_spec(model_settings, tag="SPEC") + estimator.write_spec(file_name=model_settings.SAMPLE_SPEC, tag="SAMPLE_SPEC") + estimator.write_spec(file_name=model_settings.SPEC, tag="SPEC") estimator.set_alt_id(alt_id_col) estimator.write_table( state.get_injectable("size_terms"), "size_terms", append=False diff --git a/activitysim/abm/models/tour_scheduling_probabilistic.py b/activitysim/abm/models/tour_scheduling_probabilistic.py index 7d5961529..324db4566 100644 --- a/activitysim/abm/models/tour_scheduling_probabilistic.py +++ b/activitysim/abm/models/tour_scheduling_probabilistic.py @@ -8,6 +8,7 @@ from activitysim.abm.models.util import probabilistic_scheduling as ps from activitysim.core import chunk, estimation, workflow +from activitysim.core.configuration.base import PydanticReadable logger = logging.getLogger(__name__) @@ -69,8 +70,28 @@ def run_tour_scheduling_probabilistic( return choices +class TourSchedulingProbabilisticSettings(PydanticReadable): + """ + Settings for the `tour_scheduling_probabilistic` component. 
+ """ + + depart_alt_base: int = 0 + + PROBS_SPEC: str = "tour_scheduling_probs.csv" + """Filename for the tour scheduling probabilistic specification (csv) file.""" + + PROBS_JOIN_COLS: list[str] | None = None + """List of columns""" + + @workflow.step -def tour_scheduling_probabilistic(state: workflow.State, tours: pd.DataFrame) -> None: +def tour_scheduling_probabilistic( + state: workflow.State, + tours: pd.DataFrame, + model_settings: TourSchedulingProbabilisticSettings | None = None, + model_settings_file_name: str = "tour_scheduling_probabilistic.yaml", + trace_label: str = "tour_scheduling_probabilistic", +) -> None: """Makes tour departure and arrival choices by sampling from a probability lookup table This model samples tour scheduling choices from an exogenously defined probability @@ -89,15 +110,18 @@ def tour_scheduling_probabilistic(state: workflow.State, tours: pd.DataFrame) -> """ - trace_label = "tour_scheduling_probabilistic" - model_settings_file_name = "tour_scheduling_probabilistic.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - depart_alt_base = model_settings.get("depart_alt_base", 0) + if model_settings is None: + model_settings = TourSchedulingProbabilisticSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + + depart_alt_base = model_settings.depart_alt_base scheduling_probs_filepath = state.filesystem.get_config_file_path( - model_settings["PROBS_SPEC"] + model_settings.PROBS_SPEC ) scheduling_probs = pd.read_csv(scheduling_probs_filepath) - probs_join_cols = model_settings["PROBS_JOIN_COLS"] + probs_join_cols = model_settings.PROBS_JOIN_COLS tours_df = tours # trip_scheduling is a probabilistic model ane we don't support estimation, diff --git a/activitysim/abm/models/transit_pass_ownership.py b/activitysim/abm/models/transit_pass_ownership.py index 8fc23cc95..48e01c47d 100644 --- a/activitysim/abm/models/transit_pass_ownership.py +++ b/activitysim/abm/models/transit_pass_ownership.py @@ -14,33 +14,49 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger("activitysim") +class TransitPassOwnershipSettings(LogitComponentSettings): + """ + Settings for the `transit_pass_ownership` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + @workflow.step def transit_pass_ownership( state: workflow.State, persons_merged: pd.DataFrame, persons: pd.DataFrame, + model_settings: TransitPassOwnershipSettings | None = None, + model_settings_file_name: str = "transit_pass_ownership.yaml", + trace_label: str = "transit_pass_ownership", ) -> None: """ Transit pass ownership model. 
""" - trace_label = "transit_pass_ownership" - model_settings_file_name = "transit_pass_ownership.yaml" + if model_settings is None: + model_settings = TransitPassOwnershipSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) choosers = persons_merged logger.info("Running %s with %d persons", trace_label, len(choosers)) - model_settings = state.filesystem.read_model_settings(model_settings_file_name) estimator = estimation.manager.begin_estimation(state, "transit_pass_ownership") constants = config.get_model_constants(model_settings) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} if constants is not None: @@ -54,7 +70,7 @@ def transit_pass_ownership( trace_label=trace_label, ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/transit_pass_subsidy.py b/activitysim/abm/models/transit_pass_subsidy.py index cd6f3aa10..7d1f320e2 100644 --- a/activitysim/abm/models/transit_pass_subsidy.py +++ b/activitysim/abm/models/transit_pass_subsidy.py @@ -14,33 +14,48 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger("activitysim") +class TransitPassSubsidySettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `transit_pass_subsidy` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + @workflow.step def transit_pass_subsidy( state: workflow.State, persons_merged: pd.DataFrame, persons: pd.DataFrame, + model_settings: TransitPassSubsidySettings | None = None, + model_settings_file_name: str = "transit_pass_subsidy.yaml", + trace_label: str = "transit_pass_subsidy", ) -> None: """ Transit pass subsidy model. 
""" - - trace_label = "transit_pass_subsidy" - model_settings_file_name = "transit_pass_subsidy.yaml" + if model_settings is None: + model_settings = TransitPassSubsidySettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) choosers = persons_merged logger.info("Running %s with %d persons", trace_label, len(choosers)) - model_settings = state.filesystem.read_model_settings(model_settings_file_name) estimator = estimation.manager.begin_estimation(state, "transit_pass_subsidy") constants = config.get_model_constants(model_settings) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} if constants is not None: @@ -54,7 +69,7 @@ def transit_pass_subsidy( trace_label=trace_label, ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/trip_departure_choice.py b/activitysim/abm/models/trip_departure_choice.py index 9eb7dc90d..43f02df34 100644 --- a/activitysim/abm/models/trip_departure_choice.py +++ b/activitysim/abm/models/trip_departure_choice.py @@ -3,6 +3,8 @@ from __future__ import annotations import logging +from pathlib import Path +from typing import Any import numpy as np import pandas as pd @@ -18,6 +20,9 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.skim_dataset import SkimDataset +from activitysim.core.skim_dictionary import SkimDict from activitysim.core.util import reindex logger = logging.getLogger(__name__) @@ -477,14 +482,38 @@ def apply_stage_two_model(state, omnibus_spec, trips, chunk_size, trace_label): return trips["depart"].astype(int) +class TripDepartureChoiceSettings(PydanticReadable, extra="forbid"): + """ + Settings for the `trip_departure_choice` component. 
+ """ + + PREPROCESSOR: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + SPECIFICATION: str = "trip_departure_choice.csv" + """Filename for the trip departure choice (.csv) file.""" + + CONSTANTS: dict[str, Any] = {} + + @workflow.step def trip_departure_choice( - state: workflow.State, trips: pd.DataFrame, trips_merged: pd.DataFrame, skim_dict + state: workflow.State, + trips: pd.DataFrame, + trips_merged: pd.DataFrame, + skim_dict: SkimDict | SkimDataset, + model_settings: TripDepartureChoiceSettings | None = None, + model_settings_file_name: str = "trip_departure_choice.yaml", + trace_label: str = "trip_departure_choice", ) -> None: - trace_label = "trip_departure_choice" - model_settings = state.filesystem.read_model_settings("trip_departure_choice.yaml") - spec = state.filesystem.read_model_spec(file_name=model_settings["SPECIFICATION"]) + if model_settings is None: + model_settings = TripDepartureChoiceSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + + spec = state.filesystem.read_model_spec(file_name=model_settings.SPECIFICATION) trips_merged_df = trips_merged # add tour-based chunk_id so we can chunk all trips in tour together @@ -500,7 +529,7 @@ def trip_departure_choice( ) locals_d = config.get_model_constants(model_settings).copy() - preprocessor_settings = model_settings.get("PREPROCESSOR", None) + preprocessor_settings = model_settings.PREPROCESSOR tour_legs = get_tour_legs(trips_merged_df) state.get_rn_generator().add_channel("tour_legs", tour_legs) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 4b0384cec..210479121 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -3,11 +3,12 @@ from __future__ import annotations import logging -from builtins import range +import warnings from pathlib import Path import numpy as np import pandas as pd +from pydantic import root_validator from activitysim.abm.models.util.school_escort_tours_trips import ( split_out_school_escorting_trips, @@ -27,7 +28,8 @@ tracing, workflow, ) -from activitysim.core.configuration.base import Any, PydanticBase +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LocationComponentSettings from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.skim_dictionary import DataFrameMatrix @@ -44,37 +46,88 @@ # DEST_MAZ = 'dest_maz' -class TripDestinationSettings(PydanticBase): +class TripDestinationSettings(LocationComponentSettings, extra="forbid"): """Settings for the trip_destination component. .. versionadded:: 1.2 - - Note that this implementation is presently used only for generating - documentation, but future work may migrate the settings implementation to - actually use this pydantic code to validate the settings before running - the model. 
""" - SAMPLE_SPEC: Path - SPEC: Path - COEFFICIENTS: Path - SAMPLE_SIZE: int - """This many candidate stop locations will be sampled for each choice.""" - DESTINATION_SAMPLE_SPEC: Path - DESTINATION_SPEC: Path - LOGSUM_SETTINGS: Path DEST_CHOICE_LOGSUM_COLUMN_NAME: str = None DEST_CHOICE_SAMPLE_TABLE_NAME: str = None TRIP_ORIGIN: str = "origin" ALT_DEST_COL_NAME: str = "dest_taz" + PRIMARY_ORIGIN: str = "origin" PRIMARY_DEST: str = "tour_leg_dest" # must be created in preprocessor - REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] = None - CONSTANTS: dict[str, Any] = None - preprocessor: Any + REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None + preprocessor: PreprocessorSettings | None = None CLEANUP: bool fail_some_trips_for_testing: bool = False """This setting is used by testing code to force failed trip_destination.""" + @root_validator(pre=True) + def deprecated_destination_prefix(cls, values): + replacements = { + "DESTINATION_SAMPLE_SPEC": "SAMPLE_SPEC", + "DESTINATION_SPEC": "SPEC", + } + for badkey, goodkey in replacements.items(): + if badkey in values: + if goodkey in values: + if values[badkey] != values[goodkey]: + # both keys are given, with different values -> error + raise ValueError( + f"Deprecated `{badkey}` field must have the " + f"same value as `{goodkey}` if both are provided." + ) + else: + # both keys are given, with same values -> warning + warnings.warn( + f"Use of the field `{badkey}` in the " + "trip_destination configuration file is deprecated, use " + f"just `{goodkey}` instead (currently both are given).", + FutureWarning, + stacklevel=2, + ) + values.pop(badkey) + else: + # only the wrong key is given -> warning + warnings.warn( + f"Use of the field `{badkey}` in the " + "trip_destination configuration file is deprecated, use " + f"`{goodkey}` instead.", + FutureWarning, + stacklevel=2, + ) + values[goodkey] = values[badkey] + values.pop(badkey) + return values + + @property + def DESTINATION_SAMPLE_SPEC(self) -> Path: + """Alias for `SAMPLE_SPEC`. + + .. deprecated:: 1.3 + """ + warnings.warn( + "DESTINATION_SAMPLE_SPEC is deprecated, use SAMPLE_SPEC", + DeprecationWarning, + stacklevel=2, + ) + return self.SAMPLE_SPEC + + @property + def DESTINATION_SPEC(self) -> Path: + """Alias for `SPEC`. + + .. 
deprecated:: 1.3 + """ + warnings.warn( + "DESTINATION_SPEC is deprecated, use SPEC", + DeprecationWarning, + stacklevel=2, + ) + return self.SPEC + @workflow.func def _destination_sample( @@ -82,13 +135,13 @@ def _destination_sample( primary_purpose, trips, alternatives, - model_settings, + model_settings: TripDestinationSettings, size_term_matrix, skims, alt_dest_col_name, estimator, - chunk_tag, - trace_label, + chunk_tag: str, + trace_label: str, zone_layer=None, ): """ @@ -109,24 +162,26 @@ def _destination_sample( spec = simulate.spec_for_segment( state, - model_settings, - spec_id="DESTINATION_SAMPLE_SPEC", + None, + spec_id="SAMPLE_SPEC", segment_name=primary_purpose, estimator=estimator, + spec_file_name=model_settings.SAMPLE_SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) - sample_size = model_settings["SAMPLE_SIZE"] + sample_size = model_settings.SAMPLE_SIZE if state.settings.disable_destination_sampling or ( estimator and estimator.want_unsampled_alternatives ): # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count logger.info( - "Estimation mode for %s using unsampled alternatives short_circuit_choices" - % (trace_label,) + f"Estimation mode for {trace_label} using " + f"unsampled alternatives short_circuit_choices" ) sample_size = 0 - locals_dict = config.get_model_constants(model_settings).copy() + locals_dict = model_settings.CONSTANTS.copy() # size_terms of destination zones are purpose-specific, and trips have various purposes # so the relevant size_term for each interaction_sample row @@ -170,7 +225,7 @@ def destination_sample( primary_purpose, trips, alternatives, - model_settings, + model_settings: TripDestinationSettings, size_term_matrix, skim_hotel, estimator, @@ -180,7 +235,7 @@ def destination_sample( chunk_tag = "trip_destination.sample" skims = skim_hotel.sample_skims(presample=False) - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME choices = _destination_sample( state, @@ -474,7 +529,7 @@ def destination_presample( primary_purpose, trips, alternatives, - model_settings, + model_settings: TripDestinationSettings, size_term_matrix, skim_hotel, network_los, @@ -484,12 +539,12 @@ def destination_presample( trace_label = tracing.extend_trace_label(trace_label, "presample") chunk_tag = "trip_destination.presample" # distinguish from trip_destination.sample - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME TAZ_size_term_matrix = aggregate_size_term_matrix(size_term_matrix, network_los) - TRIP_ORIGIN = model_settings["TRIP_ORIGIN"] - PRIMARY_DEST = model_settings["PRIMARY_DEST"] + TRIP_ORIGIN = model_settings.TRIP_ORIGIN + PRIMARY_DEST = model_settings.PRIMARY_DEST trips_taz = trips.copy() trips_taz[TRIP_ORIGIN] = network_los.map_maz_to_taz(trips_taz[TRIP_ORIGIN]) @@ -678,7 +733,7 @@ def compute_logsums( trips: pd.DataFrame, destination_sample, tours_merged: pd.DataFrame, - model_settings, + model_settings: TripDestinationSettings, skim_hotel, trace_label: str, ): @@ -719,7 +774,7 @@ def compute_logsums( assert choosers.index.equals(destination_sample.index) logsum_settings = state.filesystem.read_model_settings( - model_settings["LOGSUM_SETTINGS"] + model_settings.LOGSUM_SETTINGS ) coefficients = state.filesystem.get_segment_coefficients( logsum_settings, primary_purpose @@ -749,8 +804,8 @@ def compute_logsums( # - od_logsums od_skims = { - "ORIGIN": model_settings["TRIP_ORIGIN"], - 
"DESTINATION": model_settings["ALT_DEST_COL_NAME"], + "ORIGIN": model_settings.TRIP_ORIGIN, + "DESTINATION": model_settings.ALT_DEST_COL_NAME, "odt_skims": skims["odt_skims"], "dot_skims": skims["dot_skims"], "od_skims": skims["od_skims"], @@ -778,8 +833,8 @@ def compute_logsums( # - dp_logsums dp_skims = { - "ORIGIN": model_settings["ALT_DEST_COL_NAME"], - "DESTINATION": model_settings["PRIMARY_DEST"], + "ORIGIN": model_settings.ALT_DEST_COL_NAME, + "DESTINATION": model_settings.PRIMARY_DEST, "odt_skims": skims["dpt_skims"], "dot_skims": skims["pdt_skims"], "od_skims": skims["dp_skims"], @@ -813,7 +868,7 @@ def trip_destination_simulate( primary_purpose, trips, destination_sample, - model_settings, + model_settings: TripDestinationSettings, want_logsums, size_term_matrix, skim_hotel, @@ -834,16 +889,18 @@ def trip_destination_simulate( spec = simulate.spec_for_segment( state, - model_settings, - spec_id="DESTINATION_SPEC", + None, + spec_id="SPEC", segment_name=primary_purpose, estimator=estimator, + spec_file_name=model_settings.SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) if estimator: estimator.write_choosers(trips) - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME logger.info("Running trip_destination_simulate with %d trips", len(trips)) @@ -855,7 +912,7 @@ def trip_destination_simulate( if trip_period_idx is not None: trips["trip_period"] = trip_period_idx - locals_dict = config.get_model_constants(model_settings).copy() + locals_dict = model_settings.CONSTANTS.copy() locals_dict.update( { "size_terms": size_term_matrix, @@ -914,7 +971,7 @@ def choose_trip_destination( trips, alternatives, tours_merged, - model_settings, + model_settings: TripDestinationSettings, want_logsums, want_sample_table, size_term_matrix, @@ -993,7 +1050,7 @@ def choose_trip_destination( if want_sample_table: # FIXME - sample_table destination_sample.set_index( - model_settings["ALT_DEST_COL_NAME"], append=True, inplace=True + model_settings.ALT_DEST_COL_NAME, append=True, inplace=True ) else: destination_sample = None @@ -1003,18 +1060,23 @@ def choose_trip_destination( return destinations, destination_sample -class SkimHotel(object): - def __init__(self, model_settings, network_los, trace_label): +class SkimHotel: + def __init__( + self, + model_settings: TripDestinationSettings, + network_los: los.Network_LOS, + trace_label: str, + ): self.model_settings = model_settings self.trace_label = tracing.extend_trace_label(trace_label, "skim_hotel") self.network_los = network_los self.zone_system = network_los.zone_system def sample_skims(self, presample): - o = self.model_settings["TRIP_ORIGIN"] - d = self.model_settings["ALT_DEST_COL_NAME"] - n = self.model_settings.get("PRIMARY_ORIGIN", "origin") - p = self.model_settings["PRIMARY_DEST"] + o = self.model_settings.TRIP_ORIGIN + d = self.model_settings.ALT_DEST_COL_NAME + n = self.model_settings.PRIMARY_ORIGIN + p = self.model_settings.PRIMARY_DEST if presample: assert not (self.zone_system == los.ONE_ZONE) @@ -1056,9 +1118,9 @@ def sample_skims(self, presample): return skims def logsum_skims(self): - o = self.model_settings["TRIP_ORIGIN"] - d = self.model_settings["ALT_DEST_COL_NAME"] - p = self.model_settings["PRIMARY_DEST"] + o = self.model_settings.TRIP_ORIGIN + d = self.model_settings.ALT_DEST_COL_NAME + p = self.model_settings.PRIMARY_DEST skim_dict = self.network_los.get_default_skim_dict() skims = { @@ -1130,12 +1192,14 @@ def logsum_skims(self): @workflow.func def 
run_trip_destination( state: workflow.State, - trips, - tours_merged, - estimator, - chunk_size, - trace_label, - fail_some_trips_for_testing=False, + trips: pd.DataFrame, + tours_merged: pd.DataFrame, + estimator: estimation.Estimator | None, + chunk_size: int, + trace_label: str, + fail_some_trips_for_testing: bool = False, + model_settings: TripDestinationSettings | None = None, + model_settings_file_name: str = "trip_destination.yaml", ): """ trip destination - main functionality separated from model step so it can be called iteratively @@ -1148,29 +1212,34 @@ def run_trip_destination( Parameters ---------- - trips - tours_merged - want_sample_table - chunk_size - trace_hh_id - trace_label + state : workflow.State + trips : pd.DataFrame + tours_merged : pd.DataFrame + estimator + chunk_size : int + trace_label : str + fail_some_trips_for_testing : bool, default False + model_settings : TripDestinationSettings, optional + model_settings_file_name : str, default "trip_destination.yaml" Returns ------- - + trips : pd.DataFrame + sample_list : pd.DataFrame """ - - model_settings_file_name = "trip_destination.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - preprocessor_settings = model_settings.get("preprocessor", None) + if model_settings is None: + model_settings = TripDestinationSettings.read_settings_file( + state.filesystem, model_settings_file_name + ) + preprocessor_settings = model_settings.preprocessor logsum_settings = state.filesystem.read_model_settings( - model_settings["LOGSUM_SETTINGS"] + model_settings.LOGSUM_SETTINGS ) - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME want_logsums = logsum_column_name is not None - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) @@ -1234,11 +1303,11 @@ def run_trip_destination( # - filter tours_merged (AFTER copying destination and origin columns to trips) # tours_merged is used for logsums, we filter it here upfront to save space and time tours_merged_cols = logsum_settings["TOURS_MERGED_CHOOSER_COLUMNS"] - redundant_cols = model_settings.get("REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS", []) + redundant_cols = model_settings.REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS or [] if redundant_cols: tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols] - assert model_settings["PRIMARY_DEST"] not in tours_merged_cols + assert model_settings.PRIMARY_DEST not in tours_merged_cols tours_merged = tours_merged[tours_merged_cols] # - skims @@ -1254,7 +1323,7 @@ def run_trip_destination( # don't need size terms in alternatives, just zone_id index alternatives = alternatives.drop(alternatives.columns, axis=1) - alternatives.index.name = model_settings["ALT_DEST_COL_NAME"] + alternatives.index.name = model_settings.ALT_DEST_COL_NAME sample_list = [] @@ -1275,7 +1344,7 @@ def run_trip_destination( "network_los": network_los, "size_terms": size_term_matrix, } - locals_dict.update(config.get_model_constants(model_settings)) + locals_dict.update(model_settings.CONSTANTS) # - annotate nth_trips if preprocessor_settings: @@ -1379,7 +1448,12 @@ def run_trip_destination( @workflow.step def trip_destination( - state: workflow.State, trips: pd.DataFrame, tours_merged: pd.DataFrame + state: workflow.State, + trips: 
pd.DataFrame, + tours_merged: pd.DataFrame, + model_settings: TripDestinationSettings | None = None, + model_settings_file_name: str = "trip_destination.yaml", + trace_label: str = "trip_destination", ) -> None: """ Choose a destination for all intermediate trips based on trip purpose. @@ -1395,27 +1469,30 @@ Parameters ---------- + state : workflow.State trips : DataFrame The trips table. This table is edited in-place to add the trip destinations. tours_merged : DataFrame The tours table, with columns merged from persons and households as well. - chunk_size : int - If non-zero, iterate over trips using this chunk size. - trace_hh_id : int or list[int] - Generate trace output for these households. - + model_settings : TripDestinationSettings, optional + The settings used in this model component. If not provided, they are + loaded out of the configs directory YAML file referenced by + the `model_settings_file_name` argument. + model_settings_file_name : str, default "trip_destination.yaml" + This is where model settings are found if `model_settings` is not given + explicitly. The same filename is also used to write settings files to + the estimation data bundle in estimation mode. + trace_label : str, default "trip_destination" + This label is used for various tracing purposes. """ - trace_label = "trip_destination" - - model_settings_file_name = "trip_destination.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - - CLEANUP = model_settings.get("CLEANUP", True) - fail_some_trips_for_testing = model_settings.get( - "fail_some_trips_for_testing", False - ) + if model_settings is None: + model_settings = TripDestinationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + fail_some_trips_for_testing = model_settings.fail_some_trips_for_testing trips_df = trips tours_merged_df = tours_merged @@ -1429,10 +1506,9 @@ def trip_destination( estimator = estimation.manager.begin_estimation(state, "trip_destination") if estimator: - estimator.write_coefficients(model_settings=model_settings) - # estimator.write_spec(model_settings, tag='SAMPLE_SPEC') - estimator.write_spec(model_settings, tag="SPEC") - estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"]) + estimator.write_coefficients(file_name=model_settings.COEFFICIENTS) + estimator.write_spec(file_name=model_settings.SPEC, tag="SPEC") + estimator.set_alt_id(model_settings.ALT_DEST_COL_NAME) estimator.write_table( state.get_injectable("size_terms"), "size_terms", append=False ) @@ -1479,7 +1555,7 @@ def trip_destination( # no trips should have failed since we overwrite choices and sample should have not failed trips assert not trips_df.failed.any() - if CLEANUP: + if model_settings.CLEANUP: if trips_df.failed.any(): flag_failed_trip_leg_mates(trips_df, "failed") @@ -1504,7 +1580,7 @@ def trip_destination( # Origin is previous destination # (leaving first origin alone as it's already set correctly) trips_df["origin"] = np.where( - (trips_df["trip_num"] == 1) & (trips_df["outbound"] == True), + (trips_df["trip_num"] == 1) & (trips_df["outbound"] == 1), trips_df["origin"], trips_df.groupby("tour_id")["destination"].shift(), ).astype(int) @@ -1528,12 +1604,10 @@ def trip_destination( trips_df[trips_df.trip_num < trips_df.trip_count] ) - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME assert sample_table_name is not None - logger.info( - "adding %s samples to %s" %
(len(save_sample_df), sample_table_name) - ) + logger.info(f"adding {len(save_sample_df)} samples to {sample_table_name}") # lest they try to put tour samples into the same table if state.is_table(sample_table_name): diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 5476b3983..12c2e4f6a 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -3,21 +3,56 @@ from __future__ import annotations import logging +from pathlib import Path +from typing import Any import numpy as np import openmatrix as omx import pandas as pd from activitysim.core import config, expressions, los, workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger(__name__) +class MatrixTableSettings(PydanticReadable): + name: str + data_field: str + + +class MatrixSettings(PydanticReadable): + file_name: Path + tables: list[MatrixTableSettings] = [] + is_tap: bool = False + + +class WriteTripMatricesSettings(PydanticReadable): + """ + Settings for the `write_trip_matrices` component. + """ + + SAVE_TRIPS_TABLE: bool = False + """Save the annotated trips table to the pipeline.""" + + HH_EXPANSION_WEIGHT_COL: str = "sample_rate" + """Name of the column representing the household sampling (expansion) rate.""" + + MATRICES: list[MatrixSettings] = [] + + CONSTANTS: dict[str, Any] = {} + + preprocessor: PreprocessorSettings | None = None + + @workflow.step(copy_tables=["trips"]) def write_trip_matrices( state: workflow.State, network_los: los.Network_LOS, trips: pd.DataFrame, + model_settings: WriteTripMatricesSettings | None = None, + model_settings_file_name: str = "write_trip_matrices.yaml", ) -> None: """ Write trip matrices step.
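The nested MATRICES block that write_trip_matrices.yaml previously carried as plain dicts now parses into typed objects, so each level is validated up front. A hedged sketch of the mapping, written pydantic-v1 style (parse_obj) to match the root_validator usage elsewhere in this diff; the matrix and table names below are invented:

from activitysim.abm.models.trip_matrices import WriteTripMatricesSettings

settings = WriteTripMatricesSettings.parse_obj(
    {
        "MATRICES": [
            {
                "file_name": "trips_am.omx",  # illustrative names only
                "tables": [{"name": "SOV_AM", "data_field": "sov_am_trips"}],
            }
        ],
    }
)
assert settings.MATRICES[0].file_name.suffix == ".omx"  # coerced to Path
assert settings.MATRICES[0].tables[0].data_field == "sov_am_trips"
assert settings.MATRICES[0].is_tap is False  # default applies when omitted

A missing or misspelled data_field key now surfaces as a validation error when the YAML is read, instead of an empty lookup deep inside write_matrices.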
@@ -46,10 +81,15 @@ def write_trip_matrices( ) return - model_settings = state.filesystem.read_model_settings("write_trip_matrices.yaml") + if model_settings is None: + model_settings = WriteTripMatricesSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + trips_df = annotate_trips(state, trips, network_los, model_settings) - if bool(model_settings.get("SAVE_TRIPS_TABLE")): + if model_settings.SAVE_TRIPS_TABLE: state.add_table("trips", trips_df) if "parking_location" in state.settings.models: @@ -72,7 +112,7 @@ def write_trip_matrices( ) # use the average household weight for all trips in the origin destination pair - hh_weight_col = model_settings.get("HH_EXPANSION_WEIGHT_COL") + hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL aggregate_weight = ( trips_df[["origin", "destination", hh_weight_col]] .groupby(["origin", "destination"], sort=False) @@ -116,7 +156,7 @@ def write_trip_matrices( ) # use the average household weight for all trips in the origin destination pair - hh_weight_col = model_settings.get("HH_EXPANSION_WEIGHT_COL") + hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL aggregate_weight = ( trips_df[["otaz", "dtaz", hh_weight_col]] .groupby(["otaz", "dtaz"], sort=False) @@ -164,7 +204,7 @@ def write_trip_matrices( ) # use the average household weight for all trips in the origin destination pair - hh_weight_col = model_settings.get("HH_EXPANSION_WEIGHT_COL") + hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL aggregate_weight = ( trips_df[["otaz", "dtaz", hh_weight_col]] .groupby(["otaz", "dtaz"], sort=False) @@ -201,7 +241,7 @@ def write_trip_matrices( ) # use the average household weight for all trips in the origin destination pair - hh_weight_col = model_settings.get("HH_EXPANSION_WEIGHT_COL") + hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL aggregate_weight = ( trips_df[["btap", "atap", hh_weight_col]] .groupby(["btap", "atap"], sort=False) @@ -230,9 +270,11 @@ def write_trip_matrices( ) -@workflow.func def annotate_trips( - state: workflow.State, trips: pd.DataFrame, network_los, model_settings + state: workflow.State, + trips: pd.DataFrame, + network_los, + model_settings: WriteTripMatricesSettings, ): """ Add columns to local trips table. The annotator has @@ -277,7 +319,7 @@ def annotate_trips( # Data will be expanded by an expansion weight column from # the households pipeline table, if specified in the model settings. - hh_weight_col = model_settings.get("HH_EXPANSION_WEIGHT_COL") + hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL if hh_weight_col and hh_weight_col not in trips_df: logger.info("adding '%s' from households to trips table" % hh_weight_col) @@ -293,7 +335,7 @@ def write_matrices( zone_index, orig_index, dest_index, - model_settings, + model_settings: WriteTripMatricesSettings, is_tap=False, ): """ @@ -308,30 +350,30 @@ def write_matrices( but the table 'data_field's must be summable types: ints, floats, bools. 
""" - matrix_settings = model_settings.get("MATRICES") + matrix_settings = model_settings.MATRICES if not matrix_settings: logger.error("Missing MATRICES setting in write_trip_matrices.yaml") for matrix in matrix_settings: - matrix_is_tap = matrix.get("is_tap", False) + matrix_is_tap = matrix.is_tap if matrix_is_tap == is_tap: # only write tap matrices to tap matrix files - filename = matrix.get("file_name") + filename = str(matrix.file_name) filepath = state.get_output_file_path(filename) logger.info("opening %s" % filepath) file = omx.open_file(str(filepath), "w") # possibly overwrite existing file - table_settings = matrix.get("tables") + table_settings = matrix.tables for table in table_settings: - table_name = table.get("name") - col = table.get("data_field") + table_name = table.name + col = table.data_field if col not in aggregate_trips: logger.error(f"missing {col} column in aggregate_trips DataFrame") return - hh_weight_col = model_settings.get("HH_EXPANSION_WEIGHT_COL") + hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL if hh_weight_col: aggregate_trips[col] = ( aggregate_trips[col] / aggregate_trips[hh_weight_col] diff --git a/activitysim/abm/models/trip_mode_choice.py b/activitysim/abm/models/trip_mode_choice.py index 907444137..25f20cc05 100644 --- a/activitysim/abm/models/trip_mode_choice.py +++ b/activitysim/abm/models/trip_mode_choice.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from typing import Any import numpy as np import pandas as pd @@ -19,14 +20,51 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import TemplatedLogitComponentSettings from activitysim.core.util import assign_in_place logger = logging.getLogger(__name__) +class TripModeChoiceSettings(TemplatedLogitComponentSettings, extra="forbid"): + """ + Settings for the `trip_mode_choice` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + MODE_CHOICE_LOGSUM_COLUMN_NAME: str = "mode_choice_logsum" + """Column name of the mode choice logsum""" + + TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None + """List of columns to be filtered from the dataframe to reduce memory + needs filter chooser table to these fields""" + + CHOOSER_COLS_TO_KEEP: list[str] = [] + + tvpb_mode_path_types: dict[str, Any] = {} + TVPB_recipe: str = "tour_mode_choice" + use_TVPB_constants: bool = True + + FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH: bool = True + + annotate_trips: PreprocessorSettings | None = None + + LEGACY_COEFFICIENTS: str | None = None + + REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None + + @workflow.step def trip_mode_choice( - state: workflow.State, trips: pd.DataFrame, network_los: los.Network_LOS + state: workflow.State, + trips: pd.DataFrame, + network_los: los.Network_LOS, + model_settings: TripModeChoiceSettings | None = None, + model_settings_file_name: str = "trip_mode_choice.yaml", + trace_label: str = "trip_mode_choice", ) -> None: """ Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip. 
@@ -37,11 +75,13 @@ def trip_mode_choice( Adds trip_mode column to trip table """ - trace_label = "trip_mode_choice" - model_settings_file_name = "trip_mode_choice.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TripModeChoiceSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.MODE_CHOICE_LOGSUM_COLUMN_NAME mode_column_name = "trip_mode" trips_df = trips @@ -52,7 +92,7 @@ def trip_mode_choice( # needed by tour_merged (e.g. home_zone_id) exist tours_cols = [ col - for col in model_settings["TOURS_MERGED_CHOOSER_COLUMNS"] + for col in model_settings.TOURS_MERGED_CHOOSER_COLUMNS if col not in trips_df.columns ] if len(tours_cols) > 0: @@ -78,7 +118,7 @@ def trip_mode_choice( orig_col = "origin" dest_col = "destination" - min_per_period = network_los.skim_time_periods["period_minutes"] + min_per_period = network_los.skim_time_periods.period_minutes periods_per_hour = 60 / min_per_period constants = {} @@ -118,7 +158,7 @@ def trip_mode_choice( if network_los.zone_system == los.THREE_ZONE: # fixme - is this a lightweight object? tvpb = network_los.tvpb - tvpb_recipe = model_settings.get("TVPB_recipe", "tour_mode_choice") + tvpb_recipe = model_settings.TVPB_recipe tvpb_logsum_odt = tvpb.wrap_logsum( orig_key=orig_col, dest_key=dest_col, @@ -145,7 +185,7 @@ def trip_mode_choice( # the tvpb will still use the constants as defined in the recipe # specified above in `tvpb.wrap_logsum()` but they will not be used # in the trip mode choice expressions. - if model_settings.get("use_TVPB_constants", True): + if model_settings.use_TVPB_constants: constants.update( network_los.setting("TVPB_SETTINGS.tour_mode_choice.CONSTANTS") ) @@ -162,9 +202,9 @@ def trip_mode_choice( estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) nest_spec = config.get_logit_model_settings(model_settings) - cols_to_keep = model_settings.get("CHOOSER_COLS_TO_KEEP", None) + cols_to_keep = model_settings.CHOOSER_COLS_TO_KEEP choices_list = [] cols_to_keep_list = [] @@ -271,7 +311,7 @@ def trip_mode_choice( # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: - tvpb_mode_path_types = model_settings.get("tvpb_mode_path_types") + tvpb_mode_path_types = model_settings.tvpb_mode_path_types for mode, path_type in tvpb_mode_path_types.items(): skim_cache = tvpb_logsum_odt.cache[path_type] @@ -300,8 +340,9 @@ def trip_mode_choice( assign_in_place(trips_df, choices_df) - if state.is_table("school_escort_tours") & model_settings.get( - "FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH", True + if ( + state.is_table("school_escort_tours") + & model_settings.FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH ): trips_df = ( school_escort_tours_trips.force_escortee_trip_modes_to_match_chauffeur( @@ -319,7 +360,7 @@ def trip_mode_choice( state.add_table("trips", trips_df) - if model_settings.get("annotate_trips"): + if model_settings.annotate_trips: annotate.annotate_trips(state, model_settings, trace_label, locals_dict) if state.settings.trace_hh_id: diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py index 
7af387e21..1570d672c 100644 --- a/activitysim/abm/models/trip_purpose.py +++ b/activitysim/abm/models/trip_purpose.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from typing import Any import numpy as np import pandas as pd @@ -17,9 +18,9 @@ expressions, logit, simulate, - tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.util import reindex logger = logging.getLogger(__name__) @@ -123,15 +124,12 @@ def choose_intermediate_trip_purpose( file_name = "%s.UNMATCHED_PROBS" % trace_label logger.error( - "%s %s of %s intermediate trips could not be matched to probs based on join columns %s" - % (trace_label, len(unmatched_choosers), len(choosers), probs_join_cols) + "{} {} of {} intermediate trips could not be matched to probs based on join columns {}".format( + trace_label, len(unmatched_choosers), len(choosers), probs_join_cols + ) ) logger.info( - "Writing %s unmatched choosers to %s" - % ( - len(unmatched_choosers), - file_name, - ) + f"Writing {len(unmatched_choosers)} unmatched choosers to {file_name}" ) state.tracing.write_csv( unmatched_choosers, file_name=file_name, transpose=False @@ -166,7 +164,22 @@ def choose_intermediate_trip_purpose( return choices -def run_trip_purpose(state: workflow.State, trips_df, estimator, trace_label): +class TripPurposeSettings(PydanticReadable): + probs_join_cols: list[str] = ["primary_purpose", "outbound", "person_type"] + PROBS_SPEC: str = "trip_purpose_probs.csv" + preprocessor: PreprocessorSettings | None = None + use_depart_time: bool = True + CONSTANTS: dict[str, Any] = {} + + +def run_trip_purpose( + state: workflow.State, + trips_df: pd.DataFrame, + estimator, + model_settings: TripPurposeSettings | None = None, + model_settings_file_name: str = "trip_purpose.yaml", + trace_label: str = "trip_purpose", +): """ trip purpose - main functionality separated from model step so it can be called iteratively @@ -186,12 +199,14 @@ def run_trip_purpose(state: workflow.State, trips_df, estimator, trace_label): # uniform across trip_purpose chunk_tag = "trip_purpose" - model_settings_file_name = "trip_purpose.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = TripPurposeSettings.read_settings_file( + state.filesystem, model_settings_file_name + ) - probs_join_cols = model_settings.get("probs_join_cols", PROBS_JOIN_COLUMNS) + probs_join_cols = model_settings.probs_join_cols - spec_file_name = model_settings.get("PROBS_SPEC", "trip_purpose_probs.csv") + spec_file_name = model_settings.PROBS_SPEC probs_spec = pd.read_csv( state.filesystem.get_config_file_path(spec_file_name), comment="#" ) @@ -225,7 +240,7 @@ def run_trip_purpose(state: workflow.State, trips_df, estimator, trace_label): trips_df = trips_df[~last_trip] logger.info("assign purpose to %s intermediate trips", trips_df.shape[0]) - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_dict = config.get_model_constants(model_settings) expressions.assign_columns( @@ -236,10 +251,10 @@ def run_trip_purpose(state: workflow.State, trips_df, estimator, trace_label): trace_label=trace_label, ) - use_depart_time = model_settings.get("use_depart_time", True) + use_depart_time = model_settings.use_depart_time for ( - i, + _i, trips_chunk, chunk_trace_label, chunk_sizer, @@ -258,7 +273,7 @@ def run_trip_purpose(state: 
workflow.State, trips_df, estimator, trace_label): result_list.append(choices) - chunk_sizer.log_df(trace_label, f"result_list", result_list) + chunk_sizer.log_df(trace_label, "result_list", result_list) if len(result_list) > 1: choices = pd.concat(result_list) diff --git a/activitysim/abm/models/trip_purpose_and_destination.py b/activitysim/abm/models/trip_purpose_and_destination.py index 3e7baf2b0..146588088 100644 --- a/activitysim/abm/models/trip_purpose_and_destination.py +++ b/activitysim/abm/models/trip_purpose_and_destination.py @@ -13,6 +13,7 @@ flag_failed_trip_leg_mates, ) from activitysim.core import estimation, tracing, workflow +from activitysim.core.configuration.base import PydanticReadable from activitysim.core.util import assign_in_place logger = logging.getLogger(__name__) @@ -50,16 +51,30 @@ def run_trip_purpose_and_destination( return trips_df, save_sample_df +class TripPurposeAndDestinationSettings(PydanticReadable): + """ + Settings for the `trip_purpose_and_destination` component. + """ + + MAX_ITERATIONS: int = 5 + """Maximum number of iterations.""" + + @workflow.step def trip_purpose_and_destination( state: workflow.State, trips: pd.DataFrame, tours_merged: pd.DataFrame, + model_settings: TripPurposeAndDestinationSettings | None = None, + model_settings_file_name: str = "trip_purpose_and_destination.yaml", + trace_label: str = "trip_purpose_and_destination", ) -> None: - trace_label = "trip_purpose_and_destination" - model_settings = state.filesystem.read_model_settings( - "trip_purpose_and_destination.yaml" - ) + + if model_settings is None: + model_settings = TripPurposeAndDestinationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) # for consistency, read sample_table_name setting from trip_destination settings file trip_destination_model_settings = state.filesystem.read_model_settings( @@ -72,7 +87,7 @@ def trip_purpose_and_destination( state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) - MAX_ITERATIONS = model_settings.get("MAX_ITERATIONS", 5) + MAX_ITERATIONS = model_settings.MAX_ITERATIONS trips_df = trips tours_merged_df = tours_merged diff --git a/activitysim/abm/models/trip_scheduling.py b/activitysim/abm/models/trip_scheduling.py index 15fe0c6d9..3dddd5e2b 100644 --- a/activitysim/abm/models/trip_scheduling.py +++ b/activitysim/abm/models/trip_scheduling.py @@ -5,6 +5,7 @@ import logging import warnings from builtins import range +from typing import Any, Literal import numpy as np import pandas as pd @@ -15,6 +16,7 @@ ) from activitysim.abm.models.util.trip import cleanup_failed_trips, failed_trip_cohorts from activitysim.core import chunk, config, estimation, expressions, tracing, workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.util import reindex logger = logging.getLogger(__name__) @@ -40,18 +42,18 @@ DEPARTURE_MODE = "departure" DURATION_MODE = "stop_duration" RELATIVE_MODE = "relative" -PROBS_JOIN_COLUMNS_DEPARTURE_BASED = [ +PROBS_JOIN_COLUMNS_DEPARTURE_BASED: list[str] = [ "primary_purpose", "outbound", "tour_hour", "trip_num", ] -PROBS_JOIN_COLUMNS_DURATION_BASED = ["outbound", "stop_num"] -PROBS_JOIN_COLUMNS_RELATIVE_BASED = ["outbound", "periods_left"] +PROBS_JOIN_COLUMNS_DURATION_BASED: list[str] = ["outbound", "stop_num"] +PROBS_JOIN_COLUMNS_RELATIVE_BASED: list[str] = ["outbound", "periods_left"] -def _logic_version(model_settings): - logic_version = model_settings.get("logic_version", None) +def
_logic_version(model_settings: TripSchedulingSettings): + logic_version = model_settings.logic_version if logic_version is None: warnings.warn( "The trip_scheduling component now has a logic_version setting " @@ -195,7 +197,7 @@ def schedule_trips_in_leg( outbound, trips, probs_spec, - model_settings, + model_settings: TripSchedulingSettings, is_last_iteration, trace_label, *, @@ -219,29 +221,25 @@ depart choice for trips, indexed by trip_id """ - failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT) - depart_alt_base = model_settings.get("DEPART_ALT_BASE", 0) - scheduling_mode = model_settings.get("scheduling_mode", "departure") - preprocessor_settings = model_settings.get("preprocessor", None) - - if scheduling_mode == "departure": - probs_join_cols = model_settings.get( - "probs_join_cols", PROBS_JOIN_COLUMNS_DEPARTURE_BASED - ) - elif scheduling_mode == "stop_duration": - probs_join_cols = model_settings.get( - "probs_join_cols", PROBS_JOIN_COLUMNS_DURATION_BASED - ) - elif scheduling_mode == "relative": - probs_join_cols = model_settings.get( - "probs_join_cols", PROBS_JOIN_COLUMNS_RELATIVE_BASED - ) - else: - logger.error( - "Invalid scheduling mode specified: {0}.".format(scheduling_mode), - "Please select one of ['departure', 'stop_duration', 'relative'] and try again.", - ) - raise ValueError(f"Invalid scheduling mode specified: {scheduling_mode}") + failfix = model_settings.FAILFIX + depart_alt_base = model_settings.DEPART_ALT_BASE + scheduling_mode = model_settings.scheduling_mode + preprocessor_settings = model_settings.preprocessor + + probs_join_cols = model_settings.probs_join_cols + if probs_join_cols is None: + if scheduling_mode == "departure": + probs_join_cols = PROBS_JOIN_COLUMNS_DEPARTURE_BASED + elif scheduling_mode == "stop_duration": + probs_join_cols = PROBS_JOIN_COLUMNS_DURATION_BASED + elif scheduling_mode == "relative": + probs_join_cols = PROBS_JOIN_COLUMNS_RELATIVE_BASED + else: + logger.error( + f"Invalid scheduling mode specified: {scheduling_mode}. " + "Please select one of ['departure', 'stop_duration', 'relative'] and try again." + ) + raise ValueError(f"Invalid scheduling mode specified: {scheduling_mode}") # logger.debug("%s scheduling %s trips" % (trace_label, trips.shape[0])) @@ -429,11 +427,46 @@ def run_trip_scheduling( return choices +class TripSchedulingSettings(PydanticReadable): + """ + Settings for the `trip_scheduling` component. + """ + + PROBS_SPEC: str = "trip_scheduling_probs.csv" + """Filename for the trip scheduling probabilities (.csv) file.""" + + COEFFICIENTS: str = "trip_scheduling_coefficients.csv" + """Filename for the trip scheduling coefficients (.csv) file.""" + + FAILFIX: str = "choose_most_initial" + """How to handle trips that cannot be scheduled successfully.""" + + MAX_ITERATIONS: int = 1 + """Maximum iterations.""" + + DEPART_ALT_BASE: int = 5 + """Integer to add to probs column index to get time period it represents. + e.g.
depart_alt_base = 5 means first column (column 0) represents 5 am""" + + scheduling_mode: Literal["departure", "stop_duration", "relative"] = "departure" + + probs_join_cols: list[str] | None = None + + preprocessor: PreprocessorSettings | None = None + + logic_version: int | None = None + + CONSTANTS: dict[str, Any] = {} + + @workflow.step(copy_tables=False) def trip_scheduling( state: workflow.State, trips: pd.DataFrame, tours: pd.DataFrame, + model_settings: TripSchedulingSettings | None = None, + model_settings_file_name: str = "trip_scheduling.yaml", + trace_label: str = "trip_scheduling", ) -> None: """ Trip scheduling assigns depart times for trips within the start, end limits of the tour. @@ -480,9 +513,12 @@ def trip_scheduling( Which option is applied is determined by the FAILFIX model setting """ - trace_label = "trip_scheduling" - model_settings_file_name = "trip_scheduling.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + + if model_settings is None: + model_settings = TripSchedulingSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) trips_df = trips.copy() @@ -517,7 +553,7 @@ def trip_scheduling( ] estimator.write_choosers(trips_df[chooser_cols_for_estimation]) - probs_spec_file = model_settings.get("PROBS_SPEC", "trip_scheduling_probs.csv") + probs_spec_file = model_settings.PROBS_SPEC probs_spec = pd.read_csv( state.filesystem.get_config_file_path(probs_spec_file), comment="#" ) @@ -531,10 +567,9 @@ def trip_scheduling( pd.Series(list(range(len(tours))), tours.index), trips_df.tour_id ) - assert "DEPART_ALT_BASE" in model_settings - failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT) + failfix = model_settings.FAILFIX - max_iterations = model_settings.get("MAX_ITERATIONS", 1) + max_iterations = model_settings.MAX_ITERATIONS assert max_iterations > 0 choices_list = [] @@ -580,9 +615,7 @@ def trip_scheduling( failed = choices.reindex(trips_chunk.index).isnull() logger.info("%s %s failed", trace_label_i, failed.sum()) - if (failed.sum() > 0) & ( - model_settings.get("scheduling_mode") == "relative" - ): + if (failed.sum() > 0) & (model_settings.scheduling_mode == "relative"): raise RuntimeError("failed trips with relative scheduling mode") if not is_last_iteration: diff --git a/activitysim/abm/models/trip_scheduling_choice.py b/activitysim/abm/models/trip_scheduling_choice.py index 91630c340..d6f6d5886 100644 --- a/activitysim/abm/models/trip_scheduling_choice.py +++ b/activitysim/abm/models/trip_scheduling_choice.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging -from typing import Mapping +from collections.abc import Mapping import numpy as np import pandas as pd @@ -13,7 +13,10 @@ get_time_windows, ) from activitysim.core import chunk, expressions, simulate, tracing, workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.interaction_sample_simulate import _interaction_sample_simulate +from activitysim.core.skim_dataset import SkimDataset +from activitysim.core.skim_dictionary import SkimDict logger = logging.getLogger(__name__) @@ -64,9 +67,7 @@ def generate_schedule_alternatives(tours): stops. :return: pd.Dataframe: Potential time duration windows. 
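# --- Illustrative sketch (not part of the patch): probs_join_cols fallback ---
# The trip_scheduling hunks above collapse three per-mode model_settings.get()
# calls into one optional probs_join_cols field that falls back to a
# mode-specific default. A minimal standalone rendering of that fallback logic:
from __future__ import annotations

PROBS_JOIN_COLUMNS_DEPARTURE_BASED = ["primary_purpose", "outbound", "tour_hour", "trip_num"]
PROBS_JOIN_COLUMNS_DURATION_BASED = ["outbound", "stop_num"]
PROBS_JOIN_COLUMNS_RELATIVE_BASED = ["outbound", "periods_left"]

def resolve_probs_join_cols(scheduling_mode: str, probs_join_cols: list[str] | None) -> list[str]:
    # an explicit setting always wins; otherwise use the default for the mode
    if probs_join_cols is not None:
        return probs_join_cols
    defaults = {
        "departure": PROBS_JOIN_COLUMNS_DEPARTURE_BASED,
        "stop_duration": PROBS_JOIN_COLUMNS_DURATION_BASED,
        "relative": PROBS_JOIN_COLUMNS_RELATIVE_BASED,
    }
    if scheduling_mode not in defaults:
        raise ValueError(f"Invalid scheduling mode specified: {scheduling_mode}")
    return defaults[scheduling_mode]

assert resolve_probs_join_cols("relative", None) == ["outbound", "periods_left"]
assert resolve_probs_join_cols("departure", ["outbound"]) == ["outbound"]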
""" - assert set([NUM_IB_STOPS, NUM_OB_STOPS, TOUR_DURATION_COLUMN]).issubset( - tours.columns - ) + assert {NUM_IB_STOPS, NUM_OB_STOPS, TOUR_DURATION_COLUMN}.issubset(tours.columns) stop_pattern = tours[HAS_OB_STOPS].astype(int) + tours[HAS_IB_STOPS].astype(int) @@ -192,16 +193,19 @@ def get_pattern_index_and_arrays(tour_indexes, durations, one_way=True): return indexes, patterns, pattern_sizes -def get_spec_for_segment(state: workflow.State, model_settings, spec_name, segment): +def get_spec_for_segment( + state: workflow.State, model_settings: TripSchedulingChoiceSettings, segment: str +): """ Read in the model spec :param model_settings: model settings file - :param spec_name: name of the key in the settings file :param segment: which segment of the spec file do you want to read :return: array of utility equations """ - omnibus_spec = state.filesystem.read_model_spec(file_name=model_settings[spec_name]) + omnibus_spec = state.filesystem.read_model_spec( + file_name=model_settings.SPECIFICATION + ) spec = omnibus_spec[[segment]] @@ -300,7 +304,7 @@ def run_trip_scheduling_choice( result_list.append(choices) - chunk_sizer.log_df(trace_label, f"result_list", result_list) + chunk_sizer.log_df(trace_label, "result_list", result_list) # FIXME: this will require 2X RAM # if necessary, could append to hdf5 store on disk: @@ -323,16 +327,36 @@ def run_trip_scheduling_choice( return tours +class TripSchedulingChoiceSettings(PydanticReadable, extra="forbid"): + """ + Settings for the `trip_scheduling_choice` component. + """ + + PREPROCESSOR: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + SPECIFICATION: str + """file name of specification file""" + + @workflow.step def trip_scheduling_choice( state: workflow.State, trips: pd.DataFrame, tours: pd.DataFrame, - skim_dict, + skim_dict: SkimDict | SkimDataset, + model_settings: TripSchedulingChoiceSettings | None = None, + model_settings_file_name: str = "trip_scheduling_choice.yaml", + trace_label: str = "trip_scheduling_choice", ) -> None: - trace_label = "trip_scheduling_choice" - model_settings = state.filesystem.read_model_settings("trip_scheduling_choice.yaml") - spec = get_spec_for_segment(state, model_settings, "SPECIFICATION", "stage_one") + + if model_settings is None: + model_settings = TripSchedulingChoiceSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + + spec = get_spec_for_segment(state, model_settings, "stage_one") trips_df = trips tours_df = tours @@ -364,7 +388,7 @@ def trip_scheduling_choice( .reindex(tours.index) ) - preprocessor_settings = model_settings.get("PREPROCESSOR", None) + preprocessor_settings = model_settings.PREPROCESSOR # hack: preprocessor adds origin column in place if it does not exist already od_skim_stack_wrapper = skim_dict.wrap("origin", "destination") diff --git a/activitysim/abm/models/util/annotate.py b/activitysim/abm/models/util/annotate.py index 7726aac01..a689637f9 100644 --- a/activitysim/abm/models/util/annotate.py +++ b/activitysim/abm/models/util/annotate.py @@ -1,11 +1,13 @@ +# ActivitySim +# See full license in LICENSE.txt. from __future__ import annotations import logging -from activitysim.core import expressions, tracing, workflow +import pandas as pd -# ActivitySim -# See full license in LICENSE.txt. 
+from activitysim.core import expressions, tracing, workflow +from activitysim.core.configuration import PydanticBase """ Code for annotating tables @@ -15,16 +17,23 @@ def annotate_tours( - state: workflow.State, model_settings, trace_label, locals_dict=None + state: workflow.State, + model_settings: dict | PydanticBase, + trace_label: str, + locals_dict: dict | None = None, ): """ Add columns to the tours table in the pipeline according to spec. Parameters ---------- - model_settings : dict + state : workflow.State + model_settings : dict or PydanticBase trace_label : str + locals_dict : dict, optional """ + if isinstance(model_settings, PydanticBase): + model_settings = model_settings.dict() if locals_dict is None: locals_dict = {} tours = state.get_dataframe("tours") @@ -39,16 +48,23 @@ def annotate_tours( def annotate_trips( - state: workflow.State, model_settings, trace_label, locals_dict=None + state: workflow.State, + model_settings: dict | PydanticBase, + trace_label: str, + locals_dict=None, ): """ Add columns to the trips table in the pipeline according to spec. Parameters ---------- - model_settings : dict + state : workflow.State + model_settings : dict or PydanticBase trace_label : str + locals_dict : dict, optional """ + if isinstance(model_settings, PydanticBase): + model_settings = model_settings.dict() if locals_dict is None: locals_dict = {} trips = state.get_dataframe("trips") diff --git a/activitysim/abm/models/util/logsums.py b/activitysim/abm/models/util/logsums.py index fff541e92..b92045fa8 100644 --- a/activitysim/abm/models/util/logsums.py +++ b/activitysim/abm/models/util/logsums.py @@ -4,20 +4,39 @@ import logging -from activitysim.core import config, expressions, los, simulate, tracing +import pandas as pd -logger = logging.getLogger(__name__) +from activitysim.core import config, expressions, los, simulate, tracing, workflow +from activitysim.core.configuration import PydanticBase +from activitysim.core.configuration.logit import ( + TourLocationComponentSettings, + TourModeComponentSettings, +) +logger = logging.getLogger(__name__) -def filter_chooser_columns(choosers, logsum_settings, model_settings): - chooser_columns = logsum_settings.get("LOGSUM_CHOOSER_COLUMNS", []) +def filter_chooser_columns( + choosers, logsum_settings: dict | PydanticBase, model_settings: dict | PydanticBase +): + try: + chooser_columns = logsum_settings.LOGSUM_CHOOSER_COLUMNS + except AttributeError: + chooser_columns = logsum_settings.get("LOGSUM_CHOOSER_COLUMNS", []) if ( - "CHOOSER_ORIG_COL_NAME" in model_settings + isinstance(model_settings, dict) + and "CHOOSER_ORIG_COL_NAME" in model_settings and model_settings["CHOOSER_ORIG_COL_NAME"] not in chooser_columns ): chooser_columns.append(model_settings["CHOOSER_ORIG_COL_NAME"]) + if ( + isinstance(model_settings, PydanticBase) + and hasattr(model_settings, "CHOOSER_ORIG_COL_NAME") + and model_settings.CHOOSER_ORIG_COL_NAME + and model_settings.CHOOSER_ORIG_COL_NAME not in chooser_columns + ): + chooser_columns.append(model_settings.CHOOSER_ORIG_COL_NAME) missing_columns = [c for c in chooser_columns if c not in choosers] if missing_columns: @@ -32,19 +51,19 @@ def filter_chooser_columns(choosers, logsum_settings, model_settings): return choosers -def compute_logsums( - state, - choosers, +def compute_location_choice_logsums( + state: workflow.State, + choosers: pd.DataFrame, tour_purpose, - logsum_settings, - model_settings, - network_los, - chunk_size, - chunk_tag, - trace_label, - in_period_col=None, - out_period_col=None, - 
duration_col=None, + logsum_settings: TourModeComponentSettings, + model_settings: TourLocationComponentSettings, + network_los: los.Network_LOS, + chunk_size: int, + chunk_tag: str, + trace_label: str, + in_period_col: str | None = None, + out_period_col: str | None = None, + duration_col: str | None = None, ): """ @@ -52,8 +71,8 @@ def compute_logsums( ---------- choosers tour_purpose - logsum_settings - model_settings + logsum_settings : TourModeComponentSettings + model_settings : TourLocationComponentSettings network_los chunk_size trace_hh_id @@ -64,13 +83,20 @@ def compute_logsums( logsums: pandas series computed logsums with same index as choosers """ + if isinstance(model_settings, dict): + model_settings = TourLocationComponentSettings.parse_obj(model_settings) + if isinstance(logsum_settings, dict): + logsum_settings = TourModeComponentSettings.parse_obj(logsum_settings) trace_label = tracing.extend_trace_label(trace_label, "compute_logsums") - logger.debug("Running compute_logsums with %d choosers" % choosers.shape[0]) + logger.debug(f"Running compute_logsums with {choosers.shape[0]:d} choosers") # compute_logsums needs to know name of dest column in interaction_sample - orig_col_name = model_settings["CHOOSER_ORIG_COL_NAME"] - dest_col_name = model_settings["ALT_DEST_COL_NAME"] + orig_col_name = model_settings.CHOOSER_ORIG_COL_NAME + dest_col_name = model_settings.ALT_DEST_COL_NAME + + assert (in_period_col is not None) or (model_settings.IN_PERIOD is not None) + assert (out_period_col is not None) or (model_settings.OUT_PERIOD is not None) # FIXME - are we ok with altering choosers (so caller doesn't have to set these)? if (in_period_col is not None) and (out_period_col is not None): @@ -84,29 +110,29 @@ def compute_logsums( "out_period" not in choosers.columns ): if ( - type(model_settings["IN_PERIOD"]) is dict - and type(model_settings["OUT_PERIOD"]) is dict + type(model_settings.IN_PERIOD) is dict + and type(model_settings.OUT_PERIOD) is dict ): if ( - tour_purpose in model_settings["IN_PERIOD"] - and tour_purpose in model_settings["OUT_PERIOD"] + tour_purpose in model_settings.IN_PERIOD + and tour_purpose in model_settings.OUT_PERIOD ): choosers["in_period"] = network_los.skim_time_period_label( - model_settings["IN_PERIOD"][tour_purpose], + model_settings.IN_PERIOD[tour_purpose], as_cat=True, broadcast_to=choosers.index, ) choosers["out_period"] = network_los.skim_time_period_label( - model_settings["OUT_PERIOD"][tour_purpose], + model_settings.OUT_PERIOD[tour_purpose], as_cat=True, broadcast_to=choosers.index, ) else: choosers["in_period"] = network_los.skim_time_period_label( - model_settings["IN_PERIOD"], as_cat=True, broadcast_to=choosers.index + model_settings.IN_PERIOD, as_cat=True, broadcast_to=choosers.index ) choosers["out_period"] = network_los.skim_time_period_label( - model_settings["OUT_PERIOD"], as_cat=True, broadcast_to=choosers.index + model_settings.OUT_PERIOD, as_cat=True, broadcast_to=choosers.index ) else: logger.error("Choosers table already has columns 'in_period' and 'out_period'.") @@ -115,25 +141,23 @@ def compute_logsums( choosers["duration"] = choosers[duration_col] elif "duration" not in choosers.columns: if ( - type(model_settings["IN_PERIOD"]) is dict - and type(model_settings["OUT_PERIOD"]) is dict + type(model_settings.IN_PERIOD) is dict + and type(model_settings.OUT_PERIOD) is dict ): if ( - tour_purpose in model_settings["IN_PERIOD"] - and tour_purpose in model_settings["OUT_PERIOD"] + tour_purpose in model_settings.IN_PERIOD + and 
tour_purpose in model_settings.OUT_PERIOD ): choosers["duration"] = ( - model_settings["IN_PERIOD"][tour_purpose] - - model_settings["OUT_PERIOD"][tour_purpose] + model_settings.IN_PERIOD[tour_purpose] + - model_settings.OUT_PERIOD[tour_purpose] ) else: - choosers["duration"] = ( - model_settings["IN_PERIOD"] - model_settings["OUT_PERIOD"] - ) + choosers["duration"] = model_settings.IN_PERIOD - model_settings.OUT_PERIOD else: logger.error("Choosers table already has column 'duration'.") - logsum_spec = state.filesystem.read_model_spec(file_name=logsum_settings["SPEC"]) + logsum_spec = state.filesystem.read_model_spec(file_name=logsum_settings.SPEC) coefficients = state.filesystem.get_segment_coefficients( logsum_settings, tour_purpose ) @@ -204,7 +228,7 @@ def compute_logsums( ) # TVPB constants can appear in expressions - if logsum_settings.get("use_TVPB_constants", True): + if logsum_settings.use_TVPB_constants: locals_dict.update( network_los.setting("TVPB_SETTINGS.tour_mode_choice.CONSTANTS") ) @@ -213,11 +237,10 @@ def compute_logsums( # - run preprocessor to annotate choosers # allow specification of alternate preprocessor for nontour choosers - preprocessor = model_settings.get("LOGSUM_PREPROCESSOR", "preprocessor") - preprocessor_settings = logsum_settings[preprocessor] + preprocessor = model_settings.LOGSUM_PREPROCESSOR + preprocessor_settings = getattr(logsum_settings, preprocessor, None) if preprocessor_settings: - simulate.set_skim_wrapper_targets(choosers, skims) expressions.assign_columns( diff --git a/activitysim/abm/models/util/mode.py b/activitysim/abm/models/util/mode.py index 3c0d2a5ed..341cf7391 100644 --- a/activitysim/abm/models/util/mode.py +++ b/activitysim/abm/models/util/mode.py @@ -9,6 +9,7 @@ import pandas as pd from activitysim.core import config, expressions, simulate, workflow +from activitysim.core.configuration.logit import TourModeComponentSettings from activitysim.core.estimation import Estimator """ @@ -91,7 +92,7 @@ def run_tour_mode_choice_simulate( state: workflow.State, choosers, tour_purpose, - model_settings, + model_settings: TourModeComponentSettings, mode_column_name, logsum_column_name, network_los, @@ -108,7 +109,7 @@ def run_tour_mode_choice_simulate( you want to use in the evaluation of variables. """ - spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients = state.filesystem.get_segment_coefficients( model_settings, tour_purpose ) diff --git a/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py b/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py index 3c3cc5c90..8dc7aaa39 100644 --- a/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py @@ -1,10 +1,13 @@ # ActivitySim # See full license in LICENSE.txt. 
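# --- Illustrative sketch (not part of the patch): IN_PERIOD / OUT_PERIOD ---
# In the compute_location_choice_logsums hunks above, IN_PERIOD and OUT_PERIOD
# may each be a single value or a dict keyed by tour purpose, and the duration
# fallback is simply IN_PERIOD - OUT_PERIOD for the relevant purpose. Distilled:

def resolve_duration(in_period, out_period, tour_purpose):
    # dict-valued settings are segmented by tour purpose
    if isinstance(in_period, dict) and isinstance(out_period, dict):
        return in_period[tour_purpose] - out_period[tour_purpose]
    return in_period - out_period

assert resolve_duration(20, 10, "work") == 10
assert resolve_duration({"work": 20, "school": 18}, {"work": 8, "school": 9}, "school") == 9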
+from __future__ import annotations + import pandas as pd import pandas.testing as pdt from activitysim.abm.models.util.vectorize_tour_scheduling import ( + TourSchedulingSettings, get_previous_tour_by_tourid, vectorize_tour_scheduling, ) @@ -64,7 +67,7 @@ def test_vts(): timetable, tour_segments={"spec": spec}, tour_segment_col=None, - model_settings={}, + model_settings=TourSchedulingSettings(), chunk_size=0, trace_label="test_vts", ) diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index be07c33aa..fbc8113e2 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -10,6 +10,7 @@ from activitysim.abm.models.util import logsums as logsum from activitysim.abm.tables.size_terms import tour_destination_size_terms from activitysim.core import config, los, simulate, tracing, workflow +from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex @@ -67,7 +68,7 @@ def _destination_sample( destination_size_terms, skims, estimator, - model_settings, + model_settings: TourLocationComponentSettings, alt_dest_col_name, chunk_tag, trace_label: str, @@ -75,15 +76,17 @@ def _destination_sample( ): model_spec = simulate.spec_for_segment( state, - model_settings, + None, spec_id="SAMPLE_SPEC", segment_name=spec_segment_name, estimator=estimator, + spec_file_name=model_settings.SAMPLE_SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) logger.info("running %s with %d tours", trace_label, len(choosers)) - sample_size = model_settings["SAMPLE_SIZE"] + sample_size = model_settings.SAMPLE_SIZE if state.settings.disable_destination_sampling or ( estimator and estimator.want_unsampled_alternatives ): @@ -100,7 +103,7 @@ def _destination_sample( "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", } - constants = config.get_model_constants(model_settings) + constants = model_settings.CONSTANTS if constants is not None: locals_d.update(constants) @@ -123,7 +126,7 @@ def _destination_sample( ) # if special person id is passed - chooser_id_column = model_settings.get("CHOOSER_ID_COLUMN", "person_id") + chooser_id_column = model_settings.CHOOSER_ID_COLUMN # remember person_id in chosen alts so we can merge with persons in subsequent steps # (broadcasts person_id onto all alternatives sharing the same tour_id index value) @@ -136,7 +139,7 @@ def destination_sample( state: workflow.State, spec_segment_name, choosers, - model_settings, + model_settings: TourLocationComponentSettings, network_los, destination_size_terms, estimator, @@ -147,7 +150,7 @@ def destination_sample( # create wrapper with keys for this lookup # the skims will be available under the name "skims" for any @ expressions - skim_origin_col_name = model_settings["CHOOSER_ORIG_COL_NAME"] + skim_origin_col_name = model_settings.CHOOSER_ORIG_COL_NAME skim_dest_col_name = destination_size_terms.index.name # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser') if skim_origin_col_name == skim_dest_col_name: @@ -157,7 +160,7 @@ def destination_sample( skims = skim_dict.wrap(skim_origin_col_name, skim_dest_col_name) # the name of the dest column to be returned in choices - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = 
model_settings.ALT_DEST_COL_NAME choices = _destination_sample( state, @@ -460,7 +463,7 @@ def destination_presample( state: workflow.State, spec_segment_name, choosers, - model_settings, + model_settings: TourLocationComponentSettings, network_los, destination_size_terms, estimator, @@ -471,14 +474,14 @@ def destination_presample( logger.info(f"{trace_label} location_presample") - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME assert DEST_TAZ != alt_dest_col_name MAZ_size_terms, TAZ_size_terms = aggregate_size_terms( destination_size_terms, network_los ) - orig_maz = model_settings["CHOOSER_ORIG_COL_NAME"] + orig_maz = model_settings.CHOOSER_ORIG_COL_NAME assert orig_maz in choosers if ORIG_TAZ not in choosers: choosers[ORIG_TAZ] = network_los.map_maz_to_taz(choosers[orig_maz]) @@ -517,7 +520,7 @@ def run_destination_sample( spec_segment_name, tours, persons_merged, - model_settings, + model_settings: TourLocationComponentSettings, network_los, destination_size_terms, estimator, @@ -525,10 +528,10 @@ def run_destination_sample( trace_label, ): # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge) - chooser_columns = model_settings["SIMULATE_CHOOSER_COLUMNS"] + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS # if special person id is passed - chooser_id_column = model_settings.get("CHOOSER_ID_COLUMN", "person_id") + chooser_id_column = model_settings.CHOOSER_ID_COLUMN persons_merged = persons_merged[ [c for c in persons_merged.columns if c in chooser_columns] @@ -597,7 +600,7 @@ def run_destination_logsums( tour_purpose, persons_merged, destination_sample, - model_settings, + model_settings: TourLocationComponentSettings, network_los, chunk_size, trace_label, @@ -624,10 +627,10 @@ def run_destination_logsums( """ logsum_settings = state.filesystem.read_model_settings( - model_settings["LOGSUM_SETTINGS"] + model_settings.LOGSUM_SETTINGS ) # if special person id is passed - chooser_id_column = model_settings.get("CHOOSER_ID_COLUMN", "person_id") + chooser_id_column = model_settings.CHOOSER_ID_COLUMN chunk_tag = "tour_destination.logsums" @@ -650,7 +653,7 @@ def run_destination_logsums( state.tracing.dump_df(DUMP, persons_merged, trace_label, "persons_merged") state.tracing.dump_df(DUMP, choosers, trace_label, "choosers") - logsums = logsum.compute_logsums( + logsums = logsum.compute_location_choice_logsums( state, choosers, tour_purpose, @@ -669,13 +672,13 @@ def run_destination_logsums( def run_destination_simulate( state: workflow.State, - spec_segment_name, - tours, - persons_merged, + spec_segment_name: str, + tours: pd.DataFrame, + persons_merged: pd.DataFrame, destination_sample, - want_logsums, - model_settings, - network_los, + want_logsums: bool, + model_settings: TourLocationComponentSettings, + network_los: los.Network_LOS, destination_size_terms, estimator, chunk_size, @@ -690,17 +693,19 @@ def run_destination_simulate( model_spec = simulate.spec_for_segment( state, - model_settings, + None, spec_id="SPEC", segment_name=spec_segment_name, estimator=estimator, + spec_file_name=model_settings.SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge) - chooser_columns = model_settings["SIMULATE_CHOOSER_COLUMNS"] + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS # if special person id is passed - chooser_id_column = model_settings.get("CHOOSER_ID_COLUMN", "person_id") + 
chooser_id_column = model_settings.CHOOSER_ID_COLUMN persons_merged = persons_merged[ [c for c in persons_merged.columns if c in chooser_columns] @@ -722,8 +727,8 @@ def run_destination_simulate( if estimator: estimator.write_choosers(choosers) - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] - origin_col_name = model_settings["CHOOSER_ORIG_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME + origin_col_name = model_settings.CHOOSER_ORIG_COL_NAME # alternatives are pre-sampled and annotated with logsums and pick_count # but we have to merge size_terms column into alt sample list @@ -733,7 +738,7 @@ def run_destination_simulate( state.tracing.dump_df(DUMP, destination_sample, trace_label, "alternatives") - constants = config.get_model_constants(model_settings) + constants = model_settings.CONSTANTS logger.info("Running tour_destination_simulate with %d persons", len(choosers)) @@ -788,20 +793,18 @@ def run_tour_destination( persons_merged: pd.DataFrame, want_logsums: bool, want_sample_table: bool, - model_settings, + model_settings: TourLocationComponentSettings, network_los: los.Network_LOS, estimator, trace_label, skip_choice=False, ): - size_term_calculator = SizeTermCalculator( - state, model_settings["SIZE_TERM_SELECTOR"] - ) + size_term_calculator = SizeTermCalculator(state, model_settings.SIZE_TERM_SELECTOR) # maps segment names to compact (integer) ids - segments = model_settings["SEGMENTS"] + segments = model_settings.SEGMENTS - chooser_segment_column = model_settings.get("CHOOSER_SEGMENT_COLUMN_NAME", None) + chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME if chooser_segment_column is None: assert ( len(segments) == 1 @@ -879,7 +882,7 @@ def run_tour_destination( if want_sample_table: # FIXME - sample_table location_sample_df.set_index( - model_settings["ALT_DEST_COL_NAME"], append=True, inplace=True + model_settings.ALT_DEST_COL_NAME, append=True, inplace=True ) sample_list.append(location_sample_df) else: diff --git a/activitysim/abm/models/util/tour_frequency.py b/activitysim/abm/models/util/tour_frequency.py index f2689608b..8c5c9ff04 100644 --- a/activitysim/abm/models/util/tour_frequency.py +++ b/activitysim/abm/models/util/tour_frequency.py @@ -9,6 +9,8 @@ from activitysim.abm.models.util.canonical_ids import set_tour_index from activitysim.core import workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.util import reindex logger = logging.getLogger(__name__) @@ -322,7 +324,6 @@ def process_atwork_subtours( work_tours: pd.DataFrame, atwork_subtour_frequency_alts: pd.DataFrame, ): - """ This method processes the atwork_subtour_frequency column that comes out of the model of the same name and turns into a DataFrame that @@ -615,8 +616,33 @@ def process_tours_frequency_composition( return tours +class JointTourFreqCompContent(PydanticReadable): + VALUE_MAP: dict[int, str] + COLUMNS: list[str] + + +class JointTourFreqCompAlts(PydanticReadable): + PURPOSE: JointTourFreqCompContent + COMPOSITION: JointTourFreqCompContent + + +class JointTourFreqCompSettings(LogitComponentSettings): + """ + Settings for joint tour frequency and composition. 
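# --- Illustrative sketch (not part of the patch): nested alts settings ---
# JointTourFreqCompContent and JointTourFreqCompAlts above replace a chain of
# nested .get(...) calls and asserts with typed, validated structure. Rough
# equivalent with plain pydantic and hypothetical example values:
from pydantic import BaseModel

class Content(BaseModel):
    VALUE_MAP: dict[int, str]
    COLUMNS: list[str]

class Alts(BaseModel):
    PURPOSE: Content
    COMPOSITION: Content

alts = Alts.parse_obj(
    {
        "PURPOSE": {"VALUE_MAP": {1: "shopping"}, "COLUMNS": ["purpose1"]},
        "COMPOSITION": {"VALUE_MAP": {1: "adults"}, "COLUMNS": ["party1"]},
    }
)
# attribute access with validation, vs. nested .get("PURPOSE", None).get("VALUE_MAP", None)
assert alts.PURPOSE.VALUE_MAP[1] == "shopping"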
+ """ + + ALTS_TABLE_STRUCTURE: JointTourFreqCompAlts + preprocessor: PreprocessorSettings | None = None + ALTS_PREPROCESSOR: PreprocessorSettings | None = None + + def create_joint_tours( - state: workflow.State, tour_counts, tour_category, parent_col="person_id" + state: workflow.State, + tour_counts, + tour_category, + parent_col="person_id", + model_settings: JointTourFreqCompSettings | None = None, + model_settings_file_name: str = "joint_tour_frequency_composition.yaml", ): """ This method processes the tour_frequency column that comes @@ -657,36 +683,17 @@ def create_joint_tours( 2588676 2 0 0 2588677 1 1 0 """ - model_settings_file_name = "joint_tour_frequency_composition.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = JointTourFreqCompSettings.read_settings_file( + state.filesystem, model_settings_file_name + ) - alts_table_structure = model_settings.get("ALTS_TABLE_STRUCTURE", None) - assert ( - alts_table_structure is not None - ), f"Expected to find ALTS_TABLE_STRUCTURE setting in joint_tour_frequency_composition.yaml" - - tour_type_dict = alts_table_structure.get("PURPOSE", None).get("VALUE_MAP", None) - assert ( - tour_type_dict is not None - ), f"Expected to find PURPOSE.VALUE_MAP setting in ALTS_TABLE_STRUCTURE" - - tour_type_cols = alts_table_structure.get("PURPOSE", None).get("COLUMNS", None) - assert ( - tour_type_cols is not None - ), f"Expected to find PURPOSE.COLUMNS setting in ALTS_TABLE_STRUCTURE" - - tour_comp_dict = alts_table_structure.get("COMPOSITION", None).get( - "VALUE_MAP", None - ) - assert ( - tour_comp_dict is not None - ), f"Expected to find COMPOSITION.VALUE_MAP setting in ALTS_TABLE_STRUCTURE" - - tour_comp_cols = alts_table_structure.get("COMPOSITION", None).get("COLUMNS", None) - assert ( - tour_comp_cols is not None - ), f"Expected to find COMPOSITION.COLUMNS setting in ALTS_TABLE_STRUCTURE" + alts_table_structure = model_settings.ALTS_TABLE_STRUCTURE + tour_type_dict = alts_table_structure.PURPOSE.VALUE_MAP + tour_type_cols = alts_table_structure.PURPOSE.COLUMNS + tour_comp_dict = alts_table_structure.COMPOSITION.VALUE_MAP + tour_comp_cols = alts_table_structure.COMPOSITION.COLUMNS # reformat with the columns given below tours_purp = tour_counts[tour_type_cols].stack().reset_index() diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py index ba1208b47..22ea4a310 100644 --- a/activitysim/abm/models/util/tour_od.py +++ b/activitysim/abm/models/util/tour_od.py @@ -20,6 +20,8 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex @@ -38,8 +40,18 @@ ORIG_TAZ_EXT = "orig_TAZ_ext" +class TourODSettings(TourLocationComponentSettings): + DEST_COL_NAME: str + OD_CHOICE_SAMPLE_TABLE_NAME: str | None = None + OD_CHOICE_LOGSUM_COLUMN_NAME: str | None = None + ORIGIN_ATTR_COLS_TO_USE: list[str] = [] + ORIG_COL_NAME: str + ORIG_FILTER: str | None = None + preprocessor: PreprocessorSettings | None = None + + def get_od_id_col(origin_col, destination_col): - colname = "{0}_{1}".format(origin_col, destination_col) + colname = f"{origin_col}_{destination_col}" return colname @@ -64,7 +76,7 @@ def _create_od_alts_from_dest_size_terms( 
origin_id_col="origin", dest_id_col="destination", origin_filter=None, - origin_attr_cols: Optional[list[str]] = None, + origin_attr_cols: list[str] | None = None, ): """ Extend destination size terms to create dataframe representing the @@ -124,7 +136,7 @@ def _od_sample( dest_id_col, skims, estimator, - model_settings, + model_settings: TourODSettings, alt_od_col_name, chunk_size, chunk_tag, @@ -132,10 +144,12 @@ def _od_sample( ): model_spec = simulate.spec_for_segment( state, - model_settings, + None, spec_id="SAMPLE_SPEC", segment_name=spec_segment_name, estimator=estimator, + spec_file_name=model_settings.SAMPLE_SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) if alt_od_col_name is None: alt_col_name = get_od_id_col(origin_id_col, dest_id_col) @@ -144,7 +158,7 @@ def _od_sample( logger.info("running %s with %d tours", trace_label, len(choosers)) - sample_size = model_settings["SAMPLE_SIZE"] + sample_size = model_settings.SAMPLE_SIZE if state.settings.disable_destination_sampling or ( estimator and estimator.want_unsampled_alternatives ): @@ -165,12 +179,12 @@ def _od_sample( "orig_col_name": ORIG_TAZ, "dest_col_name": DEST_TAZ, } - constants = config.get_model_constants(model_settings) + constants = model_settings.CONSTANTS if constants is not None: locals_d.update(constants) - origin_filter = model_settings.get("ORIG_FILTER", None) - origin_attr_cols = model_settings["ORIGIN_ATTR_COLS_TO_USE"] + origin_filter = model_settings.ORIG_FILTER + origin_attr_cols = model_settings.ORIGIN_ATTR_COLS_TO_USE od_alts_df = _create_od_alts_from_dest_size_terms( state, @@ -208,9 +222,10 @@ def _od_sample( def od_sample( + state: workflow.State, spec_segment_name, choosers, - model_settings, + model_settings: TourODSettings, network_los, destination_size_terms, estimator, @@ -219,9 +234,9 @@ def od_sample( ): chunk_tag = "tour_od.sample" - origin_col_name = model_settings["ORIG_COL_NAME"] - dest_col_name = model_settings["DEST_COL_NAME"] - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + origin_col_name = model_settings.ORIG_COL_NAME + dest_col_name = model_settings.DEST_COL_NAME + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME skim_dict = network_los.get_default_skim_dict() skims = skim_dict.wrap(origin_col_name, dest_col_name) @@ -229,6 +244,7 @@ def od_sample( # the name of the od column to be returned in choices alt_od_col_name = get_od_id_col(origin_col_name, dest_col_name) choices = _od_sample( + state, spec_segment_name, choosers, network_los, @@ -567,7 +583,7 @@ def od_presample( state: workflow.State, spec_segment_name, choosers, - model_settings, + model_settings: TourODSettings, network_los, destination_size_terms, estimator, @@ -629,8 +645,8 @@ def od_presample( # outputs assert DEST_MAZ in maz_choices - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] - chooser_orig_col_name = model_settings["CHOOSER_ORIG_COL_NAME"] + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME + chooser_orig_col_name = model_settings.CHOOSER_ORIG_COL_NAME maz_choices = maz_choices.rename( columns={DEST_MAZ: alt_dest_col_name, ORIG_MAZ: chooser_orig_col_name} ) @@ -690,7 +706,7 @@ def run_od_sample( state, spec_segment_name, tours, - model_settings, + model_settings: TourODSettings, network_los, destination_size_terms, estimator, @@ -699,15 +715,17 @@ def run_od_sample( ): model_spec = simulate.spec_for_segment( state, - model_settings, + None, spec_id="SAMPLE_SPEC", segment_name=spec_segment_name, estimator=estimator, + spec_file_name=model_settings.SAMPLE_SPEC, + 
coefficients_file_name=model_settings.COEFFICIENTS, ) choosers = tours # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings["SIMULATE_CHOOSER_COLUMNS"] + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS choosers = choosers[chooser_columns] # interaction_sample requires that choosers.index.is_monotonic_increasing @@ -745,6 +763,7 @@ def run_od_sample( else: choices = od_sample( + state, spec_segment_name, choosers, model_settings, @@ -763,7 +782,7 @@ def run_od_logsums( spec_segment_name, tours_merged_df, od_sample, - model_settings, + model_settings: TourODSettings, network_los, estimator, chunk_size, @@ -778,10 +797,10 @@ def run_od_logsums( """ chunk_tag = "tour_od.logsums" logsum_settings = state.filesystem.read_model_settings( - model_settings["LOGSUM_SETTINGS"] + model_settings.LOGSUM_SETTINGS ) - origin_id_col = model_settings["ORIG_COL_NAME"] - dest_id_col = model_settings["DEST_COL_NAME"] + origin_id_col = model_settings.ORIG_COL_NAME + dest_id_col = model_settings.DEST_COL_NAME tour_od_id_col = get_od_id_col(origin_id_col, dest_id_col) # FIXME - MEMORY HACK - only include columns actually used in spec @@ -912,7 +931,7 @@ def run_od_logsums( for col in new_cols: od_sample[col] = choosers[col] - logsums = logsum.compute_logsums( + logsums = logsum.compute_location_choice_logsums( state, choosers, spec_segment_name, @@ -939,7 +958,7 @@ def run_od_simulate( tours, od_sample, want_logsums, - model_settings, + model_settings: TourODSettings, network_los, destination_size_terms, estimator, @@ -953,17 +972,19 @@ def run_od_simulate( model_spec = simulate.spec_for_segment( state, - model_settings, + None, spec_id="SPEC", segment_name=spec_segment_name, estimator=estimator, + spec_file_name=model_settings.SPEC, + coefficients_file_name=model_settings.COEFFICIENTS, ) # merge persons into tours choosers = tours # FIXME - MEMORY HACK - only include columns actually used in spec - chooser_columns = model_settings["SIMULATE_CHOOSER_COLUMNS"] + chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS choosers = choosers[chooser_columns] # interaction_sample requires that choosers.index.is_monotonic_increasing @@ -976,10 +997,10 @@ def run_od_simulate( if estimator: estimator.write_choosers(choosers) - origin_col_name = model_settings["ORIG_COL_NAME"] - dest_col_name = model_settings["DEST_COL_NAME"] - alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] - origin_attr_cols = model_settings["ORIGIN_ATTR_COLS_TO_USE"] + origin_col_name = model_settings.ORIG_COL_NAME + dest_col_name = model_settings.DEST_COL_NAME + alt_dest_col_name = model_settings.ALT_DEST_COL_NAME + origin_attr_cols = model_settings.ORIGIN_ATTR_COLS_TO_USE alt_od_col_name = get_od_id_col(origin_col_name, dest_col_name) od_sample[alt_od_col_name] = create_od_id_col( @@ -1000,7 +1021,7 @@ def run_od_simulate( state.tracing.dump_df(DUMP, od_sample, trace_label, "alternatives") - constants = config.get_model_constants(model_settings) + constants = model_settings.CONSTANTS logger.info("Running tour_destination_simulate with %d persons", len(choosers)) @@ -1050,23 +1071,21 @@ def run_tour_od( persons, want_logsums, want_sample_table, - model_settings, + model_settings: TourODSettings, network_los, estimator, chunk_size, trace_hh_id, trace_label, ): - size_term_calculator = SizeTermCalculator( - state, model_settings["SIZE_TERM_SELECTOR"] - ) - preprocessor_settings = model_settings.get("preprocessor", None) - origin_col_name = model_settings["ORIG_COL_NAME"] + 
size_term_calculator = SizeTermCalculator(state, model_settings.SIZE_TERM_SELECTOR) + preprocessor_settings = model_settings.preprocessor + origin_col_name = model_settings.ORIG_COL_NAME - chooser_segment_column = model_settings["CHOOSER_SEGMENT_COLUMN_NAME"] + chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME # maps segment names to compact (integer) ids - segments = model_settings["SEGMENTS"] + segments = model_settings.SEGMENTS # interaction_sample_simulate insists choosers appear in same order as alts tours = tours.sort_index() @@ -1120,9 +1139,9 @@ def run_tour_od( ), ) - if model_settings["ORIG_FILTER"] == "original_MAZ > 0": + if model_settings.ORIG_FILTER == "original_MAZ > 0": pass - elif model_settings["ORIG_FILTER"] == "external_TAZ > 0": + elif model_settings.ORIG_FILTER == "external_TAZ > 0": # sampled alts using internal mazs, so now we # have to convert to using the external tazs od_sample_df[origin_col_name] = map_maz_to_ext_maz( @@ -1146,7 +1165,7 @@ def run_tour_od( chunk_size=chunk_size, trace_hh_id=trace_hh_id, trace_label=tracing.extend_trace_label( - trace_label, "logsums.%s" % segment_name + trace_label, f"logsums.{segment_name}" ), ) @@ -1174,7 +1193,7 @@ def run_tour_od( if want_sample_table: # FIXME - sample_table od_sample_df.set_index( - model_settings["ALT_DEST_COL_NAME"], append=True, inplace=True + model_settings.ALT_DEST_COL_NAME, append=True, inplace=True ) sample_list.append(od_sample_df) diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index bf69c6234..f52d0db44 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ b/activitysim/abm/models/util/tour_scheduling.py @@ -9,6 +9,8 @@ from activitysim.abm.models.util import vectorize_tour_scheduling as vts from activitysim.core import config, estimation, expressions, simulate, workflow +from .vectorize_tour_scheduling import TourModeComponentSettings, TourSchedulingSettings + logger = logging.getLogger(__name__) @@ -23,18 +25,24 @@ def run_tour_scheduling( trace_label = model_name model_settings_file_name = f"{model_name}.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + model_settings = TourSchedulingSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + mandatory=False, + ) - if "LOGSUM_SETTINGS" in model_settings: - logsum_settings = state.filesystem.read_model_settings( - model_settings["LOGSUM_SETTINGS"] + if model_settings.LOGSUM_SETTINGS: + logsum_settings = TourModeComponentSettings.read_settings_file( + state.filesystem, + str(model_settings.LOGSUM_SETTINGS), + mandatory=False, ) - logsum_columns = logsum_settings.get("LOGSUM_CHOOSER_COLUMNS", []) + logsum_columns = logsum_settings.LOGSUM_CHOOSER_COLUMNS else: logsum_columns = [] # - filter chooser columns for both logsums and simulate - model_columns = model_settings.get("SIMULATE_CHOOSER_COLUMNS", []) + model_columns = model_settings.SIMULATE_CHOOSER_COLUMNS chooser_columns = logsum_columns + [ c for c in model_columns if c not in logsum_columns ] @@ -44,7 +52,7 @@ def run_tour_scheduling( timetable = state.get_injectable("timetable") # - run preprocessor to annotate choosers - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {"tt": timetable.attach_state(state)} locals_d.update(config.get_model_constants(model_settings)) @@ -58,9 +66,9 @@ def run_tour_scheduling( ) estimators = {} - if 
"TOUR_SPEC_SEGMENTS" in model_settings: + if model_settings.TOUR_SPEC_SEGMENTS: # load segmented specs - spec_segment_settings = model_settings.get("SPEC_SEGMENTS", {}) + spec_segment_settings = model_settings.SPEC_SEGMENTS specs = {} sharrow_skips = {} for spec_segment_name, spec_settings in spec_segment_settings.items(): @@ -71,13 +79,13 @@ def run_tour_scheduling( state, model_name=bundle_name, bundle_name=bundle_name ) - spec_file_name = spec_settings["SPEC"] + spec_file_name = spec_settings.SPEC model_spec = state.filesystem.read_model_spec(file_name=spec_file_name) coefficients_df = state.filesystem.read_model_coefficients(spec_settings) specs[spec_segment_name] = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator ) - sharrow_skips[spec_segment_name] = spec_settings.get("sharrow_skip", False) + sharrow_skips[spec_segment_name] = spec_settings.sharrow_skip if estimator: estimators[spec_segment_name] = estimator # add to local list @@ -86,7 +94,7 @@ def run_tour_scheduling( estimator.write_coefficients(coefficients_df, spec_settings) # - spec dict segmented by primary_purpose - tour_segment_settings = model_settings.get("TOUR_SPEC_SEGMENTS", {}) + tour_segment_settings = model_settings.TOUR_SPEC_SEGMENTS tour_segments = {} for tour_segment_name, spec_segment_name in tour_segment_settings.items(): tour_segments[tour_segment_name] = {} @@ -105,15 +113,17 @@ def run_tour_scheduling( else: # unsegmented spec - assert "SPEC_SEGMENTS" not in model_settings - assert "TOUR_SPEC_SEGMENTS" not in model_settings + assert ( + not model_settings.SPEC_SEGMENTS + ), f"model_settings.SPEC_SEGMENTS should be omitted not {model_settings.SPEC_SEGMENTS!r}" + assert not model_settings.TOUR_SPEC_SEGMENTS assert tour_segment_col is None estimator = estimation.manager.begin_estimation(state, model_name) - spec_file_name = model_settings["SPEC"] + spec_file_name = model_settings.SPEC model_spec = state.filesystem.read_model_spec(file_name=spec_file_name) - sharrow_skip = model_settings.get("sharrow_skip", False) + sharrow_skip = model_settings.sharrow_skip coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator diff --git a/activitysim/abm/models/util/trip.py b/activitysim/abm/models/util/trip.py index 36a676e17..08b4e737d 100644 --- a/activitysim/abm/models/util/trip.py +++ b/activitysim/abm/models/util/trip.py @@ -151,7 +151,7 @@ def get_time_windows(residual, level): @workflow.cached_object -def stop_frequency_alts(state: workflow.State): +def stop_frequency_alts(state: workflow.State) -> pd.DataFrame: # alt file for building trips even though simulation is simple_simulate not interaction_simulate file_path = state.filesystem.get_config_file_path("stop_frequency_alternatives.csv") df = pd.read_csv(file_path, comment="#") @@ -160,7 +160,10 @@ def stop_frequency_alts(state: workflow.State): def initialize_from_tours( - state: workflow.State, tours, stop_frequency_alts, addtl_tour_cols_to_preserve=None + state: workflow.State, + tours, + stop_frequency_alts: pd.DataFrame, + addtl_tour_cols_to_preserve=None, ): """ Instantiates a trips table based on tour-level attributes: stop frequency, diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py index 297a61e33..0d65ab0fa 100644 --- a/activitysim/abm/models/util/vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/vectorize_tour_scheduling.py @@ -3,13 
+3,18 @@ from __future__ import annotations import logging +from pathlib import Path +from typing import Any import numpy as np import pandas as pd +from activitysim.abm.models.tour_mode_choice import TourModeComponentSettings from activitysim.core import chunk, config, expressions, los, simulate from activitysim.core import timetable as tt from activitysim.core import tracing, workflow +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex @@ -21,16 +26,52 @@ RUN_ALTS_PREPROCESSOR_BEFORE_MERGE = True # see FIXME below before changing this -def skims_for_logsums(state: workflow.State, tour_purpose, model_settings, trace_label): - assert "LOGSUM_SETTINGS" in model_settings +# class TourSchedulingSpecSegmentsSettings(PydanticReadable, extra="forbid"): # COEFFICIENTS: Path # SPEC: Path + +class TourSchedulingSettings(LogitComponentSettings, extra="forbid"): + LOGSUM_SETTINGS: Path | None = None + DESTINATION_FOR_TOUR_PURPOSE: str | dict[str, str] | None = None + LOGSUM_PREPROCESSOR: str = "preprocessor" + ALTS_PREPROCESSOR: PreprocessorSettings | dict[str, PreprocessorSettings] = {} + """ + If the alternatives preprocessor is a single PreprocessorSettings object, + it is assumed to be an unsegmented preprocessor. Otherwise, the dict keys + give the segments. + """ + SIMULATE_CHOOSER_COLUMNS: list[str] | None = None + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + SPEC_SEGMENTS: dict[str, LogitComponentSettings] = {} + + TOUR_SPEC_SEGMENTS: dict[str, str] = {} + + SPEC: Path | None = None + """Utility specification filename. + + This is sometimes alternatively called the utility expressions calculator + (UEC). It is a CSV file giving all the functions for the terms of a + linear-in-parameters utility expression. If SPEC_SEGMENTS is given, then + this unsegmented SPEC should be omitted.
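# --- Illustrative sketch (not part of the patch): ALTS_PREPROCESSOR lookup ---
# Per the docstring above, ALTS_PREPROCESSOR is either one PreprocessorSettings
# (unsegmented) or a dict keyed by segment; run_alts_preprocessor (later in
# this file) selects accordingly. The selection logic in isolation, with a
# stand-in settings type and hypothetical spec filenames:
from pydantic import BaseModel

class PreprocSettings(BaseModel):  # stand-in for PreprocessorSettings
    SPEC: str

def pick_preprocessor(alts_preprocessor, segment):
    if isinstance(alts_preprocessor, dict) and segment in alts_preprocessor:
        return alts_preprocessor[segment]  # segmented by logsum tour purpose
    if isinstance(alts_preprocessor, PreprocSettings):
        return alts_preprocessor  # unsegmented fallback
    return None  # no preprocessor to run

segmented = {"work": PreprocSettings(SPEC="tdd_alts_work.csv")}
assert pick_preprocessor(segmented, "work").SPEC == "tdd_alts_work.csv"
assert pick_preprocessor(segmented, "school") is None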
+ """ + + +def skims_for_logsums( + state: workflow.State, + tour_purpose, + model_settings: TourSchedulingSettings, + trace_label: str, +): network_los = state.get_injectable("network_los") skim_dict = network_los.get_default_skim_dict() orig_col_name = "home_zone_id" - destination_for_tour_purpose = model_settings.get("DESTINATION_FOR_TOUR_PURPOSE") + destination_for_tour_purpose = model_settings.DESTINATION_FOR_TOUR_PURPOSE if isinstance(destination_for_tour_purpose, str): dest_col_name = destination_for_tour_purpose elif isinstance(destination_for_tour_purpose, dict): @@ -97,7 +138,7 @@ def _compute_logsums( alt_tdd, tours_merged, tour_purpose, - model_settings, + model_settings: TourSchedulingSettings, network_los, skims, trace_label, @@ -109,8 +150,10 @@ def _compute_logsums( trace_label = tracing.extend_trace_label(trace_label, "logsums") with chunk.chunk_log(state, trace_label): - logsum_settings = state.filesystem.read_model_settings( - model_settings["LOGSUM_SETTINGS"] + logsum_settings = TourModeComponentSettings.read_settings_file( + state.filesystem, + str(model_settings.LOGSUM_SETTINGS), + mandatory=False, ) choosers = alt_tdd.join(tours_merged, how="left", rsuffix="_chooser") logger.info( @@ -138,8 +181,11 @@ def _compute_logsums( # - run preprocessor to annotate choosers # allow specification of alternate preprocessor for nontour choosers - preprocessor = model_settings.get("LOGSUM_PREPROCESSOR", "preprocessor") - preprocessor_settings = logsum_settings[preprocessor] + preprocessor = model_settings.LOGSUM_PREPROCESSOR + preprocessor_settings = ( + getattr(logsum_settings, preprocessor, None) + or logsum_settings[preprocessor] + ) if preprocessor_settings: simulate.set_skim_wrapper_targets(choosers, skims) @@ -153,9 +199,7 @@ def _compute_logsums( ) # - compute logsums - logsum_spec = state.filesystem.read_model_spec( - file_name=logsum_settings["SPEC"] - ) + logsum_spec = state.filesystem.read_model_spec(file_name=logsum_settings.SPEC) logsum_spec = simulate.eval_coefficients( state, logsum_spec, coefficients, estimator=None ) @@ -309,7 +353,7 @@ def compute_tour_scheduling_logsums( alt_tdd, tours_merged, tour_purpose, - model_settings, + model_settings: TourSchedulingSettings, skims, trace_label, *, @@ -570,7 +614,12 @@ def tdd_interaction_dataset( def run_alts_preprocessor( - state: workflow.State, model_settings, alts, segment, locals_dict, trace_label + state: workflow.State, + model_settings: TourSchedulingSettings, + alts, + segment, + locals_dict, + trace_label, ): """ run preprocessor on alts, as specified by ALTS_PREPROCESSOR in model_settings @@ -595,18 +644,18 @@ def run_alts_preprocessor( annotated copy of alts """ - preprocessor_settings = model_settings.get("ALTS_PREPROCESSOR", {}) + preprocessor_settings = model_settings.ALTS_PREPROCESSOR - if segment in preprocessor_settings: + if isinstance(preprocessor_settings, dict) and segment in preprocessor_settings: # segmented by logsum_tour_purpose preprocessor_settings = preprocessor_settings.get(segment) logger.debug( - f"running ALTS_PREPROCESSOR with spec for {segment}: {preprocessor_settings.get('SPEC')}" + f"running ALTS_PREPROCESSOR with spec for {segment}: {preprocessor_settings.SPEC}" ) - elif "SPEC" in preprocessor_settings: + elif isinstance(preprocessor_settings, PreprocessorSettings): # unsegmented (either because no segmentation, or fallback if settings has generic preprocessor) logger.debug( - f"running ALTS_PREPROCESSOR with unsegmented spec {preprocessor_settings.get('SPEC')}" + f"running 
ALTS_PREPROCESSOR with unsegmented spec {preprocessor_settings.SPEC}" ) else: logger.debug( @@ -638,7 +687,7 @@ def _schedule_tours( alts, spec, logsum_tour_purpose, - model_settings, + model_settings: TourSchedulingSettings, skims, timetable, window_id_col, @@ -673,7 +722,7 @@ def _schedule_tours( unavailable alternatives spec : DataFrame The spec which will be passed to interaction_simulate. - model_settings : dict + model_settings : TourSchedulingSettings timetable : TimeTable timetable of timewidows for person (or subtour) with rows for tours[window_id_col] window_id_col : str @@ -823,7 +872,7 @@ def schedule_tours( alts, spec, logsum_tour_purpose, - model_settings, + model_settings: TourSchedulingSettings, timetable, timetable_window_id_col, previous_tour, @@ -858,7 +907,7 @@ def schedule_tours( else: assert not tours[timetable_window_id_col].duplicated().any() - if "LOGSUM_SETTINGS" in model_settings: + if model_settings.LOGSUM_SETTINGS: # we need skims to calculate tvpb skim overhead in 3_ZONE systems for use by calc_rows_per_chunk skims = skims_for_logsums( state, logsum_tour_purpose, model_settings, tour_trace_label @@ -868,7 +917,7 @@ def schedule_tours( result_list = [] for ( - i, + _i, chooser_chunk, chunk_trace_label, chunk_sizer, @@ -917,7 +966,7 @@ def vectorize_tour_scheduling( timetable, tour_segments, tour_segment_col, - model_settings, + model_settings: TourSchedulingSettings, chunk_size=0, trace_label=None, ): @@ -950,7 +999,7 @@ def vectorize_tour_scheduling( spec : DataFrame The spec which will be passed to interaction_simulate. (or dict of specs keyed on tour_type if tour_types is not None) - model_settings : dict + model_settings : TourSchedulingSettings Returns ------- @@ -978,7 +1027,7 @@ def vectorize_tour_scheduling( timetable_window_id_col = "person_id" tour_owner_id_col = "person_id" - should_compute_logsums = "LOGSUM_SETTINGS" in model_settings + should_compute_logsums = model_settings.LOGSUM_SETTINGS is not None assert isinstance(tour_segments, dict) @@ -1089,7 +1138,7 @@ def vectorize_subtour_scheduling( persons_merged, alts, spec, - model_settings, + model_settings: TourSchedulingSettings, estimator, chunk_size=0, trace_label=None, @@ -1119,7 +1168,7 @@ def vectorize_subtour_scheduling( spec : DataFrame The spec which will be passed to interaction_simulate. (all subtours share same spec regardless of subtour type) - model_settings : dict + model_settings : TourSchedulingSettings chunk_size trace_label @@ -1219,7 +1268,7 @@ def build_joint_tour_timetables( joint_tour_windows_df = tt.create_timetable_windows(joint_tours, alts) joint_tour_timetable = tt.TimeTable(joint_tour_windows_df, alts) - for participant_num, nth_participants in joint_tour_participants.groupby( + for _participant_num, nth_participants in joint_tour_participants.groupby( "participant_num", sort=True ): # nth_participant windows from persons_timetable @@ -1243,7 +1292,7 @@ def vectorize_joint_tour_scheduling( alts, persons_timetable, spec, - model_settings, + model_settings: TourSchedulingSettings, estimator, chunk_size=0, trace_label=None, @@ -1269,7 +1318,7 @@ def vectorize_joint_tour_scheduling( spec : DataFrame The spec which will be passed to interaction_simulate. 
(or dict of specs keyed on tour_type if tour_types is not None) - model_settings : dict + model_settings : TourSchedulingSettings Returns ------- diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index fe2acd090..e4cb13489 100644 --- a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -15,30 +15,34 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger(__name__) -def annotate_vehicle_allocation(state: workflow.State, model_settings, trace_label): +def annotate_vehicle_allocation( + state: workflow.State, model_settings: VehicleAllocationSettings, trace_label: str +): """ Add columns to the tours table in the pipeline according to spec. Parameters ---------- - model_settings : dict + model_settings : VehicleAllocationSettings trace_label : str """ tours = state.get_dataframe("tours") expressions.assign_columns( state, df=tours, - model_settings=model_settings.get("annotate_tours"), + model_settings=model_settings.annotate_tours, trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), ) state.add_table("tours", tours) -def get_skim_dict(network_los, choosers): +def get_skim_dict(network_los: los.Network_LOS, choosers: pd.DataFrame): """ Returns a dictionary of skim wrappers to use in expression writing. @@ -81,6 +85,26 @@ def get_skim_dict(network_los, choosers): return skims +class VehicleAllocationSettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `vehicle_allocation` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + OCCUPANCY_LEVELS: list = [1] # TODO Check this + """Occupancy levels to simulate. + + A vehicle choice column is created in the tours table for each occupancy + level, named vehicle_occup_1, vehicle_occup_2, etc. If not supplied, + defaults to a single occupancy level of 1. + """ + + annotate_tours: PreprocessorSettings | None = None + """Preprocessor settings to annotate tours""" + + @workflow.step def vehicle_allocation( state: workflow.State, @@ -90,6 +114,9 @@ def vehicle_allocation( tours: pd.DataFrame, tours_merged: pd.DataFrame, network_los: los.Network_LOS, + model_settings: VehicleAllocationSettings | None = None, + model_settings_file_name: str = "vehicle_allocation.yaml", + trace_label: str = "vehicle_allocation", ) -> None: """Selects a vehicle for each occupancy level for each tour.
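# --- Illustrative sketch (not part of the patch): injectable settings ---
# The signature pattern above recurs throughout this patch: each step accepts
# an optional typed settings object and reads its YAML file only when none is
# passed, so tests and extensions can inject settings directly. Rough shape of
# the pattern, with plain pydantic + yaml standing in for PydanticReadable and
# a hypothetical DemoComponentSettings class:
import yaml
from pydantic import BaseModel

class DemoComponentSettings(BaseModel):
    SPEC: str = "demo.csv"

def demo_step(model_settings=None, model_settings_file_name="demo.yaml"):
    if model_settings is None:
        # ActivitySim uses XSettings.read_settings_file(state.filesystem, ...)
        with open(model_settings_file_name) as f:
            model_settings = DemoComponentSettings(**yaml.safe_load(f))
    return model_settings.SPEC

# callers can bypass the YAML read entirely:
assert demo_step(DemoComponentSettings(SPEC="override.csv")) == "override.csv"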
@@ -112,15 +139,18 @@ def vehicle_allocation( tours_merged : pd.DataFrame network_los : los.Network_LOS """ - trace_label = "vehicle_allocation" - model_settings_file_name = "vehicle_allocation.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") + if model_settings is None: + model_settings = VehicleAllocationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + + # logsum_column_name = model_settings.MODE_CHOICE_LOGSUM_COLUMN_NAME estimator = estimation.manager.begin_estimation(state, "vehicle_allocation") - model_spec_raw = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec_raw = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec_raw, coefficients_df, estimator @@ -171,7 +201,7 @@ def vehicle_allocation( locals_dict.update(skims) # ------ preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: expressions.assign_columns( state, @@ -191,7 +221,7 @@ def vehicle_allocation( # ------ running for each occupancy level selected tours_veh_occup_cols = [] - for occup in model_settings.get("OCCUPANCY_LEVELS", [1]): + for occup in model_settings.OCCUPANCY_LEVELS: logger.info("Running for occupancy = %d", occup) # setting occup for access in spec expressions locals_dict.update({"occup": occup}) @@ -240,7 +270,7 @@ def vehicle_allocation( "vehicle_allocation", tours[tours_veh_occup_cols], value_counts=True ) - annotate_settings = model_settings.get("annotate_tours", None) + annotate_settings = model_settings.annotate_tours if annotate_settings: annotate_vehicle_allocation(state, model_settings, trace_label) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 57273e977..a2ea7b0bd 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -6,6 +6,7 @@ import itertools import logging import os +from typing import Literal import pandas as pd @@ -18,13 +19,18 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_simulate import interaction_simulate logger = logging.getLogger(__name__) def append_probabilistic_vehtype_type_choices( - state: workflow.State, choices, model_settings, trace_label + state: workflow.State, + choices, + model_settings: VehicleTypeChoiceSettings, + trace_label, ): """ Select a fuel type for the provided body type and age of the vehicle. 
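[Editor's note, not part of the diff: the hunks above and below all repeat one migration pattern, so it is worth spelling out once; the names here are generic placeholders rather than any specific component.]

# illustrative sketch, not part of the diff
# before: settings were a plain dict, so each call site restated the
# default, and a misspelled key silently returned None
probs_spec_file = model_settings.get("PROBS_SPEC", None)

# after: the default is declared once on the pydantic settings class,
# values are validated when the YAML is loaded, and a misspelled
# attribute raises AttributeError at the call site
probs_spec_file = model_settings.PROBS_SPEC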
@@ -33,8 +39,10 @@ def append_probabilistic_vehtype_type_choices( Parameters ---------- + state : workflow.State choices : pandas.DataFrame selection of {body_type}_{age} to append vehicle type to + model_settings : VehicleTypeChoiceSettings trace_label : str Returns @@ -42,12 +50,12 @@ choices : pandas.DataFrame table of chosen vehicle types """ - probs_spec_file = model_settings.get("PROBS_SPEC", None) + probs_spec_file = model_settings.PROBS_SPEC probs_spec = pd.read_csv( state.filesystem.get_config_file_path(probs_spec_file), comment="#" ) - fleet_year = model_settings.get("FLEET_YEAR") + fleet_year = model_settings.FLEET_YEAR probs_spec["age"] = (1 + fleet_year - probs_spec["vehicle_year"]).astype(int) probs_spec["vehicle_type"] = ( probs_spec[["body_type", "age"]].astype(str).agg("_".join, axis=1) @@ -91,63 +99,66 @@ def append_probabilistic_vehtype_type_choices( def annotate_vehicle_type_choice_households( - state: workflow.State, model_settings, trace_label + state: workflow.State, model_settings: VehicleTypeChoiceSettings, trace_label: str ): """ Add columns to the households table in the pipeline according to spec. Parameters ---------- - model_settings : dict + state : workflow.State + model_settings : VehicleTypeChoiceSettings trace_label : str """ households = state.get_dataframe("households") expressions.assign_columns( state, df=households, - model_settings=model_settings.get("annotate_households"), + model_settings=model_settings.annotate_households, trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), ) state.add_table("households", households) def annotate_vehicle_type_choice_persons( - state: workflow.State, model_settings, trace_label + state: workflow.State, model_settings: VehicleTypeChoiceSettings, trace_label: str ): """ Add columns to the persons table in the pipeline according to spec. Parameters ---------- - model_settings : dict + state : workflow.State + model_settings : VehicleTypeChoiceSettings trace_label : str """ persons = state.get_dataframe("persons") expressions.assign_columns( state, df=persons, - model_settings=model_settings.get("annotate_persons"), + model_settings=model_settings.annotate_persons, trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), ) state.add_table("persons", persons) def annotate_vehicle_type_choice_vehicles( - state: workflow.State, model_settings, trace_label + state: workflow.State, model_settings: VehicleTypeChoiceSettings, trace_label: str ): """ Add columns to the vehicles table in the pipeline according to spec. Parameters ---------- - model_settings : dict + state : workflow.State + model_settings : VehicleTypeChoiceSettings trace_label : str """ vehicles = state.get_dataframe("vehicles") expressions.assign_columns( state, df=vehicles, - model_settings=model_settings.get("annotate_vehicles"), + model_settings=model_settings.annotate_vehicles, trace_label=tracing.extend_trace_label(trace_label, "annotate_vehicles"), ) state.add_table("vehicles", vehicles) @@ -169,7 +180,6 @@ def get_combinatorial_vehicle_alternatives(alts_cats_dict): alts_wide : pd.DataFrame in wide format expanded using pandas get_dummies function alts_long : pd.DataFrame in long format """ - cat_cols = list(alts_cats_dict.keys()) # e.g. fuel type, body type, age alts_long = pd.DataFrame( list(itertools.product(*alts_cats_dict.values())), columns=alts_cats_dict.keys() ).astype(str) @@ -180,7 +191,10 @@ def get_combinatorial_vehicle_alternatives(alts_cats_dict): def construct_model_alternatives( - state: workflow.State, model_settings, alts_cats_dict, vehicle_type_data + state: workflow.State, + model_settings: VehicleTypeChoiceSettings, + alts_cats_dict, + vehicle_type_data, ): """ Construct the table of vehicle type alternatives. @@ -190,7 +204,7 @@ Parameters ---------- state : workflow.State - model_settings : dict + model_settings : VehicleTypeChoiceSettings alts_cats_dict : dict nested dictionary of vehicle body, age, and fuel options vehicle_type_data : pandas.DataFrame @@ -202,7 +216,7 @@ alts_long : pd.DataFrame rows just list the alternatives """ - probs_spec_file = model_settings.get("PROBS_SPEC", None) + probs_spec_file = model_settings.PROBS_SPEC if probs_spec_file: # do not include alternatives from fuel_type if they are given probabilistically del alts_cats_dict["fuel_type"] @@ -223,7 +237,7 @@ alts_wide._merge == "left_only", ["body_type", "fuel_type", "age"] ] - if model_settings.get("REQUIRE_DATA_FOR_ALL_ALTS", False): + if model_settings.REQUIRE_DATA_FOR_ALL_ALTS: # fail if alternative does not have an associated record in the data assert ( len(missing_alts) == 0 @@ -240,7 +254,7 @@ configs_dirs = state.filesystem.get_configs_dir() configs_dirs = configs_dirs if isinstance(configs_dirs, list) else [configs_dirs] - if model_settings.get("WRITE_OUT_ALTS_FILE", False): + if model_settings.WRITE_OUT_ALTS_FILE: alts_wide.to_csv( os.path.join(configs_dirs[0], "vehicle_type_choice_alternatives.csv") ) @@ -249,14 +263,17 @@ def get_vehicle_type_data( - state: workflow.State, model_settings, vehicle_type_data_file + state: workflow.State, + model_settings: VehicleTypeChoiceSettings, + vehicle_type_data_file, ): """ Reads in the vehicle type data and computes the vehicle age. 
Parameters ---------- - model_settings : dict + state : workflow.State + model_settings : VehicleTypeChoiceSettings vehicle_type_data_file : str name of vehicle type data file found in config folder @@ -268,7 +285,7 @@ def get_vehicle_type_data( vehicle_type_data = pd.read_csv( state.filesystem.get_config_file_path(vehicle_type_data_file), comment="#" ) - fleet_year = model_settings.get("FLEET_YEAR") + fleet_year = model_settings.FLEET_YEAR vehicle_type_data["age"] = ( 1 + fleet_year - vehicle_type_data["vehicle_year"] @@ -285,7 +302,7 @@ def get_vehicle_type_data( def iterate_vehicle_type_choice( state: workflow.State, vehicles_merged: pd.DataFrame, - model_settings, + model_settings: VehicleTypeChoiceSettings, model_spec, locals_dict, estimator, @@ -328,9 +345,9 @@ def iterate_vehicle_type_choice( """ # - model settings nest_spec = config.get_logit_model_settings(model_settings) - vehicle_type_data_file = model_settings.get("VEHICLE_TYPE_DATA_FILE", None) - probs_spec_file = model_settings.get("PROBS_SPEC", None) - alts_cats_dict = model_settings.get("combinatorial_alts", False) + vehicle_type_data_file = model_settings.VEHICLE_TYPE_DATA_FILE + probs_spec_file = model_settings.PROBS_SPEC + alts_cats_dict = model_settings.combinatorial_alts # adding vehicle type data to be available to locals_dict regardless of option if vehicle_type_data_file: @@ -365,7 +382,7 @@ def iterate_vehicle_type_choice( # running preprocessor on entire vehicle table to enumerate vehicle types # already owned by the household choosers = vehicles_merged - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: expressions.assign_columns( state, @@ -387,7 +404,7 @@ def iterate_vehicle_type_choice( # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make # use of interaction terms to accommodate alt-specific coefficients and constants - simulation_type = model_settings.get("SIMULATION_TYPE", "interaction_simulate") + simulation_type = model_settings.SIMULATION_TYPE assert (simulation_type == "interaction_simulate") or ( simulation_type == "simple_simulate" ), "SIMULATION_TYPE needs to be interaction_simulate or simple_simulate" @@ -461,7 +478,7 @@ def iterate_vehicle_type_choice( all_choosers = pd.concat(all_choosers) # appending vehicle type data to the vehicle table - additional_cols = model_settings.get("COLS_TO_INCLUDE_IN_VEHICLE_TABLE") + additional_cols = model_settings.COLS_TO_INCLUDE_IN_VEHICLE_TABLE if additional_cols: additional_cols.append("vehicle_type") all_choices = ( @@ -473,6 +490,30 @@ def iterate_vehicle_type_choice( return all_choices, all_choosers +class VehicleTypeChoiceSettings(LogitComponentSettings): + """ + Settings for the `vehicle_type_choice` component. 
+ """ + + VEHICLE_TYPE_DATA_FILE: str | None = None + PROBS_SPEC: str | None = None + combinatorial_alts: dict | None = None + preprocessor: PreprocessorSettings | None = None + SIMULATION_TYPE: Literal[ + "simple_simulate", "interaction_simulate" + ] = "interaction_simulate" + COLS_TO_INCLUDE_IN_VEHICLE_TABLE: list[str] = [] + + annotate_households: PreprocessorSettings | None = None + annotate_persons: PreprocessorSettings | None = None + annotate_vehicles: PreprocessorSettings | None = None + + REQUIRE_DATA_FOR_ALL_ALTS: bool = False + WRITE_OUT_ALTS_FILE: bool = False + + FLEET_YEAR: int + + @workflow.step def vehicle_type_choice( state: workflow.State, @@ -480,6 +521,9 @@ def vehicle_type_choice( households: pd.DataFrame, vehicles: pd.DataFrame, vehicles_merged: pd.DataFrame, + model_settings: VehicleTypeChoiceSettings | None = None, + model_settings_file_name: str = "vehicle_type_choice.yaml", + trace_label: str = "vehicle_type_choice", ) -> None: """Assign a vehicle type to each vehicle in the `vehicles` table. @@ -516,18 +560,24 @@ def vehicle_type_choice( Parameters ---------- + state : workflow.State persons : pd.DataFrame households : pd.DataFrame vehicles : pd.DataFrame - vehicles_merged : DataFrame + vehicles_merged :pd. DataFrame + model_settings : class specifying the model settings + model_settings_file_name: filename of the model settings file + trace_label: trace label of the vehicle type choice model """ - trace_label = "vehicle_type_choice" - model_settings_file_name = "vehicle_type_choice.yaml" - model_settings = state.filesystem.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = VehicleTypeChoiceSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) estimator = estimation.manager.begin_estimation(state, "vehicle_type") - model_spec_raw = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec_raw = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec_raw, coefficients_df, estimator @@ -581,11 +631,11 @@ def vehicle_type_choice( state.add_table("vehicles", vehicles) # - annotate tables - if model_settings.get("annotate_households"): + if model_settings.annotate_households: annotate_vehicle_type_choice_households(state, model_settings, trace_label) - if model_settings.get("annotate_persons"): + if model_settings.annotate_persons: annotate_vehicle_type_choice_persons(state, model_settings, trace_label) - if model_settings.get("annotate_vehicles"): + if model_settings.annotate_vehicles: annotate_vehicle_type_choice_vehicles(state, model_settings, trace_label) tracing.print_summary( diff --git a/activitysim/abm/models/work_from_home.py b/activitysim/abm/models/work_from_home.py index 1b9bc9049..234302b70 100755 --- a/activitysim/abm/models/work_from_home.py +++ b/activitysim/abm/models/work_from_home.py @@ -15,15 +15,59 @@ tracing, workflow, ) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger("activitysim") +class WorkFromHomeSettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `work_from_home` component. 
+ """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + WORK_FROM_HOME_ALT: int + """Value that specify if the person is working from home""" # TODO + + WORK_FROM_HOME_ITERATIONS: int = 1 + """Setting to specify the number of iterations.""" + + CHOOSER_FILTER_COLUMN_NAME: str = "is_worker" + """Column name in the dataframe to represent worker.""" + + WORK_FROM_HOME_CHOOSER_FILTER: str = None + """Setting to filter work from home chooser.""" + + WORK_FROM_HOME_COEFFICIENT_CONSTANT: float = None + """Setting to set the work from home coefficient.""" + + WORK_FROM_HOME_TARGET_PERCENT: float = None + """Setting to set work from target percent.""" + + WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE: float = None + """Setting to set work from home target percent tolerance.""" + + sharrow_skip: bool = False + """Setting to skip sharrow.""" + + DEST_CHOICE_COLUMN_NAME: str = "workplace_zone_id" + """Column name in persons dataframe to specify the workplace zone id. """ + + SPEC: str = "work_from_home.csv" + """Filename for the accessibility specification (csv) file.""" + + @workflow.step def work_from_home( state: workflow.State, persons_merged: pd.DataFrame, persons: pd.DataFrame, + model_settings: WorkFromHomeSettings | None = None, + model_settings_file_name: str = "work_from_home.yaml", + trace_label: str = "work_from_home", ) -> None: """ This model predicts whether a person (worker) works from home. The output @@ -31,25 +75,24 @@ def work_from_home( The workplace location choice is overridden for workers who work from home and set to -1. """ - - trace_label = "work_from_home" - model_settings_file_name = "work_from_home.yaml" + if model_settings is None: + model_settings = WorkFromHomeSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) choosers = persons_merged - model_settings = state.filesystem.read_model_settings(model_settings_file_name) - chooser_filter_column_name = model_settings.get( - "CHOOSER_FILTER_COLUMN_NAME", "is_worker" - ) + chooser_filter_column_name = model_settings.CHOOSER_FILTER_COLUMN_NAME choosers = choosers[choosers[chooser_filter_column_name]] logger.info("Running %s with %d persons", trace_label, len(choosers)) estimator = estimation.manager.begin_estimation(state, "work_from_home") constants = config.get_model_constants(model_settings) - work_from_home_alt = model_settings["WORK_FROM_HOME_ALT"] + work_from_home_alt = model_settings.WORK_FROM_HOME_ALT # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} if constants is not None: @@ -63,7 +106,7 @@ def work_from_home( trace_label=trace_label, ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) nest_spec = config.get_logit_model_settings(model_settings) @@ -75,18 +118,12 @@ def work_from_home( estimator.write_choosers(choosers) # - iterative single process what-if adjustment if specified - iterations = model_settings.get("WORK_FROM_HOME_ITERATIONS", 1) - iterations_chooser_filter = model_settings.get( - "WORK_FROM_HOME_CHOOSER_FILTER", None - ) - iterations_coefficient_constant = model_settings.get( - "WORK_FROM_HOME_COEFFICIENT_CONSTANT", None - ) - iterations_target_percent = model_settings.get( - "WORK_FROM_HOME_TARGET_PERCENT", None - ) - 
iterations_target_percent_tolerance = model_settings.get( - "WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE", None + iterations = model_settings.WORK_FROM_HOME_ITERATIONS + iterations_chooser_filter = model_settings.WORK_FROM_HOME_CHOOSER_FILTER + iterations_coefficient_constant = model_settings.WORK_FROM_HOME_COEFFICIENT_CONSTANT + iterations_target_percent = model_settings.WORK_FROM_HOME_TARGET_PERCENT + iterations_target_percent_tolerance = ( + model_settings.WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE ) for iteration in range(iterations): @@ -98,12 +135,12 @@ def work_from_home( ) # re-read spec to reset substitution - model_spec = state.filesystem.read_model_spec(file_name=model_settings["SPEC"]) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator ) - if model_settings.get("sharrow_skip", False): + if model_settings.sharrow_skip: constants["disable_sharrow"] = True choices = simulate.simple_simulate( @@ -177,9 +214,7 @@ def work_from_home( # setting workplace_zone_id to -1 if person works from home # this will exclude them from the telecommute frequency model choosers # See https://github.com/ActivitySim/activitysim/issues/627 - dest_choice_column_name = model_settings.get( - "DEST_CHOICE_COLUMN_NAME", "workplace_zone_id" - ) + dest_choice_column_name = model_settings.DEST_CHOICE_COLUMN_NAME if dest_choice_column_name in persons.columns: persons[dest_choice_column_name] = np.where( persons.work_from_home == True, -1, persons[dest_choice_column_name] diff --git a/activitysim/abm/tables/disaggregate_accessibility.py b/activitysim/abm/tables/disaggregate_accessibility.py index 7858245f2..8ab0e0820 100644 --- a/activitysim/abm/tables/disaggregate_accessibility.py +++ b/activitysim/abm/tables/disaggregate_accessibility.py @@ -161,12 +161,14 @@ def disaggregate_accessibility(state: workflow.State): disaggregate_accessibility=None, ) + from ..models.disaggregate_accessibility import DisaggregateAccessibilitySettings + # Extract model settings - model_settings = state.filesystem.read_model_settings( - "disaggregate_accessibility.yaml" + model_settings = DisaggregateAccessibilitySettings.read_settings_file( + state.filesystem, "disaggregate_accessibility.yaml" ) - merging_params = model_settings.get("MERGE_ON") - nearest_method = model_settings.get("NEAREST_METHOD", "skims") + merging_params = model_settings.MERGE_ON + nearest_method = model_settings.NEAREST_METHOD accessibility_cols = [ x for x in proto_accessibility_df.columns if "accessibility" in x ] diff --git a/activitysim/abm/tables/shadow_pricing.py b/activitysim/abm/tables/shadow_pricing.py index 87870f5b6..6b06a9d58 100644 --- a/activitysim/abm/tables/shadow_pricing.py +++ b/activitysim/abm/tables/shadow_pricing.py @@ -7,7 +7,7 @@ import multiprocessing import time from collections import OrderedDict -from typing import Any +from typing import Any, Literal import numpy as np import pandas as pd @@ -15,6 +15,8 @@ from activitysim.abm.tables.size_terms import size_terms as get_size_terms from activitysim.abm.tables.size_terms import tour_destination_size_terms from activitysim.core import logit, tracing, util, workflow +from activitysim.core.configuration import PydanticReadable +from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.input import read_input_table logger = logging.getLogger(__name__) @@ -88,11 +90,67 @@ def size_table_name(model_selector): return "%s_destination_size" % 
model_selector +class ShadowPriceSettings(PydanticReadable, extra="forbid"): + """Settings used for shadow pricing.""" + + shadow_pricing_models: dict[str, str] | None = None + """Mapping of model_selectors to model_names for models that use shadow pricing. + This mapping identifies which size_terms to preload, which must be done in single-process + mode so that predicted_size tables can be scaled to the population.""" + + LOAD_SAVED_SHADOW_PRICES: bool = True + """Global switch to enable/disable loading of saved shadow prices. + + This is ignored if global use_shadow_pricing switch is False + """ + + MAX_ITERATIONS: int = 5 + """Number of shadow price iterations for cold start.""" + + MAX_ITERATIONS_SAVED: int = 1 + """Number of shadow price iterations for warm start. + + A warm start means saved shadow_prices were found in a file and loaded.""" + + SIZE_THRESHOLD: float = 10 + """ignore criteria for zones smaller than size_threshold""" + + PERCENT_TOLERANCE: float = 5 + """zone passes if modeled is within percent_tolerance of predicted_size""" + + FAIL_THRESHOLD: float = 10 + """max percentage of zones allowed to fail""" + + SHADOW_PRICE_METHOD: Literal["ctramp", "daysim", "simulation"] = "ctramp" + + DAMPING_FACTOR: float = 1 + """ctramp-style damping factor""" + + SCALE_SIZE_TABLE: bool = False + + DAYSIM_ABSOLUTE_TOLERANCE: float = 50 + DAYSIM_PERCENT_TOLERANCE: float = 10 + + TARGET_THRESHOLD: float = 20 + """ignore criteria for zones smaller than target_threshold (total employment or enrollment)""" + + workplace_segmentation_targets: dict[str, str] | None = None + school_segmentation_targets: dict[str, str] | None = None + + WRITE_ITERATION_CHOICES: bool = False + + SEGMENT_TO_NAME: dict[str, str] = { + "school": "school_segment", + "workplace": "income_segment", + } # pydantic uses deep copy, so mutable default value is ok here + """Mapping from model_selector to persons_segment_name.""" + + class ShadowPriceCalculator: def __init__( self, state: workflow.State, - model_settings, + model_settings: TourLocationComponentSettings, num_processes, shared_data=None, shared_data_lock=None, @@ -122,7 +180,7 @@ def __init__( None # set by read_saved_shadow_prices if loaded ) - self.model_selector = model_settings["MODEL_SELECTOR"] + self.model_selector = model_settings.MODEL_SELECTOR if (self.num_processes > 1) and not state.settings.fail_fast: # if we are multiprocessing, then fail_fast should be true or we will wait forever for failed processes @@ -133,26 +191,24 @@ def __init__( "Shadow pricing requires fail_fast setting in multiprocessing mode" ) - self.segment_ids = model_settings["SEGMENT_IDS"] + self.segment_ids = model_settings.SEGMENT_IDS # - modeled_size (set by call to set_choices/synchronize_modeled_size) self.modeled_size = None if self.use_shadow_pricing: - self.shadow_settings = state.filesystem.read_model_settings( - "shadow_pricing.yaml" + self.shadow_settings = ShadowPriceSettings.read_settings_file( + state.filesystem, "shadow_pricing.yaml" ) - for k in self.shadow_settings: - logger.debug( - "shadow_settings %s: %s" % (k, self.shadow_settings.get(k)) - ) + for k, k_value in self.shadow_settings: + logger.debug(f"shadow_settings {k}: {k_value}") full_model_run = state.settings.households_sample_size == 0 if ( self.use_shadow_pricing and not full_model_run - and self.shadow_settings["SHADOW_PRICE_METHOD"] != "simulation" + and self.shadow_settings.SHADOW_PRICE_METHOD != "simulation" ): # ctramp and daysim methods directly compare desired and modeled size to compute shadow prices. 
# destination size terms are scaled in add_size_tables only for full model runs @@ -165,7 +221,7 @@ def __init__( if ( self.use_shadow_pricing and self.model_selector not in ["workplace", "school"] - and self.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation" + and self.shadow_settings.SHADOW_PRICE_METHOD == "simulation" ): logger.warning( "Shadow price simulation method is only implemented for workplace and school." @@ -203,23 +259,21 @@ def __init__( # - load saved shadow_prices (if available) and set max_iterations accordingly if self.use_shadow_pricing: self.shadow_prices = None - self.shadow_price_method = self.shadow_settings["SHADOW_PRICE_METHOD"] + self.shadow_price_method = self.shadow_settings.SHADOW_PRICE_METHOD assert self.shadow_price_method in ["daysim", "ctramp", "simulation"] # ignore convergence criteria for zones smaller than target_threshold - self.target_threshold = self.shadow_settings["TARGET_THRESHOLD"] + self.target_threshold = self.shadow_settings.TARGET_THRESHOLD - if self.shadow_settings["LOAD_SAVED_SHADOW_PRICES"]: + if self.shadow_settings.LOAD_SAVED_SHADOW_PRICES: # read_saved_shadow_prices logs error and returns None if file not found self.shadow_prices = self.read_saved_shadow_prices( state, model_settings ) if self.shadow_prices is None: - self.max_iterations = self.shadow_settings.get("MAX_ITERATIONS", 5) + self.max_iterations = self.shadow_settings.MAX_ITERATIONS else: - self.max_iterations = self.shadow_settings.get( - "MAX_ITERATIONS_SAVED", 1 - ) + self.max_iterations = self.shadow_settings.MAX_ITERATIONS_SAVED # initial_shadow_price if we did not load if self.shadow_prices is None: @@ -244,18 +298,18 @@ def __init__( if ( self.use_shadow_pricing - and self.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation" + and self.shadow_settings.SHADOW_PRICE_METHOD == "simulation" ): assert self.model_selector in ["workplace", "school"] self.target = {} land_use = state.get_dataframe("land_use") if self.model_selector == "workplace": - employment_targets = self.shadow_settings[ - "workplace_segmentation_targets" - ] + employment_targets = ( + self.shadow_settings.workplace_segmentation_targets or {} + ) assert ( - employment_targets is not None + employment_targets ), "Need to supply workplace_segmentation_targets in shadow_pricing.yaml" for segment, target in employment_targets.items(): @@ -268,9 +322,9 @@ def __init__( self.target[segment] = land_use[target] elif self.model_selector == "school": - school_targets = self.shadow_settings["school_segmentation_targets"] + school_targets = self.shadow_settings.school_segmentation_targets or {} assert ( - school_targets is not None + school_targets ), "Need to supply school_segmentation_targets in shadow_pricing.yaml" for segment, target in school_targets.items(): @@ -282,7 +336,9 @@ def __init__( ), f"{target} is not in landuse columns: {land_use.columns}" self.target[segment] = land_use[target] - def read_saved_shadow_prices(self, state, model_settings): + def read_saved_shadow_prices( + self, state: workflow.State, model_settings: TourLocationComponentSettings + ): """ Read saved shadow_prices from csv file in data_dir (so-called warm start) returns None if no saved shadow price file name specified or named file not found @@ -299,9 +355,7 @@ def read_saved_shadow_prices(self, state, model_settings): shadow_prices = None # - load saved shadow_prices - saved_shadow_price_file_name = model_settings.get( - "SAVED_SHADOW_PRICE_TABLE_NAME" - ) + saved_shadow_price_file_name = 
model_settings.SAVED_SHADOW_PRICE_TABLE_NAME if saved_shadow_price_file_name: # FIXME - where should we look for this file? file_path = state.filesystem.get_data_file_path( @@ -544,17 +598,17 @@ def check_fit(self, state: workflow.State, iteration): # - convergence criteria for check_fit # ignore convergence criteria for zones smaller than size_threshold - size_threshold = self.shadow_settings["SIZE_THRESHOLD"] + size_threshold = self.shadow_settings.SIZE_THRESHOLD # zone passes if modeled is within percent_tolerance of desired_size - percent_tolerance = self.shadow_settings["PERCENT_TOLERANCE"] + percent_tolerance = self.shadow_settings.PERCENT_TOLERANCE # max percentage of zones allowed to fail - fail_threshold = self.shadow_settings["FAIL_THRESHOLD"] + fail_threshold = self.shadow_settings.FAIL_THRESHOLD # option to write out choices by iteration for each person to trace folder - write_choices = self.shadow_settings.get("WRITE_ITERATION_CHOICES", False) + write_choices = self.shadow_settings.WRITE_ITERATION_CHOICES if write_choices: self.choices_by_iteration[iteration] = self.choices_synced - if self.shadow_settings["SHADOW_PRICE_METHOD"] != "simulation": + if self.shadow_settings.SHADOW_PRICE_METHOD != "simulation": modeled_size = self.modeled_size desired_size = self.desired_size @@ -681,7 +735,7 @@ def update_shadow_prices(self, state): assert self.use_shadow_pricing - shadow_price_method = self.shadow_settings["SHADOW_PRICE_METHOD"] + shadow_price_method = self.shadow_settings.SHADOW_PRICE_METHOD # can't update_shadow_prices until after first iteration # modeled_size should have been set by set_choices at end of previous iteration @@ -697,7 +751,7 @@ def update_shadow_prices(self, state): // else // shadowPrice *= scaledSize; """ - damping_factor = self.shadow_settings["DAMPING_FACTOR"] + damping_factor = self.shadow_settings.DAMPING_FACTOR assert 0 < damping_factor <= 1 new_scale_factor = self.desired_size / self.modeled_size @@ -729,8 +783,8 @@ def update_shadow_prices(self, state): shadow_price = shadow_price + log(np.maximum(target, 0.01) / np.maximum(modeled, 0.01)) """ # FIXME should these be the same as PERCENT_TOLERANCE and FAIL_THRESHOLD above? 
- absolute_tolerance = self.shadow_settings["DAYSIM_ABSOLUTE_TOLERANCE"] - percent_tolerance = self.shadow_settings["DAYSIM_PERCENT_TOLERANCE"] / 100.0 + absolute_tolerance = self.shadow_settings.DAYSIM_ABSOLUTE_TOLERANCE + percent_tolerance = self.shadow_settings.DAYSIM_PERCENT_TOLERANCE / 100.0 assert 0 <= percent_tolerance <= 1 target = np.where( @@ -773,14 +827,12 @@ def update_shadow_prices(self, state): shadow_price_j = -999 resimulate n workers from zone j, with n = int(workers_j-emp_j/sum(emp_j*workers_j)) """ - percent_tolerance = self.shadow_settings["PERCENT_TOLERANCE"] + percent_tolerance = self.shadow_settings.PERCENT_TOLERANCE sampled_persons = pd.DataFrame() persons_merged = state.get_dataframe("persons_merged") # need to join the segment to the choices to sample correct persons - segment_to_name_dict = self.shadow_settings.get( - "", default_segment_to_name_dict - ) + segment_to_name_dict = self.shadow_settings.SEGMENT_TO_NAME segment_name = segment_to_name_dict[self.model_selector] if type(self.choices_synced) != pd.DataFrame: @@ -867,7 +919,7 @@ def dest_size_terms(self, segment): utility_adjustment = 0 if self.use_shadow_pricing: - shadow_price_method = self.shadow_settings["SHADOW_PRICE_METHOD"] + shadow_price_method = self.shadow_settings.SHADOW_PRICE_METHOD if shadow_price_method == "ctramp": size_term_adjustment = self.shadow_prices[segment] @@ -1162,7 +1214,9 @@ def shadow_price_data_from_buffers(data_buffers, shadow_pricing_info, model_sele return np.frombuffer(data.get_obj(), dtype=dtype).reshape(shape), data.get_lock() -def load_shadow_price_calculator(state: workflow.State, model_settings): +def load_shadow_price_calculator( + state: workflow.State, model_settings: TourLocationComponentSettings +): """ Initialize ShadowPriceCalculator for model_selector (e.g. school or workplace) @@ -1171,16 +1225,19 @@ Parameters ---------- - model_settings : dict + state : workflow.State + model_settings : TourLocationComponentSettings Returns ------- spc : ShadowPriceCalculator """ + if not isinstance(model_settings, TourLocationComponentSettings): + model_settings = TourLocationComponentSettings.parse_obj(model_settings) num_processes = state.get_injectable("num_processes", 1) - model_selector = model_settings["MODEL_SELECTOR"] + model_selector = model_settings.MODEL_SELECTOR # - get shared_data from data_buffers (if multiprocessing) data_buffers = state.get_injectable("data_buffers", None) @@ -1258,8 +1315,10 @@ def add_size_tables( use_shadow_pricing = bool(state.settings.use_shadow_pricing) - shadow_settings = state.filesystem.read_model_settings("shadow_pricing.yaml") - shadow_pricing_models = shadow_settings.get("shadow_pricing_models") + shadow_settings = ShadowPriceSettings.read_settings_file( + state.filesystem, "shadow_pricing.yaml" + ) + shadow_pricing_models = shadow_settings.shadow_pricing_models if shadow_pricing_models is None: logger.warning( @@ -1269,7 +1328,7 @@ # probably ought not scale if not shadow_pricing (breaks partial sample replicability) # but this allows compatibility with existing CTRAMP behavior... 
- scale_size_table = shadow_settings.get("SCALE_SIZE_TABLE", False) + scale_size_table = shadow_settings.SCALE_SIZE_TABLE # Suffixes for disaggregate accessibilities # Set default here in case None is explicitly passed @@ -1291,26 +1350,28 @@ def add_size_tables( # since these are scaled to model size, they have to be created while single-process for model_selector, model_name in shadow_pricing_models.items(): - model_settings = state.filesystem.read_model_settings(model_name) + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, model_name + ) if suffix is not None and roots: model_settings = util.suffix_tables_in_settings( model_settings, suffix, roots ) - assert model_selector == model_settings["MODEL_SELECTOR"] + assert model_selector == model_settings.MODEL_SELECTOR - assert ( - "SEGMENT_IDS" in model_settings - ), f"missing SEGMENT_IDS setting in {model_name} model_settings" - segment_ids = model_settings["SEGMENT_IDS"] - chooser_table_name = model_settings["CHOOSER_TABLE_NAME"] - chooser_segment_column = model_settings["CHOOSER_SEGMENT_COLUMN_NAME"] + # assert ( + # "SEGMENT_IDS" in model_settings + # ), f"missing SEGMENT_IDS setting in {model_name} model_settings" + segment_ids = model_settings.SEGMENT_IDS + chooser_table_name = model_settings.CHOOSER_TABLE_NAME + chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME choosers_df = state.get_dataframe(chooser_table_name) - if "CHOOSER_FILTER_COLUMN_NAME" in model_settings: + if model_settings.CHOOSER_FILTER_COLUMN_NAME: choosers_df = choosers_df[ - choosers_df[model_settings["CHOOSER_FILTER_COLUMN_NAME"]] != 0 + choosers_df[model_settings.CHOOSER_FILTER_COLUMN_NAME] != 0 ] # - raw_desired_size @@ -1401,10 +1462,12 @@ def get_shadow_pricing_info(state): land_use = state.get_dataframe("land_use") size_terms = state.get_injectable("size_terms") - shadow_settings = state.filesystem.read_model_settings("shadow_pricing.yaml") + shadow_settings = ShadowPriceSettings.read_settings_file( + state.filesystem, "shadow_pricing.yaml" + ) # shadow_pricing_models is dict of {model_selector: model_name} - shadow_pricing_models = shadow_settings.get("shadow_pricing_models", {}) + shadow_pricing_models = shadow_settings.shadow_pricing_models or {} blocks = OrderedDict() for model_selector in shadow_pricing_models: @@ -1422,7 +1485,7 @@ } for k in shadow_pricing_info: - logger.debug("shadow_pricing_info %s: %s" % (k, shadow_pricing_info.get(k))) + logger.debug(f"shadow_pricing_info {k}: {shadow_pricing_info.get(k)}") return shadow_pricing_info @@ -1443,10 +1506,12 @@ persons = read_input_table(state, "persons") - shadow_settings = state.filesystem.read_model_settings("shadow_pricing.yaml") + shadow_settings = ShadowPriceSettings.read_settings_file( + state.filesystem, "shadow_pricing.yaml" + ) # shadow_pricing_models is dict of {model_selector: model_name} - shadow_pricing_models = shadow_settings.get("shadow_pricing_models", {}) + shadow_pricing_models = shadow_settings.shadow_pricing_models or {} blocks = OrderedDict() for model_selector in shadow_pricing_models: @@ -1466,7 +1531,7 @@ for k in shadow_pricing_choice_info: logger.debug( - "shadow_pricing_choice_info %s: %s" % (k, shadow_pricing_choice_info.get(k)) + f"shadow_pricing_choice_info {k}: {shadow_pricing_choice_info.get(k)}" ) return shadow_pricing_choice_info diff --git a/activitysim/benchmarking/componentwise.py b/activitysim/benchmarking/componentwise.py index 
86fecea5c..5a9d9a7cd 100644 --- a/activitysim/benchmarking/componentwise.py +++ b/activitysim/benchmarking/componentwise.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def reload_settings(settings_filename, **kwargs): +def reload_settings(state, settings_filename, **kwargs): settings = state.filesystem.read_settings_file(settings_filename, mandatory=True) for k in kwargs: settings[k] = kwargs[k] @@ -85,6 +85,7 @@ def setup_component( state.add_injectable("output_dir", os.path.join(working_dir, output_dir)) reload_settings( + state, settings_filename, benchmarking=component_name, checkpoints=False, diff --git a/activitysim/core/assign.py b/activitysim/core/assign.py index d504d6378..1160c571c 100644 --- a/activitysim/core/assign.py +++ b/activitysim/core/assign.py @@ -80,7 +80,7 @@ def read_assignment_spec( Parameters ---------- - file_name : str + file_name : path-like Name of a CSV spec file. description_name : str, optional Name of the column in `fname` that contains the component description. diff --git a/activitysim/core/config.py b/activitysim/core/config.py index 5cd08e5a0..730c38315 100644 --- a/activitysim/core/config.py +++ b/activitysim/core/config.py @@ -1,12 +1,16 @@ from __future__ import annotations -# ActivitySim -# See full license in LICENSE.txt. -import argparse import logging import warnings +from typing import Any, TypeVar from activitysim.core import workflow +from activitysim.core.configuration.base import PydanticBase +from activitysim.core.configuration.logit import LogitComponentSettings + +# ActivitySim +# See full license in LICENSE.txt. + logger = logging.getLogger(__name__) @@ -33,9 +37,8 @@ def future_model_settings(model_name, model_settings, future_settings): Returns ------- - dict - model_settings with any missing future_settings added - + dict + model_settings with any missing future_settings added """ model_settings = model_settings.copy() for key, setting in future_settings.items(): @@ -51,6 +54,37 @@ def future_model_settings(model_name, model_settings, future_settings): return model_settings +T = TypeVar("T", bound=PydanticBase) + + +def future_component_settings( + model_name: str, model_settings: T, future_settings: dict +) -> T: + """ + Warn users of new required model settings, and substitute default values + + Parameters + ---------- + model_name: str + name of model + model_settings: PydanticBase + model_settings from settings file + future_settings: dict + default values for new required settings + + Returns + ------- + PydanticBase + model_settings with any missing future_settings added + """ + for key, setting in future_settings.items(): + if getattr(model_settings, key) is None: + warnings.warn( + f"Setting '{key}' not found in {model_name} model settings. " + f"Replacing with default value: {setting}. " + f"This setting will be required in future versions.", + FutureWarning, + stacklevel=2, + ) + setattr(model_settings, key, setting) + return model_settings + + def get_model_constants(model_settings): """ Read constants from model settings file @@ -60,10 +94,14 @@ constants : dict dictionary of constants to add to locals for use by expressions in model spec """ + if hasattr(model_settings, "CONSTANTS"): + return model_settings.CONSTANTS return model_settings.get("CONSTANTS", {}) -def get_logit_model_settings(model_settings): +def get_logit_model_settings( + model_settings: LogitComponentSettings | dict[str, Any] | None +): """ Read nest spec (for nested logit) from model settings file @@ -71,10 +109,12 @@ ------- nests : dict dictionary specifying nesting structure and nesting coefficients - - constants : dict - dictionary of constants to add to locals for use by expressions in model spec """ + if isinstance(model_settings, LogitComponentSettings): + # all the validation for well formatted settings is handled by pydantic, + # so we just return the nests here. + return model_settings.NESTS + nests = None if model_settings is not None: diff --git a/activitysim/core/configuration/base.py b/activitysim/core/configuration/base.py index 05733ea7e..9a3f7b7cd 100644 --- a/activitysim/core/configuration/base.py +++ b/activitysim/core/configuration/base.py @@ -1,32 +1,128 @@ from __future__ import annotations -from typing import Any, Union # noqa: F401 +from pathlib import Path +from typing import Any, Literal, TypeVar, Union # noqa: F401 -from activitysim.core import configuration +from pydantic import BaseModel as PydanticBase -try: - from pydantic import BaseModel as PydanticBase -except ModuleNotFoundError: +from activitysim.core import configuration - class PydanticBase: - pass +PydanticReadableType = TypeVar("PydanticReadableType", bound="PydanticReadable") class PydanticReadable(PydanticBase): + """ + Base class for `pydantic.BaseModel`s readable from cascading config files. + + Although not formally defined as an abstract base class, there is generally + no reason to instantiate a `PydanticReadable` object directly. + """ + + source_file_paths: list[Path] | None = None + """ + A list of source files from which these settings were loaded. + + This value should not be set by the user within the YAML settings files, + instead it is populated as those files are loaded. It is primarily + provided for debugging purposes, and does not actually affect the operation + of any model. + """ + + @classmethod def read_settings_file( - cls, - filesystem: "configuration.FileSystem", - file_name, - mandatory=True, - include_stack=False, - configs_dir_list=None, - ) -> PydanticReadable: + cls: type[PydanticReadableType], + filesystem: configuration.FileSystem, + file_name: str, + mandatory: bool = True, + ) -> PydanticReadableType: + """ + Load settings from one or more yaml files. + + This method has been written to allow models to be configured with + "settings file inheritance". This allows the user to avoid duplicating + settings across multiple related model configurations. Instead, + settings can be written in a "cascading" manner: multiple files can be + provided with settings values, and each particular key is set according + to the first value found for that key. 
+ + For example, suppose a user has a basic model setup with some settings, and + they would like to do a model run with all the same settings except with the + `foo` setting using a value of `'baz'` instead of the usual value of `'bar'` + that is defined in the usual model setup. They could accomplish this by + placing a `file_name` file with *only* + + .. code-block:: yaml + + foo: baz + inherit_settings: true + + in the first directory listed in `filesystem.configs_dir`. The + `inherit_settings` flag tells the interpreter to search for other + matching settings files in the chain of config directories, and to fill + in other settings values that are not yet defined, but the `foo: baz` will + preempt any other values for `foo` that may be set in those other files. + If the `inherit_settings` flag is omitted or set to false, then the + search process ends with this file, only the `foo` setting would be + defined, and all other settings expected in this file would take on + their default values. + + Alternatively, a settings file may include a `include_settings` key, + + .. code-block:: yaml + + include_settings: other-filename.yaml + + with an alternative file name as its value, in which case the method + loads values from that other file instead. To avoid confusion, this + directive must appear ALONE in the target file, without any additional + settings or directives. + + Parameters + ---------- + filesystem: configuration.FileSystem + Provides the list of config directories to search. + file_name : str + The name of the YAML file to search for. + mandatory : boolean, default True + If true, raise SettingsFileNotFoundError if no matching settings file + is found in any config directory, otherwise this method will return + an empty dict or an all-default instance of the validator class. + + Returns + ------- + PydanticReadable or derived class + """ # pass through to read_settings_file, requires validator_class and provides type hinting for IDE's return filesystem.read_settings_file( file_name, mandatory, - include_stack, - configs_dir_list, validator_class=cls, ) + + +class PreprocessorSettings(PydanticBase): + """ + Preprocessor instructions. + """ + + SPEC: str + """Specification to use for pre-processing. + + This is the name of the specification CSV file to be found in one of the + configs directories. The '.csv' extension may be omitted. + """ + + DF: str + """Name of the primary table used for this preprocessor. + + The preprocessor will emit rows to a temporary table that match the rows + in this table from the pipeline.""" + + TABLES: list[str] | None = None + """Names of the additional tables to be merged for the preprocessor. + + Data from these tables will be merged into the primary table, according + to standard merge rules for these tables. Care should be taken to limit the + number of merged tables as the memory requirements for the preprocessor + will increase with each table. 
+ """ diff --git a/activitysim/core/configuration/filesystem.py b/activitysim/core/configuration/filesystem.py index 3c9a40e4b..0f398c8fa 100644 --- a/activitysim/core/configuration/filesystem.py +++ b/activitysim/core/configuration/filesystem.py @@ -6,13 +6,16 @@ import struct import time from pathlib import Path +from typing import Any import numba +import pandas as pd import platformdirs import yaml from pydantic import DirectoryPath, validator from activitysim.core.configuration.base import PydanticBase +from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.exceptions import SettingsFileNotFoundError from activitysim.core.util import parse_suffix_args, suffix_tables_in_settings @@ -538,7 +541,9 @@ def get_configs_dir(self) -> tuple[Path]: """ return tuple(self.get_working_subdir(i) for i in self.configs_dir) - def get_config_file_path(self, file_name, mandatory=True, allow_glob=False) -> Path: + def get_config_file_path( + self, file_name: Path | str, mandatory: bool = True, allow_glob: bool = False + ) -> Path: """ Find the first matching file among config directories. @@ -629,12 +634,12 @@ def open_log_file(self, file_name, mode, header=None, prefix=False): def read_settings_file( self, - file_name, - mandatory=True, - include_stack=False, - configs_dir_list=None, - validator_class=None, - ): + file_name: str, + mandatory: bool = True, + include_stack: bool = False, + configs_dir_list: tuple[Path] | None = None, + validator_class: type[PydanticBase] | None = None, + ) -> dict | PydanticBase: """ Load settings from one or more yaml files. @@ -671,6 +676,8 @@ def read_settings_file( ------- dict or validator_class """ + if isinstance(file_name, Path): + file_name = str(file_name) def backfill_settings(settings, backfill): new_settings = backfill.copy() @@ -804,8 +811,10 @@ def backfill_settings(settings, backfill): if args.SUFFIX is not None and args.ROOTS: settings = suffix_tables_in_settings(settings, args.SUFFIX, args.ROOTS) - # we don't want to actually have inherit_settings as a settings + # we don't want to actually have inherit_settings or include_settings + # as they won't validate settings.pop("inherit_settings", None) + settings.pop("include_settings", None) if validator_class is not None: settings = validator_class.parse_obj(settings) @@ -825,19 +834,25 @@ def read_model_settings( # in the legacy implementation, this function has a default mandatory=False return self.read_settings_file(file_name, mandatory=mandatory) - def read_model_spec(self, file_name: str): + def read_model_spec(self, file_name: Path | str): from activitysim.core import simulate return simulate.read_model_spec(self, file_name) - def read_model_coefficients(self, model_settings=None, file_name=None): + def read_model_coefficients( + self, + model_settings: LogitComponentSettings | dict[str, Any] | None = None, + file_name: str | None = None, + ) -> pd.DataFrame: from activitysim.core import simulate return simulate.read_model_coefficients( self, model_settings=model_settings, file_name=file_name ) - def get_segment_coefficients(self, model_settings, segment_name): + def get_segment_coefficients( + self, model_settings: PydanticBase | dict, segment_name: str + ): from activitysim.core import simulate return simulate.get_segment_coefficients(self, model_settings, segment_name) diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py new file mode 100644 index 000000000..c01187eeb --- /dev/null +++ 
b/activitysim/core/configuration/logit.py @@ -0,0 +1,210 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Literal + +from pydantic import BaseModel as PydanticBase +from pydantic import validator + +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable + + +class LogitNestSpec(PydanticBase): + """ + Defines a nest in a nested logit model. + """ + + name: str + """A descriptive name for this nest.""" + + coefficient: str | float + """The named parameter to be used as the logsum coefficient. + + If given as a string, this named parameter should appear in the + logit model's `COEFFICIENTS` file. + """ + + alternatives: list[LogitNestSpec | str] + """The alternatives within this nest. + + These can be either the names of elemental alternatives, or `LogitNestSpec` + definitions for more nests, or a mixture of these. + """ + + @validator("coefficient") + def prefer_float_to_str(cls, coefficient_value): + """ + Convert string values to float directly if possible. + """ + try: + coefficient_value = float(coefficient_value) + except ValueError: + pass + return coefficient_value + + +class BaseLogitComponentSettings(PydanticReadable): + """ + Base configuration class for components that are logit models. + + These settings are common to all logit models. Component developers + should generally prefer using a derived class that defines a complete + logit model such as `LogitComponentSettings`, or a compound component + such as `LocationComponentSettings`, which melds together alternative + sampling, logsums, and choice. + """ + + SPEC: Path + """Utility specification filename. + + This is sometimes alternatively called the utility expressions calculator + (UEC). It is a CSV file giving all the functions for the terms of a + linear-in-parameters utility expression. + """ + + COEFFICIENTS: Path | None = None + """Coefficients filename. + + This is a CSV file giving named parameters for use in the utility expression. + If it is not provided, then it is assumed that all model coefficients are + given explicitly in the `SPEC` as numerical values instead of named parameters. + This is perfectly acceptable for use with ActivitySim for typical simulation + applications, but may be problematic if used with "estimation mode". + """ + + CONSTANTS: dict[str, Any] = {} + """Named constants usable in the utility expressions.""" + + sharrow_skip: bool = False + """Skip sharrow when evaluating this component.""" + + +class LogitComponentSettings(BaseLogitComponentSettings): + """ + Base configuration class for components that are individual logit models. + """ + + LOGIT_TYPE: Literal["MNL", "NL"] = "MNL" + """Logit model mathematical form. + + * "MNL" + Multinomial logit model. + * "NL" + Nested multinomial logit model. + """ + + NESTS: LogitNestSpec | None = None + """Nesting structure for a nested logit model. + + The nesting structure is specified hierarchically from the top, so the + value of this field should be the "root" level nest of the nested logit + tree, which should contain references to lower level nests and/or the + actual alternatives. + + For example, this YAML defines a simple nesting structure for four + alternatives (DRIVE, WALK, WALK_TO_TRANSIT, DRIVE_TO_TRANSIT) with the two + transit alternatives grouped together in a nest: + + .. 
code-block:: yaml + + NESTS: + name: root + coefficient: coef_nest_root + alternatives: + - DRIVE + - WALK + - name: TRANSIT + coefficient: coef_nest_transit + alternatives: + - WALK_TO_TRANSIT + - DRIVE_TO_TRANSIT + """ + + @validator("NESTS") + def nests_are_for_nl(cls, nests, values): + """ + Checks that nests are provided if (and only if) `LOGIT_TYPE` is NL. + """ + if "LOGIT_TYPE" in values and values["LOGIT_TYPE"] == "NL": + if nests is None: + raise ValueError("NESTS cannot be omitted for a NL model") + if "LOGIT_TYPE" in values and values["LOGIT_TYPE"] == "MNL": + if nests is not None: + raise ValueError("NESTS cannot be provided for a MNL model") + return nests + + +class TemplatedLogitComponentSettings(LogitComponentSettings): + """ + Base configuration for segmented logit models with a coefficient template. + """ + + COEFFICIENT_TEMPLATE: str | None = None + """Coefficients template filename. + + For a segmented model component, this maps the named parameters to + segment-specific names. + """ + + +class LocationComponentSettings(BaseLogitComponentSettings): + """ + Base configuration class for components that are location choice models. + """ + + SAMPLE_SPEC: Path + """The utility spec giving expressions to use in alternative sampling.""" + + SAMPLE_SIZE: int + """This many candidate alternatives will be sampled for each choice.""" + + LOGSUM_SETTINGS: Path + """Settings for the logsum computation.""" + + +class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): + # Logsum-related settings + CHOOSER_ORIG_COL_NAME: str + ALT_DEST_COL_NAME: str + IN_PERIOD: int | dict[str, int] | None = None + OUT_PERIOD: int | dict[str, int] | None = None + LOGSUM_PREPROCESSOR: str = "preprocessor" + + SEGMENTS: list[str] | None = None + SIZE_TERM_SELECTOR: str | None = None + annotate_tours: PreprocessorSettings | None = None + + CHOOSER_FILTER_COLUMN_NAME: str | None = None + DEST_CHOICE_COLUMN_NAME: str | None = None + DEST_CHOICE_LOGSUM_COLUMN_NAME: str | None = None + DEST_CHOICE_SAMPLE_TABLE_NAME: str | None = None + CHOOSER_TABLE_NAME: str | None = None + CHOOSER_SEGMENT_COLUMN_NAME: str | None = None + SEGMENT_IDS: dict[str, int] | None = None + SHADOW_PRICE_TABLE: str | None = None + MODELED_SIZE_TABLE: str | None = None + annotate_persons: PreprocessorSettings | None = None + annotate_households: PreprocessorSettings | None = None + SIMULATE_CHOOSER_COLUMNS: list[str] | None = None + LOGSUM_TOUR_PURPOSE: str | dict[str, str] | None = None + MODEL_SELECTOR: Literal["workplace", "school", None] = None + SAVED_SHADOW_PRICE_TABLE_NAME: str | None = None + CHOOSER_ID_COLUMN: str = "person_id" + + ORIG_ZONE_ID: str | None = None + """This setting appears to do nothing...""" + + +class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid"): + MODE_CHOICE_LOGSUM_COLUMN_NAME: str | None = None + use_TVPB_constants: bool = True + COMPUTE_TRIP_MODE_CHOICE_LOGSUMS: bool = False + tvpb_mode_path_types: dict[str, Any] | None = None + FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH: bool = True + annotate_tours: PreprocessorSettings | None = None + preprocessor: PreprocessorSettings | list[PreprocessorSettings] | None = None + nontour_preprocessor: PreprocessorSettings | list[ + PreprocessorSettings + ] | None = None + LOGSUM_CHOOSER_COLUMNS: list[str] = [] diff --git a/activitysim/core/configuration/network.py b/activitysim/core/configuration/network.py index 44b13da76..f81043438 100644 --- a/activitysim/core/configuration/network.py +++ 
b/activitysim/core/configuration/network.py @@ -1,8 +1,11 @@ from __future__ import annotations +import warnings from pathlib import Path from typing import Literal +from pydantic import PositiveInt, root_validator + from activitysim.core.configuration.base import ( Any, PydanticBase, @@ -148,14 +151,52 @@ class TAZ_Settings(PydanticBase): class MazToMazSettings(PydanticBase, extra="forbid"): - tables: list[str] + tables: list[str] = [] max_blend_distance: dict[str, float] = None - blend_distance_skim_name: str = None + blend_distance_skim_name: str | None = None """The name of the skim table used to blend distances for MAZs.""" +class TimeSettings(PydanticReadable, extra="forbid"): + """ + Settings to describe discrete time. + """ + + time_window: PositiveInt = 1440 + """total duration (in minutes) of the modeled time span.""" + + period_minutes: PositiveInt = 60 + """length of time (in minutes) each model time period represents. + + Must be whole factor of ``time_window``.""" + + periods: list[int] + """Breakpoints that define the aggregate periods for skims and assignment. + + The first value should be zero and the last value should equal `time_window` + divided by `period_minutes`. The intervals between these various values + represent the skimmed time periods, so this list should be one longer than + that of `labels`. + """ + + labels: list[str] + """Labels to define names for aggregate periods for skims and assignment""" + + @root_validator(pre=True) + def hours_deprecated(cls, data): + if "hours" in data: + data["periods"] = data.pop("hours") + warnings.warn( + "support for `skim_time_periods` key `hours` will be removed in " + "future verions. Use `periods` instead", + FutureWarning, + stacklevel=2, + ) + return data + + class NetworkSettings(PydanticReadable, extra="forbid"): """ Network level of service and skims settings @@ -188,14 +229,8 @@ class NetworkSettings(PydanticReadable, extra="forbid"): TAZ_Settings class, which allows for ZARR transformation and pre-processing. """ - skim_time_periods: dict - """time period upper bound values and labels - - * ``time_window`` - total duration (in minutes) of the modeled time span (Default: 1440 minutes (24 hours)) - * ``period_minutes`` - length of time (in minutes) each model time period represents. Must be whole factor of ``time_window``. (Default: 60 minutes) - * ``periods`` - Breakpoints that define the aggregate periods for skims and assignment - * ``labels`` - Labels to define names for aggregate periods for skims and assignment - """ + skim_time_periods: TimeSettings + """How to discretize time in this model.""" read_skim_cache: bool = False """Read cached skims (using numpy memmap) from output directory. @@ -222,7 +257,7 @@ class NetworkSettings(PydanticReadable, extra="forbid"): This file should contain the MAZ ID, TAZ, and land use and other MAZ attributes """ - maz_to_maz: MazToMazSettings = None + maz_to_maz: MazToMazSettings | None = None """Settings to manage maz-to-maz level of service in 2- and 3-zone models.""" #### 3 ZONE #### diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index ea72ece32..b54c85825 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -611,7 +611,7 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): When this value is True, all config directories are searched in order for additional files with the same filename. 
If other files are found they are also loaded, but only settings values that are not already explicitly - set are applied. Alternatives, set this to a different file name, in which + set are applied. Alternatively, set this to a different file name, in which case settings from that other file are loaded (again, backfilling unset values only). Once the settings files are loaded, this value does not have any other effect on the operation of the model(s). diff --git a/activitysim/core/contrast/__init__.py b/activitysim/core/contrast/__init__.py new file mode 100644 index 000000000..d4e787d53 --- /dev/null +++ b/activitysim/core/contrast/__init__.py @@ -0,0 +1,9 @@ +""" +Tools for contrasting the data or processes of various ActivitySim States. +""" + +from __future__ import annotations + +from ._optional import altair +from .continuous import compare_histogram +from .nominal import NominalTarget, compare_nominal diff --git a/activitysim/core/contrast/_optional.py b/activitysim/core/contrast/_optional.py new file mode 100644 index 000000000..0a224b53f --- /dev/null +++ b/activitysim/core/contrast/_optional.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +try: + import altair +except ImportError as altair_error: + altair = altair_error + +try: + import altaia +except ImportError as altaia_error: + altaia = altaia_error diff --git a/activitysim/core/contrast/continuous.py b/activitysim/core/contrast/continuous.py new file mode 100644 index 000000000..87bb91f3b --- /dev/null +++ b/activitysim/core/contrast/continuous.py @@ -0,0 +1,306 @@ +from __future__ import annotations + +import logging + +import numpy as np +import pandas as pd + +from activitysim.core import workflow +from activitysim.core.contrast import altair as alt + +logger = logging.getLogger(__name__) + + +def compare_histogram( + states: dict[str, workflow.State], + table_name, + column_name, + *, + checkpoint_name=None, + table_filter=None, + grouping=None, + bins: int | str = 10, + bounds=(None, None), + axis_label=None, + interpolate="step", + number_format=",.2f", + title=None, + tickCount=4, + style="histogram", + bandwidth=1, + kde_support=100, + relabel_states=None, +): + """ + + Parameters + ---------- + states + bins : int or str, default 10 + If an integer, then the range of data will be divided into this many + bins. If a string, no binning is undertaken, but the values are + converted to this datatype, usually "int" to achieve the general effect + of binning. + relabel_states : Mapping[str,str] + Remap the keys in `states` with these values. Any + missing values are retained. This allows you to modify + the figure to e.g. change "reference" to "v1.0.4" without + editing the original input data. 
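+
+    A minimal usage sketch, assuming ``base`` and ``test`` are two completed
+    ``workflow.State`` objects (these names, and the table and column chosen,
+    are purely illustrative)::
+
+        fig = compare_histogram(
+            {"reference": base, "proposal": test},
+            table_name="trips",
+            column_name="depart",
+            bins=24,
+            relabel_states={"reference": "v1.0.4"},
+        )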
+ + Returns + ------- + altair.Chart + """ + if isinstance(alt, Exception): + raise alt + + if relabel_states is None: + relabel_states = {} + + if bins == "int" and number_format == ",.2f": + number_format = ",d" + + if grouping: + groupings = [grouping] + else: + groupings = [] + + targets = {} + for key, tableset in states.items(): + if isinstance(tableset, workflow.State): + df = tableset.get_dataframe(table_name) + else: + df = tableset.get_dataframe(table_name, checkpoint_name=checkpoint_name) + if isinstance(table_filter, str): + try: + df = df.query(table_filter) + except NotImplementedError: + # pandas.eval can't handle, try sharrow + import sharrow as sh + + q = ( + sh.DataTree(base=df) + .setup_flow({"out": table_filter}) + .load(dtype=np.bool_) + ) + df = df.loc[q] + targets[key] = df[[column_name] + groupings] + + result = pd.concat(targets, names=["source"]) + if bounds[0] is not None: + result = result[result[column_name] >= bounds[0]] + if bounds[1] is not None: + result = result[result[column_name] <= bounds[1]] + lower_bound = result[column_name].min() + upper_bound = result[column_name].max() + if isinstance(bins, str): + bin_width = 0 + result[column_name] = result[column_name].astype(bins) + else: + bin_width = (upper_bound - lower_bound) / bins + if style == "histogram": + result[column_name] = pd.cut(result[column_name], bins) + targets = {k: result.loc[k] for k in targets.keys()} + + n = f"n_{table_name}" + s = f"share_{table_name}" + + d = {} + if style == "histogram": + for key, dat in targets.items(): + if groupings: + df = ( + dat.groupby(groupings + [column_name]) + .size() + .rename(n) + .unstack(column_name) + .fillna(0) + .stack() + .rename(n) + .reset_index() + ) + df[s] = df[n] / df.groupby(groupings)[n].transform("sum") + else: + df = dat.groupby(column_name).size().rename(n).reset_index() + df[s] = df[n] / df[n].sum() + + if bin_width: + if groupings: + dummy = df.groupby(groupings).size().index.to_frame() + else: + dummy = pd.DataFrame(index=[0]) + df[column_name] = df[column_name].apply(lambda x: x.mid) + lower_edge = lower_bound - (bin_width / 2) + upper_edge = upper_bound + (bin_width / 2) + df = pd.concat( + [ + dummy.assign(**{column_name: lower_edge, n: 0, s: 0}), + df, + dummy.assign(**{column_name: upper_edge, n: 0, s: 0}), + ] + ).reset_index(drop=True) + d[relabel_states.get(key, key)] = df + elif style == "kde": + for key, dat in targets.items(): + df, bw = _kde(dat[column_name], bandwidth=bandwidth, n=kde_support) + d[relabel_states.get(key, key)] = df + + # This is sorted in reverse alphabetical order by source, so that + # the stroke width for the first line plotted is fattest, and progressively + # thinner lines are plotted over that, so all data is visible on the figure. 
+    all_d = (
+        pd.concat(d, names=["source"])
+        .reset_index()
+        .sort_values("source", ascending=False)
+    )
+
+    if style == "histogram":
+        if len(states) != 1:
+            encode_kwds = dict(
+                color="source",
+                y=alt.Y(s, axis=alt.Axis(grid=False, title="")),
+                x=alt.X(
+                    f"{column_name}:Q" if bin_width else f"{column_name}:O",
+                    axis=alt.Axis(
+                        grid=False,
+                        title=axis_label or column_name,
+                        format=number_format,
+                        tickCount=tickCount,
+                    ),
+                ),
+                # opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
+                tooltip=[
+                    "source",
+                    alt.Tooltip(column_name, format=number_format),
+                    n,
+                    alt.Tooltip(s, format=".2%"),
+                ],
+                strokeWidth="source",
+            )
+        else:
+            encode_kwds = dict(
+                color="source",
+                y=alt.Y(s, axis=alt.Axis(grid=False, title="")),
+                x=alt.X(
+                    f"{column_name}:Q" if bin_width else f"{column_name}:O",
+                    axis=alt.Axis(
+                        grid=False,
+                        title=axis_label or column_name,
+                        format=number_format,
+                        tickCount=tickCount,
+                    ),
+                ),
+                tooltip=[
+                    alt.Tooltip(column_name, format=number_format),
+                    n,
+                    alt.Tooltip(s, format=".2%"),
+                ],
+            )
+    elif style == "kde":
+        if len(states) != 1:
+            encode_kwds = dict(
+                color="source",
+                y=alt.Y("density", axis=alt.Axis(grid=False, title="")),
+                x=alt.X(
+                    f"{column_name}:Q",
+                    axis=alt.Axis(
+                        grid=False,
+                        title=axis_label or column_name,
+                        format=number_format,
+                        tickCount=tickCount,
+                    ),
+                ),
+                strokeWidth="source",
+            )
+        else:
+            encode_kwds = dict(
+                color="source",
+                y=alt.Y("density", axis=alt.Axis(grid=False, title="")),
+                x=alt.X(
+                    f"{column_name}:Q",
+                    axis=alt.Axis(
+                        grid=False,
+                        title=axis_label or column_name,
+                        format=number_format,
+                        tickCount=tickCount,
+                    ),
+                ),
+            )
+    else:
+        raise ValueError(f"unknown {style=}")
+
+    if grouping:
+        encode_kwds["facet"] = alt.Facet(grouping, columns=3)
+
+    if grouping:
+        properties_kwds = dict(
+            width=200,
+            height=120,
+        )
+    else:
+        properties_kwds = dict(
+            width=400,
+            height=240,
+        )
+
+    if bounds[0] is not None and bounds[1] is not None:
+        encode_kwds["x"]["scale"] = alt.Scale(domain=bounds)
+
+    if len(states) != 1:
+        fig = (
+            alt.Chart(all_d)
+            .mark_line(interpolate=interpolate)
+            .encode(**encode_kwds)
+            .properties(**properties_kwds)
+        )
+    else:
+        if bin_width:
+            fig = (
+                alt.Chart(all_d)
+                .mark_area(interpolate=interpolate)
+                .encode(**encode_kwds)
+                .properties(**properties_kwds)
+            )
+        else:
+            fig = (
+                alt.Chart(all_d)
+                .mark_bar()
+                .encode(**encode_kwds)
+                .properties(**properties_kwds)
+            )
+
+    if title:
+        fig = fig.properties(title=title).configure_title(
+            fontSize=20,
+            anchor="start",
+            color="black",
+        )
+
+    return fig
+
+
+def _kde(values, n=5, bandwidth=0.2, **kwargs):
+    """Kernel Density Estimation with Scikit-learn"""
+    from sklearn.neighbors import KernelDensity
+
+    x = np.asarray(values)
+
+    if isinstance(bandwidth, (float, int)):
+        kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
+        kde_skl.fit(x[:, np.newaxis])
+    else:
+        from sklearn.model_selection import GridSearchCV
+
+        grid = GridSearchCV(
+            KernelDensity(), {"bandwidth": bandwidth}, cv=3
+        )  # 3-fold cross-validation
+        grid.fit(x[:, None])
+        bandwidth = grid.best_params_["bandwidth"]
+        kde_skl = grid.best_estimator_
+
+    x_grid = np.linspace(values.min(), values.max(), n)
+
+    # score_samples() returns the log-likelihood of the samples
+    log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis])
+    name = getattr(values, "name", "x")
+    return (pd.DataFrame({name: x_grid, "density": np.exp(log_pdf)}), bandwidth)
diff --git a/activitysim/core/contrast/nominal.py b/activitysim/core/contrast/nominal.py
new file mode 100644
index 000000000..42a6d071a
--- /dev/null
+++ b/activitysim/core/contrast/nominal.py
@@ -0,0 +1,206 @@
+from __future__ import annotations
+
+import enum
+import logging
+
+import numpy as np
+import pandas as pd
+import pyarrow.compute as pc
+
+from activitysim.core import workflow
+from activitysim.core.contrast import altair as alt
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_grouping(g):
+    if isinstance(g, str):
+        return g, {"shorthand": g}
+    elif isinstance(g, dict):
+        return g.get("field"), g
+    elif g is None:
+        return None, None
+    else:
+        raise ValueError(g)
+
+
+class NominalTarget:
+    def __init__(self, counts: dict):
+        total = sum(counts[i] for i in counts)
+        self._shares = {k: v / total for (k, v) in counts.items()}
+        self._counts = counts
+
+    def as_dataframe(self, table_name, column_name):
+        targets = {}
+        if self._shares is not None:
+            targets[f"share of {table_name}"] = self._shares
+        if self._counts is not None:
+            targets[f"# of {table_name}"] = self._counts
+        return pd.DataFrame(targets).rename_axis(column_name, axis=0).reset_index()
+
+
+def compare_nominal(
+    states: dict[str, workflow.State],
+    table_name: str,
+    column_name: str,
+    row_grouping=None,
+    col_grouping=None,
+    count_label=None,
+    share_label=None,
+    axis_label="Share",
+    title=None,
+    ordinal=False,
+    plot_type="share",
+    relabel_tablesets=None,
+    categories=None,
+    table_filter=None,
+):
+    """
+    Parameters
+    ----------
+    states : Mapping[str, workflow.State]
+    categories : Mapping
+        Maps the values found in the referred column into readable names.
+    """
+    if isinstance(alt, Exception):
+        raise alt
+
+    if isinstance(states, workflow.State):
+        states = {"results": states}
+
+    if count_label is None:
+        count_label = f"# of {table_name}"
+    if share_label is None:
+        share_label = f"share of {table_name}"
+    if relabel_tablesets is None:
+        relabel_tablesets = {}
+
+    row_g, row_g_kwd = _parse_grouping(row_grouping)
+    col_g, col_g_kwd = _parse_grouping(col_grouping)
+
+    d = {}
+    groupings = []
+    if row_g is not None:
+        groupings.append(row_g)
+    if col_g is not None:
+        groupings.append(col_g)
+
+    if isinstance(table_filter, str):
+        table_filters = [table_filter]
+        mask = pc.field(table_filter)
+    elif table_filter is None:
+        table_filters = []
+        mask = None
+    else:
+        raise NotImplementedError(f"{type(table_filter)=}")
+
+    for key, state in states.items():
+        if isinstance(state, workflow.State):
+            try:
+                raw = state.get_pyarrow(
+                    table_name, groupings + [column_name] + table_filters
+                )
+            except KeyError:
+                # table filter is maybe complex, try using sharrow
+                raw = state.get_pyarrow(table_name, groupings + [column_name])
+                import sharrow as sh
+
+                mask = (
+                    sh.DataTree(base=state.get_pyarrow(table_name))
+                    .setup_flow({"out": table_filter})
+                    .load(dtype=np.bool_)
+                    .reshape(-1)
+                )
+            if mask is not None:
+                raw = raw.filter(mask)
+            df = (
+                raw.group_by(groupings + [column_name])
+                .aggregate([(column_name, "count")])
+                .to_pandas()
+                .rename(columns={f"{column_name}_count": count_label})
+            )
+            if not groupings:
+                df[share_label] = df[count_label] / df[count_label].sum()
+            else:
+                df[share_label] = df[count_label] / df.groupby(groupings)[
+                    count_label
+                ].transform("sum")
+            d[relabel_tablesets.get(key, key)] = df
+        elif isinstance(state, NominalTarget):
+            d[relabel_tablesets.get(key, key)] = state.as_dataframe(
+                table_name, column_name
+            )
+        else:
+            raise TypeError(f"states cannot be {type(state)!r}")
+
+    all_d = pd.concat(d, names=["source"]).reset_index()
+
+    selection = alt.selection_multi(
fields=[column_name], + bind="legend", + ) + + if plot_type == "count": + x = alt.X( + count_label, + axis=alt.Axis(grid=False, labels=False, title=axis_label), + ) + elif plot_type == "share": + x = alt.X( + share_label, + axis=alt.Axis(grid=False, labels=False, title=axis_label), + scale=alt.Scale(domain=[0.0, 1.0]), + ) + else: + raise ValueError(f"unknown plot_type {plot_type}") + + encode = dict( + color=alt.Color( + column_name, + type="ordinal" if ordinal else "nominal", + ), + y=alt.Y("source", axis=alt.Axis(grid=False, title=""), sort=None), + x=x, + opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), + tooltip=[ + column_name, + "source", + count_label, + alt.Tooltip(f"{share_label}:Q", format=".2%"), + ] + + groupings, + ) + if row_g is not None: + encode["row"] = alt.Row(**row_g_kwd) + if col_g is not None: + encode["column"] = alt.Column(**col_g_kwd) + + if isinstance(categories, enum.EnumMeta): + categories = {i.value: i.name for i in categories} + if categories: + all_d[column_name] = all_d[column_name].map(categories) + + fig = ( + alt.Chart(all_d) + .mark_bar() + .encode( + **encode, + ) + .add_selection( + selection, + ) + ) + + if title: + fig = fig.properties(title=title).configure_title( + fontSize=20, + anchor="start", + color="black", + ) + + if col_grouping is not None: + fig = fig.properties( + width=100, + ) + + return fig diff --git a/activitysim/core/estimation.py b/activitysim/core/estimation.py index 8077e3318..ae8edeb84 100644 --- a/activitysim/core/estimation.py +++ b/activitysim/core/estimation.py @@ -10,7 +10,9 @@ import yaml from activitysim.core import simulate, workflow +from activitysim.core.configuration.base import PydanticBase from activitysim.core.util import reindex +from activitysim.core.yaml_tools import safe_dump logger = logging.getLogger("estimation") @@ -33,7 +35,6 @@ class Estimator: def __init__( self, state: workflow.State, bundle_name, model_name, estimation_table_recipes ): - logger.info("Initialize Estimator for'%s'" % (model_name,)) self.state = state @@ -112,7 +113,6 @@ def get_chooser_id(self): return self.chooser_id_column_name def end_estimation(self): - self.write_omnibus_table() self.estimating = False @@ -123,7 +123,6 @@ def end_estimation(self): manager.release(self) def output_directory(self, bundle_directory=False): - # shouldn't be asking for this if not estimating assert self.estimating assert self.model_name is not None @@ -143,7 +142,6 @@ def output_directory(self, bundle_directory=False): return dir def output_file_path(self, table_name, file_type=None, bundle_directory=False): - # shouldn't be asking for this if not estimating assert self.estimating @@ -219,12 +217,10 @@ def write_table(df, table_name, index, append, bundle_directory): self.debug("write_table write: %s" % table_name) def write_omnibus_table(self): - if len(self.omnibus_tables) == 0: return for omnibus_table, table_names in self.omnibus_tables.items(): - self.debug( "write_omnibus_table: %s table_names: %s" % (omnibus_table, table_names) ) @@ -252,7 +248,6 @@ def write_omnibus_table(self): self.debug("write_omnibus_choosers: %s" % file_path) def write_dict(self, d, dict_name, bundle_directory): - assert self.estimating file_path = self.output_file_path(dict_name, "yaml", bundle_directory) @@ -262,7 +257,7 @@ def write_dict(self, d, dict_name, bundle_directory): with open(file_path, "w") as f: # write ordered dict as array - yaml.dump(d, f) + safe_dump(d, f) self.debug("estimate.write_dict: %s" % file_path) @@ -277,7 +272,10 @@ def 
write_coefficients( if model_settings is not None: assert file_name is None - file_name = model_settings["COEFFICIENTS"] + file_name = ( + getattr(model_settings, "COEFFICIENTS", None) + or model_settings["COEFFICIENTS"] + ) assert file_name is not None @@ -295,6 +293,8 @@ def write_coefficients( def write_coefficients_template(self, model_settings): assert self.estimating + if isinstance(model_settings, PydanticBase): + model_settings = model_settings.dict() coefficients_df = simulate.read_model_coefficient_template( self.state.filesystem, model_settings ) @@ -325,7 +325,6 @@ def write_nest_spec(self, nest_spec): def copy_model_settings( self, settings_file_name, tag="model_settings", bundle_directory=False ): - input_path = self.state.filesystem.get_config_file_path(settings_file_name) output_path = self.output_file_path(tag, "yaml", bundle_directory) @@ -333,27 +332,45 @@ def copy_model_settings( shutil.copy(input_path, output_path) def write_model_settings( - self, model_settings, settings_file_name, bundle_directory=False + self, + model_settings: PydanticBase | dict, + settings_file_name: str, + bundle_directory: bool = False, ): - - if "include_settings" in model_settings: + if isinstance(model_settings, PydanticBase): + # TODO: Deal with how Pydantic settings are used in estimation. + # Legacy estimation data bundles provide separate handling + # for when `include_settings` and `inherit_settings` keys + # are present in YAML files. The new pydantic settings model + # divorces us from the config source content and merely stores + # the resulting values of settings. Do we really want to + # carry around all this baggage in estimation? The content + # is still out there in the original source files, why do we + # make copies in the estimation data bundle in the first place? 
file_path = self.output_file_path( "model_settings", "yaml", bundle_directory ) assert not os.path.isfile(file_path) with open(file_path, "w") as f: - yaml.dump(model_settings, f) + safe_dump(model_settings.dict(), f) else: - self.copy_model_settings( - settings_file_name, bundle_directory=bundle_directory - ) - if "inherit_settings" in model_settings: - self.write_dict( - model_settings, "inherited_model_settings", bundle_directory - ) + if "include_settings" in model_settings: + file_path = self.output_file_path( + "model_settings", "yaml", bundle_directory + ) + assert not os.path.isfile(file_path) + with open(file_path, "w") as f: + safe_dump(model_settings, f) + else: + self.copy_model_settings( + settings_file_name, bundle_directory=bundle_directory + ) + if "inherit_settings" in model_settings: + self.write_dict( + model_settings, "inherited_model_settings", bundle_directory + ) def melt_alternatives(self, df): - alt_id_name = self.alt_id_column_name assert alt_id_name is not None, ( @@ -449,10 +466,9 @@ def get_survey_table(self, table_name): def write_spec( self, model_settings=None, file_name=None, tag="SPEC", bundle_directory=False ): - if model_settings is not None: assert file_name is None - file_name = model_settings[tag] + file_name = getattr(model_settings, tag, None) or model_settings[tag] input_path = self.state.filesystem.get_config_file_path(file_name) @@ -464,7 +480,6 @@ def write_spec( class EstimationManager(object): def __init__(self): - self.settings_initialized = False self.bundles = [] self.estimation_table_recipes = {} @@ -472,7 +487,6 @@ def __init__(self): self.estimating = {} def initialize_settings(self, state): - # FIXME - can't we just initialize in init and handle no-presence of settings file as not enabled if self.settings_initialized: return @@ -494,7 +508,6 @@ def initialize_settings(self, state): self.estimation_table_recipes = settings.get("estimation_table_recipes", {}) if self.enabled: - self.survey_tables = settings.get("survey_tables", {}) for table_name, table_info in self.survey_tables.items(): assert ( @@ -588,7 +601,6 @@ def begin_estimation( return self.estimating[model_name] def release(self, estimator): - self.estimating.pop(estimator.model_name) def get_survey_table(self, table_name): @@ -602,7 +614,6 @@ def get_survey_table(self, table_name): return df def get_survey_values(self, model_values, table_name, column_names): - assert isinstance( model_values, (pd.Series, pd.DataFrame, pd.Index) ), "get_survey_values model_values has unrecognized type %s" % type( diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py index b57eca94c..9ee3ac20e 100644 --- a/activitysim/core/expressions.py +++ b/activitysim/core/expressions.py @@ -4,13 +4,26 @@ import logging -from . 
import assign, config, simulate, tracing, workflow
-from .util import assign_in_place, parse_suffix_args, suffix_expressions_df_str
+import pandas as pd
+
+from activitysim.core import assign, simulate, tracing, workflow
+from activitysim.core.configuration.base import PreprocessorSettings, PydanticBase
+from activitysim.core.util import (
+    assign_in_place,
+    parse_suffix_args,
+    suffix_expressions_df_str,
+)
 
 logger = logging.getLogger(__name__)
 
 
-def compute_columns(state, df, model_settings, locals_dict={}, trace_label=None):
+def compute_columns(
+    state: workflow.State,
+    df: pd.DataFrame,
+    model_settings: str | dict | PydanticBase,
+    locals_dict: dict | None = None,
+    trace_label: str | None = None,
+) -> pd.DataFrame:
     """
     Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals
 
@@ -25,7 +38,7 @@
             TABLES - list of pipeline tables to load and make available as
             (read only) locals
         str: name of yaml file in configs_dir to load dict from
-    locals_dict : dict
+    locals_dict : dict, optional
        dict of locals (e.g. utility functions) to add to the execution environment
    trace_label
 
@@ -35,6 +48,11 @@
         one column for each expression (except temps with ALL_CAP target names)
         same index as df
     """
+    if locals_dict is None:
+        locals_dict = {}
+
+    if isinstance(model_settings, PydanticBase):
+        model_settings = model_settings.dict()
 
     if isinstance(model_settings, str):
         model_settings_name = model_settings
@@ -49,7 +67,7 @@
     assert "DF" in model_settings, "Expected to find 'DF' in %s" % model_settings_name
     df_name = model_settings.get("DF")
-    helper_table_names = model_settings.get("TABLES", [])
+    helper_table_names = model_settings.get("TABLES") or []
     expressions_spec_name = model_settings.get("SPEC", None)
 
     # Extract suffix for disaggregate accessibilities.
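+    # For illustration only, an inline `model_settings` mapping for this
+    # function might look like the following hypothetical sketch (not a
+    # spec shipped with this repository):
+    #
+    #     model_settings = {
+    #         "DF": "persons",
+    #         "TABLES": ["households"],
+    #         "SPEC": "annotate_persons",
+    #     }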
@@ -162,28 +180,34 @@ def assign_columns( def annotate_preprocessors( - state: workflow.State, df, locals_dict, skims, model_settings, trace_label + state: workflow.State, + df: pd.DataFrame, + locals_dict, + skims, + model_settings: PydanticBase | dict, + trace_label: str, ): - locals_d = {} locals_d.update(locals_dict) locals_d.update(skims) - preprocessor_settings = model_settings.get("preprocessor", []) + try: + preprocessor_settings = model_settings.preprocessor + except AttributeError: + preprocessor_settings = model_settings.get("preprocessor", []) if preprocessor_settings is None: preprocessor_settings = [] if not isinstance(preprocessor_settings, list): - assert isinstance(preprocessor_settings, dict) + assert isinstance(preprocessor_settings, dict | PreprocessorSettings) preprocessor_settings = [preprocessor_settings] simulate.set_skim_wrapper_targets(df, skims) - for model_settings in preprocessor_settings: - + for preproc_settings in preprocessor_settings: results = compute_columns( state, df=df, - model_settings=model_settings, + model_settings=preproc_settings, locals_dict=locals_d, trace_label=trace_label, ) @@ -192,7 +216,6 @@ def annotate_preprocessors( def filter_chooser_columns(choosers, chooser_columns): - missing_columns = [c for c in chooser_columns if c not in choosers] if missing_columns: logger.debug("filter_chooser_columns missing_columns %s" % missing_columns) diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index b2cdcd0b5..80dc6fb00 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -15,6 +15,8 @@ tracing, workflow, ) +from activitysim.core.skim_dataset import DatasetWrapper +from activitysim.core.skim_dictionary import SkimWrapper logger = logging.getLogger(__name__) @@ -171,6 +173,11 @@ def _interaction_sample( the index is same as choosers and the series value is the alternative df index of chosen alternative + zone_layer : {'taz', 'maz'}, default 'taz' + Specify which zone layer of the skims is to be used. You cannot use the + 'maz' zone layer in a one-zone model, but you can use the 'taz' layer in + a two- or three-zone model (e.g. for destination pre-sampling). + Returns ------- choices_df : pandas.DataFrame @@ -225,7 +232,6 @@ def _interaction_sample( interaction_utilities = None interaction_utilities_sh = None if sharrow_enabled: - ( interaction_utilities, trace_eval_results, @@ -501,22 +507,21 @@ def _interaction_sample( def interaction_sample( - state, - choosers, - alternatives, - spec, - sample_size, - alt_col_name, - allow_zero_probs=False, - log_alt_losers=False, - skims=None, + state: workflow.State, + choosers: pd.DataFrame, + alternatives: pd.DataFrame, + spec: pd.DataFrame, + sample_size: int, + alt_col_name: str, + allow_zero_probs: bool = False, + log_alt_losers: bool = False, + skims: SkimWrapper | DatasetWrapper | None = None, locals_d=None, - chunk_size=0, - chunk_tag=None, - trace_label=None, - zone_layer=None, + chunk_size: int = 0, + chunk_tag: str | None = None, + trace_label: str | None = None, + zone_layer: str | None = None, ): - """ Run a simulation in the situation in which alternatives must be merged with choosers because there are interaction terms or @@ -526,6 +531,7 @@ def interaction_sample( Parameters ---------- + state : State choosers : pandas.DataFrame DataFrame of choosers alternatives : pandas.DataFrame @@ -540,7 +546,7 @@ def interaction_sample( which does not sample alternatives. 
alt_col_name: str name to give the sampled_alternative column - skims : Skims object + skims : SkimWrapper or DatasetWrapper or None The skims object is used to contain multiple matrices of origin-destination impedances. Make sure to also add it to the locals_d below in order to access it in expressions. The *only* job @@ -556,6 +562,10 @@ def interaction_sample( trace_label: str This is the label to be used for trace log file entries and dump file names when household tracing enabled. No tracing occurs if label is empty or None. + zone_layer : {'taz', 'maz'}, default 'taz' + Specify which zone layer of the skims is to be used. You cannot use the + 'maz' zone layer in a one-zone model, but you can use the 'taz' layer in + a two- or three-zone model (e.g. for destination pre-sampling). Returns ------- @@ -579,7 +589,8 @@ def interaction_sample( # we return alternatives ordered in (index, alt_col_name) # if choosers index is not ordered, it is probably a mistake, since the alts wont line up assert alt_col_name is not None - assert choosers.index.is_monotonic_increasing + if not choosers.index.is_monotonic_increasing: + assert choosers.index.is_monotonic_increasing # FIXME - legacy logic - not sure this is needed or even correct? sample_size = min(sample_size, len(alternatives.index)) @@ -591,7 +602,6 @@ def interaction_sample( chunk_trace_label, chunk_sizer, ) in chunk.adaptive_chunked_choosers(state, choosers, trace_label, chunk_tag): - choices = _interaction_sample( state, chooser_chunk, diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 780bd53e7..3da309369 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -65,6 +65,11 @@ def eval_interaction_utilities( yielding a dataframe with len(interaction_df) rows and one utility column having the same index as interaction_df (non-unique values from alternatives df) + zone_layer : {'taz', 'maz'}, default 'taz' + Specify which zone layer of the skims is to be used. You cannot use the + 'maz' zone layer in a one-zone model, but you can use the 'taz' layer in + a two- or three-zone model (e.g. for destination pre-sampling). 
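+
+    The `zone_layer` argument is normally passed down from a caller such as
+    ``interaction_sample``; for example, a destination pre-sampling step in
+    a two- or three-zone model might request the aggregate layer like this
+    (a sketch, with illustrative argument values)::
+
+        choices = interaction_sample(
+            state, choosers, alternatives, spec,
+            sample_size=30, alt_col_name="alt_dest",
+            zone_layer="taz",
+        )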
+ Returns ------- utilities : pandas.DataFrame diff --git a/activitysim/core/logit.py b/activitysim/core/logit.py index a20701a15..053c46e4a 100644 --- a/activitysim/core/logit.py +++ b/activitysim/core/logit.py @@ -4,13 +4,13 @@ import logging import warnings -from builtins import object import numpy as np import pandas as pd from activitysim.core import tracing, workflow from activitysim.core.choosing import choice_maker +from activitysim.core.configuration.logit import LogitNestSpec logger = logging.getLogger(__name__) @@ -412,7 +412,7 @@ def interaction_dataset( return alts_sample -class Nest(object): +class Nest: """ Data for a nest-logit node or leaf @@ -457,15 +457,14 @@ def nest_types(cls): return ["leaf", "node"] -def validate_nest_spec(nest_spec, trace_label): +def validate_nest_spec(nest_spec: dict | LogitNestSpec, trace_label: str): keys = [] duplicates = [] for nest in each_nest(nest_spec): if nest.name in keys: logger.error( - "validate_nest_spec:duplicate nest key '%s' in nest spec - %s" - % (nest.name, trace_label) + f"validate_nest_spec:duplicate nest key '{nest.name}' in nest spec - {trace_label}" ) duplicates.append(nest.name) @@ -474,12 +473,11 @@ def validate_nest_spec(nest_spec, trace_label): if duplicates: raise RuntimeError( - "validate_nest_spec:duplicate nest key/s '%s' in nest spec - %s" - % (duplicates, trace_label) + f"validate_nest_spec:duplicate nest key/s '{duplicates}' in nest spec - {trace_label}" ) -def _each_nest(spec, parent_nest, post_order): +def _each_nest(spec: LogitNestSpec, parent_nest, post_order): """ Iterate over each nest or leaf node in the tree (of subtree) @@ -487,7 +485,7 @@ def _each_nest(spec, parent_nest, post_order): Parameters ---------- - spec : dict + spec : LogitNestSpec Nest spec dict tree (or subtree when recursing) from the model spec yaml file parent_nest : Nest nest of parent node (passed to accumulate level, ancestors, and product_of_coefficients) @@ -497,7 +495,7 @@ def _each_nest(spec, parent_nest, post_order): Yields ------ - spec_node : dict + spec_node : LogitNestSpec Nest tree spec dict for this node subtree nest : Nest Nest object with info about the current node (nest or leaf) @@ -506,18 +504,20 @@ def _each_nest(spec, parent_nest, post_order): level = parent_nest.level + 1 - if isinstance(spec, dict): - name = spec["name"] - coefficient = spec["coefficient"] + if isinstance(spec, LogitNestSpec): + name = spec.name + coefficient = spec.coefficient assert isinstance( - coefficient, (int, float) - ), "Coefficient '%s' (%s) not a number" % ( - name, - coefficient, - ) # forgot to eval coefficient? - alternatives = [ - a["name"] if isinstance(a, dict) else a for a in spec["alternatives"] - ] + coefficient, int | float + ), f"Coefficient '{name}' ({coefficient}) not a number" # forgot to eval coefficient? 
+ alternatives = [] + for a in spec.alternatives: + if isinstance(a, dict): + alternatives.append(a["name"]) + elif isinstance(a, LogitNestSpec): + alternatives.append(a.name) + else: + alternatives.append(a) nest = Nest(name=name) nest.level = parent_nest.level + 1 @@ -530,7 +530,7 @@ def _each_nest(spec, parent_nest, post_order): yield spec, nest # recursively iterate the list of alternatives - for alternative in spec["alternatives"]: + for alternative in spec.alternatives: for sub_node, sub_nest in _each_nest(alternative, nest, post_order): yield sub_node, sub_nest @@ -548,13 +548,13 @@ def _each_nest(spec, parent_nest, post_order): yield spec, nest -def each_nest(nest_spec, type=None, post_order=False): +def each_nest(nest_spec: dict | LogitNestSpec, type=None, post_order=False): """ Iterate over each nest or leaf node in the tree (of subtree) Parameters ---------- - nest_spec : dict + nest_spec : dict or LogitNestSpec Nest tree dict from the model spec yaml file type : str Nest class type to yield @@ -573,7 +573,10 @@ def each_nest(nest_spec, type=None, post_order=False): if type is not None and type not in Nest.nest_types(): raise RuntimeError("Unknown nest type '%s' in call to each_nest" % type) - for node, nest in _each_nest(nest_spec, parent_nest=Nest(), post_order=post_order): + if isinstance(nest_spec, dict): + nest_spec = LogitNestSpec.parse_obj(nest_spec) + + for _node, nest in _each_nest(nest_spec, parent_nest=Nest(), post_order=post_order): if type is None or (type == nest.type): yield nest diff --git a/activitysim/core/los.py b/activitysim/core/los.py index 7089466d7..8586a3018 100644 --- a/activitysim/core/los.py +++ b/activitysim/core/los.py @@ -187,19 +187,6 @@ def load_settings(self): # validate skim_time_periods self.skim_time_periods = self.state.network_settings.skim_time_periods - if "hours" in self.skim_time_periods: - self.skim_time_periods["periods"] = self.skim_time_periods.pop("hours") - warnings.warn( - "support for `skim_time_periods` key `hours` will be removed in " - "future verions. Use `periods` instead", - FutureWarning, - ) - assert ( - "periods" in self.skim_time_periods - ), "'periods' key not found in network_los.skim_time_periods" - assert ( - "labels" in self.skim_time_periods - ), "'labels' key not found in network_los.skim_time_periods" self.zone_system = self.setting("zone_system") assert self.zone_system in [ @@ -221,7 +208,6 @@ def load_settings(self): # validate skim_time_periods self.skim_time_periods = self.setting("skim_time_periods") - assert {"periods", "labels"}.issubset(set(self.skim_time_periods.keys())) def load_skim_info(self): """ @@ -724,7 +710,7 @@ def get_skim_dict(self, skim_tag): Returns ------- - SkimDict or subclass (e.g. MazSkimDict) + SkimDict or subclass (e.g. MazSkimDict) or SkimDataset """ sharrow_enabled = self.sharrow_enabled if sharrow_enabled and skim_tag in ("taz", "maz"): @@ -873,10 +859,10 @@ def skim_time_period_label( ), "'skim_time_periods' setting not found." 
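+        # A hypothetical example of this discretization: with period_minutes: 60,
+        # periods: [0, 6, 11, 16, 20, 24], and labels: [EA, AM, MD, PM, EV],
+        # a time_period of 8 falls in the interval (6, 11] and is labeled "AM".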
# Default to 60 minute time periods - period_minutes = self.skim_time_periods.get("period_minutes", 60) + period_minutes = self.skim_time_periods.period_minutes # Default to a day - model_time_window_min = self.skim_time_periods.get("time_window", 1440) + model_time_window_min = self.skim_time_periods.time_window # Check to make sure the intervals result in no remainder time through 24 hour day assert 0 == model_time_window_min % period_minutes @@ -895,16 +881,16 @@ def skim_time_period_label( bin = ( np.digitize( [time_period % total_periods], - self.skim_time_periods["periods"], + self.skim_time_periods.periods, right=True, )[0] - 1 ) if fillna is not None: - default = self.skim_time_periods["labels"][fillna] - result = self.skim_time_periods["labels"].get(bin, default=default) + default = self.skim_time_periods.labels[fillna] + result = self.skim_time_periods.labels.get(bin, default=default) else: - result = self.skim_time_periods["labels"][bin] + result = self.skim_time_periods.labels[bin] if broadcast_to is not None: result = pd.Series( data=result, @@ -914,12 +900,12 @@ def skim_time_period_label( else: result = pd.cut( time_period, - self.skim_time_periods["periods"], - labels=self.skim_time_periods["labels"], + self.skim_time_periods.periods, + labels=self.skim_time_periods.labels, ordered=False, ) if fillna is not None: - default = self.skim_time_periods["labels"][fillna] + default = self.skim_time_periods.labels[fillna] result = result.fillna(default) if as_cat: result = result.astype(time_label_dtype) diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index ad72dc468..aff2c53e3 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -8,6 +8,8 @@ from collections import OrderedDict from collections.abc import Callable from datetime import timedelta +from pathlib import Path +from typing import Any import numpy as np import pandas as pd @@ -23,6 +25,12 @@ util, workflow, ) +from activitysim.core.configuration.base import PydanticBase +from activitysim.core.configuration.logit import ( + BaseLogitComponentSettings, + LogitNestSpec, + TemplatedLogitComponentSettings, +) from activitysim.core.estimation import Estimator from activitysim.core.simulate_consts import ( ALT_LOSER_UTIL, @@ -72,7 +80,7 @@ def read_model_alts(state: workflow.State, file_name, set_index=None): return df -def read_model_spec(filesystem: configuration.FileSystem, file_name: str): +def read_model_spec(filesystem: configuration.FileSystem, file_name: Path | str): """ Read a CSV model specification into a Pandas DataFrame or Series. @@ -103,7 +111,8 @@ def read_model_spec(filesystem: configuration.FileSystem, file_name: str): The description column is dropped from the returned data and the expression values are set as the table index. 
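+
+    For example (a sketch; the spec file name is illustrative)::
+
+        spec = read_model_spec(state.filesystem, "tour_mode_choice.csv")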
""" - + if isinstance(file_name, Path): + file_name = str(file_name) assert isinstance(file_name, str) if not file_name.lower().endswith(".csv"): file_name = f"{file_name}.csv" @@ -137,8 +146,10 @@ def read_model_spec(filesystem: configuration.FileSystem, file_name: str): def read_model_coefficients( - filesystem: configuration.FileSystem, model_settings=None, file_name=None -): + filesystem: configuration.FileSystem, + model_settings: BaseLogitComponentSettings | dict[str, Any] | None = None, + file_name: Path | str | None = None, +) -> pd.DataFrame: """ Read the coefficient file specified by COEFFICIENTS model setting """ @@ -148,12 +159,18 @@ def read_model_coefficients( assert file_name is not None else: assert file_name is None - assert ( - "COEFFICIENTS" in model_settings - ), "'COEFFICIENTS' tag not in model_settings in %s" % model_settings.get( - "source_file_paths" - ) - file_name = model_settings["COEFFICIENTS"] + if isinstance(model_settings, BaseLogitComponentSettings) or ( + isinstance(model_settings, PydanticBase) + and hasattr(model_settings, "COEFFICIENTS") + ): + file_name = model_settings.COEFFICIENTS + else: + assert ( + "COEFFICIENTS" in model_settings + ), "'COEFFICIENTS' tag not in model_settings in %s" % model_settings.get( + "source_file_paths" + ) + file_name = model_settings["COEFFICIENTS"] logger.debug(f"read_model_coefficients file_name {file_name}") file_path = filesystem.get_config_file_path(file_name) @@ -179,13 +196,15 @@ def read_model_coefficients( return coefficients -@workflow.func def spec_for_segment( state: workflow.State, - model_settings, + model_settings: dict | None, spec_id: str, segment_name: str, estimator: Estimator | None, + *, + spec_file_name: Path | None = None, + coefficients_file_name: Path | None = None, ) -> pd.DataFrame: """ Select spec for specified segment from omnibus spec containing columns for each segment @@ -203,7 +222,8 @@ def spec_for_segment( canonical spec file with expressions in index and single column with utility coefficients """ - spec_file_name = model_settings[spec_id] + if spec_file_name is None: + spec_file_name = model_settings[spec_id] spec = read_model_spec(state.filesystem, file_name=spec_file_name) if len(spec.columns) > 1: @@ -214,7 +234,14 @@ def spec_for_segment( # doesn't really matter what it is called, but this may catch errors assert spec.columns[0] in ["coefficient", segment_name] - if "COEFFICIENTS" not in model_settings: + if ( + coefficients_file_name is None + and isinstance(model_settings, dict) + and "COEFFICIENTS" in model_settings + ): + coefficients_file_name = model_settings["COEFFICIENTS"] + + if coefficients_file_name is None: logger.warning( f"no coefficient file specified in model_settings for {spec_file_name}" ) @@ -224,11 +251,13 @@ def spec_for_segment( raise RuntimeError( f"No coefficient file specified for {spec_file_name} " f"but not all spec column values are numeric" - ) + ) from None return spec - coefficients = state.filesystem.read_model_coefficients(model_settings) + coefficients = read_model_coefficients( + state.filesystem, file_name=coefficients_file_name + ) spec = eval_coefficients(state, spec, coefficients, estimator) @@ -236,19 +265,22 @@ def spec_for_segment( def read_model_coefficient_template( - filesystem: configuration.FileSystem, model_settings + filesystem: configuration.FileSystem, + model_settings: dict | TemplatedLogitComponentSettings, ): """ Read the coefficient template specified by COEFFICIENT_TEMPLATE model setting """ - assert ( - "COEFFICIENT_TEMPLATE" 
in model_settings - ), "'COEFFICIENT_TEMPLATE' not in model_settings in %s" % model_settings.get( - "source_file_paths" - ) - - coefficients_file_name = model_settings["COEFFICIENT_TEMPLATE"] + if isinstance(model_settings, dict): + assert ( + "COEFFICIENT_TEMPLATE" in model_settings + ), "'COEFFICIENT_TEMPLATE' not in model_settings in %s" % model_settings.get( + "source_file_paths" + ) + coefficients_file_name = model_settings["COEFFICIENT_TEMPLATE"] + else: + coefficients_file_name = model_settings.COEFFICIENT_TEMPLATE file_path = filesystem.get_config_file_path(coefficients_file_name) try: @@ -300,7 +332,9 @@ def dump_mapped_coefficients(state: workflow.State, model_settings): def get_segment_coefficients( - filesystem: configuration.FileSystem, model_settings, segment_name + filesystem: configuration.FileSystem, + model_settings: PydanticBase | dict, + segment_name: str, ): """ Return a dict mapping generic coefficient names to segment-specific coefficient values @@ -332,10 +366,19 @@ def get_segment_coefficients( ... """ - - if "COEFFICIENTS" in model_settings and "COEFFICIENT_TEMPLATE" in model_settings: + if isinstance(model_settings, PydanticBase): + model_settings = model_settings.dict() + + if ( + "COEFFICIENTS" in model_settings + and "COEFFICIENT_TEMPLATE" in model_settings + and model_settings["COEFFICIENTS"] is not None + and model_settings["COEFFICIENT_TEMPLATE"] is not None + ): legacy = False - elif "COEFFICIENTS" in model_settings: + elif ( + "COEFFICIENTS" in model_settings and model_settings["COEFFICIENTS"] is not None + ): legacy = "COEFFICIENTS" warnings.warn( "Support for COEFFICIENTS without COEFFICIENT_TEMPLATE in model settings file will be removed." @@ -360,9 +403,17 @@ def get_segment_coefficients( omnibus_coefficients = pd.read_csv( legacy_coeffs_file_path, comment="#", index_col="coefficient_name" ) + try: + omnibus_coefficients_segment_name = omnibus_coefficients[segment_name] + except KeyError: + logger.error(f"No key {segment_name} found!") + possible_keys = "\n- ".join(omnibus_coefficients.keys()) + logger.error(f"possible keys include: \n- {possible_keys}") + raise coefficients_dict = assign.evaluate_constants( - omnibus_coefficients[segment_name], constants=constants + omnibus_coefficients_segment_name, constants=constants ) + else: coefficients_df = filesystem.read_model_coefficients(model_settings) template_df = read_model_coefficient_template(filesystem, model_settings) @@ -383,26 +434,41 @@ def get_segment_coefficients( return coefficients_dict -def eval_nest_coefficients(nest_spec, coefficients, trace_label): - def replace_coefficients(nest): +def eval_nest_coefficients( + nest_spec: LogitNestSpec | dict, coefficients: dict, trace_label: str +) -> LogitNestSpec: + def replace_coefficients(nest: LogitNestSpec): if isinstance(nest, dict): assert "coefficient" in nest coefficient_name = nest["coefficient"] if isinstance(coefficient_name, str): assert ( coefficient_name in coefficients - ), "%s not in nest coefficients" % (coefficient_name,) + ), f"{coefficient_name} not in nest coefficients" nest["coefficient"] = coefficients[coefficient_name] assert "alternatives" in nest for alternative in nest["alternatives"]: - if isinstance(alternative, dict): + if isinstance(alternative, dict | LogitNestSpec): + replace_coefficients(alternative) + elif isinstance(nest, LogitNestSpec): + if isinstance(nest.coefficient, str): + assert ( + nest.coefficient in coefficients + ), f"{nest.coefficient} not in nest coefficients" + nest.coefficient = 
coefficients[nest.coefficient] + + for alternative in nest.alternatives: + if isinstance(alternative, dict | LogitNestSpec): replace_coefficients(alternative) if isinstance(coefficients, pd.DataFrame): assert "value" in coefficients.columns coefficients = coefficients["value"].to_dict() + if not isinstance(nest_spec, LogitNestSpec): + nest_spec = LogitNestSpec.parse_obj(nest_spec) + replace_coefficients(nest_spec) logit.validate_nest_spec(nest_spec, trace_label) @@ -443,9 +509,9 @@ def eval_coefficients( zero_rows = (spec == 0).all(axis=1) if zero_rows.any(): if estimator: - logger.debug("keeping %s all-zero rows in SPEC" % (zero_rows.sum(),)) + logger.debug(f"keeping {zero_rows.sum()} all-zero rows in SPEC") else: - logger.debug("dropping %s all-zero rows from SPEC" % (zero_rows.sum(),)) + logger.debug(f"dropping {zero_rows.sum()} all-zero rows from SPEC") spec = spec.loc[~zero_rows] return spec @@ -723,7 +789,7 @@ def eval_utilities( misses = np.where(~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=0)) _sh_util_miss1 = sh_util[tuple(m[0] for m in misses)] _u_miss1 = utilities.values[tuple(m[0] for m in misses)] - diff = _sh_util_miss1 - _u_miss1 + _sh_util_miss1 - _u_miss1 if len(misses[0]) > sh_util.size * 0.01: print( f"big problem: {len(misses[0])} missed close values " @@ -1530,7 +1596,7 @@ def simple_simulate( result_list = [] # segment by person type and pick the right spec for each person type for ( - i, + _i, chooser_chunk, chunk_trace_label, chunk_sizer, @@ -1584,7 +1650,7 @@ def simple_simulate_by_chunk_id( choices = None result_list = [] for ( - i, + _i, chooser_chunk, chunk_trace_label, chunk_sizer, @@ -1902,7 +1968,7 @@ def simple_simulate_logsums( result_list = [] # segment by person type and pick the right spec for each person type for ( - i, + _i, chooser_chunk, chunk_trace_label, chunk_sizer, diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py index e6528f1ea..8421cb6c7 100644 --- a/activitysim/core/skim_dataset.py +++ b/activitysim/core/skim_dataset.py @@ -471,7 +471,7 @@ def _use_existing_backing_if_valid(backing, omx_file_paths, skim_tag): def _dedupe_time_periods(network_los_preload): - raw_time_periods = network_los_preload.los_settings.skim_time_periods["labels"] + raw_time_periods = network_los_preload.los_settings.skim_time_periods.labels # deduplicate time period names time_periods = [] for t in raw_time_periods: diff --git a/activitysim/core/skim_dict_factory.py b/activitysim/core/skim_dict_factory.py index b78401a88..eb0195749 100644 --- a/activitysim/core/skim_dict_factory.py +++ b/activitysim/core/skim_dict_factory.py @@ -109,7 +109,7 @@ def load_skim_info(self, state, skim_tag): # ignore any 3D skims not in skim_time_periods # specifically, load all skims except those with key2 not in dim3_tags_to_load skim_time_periods = self.network_los.skim_time_periods - dim3_tags_to_load = skim_time_periods and skim_time_periods["labels"] + dim3_tags_to_load = skim_time_periods and skim_time_periods.labels self.omx_manifest = {} # dict mapping { omx_key: skim_name } diff --git a/activitysim/core/skim_dictionary.py b/activitysim/core/skim_dictionary.py index e2d621461..ad79b5294 100644 --- a/activitysim/core/skim_dictionary.py +++ b/activitysim/core/skim_dictionary.py @@ -669,9 +669,12 @@ def __init__(self, state: workflow.State, skim_tag, network_los, taz_skim_dict): self.dtype = np.dtype(self.skim_info.dtype_name) self.base_keys = taz_skim_dict.skim_info.base_keys - self.sparse_keys = list( - set(network_los.maz_to_maz_df.columns) - 
{"OMAZ", "DMAZ"}
-        )
+        if network_los.maz_to_maz_df is not None:
+            self.sparse_keys = list(
+                set(network_los.maz_to_maz_df.columns) - {"OMAZ", "DMAZ"}
+            )
+        else:
+            self.sparse_keys = []
         self.sparse_key_usage = set()
 
     def _offset_mapper(self, state):
diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index 325fd2bbb..306f2cb44 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -334,6 +334,11 @@ def write_tables(state: workflow.State) -> None:
             logger.debug(
                 f"write_tables sorting {table_name} on columns {sort_columns}"
             )
+        elif dt_index_name is not None:
+            logger.debug(
+                f"write_tables sorting {table_name} on index {dt_index_name}"
+            )
+            dt = dt.sort_by(dt_index_name)
         else:
             logger.debug(
                 f"write_tables sorting {table_name} on unrecognized index {dt_index_name}"
diff --git a/activitysim/core/util.py b/activitysim/core/util.py
index e56460f43..ce2439a9b 100644
--- a/activitysim/core/util.py
+++ b/activitysim/core/util.py
@@ -7,10 +7,10 @@
 import itertools
 import logging
 import os
-from builtins import zip
 from collections.abc import Iterable
 from operator import itemgetter
 from pathlib import Path
+from typing import TypeVar
 
 import cytoolz as tz
 import cytoolz.curried
@@ -20,6 +20,7 @@
 import pyarrow.csv as csv
 import pyarrow.parquet as pq
 import yaml
+from pydantic import BaseModel
 
 logger = logging.getLogger(__name__)
 
@@ -424,13 +425,26 @@ def recursive_replace(obj, search, replace):
     return obj
 
 
+T = TypeVar("T")
+
+
 def suffix_tables_in_settings(
-    model_settings,
-    suffix="proto_",
-    tables=["persons", "households", "tours", "persons_merged"],
-):
+    model_settings: T,
+    suffix: str = "proto_",
+    tables: Iterable[str] = ("persons", "households", "tours", "persons_merged"),
+) -> T:
+    if not isinstance(model_settings, dict):
+        model_settings_type = type(model_settings)
+        model_settings = model_settings.dict()
+    else:
+        model_settings_type = None
+
     for k in tables:
         model_settings = recursive_replace(model_settings, k, suffix + k)
+
+    if model_settings_type is not None:
+        model_settings = model_settings_type.parse_obj(model_settings)
+
     return model_settings
 
 
@@ -453,6 +467,10 @@ def parse_suffix_args(args):
 
 
 def concat_suffix_dict(args):
+    if isinstance(args, BaseModel):
+        args = args.dict()
+        if "source_file_paths" in args:
+            del args["source_file_paths"]
     if isinstance(args, dict):
         args = sum([["--" + k, v] for k, v in args.items()], [])
     if isinstance(args, list):
diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py
index 22bbbf045..77c07180f 100644
--- a/activitysim/core/workflow/state.py
+++ b/activitysim/core/workflow/state.py
@@ -316,6 +316,8 @@ def make_default(
         self.default_settings()
         if settings:
             for k, v in settings.items():
+                if k not in self.settings.__fields__:
+                    raise KeyError(f"no field {k!r} in {type(self.settings)}")
                 setattr(self.settings, k, v)
 
         return self
@@ -482,9 +484,10 @@ def load_settings(self) -> State:
         if self.filesystem.cache_dir != cache_dir:
             logger.warning(f"settings file changes cache_dir to {cache_dir}")
             self.filesystem.cache_dir = cache_dir
-        self.settings: Settings = Settings.parse_obj(raw_settings)
+        settings_class = self.__class__.settings.member_type
+        self.settings: Settings = settings_class.parse_obj(raw_settings)
 
-        extra_settings = set(self.settings.__dict__) - set(Settings.__fields__)
+        extra_settings = set(self.settings.__dict__) - set(settings_class.__fields__)
 
         if extra_settings:
             warnings.warn(
diff --git a/activitysim/core/workflow/steps.py
b/activitysim/core/workflow/steps.py index 39d038a95..6a8884225 100644 --- a/activitysim/core/workflow/steps.py +++ b/activitysim/core/workflow/steps.py @@ -480,7 +480,7 @@ def _validate_workflow_function(f): argspec = getfullargspec(f) if argspec.args[0] != "state": raise SyntaxError("workflow.func must have `state` as the first argument") - if annot.get("state") is not workflow.State: + if not issubclass(annot.get("state"), workflow.State): raise SyntaxError( "workflow.func must have `State` as the first argument annotation" ) diff --git a/activitysim/core/yaml_tools.py b/activitysim/core/yaml_tools.py new file mode 100644 index 000000000..f86d1fe29 --- /dev/null +++ b/activitysim/core/yaml_tools.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from pathlib import Path + +from yaml import Dumper, SafeDumper, dump_all + + +def _Path(dumper: Dumper, data: Path): + """Dump a Path as a string.""" + return dumper.represent_str(str(data)) + + +SafeDumper.add_multi_representer(Path, _Path) + + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 74a426e71..fd61aea3d 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import os from pathlib import Path from typing import Collection @@ -97,7 +99,9 @@ def _read_csv(filename, **kwargs): if SEGMENTS is not None: SEGMENT_IDS = {i: i for i in SEGMENTS} - SIZE_TERM_SELECTOR = settings.get("SIZE_TERM_SELECTOR", model_selector) + SIZE_TERM_SELECTOR = ( + settings.get("SIZE_TERM_SELECTOR", model_selector) or model_selector + ) # filter size spec for this location choice only size_spec = ( diff --git a/activitysim/examples/placeholder_psrc/configs/trip_destination.yaml b/activitysim/examples/placeholder_psrc/configs/trip_destination.yaml index 4165986d7..055ce5b82 100644 --- a/activitysim/examples/placeholder_psrc/configs/trip_destination.yaml +++ b/activitysim/examples/placeholder_psrc/configs/trip_destination.yaml @@ -1,7 +1,7 @@ SAMPLE_SIZE: 30 -DESTINATION_SAMPLE_SPEC: trip_destination_sample.csv -DESTINATION_SPEC: trip_destination.csv +SAMPLE_SPEC: trip_destination_sample.csv +SPEC: trip_destination.csv COEFFICIENTS: _dummy_coefficients.csv LOGSUM_SETTINGS: trip_mode_choice.yaml diff --git a/activitysim/examples/placeholder_sandag/configs_2_zone/settings.yaml b/activitysim/examples/placeholder_sandag/configs_2_zone/settings.yaml index 2d341f552..b570a8253 100644 --- a/activitysim/examples/placeholder_sandag/configs_2_zone/settings.yaml +++ b/activitysim/examples/placeholder_sandag/configs_2_zone/settings.yaml @@ -189,3 +189,5 @@ household_median_value_of_time: 2: 8.81 3: 10.44 4: 12.86 + +recode_pipeline_columns: True diff --git a/activitysim/examples/placeholder_sandag/configs_2_zone/settings_mp.yaml b/activitysim/examples/placeholder_sandag/configs_2_zone/settings_mp.yaml index 212a1097a..9841f2a31 100644 --- a/activitysim/examples/placeholder_sandag/configs_2_zone/settings_mp.yaml +++ b/activitysim/examples/placeholder_sandag/configs_2_zone/settings_mp.yaml @@ -40,9 +40,27 @@ output_tables: tables: - checkpoints - accessibility - - land_use - - households - - persons - - tours - - trips - + - tablename: land_use 
+ decode_columns: + zone_id: land_use.zone_id + - tablename: households + decode_columns: + home_zone_id: land_use.zone_id + - tablename: persons + decode_columns: + home_zone_id: land_use.zone_id + school_zone_id: nonnegative | land_use.zone_id + workplace_zone_id: nonnegative | land_use.zone_id + - tablename: tours + decode_columns: + origin: land_use.zone_id + destination: land_use.zone_id + - tablename: trips + decode_columns: + origin: land_use.zone_id + destination: land_use.zone_id + - joint_tour_participants + - tablename: proto_disaggregate_accessibility + decode_columns: + home_zone_id: land_use.zone_id + TAZ: land_use_taz.TAZ diff --git a/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings.yaml b/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings.yaml index 27a426684..498f257cf 100644 --- a/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings.yaml +++ b/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings.yaml @@ -65,9 +65,22 @@ output_tables: prefix: final_2_zone_ sort: True tables: - - tours - - trips - - vehicles - - proto_disaggregate_accessibility - -recode_pipeline_columns: False + - tablename: tours + decode_columns: + origin: land_use.zone_id + destination: land_use.zone_id + - tablename: trips + decode_columns: + origin: land_use.zone_id + destination: land_use.zone_id + - tablename: proto_disaggregate_accessibility + decode_columns: + home_zone_id: land_use.zone_id + TAZ: land_use_taz.TAZ + - tablename: land_use + decode_columns: + zone_id: land_use.zone_id + - tablename: accessibility + decode_columns: + home_zone_id: land_use.zone_id + TAZ: land_use_taz.TAZ diff --git a/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings_mp.yaml b/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings_mp.yaml index d965a26ec..85644fec5 100644 --- a/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings_mp.yaml +++ b/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings_mp.yaml @@ -16,7 +16,7 @@ fail_fast: True models: ## Disaggregate Accessibility steps # only including for 2 zone sandag test - - initialize_proto_population + - initialize_proto_population - compute_disaggregate_accessibility ### mp_initialize step - initialize_landuse @@ -84,7 +84,27 @@ output_tables: prefix: final_2_zone_ sort: True tables: - - tours - - trips - - vehicles - - proto_disaggregate_accessibility \ No newline at end of file + - tablename: land_use + decode_columns: + zone_id: land_use.zone_id + - tablename: households + decode_columns: + home_zone_id: land_use.zone_id + - tablename: persons + decode_columns: + home_zone_id: land_use.zone_id + school_zone_id: nonnegative | land_use.zone_id + workplace_zone_id: nonnegative | land_use.zone_id + - tablename: tours + decode_columns: + origin: land_use.zone_id + destination: land_use.zone_id + - tablename: trips + decode_columns: + origin: land_use.zone_id + destination: land_use.zone_id + - joint_tour_participants + - tablename: proto_disaggregate_accessibility + decode_columns: + home_zone_id: land_use.zone_id + TAZ: land_use_taz.TAZ diff --git a/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings_no_recode.yaml b/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings_no_recode.yaml new file mode 100644 index 000000000..45648afcc --- /dev/null +++ b/activitysim/examples/placeholder_sandag/test/configs_2_zone/settings_no_recode.yaml @@ -0,0 +1,2 @@ +inherit_settings: settings.yaml 
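+# same as the base test settings, but with zone-id recoding switched off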
+recode_pipeline_columns: False diff --git a/activitysim/examples/placeholder_sandag/test/test_sandag.py b/activitysim/examples/placeholder_sandag/test/test_sandag.py index d354ca191..32e18ff1c 100644 --- a/activitysim/examples/placeholder_sandag/test/test_sandag.py +++ b/activitysim/examples/placeholder_sandag/test/test_sandag.py @@ -42,7 +42,7 @@ def data(): build_data() -def run_test(zone, multiprocess=False, sharrow=False): +def run_test(zone, multiprocess=False, sharrow=False, recode=True): def test_path(dirname): return os.path.join(os.path.dirname(__file__), dirname) @@ -136,6 +136,8 @@ def regress(zone): if multiprocess: run_args = run_args + ["-s", "settings_mp.yaml"] + elif not recode: + run_args = run_args + ["-s", "settings_no_recode.yaml"] if sharrow: run_args = ["-c", test_path(f"configs_{zone}_sharrow")] + run_args @@ -194,6 +196,10 @@ def test_2_zone(data): run_test(zone="2", multiprocess=False) +def test_2_zone_norecode(data): + run_test(zone="2", multiprocess=False, recode=False) + + def test_2_zone_mp(data): run_test(zone="2", multiprocess=True) diff --git a/activitysim/examples/production_semcog/configs/logging.yaml b/activitysim/examples/production_semcog/configs/logging.yaml index 4935421f4..9e7b6142a 100644 --- a/activitysim/examples/production_semcog/configs/logging.yaml +++ b/activitysim/examples/production_semcog/configs/logging.yaml @@ -15,7 +15,7 @@ logging: loggers: activitysim: - level: DEBUG + level: INFO handlers: [console, logfile] propagate: false diff --git a/activitysim/examples/production_semcog/configs/non_mandatory_tour_destination.yaml b/activitysim/examples/production_semcog/configs/non_mandatory_tour_destination.yaml index 7f2ff7633..ca005185d 100644 --- a/activitysim/examples/production_semcog/configs/non_mandatory_tour_destination.yaml +++ b/activitysim/examples/production_semcog/configs/non_mandatory_tour_destination.yaml @@ -54,4 +54,4 @@ CONSTANTS: WORK_LOW_SEGMENT_ID: 1 WORK_MED_SEGMENT_ID: 2 WORK_HIGH_SEGMENT_ID: 3 - WORK_VERYHIGH_SEGMENT_ID: 4 \ No newline at end of file + WORK_VERYHIGH_SEGMENT_ID: 4 diff --git a/activitysim/examples/production_semcog/configs/trip_destination.yaml b/activitysim/examples/production_semcog/configs/trip_destination.yaml index 6cf3aeb63..a80ea1d7e 100644 --- a/activitysim/examples/production_semcog/configs/trip_destination.yaml +++ b/activitysim/examples/production_semcog/configs/trip_destination.yaml @@ -4,9 +4,6 @@ COEFFICIENTS: trip_destination_coefficients.csv SAMPLE_SIZE: 30 -DESTINATION_SAMPLE_SPEC: trip_destination_sample.csv -DESTINATION_SPEC: trip_destination.csv - LOGSUM_SETTINGS: trip_mode_choice.yaml # optional (comment out if not desired) diff --git a/activitysim/examples/prototype_arc/configs/parking_location_choice.yaml b/activitysim/examples/prototype_arc/configs/parking_location_choice.yaml index e48287093..d17208fc2 100644 --- a/activitysim/examples/prototype_arc/configs/parking_location_choice.yaml +++ b/activitysim/examples/prototype_arc/configs/parking_location_choice.yaml @@ -1,12 +1,12 @@ -METADATA: - CHOOSER: trips_merged - INPUT: - persons: - trips: - tours: - OUTPUT: - trips: - - parking_zone_id +#METADATA: +# CHOOSER: trips_merged +# INPUT: +# persons: +# trips: +# tours: +# OUTPUT: +# trips: +# - parking_zone_id SPECIFICATION: parking_location_choice.csv COEFFICIENTS: parking_location_choice_coeffs.csv @@ -30,7 +30,7 @@ SEGMENTS: - mandatory_free - mandatory_pay - nonmandatory - + ALT_DEST_COL_NAME: parking_zone_id TRIP_ORIGIN: origin TRIP_DESTINATION: destination diff --git 
a/activitysim/examples/prototype_arc/configs/trip_departure_choice.yaml b/activitysim/examples/prototype_arc/configs/trip_departure_choice.yaml index daf657bcc..27e9fd974 100644 --- a/activitysim/examples/prototype_arc/configs/trip_departure_choice.yaml +++ b/activitysim/examples/prototype_arc/configs/trip_departure_choice.yaml @@ -1,20 +1,20 @@ -METADATA: - CHOOSER: tours - INPUT: - persons: - trips: - tours: - OUTPUT: - trips: - - start_period - - end_period +#METADATA: +# CHOOSER: tours +# INPUT: +# persons: +# trips: +# tours: +# OUTPUT: +# trips: +# - start_period +# - end_period SPECIFICATION: trip_departure_choice.csv -COEFFICIENTS: trip_departure_choice_coeff.csv +#COEFFICIENTS: trip_departure_choice_coeff.csv PREPROCESSOR: SPEC: trip_departure_choice_preprocessor DF: trips TABLES: - - tours \ No newline at end of file + - tours diff --git a/activitysim/examples/prototype_arc/configs/trip_destination.yaml b/activitysim/examples/prototype_arc/configs/trip_destination.yaml index a6ee2e48e..28d649f77 100644 --- a/activitysim/examples/prototype_arc/configs/trip_destination.yaml +++ b/activitysim/examples/prototype_arc/configs/trip_destination.yaml @@ -1,7 +1,7 @@ SAMPLE_SIZE: 30 -DESTINATION_SAMPLE_SPEC: trip_destination_sample.csv -DESTINATION_SPEC: trip_destination.csv +SAMPLE_SPEC: trip_destination_sample.csv +SPEC: trip_destination.csv LOGSUM_SETTINGS: trip_mode_choice.yaml @@ -38,4 +38,3 @@ CLEANUP: False # this setting is used by testing code to force failed trip_destination # fail_some_trips_for_testing: False - diff --git a/activitysim/examples/prototype_arc/configs/trip_scheduling_choice.yaml b/activitysim/examples/prototype_arc/configs/trip_scheduling_choice.yaml index f7a9171f7..3d1231f0a 100644 --- a/activitysim/examples/prototype_arc/configs/trip_scheduling_choice.yaml +++ b/activitysim/examples/prototype_arc/configs/trip_scheduling_choice.yaml @@ -1,22 +1,22 @@ -METADATA: - CHOOSER: tours - INPUT: - persons: - trips: - tours: - OUTPUT: - trips: - - start_period - - end_period +#METADATA: +# CHOOSER: tours +# INPUT: +# persons: +# trips: +# tours: +# OUTPUT: +# trips: +# - start_period +# - end_period SPECIFICATION: trip_scheduling_choice.csv -COEFFICIENTS: trip_scheduling_choice_coeff.csv +#COEFFICIENTS: trip_scheduling_choice_coeff.csv -SAMPLE_ALTERNATIVES: trip_departure_sample_patterns.csv +#SAMPLE_ALTERNATIVES: trip_departure_sample_patterns.csv PREPROCESSOR: SPEC: trip_scheduling_choice_preprocessor DF: tours TABLES: - persons - - trips \ No newline at end of file + - trips diff --git a/activitysim/examples/prototype_mtc_extended/sampling_scenarios.py b/activitysim/examples/prototype_mtc_extended/sampling_scenarios.py index a182a3bf3..03f480fb9 100644 --- a/activitysim/examples/prototype_mtc_extended/sampling_scenarios.py +++ b/activitysim/examples/prototype_mtc_extended/sampling_scenarios.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import argparse import os import shutil @@ -23,8 +25,10 @@ def integer_params(params): d_zones = 1 if params.DESTINATION_SAMPLE_SIZE > 1 else n_zones o_zones = 1 if params.ORIGIN_SAMPLE_SIZE > 1 else n_zones - params.DESTINATION_SAMPLE_SIZE = round(params.DESTINATION_SAMPLE_SIZE * d_zones) - params.ORIGIN_SAMPLE_SIZE = round(params.ORIGIN_SAMPLE_SIZE * o_zones) + params.DESTINATION_SAMPLE_SIZE = int( + round(params.DESTINATION_SAMPLE_SIZE * d_zones) + ) + params.ORIGIN_SAMPLE_SIZE = int(round(params.ORIGIN_SAMPLE_SIZE * o_zones)) return params diff --git a/activitysim/examples/prototype_mtc_extended/test/test_mtc_extended.py 
b/activitysim/examples/prototype_mtc_extended/test/test_mtc_extended.py
index bdbbbca5b..b3ec258ba 100644
--- a/activitysim/examples/prototype_mtc_extended/test/test_mtc_extended.py
+++ b/activitysim/examples/prototype_mtc_extended/test/test_mtc_extended.py
@@ -16,7 +16,10 @@

 def _test_prototype_mtc_extended(
-    multiprocess=False, sharrow=False, shadow_pricing=True
+    multiprocess=False,
+    sharrow=False,
+    shadow_pricing=True,
+    via_cli=True,
 ):
     def example_path(dirname):
         resource = os.path.join("examples", "prototype_mtc_extended", dirname)
@@ -76,12 +79,18 @@ def regress():
             final_vehicles_df, regress_vehicles_df, rtol=1.0e-4
         )

+    kwargs = {
+        "configs_dir": [],
+    }
     file_path = os.path.join(os.path.dirname(__file__), "simulation.py")
     shadowprice_configs = (
         [] if shadow_pricing else ["-c", test_path("no-shadow-pricing")]
     )
+    if not shadow_pricing:
+        kwargs["configs_dir"].append(test_path("no-shadow-pricing"))
     if sharrow:
         sh_configs = ["-c", example_path("configs_sharrow")]
+        kwargs["configs_dir"].append(example_path("configs_sharrow"))
     else:
         sh_configs = []
     if multiprocess:
@@ -91,16 +100,20 @@ def regress():
             "-c",
             example_path("configs_mp"),
         ]
+        kwargs["configs_dir"].append(test_path("configs_mp"))
+        kwargs["configs_dir"].append(example_path("configs_mp"))
     elif sharrow:
         mp_configs = [
             "-c",
             test_path("configs"),
         ]
+        kwargs["configs_dir"].append(test_path("configs"))
     else:
         mp_configs = [
             "-c",
             test_path("configs"),
         ]
+        kwargs["configs_dir"].append(test_path("configs"))
     run_args = (
         shadowprice_configs
         + sh_configs
@@ -118,13 +131,23 @@ def regress():
             example_path("data_model"),
         ]
     )
+    kwargs["configs_dir"].append(example_path("configs"))
+    kwargs["configs_dir"].append(example_mtc_path("configs"))
+    kwargs["data_dir"] = [example_mtc_path("data")]
+    kwargs["output_dir"] = test_path("output")
+
     if os.environ.get("GITHUB_ACTIONS") == "true":
         subprocess.run(["coverage", "run", "-a", file_path] + run_args, check=True)
-    else:
+    elif via_cli:
         subprocess.run(
             [sys.executable, "-m", "activitysim", "run"] + run_args, check=True
         )
+    else:
+        import activitysim.abm  # noqa: F401 -- registers model components
+        from activitysim.core import workflow
+
+        state = workflow.State.make_default(**kwargs)
+        state.run.all()

     regress()
diff --git a/activitysim/examples/prototype_mwcog/configs/non_mandatory_tour_destination.yaml b/activitysim/examples/prototype_mwcog/configs/non_mandatory_tour_destination.yaml
index 4f9f144a8..4c5f7ef91 100644
--- a/activitysim/examples/prototype_mwcog/configs/non_mandatory_tour_destination.yaml
+++ b/activitysim/examples/prototype_mwcog/configs/non_mandatory_tour_destination.yaml
@@ -50,15 +50,15 @@ IN_PERIOD:
   othdiscr: 36
   eatout: 36
   social: 36
-  escort: 11 
+  escort: 11
 OUT_PERIOD:
   shopping: 15
   othmaint: 15
   othdiscr: 31
   eatout: 31
   social: 31
-  escort: 10 
- 
+  escort: 10
+
 SEGMENT_IDS:
   work_low: 1
   work_med: 2
@@ -70,10 +70,10 @@ CONSTANTS:
   WORK_MED_SEGMENT_ID: 2
   WORK_HIGH_SEGMENT_ID: 3
   WORK_VERYHIGH_SEGMENT_ID: 4
- 
-preprocessor:
-  SPEC: non_mandatory_tour_destination_annotate_tours_preprocessor
-  DF: tours
-  TABLES:
-    - persons
\ No newline at end of file
+
+#preprocessor:
+#  SPEC: non_mandatory_tour_destination_annotate_tours_preprocessor
+#  DF: tours
+#  TABLES:
+#    - persons
diff --git a/activitysim/examples/prototype_mwcog/configs/trip_destination.yaml b/activitysim/examples/prototype_mwcog/configs/trip_destination.yaml
index 8bac788cc..9b2f1e588 100644
--- a/activitysim/examples/prototype_mwcog/configs/trip_destination.yaml
+++ b/activitysim/examples/prototype_mwcog/configs/trip_destination.yaml
@@ -4,9 +4,6 @@ COEFFICIENTS: trip_destination_coefficients.csv
SAMPLE_SIZE: 30 -DESTINATION_SAMPLE_SPEC: trip_destination_sample.csv -DESTINATION_SPEC: trip_destination.csv - LOGSUM_SETTINGS: trip_mode_choice.yaml # optional (comment out if not desired) diff --git a/activitysim/examples/prototype_sandag_xborder/configs/tour_mode_choice.yaml b/activitysim/examples/prototype_sandag_xborder/configs/tour_mode_choice.yaml index 56bf36044..195407bcd 100755 --- a/activitysim/examples/prototype_sandag_xborder/configs/tour_mode_choice.yaml +++ b/activitysim/examples/prototype_sandag_xborder/configs/tour_mode_choice.yaml @@ -63,7 +63,5 @@ LOGSUM_CHOOSER_COLUMNS: - person_id - household_id -CHOICE_COL_NAME: tour_mode MODE_CHOICE_LOGSUM_COLUMN_NAME: mode_choice_logsum COMPUTE_TRIP_MODE_CHOICE_LOGSUMS: True - diff --git a/activitysim/examples/prototype_sandag_xborder/configs/trip_destination.yaml b/activitysim/examples/prototype_sandag_xborder/configs/trip_destination.yaml index d6b0aeeb7..f4d5d701b 100644 --- a/activitysim/examples/prototype_sandag_xborder/configs/trip_destination.yaml +++ b/activitysim/examples/prototype_sandag_xborder/configs/trip_destination.yaml @@ -4,9 +4,6 @@ COEFFICIENTS: trip_destination_coefficients.csv SAMPLE_SIZE: 30 -DESTINATION_SAMPLE_SPEC: trip_destination_sample.csv -DESTINATION_SPEC: trip_destination.csv - LOGSUM_SETTINGS: trip_mode_choice.yaml # optional (comment out if not desired) diff --git a/activitysim/examples/prototype_sandag_xborder/configs/write_trip_matrices.yaml b/activitysim/examples/prototype_sandag_xborder/configs/write_trip_matrices.yaml index 37c6849ac..dce35ce76 100644 --- a/activitysim/examples/prototype_sandag_xborder/configs/write_trip_matrices.yaml +++ b/activitysim/examples/prototype_sandag_xborder/configs/write_trip_matrices.yaml @@ -104,5 +104,3 @@ MATRICES: data_field: TNC_SINGLE_EV - name: TNC_SHARED_EV data_field: TNC_SHARED_EV - -CONSTANTS: diff --git a/activitysim/standalone/__init__.py b/activitysim/standalone/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/activitysim/standalone/compare.py b/activitysim/standalone/compare.py deleted file mode 100644 index 024c720a3..000000000 --- a/activitysim/standalone/compare.py +++ /dev/null @@ -1,319 +0,0 @@ -import os -import warnings - -import altair as alt -import pandas as pd - -from .data_dictionary import check_data_dictionary -from .pipeline import load_checkpointed_tables - - -def load_pipelines(pipelines, tables=None, checkpoint_name=None): - """ - Parameters - ---------- - pipelines : Dict[Str, Path-like] - Mapping run name to path of pipeline file. 
- checkpoint : str - Name of checkpoint to load for all pipelines - """ - return { - key: load_checkpointed_tables( - pth, - tables=tables, - checkpoint_name=checkpoint_name, - )[1] - for key, pth in pipelines.items() - } - - -def load_final_tables(output_dirs, tables=None, index_cols=None): - result = {} - for key, pth in output_dirs.items(): - if not os.path.exists(pth): - warnings.warn(f"{key} directory does not exist: {pth}") - continue - result[key] = {} - for tname, tfile in tables.items(): - tpath = os.path.join(pth, tfile) - kwargs = {} - if index_cols is not None and tname in index_cols: - kwargs["index_col"] = index_cols[tname] - if os.path.exists(tpath): - result[key][tname] = pd.read_csv(tpath, **kwargs) - if len(result[key]) == 0: - # no tables were loaded, delete the entire group - del result[key] - return result - - -def compare_trip_mode_choice( - tablesets, title="Trip Mode Choice", grouping="primary_purpose" -): - - d = {} - groupings = [ - grouping, - ] - - for key, tableset in tablesets.items(): - df = ( - tableset["trips"] - .groupby(groupings + ["trip_mode"]) - .size() - .rename("n_trips") - .reset_index() - ) - df["share_trips"] = df["n_trips"] / df.groupby(groupings)["n_trips"].transform( - "sum" - ) - d[key] = df - - all_d = pd.concat(d, names=["source"]).reset_index() - - selection = alt.selection_multi( - fields=["trip_mode"], - bind="legend", - ) - - fig = ( - alt.Chart(all_d) - .mark_bar() - .encode( - color="trip_mode", - y=alt.Y("source", axis=alt.Axis(grid=False, title=""), sort=None), - x=alt.X( - "share_trips", - axis=alt.Axis(grid=False, labels=False, title="Mode Share"), - ), - row="primary_purpose", - opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), - tooltip=[ - "trip_mode", - "source", - "n_trips", - alt.Tooltip("share_trips:Q", format=".2%"), - ], - ) - .add_selection( - selection, - ) - ) - - if title: - fig = fig.properties(title=title).configure_title( - fontSize=20, - anchor="start", - color="black", - ) - - return fig - - -def compare_trip_distance( - tablesets, - skims, - dist_skim_name, - otaz_col="origin", - dtaz_col="destination", - time_col="depart", - dist_bins=20, - grouping="primary_purpose", - title="Trip Length Distribution", - max_dist=None, -): - groupings = [grouping] - if not isinstance(skims, dict): - skims = {i: skims for i in tablesets.keys()} - - distances = {} - for key, tableset in tablesets.items(): - skim_dist = skims[key][[dist_skim_name]] - - zone_ids = tableset["land_use"].index - if ( - zone_ids.is_monotonic_increasing - and zone_ids[-1] == len(zone_ids) + zone_ids[0] - 1 - ): - offset = zone_ids[0] - looks = [ - tableset["trips"][otaz_col].rename("otaz") - offset, - tableset["trips"][dtaz_col].rename("dtaz") - offset, - ] - else: - remapper = dict(zip(zone_ids, pd.RangeIndex(len(zone_ids)))) - looks = [ - tableset["trips"][otaz_col].rename("otaz").apply(remapper.get), - tableset["trips"][dtaz_col].rename("dtaz").apply(remapper.get), - ] - if "time_period" in skim_dist.dims: - looks.append( - tableset["trips"][time_col] - .apply(skims[key].attrs["time_period_imap"].get) - .rename("time_period"), - ) - look = pd.concat(looks, axis=1) - distances[key] = skims[key][[dist_skim_name]].iat.df(look) - - if dist_bins is not None: - result = pd.concat(distances, names=["source"]) - if max_dist is not None: - result = result[result <= max_dist] - result = pd.cut(result.iloc[:, 0], dist_bins).to_frame() - distances = {k: result.loc[k] for k in tablesets.keys()} - - data = {} - for key, tableset in tablesets.items(): 
- data[key] = tableset["trips"].assign(**{"distance": distances[key]}) - - d = {} - for key, dat in data.items(): - df = ( - dat.groupby(groupings + ["distance"]) - .size() - .rename("n_trips") - .unstack("distance") - .fillna(0) - .stack() - .rename("n_trips") - .reset_index() - ) - df["share_trips"] = df["n_trips"] / df.groupby(groupings)["n_trips"].transform( - "sum" - ) - d[key] = df - - all_d = pd.concat(d, names=["source"]).reset_index() - all_d["distance"] = all_d["distance"].apply(lambda x: x.mid) - - fig = ( - alt.Chart(all_d) - .mark_line( - interpolate="monotone", - ) - .encode( - color="source", - y=alt.Y("share_trips", axis=alt.Axis(grid=False, title="")), - x=alt.X("distance", axis=alt.Axis(grid=False, title="Distance")), - # opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), - # tooltip = ['trip_mode', 'source', 'n_trips', alt.Tooltip('share_trips:Q', format='.2%')], - facet=alt.Facet(grouping, columns=3), - strokeWidth="source", - ) - .properties( - width=200, - height=120, - ) - ) - - if title: - fig = fig.properties(title=title).configure_title( - fontSize=20, - anchor="start", - color="black", - ) - - return fig - - -def compare_work_district( - tablesets, - district_id, - label="district", - hometaz_col="home_zone_id", - worktaz_col="workplace_zone_id", - data_dictionary=None, -): - data_dictionary = check_data_dictionary(data_dictionary) - - d = {} - h = f"home_{label}" - w = f"work_{label}" - - for key, tableset in tablesets.items(): - persons = tableset["persons"] - workers = persons[persons[worktaz_col] >= 0].copy() - district_map = tableset["land_use"][district_id] - # workers[f"home_{label}_"] = workers[hometaz_col].map(district_map) - # workers[f"work_{label}_"] = workers[worktaz_col].map(district_map) - home_district = workers[hometaz_col].map(district_map).rename(h) - work_district = workers[worktaz_col].map(district_map).rename(w) - df = ( - workers.groupby( - [home_district, work_district] - # [f"home_{label}_", f"work_{label}_"] - ) - .size() - .rename("n_workers") - ) - d[key] = df - - all_d = pd.concat(d, names=["source"]).reset_index() - - district_names = data_dictionary.get("land_use", {}).get(district_id, None) - if district_names is not None: - all_d[h] = all_d[h].map(district_names) - all_d[w] = all_d[w].map(district_names) - - selection = alt.selection_multi( - fields=[w], - bind="legend", - ) - - fig = ( - alt.Chart(all_d) - .mark_bar() - .encode( - color=f"{w}:N", - y=alt.Y("source", axis=alt.Axis(grid=False, title=""), sort=None), - x=alt.X("n_workers", axis=alt.Axis(grid=False)), - row=f"{h}:N", - opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), - tooltip=[f"{h}:N", f"{w}:N", "source", "n_workers"], - ) - .add_selection( - selection, - ) - ) - - return fig - - -def compare_runtime(combo_timing_log): - df = pd.read_csv(combo_timing_log, index_col="model_name") - df1 = ( - df[["sharrow", "legacy"]] - .rename_axis(columns="source") - .unstack() - .rename("seconds") - .reset_index() - ) - c = alt.Chart( - df1, - height={"step": 20}, - ) - - result = c.mark_bar(yOffset=-3, size=6,).transform_filter( - (alt.datum.source == "legacy") - ).encode( - x=alt.X("seconds:Q", stack=None), - y=alt.Y("model_name", type="nominal", sort=None), - color="source", - tooltip=["source", "model_name", "seconds"], - ) + c.mark_bar( - yOffset=4, - size=6, - ).transform_filter( - (alt.datum.source == "sharrow") - ).encode( - x=alt.X("seconds:Q", stack=None), - y=alt.Y("model_name", type="nominal", sort=None), - color="source", - 
tooltip=["source", "model_name", "seconds"], - ) | alt.Chart( - df1 - ).mark_bar().encode( - color="source", x="source", y="sum(seconds)", tooltip=["source", "sum(seconds)"] - ) - - return result diff --git a/activitysim/standalone/pipeline.py b/activitysim/standalone/pipeline.py deleted file mode 100644 index 948914d10..000000000 --- a/activitysim/standalone/pipeline.py +++ /dev/null @@ -1,45 +0,0 @@ -import pandas as pd - -from ..core import pipeline - - -def load_checkpointed_tables( - pipeline_file_path, - tables=None, - checkpoint_name=None, -): - pipeline_store = pd.HDFStore(pipeline_file_path, mode="r") - - checkpoints = pipeline_store[pipeline.CHECKPOINT_TABLE_NAME] - - # checkpoint row as series - if checkpoint_name is None: - checkpoint = checkpoints.iloc[-1] - checkpoint_name = checkpoint.loc[pipeline.CHECKPOINT_NAME] - else: - i = checkpoints.set_index(pipeline.CHECKPOINT_NAME).index.get_loc( - checkpoint_name - ) - checkpoint = checkpoints.iloc[i] - - # series with table name as index and checkpoint_name as value - checkpoint_tables = checkpoint[~checkpoint.index.isin(pipeline.NON_TABLE_COLUMNS)] - - # omit dropped tables with empty checkpoint name - checkpoint_tables = checkpoint_tables[checkpoint_tables != ""] - - # hdf5 key is / - checkpoint_tables = { - table_name: pipeline.pipeline_table_key(table_name, checkpoint_name) - for table_name, checkpoint_name in checkpoint_tables.items() - } - - data = {} - for table_name, table_key in checkpoint_tables.items(): - if tables is None or table_name in tables: - data[table_name] = pipeline_store[table_key] - - pipeline_store.close() - - # checkpoint name and series mapping table name to hdf5 key for tables in that checkpoint - return checkpoint_name, data diff --git a/activitysim/standalone/render.py b/activitysim/standalone/render.py deleted file mode 100644 index ad672a7d1..000000000 --- a/activitysim/standalone/render.py +++ /dev/null @@ -1,64 +0,0 @@ -import logging -import os -import textwrap -from contextlib import contextmanager -from pathlib import Path - -import nbclient -import nbformat as nbf -from nbconvert import HTMLExporter -from xmle import NumberedCaption, Reporter - -from .. 
import __version__ - -# from jupyter_contrib_nbextensions.nbconvert_support import TocExporter # problematic - - -@contextmanager -def chdir(path: Path): - """ - Sets the cwd within the context - - Args: - path (Path): The path to the cwd - - Yields: - None - """ - - cwd = Path().absolute() - try: - os.chdir(path) - yield - finally: - os.chdir(cwd) - - -def render_notebook(nb_filename, cellcontent): - nb_filename = os.path.splitext(nb_filename)[0] - nb = nbf.v4.new_notebook() - - cells = [] - for c in cellcontent: - c = textwrap.dedent(c).strip() - if c[:4] == "[md]": - cells.append(nbf.v4.new_markdown_cell(c[4:])) - else: - cells.append(nbf.v4.new_code_cell(c)) - nb["cells"] = cells - nbf.write(nb, nb_filename + ".ipynb") - - nb = nbclient.execute(nb, cwd=os.path.dirname(nb_filename)) - nbf.write(nb, nb_filename + "-e.ipynb") - - html_exporter = HTMLExporter( - embed_images=True, - exclude_input_prompt=True, - exclude_output_prompt=True, - exclude_input=True, - # template_name = 'classic' - ) - (body, resources) = html_exporter.from_notebook_node(nb) - - with open(nb_filename + ".html", "w") as f: - f.write(body) diff --git a/activitysim/standalone/skims.py b/activitysim/standalone/skims.py deleted file mode 100644 index 1065383c5..000000000 --- a/activitysim/standalone/skims.py +++ /dev/null @@ -1,64 +0,0 @@ -import glob -import logging -import os - -import numpy as np -import openmatrix -import sharrow as sh -import yaml - -logger = logging.getLogger(__name__) - - -def load_skims( - network_los_settings_filename, - data_dir, -): - with open(network_los_settings_filename, "rt") as f: - settings = yaml.safe_load(f) - - skim_settings = settings["taz_skims"] - if isinstance(skim_settings, str): - skims_omx_fileglob = skim_settings - else: - skims_omx_fileglob = skim_settings.get("omx", None) - skims_omx_fileglob = skim_settings.get("files", skims_omx_fileglob) - skims_filenames = glob.glob(os.path.join(data_dir, skims_omx_fileglob)) - index_names = ("otaz", "dtaz", "time_period") - indexes = None - time_period_breaks = settings.get("skim_time_periods", {}).get("periods") - time_periods = settings.get("skim_time_periods", {}).get("labels") - time_period_sep = "__" - - time_window = settings.get("skim_time_periods", {}).get("time_window") - period_minutes = settings.get("skim_time_periods", {}).get("period_minutes") - n_periods = int(time_window / period_minutes) - - tp_map = {} - tp_imap = {} - label = time_periods[0] - i = 0 - for t in range(n_periods): - if t in time_period_breaks: - i = time_period_breaks.index(t) - label = time_periods[i] - tp_map[t + 1] = label - tp_imap[t + 1] = i - - omxs = [ - openmatrix.open_file(skims_filename, mode="r") - for skims_filename in skims_filenames - ] - if isinstance(time_periods, (list, tuple)): - time_periods = np.asarray(time_periods) - result = sh.dataset.from_omx_3d( - omxs, - index_names=index_names, - indexes=indexes, - time_periods=time_periods, - time_period_sep=time_period_sep, - ) - result.attrs["time_period_map"] = tp_map - result.attrs["time_period_imap"] = tp_imap - - return result diff --git a/activitysim/workflows/steps/chunk_sizing.py b/activitysim/workflows/steps/chunk_sizing.py index ee0ceca69..f19c19ab3 100644 --- a/activitysim/workflows/steps/chunk_sizing.py +++ b/activitysim/workflows/steps/chunk_sizing.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import warnings import psutil @@ -43,7 +45,8 @@ def chunk_sizing( if chunk_size > total_ram: warnings.warn( f"chunk size of {chunk_size/ 2**30:.2f}GB exceeds " - f"total RAM 
of {total_ram/ 2**30:.2f}" + f"total RAM of {total_ram/ 2**30:.2f}", + stacklevel=2, ) out = dict(chunk_size=chunk_size) diff --git a/activitysim/workflows/steps/contrast/contrast_setup.py b/activitysim/workflows/steps/contrast/contrast_setup.py index fd91a814c..077abbdee 100644 --- a/activitysim/workflows/steps/contrast/contrast_setup.py +++ b/activitysim/workflows/steps/contrast/contrast_setup.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import multiprocessing import time @@ -19,6 +21,7 @@ def contrast_setup( multiprocess=0, chunk_training_mode=None, main_n_households=None, + persist_sharrow_cache=False, ): reset_progress_step(description="Constrast Setup") if tag is None: @@ -30,6 +33,8 @@ def contrast_setup( flags.append(f" -r {resume_after}") if fast: flags.append("--fast") + if persist_sharrow_cache: + flags.append("--persist-sharrow-cache") out = dict(tag=tag, contrast=contrast, flags=" ".join(flags)) if isinstance(reference, str) and "." in reference: diff --git a/activitysim/workflows/steps/contrast/district_to_district.py b/activitysim/workflows/steps/contrast/district_to_district.py index 93358ea78..6c978adec 100644 --- a/activitysim/workflows/steps/contrast/district_to_district.py +++ b/activitysim/workflows/steps/contrast/district_to_district.py @@ -1,12 +1,12 @@ +from __future__ import annotations + import logging import altair as alt import pandas as pd -from pypyr.context import Context -from ....standalone.data_dictionary import check_data_dictionary -from ..progression import reset_progress_step -from ..wrapping import workstep +from activitysim.workflows.steps.wrapping import workstep +from activitysim.workflows.utils.data_dictionary import check_data_dictionary logger = logging.getLogger(__name__) diff --git a/activitysim/workflows/steps/contrast/load_skims.py b/activitysim/workflows/steps/contrast/load_skims.py index 85071b8f7..f5cdcc048 100644 --- a/activitysim/workflows/steps/contrast/load_skims.py +++ b/activitysim/workflows/steps/contrast/load_skims.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import glob import logging import os @@ -9,9 +11,8 @@ import sharrow as sh import yaml -from activitysim.standalone.utils import chdir - -from ..wrapping import workstep +from activitysim.workflows.steps.wrapping import workstep +from activitysim.workflows.utils import chdir logger = logging.getLogger(__name__) diff --git a/activitysim/workflows/steps/contrast/load_tables.py b/activitysim/workflows/steps/contrast/load_tables.py index 16e33920e..b7c5d0e0e 100644 --- a/activitysim/workflows/steps/contrast/load_tables.py +++ b/activitysim/workflows/steps/contrast/load_tables.py @@ -1,33 +1,32 @@ +from __future__ import annotations + import os -from pathlib import Path +import warnings -from pypyr.context import Context +import pandas as pd -from activitysim.standalone.compare import load_final_tables -from activitysim.standalone.utils import chdir +from activitysim.workflows.steps.wrapping import workstep +from activitysim.workflows.utils import chdir -from ..error_handler import error_logging -from ..progression import reset_progress_step -from ..wrapping import workstep -# databases = context.get_formatted('databases') -# # the various different output directories to process, for example: -# # { -# # "sharrow": "output-sharrow", -# # "legacy": "output-legacy", -# # } -# -# tables = context.get_formatted('tables') -# # the various tables in the output directories to read, for example: -# # trips: -# # filename: final_trips.csv -# # index_col: trip_id -# 
# persons: -# # filename: final_persons.csv -# # index_col: person_id -# # land_use: -# # filename: final_land_use.csv -# # index_col: zone_id +def load_final_tables(output_dirs, tables=None, index_cols=None): + result = {} + for key, pth in output_dirs.items(): + if not os.path.exists(pth): + warnings.warn(f"{key} directory does not exist: {pth}") + continue + result[key] = {} + for tname, tfile in tables.items(): + tpath = os.path.join(pth, tfile) + kwargs = {} + if index_cols is not None and tname in index_cols: + kwargs["index_col"] = index_cols[tname] + if os.path.exists(tpath): + result[key][tname] = pd.read_csv(tpath, **kwargs) + if len(result[key]) == 0: + # no tables were loaded, delete the entire group + del result[key] + return result @workstep("tablesets") diff --git a/activitysim/workflows/steps/copy_files.py b/activitysim/workflows/steps/copy_files.py index e9ee247af..be8b52ce2 100644 --- a/activitysim/workflows/steps/copy_files.py +++ b/activitysim/workflows/steps/copy_files.py @@ -1,14 +1,10 @@ +from __future__ import annotations + import glob import os import shutil -from pypyr.errors import KeyNotInContextError -from pypyr.steps.fetchyaml import run_step as _fetch -from pypyr.steps.filewriteyaml import run_step as _write -from pypyr.steps.py import run_step as _run_step - -from .progression import progress, progress_overall, progress_step -from .wrapping import workstep +from activitysim.workflows.steps.wrapping import workstep @workstep diff --git a/activitysim/workflows/steps/create.py b/activitysim/workflows/steps/create.py index 70857240c..a16c2d4c7 100644 --- a/activitysim/workflows/steps/create.py +++ b/activitysim/workflows/steps/create.py @@ -1,9 +1,11 @@ +from __future__ import annotations + import os import shlex -from ...standalone.utils import chdir -from .progression import reset_progress_step -from .wrapping import workstep +from activitysim.workflows.steps.progression import reset_progress_step +from activitysim.workflows.steps.wrapping import workstep +from activitysim.workflows.utils import chdir @workstep diff --git a/activitysim/workflows/steps/error_handler.py b/activitysim/workflows/steps/error_handler.py index 97e85bb61..367996750 100644 --- a/activitysim/workflows/steps/error_handler.py +++ b/activitysim/workflows/steps/error_handler.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging @@ -8,7 +10,7 @@ def wrapper(*args, **kwargs): except Exception as err: logging.error(f"===== ERROR IN {func.__name__} =====") logging.exception(f"{err}") - logging.error(f"===== / =====") + logging.error("===== / =====") raise return wrapper diff --git a/activitysim/workflows/steps/install_env.py b/activitysim/workflows/steps/install_env.py index 0a30d53c8..d2cd21332 100644 --- a/activitysim/workflows/steps/install_env.py +++ b/activitysim/workflows/steps/install_env.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import os.path import subprocess @@ -15,6 +17,7 @@ def install_env( asim_version="1.0.4", cwd=None, label=None, + python_version="3.9", ): if os.path.exists(env_prefix): return 0 @@ -26,7 +29,7 @@ def install_env( "create", "--prefix", env_prefix, - f"python=3.9", + f"python={python_version}", f"activitysim={asim_version}", "-c", "conda-forge", diff --git a/activitysim/workflows/steps/main.py b/activitysim/workflows/steps/main.py index 9fd7c7f5c..aa4e0d528 100644 --- a/activitysim/workflows/steps/main.py +++ b/activitysim/workflows/steps/main.py @@ -1,4 +1,6 @@ """Naive custom loader without any error handling.""" 
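+# pypyr is imported inside main() below, so a missing optional dependency
+# surfaces as a clear ImportError only when workflows are actually invoked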
+from __future__ import annotations + import os import signal import sys @@ -59,7 +61,7 @@ def main(args): from pypyr.cli import get_args from pypyr.config import config except ImportError: - raise ImportError("activitysim.workflows requires pypyr") + raise ImportError("activitysim.workflows requires pypyr") from None parsed_args = get_args(args) diff --git a/activitysim/workflows/steps/memory_stress_test.py b/activitysim/workflows/steps/memory_stress_test.py index 0fc42d7c2..371d5104c 100644 --- a/activitysim/workflows/steps/memory_stress_test.py +++ b/activitysim/workflows/steps/memory_stress_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import os import time @@ -5,8 +7,8 @@ import numpy as np import psutil -from ...core.util import si_units -from .wrapping import workstep +from activitysim.core.util import si_units +from activitysim.workflows.steps.wrapping import workstep def ping_mem(pid=None): @@ -31,5 +33,5 @@ def memory_stress_test(n=37): logging.critical(f"ping_mem = {ping_mem()}") time.sleep(5.0) logging.critical(f"ping_mem = {ping_mem()}") - logging.critical(f"bye") + logging.critical("bye") return {} diff --git a/activitysim/workflows/steps/run.py b/activitysim/workflows/steps/run.py index 8e8fdcdae..a795dc612 100644 --- a/activitysim/workflows/steps/run.py +++ b/activitysim/workflows/steps/run.py @@ -1,10 +1,12 @@ +from __future__ import annotations + import shlex from pypyr.errors import KeyNotInContextError -from activitysim.standalone.utils import chdir from activitysim.workflows.steps.progression import reset_progress_step from activitysim.workflows.steps.wrapping import workstep +from activitysim.workflows.utils import chdir def _get_formatted(context, key, default): diff --git a/activitysim/workflows/steps/run_subprocess.py b/activitysim/workflows/steps/run_subprocess.py index 35db46347..fea3a86ae 100644 --- a/activitysim/workflows/steps/run_subprocess.py +++ b/activitysim/workflows/steps/run_subprocess.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import os import subprocess @@ -61,6 +63,7 @@ def run_activitysim_as_subprocess( conda_prefix=None, single_thread=True, multi_thread=None, + persist_sharrow_cache=False, ) -> None: if isinstance(pre_config_dirs, str): pre_config_dirs = [pre_config_dirs] @@ -81,6 +84,8 @@ def run_activitysim_as_subprocess( flags.append(f" -r {resume_after}") if fast: flags.append("--fast") + if persist_sharrow_cache: + flags.append("--persist-sharrow-cache") if settings_file: flags.append(f"-s {settings_file}") flags = " ".join(flags) @@ -97,7 +102,7 @@ def run_activitysim_as_subprocess( # args = shlex.split(args) env = os.environ.copy() - pythonpath = env.pop("PYTHONPATH", None) + _pythonpath = env.pop("PYTHONPATH", None) if single_thread: env["MKL_NUM_THREADS"] = "1" @@ -115,18 +120,6 @@ def run_activitysim_as_subprocess( env["VECLIB_MAXIMUM_THREADS"] = str(multi_thread.get("VECLIB", 1)) env["NUMEXPR_NUM_THREADS"] = str(multi_thread.get("NUMEXPR", 1)) - # if pythonpath: - # print(f"removed PYTHONPATH from ENV: {pythonpath}") - # else: - # print(f"no removed PYTHONPATH from ENV!") - # - # for k, v in env.items(): - # print(f" - {k}: {v}") - - # if conda_prefix is not None: - # args = ["conda", "init", "bash", "&&", 'conda', 'activate', conda_prefix, '&&'] + list(args) - # args = ['conda', 'run', '-p', conda_prefix] + list(args) - if conda_prefix: conda_prefix_1 = os.environ.get("CONDA_PREFIX_1", None) if conda_prefix_1 is None: diff --git a/activitysim/workflows/steps/update_yaml.py 
b/activitysim/workflows/steps/update_yaml.py index 079eb878f..ae6b35e5f 100644 --- a/activitysim/workflows/steps/update_yaml.py +++ b/activitysim/workflows/steps/update_yaml.py @@ -1,9 +1,8 @@ -from pypyr.errors import KeyNotInContextError +from __future__ import annotations + +import yaml from pypyr.steps.fetchyaml import run_step as _fetch from pypyr.steps.filewriteyaml import run_step as _write -from pypyr.steps.py import run_step as _run_step - -from .progression import progress, progress_overall, progress_step def run_step(context): @@ -48,3 +47,11 @@ def run_step(context): "encoding": fetch_yaml_input.get("encoding", None), } _write(context) + + +def update_yaml(path, payload): + with open(path) as f: + content = yaml.safe_load(f) + content.update(payload) + with open(path, "w") as f: + yaml.safe_dump(content, f) diff --git a/activitysim/workflows/steps/wrapping.py b/activitysim/workflows/steps/wrapping.py index ca3f48c8b..17ba49c4b 100644 --- a/activitysim/workflows/steps/wrapping.py +++ b/activitysim/workflows/steps/wrapping.py @@ -1,13 +1,15 @@ +from __future__ import annotations + import importlib import logging +from collections.abc import Mapping from inspect import getfullargspec -from typing import Mapping from pypyr.context import Context -from . import get_formatted_or_default -from .error_handler import error_logging -from .progression import reset_progress_step +from activitysim.workflows.steps import get_formatted_or_default +from activitysim.workflows.steps.error_handler import error_logging +from activitysim.workflows.steps.progression import reset_progress_step logger = logging.getLogger(__name__) @@ -88,7 +90,7 @@ def __new__(cls, wrapped_func=None, *, returns_names=None, updates_context=False wrapped_func : Callable The function being decorated. 
""" - if isinstance(wrapped_func, (str, tuple, list)): + if isinstance(wrapped_func, str | tuple | list): # the returns_names are provided instead of the wrapped func returns_names = wrapped_func wrapped_func = None @@ -117,7 +119,6 @@ def __call__(self, wrapped_func): returns_names = (returns_names,) def run_step(context: Context = None) -> None: - caption = get_formatted_or_default(context, "caption", None) progress_tag = get_formatted_or_default(context, "progress_tag", caption) if progress_tag is not None: diff --git a/activitysim/standalone/utils.py b/activitysim/workflows/utils/__init__.py similarity index 91% rename from activitysim/standalone/utils.py rename to activitysim/workflows/utils/__init__.py index 7ba7c4ca4..ee86b6ad1 100644 --- a/activitysim/standalone/utils.py +++ b/activitysim/workflows/utils/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import os from contextlib import contextmanager from pathlib import Path diff --git a/activitysim/standalone/data_dictionary.py b/activitysim/workflows/utils/data_dictionary.py similarity index 87% rename from activitysim/standalone/data_dictionary.py rename to activitysim/workflows/utils/data_dictionary.py index a4fc9e4b7..0714f2ab0 100644 --- a/activitysim/standalone/data_dictionary.py +++ b/activitysim/workflows/utils/data_dictionary.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import os.path import warnings -from typing import Mapping +from collections.abc import Mapping import yaml @@ -17,9 +19,9 @@ def check_data_dictionary(input): return {} elif isinstance(input, str): if not os.path.exists(input): - warnings.warn(f"data dictionary file {input} is missing") + warnings.warn(f"data dictionary file {input} is missing", stacklevel=2) return {} - with open(input, "rt") as f: + with open(input) as f: content = yaml.safe_load(f) else: content = input diff --git a/activitysim/workflows/v1.3/_contrast_runner.yaml b/activitysim/workflows/v1.3/_contrast_runner.yaml new file mode 100644 index 000000000..942677dc8 --- /dev/null +++ b/activitysim/workflows/v1.3/_contrast_runner.yaml @@ -0,0 +1,712 @@ +# activitysim workflow example_runner example_name=prototype_mtc + +context_parser: pypyr.parser.keyvaluepairs + +on_failure: +- name: activitysim.workflows.steps.py + in: + py: | + import time + print("FAILURE", time.strftime("%Y-%m-%d %I:%M:%S%p")) + +steps: + +- description: Setting default workflow parameters + name: pypyr.steps.default + in: + defaults: + example_name: prototype_mtc + workflow_name: sharrow-contrast + workspace: workspace + multiprocess: 0 + chunk_size: 0 + create: True + compile: True + sharrow: True + legacy: True + reference: True + reference_asim_version: "1.2.0" + reference_python_version: "3.9" + tag: + resume_after: + fast: False + compile_n_households: 1000 + main_n_households: 100000 + config_dirs: configs + data_dir: data + ext_dirs: + instrument: False + memory_profile: False + trace_hh_id: + trace_od: + chunk_method: hybrid_uss + chunk_training_mode: disabled + machine_name: + disable_zarr: False + settings_file: settings.yaml + report_skip: [] + copy_report_to: + +- description: Ensure integer types for numbers + name: activitysim.workflows.steps.py + in: + label: Ensure integer types for numbers + py: | + def int_or_none(i): + return int(i) if i is not None else i + save( + compile_n_households=int_or_none(compile_n_households), + main_n_households=int_or_none(main_n_households), + multiprocess=int_or_none(multiprocess), + chunk_size=int_or_none(chunk_size), + ) + +- name: 
activitysim.workflows.steps.title
+  in:
+    label: "activitysim workflow {workflow_name}"
+    formatting: bold cyan
+
+- name: activitysim.workflows.steps.py
+  in:
+    label: Make {workspace} directory if it does not exist
+    py: |
+      import os
+      os.makedirs(f"{workspace}", exist_ok=True)
+
+- name: activitysim.workflows.steps.py
+  in:
+    label: Detect if debugging
+    py: |
+      def is_debug():
+          import sys
+          gettrace = getattr(sys, 'gettrace', None)
+          if gettrace is None:
+              return False
+          else:
+              v = gettrace()
+              if v is None:
+                  return False
+              else:
+                  return True
+      should_swallow_errors = not is_debug()
+      save('should_swallow_errors')
+
+- activitysim.workflows.steps.contrast.contrast_setup
+
+- name: activitysim.workflows.steps.create
+  run: '{create}'
+  in:
+    destination: "{workspace}"
+
+- activitysim.workflows.steps.contrast.directory_prep
+
+- name: pypyr.steps.call
+  run: '{compile}'
+  in:
+    call: run-compile
+    swallow: False
+
+- name: pypyr.steps.call
+  run: '{sharrow}'
+  in:
+    call: run-sharrow
+    swallow: '{should_swallow_errors}'
+
+- name: pypyr.steps.call
+  run: '{legacy}'
+  in:
+    call: run-legacy
+    swallow: '{should_swallow_errors}'
+
+- name: pypyr.steps.call
+  run: '{reference}'
+  in:
+    call: run-reference
+    swallow: '{should_swallow_errors}'
+
+- activitysim.workflows.steps.contrast.composite_log
+
+- name: pypyr.steps.call
+  in:
+    call:
+      groups: reporting
+      success: report-save
+      failure: report-save
+    swallow: False
+
+################################################################################
+run-compile:
+  # This step group runs activitysim with a (usually) smaller sample of
+  # households, to generate the compiled numba code for the local machine
+
+- description: write configs_sh_compile
+  name: pypyr.steps.filewriteyaml
+  in:
+    fileWriteYaml:
+      path: "{workspace}/{example_name}/configs_sh_compile/{settings_file}"
+      payload:
+        inherit_settings: True
+        sharrow: test
+        chunk_training_mode: disabled
+        households_sample_size: '{compile_n_households}'
+        # cache_dir: cache_sharrow
+        trace_hh_id: '{trace_hh_id}'
+        trace_od: '{trace_od}'
+        instrument: '{instrument}'
+        disable_zarr: '{disable_zarr}'
+        multiprocess: False
+        recode_pipeline_columns: True
+
+- description: Run activitysim to compile and test sharrow-enabled model
+  name: activitysim.workflows.steps.run_subprocess
+  in:
+    pre_config_dirs: configs_sh_compile
+    output_dir: 'output-{tag}/output-compile'
+    cwd: "{workspace}/{example_name}"
+    label: "{example_name} -- sharrow compile"
+    persist_sharrow_cache: True
+
+
+################################################################################
+run-sharrow:
+
+- description: write configs_sh
+  name: pypyr.steps.filewriteyaml
+  in:
+    fileWriteYaml:
+      path: "{workspace}/{example_name}/configs_sh/{settings_file}"
+      payload:
+        inherit_settings: True
+        sharrow: require
+        # cache_dir: cache_sharrow
+        households_sample_size: '{main_n_households}'
+        trace_hh_id: '{trace_hh_id}'
+        trace_od: '{trace_od}'
+        instrument: '{instrument}'
+        memory_profile: '{memory_profile}'
+        chunk_size: '{chunk_size}'
+        num_processes: '{num_processes}'
+        multiprocess: '{is_multiprocess}'
+        chunk_method: '{chunk_method}'
+        chunk_training_mode: '{chunk_training_mode}'
+        disable_zarr: '{disable_zarr}'
+        recode_pipeline_columns: True
+
+- description: Run activitysim to evaluate sharrow-enabled model
+  name: activitysim.workflows.steps.run_subprocess
+  in:
+    pre_config_dirs: configs_sh
+    output_dir: 'output-{tag}/output-sharrow'
+    cwd: "{workspace}/{example_name}"
+    label: "{example_name} -- sharrow run"
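+    # reuse the numba code compiled and cached by the run-compile group above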
+ persist_sharrow_cache: True + +################################################################################ +run-legacy: + +- description: write.configs_legacy + name: pypyr.steps.filewriteyaml + in: + fileWriteYaml: + path: "{workspace}/{example_name}/configs_legacy/{settings_file}" + payload: + inherit_settings: True + recode_pipeline_columns: False + cache_dir: cache_legacy + households_sample_size: '{main_n_households}' + trace_hh_id: '{trace_hh_id}' + trace_od: '{trace_od}' + instrument: '{instrument}' + memory_profile: '{memory_profile}' + chunk_size: '{chunk_size}' + num_processes: '{num_processes}' + multiprocess: '{is_multiprocess}' + chunk_method: '{chunk_method}' + chunk_training_mode: '{chunk_training_mode}' + +- description: Run activitysim to evaluate legacy model + name: activitysim.workflows.steps.run_subprocess + in: + pre_config_dirs: configs_legacy + output_dir: 'output-{tag}/output-legacy' + cwd: "{workspace}/{example_name}" + label: "{example_name} -- legacy run" + + +################################################################################ +run-reference: + +- description: Install a reference environment + name: activitysim.workflows.steps.install_env + in: + label: "{example_name} -- install ref env {reference_asim_version}" + env_prefix: "{workspace}/env/asim-ref-{reference_asim_version}" + asim_version: '{reference_asim_version}' + python_version: '{reference_python_version}' + +- name: activitysim.workflows.steps.py + in: + label: Identify first config dir + py: | + if isinstance(config_dirs, str): + first_config_dir = config_dirs + else: + first_config_dir = config_dirs[0] + save('first_config_dir') + +- description: Copy required reference settings + name: activitysim.workflows.steps.copy_files + in: + source_glob: + - "{workspace}/{example_name}/{first_config_dir}/legacy-{reference_asim_version}/*.yaml" + - "{workspace}/{example_name}/{first_config_dir}/legacy-{reference_asim_version}/*.csv" + dest_dir: "{workspace}/{example_name}/configs_reference" + +- name: activitysim.workflows.steps.py + in: + label: Allow resume_after in ref only if pipeline exists + # Older versions of ActivitySim choke on resume_after if pipeline is missing. 
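+    # so resume_after is forwarded only when output-reference/pipeline.h5 already exists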
+ py: | + import os + ref_pipeline = f"{workspace}/{example_name}/output-{tag}/output-reference/pipeline.h5" + if os.path.exists(ref_pipeline): + resume_after_ref = resume_after + else: + resume_after_ref = None + save('resume_after_ref') + +- description: write.configs_reference + name: activitysim.workflows.steps.update_yaml + in: + updateYaml: + path: "{workspace}/{example_name}/configs_reference/{settings_file}" + payload: + inherit_settings: True + households_sample_size: '{main_n_households}' + trace_hh_id: '{trace_hh_id}' + trace_od: '{trace_od}' + resume_after: '{resume_after_ref}' + chunk_size: '{chunk_size}' + num_processes: '{num_processes}' + multiprocess: '{is_multiprocess}' + chunk_method: '{chunk_method}' + chunk_training_mode: '{chunk_training_mode}' + +- description: Run activitysim to evaluate reference model + name: activitysim.workflows.steps.run_subprocess + in: + resume_after: + pre_config_dirs: configs_reference + output_dir: 'output-{tag}/output-reference' + cwd: "{workspace}/{example_name}" + label: "{example_name} -- reference run" + conda_prefix: "../env/asim-ref-{reference_asim_version}" + + +################################################################################ +reporting: + +- name: activitysim.workflows.steps.contrast.load_tables + in: + common_output_directory: "{workspace}/{example_name}/output-{tag}" + databases: + sharrow: "output-sharrow" + legacy: "output-legacy" + reference: "output-reference" + tables: + households: + filename: final_households.csv + index_col: household_id + persons: + filename: final_persons.csv + index_col: person_id + tours: + filename: final_tours.csv + index_col: tour_id + trips: + filename: final_trips.csv + index_col: trip_id + land_use: + filename: final_land_use.csv + index_col: zone_id + +- name: activitysim.workflows.steps.contrast.load_skims + in: + common_directory: "{workspace}/{example_name}" + +- name: activitysim.workflows.steps.reporting.init_report + in: + title: "{example_name} report" + common_directory: "{workspace}/{example_name}" + +- name: activitysim.workflows.steps.reporting.machine_info + in: + caption: Machine Info + +- name: activitysim.workflows.steps.reporting.settings + in: + caption: Settings + names: + - disable_zarr + - resume_after + - instrument + - memory_profile + - fast + - chunk_method + - chunk_training_mode + - chunk_size + - multiprocess + + +#### Runtime and Data Inventory #### + +- name: activitysim.workflows.steps.contrast.runtime + in: + caption: Model Runtime + include_runs: + - reference + - legacy + - sharrow + +- name: activitysim.workflows.steps.reporting.section_title + in: + title: Memory Usage + +- name: activitysim.workflows.steps.contrast.memory_use + in: + caption: USS + caption_level: 3 + memory_measure: uss + include_runs: + - reference + - legacy + - sharrow + +- name: activitysim.workflows.steps.contrast.memory_use + in: + caption: RSS + caption_level: 3 + memory_measure: rss + include_runs: + - reference + - legacy + - sharrow + +- name: activitysim.workflows.steps.contrast.memory_use + in: + caption: Full RSS + caption_level: 3 + memory_measure: full_rss + include_runs: + - reference + - legacy + - sharrow + +- name: activitysim.workflows.steps.contrast.memory_use_peak + in: + caption: Peak RSS by Component + caption_level: 3 + memory_measure: rss + include_runs: + - reference + - legacy + - sharrow + +- name: activitysim.workflows.steps.contrast.memory_use_peak + in: + caption: Peak USS by Component + caption_level: 3 + memory_measure: uss + include_runs: + 
- reference + - legacy + - sharrow + + +- name: activitysim.workflows.steps.contrast.data_inventory + + +#### Workplace Location #### + +- name: activitysim.workflows.steps.reporting.load_data_dictionary + in: + cwd: "{workspace}/{example_name}" + +- name: pypyr.steps.default + in: + defaults: + workplace_zone_agg: + +- name: activitysim.workflows.steps.reporting.section_title + skip: !py "'work location' in report_skip" + run: '{workplace_zone_agg}' + in: + title: Workplace Location + +- name: activitysim.workflows.steps.contrast.district_to_district + skip: !py "'work location' in report_skip" + run: '{workplace_zone_agg}' + in: + tablename: persons + caption: '{workplace_zone_agg[caption]}' + caption_level: 3 + district_id: '{workplace_zone_agg[district_id]}' + orig_label: home district + dest_label: work district + orig_col: home_zone_id + dest_col: workplace_zone_id + filter: workplace_zone_id >= 0 + size_label: n_workers + +- name: activitysim.workflows.steps.contrast.transform_data + skip: !py "household_income is None" + in: + tablename: households + column: '{household_income}' + out: income_tertile + qcut: + q: 3 + labels: + - low + - mid + - high + +- name: activitysim.workflows.steps.contrast.join_table_data + skip: !py "household_income is None" + in: + caption: Join Income to Persons + tablename: persons + from_tablename: households + columns: income_tertile + on: household_id + +- name: activitysim.workflows.steps.contrast.trip_distance + skip: !py "'work location' in report_skip" + in: + caption: Workplace Distance by Income + caption_level: 3 + dist_bins: 20 + dist_skim_name: 'distance_to_work' + tablename: persons + grouping: income_tertile + +- name: activitysim.workflows.steps.contrast.trip_distance + skip: !py "'school location' in report_skip" + in: + caption: School Distance by Income + caption_level: 3 + dist_bins: 20 + dist_skim_name: 'distance_to_school' + tablename: persons + grouping: income_tertile + +#### Auto Ownership #### + +- name: activitysim.workflows.steps.reporting.section_title + skip: !py "'auto ownership' in report_skip" + in: + title: Auto Ownership + +- name: activitysim.workflows.steps.contrast.transform_data + skip: !py "'auto ownership' in report_skip" + in: + tablename: households + column: hhsize + out: hhsize_to5 + clip: + upper: 5 + +- name: activitysim.workflows.steps.contrast.nominal_choice + skip: !py "'auto ownership' in report_skip" + in: + caption: Household Auto Ownership Counts by Household Size + caption_level: 3 + tablename: households + nominal_col: auto_ownership + row_grouping: + field: hhsize_to5 + title: Household Size (up to 5) + col_grouping: + field: income_tertile + sort: [low, mid, high] + title: Income Tertile + ordinal: true + plot_type: count + axis_label: "# of Households" + +- name: activitysim.workflows.steps.contrast.nominal_choice + skip: !py "'auto ownership' in report_skip" + in: + caption: Household Auto Ownership Shares by Household Size + caption_level: 3 + tablename: households + nominal_col: auto_ownership + row_grouping: + field: hhsize_to5 + title: Household Size (up to 5) + col_grouping: + field: income_tertile + sort: [low, mid, high] + title: Income Tertile + ordinal: true + + +#### CDAP #### + +- name: activitysim.workflows.steps.reporting.section_title + skip: !py "'cdap' in report_skip" + in: + title: Coordinated Daily Activity Pattern + +- name: activitysim.workflows.steps.contrast.nominal_choice + skip: !py "'cdap' in report_skip" + in: + caption: Coordinated Daily Activity Pattern by Person Type 
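+    # facet rows are ActivitySim person types (ptype)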
+ caption_level: 3 + tablename: persons + nominal_col: cdap_activity + row_grouping: + field: ptype + title: Person Type + axis_label: Daily Activity Pattern Count + plot_type: count + + +#### Tour Mode Choice #### + +- name: activitysim.workflows.steps.reporting.section_title + in: + title: Tour Mode Choice + +- name: activitysim.workflows.steps.contrast.nominal_choice + in: + caption: Tour Mode Choice by Primary Purpose + caption_level: 3 + tablename: tours + nominal_col: tour_mode + row_grouping: primary_purpose + axis_label: Tour Mode Share + +- name: activitysim.workflows.steps.contrast.nominal_choice + skip: !py "'tour mode by composition' in report_skip" + in: + caption: Tour Mode Choice by Composition + caption_level: 3 + tablename: tours + nominal_col: tour_mode + row_grouping: composition + axis_label: Tour Mode Share + + +#### Tour Schedule #### + +- name: activitysim.workflows.steps.reporting.section_title + in: + title: Tour Scheduling + +- name: activitysim.workflows.steps.contrast.ordinal_distribution + in: + caption: Tour Start Time by Primary Purpose + caption_level: 3 + tablename: tours + ordinal_col: start + facet_grouping: primary_purpose + plot_type: count + +- name: activitysim.workflows.steps.contrast.ordinal_distribution + in: + caption: Tour Duration by Primary Purpose + caption_level: 3 + tablename: tours + ordinal_col: duration + facet_grouping: primary_purpose + plot_type: count + + +#### Trip Mode Choice #### + +- name: activitysim.workflows.steps.reporting.section_title + in: + title: Trip Mode Choice + +- name: activitysim.workflows.steps.contrast.nominal_choice + in: + caption: Trip Mode Choice by Primary Purpose + caption_level: 3 + tablename: trips + nominal_col: trip_mode + row_grouping: primary_purpose + axis_label: Trip Mode Share + +- name: activitysim.workflows.steps.contrast.nominal_choice + in: + caption: Trip Mode Choice by Departure Time + caption_level: 3 + tablename: trips + nominal_col: trip_mode + row_grouping: depart + axis_label: Trip Mode Share + + +#### Trip Distance #### + +- name: activitysim.workflows.steps.reporting.section_title + in: + title: Trip Distance + +- name: activitysim.workflows.steps.contrast.attach_skim_data + in: + tablename: trips + otaz_col: origin + dtaz_col: destination + time_col: depart + skim_vars: '{distance_skim}' + +- name: activitysim.workflows.steps.contrast.transform_data + in: + tablename: trips + column: '{distance_skim}' + out: distance_to10_binned + censor: + left: 0 + right: 10 + cut: + bins: 20 + labels: midpoint + +- name: activitysim.workflows.steps.contrast.ordinal_distribution + in: + caption: Trip Distance by Primary Purpose, <10 miles + caption_level: 3 + tablename: trips + ordinal_col: distance_to10_binned + facet_grouping: primary_purpose + plot_type: count + interpolate: step + value_format: "0.2f" + axis_label: Distance (to 10 miles) + +- name: activitysim.workflows.steps.contrast.trip_distance + in: + caption: Trip Distance by Primary Purpose, <10 miles + caption_level: 3 + grouping: primary_purpose + dist_bins: 20 + dist_skim_name: '{distance_skim}' + max_dist: 10 + +- name: activitysim.workflows.steps.contrast.trip_distance + in: + caption: Trip Distance by Primary Purpose + caption_level: 3 + grouping: primary_purpose + dist_bins: 20 + dist_skim_name: '{distance_skim}' + + +################################################################################ +report-save: +- name: activitysim.workflows.steps.reporting.save_report + in: + html_filename: "{workspace}/{example_name}/output-{tag}/report-{tag}.html" + copy_filename: '{copy_report_to}' diff --git a/activitysim/workflows/v1.3/mtc_mini.yaml b/activitysim/workflows/v1.3/mtc_mini.yaml new file mode 100644 index 000000000..f1eff7f80 --- /dev/null +++ b/activitysim/workflows/v1.3/mtc_mini.yaml @@ -0,0 +1,44 @@ +# +# mtc_mini +# +# This workflow runs the Prototype MTC model using mini skims (190 zones), +# in a single process runner. It is mainly for rapid testing of the code +# and specification files for errors, not for policy analysis. +#
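+#
+# Typical invocation (illustrative; this assumes the `activitysim workflow`
+# command line entry point, where key=value pairs given on the command line
+# override the defaults below via the keyvaluepairs context parser):
+#
+#   activitysim workflow v1.3/mtc_mini main_n_households=25000
+#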
"{workspace}/{example_name}/output-{tag}/report-{tag}.html" + copy_filename: '{copy_report_to}' diff --git a/activitysim/workflows/v1.3/mtc_mini.yaml b/activitysim/workflows/v1.3/mtc_mini.yaml new file mode 100644 index 000000000..f1eff7f80 --- /dev/null +++ b/activitysim/workflows/v1.3/mtc_mini.yaml @@ -0,0 +1,44 @@ +# +# mtc_mini +# +# This workflow runs the Prototype MTC model using mini skims (190 zones), +# in a single process runner. It is for mainly for rapid testing of the code +# and specification files for errors, not for policy analysis. +# + +context_parser: pypyr.parser.keyvaluepairs +steps: + +- description: Setting default workflow parameters + name: pypyr.steps.default + in: + defaults: + example_name: prototype_mtc_sf + workflow_name: v1.3/mtc_mini + workspace: workspace # this is the directory where model runs are stored + create: True + compile: False + sharrow: True + legacy: True + reference: True + tag: + resume_after: + fast: False + compile_n_households: 500 + main_n_households: 50000 + config_dirs: configs + data_dir: data + instrument: False + memory_profile: True + trace_hh_id: + trace_od: + workplace_zone_agg: + caption: Workplaces by County + district_id: county_id + distance_skim: DIST + household_income: income + +- name: activitysim.workflows.steps.pype + in: + pype: + name: v1.3/_contrast_runner diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml index 407ae85a4..7a94ae4e1 100644 --- a/conda-environments/activitysim-dev-base.yml +++ b/conda-environments/activitysim-dev-base.yml @@ -75,4 +75,4 @@ dependencies: - zstandard - pip: - - autodoc_pydantic + - autodoc_pydantic >=1.9,<2.0 diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index 6b29b2d6f..dd1437581 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -71,5 +71,5 @@ dependencies: - zstandard - pip: - - autodoc_pydantic + - autodoc_pydantic >=1.9,<2.0 - -e .. diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml index d508c2134..9f8f8539b 100644 --- a/conda-environments/docbuild.yml +++ b/conda-environments/docbuild.yml @@ -36,7 +36,7 @@ dependencies: - platformdirs - psutil >= 4.1 - pyarrow >= 2.0 -- pydantic +- pydantic = 1.10.* - pypyr >= 5.3 - pytables >=3.7 - pytest @@ -56,5 +56,5 @@ dependencies: - zarr - pip: - - autodoc_pydantic + - autodoc_pydantic >=1.9,<2.0 - -e .. diff --git a/docs/_templates/autopydantic-inherits.rst b/docs/_templates/autopydantic-inherits.rst new file mode 100644 index 000000000..7f1268b39 --- /dev/null +++ b/docs/_templates/autopydantic-inherits.rst @@ -0,0 +1,7 @@ +{{ name | escape | underline}} + +.. currentmodule:: {{ module }} + +.. auto{{ objtype }}:: {{ objname }} + :inherited-members: BaseModel + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 0b6af5034..f8ca579c6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,6 +47,7 @@ "sphinx_autosummary_accessors", "sphinx_remove_toctrees", "sphinx_copybutton", + "sphinx.ext.autosectionlabel", ] remove_from_toctrees = [ diff --git a/docs/core.rst b/docs/core.rst index 082db7bb9..687e8f956 100644 --- a/docs/core.rst +++ b/docs/core.rst @@ -108,6 +108,8 @@ API .. automodule:: activitysim.core.random :members: +.. _trace : + Tracing ~~~~~~~ @@ -127,7 +129,7 @@ API :members: -.. _expressions: +.. 
_util_expressions: Utility Expressions ------------------- diff --git a/docs/dev-guide/build-docs.md b/docs/dev-guide/build-docs.md new file mode 100644 index 000000000..dc8c5d67d --- /dev/null +++ b/docs/dev-guide/build-docs.md @@ -0,0 +1,49 @@ + +(write-docs)= +# Documentation + +The core documentation for ActivitySim is built with [Sphinx](https://www.sphinx-doc.org). +The input files for this documentation can be written either in +[markdown](https://www.markdownguide.org) with filenames ending in `.md` (preferred +for new documentation pages) or +[reStructuredText](http://docutils.sourceforge.net/rst.html) with filenames ending in `.rst`. +In addition to converting *.md and *.rst files +to html format, Sphinx can also read the inline Python docstrings and convert +them into html. ActivitySim's docstrings are written in +[numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard) format. + +## Building the Documentation + +Developers who want to test a build of the ActivitySim documentation locally can +do so using `sphinx`. A pre-packaged conda environment is available to simplify this +process. On the command line, starting from the `activitysim` directory that constitutes the +main repository (i.e. you should see subdirectories including `activitysim`, +`conda-environments`, `docs`, and a few others) run these commands: + +```bash +mkdir -p ../.env +mamba env update -p ../.env/DOCBUILD -f conda-environments/docbuild.yml +conda activate ../.env/DOCBUILD +cd docs +make clean +make html +``` + +This will build the docs in the `docs/_build/html` directory. They can be viewed +in a web browser using the `file:///` protocol, or by double-clicking on the +`index.html` file (or any other .html file in that directory). + +## Automatic Documentation Builds + +Documentation can also be rendered online automatically by GitHub. Several scripts +are included in this repository's GitHub Actions to do so when updates are made +to the `main` or `develop` branches in the primary `ActivitySim` repository. + +If you are working in a *fork* of the primary `ActivitySim/activitysim` repository, you +can generate test builds of the documentation by pushing a commit to your branch +with the tag `[makedocs]` in the commit message. Note that, to prevent conflicts, this +only works on a fork, not within the primary `ActivitySim` repository, and only +on branches named something other than `develop`. The documentation will then be +published on your own subdomain. For example, if your fork is `tacocat/activitysim`, +and you are working on the `featuring-cilantro` branch, GitHub will render your +documentation build at `https://tacocat.github.io/activitysim/featuring-cilantro`. diff --git a/docs/dev-guide/component-configs.md b/docs/dev-guide/component-configs.md new file mode 100644 index 000000000..0fd1a9705 --- /dev/null +++ b/docs/dev-guide/component-configs.md @@ -0,0 +1,20 @@ +(component-config)= +# Component Configuration + +Individual components each have their own component-level configuration. These +configurations can include custom component-specific settings, as well as groups +of settings from these boilerplate base classes: + +```{eval-rst} +.. currentmodule:: activitysim.core.configuration +.. autosummary:: + :toctree: _generated + :template: autopydantic-inherits.rst + :recursive: + + ~base.PydanticReadable + ~base.PreprocessorSettings + ~logit.LogitComponentSettings + ~logit.TemplatedLogitComponentSettings + ~logit.LogitNestSpec +```
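+
+For orientation, a component settings file built from these base classes often
+looks something like the sketch below. This is illustrative only: the
+`SPEC`/`COEFFICIENTS`/`CONSTANTS` keys come from `LogitComponentSettings`, the
+`SPEC`/`DF`/`TABLES` keys of the preprocessor block come from
+`PreprocessorSettings`, and the file names and values are hypothetical:
+
+```yaml
+SPEC: example_component.csv              # utility expressions (hypothetical name)
+COEFFICIENTS: example_component_coefficients.csv
+CONSTANTS:
+  cost_share_s2: 0.5                     # illustrative constant used in the SPEC
+preprocessor:
+  SPEC: annotate_choosers_preprocessor   # assign-style annotation expressions
+  DF: choosers
+  TABLES:
+    - land_use
+```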
diff --git a/docs/dev-guide/components/accessibility.md b/docs/dev-guide/components/accessibility.md new file mode 100644 index 000000000..32c97d4bc --- /dev/null +++ b/docs/dev-guide/components/accessibility.md @@ -0,0 +1,69 @@ +(component-accessibility)= +# Accessibility + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.accessibility +``` + +The accessibilities model is an aggregate model that calculates multiple origin-based accessibility +measures by origin zone to all destination zones. + +The accessibility measure first multiplies an employment variable by a mode-specific decay function. The +product reflects the difficulty of accessing the activities the farther (in terms of round-trip travel time) +the jobs are from the location in question. The products to each destination zone are next summed over +each origin zone, and the logarithm of the product mutes large differences. The decay function on +the walk accessibility measure is steeper than automobile or transit. The minimum accessibility is zero. + +Level-of-service variables from three time periods are used, specifically the AM peak period (6 am to 10 am), the +midday period (10 am to 3 pm), and the PM peak period (3 pm to 7 pm). + +*Inputs* + +* Highway skims for the three periods. Each skim is expected to include a table named "TOLLTIMEDA", which is the drive alone in-vehicle travel time for automobiles willing to pay a "value" (time-savings) toll. +* Transit skims for the three periods. Each skim is expected to include the following tables: (i) "IVT", in-vehicle time; (ii) "IWAIT", initial wait time; (iii) "XWAIT", transfer wait time; (iv) "WACC", walk access time; (v) "WAUX", auxiliary walk time; and, (vi) "WEGR", walk egress time. +* Zonal data with the following fields: (i) "TOTEMP", total employment; (ii) "RETEMPN", retail trade employment per the NAICS classification. + +*Outputs* + +* taz, travel analysis zone number +* autoPeakRetail, the accessibility by automobile during peak conditions to retail employment for this TAZ +* autoPeakTotal, the accessibility by automobile during peak conditions to all employment +* autoOffPeakRetail, the accessibility by automobile during off-peak conditions to retail employment +* autoOffPeakTotal, the accessibility by automobile during off-peak conditions to all employment +* transitPeakRetail, the accessibility by transit during peak conditions to retail employment +* transitPeakTotal, the accessibility by transit during peak conditions to all employment +* transitOffPeakRetail, the accessibility by transit during off-peak conditions to retail employment +* transitOffPeakTotal, the accessibility by transit during off-peak conditions to all employment +* nonMotorizedRetail, the accessibility by walking during all time periods to retail employment +* nonMotorizedTotal, the accessibility by walking during all time periods to all employment + +The main interface to the accessibility model is the +[compute_accessibility](activitysim.abm.models.accessibility.compute_accessibility) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `accessibility.yaml` +- *Core Table*: `skims` +- *Result Table*: `accessibility` + + +## Configuration + +```{eval-rst} +.. autopydantic_model:: AccessibilitySettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +```
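+
+For reference, a minimal `accessibility.yaml` might look like the sketch below.
+The `SPEC`, `land_use_columns`, and `CONSTANTS` keys follow the
+`AccessibilitySettings` schema documented above; the values are illustrative
+only (the land use columns echo the zonal fields listed under *Inputs*):
+
+```yaml
+SPEC: accessibility.csv
+land_use_columns:
+  - TOTEMP
+  - RETEMPN
+CONSTANTS:
+  dispersion_parameter_automobile: -0.05   # hypothetical decay parameter
+```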
+ +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/accessibility.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/accessibility.yaml) + + +## Implementation + +```{eval-rst} +.. autofunction:: compute_accessibility +``` diff --git a/docs/dev-guide/components/atwork_subtour_destination.md b/docs/dev-guide/components/atwork_subtour_destination.md new file mode 100644 index 000000000..8c08193a2 --- /dev/null +++ b/docs/dev-guide/components/atwork_subtour_destination.md @@ -0,0 +1,46 @@ +(component-atwork-subtour-destination)= +# At-work Subtours Destination Choice + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.atwork_subtour_destination +``` + +The at-work subtours destination choice model is made up of three model steps: + + * sample - selects a sample of alternative locations for the next model step. This selects X locations from the full set of model zones using a simple utility. + * logsums - starts with the table created above and calculates and adds the mode choice logsum expression for each alternative location. + * simulate - starts with the table created above and chooses a final location, this time with the mode choice logsum included. + +At-work subtour location choice for [multiple_zone_systems](multiple_zone_systems) models uses [presampling](presampling) by default. + +The main interface to the at-work subtour destination model is the +[atwork_subtour_destination](activitysim.abm.models.atwork_subtour_destination.atwork_subtour_destination) +function. This function is registered as an Inject step in the example Pipeline. +See [writing_logsums](writing_logsums) for how to write logsums for estimation. + +## Structure + +- *Configuration File*: `atwork_subtour_destination.yaml` +- *Core Table*: `tours` +- *Result Field*: `destination` +- *Skims keys*: `workplace_taz, alt_dest, MD time period` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: TourLocationComponentSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +```
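+
+To make the three-step structure concrete, a destination choice settings file
+of this kind pairs a sample specification with a final choice specification and
+points to the mode choice settings used for the logsum step. The sketch below
+is a hedged illustration: the key names follow `TourLocationComponentSettings`,
+while the file names and sample size are assumptions rather than the shipped
+configuration:
+
+```yaml
+SAMPLE_SPEC: atwork_subtour_destination_sample.csv   # step 1: sample
+SPEC: atwork_subtour_destination.csv                 # step 3: simulate
+COEFFICIENTS: atwork_subtour_destination_coefficients.csv
+SAMPLE_SIZE: 30                                      # the "X" locations sampled
+LOGSUM_SETTINGS: tour_mode_choice.yaml               # step 2: logsums
+DEST_CHOICE_COLUMN_NAME: destination
+```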
+ +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/atwork_subtour_destination.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/atwork_subtour_destination.yaml) + + +## Implementation + +```{eval-rst} +.. autofunction:: atwork_subtour_destination +``` diff --git a/docs/dev-guide/components/atwork_subtour_frequency.md b/docs/dev-guide/components/atwork_subtour_frequency.md new file mode 100644 index 000000000..0b0b4c78a --- /dev/null +++ b/docs/dev-guide/components/atwork_subtour_frequency.md @@ -0,0 +1,48 @@ +(component-atwork-subtour-frequency)= +# At-work Subtours Frequency + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.atwork_subtour_frequency +``` + +The at-work subtour frequency model selects the number of at-work subtours made for each work tour. +It also creates at-work subtours by adding them to the tours table in the data pipeline. +These at-work sub-tours are travel tours taken during the workday with their origin at the work +location, rather than from home. Explanatory variables include employment status, +income, auto ownership, the frequency of other tours, characteristics of the parent work tour, and +characteristics of the workplace zone. + +* *Choosers*: work tours +* *Alternatives*: none, 1 eating out tour, 1 business tour, 1 maintenance tour, 2 business tours, 1 eating out tour + 1 business tour +* *Dependent tables*: household, person, accessibility +* *Outputs*: work tour subtour frequency choice, at-work tours table (with only tour origin zone at this point) + +The main interface to the at-work subtours frequency model is the +[atwork_subtour_frequency](activitysim.abm.models.atwork_subtour_frequency.atwork_subtour_frequency) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `atwork_subtour_frequency.yaml` +- *Core Table*: `tours` +- *Result Field*: `atwork_subtour_frequency` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: AtworkSubtourFrequencySettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/atwork_subtour_frequency.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/atwork_subtour_frequency.yaml) + + +## Implementation + +```{eval-rst} +.. autofunction:: atwork_subtour_frequency +``` diff --git a/docs/dev-guide/components/atwork_subtour_mode_choice.md b/docs/dev-guide/components/atwork_subtour_mode_choice.md new file mode 100644 index 000000000..a3038badc --- /dev/null +++ b/docs/dev-guide/components/atwork_subtour_mode_choice.md @@ -0,0 +1,41 @@ +(component-atwork-subtour-mode-choice)= +# At-work Subtour Mode + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.atwork_subtour_mode_choice +``` + +The at-work subtour mode choice model assigns a travel mode to each at-work subtour using the `tour_mode_choice` model. + +The main interface to the at-work subtour mode choice model is the +[atwork_subtour_mode_choice](activitysim.abm.models.atwork_subtour_mode_choice.atwork_subtour_mode_choice) +function. This function is called in the Inject step `atwork_subtour_mode_choice` and +is registered as an Inject step in the example Pipeline. +See [writing_logsums](writing_logsums) for how to write logsums for estimation. + +## Structure + +- *Configuration File*: `tour_mode_choice.yaml` +- *Core Table*: `tours` +- *Result Field*: `tour_mode` +- *Skims keys*: `workplace_taz, destination, start, end` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: TourModeComponentSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/tour_mode_choice.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/tour_mode_choice.yaml) + + +## Implementation + +```{eval-rst} +.. autofunction:: atwork_subtour_mode_choice +``` diff --git a/docs/dev-guide/components/atwork_subtour_scheduling.md b/docs/dev-guide/components/atwork_subtour_scheduling.md new file mode 100644 index 000000000..bac01be00 --- /dev/null +++ b/docs/dev-guide/components/atwork_subtour_scheduling.md @@ -0,0 +1,50 @@ +(component-atwork-subtour-scheduling)= +# At-work Subtour Scheduling + +```{eval-rst} +.. 
currentmodule:: activitysim.abm.models.atwork_subtour_scheduling +``` + +The at-work subtours scheduling model selects a tour departure and duration period (and therefore a start and end +period as well) for each at-work subtour. This model uses person `time_windows`. + +This model is the same as the mandatory tour scheduling model except it operates on the at-work tours and +constrains the alternative set to available person `time_windows`. The at-work subtour scheduling model does not use mode choice logsums. +The at-work subtour frequency model can choose multiple tours, so this model must process all first tours and then all second +tours, since isFirstAtWorkTour is an explanatory variable. + +* *Choosers*: at-work tours +* *Alternatives*: alternative departure time and arrival back at origin time pairs WITHIN the work tour departure time and arrival time back at origin AND the person time window. If no time window is available for the tour, make the first and last time periods within the work tour available, make the choice, and log the number of times this occurs. +* *Dependent tables*: skims, person, land use, work tour +* *Outputs*: at-work tour departure time and arrival back at origin time, updated person time windows + +The main interface to the at-work subtours scheduling model is the +[atwork_subtour_scheduling](activitysim.abm.models.atwork_subtour_scheduling.atwork_subtour_scheduling) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `tour_scheduling_atwork.yaml` +- *Core Table*: `tours` +- *Result Field*: `start, end, duration` +- *Skims keys*: `workplace_taz, alt_dest, MD time period, MD time period` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: TourSchedulingSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/tour_scheduling_atwork.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/tour_scheduling_atwork.yaml) + + +## Implementation + +```{eval-rst} +.. autofunction:: atwork_subtour_scheduling +``` diff --git a/docs/dev-guide/components/auto_ownership.md b/docs/dev-guide/components/auto_ownership.md new file mode 100644 index 000000000..5c2dd6c9f --- /dev/null +++ b/docs/dev-guide/components/auto_ownership.md @@ -0,0 +1,36 @@ +(component-auto-ownership)= +# Auto Ownership + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.auto_ownership +``` + +The auto ownership model selects a number of autos for each household in the simulation. +The primary model components are household demographics, zonal density, and accessibility. + +## Structure + +- *Configuration File*: `auto_ownership.yaml` +- *Core Table*: `households` +- *Result Field*: `auto_ownership` + +This model is typically structured as a multinomial logit model.
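+
+The settings file for this component is typically small. The sketch below is
+illustrative only (the file names are assumptions, and `LOGIT_TYPE: MNL` simply
+makes the multinomial structure explicit):
+
+```yaml
+SPEC: auto_ownership.csv
+COEFFICIENTS: auto_ownership_coefficients.csv
+LOGIT_TYPE: MNL
+```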
+ +## Configuration + +```{eval-rst} +.. autopydantic_model:: AutoOwnershipSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/auto_ownership.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/auto_ownership.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: auto_ownership_simulate +``` diff --git a/docs/dev-guide/components/cdap.md b/docs/dev-guide/components/cdap.md new file mode 100644 index 000000000..63e665543 --- /dev/null +++ b/docs/dev-guide/components/cdap.md @@ -0,0 +1,53 @@ +(component-cdap)= +# Coordinated Daily Activity Pattern + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.cdap +``` + +The Coordinated Daily Activity Pattern (CDAP) model predicts the choice of daily activity pattern (DAP) +for each member in the household, simultaneously. The DAP is categorized into three types as +follows: + +* Mandatory: the person engages in travel to at least one out-of-home mandatory activity - work, university, or school. The mandatory pattern may also include non-mandatory activities such as separate home-based tours or intermediate stops on mandatory tours. +* Non-mandatory: the person engages in only maintenance and discretionary tours, which, by definition, do not contain mandatory activities. +* Home: the person does not travel outside the home. + +The CDAP model is a sequence of vectorized table operations: + +* create a person level table and rank each person in the household for inclusion in the CDAP model. Priority is given to full time workers (up to two), then to part time workers (up to two workers, of any type), then to children (youngest to oldest, up to three). Additional members up to five are randomly included for the CDAP calculation. +* solve individual M/N/H utilities for each person +* take as input an interaction coefficients table and then programmatically produce and write out the expression files for household sizes 1, 2, 3, 4, and 5, with each size's model independent of the others +* select households of size 1, join all required person attributes, and then read and solve the automatically generated expressions +* repeat for household sizes 2, 3, 4, and 5; each model is independent of the others. + +The main interface to the CDAP model is the [run_cdap](activitysim.abm.models.util.cdap.run_cdap) +function. This function is called by the Inject step `cdap_simulate`, which is +registered as an Inject step in the example Pipeline. There are two cdap class definitions in +ActivitySim. The first is at [cdap](activitysim.abm.models.cdap) and contains the Inject +wrapper for running it as part of the model pipeline. The second is +at [cdap](activitysim.abm.models.util.cdap) and contains CDAP model logic. + +## Structure + +- *Configuration File*: `cdap.yaml` +- *Core Table*: `persons` +- *Result Field*: `cdap_activity` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: CdapSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/cdap.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/cdap.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: cdap_simulate +``` diff --git a/docs/dev-guide/components/disaggregate_accessibility.md b/docs/dev-guide/components/disaggregate_accessibility.md new file mode 100644 index 000000000..7705f4b9c --- /dev/null +++ b/docs/dev-guide/components/disaggregate_accessibility.md @@ -0,0 +1,77 @@ +(component-disaggregate-accessibility)= +# Disaggregate Accessibility + +```{eval-rst} +.. 
currentmodule:: activitysim.abm.models.disaggregate_accessibility +``` + +The disaggregate accessibility model is an extension of the base accessibility model. +While the base accessibility model is based on a mode-specific decay function and uses fixed market +segments in the population (i.e., income), the disaggregate accessibility model extracts the actual +destination choice logsums by purpose (i.e., mandatory fixed school/work location and non-mandatory +tour destinations by purpose) from the actual model calculations using a user-defined proto-population. +This enables users to include features that may be more critical to destination +choice than just income (e.g., automobile ownership). + +## Structure + +*Inputs* + * disaggregate_accessibility.yaml - Configuration settings for disaggregate accessibility model. + * annotate.csv [optional] - Users can specify additional annotations specific to disaggregate accessibility. For example, annotating the proto-population tables. + +*Outputs* + * final_disaggregate_accessibility.csv [optional] + * final_non_mandatory_tour_destination_accesibility.csv [optional] + * final_workplace_location_accessibility.csv [optional] + * final_school_location_accessibility.csv [optional] + * final_proto_persons.csv [optional] + * final_proto_households.csv [optional] + * final_proto_tours.csv [optional] + +The above tables are created in the model pipeline, but the model will not save +any outputs unless specified in `settings.yaml` under `output_tables`. Users can return +the proto population tables for inspection, as well as the raw logsum accessibilities +for mandatory school/work and non-mandatory destinations. The logsums are then merged +at the household level in final_disaggregate_accessibility.csv, with each tour purpose's +logsums shown as separate columns.
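+
+As a hedged illustration, an `output_tables` block along the lines of the
+sketch below would save these outputs. The `action`/`prefix`/`tables` structure
+is the standard output_tables format; the exact table names are assumptions
+inferred from the `final_`-prefixed file names above:
+
+```yaml
+output_tables:
+  action: include
+  prefix: final_
+  tables:
+    - disaggregate_accessibility
+    - proto_households
+    - proto_persons
+    - proto_tours
+```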
+ +*Usage* + +The disaggregate accessibility model is run as a model step in the model list. +There are two necessary steps: + +* `initialize_proto_population` +* `compute_disaggregate_accessibility` + +The reason the steps must be separate is to enable multiprocessing. +The proto-population must be fully generated and initialized before activitysim +slices the tables for separate processes. These steps must also occur before +initialize_households in order to avoid conflict with the shadow_pricing model. + +The model steps can be run either as part of the activitysim model run, or set up +as a standalone run to pre-compute the accessibility values. +For standalone implementations, the final_disaggregate_accessibility.csv is read +into the pipeline and initialized with the initialize_households model step. + +- *Configuration File*: `disaggregate_accessibility.yaml` +- *Core Table*: Users define the variables to be generated for 'PROTO_HOUSEHOLDS', 'PROTO_PERSONS', and 'PROTO_TOURS' tables. These tables must include all basic fields necessary for running the actual model. Additional fields can be annotated in pre-processing using the annotation settings of this file. + + +## Configuration + +```{eval-rst} +.. autopydantic_model:: DisaggregateAccessibilitySettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC_Extended](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc_extended/configs/disaggregate_accessibility.yaml) +- [Placeholder_SANDAG_2_Zone](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/placeholder_sandag/test/configs_2_zone/disaggregate_accessibility.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: disaggregate_accessibility +``` diff --git a/docs/dev-guide/components/free_parking.md b/docs/dev-guide/components/free_parking.md new file mode 100644 index 000000000..b1bc9de10 --- /dev/null +++ b/docs/dev-guide/components/free_parking.md @@ -0,0 +1,40 @@ +(component-free-parking)= +# Free Parking Eligibility + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.free_parking +``` + +The Free Parking Eligibility model predicts the availability of free parking at a person's +workplace. It is applied for people who work in zones that have parking charges, which are +generally located in the Central Business Districts. The purpose of the model is to adequately +reflect the cost of driving to work in subsequent models, particularly in mode choice. + +## Structure + +- *Configuration File*: `free_parking.yaml` +- *Core Table*: `persons` +- *Result Field*: `free_parking_at_work` + +This model generates only True or False outcomes, and is structured as a binary +logit model. + + +## Configuration + +```{eval-rst} +.. autopydantic_model:: FreeParkingSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/free_parking.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/free_parking.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: free_parking +``` diff --git a/docs/dev-guide/components/index.rst b/docs/dev-guide/components/index.rst index e4ffabfb0..9329f3b0d 100644 --- a/docs/dev-guide/components/index.rst +++ b/docs/dev-guide/components/index.rst @@ -1,12 +1,51 @@ +.. _dev_components: + ========== Components ========== .. toctree:: - :maxdepth: 1 - + :maxdepth: 1 + initialize + initialize_los + initialize_tours + accessibility + auto_ownership + vehicle_type_choice + telecommute_frequency + cdap + mandatory_tour_frequency + school_escorting + joint_tour_composition + joint_tour_participation + joint_tour_destination + joint_tour_scheduling + non_mandatory_tour_frequency + non_mandatory_destination + non_mandatory_scheduling + mandatory_scheduling + disaggregate_accessibility + free_parking + school_location_choice + transit_pass_ownership + transit_pass_subsidy trip_destination - + work_from_home + work_location_choice + tour_mode_choice + atwork_subtour_frequency + atwork_subtour_destination + atwork_subtour_scheduling + atwork_subtour_mode_choice + stop_frequency + trip_purpose + trip_destination + trip_purpose_and_destination + trip_scheduling_choice + trip_departure_choice + trip_mode_choice + parking_location_choice + write_trip_matrices .. 
note:: diff --git a/docs/dev-guide/components/initialize.md b/docs/dev-guide/components/initialize.md new file mode 100644 index 000000000..90bba4a35 --- /dev/null +++ b/docs/dev-guide/components/initialize.md @@ -0,0 +1,25 @@ +(component-initialize)= +# Initialize + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.initialize +``` + +The initialize model isn't really a model, but rather a few data processing steps in the data pipeline. +The initialize data processing steps code variables used in downstream models, such as household and person +value-of-time. This step also pre-loads the land_use, households, persons, and person_windows tables because +random seeds are set differently for each step and therefore the sampling of households depends on which step +they are initially loaded in. + +The main interface to the initialize land use step is the [initialize_landuse](activitysim.abm.models.initialize.initialize_landuse) +function. The main interface to the initialize household step is the [initialize_households](activitysim.abm.models.initialize.initialize_households) +function. The main interface to the initialize tours step is the [initialize_tours](activitysim.abm.models.initialize_tours.initialize_tours) +function. These functions are registered as Inject steps in the example Pipeline. + + +## Implementation + +```{eval-rst} +.. autofunction:: initialize_landuse +.. autofunction:: initialize_households +``` diff --git a/docs/dev-guide/components/initialize_los.md b/docs/dev-guide/components/initialize_los.md new file mode 100644 index 000000000..98f0363c6 --- /dev/null +++ b/docs/dev-guide/components/initialize_los.md @@ -0,0 +1,25 @@ +(component-initialize-los)= +# Initialize LOS + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.initialize_los +``` + +The initialize LOS model isn't really a model, but rather a series of data processing steps in the data pipeline. +The initialize LOS model does two things: + + * Loads skims and caches them for later use if desired + * Loads network LOS inputs for transit virtual path building (see [transit_virtual_path_builder](transit_virtual_path_builder)), pre-computes tap-to-tap total utilities, and caches them for later use if desired + +The main interface to the initialize LOS step is the [initialize_los](activitysim.abm.models.initialize_los.initialize_los) +function. The main interface to the initialize TVPB step is the [initialize_tvpb](activitysim.abm.models.initialize_los.initialize_tvpb) +function. These functions are registered as Inject steps in the example Pipeline. + + +## Implementation + +```{eval-rst} +.. autofunction:: initialize_los +.. autofunction:: compute_utilities_for_attribute_tuple +.. autofunction:: initialize_tvpb +``` diff --git a/docs/dev-guide/components/initialize_tours.md b/docs/dev-guide/components/initialize_tours.md new file mode 100644 index 000000000..a05ec127f --- /dev/null +++ b/docs/dev-guide/components/initialize_tours.md @@ -0,0 +1,15 @@ +(component-initialize-tours)= +# Initialize Tours + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.initialize_tours +``` + + + + +## Implementation + +```{eval-rst} +.. autofunction:: initialize_tours +``` diff --git a/docs/dev-guide/components/joint_tour_composition.md b/docs/dev-guide/components/joint_tour_composition.md new file mode 100644 index 000000000..980bd38a3 --- /dev/null +++ b/docs/dev-guide/components/joint_tour_composition.md @@ -0,0 +1,42 @@ +(component-joint-tour-composition)= +# Joint Tour Composition + +```{eval-rst} +.. 
currentmodule:: activitysim.abm.models.joint_tour_composition +``` +In the joint tour party composition model, the makeup of the travel party (adults, children, or +mixed - adults and children) is determined for each joint tour. The party composition determines the +general makeup of the party of participants in each joint tour in order to allow the micro-simulation +to faithfully represent the prevalence of adult-only, children-only, and mixed joint travel tours +for each purpose while permitting simplicity in the subsequent person participation model. + +The main interface to the joint tour composition model is the +[joint_tour_composition](activitysim.abm.models.joint_tour_composition.joint_tour_composition) +function. This function is registered as an Inject step in the example Pipeline. + + +## Structure + +- *Configuration File*: `joint_tour_composition.yaml` +- *Core Table*: `tours` +- *Result Field*: `composition` + + +## Configuration + +```{eval-rst} +.. autopydantic_model:: JointTourCompositionSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/joint_tour_composition.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/joint_tour_composition.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: joint_tour_composition +``` diff --git a/docs/dev-guide/components/joint_tour_destination.md b/docs/dev-guide/components/joint_tour_destination.md new file mode 100644 index 000000000..21700fdd0 --- /dev/null +++ b/docs/dev-guide/components/joint_tour_destination.md @@ -0,0 +1,60 @@ +(component-joint-tour-destination)= +# Joint Tour Destination + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.joint_tour_destination +``` + +The joint tour destination choice model operates similarly to the usual work and +school location choice model, selecting the primary destination for travel tours. The only +procedural difference between the models is that the usual work and school location choice +model selects the usual location of an activity whether or not the activity is undertaken during the +travel day, while the joint tour destination choice model selects the location for an +activity which has already been generated. + +The tour's primary destination is the location of the activity that is assumed to provide the greatest +impetus for engaging in the travel tour. In the household survey, the primary destination was not asked, but +rather inferred from the pattern of stops in a closed loop in the respondents' travel diaries. The +inference was made by weighing multiple criteria including a defined hierarchy of purposes, the +duration of activities, and the distance from the tour origin. The model operates in the reverse +direction, designating the primary purpose and destination and then adding intermediate stops +based on spatial, temporal, and modal characteristics of the inbound and outbound journeys to +the primary destination. + +The joint tour destination choice model is made up of three model steps: + * sample - selects a sample of alternative locations for the next model step. This selects X locations from the full set of model zones using a simple utility. + * logsums - starts with the table created above and calculates and adds the mode choice logsum expression for each alternative location. 
+ * simulate - starts with the table created above and chooses a final location, this time with the mode choice logsum included. + +Joint tour location choice for [multiple_zone_systems](multiple_zone_systems) models uses [presampling](presampling) by default. + +The main interface to the model is the [joint_tour_destination](activitysim.abm.models.joint_tour_destination.joint_tour_destination) +function. This function is registered as an Inject step in the example Pipeline. See [writing_logsums](writing_logsums) for how +to write logsums for estimation. + +## Structure + +- *Configuration File*: `joint_tour_destination.yaml` +- *Core Table*: `tours` +- *Result Field*: `destination` +- *Skims Keys*: `TAZ, alt_dest, MD time period` + + +## Configuration + +```{eval-rst} +.. autopydantic_model:: TourLocationComponentSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/joint_tour_destination.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/joint_tour_destination.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: joint_tour_destination +``` diff --git a/docs/dev-guide/components/joint_tour_participation.md b/docs/dev-guide/components/joint_tour_participation.md new file mode 100644 index 000000000..b93d5d33b --- /dev/null +++ b/docs/dev-guide/components/joint_tour_participation.md @@ -0,0 +1,48 @@ +(component-joint-tour-participation)= +# Joint Tour Participation + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.joint_tour_participation +``` +In the joint tour person participation model, each eligible person sequentially makes a +choice to participate or not participate in each joint tour. Since the party composition model +determines what types of people are eligible to join a given tour, the person participation model +can operate in an iterative fashion, with each household member choosing to join or not to join +a travel party independent of the decisions of other household members. In the event that the +constraints posed by the result of the party composition model are not met, the person +participation model cycles through the household members multiple times until the required +types of people have joined the travel party. + +This step also creates the ``joint_tour_participants`` table in the pipeline, which stores the +person ids for each person on the tour. + +The main interface to the joint tour participation model is the +[joint_tour_participation](activitysim.abm.models.joint_tour_participation.joint_tour_participation) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `joint_tour_participation.yaml` +- *Core Table*: `tours` +- *Result Field*: `number_of_participants, person_id (for the point person)` + + +## Configuration + +```{eval-rst} +.. autopydantic_model:: JointTourParticipationSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/joint_tour_participation.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/joint_tour_participation.yaml) + +## Implementation + +```{eval-rst} +.. 
autofunction:: joint_tour_participation +.. autofunction:: participants_chooser +``` diff --git a/docs/dev-guide/components/joint_tour_scheduling.md b/docs/dev-guide/components/joint_tour_scheduling.md new file mode 100644 index 000000000..6a9fa2190 --- /dev/null +++ b/docs/dev-guide/components/joint_tour_scheduling.md @@ -0,0 +1,46 @@ +(component-joint-tour-scheduling)= +# Joint Tour Scheduling + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.joint_tour_scheduling +``` + +The joint tour scheduling model selects a tour departure and duration period (and therefore a start and end +period as well) for each joint tour. This model uses person [time_windows](time_windows). The primary drivers in the +model are accessibility-based parameters such +as the auto travel time for the departure/arrival hour combination, demographics, and time +pattern characteristics such as the time windows available from previously scheduled tours. +The joint tour scheduling model does not use mode choice logsums. + +The main interface to the joint tour scheduling model is the +[joint_tour_scheduling](activitysim.abm.models.joint_tour_scheduling.joint_tour_scheduling) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `joint_tour_scheduling.yaml` +- *Core Table*: `tours` +- *Result Field*: `start, end, duration` +- *Skims Keys*: `TAZ, destination, MD time period, MD time period` + + + + +## Configuration + +```{eval-rst} +.. autopydantic_model:: TourSchedulingSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/joint_tour_scheduling.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/joint_tour_scheduling.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: joint_tour_scheduling +``` diff --git a/docs/dev-guide/components/mandatory_scheduling.md b/docs/dev-guide/components/mandatory_scheduling.md new file mode 100644 index 000000000..7780bc81f --- /dev/null +++ b/docs/dev-guide/components/mandatory_scheduling.md @@ -0,0 +1,52 @@ +(component-mandatory-scheduling)= +# Mandatory Tour Scheduling + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.mandatory_scheduling +``` + +The mandatory tour scheduling model selects a tour departure and duration period (and therefore a +start and end period as well) for each mandatory tour. The primary drivers in the model are +accessibility-based parameters such as the mode choice logsum for the departure/arrival hour +combination, demographics, and time pattern characteristics such as the time windows available +from previously scheduled tours. This model uses person [time_windows](time_windows). + +```{note} +For `prototype_mtc`, the modeled time periods for all submodels are hourly from 3 am to +3 am the next day, and any times before 5 am are shifted to time period 5, and any times +after 11 pm are shifted to time period 23. +``` + +If ``tour_departure_and_duration_segments.csv`` is included in the configs, then the model +will use these representative start and end time periods when calculating mode choice logsums +instead of the specific start and end combinations for each alternative to reduce runtime. 
This +feature, known as ``representative logsums``, takes advantage of the fact that the mode choice logsum, +say, from 6 am to 2 pm is very similar to the logsum from 6 am to 3 pm, and 6 am to 4 pm, and so using +just 6 am to 3 pm (with the idea that 3 pm is the "representative time period") for these alternatives is +sufficient for tour scheduling. By reusing the 6 am to 3 pm mode choice logsum, ActivitySim saves +significant runtime. + +The main interface to the mandatory tour scheduling model is the +[mandatory_tour_scheduling](activitysim.abm.models.mandatory_scheduling.mandatory_tour_scheduling) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `mandatory_tour_scheduling.yaml` +- *Core Table*: `tours` +- *Result Field*: `start`, `end`, `duration` +- *Skim Keys*: `TAZ`, `workplace_taz`, `school_taz`, `start`, `end` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/mandatory_tour_scheduling.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/mandatory_tour_scheduling.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: mandatory_tour_scheduling +``` diff --git a/docs/dev-guide/components/mandatory_tour_frequency.md b/docs/dev-guide/components/mandatory_tour_frequency.md new file mode 100644 index 000000000..2a5e61706 --- /dev/null +++ b/docs/dev-guide/components/mandatory_tour_frequency.md @@ -0,0 +1,44 @@ +(component-mandatory-tour-frequency)= +# Mandatory Tour Frequency + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.mandatory_tour_frequency +``` + +The individual mandatory tour frequency model predicts the number of work and school tours +taken by each person with a mandatory DAP. The primary drivers of mandatory tour frequency +are demographics, accessibility-based parameters such as drive time to work, and household +automobile ownership. It also creates mandatory tours in the data pipeline. + +The main interface to the mandatory tour frequency model is the +[mandatory_tour_frequency](activitysim.abm.models.mandatory_tour_frequency.mandatory_tour_frequency) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `mandatory_tour_frequency.yaml` +- *Core Table*: `persons` +- *Result Field*: `mandatory_tour_frequency` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: MandatoryTourFrequencySettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/mandatory_tour_frequency.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/mandatory_tour_frequency.yaml) + +## Implementation + +```{eval-rst} +.. 
autofunction:: mandatory_tour_frequency +``` diff --git a/docs/dev-guide/components/non_mandatory_destination.md b/docs/dev-guide/components/non_mandatory_destination.md new file mode 100644 index 000000000..11ce041fe --- /dev/null +++ b/docs/dev-guide/components/non_mandatory_destination.md @@ -0,0 +1,43 @@ +(component-non-mandatory-destination)= +# Non-Mandatory Destination Choice + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.non_mandatory_destination +``` + +The non-mandatory tour destination choice model chooses a destination zone for +non-mandatory tours. The three-step process (sample, logsums, final choice) used for +mandatory tour destination choice is also used for non-mandatory tour destination choice. + +Non-mandatory tour location choice for [multiple_zone_systems](multiple_zone_systems) models uses [presampling](presampling) by default. + +The main interface to the non-mandatory tour destination choice model is the +[non_mandatory_tour_destination](activitysim.abm.models.non_mandatory_destination.non_mandatory_tour_destination) +function. This function is registered as an Inject step in the example Pipeline. See [writing_logsums](writing_logsums) +for how to write logsums for estimation. + +## Structure + +- *Configuration File*: `non_mandatory_tour_destination.yaml` +- *Core Table*: `tours` +- *Result Field*: `destination` +- *Skims Keys*: `TAZ, alt_dest, MD time period, MD time period` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: TourLocationComponentSettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/non_mandatory_tour_destination.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/non_mandatory_tour_destination.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: non_mandatory_tour_destination +``` diff --git a/docs/dev-guide/components/non_mandatory_scheduling.md b/docs/dev-guide/components/non_mandatory_scheduling.md new file mode 100644 index 000000000..3c73bdb78 --- /dev/null +++ b/docs/dev-guide/components/non_mandatory_scheduling.md @@ -0,0 +1,30 @@ +(component-non-mandatory-scheduling)= +# Non-Mandatory Tour Scheduling + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.non_mandatory_scheduling +``` +The non-mandatory tour scheduling model selects a tour departure and duration period (and therefore a start and end +period as well) for each non-mandatory tour. This model uses person [time_windows](time_windows). It includes support +for [representative_logsums](representative_logsums). + +The main interface to the non-mandatory tour scheduling model is the +[non_mandatory_tour_scheduling](activitysim.abm.models.non_mandatory_scheduling.non_mandatory_tour_scheduling) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `non_mandatory_tour_scheduling.yaml` +- *Core Table*: `tours` +- *Result Field*: `start, end, duration` +- *Skims Keys*: `TAZ, destination, MD time period, MD time period` + +### Examples + +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/non_mandatory_tour_scheduling.yaml) + +## Implementation + +```{eval-rst} +.. 
autofunction:: non_mandatory_tour_scheduling +``` diff --git a/docs/dev-guide/components/non_mandatory_tour_frequency.md b/docs/dev-guide/components/non_mandatory_tour_frequency.md new file mode 100644 index 000000000..c4bd957de --- /dev/null +++ b/docs/dev-guide/components/non_mandatory_tour_frequency.md @@ -0,0 +1,42 @@ +(component-non-mandatory-tour-frequency)= +# Non-Mandatory Tour Frequency + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.non_mandatory_tour_frequency +``` + +The non-mandatory tour frequency model selects the number of non-mandatory tours made by each person on the simulation day. +It also adds non-mandatory tours to the tours in the data pipeline. The individual non-mandatory tour frequency model +operates in two stages: + + * A choice is made using a random utility model between combinations of tours containing zero, one, and two or more escort tours, and between zero and one or more tours of each other purpose. + * Up to two additional tours of each purpose are added according to fixed extension probabilities. + +The main interface to the non-mandatory tour frequency model is the +[non_mandatory_tour_frequency](activitysim.abm.models.non_mandatory_tour_frequency.non_mandatory_tour_frequency) +function. This function is registered as an Inject step in the example Pipeline. + +## Structure + +- *Configuration File*: `non_mandatory_tour_frequency.yaml` +- *Core Table*: `persons` +- *Result Field*: `non_mandatory_tour_frequency` + +## Configuration + +```{eval-rst} +.. autopydantic_model:: NonMandatoryTourFrequencySettings + :inherited-members: BaseModel, PydanticReadable + :show-inheritance: +``` + +### Examples + +- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/non_mandatory_tour_frequency.yaml) +- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/non_mandatory_tour_frequency.yaml) + +## Implementation + +```{eval-rst} +.. autofunction:: non_mandatory_tour_frequency +``` diff --git a/docs/dev-guide/components/parking_location_choice.md b/docs/dev-guide/components/parking_location_choice.md new file mode 100644 index 000000000..05b6e2a3f --- /dev/null +++ b/docs/dev-guide/components/parking_location_choice.md @@ -0,0 +1,78 @@ +(component-parking-location-choice)= +# Parking Location Choice + +```{eval-rst} +.. currentmodule:: activitysim.abm.models.parking_location_choice +``` + +The parking location choice model selects a parking location for specified trips. While the model does not +require that parking location be applied to any specific set of trips, it is usually applied for drive trips to +specific zones (e.g., CBD) in the model. + +The model provides a filter for both the eligible choosers and the eligible parking location zones. The +trips dataframe is the chooser table for this model. The zone selection filter is applied to the land use zones +dataframe. + +If this model is specified in the pipeline, the *Write Trip Matrices* step will use the parking location +choice results to build trip tables in lieu of the trip destination. + +The main interface to the parking location choice model is the +[parking_location_choice](activitysim.abm.models.parking_location_choice.parking_location_choice) function. This function +is registered as an Inject step, and it is available from the pipeline. + +## Structure + +- *Configuration File*: `parking_location_choice.yaml` +- *Core Table*: `trips` +- *Result*: `omx trip matrices` +- *Skims*: `odt_skims: Origin to Destination by Time of Day`, `dot_skims: Destination to Origin by Time of Day`, +`opt_skims: Origin to Parking Zone by Time of Day`, `pdt_skims: Parking Zone to Destination by Time of Day`, +`od_skims: Origin to Destination`, `do_skims: Destination to Origin`, `op_skims: Origin to Parking Zone`, +`pd_skims: Parking Zone to Destination` + +#### Required YAML attributes: + +An illustrative sketch assembling these attributes into a single file follows this list. + +- `SPECIFICATION`: + This file defines the logit specification for each chooser segment. +- `COEFFICIENTS`: + Specification coefficients +- `PREPROCESSOR`: + Preprocessor definitions to run on the chooser dataframe (trips) before the model is run +- `CHOOSER_FILTER_COLUMN_NAME`: + Boolean field on the chooser table defining which choosers are eligible for the parking location choice model. If no + filter is specified, all choosers (trips) are eligible for the model. +- `CHOOSER_SEGMENT_COLUMN_NAME`: + Column on the chooser table defining the parking segment for the logit model +- `SEGMENTS`: + List of eligible chooser segments in the logit specification +- `ALTERNATIVE_FILTER_COLUMN_NAME`: + Boolean field used to filter land use zones as eligible parking location choices. If no filter is specified, + then all land use zones are considered as viable choices. +- `ALT_DEST_COL_NAME`: + The column name to append with the parking location choice results. For choosers (trips) ineligible for this + model, a -1 value will be placed in this column. +- `TRIP_ORIGIN`: + Origin field on the chooser trip table +- `TRIP_DESTINATION`: + Destination field on the chooser trip table
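+
+A minimal sketch assembling the attributes above into one file (the file and
+column names here are hypothetical placeholders, not a shipped configuration):
+
+```yaml
+SPECIFICATION: parking_location_choice.csv
+COEFFICIENTS: parking_location_choice_coefficients.csv
+PREPROCESSOR:
+  SPEC: parking_location_choice_annotate_trips_preprocessor
+  DF: trips
+CHOOSER_FILTER_COLUMN_NAME: is_cbd_drive_trip      # which trips make a parking choice
+CHOOSER_SEGMENT_COLUMN_NAME: parking_segment
+SEGMENTS:
+  - work
+  - nonwork
+ALTERNATIVE_FILTER_COLUMN_NAME: is_parking_zone    # which zones are candidate locations
+ALT_DEST_COL_NAME: parking_zone
+TRIP_ORIGIN: origin
+TRIP_DESTINATION: destination
+```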
+
+## Structure
+
+- *Configuration File*: `parking_location_choice.yaml`
+- *Core Table*: `trips`
+- *Result Field*: the trips table column named by `ALT_DEST_COL_NAME` (the chosen parking zone)
+- *Skims*: `odt_skims: Origin to Destination by Time of Day`, `dot_skims: Destination to Origin by Time of Day`,
+  `opt_skims: Origin to Parking Zone by Time of Day`, `pdt_skims: Parking Zone to Destination by Time of Day`,
+  `od_skims: Origin to Destination`, `do_skims: Destination to Origin`, `op_skims: Origin to Parking Zone`,
+  `pd_skims: Parking Zone to Destination`
+
+### Required YAML attributes
+
+- `SPECIFICATION`:
+  This file defines the logit specification for each chooser segment.
+- `COEFFICIENTS`:
+  Specification coefficients
+- `PREPROCESSOR`:
+  Preprocessor definitions to run on the chooser dataframe (trips) before the model is run
+- `CHOOSER_FILTER_COLUMN_NAME`:
+  Boolean field on the chooser table defining which choosers are eligible for the parking location choice model. If no
+  filter is specified, all choosers (trips) are eligible for the model.
+- `CHOOSER_SEGMENT_COLUMN_NAME`:
+  Column on the chooser table defining the parking segment for the logit model
+- `SEGMENTS`:
+  List of eligible chooser segments in the logit specification
+- `ALTERNATIVE_FILTER_COLUMN_NAME`:
+  Boolean field used to filter land use zones as eligible parking location choices. If no filter is specified,
+  then all land use zones are considered viable choices.
+- `ALT_DEST_COL_NAME`:
+  The column name to append with the parking location choice results. For choosers (trips) ineligible for this
+  model, a -1 value will be placed in this column.
+- `TRIP_ORIGIN`:
+  Origin field on the chooser trip table
+- `TRIP_DESTINATION`:
+  Destination field on the chooser trip table
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: ParkingLocationSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/parking_location_choice.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: parking_location
+.. autofunction:: wrap_skims
+.. autofunction:: parking_destination_simulate
+```
diff --git a/docs/dev-guide/components/school_escorting.md b/docs/dev-guide/components/school_escorting.md
new file mode 100644
index 000000000..f1f162603
--- /dev/null
+++ b/docs/dev-guide/components/school_escorting.md
@@ -0,0 +1,167 @@
+(component-school-escorting)=
+# School Escorting
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.school_escorting
+```
+The school escort model determines whether children are dropped off at or picked up from school,
+simultaneously with the chaperone responsible for chauffeuring the children,
+which children are bundled together on half-tours, and the type of tour (pure escort versus rideshare).
+The model is run after work and school locations have been chosen for all household members,
+and after work and school tours have been generated and scheduled.
+The model labels household members of driving age as potential ‘chauffeurs’ and children with school tours as potential ‘escortees’.
+The model then attempts to match potential chauffeurs with potential escortees in a choice model whose alternatives
+consist of ‘bundles’ of escortees with a chauffeur for each half tour.
+
+School escorting is a household level decision – each household will choose an alternative from the `school_escorting_alts.csv` file,
+with the first alternative being no escorting.
+This file contains the following columns:
+
+```{eval-rst}
+.. list-table::
+   :header-rows: 1
+
+   * - Column Name
+     - Column Description
+   * - Alt
+     - Alternative number
+   * - bundle[1,2,3]
+     - bundle number for child 1, 2, and 3
+   * - chauf[1,2,3]
+     - chauffeur number for child 1, 2, and 3:
+
+       - 0 = child not escorted
+       - 1 = chauffeur 1 as ride share
+       - 2 = chauffeur 1 as pure escort
+       - 3 = chauffeur 2 as ride share
+       - 4 = chauffeur 2 as pure escort
+   * - nbund[1,2]
+     - number of escorting bundles for chauffeur 1 and 2
+   * - nbundles
+     - total number of bundles (equals nbund1 + nbund2)
+   * - nrs1
+     - number of ride share bundles for chauffeur 1
+   * - npe1
+     - number of pure escort bundles for chauffeur 1
+   * - nrs2
+     - number of ride share bundles for chauffeur 2
+   * - npe2
+     - number of pure escort bundles for chauffeur 2
+   * - Description
+     - text description of alternative
+```
+
+The model as currently implemented contains three escortees and two chauffeurs.
+Escortees are students under age 16 with a mandatory tour, whereas chaperones are all persons in the household over the age of 18.
+For households that have more than three possible escortees, the three youngest children are selected for the model.
+The two chaperones are selected as the adults of the household with the highest weight according to the following calculation:
+`Weight = 100*personType + 10*gender + 1*age(0,1)`,
+where `personType` is the person type number from 1 to 5, `gender` is 1 for male and 2 for female, and
+`age` is a binary indicator equal to 1 if age is over 25, else 0.
+
+The model is run sequentially three times, once in the outbound direction, once in the inbound direction,
+and again in the outbound direction with additional conditions on what happened in the inbound direction.
+There are therefore three sets of utility specifications, coefficients, and pre-processor files.
+Each of these files is specified in the school_escorting.yaml file along with the number of escortees and number of chaperones.
+
+There is also a constants section in the school_escorting.yaml file which contains two constants.
+One sets the maximum time bin difference to match school and work tours for ride sharing,
+and the other sets the number of minutes per time bin.
+In the [prototype_mtc_extended](prototype_mtc_extended) example, these are set to 1 and 60 respectively.
+
+After a school escorting alternative is chosen for the inbound and outbound direction, the model will
+create the tours and trips associated with the decision. Pure escort tours are created,
+and the mandatory tour start and end times are changed to match the school escort bundle start and end times.
+(Outbound tours have their start times matched and inbound tours have their end times matched.)
+Escortee drop-off / pick-up order is determined by the distance from home to the school locations.
+They are ordered from smallest to largest in the outbound direction, and largest to smallest in the inbound direction.
+Trips are created for each half-tour that includes school escorting according to the provided order.
+
+The created pure escort tours are joined to the already created mandatory tour table in the pipeline
+and are also saved separately to the pipeline under the table name `school_escort_tours`.
+Created school escorting trips are saved to the pipeline under the table name `school_escort_trips`.
+By saving these to the pipeline, their data can be queried in downstream models to set correct purposes,
+destinations, and schedules to satisfy the school escorting model choice.
+
+There are a host of downstream model changes that are involved when including the school escorting model.
+The following table and list contain the models that are changed in some way when school escorting is included:
+
+```{eval-rst}
+.. list-table::
+   :header-rows: 1
+
+   * - File Name(s)
+     - Change(s) Needed
+   * - - ``non_mandatory_tour_scheduling_annotate_tours_preprocessor.csv``
+       - ``tour_scheduling_nonmandatory.csv``
+     - - Set availability conditions based on school escort tour times
+       - Do not schedule over other school escort tours
+   * - - ``tour_mode_choice_annotate_choosers_preprocessor.csv``
+       - ``tour_mode_choice.csv``
+     - - Count number of escortees on tour by parsing the ``escort_participants`` column
+       - Set mode choice availability based on number of escortees
+   * - - ``stop_frequency_school.csv``
+       - ``stop_frequency_work.csv``
+       - ``stop_frequency_univ.csv``
+       - ``stop_frequency_escort.csv``
+     - - Do not allow stops for half-tours that include school escorting
+   * - - ``trip_mode_choice_annotate_trips_preprocessor.csv``
+       - ``trip_mode_choice.csv``
+     - - Count number of escortees on trip by parsing the ``escort_participants`` column
+       - Set mode choice availability based on number of escortees
+```
+
+- *Joint tour scheduling:* Joint tours are not allowed
+  to be scheduled over school escort tours.
+  This happens automatically by updating the timetable object with the updated mandatory tour times
+  and created pure escort tour times after the school escorting model is run.
+  There were no code or config changes in this model, but it is still affected by school escorting.
+- *Non-Mandatory tour frequency:* Pure school escort tours are joined to the tours created in the
+  non-mandatory tour frequency model and tour statistics (such as tour_count and tour_num) are re-calculated.
+- *Non-Mandatory tour destination:* Since the primary destination of pure school escort tours is known,
+  they are removed from the choosers table and have their destination set according to the destination in the
+  school_escort_tours table. They are also excluded from the estimation data bundle.
+- *Non-Mandatory tour scheduling:* Pure escort tours need to have the non-escorting portion of their tour scheduled.
+  This is done by inserting availability conditions in the model specification that ensure the alternative
+  chosen for the start of the tour is equal to the alternative start time for outbound tours and the end time
+  is equal to the alternative end time for inbound tours. There are additional terms that ensure the tour
+  does not overlap with subsequent school escorting tours as well. Beware: if the availability conditions
+  in the school escorting model are not set correctly, the tours created may not be consistent with each other
+  and this model will fail.
+- *Tour mode choice:* Availability conditions are set in tour mode choice to prohibit the drive alone mode
+  if the tour contains an escortee and the shared-ride 2 mode if the tour contains more than one escortee.
+- *Stop Frequency:* No stops are allowed on half-tours that include school escorting.
+  This is enforced by adding availability conditions in the stop frequency model. After the stop frequency
+  model is run, the school escorting trips are merged with the trips created by the stop frequency model
+  and a new stop frequency is computed along with updated trip numbers.
+- *Trip purpose, destination, and scheduling:* Trip purpose, destination, and departure times are known
+  for school escorting trips. As such, they are removed from their respective chooser tables and the estimation
+  data bundles, and set according to the values in the school_escort_trips table residing in the pipeline.
+- *Trip mode choice:* As in tour mode choice, availability conditions are set to prohibit a trip containing
+  an escortee from using the drive alone mode, and to prohibit the shared-ride 2 mode for trips with more than one escortee.
+
+Many of the changes discussed in the above list are handled in the code and the user is not required to make any
+changes when implementing the school escorting model. However, it is the user's responsibility to include the
+changes listed in the table above in the relevant model configuration files for models downstream of the school escorting model.
+
+When not including the school escorting model, all of the escort trips to and from school are counted implicitly in
+escort tours determined in the non-mandatory tour frequency model. Thus, when including the school escort model and
+accounting for these tours explicitly, extra care should be taken not to double count them in the non-mandatory
+tour frequency model. The non-mandatory tour frequency model should be re-evaluated and likely changed to decrease
+the number of escort tours generated by that model.
+This was not implemented in the [prototype_mtc_extended](prototype_mtc_extended)
+implementation due to a lack of data surrounding the number of escort tours in the region.
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: SchoolEscortSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC Extended](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: school_escorting
+```
diff --git a/docs/dev-guide/components/school_location_choice.md b/docs/dev-guide/components/school_location_choice.md
new file mode 100644
index 000000000..6238d8e42
--- /dev/null
+++ b/docs/dev-guide/components/school_location_choice.md
@@ -0,0 +1,53 @@
+(component-school-location)=
+# School Location
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.location_choice
+```
+
+The usual school location choice models assign a usual school location for the primary
+mandatory activity of each child and university student in the
+synthetic population. The models are composed of a set of accessibility-based parameters
+(including one-way distance between home and primary destination and the tour mode choice
+logsum - the expected maximum utility in the mode choice model, which is given by the
+logarithm of the sum of exponentials in the denominator of the logit formula) and size terms,
+which describe the quantity of grade-school or university opportunities in each possible
+destination.
+
+The school location model is made up of four steps:
+
+  * sampling - selects a sample of alternative school locations for the next model step. This selects X locations from the full set of model zones using a simple utility.
+  * logsums - starts with the table created above and calculates and adds the mode choice logsum expression for each alternative school location.
+  * simulate - starts with the table created above and chooses a final school location, this time with the mode choice logsum included.
+  * shadow prices - compares modeled zonal destinations to target zonal size terms and calculates updated shadow prices.
+
+These steps are repeated until shadow pricing convergence criteria are satisfied or a max number of iterations is reached. See [shadow_pricing](shadow_pricing).
+
+School location choice for [multiple_zone_systems](multiple_zone_systems) models uses [presampling](presampling) by default.
+
+The main interface to the model is the [school_location](activitysim.abm.models.location_choice.school_location) function.
+This function is registered as an Inject step in the example Pipeline. See [writing_logsums](writing_logsums) for how to write logsums for estimation.
+
+## Structure
+
+- *Configuration File*: `school_location.yaml`
+- *Core Table*: `persons`
+- *Result Field*: `school_taz`
+- *School Location - Skims Keys*: `TAZ, alt_dest, AM time period, MD time period`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TourLocationComponentSettings
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/school_location.yaml)
+- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/school_location.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: school_location
+```
diff --git a/docs/dev-guide/components/shadow_pricing.md b/docs/dev-guide/components/shadow_pricing.md
new file mode 100644
index 000000000..c618858fe
--- /dev/null
+++ b/docs/dev-guide/components/shadow_pricing.md
@@ -0,0 +1,83 @@
+(component-shadow-pricing)=
+# Shadow Pricing
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.tables.shadow_pricing
+```
+
+The shadow pricing calculator is used by the work and school location choice models.
+
+## Structure
+
+- *Configuration File*: `shadow_pricing.yaml`
+
+### Turning on and saving shadow prices
+
+Shadow pricing is activated by setting `use_shadow_pricing` to True in the settings.yaml file.
+Once this setting has been activated, ActivitySim will search for shadow pricing configuration in
+the shadow_pricing.yaml file. When shadow pricing is activated, the shadow pricing outputs will be
+exported by the tracing engine. As a result, the shadow pricing output files will be prepended with
+`trace` followed by the iteration number the results represent. For example, the shadow pricing
+outputs for iteration 3 of the school location model will be called
+`trace.shadow_price_school_shadow_prices_3.csv`.
+
+In total, ActivitySim generates three types of output files for each model with shadow pricing:
+
+- `trace.shadow_price_<model>_desired_size.csv` The size terms by zone that the ctramp and daysim
+  methods are attempting to target. These equal the size term columns in the land use data
+  multiplied by size term coefficients.
+
+- `trace.shadow_price_<model>_modeled_size_<iteration>.csv` These are the modeled size terms after
+  the iteration of shadow pricing identified by the number. In other words, these are
+  the predicted choices by zone and segment for the model after the iteration completes. (Not
+  applicable for `simulation` option.)
+
+- `trace.shadow_price_<model>_shadow_prices_<iteration>.csv` The actual shadow price for each zone
+  and segment after the `<iteration>` of shadow pricing. This is the file that can be used to warm
+  start the shadow pricing mechanism in ActivitySim. (Not applicable for `simulation` option.)
+
+There are three shadow pricing methods in ActivitySim: `ctramp`, `daysim`, and `simulation`.
+The first two methods try to match model output with workplace/school location model size terms,
+while the last method matches model output with actual employment/enrollment data.
+
+The simulation approach operates in the following steps. First, every worker / student is
+assigned without shadow prices applied. The modeled share and the target share for each zone are
+compared. If a zone is over-assigned, a sample of people from the over-assigned zones will be
+selected for re-simulation. Shadow prices are set to -999 for the next iteration for over-assigned
+zones, which removes the zone from the set of alternatives in the next iteration. The sampled people
+will then be forced to choose from one of the under-assigned zones that still have the initial
+shadow price of 0. (In this approach, the shadow price variable is really just a switch turning that
+zone on or off for selection in the subsequent iterations. For this reason, warm-start functionality
+for this approach is not applicable.) This process repeats until the overall convergence criteria
+are met or the maximum number of allowed iterations is reached.
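+
+A minimal toy sketch of this over-assignment loop is shown below. All names
+(`simulate`, `zone_targets`, the convergence rule, etc.) are illustrative
+assumptions, not the actual ActivitySim implementation.
+
+```python
+import numpy as np
+import pandas as pd
+
+
+def simulation_shadow_pricing(utilities: pd.DataFrame, zone_targets: pd.Series,
+                              max_iterations: int = 10, tol: float = 0.02):
+    """utilities: persons x zones; zone_targets: target counts per zone."""
+    rng = np.random.default_rng(0)
+    shadow_price = pd.Series(0.0, index=zone_targets.index)
+
+    def simulate(person_ids):
+        # logit choice over zones that are still "open" (shadow price == 0)
+        open_zones = shadow_price.index[shadow_price == 0]
+        expu = np.exp(utilities.loc[person_ids, open_zones].to_numpy())
+        probs = expu / expu.sum(axis=1, keepdims=True)
+        return pd.Series([rng.choice(open_zones, p=p) for p in probs],
+                         index=person_ids)
+
+    choices = simulate(utilities.index)  # first pass: no shadow prices applied
+    for _ in range(max_iterations):
+        modeled = choices.value_counts().reindex(zone_targets.index, fill_value=0)
+        excess = (modeled - zone_targets).clip(lower=0)
+        if excess.sum() <= tol * len(choices):
+            break  # overall convergence criteria met
+        over = excess[excess > 0].index
+        shadow_price.loc[over] = -999  # switch over-assigned zones off
+        # re-simulate only a sample of people from the over-assigned zones
+        candidates = choices.index[choices.isin(over)]
+        resim = rng.choice(candidates, size=int(excess.sum()), replace=False)
+        choices.loc[resim] = simulate(pd.Index(resim))
+    return choices, shadow_price
+```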
+
+Because the simulation approach only re-simulates workers / students who were over-assigned in the
+previous iteration, run time is significantly lower (roughly 90% less) than the CTRAMP or DaySim approaches, which
+re-simulate all workers and students at each iteration.
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: ShadowPriceSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/shadow_pricing.yaml)
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/shadow_pricing.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autoclass:: ShadowPriceCalculator
+.. autofunction:: buffers_for_shadow_pricing
+.. autofunction:: buffers_for_shadow_pricing_choice
+.. autofunction:: shadow_price_data_from_buffers_choice
+.. autofunction:: shadow_price_data_from_buffers
+.. autofunction:: load_shadow_price_calculator
+.. autofunction:: add_size_tables
+.. autofunction:: get_shadow_pricing_info
+.. autofunction:: get_shadow_pricing_choice_info
+```
diff --git a/docs/dev-guide/components/stop_frequency.md b/docs/dev-guide/components/stop_frequency.md
new file mode 100644
index 000000000..84e3ba25e
--- /dev/null
+++ b/docs/dev-guide/components/stop_frequency.md
@@ -0,0 +1,53 @@
+(component-stop-frequency)=
+# Stop Frequency
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.stop_frequency
+```
+
+The stop frequency model assigns to each tour the number of intermediate destinations a person
+will travel to on each leg of the tour from the origin to the tour primary destination and back.
+The model incorporates the ability for more than one stop in each direction,
+up to a maximum of 3, for a total of 8 trips per tour (four on each tour leg).
+
+Intermediate stops are not modeled for drive-transit tours because the location of the vehicle is
+difficult to track, and modeling such stops can therefore have unintended consequences. For example,
+consider someone who used a park and ride for work and then took transit to an intermediate
+shopping stop on the way home. Without knowing the vehicle location, it cannot be determined
+if it is reasonable to allow the person to drive home. Even if the tour were constrained to allow
+driving only on the first and final trip, the trip home from an intermediate stop may not use the
+same park and ride where the car was dropped off on the outbound leg, which is usually as close
+as possible to home because of the impracticality of coding drive access links from every park
+and ride lot to every zone.
+
+This model also creates a trips table in the pipeline for later models, as sketched below.
+
+The main interface to the intermediate stop frequency model is the
+[stop_frequency](activitysim.abm.models.stop_frequency.stop_frequency)
+function. This function is registered as an Inject step in the example Pipeline.
+
+## Structure
+
+- *Configuration File*: `stop_frequency.yaml`
+- *Core Table*: `tours`
+- *Result Field*: `stop_frequency`
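+
+As a simplified sketch (not the actual implementation), a stop frequency result
+such as `1out_2in` expands a tour into trips as follows; the parsing and column
+names here are illustrative only.
+
+```python
+import pandas as pd
+
+
+def expand_tour_to_trips(tour_id: int, stop_frequency: str) -> pd.DataFrame:
+    # e.g. "1out_2in" -> 1 outbound stop, 2 inbound stops
+    out_stops, in_stops = (
+        int(part.rstrip("outin")) for part in stop_frequency.split("_")
+    )
+    trips = []
+    for direction, stops in (("outbound", out_stops), ("inbound", in_stops)):
+        # a leg with N intermediate stops contains N + 1 trips
+        for trip_num in range(1, stops + 2):
+            trips.append({"tour_id": tour_id,
+                          "outbound": direction == "outbound",
+                          "trip_num": trip_num})
+    return pd.DataFrame(trips)
+
+
+print(expand_tour_to_trips(101, "1out_2in"))  # 2 outbound + 3 inbound trips
+```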
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: StopFrequencySettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/stop_frequency.yaml)
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/stop_frequency.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: stop_frequency
+```
diff --git a/docs/dev-guide/components/telecommute_frequency.md b/docs/dev-guide/components/telecommute_frequency.md
new file mode 100644
index 000000000..971188533
--- /dev/null
+++ b/docs/dev-guide/components/telecommute_frequency.md
@@ -0,0 +1,44 @@
+(component-telecommute-frequency)=
+# Telecommute Frequency
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.telecommute_frequency
+```
+
+Telecommuting is defined as working from home instead of traveling to work; it only applies to
+workers with a regular workplace outside of home. The telecommute model consists of two
+submodels - a person [work_from_home](work_from_home) model and this person telecommute frequency model.
+
+For all workers that work out of the home, the telecommute frequency model predicts the
+level of telecommuting. The model alternatives are the frequency of telecommuting in
+days per week (0 days, 1 day, 2 to 3 days, 4+ days).
+
+The main interface to the telecommute frequency model is the
+[telecommute_frequency](activitysim.abm.models.telecommute_frequency) function. This
+function is registered as an Inject step in the example Pipeline.
+
+## Structure
+
+- *Configuration File*: `telecommute_frequency.yaml`
+- *Core Table*: `persons`
+- *Result Field*: `telecommute_frequency`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TelecommuteFrequencySettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/telecommute_frequency.yaml)
+- [Prototype SEMCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_semcog/configs/telecommute_frequency.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: telecommute_frequency
+```
diff --git a/docs/dev-guide/components/tour_mode_choice.md b/docs/dev-guide/components/tour_mode_choice.md
new file mode 100644
index 000000000..43c95c38c
--- /dev/null
+++ b/docs/dev-guide/components/tour_mode_choice.md
@@ -0,0 +1,68 @@
+(component-tour-mode-choice)=
+# Tour Mode Choice
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.tour_mode_choice
+```
+
+The mandatory, non-mandatory, and joint tour mode choice model assigns to each tour the "primary" mode that
+is used to get from the origin to the primary destination. The tour-based modeling approach requires a reconsideration
+of the conventional mode choice structure. Instead of a single mode choice model used in a four-step
+structure, there are two different levels where the mode choice decision is modeled: (a) the
+tour mode level (upper-level choice); and (b) the trip mode level (lower-level choice conditional
+upon the upper-level choice).
+
+The mandatory, non-mandatory, and joint tour mode level represents the decisions that apply to the entire tour, and
+that will affect the alternatives available for each individual trip or joint trip.
+These decisions include the choice to use a private
+car versus using public transit, walking, or biking; whether carpooling will be considered; and
+whether transit will be accessed by car or on foot. Trip-level decisions correspond to details of
+the exact mode used for each trip, which may or may not change over the trips in the tour.
+
+The mandatory, non-mandatory, and joint tour mode choice structure is a nested logit model which separates
+similar modes into different nests to more accurately model the cross-elasticities between the alternatives. The
+eighteen modes are incorporated into the nesting structure specified in the model settings file. The
+first level of nesting represents the use of a private car, non-motorized
+means, or transit. In the second level of nesting, the auto nest is divided into vehicle occupancy
+categories, and transit is divided into walk access and drive access nests. The final level splits
+the auto nests into free or pay alternatives and the transit nests into the specific line-haul modes.
+
+The primary variables are in-vehicle time, other travel times, cost (the influence of which is derived
+from the automobile in-vehicle time coefficient and the persons' modeled value of time),
+characteristics of the destination zone, demographics, and the household's level of auto
+ownership.
+
+The main interface to the mandatory, non-mandatory, and joint tour mode model is the
+[tour_mode_choice_simulate](activitysim.abm.models.tour_mode_choice.tour_mode_choice_simulate) function. This function is
+registered as an Inject step in the example Pipeline.
+See [writing_logsums](writing_logsums) for how to write logsums for estimation.
+
+## Structure
+
+- *Configuration File*: `tour_mode_choice.yaml`
+- *Core Table*: `tours`
+- *Result Field*: `mode`
+- *Skims Keys*: `TAZ, destination, start, end`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TourModeComponentSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/tour_mode_choice.yaml)
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/tour_mode_choice.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: tour_mode_choice_simulate
+.. autofunction:: create_logsum_trips
+.. autofunction:: append_tour_leg_trip_mode_choice_logsums
+.. autofunction:: get_trip_mc_logsums_for_all_modes
+```
diff --git a/docs/dev-guide/components/transit_pass_ownership.md b/docs/dev-guide/components/transit_pass_ownership.md
new file mode 100644
index 000000000..845979878
--- /dev/null
+++ b/docs/dev-guide/components/transit_pass_ownership.md
@@ -0,0 +1,43 @@
+(component-transit-pass-ownership)=
+# Transit Pass Ownership
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.transit_pass_ownership
+```
+
+The transit fare discount model applies to persons who purchase or are
+provided a transit pass. The transit fare discount consists of two submodels - this
+transit pass ownership model and a person [transit_pass_subsidy](transit_pass_subsidy) model. The
+result of this model can be used to condition downstream models such as the tour and trip
+mode choice models via fare discount adjustments.
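+
+As noted below, this component is structured as a binary logit model. A minimal,
+self-contained sketch of such a choice is shown here; the utility values are purely
+illustrative, since the real component reads its utility terms from the CSV
+specification files.
+
+```python
+import numpy as np
+
+
+def binary_logit_choice(utility_yes: np.ndarray, seed: int = 0) -> np.ndarray:
+    # P(yes) under a binary logit, with the "no" alternative's utility fixed at 0
+    rng = np.random.default_rng(seed)
+    p_yes = 1.0 / (1.0 + np.exp(-utility_yes))
+    return rng.random(utility_yes.shape) < p_yes  # True = owns a pass
+
+
+# hypothetical utilities, e.g. increasing with transit accessibility
+print(binary_logit_choice(np.array([-2.0, -0.5, 0.8, 2.5])))
+```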
+
+The main interface to the transit pass ownership model is the
+[transit_pass_ownership](activitysim.abm.models.transit_pass_ownership) function. This
+function is registered as an Inject step in the example Pipeline.
+
+This model generates only True or False outcomes, and is structured as a binary
+logit model.
+
+## Structure
+
+- *Configuration File*: `transit_pass_ownership.yaml`
+- *Core Table*: `persons`
+- *Result Field*: `transit_pass_ownership`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TransitPassOwnershipSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype SEMCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_semcog/configs/transit_pass_ownership.yaml)
+- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/transit_pass_ownership.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: transit_pass_ownership
+```
diff --git a/docs/dev-guide/components/transit_pass_subsidy.md b/docs/dev-guide/components/transit_pass_subsidy.md
new file mode 100644
index 000000000..e8e2bf772
--- /dev/null
+++ b/docs/dev-guide/components/transit_pass_subsidy.md
@@ -0,0 +1,42 @@
+(component-transit-pass-subsidy)=
+# Transit Pass Subsidy
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.transit_pass_subsidy
+```
+
+The transit pass subsidy model is a component of the transit fare discount model, which models persons who purchase or are
+provided a transit pass. The transit fare discount consists of two submodels - this
+transit pass subsidy model and a person [transit_pass_ownership](transit_pass_ownership) model. The
+result of this model can be used to condition downstream models such as the
+person [transit_pass_ownership](transit_pass_ownership) model and the tour and trip mode choice models
+via fare discount adjustments.
+
+The main interface to the transit pass subsidy model is the
+[transit_pass_subsidy](activitysim.abm.models.transit_pass_subsidy) function. This
+function is registered as an Inject step in the example Pipeline.
+
+## Structure
+
+- *Configuration File*: `transit_pass_subsidy.yaml`
+- *Core Table*: `persons`
+- *Result Field*: `transit_pass_subsidy`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TransitPassSubsidySettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/transit_pass_subsidy.yaml)
+- [Prototype SEMCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_semcog/configs/transit_pass_subsidy.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: transit_pass_subsidy
+```
diff --git a/docs/dev-guide/components/trip_departure_choice.md b/docs/dev-guide/components/trip_departure_choice.md
new file mode 100644
index 000000000..268f3b3fa
--- /dev/null
+++ b/docs/dev-guide/components/trip_departure_choice.md
@@ -0,0 +1,33 @@
+(component-trip-departure-choice)=
+# Trip Departure Choice
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.trip_departure_choice
+```
+
+Used in conjunction with [Trip Scheduling Choice](component-trip-scheduling-choice) (Logit Choice), this model chooses departure
+time periods consistent with the time windows for the appropriate leg of the trip.
+
+## Structure
+
+- *Configuration File*: `trip_departure_choice.yaml`
+- *Core Table*: `trips`
+- *Result Field*: `depart`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TripDepartureChoiceSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/trip_departure_choice.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: trip_departure_choice
+```
diff --git a/docs/dev-guide/components/trip_mode_choice.md b/docs/dev-guide/components/trip_mode_choice.md
new file mode 100644
index 000000000..331f09f37
--- /dev/null
+++ b/docs/dev-guide/components/trip_mode_choice.md
@@ -0,0 +1,49 @@
+(component-trip-mode-choice)=
+# Trip Mode Choice
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.trip_mode_choice
+```
+
+The trip mode choice model assigns a travel mode for each trip on a given tour. It
+operates similarly to the tour mode choice model, but only certain trip modes are available for
+each tour mode. The correspondence rules are defined according to the following principles, illustrated in the sketch below:
+
+  * Pay trip modes are only available for pay tour modes (for example, drive-alone pay is only available at the trip mode level if drive-alone pay is selected as a tour mode).
+  * The auto occupancy of the tour mode is determined by the maximum occupancy across all auto trips that make up the tour.
+  * Transit tours can include auto shared-ride trips for particular legs. Therefore, 'casual carpool', wherein travelers share a ride to work and take transit back to the tour origin, is explicitly allowed in the tour/trip mode choice model structure.
+  * The walk mode is allowed for any trip.
+  * The availability of transit line-haul submodes on transit tours depends on the skimming and tour mode choice hierarchy. Free shared-ride modes are also available in walk-transit tours, albeit with a low probability. Paid shared-ride modes are not allowed on transit tours because no stated preference data is available on the sensitivity of transit riders to automobile value tolls, and no observed data is available to verify the number of people shifting into paid shared-ride trips on transit tours.
+
+The trip mode choice model's explanatory variables include household and person variables, level-of-service
+between the trip origin and destination according to the time period for the tour leg, urban form
+variables, and alternative-specific constants segmented by tour mode.
+
+The main interface to the trip mode choice model is the
+[trip_mode_choice](activitysim.abm.models.trip_mode_choice.trip_mode_choice) function. This function
+is registered as an Inject step in the example Pipeline.
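+
+The sketch below illustrates the correspondence-rule idea as a simple lookup from
+tour mode to available trip modes. The mode names and mappings shown are examples
+only; the actual correspondence is defined in the trip mode choice configuration
+files, not in code like this.
+
+```python
+# hypothetical tour-mode -> available-trip-modes correspondence
+TRIP_MODES_BY_TOUR_MODE = {
+    "DRIVEALONEFREE": ["DRIVEALONEFREE", "WALK"],
+    "DRIVEALONEPAY": ["DRIVEALONEPAY", "DRIVEALONEFREE", "WALK"],
+    "SHARED2FREE": ["SHARED2FREE", "DRIVEALONEFREE", "WALK"],
+    "WALK_TRANSIT": ["WALK_TRANSIT", "SHARED2FREE", "WALK"],  # casual carpool legs
+}
+
+
+def available_trip_modes(tour_mode: str) -> list[str]:
+    # the walk mode is allowed for any trip, so it is the fallback
+    return TRIP_MODES_BY_TOUR_MODE.get(tour_mode, ["WALK"])
+
+
+print(available_trip_modes("WALK_TRANSIT"))
+```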
+
+## Structure
+
+- *Configuration File*: `trip_mode_choice.yaml`
+- *Result Field*: `trip_mode`
+- *Skim Keys*: `origin, destination, trip_period`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TripModeChoiceSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/trip_mode_choice.yaml)
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/trip_mode_choice.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: trip_mode_choice
+```
diff --git a/docs/dev-guide/components/trip_purpose.md b/docs/dev-guide/components/trip_purpose.md
new file mode 100644
index 000000000..671db4091
--- /dev/null
+++ b/docs/dev-guide/components/trip_purpose.md
@@ -0,0 +1,46 @@
+(component-trip-purpose)=
+# Trip Purpose
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.trip_purpose
+```
+
+For trips other than the last trip outbound or inbound, this model assigns a purpose based on an
+observed frequency distribution. The distribution is segmented by tour purpose, tour
+direction, and person type. Work tours are also segmented by departure or arrival time period.
+
+The main interface to the trip purpose model is the
+[trip_purpose](activitysim.abm.models.trip_purpose.trip_purpose)
+function. This function is registered as an Inject step in the example Pipeline.
+
+## Structure
+
+- *Core Table*: `trips`
+- *Result Field*: `purpose`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TripPurposeSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Note
+
+Trip purpose and trip destination choice can be run iteratively together; see [Trip Purpose and Destination](component-trip-purpose-and-destination).
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/trip_purpose.yaml)
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/trip_purpose.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: trip_purpose
+.. autofunction:: choose_intermediate_trip_purpose
+.. autofunction:: run_trip_purpose
+```
diff --git a/docs/dev-guide/components/trip_purpose_and_destination.md b/docs/dev-guide/components/trip_purpose_and_destination.md
new file mode 100644
index 000000000..8d074fa08
--- /dev/null
+++ b/docs/dev-guide/components/trip_purpose_and_destination.md
@@ -0,0 +1,40 @@
+(component-trip-purpose-and-destination)=
+# Trip Purpose and Destination
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.trip_purpose_and_destination
+```
+
+After running trip purpose and trip destination separately, the two models can be run together in an iterative fashion on
+the remaining failed trips (i.e. trips that cannot be assigned a destination). Each iteration uses new random numbers,
+as sketched below.
+
+The main interface to the trip purpose and destination model is the
+[trip_purpose_and_destination](activitysim.abm.models.trip_purpose_and_destination.trip_purpose_and_destination)
+function. This function is registered as an Inject step in the example Pipeline.
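+
+A minimal sketch of this retry loop is shown here. `choose_purpose` and
+`choose_destination` are hypothetical stand-ins for the component logic; only the
+loop structure reflects the behavior described above.
+
+```python
+def run_purpose_and_destination(trips, choose_purpose, choose_destination,
+                                max_iterations=5):
+    results, failed = [], trips
+    for iteration in range(1, max_iterations + 1):
+        # each iteration uses new random numbers (offset by iteration)
+        purposes = choose_purpose(failed, iteration)
+        chosen, failed = choose_destination(failed, purposes, iteration)
+        results.append(chosen)
+        if len(failed) == 0:
+            break
+    # trips still failing after max_iterations remain flagged for the caller
+    return results, failed
+```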
+
+## Structure
+
+- *Core Table*: `trips`
+- *Result Field*: `purpose, destination`
+- *Skims Keys*: `origin, (tour primary) destination, dest_taz, trip_period`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TripPurposeAndDestinationSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/trip_purpose_and_destination.yaml)
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/trip_purpose_and_destination.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: trip_purpose_and_destination
+```
diff --git a/docs/dev-guide/components/trip_scheduling_choice.md b/docs/dev-guide/components/trip_scheduling_choice.md
new file mode 100644
index 000000000..186078e8f
--- /dev/null
+++ b/docs/dev-guide/components/trip_scheduling_choice.md
@@ -0,0 +1,43 @@
+(component-trip-scheduling-choice)=
+# Trip Scheduling Choice
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.trip_scheduling_choice
+```
+
+This model uses a logit-based formulation to determine potential trip windows for the three
+main components of a tour.
+
+- Outbound Leg: The time window from leaving the origin location to the arrival at the last outbound stop.
+- Main Leg: The time window from the last outbound stop through the main tour destination to the first inbound stop.
+- Inbound Leg: The time window from the first inbound stop back to the tour origin location.
+
+## Structure
+
+- *Configuration File*: `trip_scheduling_choice.yaml`
+- *Core Table*: `tours`
+- *Result Field*: `outbound_duration`, `main_leg_duration`, `inbound_duration`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TripSchedulingChoiceSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/trip_scheduling_choice.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: trip_scheduling_choice
+.. autofunction:: generate_schedule_alternatives
+.. autofunction:: no_stops_patterns
+.. autofunction:: stop_one_way_only_patterns
+.. autofunction:: stop_two_way_only_patterns
+.. autofunction:: get_pattern_index_and_arrays
+.. autofunction:: get_spec_for_segment
+```
diff --git a/docs/dev-guide/components/trip_scheduling_probabilistic.md b/docs/dev-guide/components/trip_scheduling_probabilistic.md
new file mode 100644
index 000000000..a92fea627
--- /dev/null
+++ b/docs/dev-guide/components/trip_scheduling_probabilistic.md
@@ -0,0 +1,93 @@
+(component-trip-scheduling-probabilistic)=
+# Trip Scheduling (Probabilistic)
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.trip_scheduling
+```
+
+For each trip, this model assigns a departure hour based on an input lookup table of percents by tour purpose,
+direction (inbound/outbound), tour hour, and trip index.
+
+  * The tour hour is the tour start hour for outbound trips and the tour end hour for inbound trips. The trip index is the trip sequence on the tour, with up to four trips per half tour.
+  * For outbound trips, the trip depart hour must be greater than or equal to the previously selected trip depart hour.
+  * For inbound trips, trips are handled in reverse order from the next-to-last trip in the leg back to the first. The tour end hour serves as the anchor time point from which to start assigning trip time periods.
+  * Outbound trips on at-work subtours are assigned the tour depart hour and inbound trips on at-work subtours are assigned the tour end hour.
+
+The assignment of trip depart time is run iteratively up to a max number of iterations, since it is possible that
+the time period selected for an earlier trip in a half-tour makes selection of a later trip time
+period impossible (or of very low probability). Thus, the sampling is re-run until a feasible set of trip time
+periods is found. If a trip can't be scheduled after the max iterations, then the trip is assigned
+the previous trip's choice (i.e. assumed to happen right after the previous trip) or dropped, as configured by the user.
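+
+A toy sketch of this iterative sampling is shown below: departure hours are drawn
+from a probability lookup, never earlier than the previous trip, and the whole leg
+is resampled when a draw becomes infeasible. The probabilities and function names
+are illustrative, not from an actual lookup table.
+
+```python
+import numpy as np
+
+
+def schedule_outbound_leg(depart_probs, tour_start, max_iterations=100, seed=0):
+    """depart_probs: list of {hour: probability} dicts, one per trip."""
+    rng = np.random.default_rng(seed)
+    for _ in range(max_iterations):
+        depart_hours, earliest, feasible = [], tour_start, True
+        for probs in depart_probs:
+            hours = [h for h in probs if h >= earliest]  # not before previous trip
+            if not hours:
+                feasible = False
+                break  # infeasible draw; resample the whole leg
+            weights = np.array([probs[h] for h in hours])
+            hour = int(rng.choice(hours, p=weights / weights.sum()))
+            depart_hours.append(hour)
+            earliest = hour
+        if feasible:
+            return depart_hours
+    return None  # caller assigns the previous trip's choice or drops the trip
+
+
+probs = [{8: 0.7, 9: 0.3}, {8: 0.2, 9: 0.5, 10: 0.3}]
+print(schedule_outbound_leg(probs, tour_start=8))
+```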
+
+The trip scheduling model does not use mode choice logsums.
+
+Alternatives: Available time periods in the tour window (i.e. tour start and end period). When processing stops on
+work tours, the available time periods are constrained by the at-work subtour start and end period as well.
+
+To avoid trip failures, a new probabilistic trip scheduling mode named "relative" was developed.
+When setting the _scheduling_mode_ option to relative, trips are scheduled relative to the previously scheduled trips.
+The first trip still departs when the tour starts and, for every subsequent trip, the choices are selected with respect to
+the previous trip depart time. Inbound trips are no longer handled in reverse order. The key to this relative mode is to
+index the probabilities based on how much time is remaining on the tour. For tours that include subtours, the time remaining will
+be based on the subtour start time for outbound trips and will resume again for inbound trips after the subtour ends.
+By indexing the probabilities based on time remaining and scheduling relative to the previous trip, scheduling trips in relative
+mode will not fail; a simplified sketch appears at the end of this section. Note also that relative scheduling mode requires the use of logic
+version 2 (see the warning about logic versions, below).
+
+An example of trip scheduling in relative mode is included in the [prototype_mwcog](prototype_mwcog) example. In this example, trip
+scheduling probabilities are indexed by the following columns:
+
+  * periods_left_min: the minimum bin for the number of time periods left on the tour.
+  * periods_left_max: the maximum bin for the number of time periods left on the tour. This is the same as periods_left_min until the final time period bin.
+  * outbound: whether the trip occurs on the outbound leg of a tour.
+  * tour_purpose_grouped: tour purpose grouped into mandatory and non-mandatory categories.
+  * half_tour_stops_remaining_grouped: the number of stops remaining on the half tour, with the categories of 0 and 1+.
+
+Each of these variables is listed as a merge column in the trip_scheduling.yaml file and is declared in the trip scheduling preprocessor.
+The variables above attempt to balance the statistics available for probability creation with the amount of segmentation of trip characteristics.
+
+```{warning}
+Earlier versions of ActivitySim contained a logic error in this model, whereby
+the earliest departure time for inbound legs was bounded by the maximum outbound
+departure time, even if there was a scheduling failure for one or more outbound
+leg departures and that bound was NA. For continuity, this process has been
+retained in this ActivitySim component as *logic_version* 1, and it remains the
+default process if the user does not explicitly specify a logic version in the
+model settings yaml file. The revised logic includes bounding inbound legs only
+when the maximum outbound departure time is well defined. This version of the
+model can be used by explicitly setting `logic_version: 2` (or greater) in the
+model settings yaml file. It is strongly recommended that all new model
+development efforts use logic version 2; a future version of ActivitySim may
+make this the default for this component, and/or remove logic version 1 entirely.
+```
+
+The main interface to the trip scheduling model is the
+[trip_scheduling](activitysim.abm.models.trip_scheduling.trip_scheduling) function.
+This function is registered as an Inject step in the example Pipeline.
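+
+The promised sketch of relative-mode scheduling follows. Offsets from the previous
+trip are capped at the periods remaining on the tour, which is why this mode cannot
+fail; the offset probabilities shown are illustrative, not an actual configuration.
+
+```python
+import numpy as np
+
+
+def schedule_relative(tour_start, tour_end, n_trips, offset_probs, seed=0):
+    rng = np.random.default_rng(seed)
+    departs = [tour_start]  # the first trip departs when the tour starts
+    current = tour_start
+    for _ in range(n_trips - 1):
+        periods_left = tour_end - current
+        # offsets are capped at the remaining window, so scheduling cannot fail
+        offsets = [o for o in offset_probs if o <= periods_left]
+        weights = np.array([offset_probs[o] for o in offsets])
+        current += int(rng.choice(offsets, p=weights / weights.sum()))
+        departs.append(current)
+    return departs
+
+
+# depart 0-2 periods after the previous trip, weighted toward short gaps
+print(schedule_relative(tour_start=10, tour_end=20, n_trips=3,
+                        offset_probs={0: 0.5, 1: 0.3, 2: 0.2}))
+```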
+
+## Structure
+
+- *Configuration File*: `trip_scheduling.yaml`
+- *Core Table*: `trips`
+- *Result Field*: `depart`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TripSchedulingSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/trip_scheduling.yaml)
+- [Prototype SEMCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_semcog/configs/trip_scheduling.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: trip_scheduling
+.. autofunction:: set_stop_num
+.. autofunction:: update_tour_earliest
+.. autofunction:: schedule_trips_in_leg
+```
diff --git a/docs/dev-guide/components/vehicle_allocation.md b/docs/dev-guide/components/vehicle_allocation.md
new file mode 100644
index 000000000..68d6de034
--- /dev/null
+++ b/docs/dev-guide/components/vehicle_allocation.md
@@ -0,0 +1,49 @@
+(component-vehicle_allocation)=
+# Vehicle Allocation
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.vehicle_allocation
+```
+
+The vehicle allocation model selects which vehicle would be used for a tour of a given occupancy. The alternatives for the vehicle
+allocation model consist of the vehicles owned by the household and an additional non-household vehicle option. (Zero-auto
+households would be assigned the non-household vehicle option since there are no owned vehicles in the household.)
+A vehicle is selected for each occupancy level set by the user, so that tour modes with different occupancies can have different operating
+characteristics. The output of the vehicle allocation model is appended to the tour table with column names `vehicle_occup_{occupancy}`, and the values are
+the vehicle type selected.
+
+In [prototype_mtc_extended](prototype_mtc_extended), three occupancy levels are used: 1, 2, and 3.5. The auto operating cost
+for occupancy level 1 is used in the drive alone mode and drive to transit modes. Occupancy levels 2 and 3.5 are used for shared
+ride 2 and shared ride 3+ auto operating costs, respectively. Auto operating costs are set in the mode choice pre-processors by selecting the allocated
+vehicle type data from the vehicles table. If the allocated vehicle type was the non-household vehicle, the auto operating cost uses
+the previous default value from [prototype_mtc](prototype_mtc). All trips and atwork subtours use the auto operating cost of the parent tour. Functionality
+was added in tour and atwork subtour mode choice to annotate the tour table and create a `selected_vehicle` column which denotes the actual vehicle used.
+If the tour mode does not include a vehicle, then the `selected_vehicle` entry is left blank.
+
+The current implementation does not account for possible use of the household vehicles by other household members. Thus, it is possible for a
+selected vehicle to be used in two separate tours at the same time.
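+
+A simplified sketch of the output structure is shown below: one
+`vehicle_occup_{occupancy}` column per configured occupancy level, with zero-auto
+households falling back to the non-household vehicle option. The allocation rule
+here (pick the household's newest vehicle) is purely illustrative and is not the
+actual choice model.
+
+```python
+import pandas as pd
+
+
+def allocate_vehicles(tours: pd.DataFrame, vehicles: pd.DataFrame,
+                      occupancy_levels=(1, 2, 3.5)) -> pd.DataFrame:
+    # illustrative rule: the newest (lowest-age) vehicle in each household
+    newest = (vehicles.sort_values("age")
+              .groupby("household_id")["vehicle_type"]
+              .first())
+    for occ in occupancy_levels:
+        col = f"vehicle_occup_{occ}"
+        tours[col] = tours["household_id"].map(newest)
+        # zero-auto households get the non-household vehicle option
+        tours[col] = tours[col].fillna("non_hh_veh")
+    return tours
+```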
+
+## Structure
+
+- *Configuration File*: `vehicle_allocation.yaml`
+- *Result Field*: `vehicle_occup_{occupancy}`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: VehicleAllocationSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC Extended](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc_extended/configs/vehicle_allocation.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: vehicle_allocation
+.. autofunction:: annotate_vehicle_allocation
+.. autofunction:: get_skim_dict
+```
diff --git a/docs/dev-guide/components/vehicle_type_choice.md b/docs/dev-guide/components/vehicle_type_choice.md
new file mode 100644
index 000000000..bdf8d65c7
--- /dev/null
+++ b/docs/dev-guide/components/vehicle_type_choice.md
@@ -0,0 +1,70 @@
+(component-vehicle-type-choice)=
+# Vehicle Type Choice
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.vehicle_type_choice
+```
+
+The vehicle type choice model selects a vehicle type for each household vehicle. A vehicle type
+is a combination of the vehicle's body type, age, and fuel type. For example, a 13-year-old
+gas-powered van would have a vehicle type of *van_13_gas*.
+
+There are two vehicle type choice model structures implemented (a sketch of the combinatorial alternatives appears at the end of this section):
+
+1. Simultaneous choice of body type, age, and fuel type.
+2. Simultaneous choice of body type and age, with fuel type assigned from a probability distribution.
+
+## Structure
+
+- *Configuration File*: `vehicle_type_choice.yaml`
+
+Input vehicle type data included in [prototype_mtc_extended](prototype_mtc_extended) came from a variety of sources. Data on vehicle makes, models, MPG, and
+electric vehicle range were sourced from the Environmental Protection Agency (EPA). Additional data on vehicle costs were derived from the
+National Household Travel Survey. Auto operating costs in the vehicle type data file were a sum of fuel costs and maintenance costs.
+Fuel costs were calculated from MPG assuming a $3.00 cost for a gallon of gas. When MPG was not available to calculate fuel costs,
+the closest year, vehicle type, or body type available was used. Maintenance costs were taken from AAA's
+[2017 driving cost study](https://exchange.aaa.com/wp-content/uploads/2017/08/17-0013_Your-Driving-Costs-Brochure-2017-FNL-CX-1.pdf).
+Size categories within body types were averaged, e.g. car was an average of AAA's small, medium, and large sedan categories.
+Motorcycles were assigned the small sedan maintenance costs since they were not included in AAA's report.
+Maintenance costs were not varied by vehicle year. (According to
+data from the U.S. [Bureau of Labor Statistics](https://www.bls.gov/opub/btn/volume-3/pdf/americans-aging-autos.pdf),
+there was no consistent relationship between vehicle age and maintenance costs.)
+
+Using the above methodology, the average auto operating cost of vehicles output from [prototype_mtc_extended](prototype_mtc_extended) was 18.4 cents per mile.
+This value is very close to the auto operating cost of 18.3 cents per mile used in [prototype_mtc](prototype_mtc).
+Non-household vehicles in prototype_mtc_extended use the auto operating cost of 18.3 cents used in prototype_mtc.
+Users are encouraged to make their own assumptions and calculate auto operating costs as they see fit.
+
+The distribution of fuel type probabilities included in [prototype_mtc_extended](prototype_mtc_extended) is computed directly from the National Household Travel Survey data
+and includes the entire US. Therefore, there is "lumpiness" in the probabilities due to poor statistics in the data for some vehicle types.
+The user is encouraged to adjust the probabilities to their modeling region and "smooth" them for more consistent results.
+
+Further discussion of output results and model sensitivities can be found [here](https://github.com/ActivitySim/activitysim/wiki/Project-Meeting-2022.05.05).
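+
+The promised sketch of the combinatorial body type x age x fuel type alternatives
+follows. The category values are examples only; the real alternatives are
+constructed from the input vehicle type data.
+
+```python
+from itertools import product
+
+# example categories, following the {body}_{age}_{fuel} naming used above
+body_types = ["car", "suv", "van", "pickup", "motorcycle"]
+ages = range(1, 21)
+fuel_types = ["gas", "diesel", "hybrid", "bev"]
+
+alternatives = [
+    f"{body}_{age}_{fuel}"
+    for body, age, fuel in product(body_types, ages, fuel_types)
+]
+print(len(alternatives))  # 5 * 20 * 4 = 400 alternatives
+print(alternatives[:3])   # ['car_1_gas', 'car_1_diesel', 'car_1_hybrid']
+```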
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: VehicleTypeChoiceSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC Extended](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc_extended/configs/vehicle_type_choice.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: vehicle_type_choice
+.. autofunction:: append_probabilistic_vehtype_type_choices
+.. autofunction:: annotate_vehicle_type_choice_households
+.. autofunction:: annotate_vehicle_type_choice_persons
+.. autofunction:: annotate_vehicle_type_choice_vehicles
+.. autofunction:: get_combinatorial_vehicle_alternatives
+.. autofunction:: construct_model_alternatives
+.. autofunction:: get_vehicle_type_data
+.. autofunction:: iterate_vehicle_type_choice
+```
diff --git a/docs/dev-guide/components/work_from_home.md b/docs/dev-guide/components/work_from_home.md
new file mode 100644
index 000000000..d5faf9cc3
--- /dev/null
+++ b/docs/dev-guide/components/work_from_home.md
@@ -0,0 +1,51 @@
+(component-work_from_home)=
+# Work from Home
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.work_from_home
+```
+
+Telecommuting is defined as working from home instead of traveling to work;
+it only applies to workers with a regular workplace outside of home.
+The telecommute model consists of two submodels - this work from home model and a
+person [telecommute_frequency](telecommute_frequency) model. This model predicts for all workers whether they
+usually work from home.
+
+The work from home model includes the ability to adjust a work from home alternative
+constant to attempt to realize a work from home percent for what-if type analysis.
+This iterative, single-process procedure takes as input a number of iterations, a filter on
+the choosers to use for the calculation, a target work from home percent, a tolerance percent
+for convergence, and the name of the coefficient to adjust. An example setup is provided, and
+the coefficient adjustment at each iteration is:
+`new_coefficient = log( target_percent / current_percent ) + current_coefficient`.
+
+The main interface to the work from home model is the
+[work_from_home](activitysim.abm.models.work_from_home) function. This
+function is registered as an Inject step in the example Pipeline.
+
+## Structure
+
+- *Configuration File*: `work_from_home.yaml`
+- *Core Table*: `persons`
+- *Result Field*: `work_from_home`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: WorkFromHomeSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype SEMCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_semcog/configs/work_from_home.yaml)
+- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/work_from_home.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: work_from_home
+```
diff --git a/docs/dev-guide/components/work_location_choice.md b/docs/dev-guide/components/work_location_choice.md
new file mode 100644
index 000000000..5a484cd30
--- /dev/null
+++ b/docs/dev-guide/components/work_location_choice.md
@@ -0,0 +1,52 @@
+(component-work-location)=
+# Work Location
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.location_choice
+```
+
+The usual work location choice models assign a usual work location for the primary
+mandatory activity of each employed person in the
+synthetic population.
+
+The main interface to the work from home model is the
+[work_from_home](activitysim.abm.models.work_from_home) function. This
+function is registered as an Inject step in the example Pipeline.
+
+## Structure
+
+- *Configuration File*: `work_from_home.yaml`
+- *Core Table*: `persons`
+- *Result Table*: `work_from_home`
+
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: WorkFromHomeSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype SEMCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_semcog/configs/work_from_home.yaml)
+- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/work_from_home.yaml)
+
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: work_from_home
+```
diff --git a/docs/dev-guide/components/work_location_choice.md b/docs/dev-guide/components/work_location_choice.md
new file mode 100644
index 000000000..5a484cd30
--- /dev/null
+++ b/docs/dev-guide/components/work_location_choice.md
@@ -0,0 +1,52 @@
+(component-location_choice)=
+# Work Location
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.location_choice.workplace_location
+```
+
+The usual work location choice models assign a usual work location for the primary
+mandatory activity of each employed person in the
+synthetic population. The models are composed of a set of accessibility-based parameters
+(including one-way distance between home and primary destination, and the tour mode choice
+logsum - the expected maximum utility in the mode choice model, given by the
+logarithm of the sum of exponentials in the denominator of the logit formula) and size terms,
+which describe the quantity of work opportunities in each possible destination.
+
+The work location model is made up of four steps (a sketch of this loop appears at the end of this page):
+  * sample - selects a sample of alternative work locations for the next model step. This selects a configured number of locations (the sample size) from the full set of model zones using a simple utility.
+  * logsums - starts with the table created above and calculates and adds the mode choice logsum expression for each alternative work location.
+  * simulate - starts with the table created above and chooses a final work location, this time with the mode choice logsum included.
+  * shadow prices - compares modeled zonal destinations to target zonal size terms and calculates updated shadow prices.
+
+These steps are repeated until the shadow pricing convergence criteria are satisfied or a maximum number of iterations is reached. See [shadow_pricing](shadow_pricing).
+
+Work location choice for [multiple_zone_systems](multiple_zone_systems) models uses [presampling](presampling) by default.
+
+The main interface to the model is the [workplace_location](activitysim.abm.models.location_choice.workplace_location) function.
+This function is registered as an Inject step in the example Pipeline. See [writing_logsums](writing_logsums) for how to write logsums for estimation.
+
+## Structure
+
+- *Configuration File*: `workplace_location.yaml`
+- *Core Table*: `persons`
+- *Result Field*: `workplace_taz`
+- *Skims Keys*: `TAZ, alt_dest, AM time period, PM time period`
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: TourLocationComponentSettings
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/workplace_location.yaml)
+- [Prototype MWCOG](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mwcog/configs/workplace_location.yaml)
+
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: workplace_location
+```
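+
+The sketch below illustrates how the four steps listed above fit together in the shadow pricing
+loop. It is a simplified illustration modeled on ``iterate_location_choice`` in
+`activitysim.abm.models.location_choice`, not the actual implementation; ``sample``, ``logsums``,
+and ``simulate`` are caller-supplied placeholders standing in for the interaction_sample, logsum,
+and interaction_sample_simulate steps.
+
+```python
+def iterate_work_location_choice(choosers, spc, sample, logsums, simulate,
+                                 max_iterations=10):
+    """Illustrative sketch of the sample / logsums / simulate / shadow price loop."""
+    choices = None
+    for iteration in range(1, max_iterations + 1):
+        if spc.use_shadow_pricing and iteration > 1:
+            spc.update_shadow_prices()
+
+        # 1. sample: select candidate work zones with a simple utility
+        alternatives = sample(choosers)
+        # 2. logsums: attach a mode choice logsum to each sampled zone
+        alternatives["mode_choice_logsum"] = logsums(alternatives)
+        # 3. simulate: choose one zone per person, now including the logsum
+        choices = simulate(choosers, alternatives)
+
+        # 4. shadow prices: compare modeled destinations to zonal size targets
+        spc.set_choices(choices)
+        if spc.use_shadow_pricing and spc.check_fit(iteration):
+            break  # convergence criteria satisfied
+    return choices
+```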
diff --git a/docs/dev-guide/components/write_trip_matrices.md b/docs/dev-guide/components/write_trip_matrices.md
new file mode 100644
index 000000000..c5349c707
--- /dev/null
+++ b/docs/dev-guide/components/write_trip_matrices.md
@@ -0,0 +1,42 @@
+(component-write-trip-matrices)=
+# Write Trip Matrices
+
+```{eval-rst}
+.. currentmodule:: activitysim.abm.models.trip_matrices
+```
+
+Write open matrix (OMX) trip matrices for assignment. This component reads the trips table after the
+preprocessor and runs expressions to code additional data fields, with one data field for each matrix
+specified. The matrices are scaled by a household-level expansion factor, which is the household sample
+rate by default, calculated when households are read in at the beginning of a model run. The main
+interface to write trip matrices is the
+[write_trip_matrices](activitysim.abm.models.trip_matrices.write_trip_matrices) function.
+This function is registered as an Inject step in the example Pipeline.
+
+## Structure
+
+- *Core Table*: `trips`
+- *Result*: `omx trip matrices`
+- *Skims Keys*: `origin, destination`
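+
+As a rough illustration of the aggregation and scaling described above, the sketch below
+accumulates expanded trips into a single OMX matrix. The column names, zero-based zone indexing,
+and `openmatrix` usage are illustrative assumptions, not the component's actual code.
+
+```python
+import numpy as np
+import openmatrix as omx
+
+
+def write_scaled_trip_matrix(trips, num_zones, sample_rate, path="trips.omx"):
+    """Illustrative sketch: accumulate expanded trips into one OMX matrix."""
+    matrix = np.zeros((num_zones, num_zones))
+    expansion_factor = 1.0 / sample_rate  # each sampled trip stands in for 1/rate trips
+    origins = trips["origin"].to_numpy()  # assumed zero-based zone indexes
+    destinations = trips["destination"].to_numpy()
+    np.add.at(matrix, (origins, destinations), expansion_factor)
+    with omx.open_file(path, "w") as f:
+        f["all_trips"] = matrix  # the real component writes one matrix per coded data field
+```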
+
+## Configuration
+
+```{eval-rst}
+.. autopydantic_model:: WriteTripMatricesSettings
+    :inherited-members: BaseModel, PydanticReadable
+    :show-inheritance:
+```
+
+### Examples
+
+- [Prototype MTC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_mtc/configs/write_trip_matrices.yaml)
+- [Prototype ARC](https://github.com/ActivitySim/activitysim/blob/main/activitysim/examples/prototype_arc/configs/write_trip_matrices.yaml)
+
+## Implementation
+
+```{eval-rst}
+.. autofunction:: write_trip_matrices
+```
diff --git a/docs/dev-guide/index.rst b/docs/dev-guide/index.rst
index 6661429f4..1f6400abc 100644
--- a/docs/dev-guide/index.rst
+++ b/docs/dev-guide/index.rst
@@ -30,7 +30,8 @@ Contents
    workflows
    logging
    ../development
-   ../models
+   component-configs
    components/index
    ../core
    ../benchmarking
+   build-docs
diff --git a/docs/development.rst b/docs/development.rst
index 1c891115b..8ba941f24 100644
--- a/docs/development.rst
+++ b/docs/development.rst
@@ -235,21 +235,8 @@ a profiler file. Then run snakeviz on the profiler file to interactively explor
 Documentation
 ~~~~~~~~~~~~~
 
-The documentation is written in `reStructuredText `__ markup
-and built with `Sphinx `__. In addition to converting rst files
-to html and other document formats, these tools also read the inline Python docstrings and convert
-them into html as well. ActivitySim's docstrings are written in `numpydoc format
-`__ since it is easier to use
-than standard rst format.
+See :ref:`Documentation `.
 
-To build the documentation, first make sure the required packages are installed. Next, build the
-documentation in html format with the ``make html`` command run from the ``docs`` folder.
-
-If the activitysim package is installed, then the documentation will be built from that version of
-the source code instead of the git repo version. When pushing revisions to the repo, the documentation
-is automatically built by Travis after successfully passing the tests.
-
-GitHub automatically publishes the gh-pages branch at https://activitysim.github.io/activitysim.
 
 .. _release_steps :
diff --git a/docs/estimation.rst b/docs/estimation.rst
index 814575e6c..faffde5f5 100644
--- a/docs/estimation.rst
+++ b/docs/estimation.rst
@@ -1,5 +1,5 @@
 
-.. _estimation :
+.. _estimation_old :
 
 Estimation
 ----------
@@ -80,7 +80,7 @@ Additional settings for running ActivitySim in estimation mode are specified in
 
 * ``survey_tables`` - the list of input ActivitySim format survey tables with observed choices to override model simulation choices in order to write EDBs. These tables are the output of the ``scripts\infer.py`` script that pre-processes the ActivitySim format household travel survey files for the example data and submodels
 
-.. _estimation_example_notebooks:
+.. _estimation_example_notebooks_old:
 
 Estimation Notebooks
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/examples.rst b/docs/examples.rst
index a70583c0b..efe0ae707 100644
--- a/docs/examples.rst
+++ b/docs/examples.rst
@@ -48,7 +48,7 @@ The current examples are:
 +---------------------------------+-----------------------------------------------------------+--------------+----------------------+
 | :ref:`prototype_arc`            | ARC agency example                                        | 1            | In development       |
 +---------------------------------+-----------------------------------------------------------+--------------+----------------------+
-| :ref:`example_semcog`           | SEMCOG agency example                                     | 1            | In production        |
+| :ref:`production_semcog`        | SEMCOG agency example                                     | 1            | In production        |
 +---------------------------------+-----------------------------------------------------------+--------------+----------------------+
 | :ref:`placeholder_psrc`         | PSRC agency example                                       | 2            | Future development   |
 +---------------------------------+-----------------------------------------------------------+--------------+----------------------+
@@ -64,7 +64,7 @@
 contains example commands to create and run several versions of the examples. See also :ref:`adding_agency_examples` for more
 information on agency example models.
 
-.. _prototype_mtc :
+
 
 prototype_mtc
 -------------
@@ -523,7 +523,7 @@ Python/pandas/numpy expressions, alternatives, and other settings used by each m
 alternatives file since the alternatives are not easily described as columns in the expressions file. An example of this is the
 ``non_mandatory_tour_frequency_alternatives.csv`` file, which lists each alternative as a row and each columns indicates the
 number of non-mandatory tours by purpose. The set of files for the prototype_mtc are below. The
-:ref:`prototype_arc`, :ref:`example_semcog`, and :ref:`prototype_mtc_extended` examples added additional submodels.
+:ref:`prototype_arc`, :ref:`production_semcog`, and :ref:`prototype_mtc_extended` examples added additional submodels.
 
 +------------------------------------------------+--------------------------------------------------------------------+
 | Model                                           | Specification Files                                                |
@@ -1078,7 +1078,7 @@ for examples that illustrate running ActivitySim in estimation mode and using la
 
 .. index:: multiple_zone_systems
 .. _multiple_zone_systems :
-.. _placeholder_multiple_zone :
+.. _placeholder_multiple_zone_old :
 
 placeholder_multiple_zone
 -------------------------
@@ -1484,9 +1484,9 @@ Example
 ~~~~~~~
 
 See example commands in `example_manifest.yaml `_
 for running prototype_arc. For optimal performance, configure multiprocessing and chunk_size based on machine hardware.
 
-.. _example_semcog :
-example_semcog
-----------------
+.. _production_semcog :
+
+production_semcog
+-----------------
 
 .. note::
 
    This example is in development
 
-The example_semcog added a :ref:`work_from_home`, :ref:`telecommute_frequency`, :ref:`transit_pass_subsidy`
+The production_semcog model added :ref:`work_from_home`, :ref:`telecommute_frequency`, :ref:`transit_pass_subsidy`
 and :ref:`transit_pass_ownership` submodels. These submodel specification files are below, and are in addition to
 the :ref:`prototype_mtc` submodel :ref:`sub-model-spec-files`. These submodels were added to prototype_semcog
 as extensions, which is a way for users to add submodels within their model setup as opposed to formally adding them to the activitysim package.
Extension submodels are run through
@@ -1526,7 +1526,7 @@ Example SEMCOG Sub-Model Specification Files
 |                                                 |  - transit_pass_ownership_coeffs.csv                               |
 +------------------------------------------------+--------------------------------------------------------------------+
 
-Additional extensions were included specifically as part of the example_semcog model to deal with the unique travel of
+Additional extensions were included specifically as part of the production_semcog model to deal with the unique travel of
 university students attending the University of Michigan (UofM). First off, UofM students have their school zones
 resampled weighted by enrollment. This happens after both school location choice in the university_location_zone_override
 model and after university trip destination choice in the trip_destination_univ_zone_override model. Next, parking trips are handled
@@ -1537,7 +1537,7 @@ While a person's vehicle is parked, trip mode choice treats the tour mode as walk
 until the person returns back to their car. For more information, please see SEMCOG's final model documentation
 and the SEMCOG model user guide.
 
-These submodels were added to example_semcog as extensions, which is a way for users to add
+These submodels were added to production_semcog as extensions, which is a way for users to add
 submodels within their model setup as opposed to formally adding them to the activitysim package.
 Extension submodels are run through the `models` settings. However, the model must be run with the `simulation.py` script
 instead of the command line interface in order to load the extensions folder.
@@ -1547,7 +1547,7 @@ Example
 ~~~~~~~
 
 See example commands in `example_manifest.yaml `_
-for running example_semcog. For optimal performance, configure multiprocessing and chunk_size based on machine hardware.
+for running production_semcog. For optimal performance, configure multiprocessing and chunk_size based on machine hardware.
 
 .. _placeholder_psrc :
diff --git a/docs/howitworks.rst b/docs/howitworks.rst
index 65e712729..00102fa7e 100644
--- a/docs/howitworks.rst
+++ b/docs/howitworks.rst
@@ -9,7 +9,9 @@ This page describes how the software works, how multiprocessing works, and the p
 Execution Flow
 --------------
 
-The example model run starts by running the steps in :ref:`example_run`.
+An example model run starts by running the steps in :ref:`example_run`. The following flow chart shows the general steps of an ActivitySim run; specific implementations will include different individual model components in their execution.
+
+.. image:: images/example_flowchart.png
 
 Initialization
 ~~~~~~~~~~~~~~
@@ -232,262 +234,11 @@ actually reads the input tables from the input HDF5 or CSV file using the ``inpu
         column_map:
           HHID: household_id
 
-School Location
-~~~~~~~~~~~~~~~
-
-Now that the persons, households, and other data are in memory, and also annotated with additional fields
-for later calculations, the school location model can be run. The school location model is defined
-in :mod:`activitysim.abm.models.location_choice`. As shown below, the school location model
-actually uses the ``persons_merged`` table, which includes joined household, land use, and accessibility
-tables as well. The school location model also requires the network_los object, which is discussed next.
-Before running the generic iterate location choice function, the model reads the model settings file, which -defines various settings, including the expression files, sample size, mode choice logsum -calculation settings, time periods for skim lookups, shadow pricing settings, etc. - -:: - - #persons.py - # another common merge for persons - @inject.table() - def persons_merged(persons, households, land_use, accessibility): - return inject.merge_tables(persons.name, tables=[persons, households, land_use, accessibility]) - - #location_choice.py - @inject.step() - def school_location( - persons_merged, persons, households, - network_los, chunk_size, trace_hh_id, locutor - ): - - trace_label = 'school_location' - model_settings = config.read_model_settings('school_location.yaml') - - iterate_location_choice( - model_settings, - persons_merged, persons, households, - network_los, - chunk_size, trace_hh_id, locutor, trace_label - - -Deep inside the method calls, the skim matrix lookups required for this model are configured via ``network_los``. The following -code sets the keys for looking up the skim values for this model. In this case there is a ``TAZ`` column -in the households table that is renamed to `TAZ_chooser`` and a ``TAZ`` in the alternatives generation code. -The skims are lazy loaded under the name "skims" and are available in the expressions using the ``@skims`` expression. - -:: - - # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers - # and a zone_id in the alternatives which get merged during interaction - # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser') - # the skims will be available under the name "skims" for any @ expressions - skim_dict = network_los.get_default_skim_dict() - skims = skim_dict.wrap('home_zone_id', 'zone_id') - - locals_d = { - 'skims': skims, - } - -The next step is to call the :func:`activitysim.core.interaction_sample.interaction_sample` function which -selects a sample of alternatives by running a MNL choice model simulation in which alternatives must be -merged with choosers because there are interaction terms. The choosers table, the alternatives table, the -sample size, the model specification expressions file, the skims, the skims lookups, the chunk size, and the -trace labels are passed in. - -:: - - #interaction_sample - choices = interaction_sample( - choosers, - alternatives, - sample_size=sample_size, - alt_col_name=alt_dest_col_name, - spec=spec_for_segment(model_spec, segment_name), - skims=skims, - locals_d=locals_d, - chunk_size=chunk_size, - trace_label=trace_label) - -This function solves the utilities, calculates probabilities, draws random numbers, selects choices with -replacement, and returns the choices. This is done in a for loop of chunks of chooser records in order to avoid -running out of RAM when building the often large data tables. This method does a lot, and eventually -calls :func:`activitysim.core.interaction_simulate.eval_interaction_utilities`, which loops through each -expression in the expression file and solves it at once for all records in the chunked chooser -table using Python's ``eval``. - -The :func:`activitysim.core.interaction_sample.interaction_sample` method is currently only a multinomial -logit choice model. The :func:`activitysim.core.simulate.simple_simulate` method supports both MNL and NL as specified by -the ``LOGIT_TYPE`` setting in the model settings YAML file. 
The ``auto_ownership.yaml`` file for example specifies -the ``LOGIT_TYPE`` as ``MNL.`` - -If the expression is a skim matrix, then the entire column of chooser OD pairs is retrieved from the matrix (i.e. numpy array) -in one vectorized step. The ``orig`` and ``dest`` objects in ``self.data[orig, dest]`` in :mod:`activitysim.core.los` are vectors -and selecting numpy array items with vector indexes returns a vector. Trace data is also written out if configured (not shown below). - -:: - - # evaluate expressions from the spec multiply by coefficients and sum - interaction_utilities, trace_eval_results \ - = eval_interaction_utilities(spec, interaction_df, locals_d, trace_label, trace_rows) - - # reshape utilities (one utility column and one row per row in model_design) - # to a dataframe with one row per chooser and one column per alternative - utilities = pd.DataFrame( - interaction_utilities.values.reshape(len(choosers), alternative_count), - index=choosers.index) - - # convert to probabilities (utilities exponentiated and normalized to probs) - # probs is same shape as utilities, one row per chooser and one column for alternative - probs = logit.utils_to_probs(utilities, allow_zero_probs=allow_zero_probs, - trace_label=trace_label, trace_choosers=choosers) - - choices_df = make_sample_choices( - choosers, probs, alternatives, sample_size, alternative_count, alt_col_name, - allow_zero_probs=allow_zero_probs, trace_label=trace_label) - - # pick_count is number of duplicate picks - pick_group = choices_df.groupby([choosers.index.name, alt_col_name]) - - # number each item in each group from 0 to the length of that group - 1. - choices_df['pick_count'] = pick_group.cumcount(ascending=True) - # flag duplicate rows after first - choices_df['pick_dup'] = choices_df['pick_count'] > 0 - # add reverse cumcount to get total pick_count (conveniently faster than groupby.count + merge) - choices_df['pick_count'] += pick_group.cumcount(ascending=False) + 1 - - # drop the duplicates - choices_df = choices_df[~choices_df['pick_dup']] - del choices_df['pick_dup'] - - return choices_df - -The model creates the ``location_sample_df`` table using the choices above. This table is -then used for the next model step - solving the logsums for the sample. - -:: - - # - location_logsums - location_sample_df = run_location_logsums( - segment_name, - choosers, - network_los, - location_sample_df, - model_settings, - chunk_size, - trace_hh_id, - tracing.extend_trace_label(trace_label, 'logsums.%s' % segment_name)) - -The next steps are similar to what the sampling model does, except this time the sampled locations -table is the choosers and the model is calculating and adding the tour mode choice logsums using the -logsums settings and expression files. The resulting logsums are added to the chooser table as the -``mode_choice_logsum`` column. - -:: - - #inside run_location_logsums() defined in location_choice.py - logsums = logsum.compute_logsums( - choosers, - tour_purpose, - logsum_settings, model_settings, - network_los, - chunk_size, - trace_label) - - location_sample_df['mode_choice_logsum'] = logsums - -The :func:`activitysim.abm.models.util.logsums.compute_logsums` method goes through a similar series -of steps as the interaction_sample function but ends up calling -:func:`activitysim.core.simulate.simple_simulate_logsums` since it supports nested logit models, which -are required for the mode choice logsum calculation. 
The
-:func:`activitysim.core.simulate.simple_simulate_logsums` returns a vector of logsums (instead of a vector
-choices).
-
-The final school location choice model operates on the ``location_sample_df`` table created
-above and is called as follows:
-
-::
-
-    # - location_simulate
-    choices = \
-        run_location_simulate(
-            segment_name,
-            choosers,
-            location_sample_df,
-            network_los,
-            dest_size_terms,
-            model_settings,
-            chunk_size,
-            tracing.extend_trace_label(trace_label, 'simulate.%s' % segment_name))
-
-    choices_list.append(choices)
-
-The operations executed by this model are very similar to the earlier models, except
-this time the sampled locations table is the choosers and the model selects one alternative for
-each chooser using the school location simulate expression files and the
-:func:`activitysim.core.interaction_sample_simulate.interaction_sample_simulate` function.
-
-Back in ``iterate_location_choice()``, the model adds the choices as a column to the ``persons`` table and adds
-additional output columns using a postprocessor table annotation if specified in the settings file. Refer
-to :ref:`table_annotation` for more information and the :func:`activitysim.abm.models.util.expressions.assign_columns`
-function. The overall school location model is run within a shadow pricing iterative loop as shown below. Refer
-to :ref:`shadow_pricing` for more information.
-
-::
-
-    # in iterate_location_choice() in location_choice.py
-    for iteration in range(1, max_iterations + 1):
-
-        if spc.use_shadow_pricing and iteration > 1:
-            spc.update_shadow_prices()
-
-        choices = run_location_choice(
-            persons_merged_df,
-            network_los,
-            spc,
-            model_settings,
-            chunk_size, trace_hh_id,
-            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))
-
-        choices_df = choices.to_frame('dest_choice')
-        choices_df['segment_id'] = \
-            persons_merged_df[chooser_segment_column].reindex(choices_df.index)
-
-        spc.set_choices(choices_df)
-
-        if locutor:
-            spc.write_trace_files(iteration)
-
-        if spc.use_shadow_pricing and spc.check_fit(iteration):
-            logging.info("%s converged after iteration %s" % (trace_label, iteration,))
-            break
-
-    # - shadow price table
-    if locutor:
-        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
-            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
-        if 'MODELED_SIZE_TABLE' in model_settings:
-            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)
-
-    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
-    tracing.print_summary(dest_choice_column_name, choices, value_counts=True)
-
-    persons_df = persons.to_frame()
-    # We only chose school locations for the subset of persons who go to school
-    # so we backfill the empty choices with -1 to code as no school location
-    NO_DEST_TAZ = -1
-    persons_df[dest_choice_column_name] = \
-        choices.reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)
-
-    # - annotate persons table
-    if 'annotate_persons' in model_settings:
-        expressions.assign_columns(
-            df=persons_df,
-            model_settings=model_settings.get('annotate_persons'),
-            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
-
-    pipeline.replace_table("persons", persons_df)
+Running Model Components
+~~~~~~~~~~~~~~~~~~~~~~~~
+The next steps run the model components specific to the implementation being executed, as specified in the ``models`` list in the ``settings.yaml`` file.
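+
+For example, the ``models`` setting might look like the following (the component names shown are
+illustrative; each implementation defines its own list):
+
+::
+
+    models:
+      - initialize_landuse
+      - compute_accessibility
+      - initialize_households
+      - school_location
+      - workplace_location
+      - auto_ownership_simulate
+      - free_parking
+      - write_tables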
Finishing Up ~~~~~~~~~~~~ @@ -502,2553 +253,8 @@ Back in the main ``run`` command, the final steps are to: * close the data pipeline (and attached HDF5 file) -Additional Notes ----------------- - -The rest of the microsimulation models operate in a similar fashion with a few notable additions: - -* creating new tables -* vectorized 3D skims indexing -* aggregate (OD-level) accessibilities model - -Creating New Tables -~~~~~~~~~~~~~~~~~~~ - -In addition to calculating the mandatory tour frequency for a person, the model must also create mandatory tour records. -Once the number of tours is known, then the next step is to create tours records for subsequent models. This is done by the -:func:`activitysim.abm.models.util.tour_frequency.process_tours` function, which is called by the -:func:`activitysim.abm.models.mandatory_tour_frequency.mandatory_tour_frequency` function, which adds the tours to -the ``tours`` table managed in the data pipeline. This is the same basic pattern used for creating new tables - tours, trips, etc. - -:: - - @inject.step() - def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id): - - choosers['mandatory_tour_frequency'] = choices - mandatory_tours = process_mandatory_tours( - persons=choosers, - mandatory_tour_frequency_alts=alternatives - ) - tours = pipeline.extend_table("tours", mandatory_tours) - - -Vectorized 3D Skim Indexing -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The mode choice model uses a collection of skims with a third dimension, which in this case -is time period. Setting up the 3D index for skims is done as follows: - -:: - - skim_dict = network_los.get_default_skim_dict() - - # setup skim keys - orig_col_name = 'home_zone_id' - dest_col_name = 'destination' - - out_time_col_name = 'start' - in_time_col_name = 'end' - odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, - dim3_key='out_period') - dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, - dim3_key='in_period') - odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, - dim3_key='in_period') - dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, - dim3_key='out_period') - od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) - - skims = { - "odt_skims": odt_skim_stack_wrapper, - "dot_skims": dot_skim_stack_wrapper, - "od_skims": od_skim_stack_wrapper, - 'orig_col_name': orig_col_name, - 'dest_col_name': dest_col_name, - 'out_time_col_name': out_time_col_name, - 'in_time_col_name': in_time_col_name - } - -When model expressions such as ``@odt_skims['WLK_LOC_WLK_TOTIVT']`` are solved, -the ``WLK_LOC_WLK_TOTIVT`` skim matrix values for all chooser table origins, destinations, and -out_periods can be retrieved in one vectorized request. - -All the skims are preloaded (cached) by the pipeline manager at the beginning of the model -run in order to avoid repeatedly reading the skims from the OMX files on disk. This saves -significant model runtime. - -See :ref:`los_in_detail` for more information on skim handling. - -Accessibilities Model -~~~~~~~~~~~~~~~~~~~~~ - -Unlike the microsimulation models, which operate on a table of choosers, the accessibilities model is -an aggregate model that calculates accessibility measures by origin zone to all destination zones. This -model could be implemented with a matrix library such as numpy since it involves a series of matrix -and vector operations. 
However, all the other ActivitySim AB models - the -microsimulation models - are implemented with pandas.DataFrame tables, and so this would be a -different approach for just this model. The benefits of keeping with the same table approach to -data setup, expression management, and solving means ActivitySim has one expression syntax, is -easier to understand and document, and is more efficiently implemented. - -As illustrated below, in order to convert the -accessibility calculation into a table operation, a table of OD pairs is first built using numpy -``repeat`` and ``tile`` functions. Once constructed, the additional data columns are added to the -table in order to solve the accessibility calculations. The skim data is also added in column form. -After solving the expressions for each OD pair row, the accessibility module aggregates the results -to origin zone and write them to the datastore. - -:: - - # create OD dataframe - od_df = pd.DataFrame( - data={ - 'orig': np.repeat(np.asanyarray(land_use_df.index), zone_count), - 'dest': np.tile(np.asanyarray(land_use_df.index), zone_count) - } - ) - - -.. index:: multiprocessing - -.. _multiprocessing: - -Multiprocessing ---------------- - -Most models can be implemented as a series of independent vectorized operations on pandas DataFrames and -numpy arrays. These vectorized operations are much faster than sequential Python because they are -implemented by native code (compiled C) and are to some extent multi-threaded. But the benefits of -numpy multi-processing are limited because they only apply to atomic numpy or pandas calls, and as -soon as control returns to Python it is single-threaded and slow. - -Multi-threading is not an attractive strategy to get around the Python performance problem because -of the limitations imposed by Python's global interpreter lock (GIL). Rather than struggling with -Python multi-threading, ActivitySim uses the -Python `multiprocessing `__ library to parallelize -most models. - -ActivitySim's modular and extensible architecture makes it possible to not hardwire the multiprocessing -architecture. The specification of which models should be run in parallel, how many processers -should be used, and the segmentation of the data between processes are all specified in the -settings config file. - -Mutliprocessing Configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The multiprocess_steps setting below indicate that the simulation should be broken into three steps. - -:: +Components +---------- - models: - ### mp_initialize step - - initialize_landuse - - compute_accessibility - - initialize_households - ### mp_households step - - school_location - - workplace_location - - auto_ownership_simulate - - free_parking - ### mp_summarize step - - write_tables - - multiprocess_steps: - - name: mp_initialize - begin: initialize_landuse - - name: mp_households - begin: school_location - num_processes: 2 - slice: - tables: - - households - - persons - - name: mp_summarize - begin: write_tables - - -The first multiprocess_step, ``mp_initialize``, begins with the initialize landuse step and is -implicity single-process because there is no 'slice' key indicating how to apportion the tables. -This first step includes all models listed in the 'models' setting up until the first step -in the next multiprocess_steps. - -The second multiprocess_step, ``mp_households``, starts with the school location model and continues -through auto ownership. 
The 'slice' info indicates that the tables should be sliced by -``households``, and that ``persons`` is a dependent table and so ``persons`` with a ref_col (foreign key -column with the same name as the ``Households`` table index) referencing a household record should be -taken to 'belong' to that household. Similarly, any other table that either share an index -(i.e. having the same name) with either the ``households`` or ``persons`` table, or have a ref_col to -either of their indexes, should also be considered a dependent table. - -The num_processes setting of 2 indicates that the pipeline should be split in two, and half of the -households should be apportioned into each subprocess pipeline, and all dependent tables should -likewise be apportioned accordingly. All other tables (e.g. ``land_use``) that do share an index (name) -or have a ref_col should be considered mirrored and be included in their entirety. - -The primary table is sliced by num_processes-sized strides. (e.g. for num_processes == 2, the -sub-processes get every second record starting at offsets 0 and 1 respectively. All other dependent -tables slices are based (directly or indirectly) on this primary stride segmentation of the primary -table index. - -Two separate sub-process are launched (num_processes == 2) and each passed the name of their -apportioned pipeline file. They execute independently and if they terminate successfully, their -contents are then coalesced into a single pipeline file whose tables should then be essentially -the same as it had been generated by a single process. - -We assume that any new tables that are created by the sub-processes are directly dependent on the -previously primary tables or are mirrored. Thus we can coalesce the sub-process pipelines by -concatenating the primary and dependent tables and simply retaining any copy of the mirrored tables -(since they should all be identical.) - -The third multiprocess_step, ``mp_summarize``, then is handled in single-process mode and runs the -``write_tables`` model, writing the results, but also leaving the tables in the pipeline, with -essentially the same tables and results as if the whole simulation had been run as a single process. - -Shared Data -~~~~~~~~~~~ - -Although multiprocessing subprocesses each have their apportioned pipeline, they also share some -data passed to them by the parent process: - - * read-only shared data such as skim matrices - * read-write shared memory when needed. For example when school and work modeled destinations by zone are compared to target zone sizes (as calculated by the size terms). - -Outputs -~~~~~~~ - -When multiprocessing is run, the following additional outputs are created, which are useful for understanding how multiprocessing works: - - * run_list.txt - which contains the expanded model run list with additional annotation for single and multiprocessed steps - * Log files for each multiprocess step and process, for example ``mp_households_0-activitysim.log`` and ``mp_households_1-activitysim.log`` - * Pipeline file for each multiprocess step and process, for example ``mp_households_0-pipeline.h5`` - * mem.csv - memory used for each step - * breadcrumbs.yaml - multiprocess global info - -See the :ref:`multiprocessing_in_detail` section for more detail. - - -.. index:: data tables -.. index:: tables -.. index:: data schema - -Data Schema ------------ - -The ActivitySim data schema depends on the sub-models implemented. The data schema listed below is for -the primary TM1 example model. 
These tables and skims are defined in the :mod:`activitysim.abm.tables` package. - -.. index:: constants -.. index:: households -.. index:: input store -.. index:: land use -.. index:: persons -.. index:: size terms -.. index:: time windows table -.. index:: tours -.. index:: trips - -Data Tables -~~~~~~~~~~~ - -The following tables are currently implemented: - - * households - household attributes for each household being simulated. Index: ``household_id`` (see ``activitysim.abm.tables.households.py``) - * landuse - zonal land use (such as population and employment) attributes. Index: ``zone_id`` (see ``activitysim.abm.tables.landuse.py``) - * persons - person attributes for each person being simulated. Index: ``person_id`` (see ``activitysim.abm.tables.persons.py``) - * time windows - manages person time windows throughout the simulation. See :ref:`time_windows`. Index: ``person_id`` (see the person_windows table create decorator in ``activitysim.abm.tables.time_windows.py``) - * tours - tour attributes for each tour (mandatory, non-mandatory, joint, and atwork-subtour) being simulated. Index: ``tour_id`` (see ``activitysim.abm.models.util.tour_frequency.py``) - * trips - trip attributes for each trip being simulated. Index: ``trip_id`` (see ``activitysim.abm.models.stop_frequency.py``) - -A few additional tables are also used, which are not really tables, but classes: - - * input store - reads input data tables from the input data store - * constants - various constants used throughout the model system, such as person type codes - * shadow pricing - shadow price calculator and associated utility methods, see :ref:`shadow_pricing` - * size terms - created by reading the ``destination_choice_size_terms.csv`` input file. Index - ``segment`` (see ``activitysim.abm.tables.size_terms.py``) - * skims - see :ref:`skims` - * table dictionary - stores which tables should be registered as random number generator channels for restartability of the pipeline - -Data Schema -~~~~~~~~~~~ - -The following table lists the pipeline data tables, each final field, the data type, the step that created it, and the -number of columns and rows in the table at the time of creation. The ``other_resources\scripts\make_pipeline_output.py`` script -uses the information stored in the pipeline file to create the table below for a small sample of households. 
- -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| Table | Field | DType | Creator |NCol |NRow | -+============================+===============================+=========+==============================+======+======+ -| accessibility | auPkRetail | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | auPkTotal | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | auOpRetail | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | auOpTotal | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | trPkRetail | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | trPkTotal | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | trOpRetail | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | trOpTotal | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | nmRetail | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| accessibility | nmTotal | float32 | compute_accessibility | 10 | 1454 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | TAZ | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | SERIALNO | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | PUMA5 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | income | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hhsize | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | HHT | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | UNITTYPE | int64 | initialize_households | 65 | 100 | 
-+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | NOC | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | BLDGSZ | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | TENURE | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | VEHICL | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hinccat1 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hinccat2 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hhagecat | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hsizecat | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hfamily | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hunittype | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hNOCcat | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hwrkrcat | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h0004 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h0511 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h1215 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h1617 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h1824 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h2534 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h3549 | int64 | initialize_households | 65 | 
100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h5064 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h6579 | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | h80up | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_workers | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hwork_f | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hwork_p | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | huniv | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hnwork | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hretire | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hpresch | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hschpred | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hschdriv | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | htypdwel | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hownrent | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hadnwst | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hadwpst | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hadkids | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | bucketBin | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | originalPUMA | int64 | 
initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hmultiunit | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | chunk_id | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | income_in_thousands | float64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | income_segment | int32 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | median_value_of_time | float64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | hh_value_of_time | float64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_non_workers | int64 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_drivers | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_adults | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_children | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_young_children | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_children_5_to_15 | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_children_16_to_17 | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_college_age | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | num_young_adults | int8 | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | non_family | bool | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | family | bool | initialize_households | 65 | 100 | -+----------------------------+-------------------------------+---------+------------------------------+------+------+ -| households | home_is_urban | bool | initialize_households | 65 | 100 | 
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | home_is_rural                 | bool    | initialize_households        | 65   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | auto_ownership                | int64   | initialize_households        | 65   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | hh_work_auto_savings_ratio    | float32 | workplace_location           | 66   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | num_under16_not_at_school     | int8    | cdap_simulate                | 73   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | num_travel_active             | int8    | cdap_simulate                | 73   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | num_travel_active_adults      | int8    | cdap_simulate                | 73   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | num_travel_active_preschoolers| int8    | cdap_simulate                | 73   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | num_travel_active_children    | int8    | cdap_simulate                | 73   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 |num_travel_active_non_presch   | int8    | cdap_simulate                | 73   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | participates_in_jtf_model     | int8    | cdap_simulate                | 73   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | joint_tour_frequency          | object  | joint_tour_frequency         | 75   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| households                 | num_hh_joint_tours            | int8    | joint_tour_frequency         | 75   | 100  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| joint_tour_participants    | tour_id                       | int64   | joint_tour_participation     | 4    | 13   |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| joint_tour_participants    | household_id                  | int64   | joint_tour_participation     | 4    | 13   |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| joint_tour_participants    | person_id                     | int64   | joint_tour_participation     | 4    | 13   |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| joint_tour_participants    | participant_num               | int64   | joint_tour_participation     | 4    | 13   |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | DISTRICT                      | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | SD                            | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | county_id                     | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | TOTHH                         | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | HHPOP                         | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | TOTPOP                        | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | EMPRES                        | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | SFDU                          | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | MFDU                          | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | HHINCQ1                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | HHINCQ2                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | HHINCQ3                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | HHINCQ4                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | TOTACRE                       | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | RESACRE                       | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | CIACRE                        | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | SHPOP62P                      | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | TOTEMP                        | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | AGE0004                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | AGE0519                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | AGE2044                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | AGE4564                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | AGE65P                        | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | RETEMPN                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | FPSEMPN                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | HEREMPN                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | OTHEMPN                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | AGREMPN                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | MWTEMPN                       | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | PRKCST                        | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | OPRKCST                       | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | area_type                     | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | HSENROLL                      | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | COLLFTE                       | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | COLLPTE                       | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | TOPOLOGY                      | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | TERMINAL                      | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | ZERO                          | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | hhlds                         | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | sftaz                         | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | gqpop                         | int64   | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | household_density             | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | employment_density            | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| land_use                   | density_index                 | float64 | initialize_landuse           | 44   | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 4                             | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 5                             | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 6                             | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 7                             | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 8                             | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 9                             | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 10                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 11                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 12                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 13                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 14                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 15                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 16                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 17                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 18                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 19                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 20                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 21                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 22                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 23                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| person_windows             | 24                            | int8    | initialize_households        | 21   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | household_id                  | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | age                           | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | RELATE                        | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | ESR                           | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | GRADE                         | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | PNUM                          | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | PAUG                          | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | DDP                           | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | sex                           | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | WEEKS                         | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | HOURS                         | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | MSP                           | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | POVERTY                       | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | EARNS                         | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | pagecat                       | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | pemploy                       | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | pstudent                      | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | ptype                         | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | padkid                        | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | age_16_to_19                  | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | age_16_p                      | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | adult                         | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | male                          | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | female                        | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_non_worker                | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_retiree                   | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_preschool_kid             | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_driving_kid               | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_school_kid                | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_full_time                 | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_part_time                 | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_university                | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | student_is_employed           | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | nonstudent_to_school          | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | is_student                    | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | is_gradeschool                | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | is_highschool                 | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | is_university                 | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | school_segment                | int8    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | is_worker                     | bool    | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | home_taz                      | int64   | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | value_of_time                 | float64 | initialize_households        | 42   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | school_taz                    | int32   | school_location              | 45   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | distance_to_school            | float32 | school_location              | 45   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | roundtrip_auto_time_to_school | float32 | school_location              | 45   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | workplace_taz                 | int32   | workplace_location           | 52   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | distance_to_work              | float32 | workplace_location           | 52   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | workplace_in_cbd              | bool    | workplace_location           | 52   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | work_zone_area_type           | float64 | workplace_location           | 52   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | roundtrip_auto_time_to_work   | float32 | workplace_location           | 52   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | work_auto_savings             | float32 | workplace_location           | 52   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | work_auto_savings_ratio       | float32 | workplace_location           | 52   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | free_parking_at_work          | bool    | free_parking                 | 53   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | cdap_activity                 | object  | cdap_simulate                | 59   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | cdap_rank                     | int64   | cdap_simulate                | 59   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | travel_active                 | bool    | cdap_simulate                | 59   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | under16_not_at_school         | bool    | cdap_simulate                | 59   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_preschool_kid_at_home     | bool    | cdap_simulate                | 59   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | has_school_kid_at_home        | bool    | cdap_simulate                | 59   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | mandatory_tour_frequency      | object  | mandatory_tour_frequency     | 64   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | work_and_school_and_worker    | bool    | mandatory_tour_frequency     | 64   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | work_and_school_and_student   | bool    | mandatory_tour_frequency     | 64   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_mand                      | int8    | mandatory_tour_frequency     | 64   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_work_tours                | int8    | mandatory_tour_frequency     | 64   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_joint_tours               | int8    | joint_tour_participation     | 65   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | non_mandatory_tour_frequency  | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_non_mand                  | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_escort_tours              | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_eatout_tours              | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_shop_tours                | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_maint_tours               | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_discr_tours               | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_social_tours              | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| persons                    | num_non_escort_tours          | int8    | non_mandatory_tour_frequency | 74   | 271  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| school_destination_size    | gradeschool                   | float64 | initialize_households        | 3    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| school_destination_size    | highschool                    | float64 | initialize_households        | 3    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| school_destination_size    | university                    | float64 | initialize_households        | 3    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| school_modeled_size        | gradeschool                   | int32   | school_location              | 3    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| school_modeled_size        | highschool                    | int32   | school_location              | 3    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| school_modeled_size        | university                    | int32   | school_location              | 3    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | person_id                     | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tour_type                     | object  | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tour_type_count               | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tour_type_num                 | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tour_num                      | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tour_count                    | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tour_category                 | object  | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | number_of_participants        | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | destination                   | int32   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | origin                        | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | household_id                  | int64   | mandatory_tour_frequency     | 11   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | start                         | int8    | mandatory_tour_scheduling    | 15   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | end                           | int8    | mandatory_tour_scheduling    | 15   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | duration                      | int8    | mandatory_tour_scheduling    | 15   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tdd                           | int64   | mandatory_tour_scheduling    | 15   | 153  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | composition                   | object  | joint_tour_composition       | 16   | 159  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | tour_mode                     | object  | tour_mode_choice_simulate    | 17   | 319  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | atwork_subtour_frequency      | object  | atwork_subtour_frequency     | 19   | 344  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | parent_tour_id                | float64 | atwork_subtour_frequency     | 19   | 344  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | stop_frequency                | object  | stop_frequency               | 21   | 344  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| tours                      | primary_purpose               | object  | stop_frequency               | 21   | 344  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | person_id                     | int64   | stop_frequency               | 7    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | household_id                  | int64   | stop_frequency               | 7    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | tour_id                       | int64   | stop_frequency               | 7    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | primary_purpose               | object  | stop_frequency               | 7    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | trip_num                      | int64   | stop_frequency               | 7    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | outbound                      | bool    | stop_frequency               | 7    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | trip_count                    | int64   | stop_frequency               | 7    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | purpose                       | object  | trip_purpose                 | 8    | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | destination                   | int32   | trip_destination             | 11   | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | origin                        | int32   | trip_destination             | 11   | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | failed                        | bool    | trip_destination             | 11   | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | depart                        | float64 | trip_scheduling              | 11   | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| trips                      | trip_mode                     | object  | trip_mode_choice             | 12   | 859  |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_destination_size | work_high                     | float64 | initialize_households        | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_destination_size | work_low                      | float64 | initialize_households        | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_destination_size | work_med                      | float64 | initialize_households        | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_destination_size | work_veryhigh                 | float64 | initialize_households        | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_modeled_size     | work_high                     | int32   | workplace_location           | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_modeled_size     | work_low                      | int32   | workplace_location           | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_modeled_size     | work_med                      | int32   | workplace_location           | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-| workplace_modeled_size     | work_veryhigh                 | int32   | workplace_location           | 4    | 1454 |
-+----------------------------+-------------------------------+---------+------------------------------+------+------+
-
-.. index:: skims
-.. index:: omx_file
-.. index:: skim matrices
-
-.. _skims:
-
-Skims
-~~~~~
-
-The skims class defines injectables to access the skim matrices, which it reads from the
-omx_file on disk. The injectables and omx_file for the example are listed below.
-The skims are float64 matrices.
-
-Skims are named ___
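
As context for the omx_file mechanics described in the removed text above, here is a minimal
sketch of inspecting a skim file directly with the ``openmatrix`` library. It is not part of
the diff, and the filename ``skims.omx`` is an assumed example::

    import openmatrix as omx

    # Open the OMX skim file read-only; omx.File supports the context-manager protocol.
    with omx.open_file("skims.omx", mode="r") as skims:
        names = skims.list_matrices()  # names of the skim matrices stored in the file
        print(skims.shape())           # zone-to-zone dimensions, e.g. (1454, 1454)
        # Read one skim into memory as a numpy array (float64 in this example).
        first = skims[names[0]][:]
        print(names[0], first.mean())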