Skip to content

Commit

Permalink
Merge pull request #11 from camsys/data-type-op-pd-cat
Browse files Browse the repository at this point in the history
latest updates from other PRs
  • Loading branch information
i-am-sijia authored Feb 13, 2024
2 parents a3cb622 + 1f40c06 commit db03dae
Show file tree
Hide file tree
Showing 29 changed files with 827 additions and 630 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/core_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
mamba install --yes \
"psutil=5.9.5" \
"pydantic=1.10.13" \
"pydantic=2.6.1" \
"pypyr=5.8.0" \
"pytables=3.6.1" \
"pytest-cov" \
Expand Down Expand Up @@ -149,7 +149,7 @@ jobs:
mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
mamba install --yes \
"psutil=5.9.5" \
"pydantic=1.10.13" \
"pydantic=2.6.1" \
"pypyr=5.8.0" \
"pytables=3.6.1" \
"pytest-cov" \
Expand Down Expand Up @@ -247,7 +247,7 @@ jobs:
mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
mamba install --yes \
"psutil=5.9.5" \
"pydantic=1.10.13" \
"pydantic=2.6.1" \
"pypyr=5.8.0" \
"pytables=3.6.1" \
"pytest-cov" \
Expand Down Expand Up @@ -344,7 +344,7 @@ jobs:
mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
mamba install --yes \
"psutil=5.9.5" \
"pydantic=1.10.13" \
"pydantic=2.6.1" \
"pypyr=5.8.0" \
"pytables=3.6.1" \
"pytest-cov" \
Expand Down Expand Up @@ -411,7 +411,7 @@ jobs:
mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
mamba install --yes \
"psutil=5.9.5" \
"pydantic=1.10.13" \
"pydantic=2.6.1" \
"pypyr=5.8.0" \
"pytables=3.6.1" \
"pytest-cov" \
Expand Down Expand Up @@ -477,7 +477,7 @@ jobs:
mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
mamba install --yes \
"psutil=5.9.5" \
"pydantic=1.10.13" \
"pydantic=2.6.1" \
"pypyr=5.8.0" \
"pytables=3.6.1" \
"pytest-cov" \
Expand Down
19 changes: 16 additions & 3 deletions activitysim/abm/models/non_mandatory_tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import logging
import warnings
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -200,12 +201,24 @@ def non_mandatory_tour_frequency(
model_settings_file_name,
)

# FIXME kind of tacky both that we know to add this here and del it below
# 'tot_tours' is used in model_spec expressions
alternatives = simulate.read_model_alts(
state, "non_mandatory_tour_frequency_alternatives.csv", set_index=None
)
alternatives["tot_tours"] = alternatives.sum(axis=1)
if "tot_tours" not in alternatives.columns:
# add a column for total tours
alternatives["tot_tours"] = alternatives.sum(axis=1)
warnings.warn(
"The 'tot_tours' column may not be automatically added in the future.",
FutureWarning,
)
else:
# tot_tours already exists, check if it is consistent with legacy behavior
if not (alternatives["tot_tours"] == alternatives.sum(axis=1)).all():
warnings.warn(
"The 'tot_tours' column in non_mandatory_tour_frequency_alternatives.csv "
"does not match the sum of the other columns.",
RuntimeWarning,
)

# filter based on results of CDAP
choosers = persons_merged
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/util/canonical_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def determine_flavors_from_alts_file(
flavors = {
c: int(alts[c].max() + max_extension)
for c in alts.columns
if all(alts[c].astype(str).str.isnumeric())
if all(alts[c].astype(str).str.isnumeric()) and (c != "tot_tours")
}
valid_flavors = all(
[(isinstance(flavor, str) & (num >= 0)) for flavor, num in flavors.items()]
Expand Down
4 changes: 2 additions & 2 deletions activitysim/abm/models/util/logsums.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ def compute_location_choice_logsums(
computed logsums with same index as choosers
"""
if isinstance(model_settings, dict):
model_settings = TourLocationComponentSettings.parse_obj(model_settings)
model_settings = TourLocationComponentSettings.model_validate(model_settings)
if isinstance(logsum_settings, dict):
logsum_settings = TourModeComponentSettings.parse_obj(logsum_settings)
logsum_settings = TourModeComponentSettings.model_validate(logsum_settings)

trace_label = tracing.extend_trace_label(trace_label, "compute_logsums")
logger.debug(f"Running compute_logsums with {choosers.shape[0]:d} choosers")
Expand Down
59 changes: 59 additions & 0 deletions activitysim/abm/models/util/test/test_vehicle_type_alts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# ActivitySim
# See full license in LICENSE.txt.

import pandas as pd
import pandas.testing as pdt

from activitysim.abm.models.vehicle_type_choice import (
get_combinatorial_vehicle_alternatives,
construct_model_alternatives,
VehicleTypeChoiceSettings,
)
from activitysim.core import workflow


def test_vehicle_type_alts():
    """Vehicle-type alternatives are built combinatorially, then pruned to the data.

    Verifies that get_combinatorial_vehicle_alternatives produces the full
    cartesian product, and that construct_model_alternatives drops any
    combination absent from the supplied vehicle type data while keeping
    the wide/long tables aligned.
    """
    state = workflow.State.make_default(__file__)

    category_values = {
        "body_type": ["Car", "SUV"],
        "fuel_type": ["Gas", "BEV"],
        "age": [1, 2, 3],
    }

    wide, long = get_combinatorial_vehicle_alternatives(category_values)

    # full cartesian product: 2 body types x 2 fuel types x 3 ages = 12
    assert len(long) == 12, "alts_long should have 12 rows"
    assert len(wide) == 12, "alts_wide should have 12 rows"

    settings = VehicleTypeChoiceSettings.model_construct()
    settings.combinatorial_alts = category_values
    settings.PROBS_SPEC = None
    settings.WRITE_OUT_ALTS_FILE = False

    # vehicle type data deliberately omits some of the 12 combinations
    vehicle_type_data = pd.DataFrame(
        data={
            "body_type": ["Car", "Car", "Car", "SUV", "SUV"],
            "fuel_type": ["Gas", "Gas", "BEV", "Gas", "BEV"],
            "age": ["1", "2", "3", "1", "2"],
            "dummy_data": [1, 2, 3, 4, 5],
        },
        index=[0, 1, 2, 3, 4],
    )

    wide, long = construct_model_alternatives(
        state, settings, category_values, vehicle_type_data
    )

    # only the alternatives present in the data file should survive
    assert len(long) == 5, "alts_long should have 5 rows"

    # indexes must match so that choices line up with alternatives downstream
    pdt.assert_index_equal(long.index, wide.index)

    # downstream configs rely on this exact column order
    pdt.assert_index_equal(
        long.columns, pd.Index(["body_type", "age", "fuel_type"])
    )
19 changes: 15 additions & 4 deletions activitysim/abm/models/vehicle_type_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,19 @@ def construct_model_alternatives(
), f"missing vehicle data for alternatives:\n {missing_alts}"
else:
# eliminate alternatives if no vehicle type data
# if this happens, alts_wide is not the same length as alts_long
num_alts_before_filer = len(alts_wide)
alts_wide = alts_wide[alts_wide._merge != "left_only"]
logger.warning(
f"Removed {num_alts_before_filer - len(alts_wide)} alternatives not included in input vehicle type data."
)
# need to also remove any alts from alts_long
alts_long.set_index(["body_type", "age", "fuel_type"], inplace=True)
alts_long = alts_long[
alts_long.index.isin(
alts_wide.set_index(["body_type", "age", "fuel_type"]).index
)
].reset_index()
alts_long.index = alts_wide.index
alts_wide.drop(columns="_merge", inplace=True)

# converting age to integer to allow interactions in utilities
Expand Down Expand Up @@ -481,11 +492,11 @@ def iterate_vehicle_type_choice(
alts = (
alts_long[alts_long.columns]
.apply(lambda row: "_".join(row.values.astype(str)), axis=1)
.values
.to_dict()
)
else:
alts = model_spec.columns
choices["vehicle_type"] = choices["vehicle_type"].map(dict(enumerate(alts)))
alts = enumerate(dict(model_spec.columns))
choices["vehicle_type"] = choices["vehicle_type"].map(alts)

# STEP II: append probabilistic vehicle type attributes
if probs_spec_file is not None:
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/tables/shadow_pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,7 +1233,7 @@ def load_shadow_price_calculator(
spc : ShadowPriceCalculator
"""
if not isinstance(model_settings, TourLocationComponentSettings):
model_settings = TourLocationComponentSettings.parse_obj(model_settings)
model_settings = TourLocationComponentSettings.model_validate(model_settings)

num_processes = state.get_injectable("num_processes", 1)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_load_cached_accessibility():
settings = state.settings
input_table_list = settings.input_table_list
input_table_list.append(
configuration.InputTable.parse_obj(
configuration.InputTable.model_validate(
{
"tablename": "accessibility",
"filename": "cached_accessibility.csv",
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/configuration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class PreprocessorSettings(PydanticBase):
The preprocessor will emit rows to a temporary table that match the rows
in this table from the pipeline."""

TABLES: list[str] | None
TABLES: list[str] | None = None
"""Names of the additional tables to be merged for the preprocessor.
Data from these tables will be merged into the primary table, according
Expand Down
4 changes: 2 additions & 2 deletions activitysim/core/configuration/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ def read_settings_file(
include_stack: bool = False,
configs_dir_list: tuple[Path] | None = None,
validator_class: type[PydanticBase] | None = None,
) -> dict | PydanticBase:
) -> PydanticBase | dict:
"""
Load settings from one or more yaml files.
Expand Down Expand Up @@ -817,7 +817,7 @@ def backfill_settings(settings, backfill):
settings.pop("include_settings", None)

if validator_class is not None:
settings = validator_class.parse_obj(settings)
settings = validator_class.model_validate(settings)

if include_stack:
# if we were called recursively, return an updated list of source_file_paths
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def each_nest(nest_spec: dict | LogitNestSpec, type=None, post_order=False):
raise RuntimeError("Unknown nest type '%s' in call to each_nest" % type)

if isinstance(nest_spec, dict):
nest_spec = LogitNestSpec.parse_obj(nest_spec)
nest_spec = LogitNestSpec.model_validate(nest_spec)

for _node, nest in _each_nest(nest_spec, parent_nest=Nest(), post_order=post_order):
if type is None or (type == nest.type):
Expand Down
1 change: 0 additions & 1 deletion activitysim/core/mp_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,6 @@ def setup_injectables_and_logging(injectables, locutor: bool = True) -> workflow
state = workflow.State()
state = state.initialize_filesystem(**injectables)
state.settings = injectables.get("settings", Settings())
# state.settings = Settings.parse_obj(injectables.get("settings_package", {}))

# register abm steps and other abm-specific injectables
# by default, assume we are running activitysim.abm
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ def replace_coefficients(nest: LogitNestSpec):
coefficients = coefficients["value"].to_dict()

if not isinstance(nest_spec, LogitNestSpec):
nest_spec = LogitNestSpec.parse_obj(nest_spec)
nest_spec = LogitNestSpec.model_validate(nest_spec)

replace_coefficients(nest_spec)

Expand Down
12 changes: 6 additions & 6 deletions activitysim/core/test/test_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_csv_reader(seed_households, state):
"""

settings = yaml.load(settings_yaml, Loader=yaml.SafeLoader)
settings = configuration.Settings.parse_obj(settings)
settings = configuration.Settings.model_validate(settings)
state.settings = settings

hh_file = state.filesystem.get_data_dir()[0].joinpath("households.csv")
Expand All @@ -94,7 +94,7 @@ def test_hdf_reader1(seed_households, state):
"""

settings = yaml.load(settings_yaml, Loader=yaml.SafeLoader)
settings = configuration.Settings.parse_obj(settings)
settings = configuration.Settings.model_validate(settings)
state.settings = settings

hh_file = state.filesystem.get_data_dir()[0].joinpath("households.h5")
Expand All @@ -120,7 +120,7 @@ def test_hdf_reader2(seed_households, state):
"""

settings = yaml.load(settings_yaml, Loader=yaml.SafeLoader)
settings = configuration.Settings.parse_obj(settings)
settings = configuration.Settings.model_validate(settings)
state.settings = settings

hh_file = state.filesystem.get_data_dir()[0].joinpath("households.h5")
Expand All @@ -145,7 +145,7 @@ def test_hdf_reader3(seed_households, state):
"""

settings = yaml.load(settings_yaml, Loader=yaml.SafeLoader)
settings = configuration.Settings.parse_obj(settings)
settings = configuration.Settings.model_validate(settings)
state.settings = settings

hh_file = state.filesystem.get_data_dir()[0].joinpath("input_data.h5")
Expand All @@ -169,7 +169,7 @@ def test_missing_filename(seed_households, state):
"""

settings = yaml.load(settings_yaml, Loader=yaml.SafeLoader)
settings = configuration.Settings.parse_obj(settings)
settings = configuration.Settings.model_validate(settings)
state.settings = settings

with pytest.raises(AssertionError) as excinfo:
Expand All @@ -191,7 +191,7 @@ def test_create_input_store(seed_households, state):
"""

settings = yaml.load(settings_yaml, Loader=yaml.SafeLoader)
settings = configuration.Settings.parse_obj(settings)
settings = configuration.Settings.model_validate(settings)
state.settings = settings

hh_file = state.filesystem.get_data_dir()[0].joinpath("households.csv")
Expand Down
29 changes: 28 additions & 1 deletion activitysim/core/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas.testing as pdt
import pytest

from ..util import other_than, quick_loc_df, quick_loc_series, reindex
from ..util import other_than, quick_loc_df, quick_loc_series, reindex, df_from_dict


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -62,3 +62,30 @@ def test_quick_loc_series():

assert list(quick_loc_series(loc_list, series)) == attrib_list
assert list(quick_loc_series(loc_list, series)) == list(series.loc[loc_list])


def test_df_from_dict():
    """df_from_dict should restore the caller's index order when assembling a frame.

    Builds two series from the same frame — one with a scrambled (sorted-by-value)
    index and one untouched — then checks that df_from_dict realigns both back to
    the original index order.
    """
    index = [1, 2, 3, 4, 5]
    df = pd.DataFrame({"attrib": [1, 2, 2, 3, 1]}, index=index)

    # scramble index order for one expression and not the other
    # (renamed from `sorted` to avoid shadowing the builtin)
    sorted_attrib = df.eval("attrib.sort_values()")
    not_sorted = df.eval("attrib * 1")

    # sanity-check the two expressions before feeding them to df_from_dict
    pdt.assert_series_equal(
        sorted_attrib,
        pd.Series([1, 1, 2, 2, 3], index=[1, 5, 2, 3, 4]),
        check_names=False,
    )
    pdt.assert_series_equal(not_sorted, df.attrib, check_names=False)

    # create a new dataframe from the above expressions
    values = {"sorted": sorted_attrib, "not_sorted": not_sorted}
    new_df = df_from_dict(values, index)

    # index should become unscrambled and back to the same order as before
    expected_df = pd.DataFrame(
        {"sorted": [1, 2, 2, 3, 1], "not_sorted": [1, 2, 2, 3, 1]}, index=index
    )

    pdt.assert_frame_equal(new_df, expected_df)
Loading

0 comments on commit db03dae

Please sign in to comment.