Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BayDAG Contribution #10: NMTF Person Available Periods #776

Merged
merged 11 commits into from
Apr 1, 2024
29 changes: 20 additions & 9 deletions activitysim/abm/models/non_mandatory_tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
import pandas as pd

from activitysim.abm.models.util import annotate
from activitysim.abm.models.util.overlap import person_max_window
from activitysim.abm.models.util.overlap import (
person_max_window,
person_available_periods,
)
from activitysim.abm.models.util.school_escort_tours_trips import (
recompute_tour_count_statistics,
)
Expand Down Expand Up @@ -230,7 +233,11 @@ def non_mandatory_tour_frequency(
# - preprocessor
preprocessor_settings = model_settings.preprocessor
if preprocessor_settings:
locals_dict = {"person_max_window": lambda x: person_max_window(state, x)}

locals_dict = {
"person_max_window": lambda x: person_max_window(state, x),
"person_available_periods": lambda x: person_available_periods(state, x),
}

expressions.assign_columns(
state,
Expand Down Expand Up @@ -324,6 +331,9 @@ def non_mandatory_tour_frequency(

choices_list.append(choices)

# FIXME only want to keep actual purposes, adding cols in alts will mess this up
# this is complicated by canonical_ids calculated based on alts if not specified explicitly
# thus, adding column to input alts will change IDs and break estimation mode....
dhensle marked this conversation as resolved.
Show resolved Hide resolved
del alternatives["tot_tours"] # del tot_tours column we added above

# The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
Expand Down Expand Up @@ -418,13 +428,14 @@ def non_mandatory_tour_frequency(
if estimator:
# make sure they created the right tours
survey_tours = estimation.manager.get_survey_table("tours").sort_index()
non_mandatory_survey_tours = survey_tours[
survey_tours.tour_category == "non_mandatory"
]
assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
assert non_mandatory_survey_tours.index.equals(
non_mandatory_tours.sort_index().index
)
# FIXME below check needs to remove the pure-escort tours from the survey tours table
i-am-sijia marked this conversation as resolved.
Show resolved Hide resolved
# non_mandatory_survey_tours = survey_tours[
# survey_tours.tour_category == "non_mandatory"
# ]
# assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
# assert non_mandatory_survey_tours.index.equals(
# non_mandatory_tours.sort_index().index
# )

# make sure they created tours with the expected tour_ids
columns = ["person_id", "household_id", "tour_type", "tour_category"]
Expand Down
95 changes: 95 additions & 0 deletions activitysim/abm/models/util/overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,98 @@ def person_max_window(state: workflow.State, persons):
max_window.index = persons.index

return max_window


def calculate_consecutive(array):
    """
    Return the length of the longest run of consecutive 1s in each row.

    Parameters
    ----------
    array : 2D np.ndarray with one row per record and one column per bin.
        Entries equal to 1 extend a run; any other value breaks it.

    Returns
    -------
    1D np.ndarray of int with one entry per row of `array`.
    Rows that contain no 1s at all get 0.
    """
    n_rows = array.shape[0]

    # Pad a zero column on either side so that runs touching the first or
    # last column still produce both a rising and a falling edge below
    pad = np.zeros((n_rows, 1), dtype=int)
    array_ext = np.column_stack((pad, array, pad))

    # A +1 in the row-wise diff marks the start of a run of 1s, a -1 marks
    # the position just past its end
    diffs = np.diff((array_ext == 1).astype(int), axis=1)
    starts = np.argwhere(diffs == 1)
    stops = np.argwhere(diffs == -1)

    # argwhere returns hits in row-major order and starts/stops alternate
    # within a row, so the k-th start pairs with the k-th stop
    intvs = stops[:, 1] - starts[:, 1]

    # Number of runs per row. minlength keeps trailing rows that have no runs
    # so that the output always has exactly one entry per input row.
    runs_per_row = np.bincount(starts[:, 0], minlength=n_rows)

    # No rows, or no runs anywhere: every row's longest run is 0
    if runs_per_row.size == 0 or runs_per_row.max() == 0:
        return np.zeros(n_rows, dtype=int)

    # Scatter the run lengths into a (rows, max runs per row) array, padded
    # with zeros, so the per-row maximum can be taken in one vectorized op
    mask = np.arange(runs_per_row.max()) < runs_per_row[:, None]
    intvs2D = mask.astype(float)
    intvs2D[mask] = intvs

    # Get max along each row as final output
    out = intvs2D.max(1).astype(int)
    return out


def person_available_periods(
    state: workflow.State, persons, start_bin=None, end_bin=None, continuous=False
):
    """
    Returns the number of available time period bins for each person in persons.

    The calculation can be limited to the bins in [start_bin, end_bin].
    With continuous=False the result is the total number of available time bins
    in that range; with continuous=True it is only the length of the longest
    run of consecutive available bins in that range.

    This is equivalent to person_max_window if no start/end bins are provided
    and continuous=True.

    time bins are inclusive, i.e. [start_bin, end_bin]

    e.g.
    available out of timetable has dummy first and last bins
    available = [
        [1,1,1,1,1,1,1,1,1,1,1,1],
        [1,1,0,1,1,0,0,1,0,1,0,1],
        #-,0,1,2,3,4,5,6,7,8,9,-  time bins
    ]
    returns:
        for start_bin=None, end_bin=None, continuous=False: (10, 5)
        for start_bin=None, end_bin=None, continuous=True: (10, 2)
        for start_bin=5, end_bin=9, continuous=False: (5, 2)
        for start_bin=5, end_bin=9, continuous=True: (5, 1)

    Parameters
    ----------
    state : workflow.State, used to look up the "timetable" injectable
    persons : table of persons; only its index (person IDs) is used
    start_bin : (int) first time bin to include, counting from 0;
        None means no lower limit
    end_bin : (int) last time bin to include (inclusive);
        None means no upper limit
    continuous : (bool) count all available bins if False,
        or just the largest continuous run if True

    Returns
    -------
    pd.Series of the number of available time bins indexed by person ID
    """
    timetable = state.get_injectable("timetable")

    # the timetable is queried with a series of person IDs indexed by person ID
    s = pd.Series(persons.index.values, index=persons.index)

    # ndarray with one row per person and one column per time period
    # array value of 1 where free periods and 0 elsewhere
    # first and last bins are dummys in the time table
    # so if you have 48 half hour time periods, shape is (len(persons), 50)
    available = timetable.individually_available(s)

    # Create a mask to exclude the dummy bins as well as the bins before the
    # starting bin and after the ending bin
    mask = np.ones(available.shape[1], dtype=bool)
    mask[0] = False
    mask[-1] = False
    if start_bin is not None:
        # +1 needed due to dummy first bin
        mask[: start_bin + 1] = False
    if end_bin is not None:
        # +2 for dummy first bin and inclusive end_bin
        mask[end_bin + 2 :] = False

    # Keep only the columns for the requested time bins
    masked_array = available[:, mask]

    if continuous:
        # longest run of consecutive available bins for each person
        availability = calculate_consecutive(masked_array)
    else:
        # total number of available bins for each person
        availability = np.sum(masked_array, axis=1)

    return pd.Series(availability, index=persons.index)
Loading