diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index d36c017a3..f82a34f67 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -32,30 +32,54 @@ class AccessibilitySettings(PydanticReadable): SPEC: str = "accessibility.csv" """Filename for the accessibility specification (csv) file.""" + explicit_chunk: int = 0 + """If > 0, use this chunk size instead of adaptive chunking.""" + @nb.njit def _accumulate_accessibility(arr, orig_zone_count, dest_zone_count): assert arr.size == orig_zone_count * dest_zone_count - arr2 = arr.reshape((orig_zone_count, dest_zone_count)) + assert arr.ndim == 1 + i = 0 result = np.empty((orig_zone_count,), dtype=arr.dtype) for o in range(orig_zone_count): x = 0 for d in range(dest_zone_count): - x += arr2[o, d] + x += arr[i] + i += 1 result[o] = np.log1p(x) return result def compute_accessibilities_for_zones( - state, - accessibility_df, - land_use_df, - assignment_spec, - constants, - network_los, - trace_label, - chunk_sizer, + state: workflow.State, + accessibility_df: pd.DataFrame, + land_use_df: pd.DataFrame, + assignment_spec: dict, + constants: dict, + network_los: los.Network_LOS, + trace_label: str, + chunk_sizer: chunk.ChunkSizer, ): + """ + Compute accessibility for each zone in land use file using expressions from accessibility_spec. + + Parameters + ---------- + state : workflow.State + accessibility_df : pd.DataFrame + land_use_df : pd.DataFrame + assignment_spec : dict + constants : dict + network_los : los.Network_LOS + trace_label : str + chunk_sizer : chunk.ChunkSizer + + Returns + ------- + accessibility_df : pd.DataFrame + The accessibility_df is updated in place. + """ orig_zones = accessibility_df.index.values dest_zones = land_use_df.index.values @@ -215,13 +239,16 @@ def compute_accessibility( ) accessibilities_list = [] + explicit_chunk_size = model_settings.explicit_chunk for ( _i, chooser_chunk, _chunk_trace_label, chunk_sizer, - ) in chunk.adaptive_chunked_choosers(state, accessibility_df, trace_label): + ) in chunk.adaptive_chunked_choosers( + state, accessibility_df, trace_label, explicit_chunk_size=explicit_chunk_size + ): accessibilities = compute_accessibilities_for_zones( state, chooser_chunk, diff --git a/activitysim/abm/test/test_agg_accessibility.py b/activitysim/abm/test/test_agg_accessibility.py new file mode 100644 index 000000000..4015e35cb --- /dev/null +++ b/activitysim/abm/test/test_agg_accessibility.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import logging + +import pytest + +from activitysim.abm import models # noqa: F401 +from activitysim.abm.models.accessibility import ( + AccessibilitySettings, + compute_accessibility, +) +from activitysim.core import workflow + +logger = logging.getLogger(__name__) + + +@pytest.fixture +def state() -> workflow.State: + state = workflow.create_example("prototype_mtc", temp=True) + + state.settings.models = [ + "initialize_landuse", + "initialize_households", + "compute_accessibility", + ] + state.settings.chunk_size = 0 + state.settings.sharrow = False + + state.run.by_name("initialize_landuse") + state.run.by_name("initialize_households") + return state + + +def test_simple_agg_accessibility(state, dataframe_regression): + state.run.by_name("compute_accessibility") + df = state.get_dataframe("accessibility") + dataframe_regression.check(df, basename="simple_agg_accessibility") + + +def test_agg_accessibility_explicit_chunking(state, dataframe_regression): + # set top level settings + state.settings.chunk_size = 0 + state.settings.sharrow = False + state.settings.chunk_training_mode = "explicit" + + # read the accessibility settings and override the explicit chunk size to 5 + model_settings = AccessibilitySettings.read_settings_file( + state.filesystem, "accessibility.yaml" + ) + model_settings.explicit_chunk = 5 + + compute_accessibility( + state, + state.get_dataframe("land_use"), + state.get_dataframe("accessibility"), + state.get("network_los"), + model_settings, + model_settings_file_name="accessibility.yaml", + trace_label="compute_accessibility", + output_table_name="accessibility", + ) + df = state.get_dataframe("accessibility") + dataframe_regression.check(df, basename="simple_agg_accessibility") diff --git a/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility.csv b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility.csv new file mode 100644 index 000000000..9c7340509 --- /dev/null +++ b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility.csv @@ -0,0 +1,26 @@ +zone_id,auPkRetail,auPkTotal,auOpRetail,auOpTotal,trPkRetail,trPkTotal,trOpRetail,trOpTotal,nmRetail,nmTotal +0,9.3164942696213568,12.615175743409841,9.3074367804092777,12.607849383502469,7.7642635203141968,11.145248204314596,7.6930860038975712,11.037285967769643,8.1373609284815895,11.726242204251774 +1,9.3168979433052908,12.613460949773618,9.3046270180951538,12.604209004116514,7.5113009238919934,10.950045517942753,7.4270600345597773,10.763101915020352,8.1427168965425558,11.724186002096861 +2,9.2932169855583542,12.580014484201365,9.2862416670099286,12.574901916285086,7.3409752547469438,10.787608449410779,7.2526778560064189,10.574953629615715,8.0503691347033364,11.478912540319461 +3,9.3573494887919679,12.630894217760538,9.3482485710113998,12.623585758033322,7.8733268188651611,11.224171200372194,7.8143652246183066,11.135415940164263,8.3711974981452286,11.775230687719375 +4,9.3435505366989382,12.585069456547828,9.3332621841590289,12.574553613617503,7.5893556698506153,11.082549781423353,7.5495574089217605,11.027965011367975,8.3180592569770333,11.431764418643981 +5,9.2713502507871883,12.523449294093886,9.2657623133569711,12.519697725868093,7.3138724828278905,10.504310979303222,7.0683412975590123,10.251789948959422,7.8382412773559516,11.023737623179843 +6,9.2931944176067329,12.528401489853936,9.2863728392168525,12.52041616561077,7.6419100510389839,10.805002739363189,7.6078784435600868,10.752509743759392,8.0169148075612071,11.108804747288168 +7,9.2678442418060758,12.497146015767587,9.2621330948390046,12.489886065239302,7.5469338237309387,10.834136335989049,7.5014237921006828,10.779320100783975,7.9819505240836239,11.052152868115712 +8,9.1895029665940431,12.42603649432956,9.184035053974922,12.415459802362889,7.188751493522151,10.303186212218705,7.149056566233341,10.260609523175923,7.4156298027129628,10.75866342061707 +9,9.1860041373516861,12.40389009503904,9.1807619960868472,12.396344385917818,7.3793358243378222,10.548674769786773,7.3065218950712447,10.495921674032259,7.5678261562359008,10.694411485785926 +10,9.3200926649609794,12.519242143782318,9.3150950606590026,12.511758212122075,7.4557019917326173,10.875601348026509,7.3483680514593566,10.762777861942512,8.2282865778153074,11.171156639341758 +11,9.3515905766957719,12.600777214457102,9.3402871188008589,12.590072565945791,7.9459651463751646,11.204374985337394,7.8463256139030397,11.074533943328571,8.420517825501955,11.618972560237365 +12,9.3475957875421258,12.610940370257774,9.3373286290969943,12.601590484236365,7.7678939430595539,11.12100647463696,7.691841626412466,11.012476514764131,8.4227464698159356,11.742390115774514 +13,9.3272875917488811,12.61272185509814,9.3195224070699076,12.605842856331305,7.9829144769225122,11.205704184915069,7.9147382904661239,11.09630520845371,8.2936062476422165,11.736593006351654 +14,9.2849351600384047,12.581337475036822,9.2777982690366851,12.575463251817387,7.6566141377182202,10.99707556494131,7.5743974507217873,10.914272271565647,8.0004874415095593,11.541814468777813 +15,9.3121586675513246,12.554715357067975,9.3098515403136357,12.554250369775952,7.0161536446213777,10.534220863424366,6.9452061747018057,10.442447038350544,8.2473032556208885,11.373742456242004 +16,9.2525132367431926,12.480891480108502,9.2515129069576805,12.479315380270007,6.6611995964557575,9.84475304878708,6.5626838669177907,9.7353179337959279,7.6671416984161134,10.785216324011325 +17,9.2493602579025467,12.438990589690656,9.2489616305878055,12.440034826308484,7.0859296965612435,10.25268796871535,6.997755995841743,10.137302158694951,7.5966361055753779,10.414585321652353 +18,9.1690294854761021,12.357455449640511,9.169914338241016,12.359583887732027,6.0886234793709892,9.29599202765759,5.9074690455933911,9.1012171904995682,7.0886934502985248,9.8650009904318754 +19,9.2217425939140494,12.420066322549278,9.2188633511870108,12.41597699421243,6.636652875558787,9.8019044704452742,6.5908204292849781,9.7532014746502398,7.4941970400287934,10.367677845680188 +20,9.3219157803287924,12.515866541333265,9.3251764782856572,12.518960683082542,7.428996554006094,10.580037613397844,7.3427336309884295,10.454321473639835,8.1084359799671955,11.011608267238865 +21,9.2296522071417968,12.543187229493718,9.2196001636905649,12.535205063776825,7.1509239235248376,10.713897532287987,6.9410839531896329,10.463984552764911,7.6379082836081578,11.319586635314565 +22,9.1161497032019767,12.433017912353973,9.1078484333015215,12.426054879223617,6.2116445474246689,9.7707806110979902,6.1182229314057297,9.6877986968503578,6.8876395653603648,10.656699616249735 +23,9.2437967585247165,12.55097406396871,9.2300864314796112,12.541349601021635,7.3227417893504727,10.850763529501046,7.1151212727695663,10.577146937229784,7.7136276931827608,11.346711473571119 +24,9.1982619817999431,12.494596324310164,9.1914370661962863,12.490871942939226,7.2966461173380575,10.729604500822386,7.1803656319823519,10.549488525934116,7.6165179958947329,11.016222756734685 diff --git a/activitysim/cli/main.py b/activitysim/cli/main.py index 423dd6a3c..2a68910ca 100644 --- a/activitysim/cli/main.py +++ b/activitysim/cli/main.py @@ -1,3 +1,6 @@ +from __future__ import annotations + +import logging import os import sys @@ -67,10 +70,11 @@ def main(): sys.exit(workflows.main(sys.argv[2:])) else: sys.exit(asim.execute()) - except Exception: + except Exception as err: # if we are in the debugger, re-raise the error instead of exiting if sys.gettrace() is not None: raise + logging.exception(err) sys.exit(99) diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py index c0e65028d..6ad77a7be 100644 --- a/activitysim/core/chunk.py +++ b/activitysim/core/chunk.py @@ -74,13 +74,27 @@ MODE_CHUNKLESS Do not do chunking, and also do not check or log memory usage, so ActivitySim can focus on performance assuming there is abundant RAM. + +MODE_EXPLICIT + Allow the user to explicitly set a chunk size (number of chooser row per chunk) + for each component. No assessment of overhead is made, and all responsibility + for monitoring RAM usage is and ensuring quality performance is transferred to the + model user. If a component is missing an `explicit_chunk` setting, it is assumed + to be run in a single chunk. """ MODE_RETRAIN = "training" MODE_ADAPTIVE = "adaptive" MODE_PRODUCTION = "production" MODE_CHUNKLESS = "disabled" -TRAINING_MODES = [MODE_RETRAIN, MODE_ADAPTIVE, MODE_PRODUCTION, MODE_CHUNKLESS] +MODE_EXPLICIT = "explicit" +TRAINING_MODES = [ + MODE_RETRAIN, + MODE_ADAPTIVE, + MODE_PRODUCTION, + MODE_CHUNKLESS, + MODE_EXPLICIT, +] # # low level @@ -726,7 +740,7 @@ def __init__( self.cum_overhead = {m: 0 for m in METRICS} self.headroom = None - if self.chunk_training_mode != MODE_CHUNKLESS: + if self.chunk_training_mode not in (MODE_CHUNKLESS, MODE_EXPLICIT): if chunk_metric(self.state) == USS: self.rss, self.uss = mem.get_rss(force_garbage_collect=True, uss=True) else: @@ -748,6 +762,20 @@ def __init__( else: self.rss, self.uss = 0, 0 # config.override_setting("chunk_size", 0) + if self.chunk_training_mode == MODE_CHUNKLESS: + # chunkless needs nothing else + return + + self.chunk_tag = chunk_tag + self.trace_label = trace_label + self.chunk_size = chunk_size + + self.num_choosers = num_choosers + self.rows_processed = 0 + + if self.chunk_training_mode == MODE_EXPLICIT: + self.rows_per_chunk = chunk_size + # explicit needs nothing else return min_chunk_ratio = min_available_chunk_ratio(self.state) @@ -782,11 +810,12 @@ def __init__( ) def close(self): - if self.chunk_training_mode == MODE_CHUNKLESS: + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): return if ((self.depth == 1) or WRITE_SUBCHUNK_HISTORY) and ( - self.chunk_training_mode not in (MODE_PRODUCTION, MODE_CHUNKLESS) + self.chunk_training_mode + not in (MODE_PRODUCTION, MODE_CHUNKLESS, MODE_EXPLICIT) ): self.state.chunk.HISTORIAN.write_history( self.state, self.history, self.chunk_tag @@ -813,7 +842,16 @@ def available_headroom(self, xss): return headroom def initial_rows_per_chunk(self): - # whatever the TRAINING_MODE, use cache to determine initial_row_size + if self.chunk_training_mode == MODE_EXPLICIT: + if self.rows_per_chunk: + number_of_chunks = self.num_choosers // self.rows_per_chunk + ( + 1 if self.num_choosers % self.rows_per_chunk else 0 + ) + else: + number_of_chunks = 1 + return self.rows_per_chunk, number_of_chunks + + # for any other TRAINING_MODE, use cache to determine initial_row_size # (presumably preferable to default_initial_rows_per_chunk) self.initial_row_size = self.state.chunk.HISTORIAN.cached_row_size( self.state, self.chunk_tag @@ -872,6 +910,15 @@ def initial_rows_per_chunk(self): return rows_per_chunk, estimated_number_of_chunks def adaptive_rows_per_chunk(self, i): + if self.chunk_training_mode == MODE_EXPLICIT: + if self.rows_per_chunk: + number_of_chunks = self.num_choosers // self.rows_per_chunk + ( + 1 if self.num_choosers % self.rows_per_chunk else 0 + ) + else: + number_of_chunks = 1 + return self.rows_per_chunk, number_of_chunks + # rows_processed is out of phase with cum_overhead # overhead is the actual bytes/rss used top process chooser chunk with prev_rows_per_chunk rows @@ -971,15 +1018,19 @@ def adaptive_rows_per_chunk(self, i): # input() - if self.chunk_training_mode not in (MODE_PRODUCTION, MODE_CHUNKLESS): + if self.chunk_training_mode not in ( + MODE_PRODUCTION, + MODE_CHUNKLESS, + MODE_EXPLICIT, + ): self.cum_rows += self.rows_per_chunk return self.rows_per_chunk, estimated_number_of_chunks @contextmanager def ledger(self): - # don't do anything in chunkless mode - if self.chunk_training_mode == MODE_CHUNKLESS: + # don't do anything in chunkless mode or explicit mode + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): yield return @@ -1043,8 +1094,8 @@ def ledger(self): self.chunk_ledger = None def log_rss(self, trace_label: str, force: bool = False): - if self.chunk_training_mode == MODE_CHUNKLESS: - # no memory tracing at all in chunkless mode + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): + # no memory tracing at all in chunkless or explicit mode return assert ( @@ -1069,7 +1120,7 @@ def log_rss(self, trace_label: str, force: bool = False): ) def log_df(self, trace_label: str, table_name: str, df: pd.DataFrame): - if self.chunk_training_mode in (MODE_PRODUCTION, MODE_CHUNKLESS): + if self.chunk_training_mode in (MODE_PRODUCTION, MODE_CHUNKLESS, MODE_EXPLICIT): return assert ( @@ -1123,9 +1174,9 @@ def chunk_log(state: workflow.State, trace_label, chunk_tag=None, base=False): yield ChunkSizer(state, "chunkless", trace_label, 0, 0, _chunk_training_mode) return - if base != (len(state.chunk.CHUNK_SIZERS) == 0): - raise AssertionError - assert base == (len(state.chunk.CHUNK_SIZERS) == 0) + assert (_chunk_training_mode == MODE_EXPLICIT) or ( + base == (len(state.chunk.CHUNK_SIZERS) == 0) + ), f"{base=}, {len(state.chunk.CHUNK_SIZERS)=}" trace_label = f"{trace_label}.chunk_log" @@ -1162,10 +1213,14 @@ def adaptive_chunked_choosers( chunk_tag: str = None, *, chunk_size: int | None = None, + explicit_chunk_size: int = 0, ): # generator to iterate over choosers - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. @@ -1176,7 +1231,10 @@ def adaptive_chunked_choosers( return chunk_tag = chunk_tag or trace_label - if chunk_size is None: + + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + elif chunk_size is None: chunk_size = state.settings.chunk_size num_choosers = len(choosers.index) @@ -1201,7 +1259,6 @@ def adaptive_chunked_choosers( i = offset = 0 while offset < num_choosers: i += 1 - assert offset + rows_per_chunk <= num_choosers chunk_trace_label = trace_label_for_chunk(state, trace_label, chunk_size, i) @@ -1233,7 +1290,9 @@ def adaptive_chunked_choosers_and_alts( alternatives: pd.DataFrame, trace_label: str, chunk_tag: str = None, + *, chunk_size: int | None = None, + explicit_chunk_size: int = 0, ): """ generator to iterate over choosers and alternatives in chunk_size chunks @@ -1267,7 +1326,10 @@ def adaptive_chunked_choosers_and_alts( chunk of alternatives for chooser chunk """ - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. @@ -1306,7 +1368,9 @@ def adaptive_chunked_choosers_and_alts( f"with {num_choosers} choosers and {num_alternatives} alternatives" ) - if chunk_size is None: + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + elif chunk_size is None: chunk_size = state.settings.chunk_size chunk_sizer = ChunkSizer( state, @@ -1373,7 +1437,11 @@ def adaptive_chunked_choosers_and_alts( def adaptive_chunked_choosers_by_chunk_id( - state: workflow.State, choosers: pd.DataFrame, trace_label: str, chunk_tag=None + state: workflow.State, + choosers: pd.DataFrame, + trace_label: str, + chunk_tag=None, + explicit_chunk_size: int = 0, ): # generator to iterate over choosers in chunk_size chunks # like chunked_choosers but based on chunk_id field rather than dataframe length @@ -1381,7 +1449,10 @@ def adaptive_chunked_choosers_by_chunk_id( # all have to be included in the same chunk) # FIXME - we pathologically know name of chunk_id col in households table - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. @@ -1397,7 +1468,10 @@ def adaptive_chunked_choosers_by_chunk_id( num_choosers = choosers["chunk_id"].max() + 1 assert num_choosers > 0 - chunk_size = state.settings.chunk_size + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + else: + chunk_size = state.settings.chunk_size chunk_sizer = ChunkSizer( state, chunk_tag, @@ -1412,7 +1486,6 @@ def adaptive_chunked_choosers_by_chunk_id( i = offset = 0 while offset < num_choosers: i += 1 - assert offset + rows_per_chunk <= num_choosers chunk_trace_label = trace_label_for_chunk(state, trace_label, chunk_size, i) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index b54c85825..3a6c3ae47 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -284,7 +284,7 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): """ chunk_training_mode: Literal[ - "disabled", "training", "production", "adaptive" + "disabled", "training", "production", "adaptive", "explicit" ] = "disabled" """ The method to use for chunk training. diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 3da309369..f1ae6e7ea 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -417,7 +417,7 @@ def to_series(x): ), dtype=np.float32, ) - sh_utility_fat = sh_utility_fat[trace_rows, :] + # sh_utility_fat = sh_utility_fat[trace_rows, :] sh_utility_fat = sh_utility_fat.to_dataframe("vals") try: sh_utility_fat = sh_utility_fat.unstack("expressions") diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml index 9f8f8539b..2e289fa9f 100644 --- a/conda-environments/docbuild.yml +++ b/conda-environments/docbuild.yml @@ -32,7 +32,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - platformdirs - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/docs/conf.py b/docs/conf.py index f8ca579c6..f6f56f081 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # ActivitySim documentation build configuration file, created by # sphinx-quickstart on Tue May 26 14:13:47 2016. @@ -15,7 +14,6 @@ from __future__ import annotations import os -import sys # -- Get Package Version -------------------------------------------------- import activitysim