Merge pull request #9 from camsys/data-type-dev-merge
Merge Develop Branch
i-am-sijia authored Feb 6, 2024
2 parents ae126f1 + 9cd9bbe commit 42d4e8c
Showing 239 changed files with 95,340 additions and 6,453 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/branch-docs.yml
@@ -40,7 +40,7 @@ jobs:
id: cache

- name: Update environment
-        run: mamba env update -n docbuild -f conda-environments/docbuild.yml
+        run: mamba env update --verbose -n docbuild -f conda-environments/docbuild.yml
if: steps.cache.outputs.cache-hit != 'true'

- name: Install activitysim
96 changes: 90 additions & 6 deletions .github/workflows/core_tests.yml
@@ -45,7 +45,21 @@ jobs:
id: cache

- name: Update environment
-        run: mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+        run: |
+          mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+          mamba install --yes \
+            "psutil=5.9.5" \
+            "pydantic=1.10.13" \
+            "pypyr=5.8.0" \
+            "pytables=3.6.1" \
+            "pytest-cov" \
+            "pytest-regressions=2.5.0" \
+            "scikit-learn=1.2.2" \
+            "sharrow>=2.6.0" \
+            "simwrapper=1.8.5" \
+            "xarray=2023.2.0" \
+            "zarr=2.14.2" \
+            "zstandard=0.21.0"
if: steps.cache.outputs.cache-hit != 'true'

- name: Install activitysim
@@ -131,7 +145,21 @@ jobs:
id: cache

- name: Update environment
-        run: mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+        run: |
+          mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+          mamba install --yes \
+            "psutil=5.9.5" \
+            "pydantic=1.10.13" \
+            "pypyr=5.8.0" \
+            "pytables=3.6.1" \
+            "pytest-cov" \
+            "pytest-regressions=2.5.0" \
+            "scikit-learn=1.2.2" \
+            "sharrow>=2.6.0" \
+            "simwrapper=1.8.5" \
+            "xarray=2023.2.0" \
+            "zarr=2.14.2" \
+            "zstandard=0.21.0"
if: steps.cache.outputs.cache-hit != 'true'

- name: Install activitysim
@@ -215,7 +243,21 @@ jobs:
id: cache

- name: Update environment
-        run: mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+        run: |
+          mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+          mamba install --yes \
+            "psutil=5.9.5" \
+            "pydantic=1.10.13" \
+            "pypyr=5.8.0" \
+            "pytables=3.6.1" \
+            "pytest-cov" \
+            "pytest-regressions=2.5.0" \
+            "scikit-learn=1.2.2" \
+            "sharrow>=2.6.0" \
+            "simwrapper=1.8.5" \
+            "xarray=2023.2.0" \
+            "zarr=2.14.2" \
+            "zstandard=0.21.0"
if: steps.cache.outputs.cache-hit != 'true'

- name: Install activitysim
@@ -298,7 +340,21 @@ jobs:
id: cache

- name: Update environment
-        run: mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+        run: |
+          mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+          mamba install --yes \
+            "psutil=5.9.5" \
+            "pydantic=1.10.13" \
+            "pypyr=5.8.0" \
+            "pytables=3.6.1" \
+            "pytest-cov" \
+            "pytest-regressions=2.5.0" \
+            "scikit-learn=1.2.2" \
+            "sharrow>=2.6.0" \
+            "simwrapper=1.8.5" \
+            "xarray=2023.2.0" \
+            "zarr=2.14.2" \
+            "zstandard=0.21.0"
if: steps.cache.outputs.cache-hit != 'true'

- name: Install activitysim
@@ -351,7 +407,21 @@ jobs:
id: cache

- name: Update environment
-        run: mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+        run: |
+          mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+          mamba install --yes \
+            "psutil=5.9.5" \
+            "pydantic=1.10.13" \
+            "pypyr=5.8.0" \
+            "pytables=3.6.1" \
+            "pytest-cov" \
+            "pytest-regressions=2.5.0" \
+            "scikit-learn=1.2.2" \
+            "sharrow>=2.6.0" \
+            "simwrapper=1.8.5" \
+            "xarray=2023.2.0" \
+            "zarr=2.14.2" \
+            "zstandard=0.21.0"
if: steps.cache.outputs.cache-hit != 'true'

- name: Install activitysim
@@ -403,7 +473,21 @@ jobs:
id: cache

- name: Update environment
-        run: mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+        run: |
+          mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+          mamba install --yes \
+            "psutil=5.9.5" \
+            "pydantic=1.10.13" \
+            "pypyr=5.8.0" \
+            "pytables=3.6.1" \
+            "pytest-cov" \
+            "pytest-regressions=2.5.0" \
+            "scikit-learn=1.2.2" \
+            "sharrow>=2.6.0" \
+            "simwrapper=1.8.5" \
+            "xarray=2023.2.0" \
+            "zarr=2.14.2" \
+            "zstandard=0.21.0"
if: steps.cache.outputs.cache-hit != 'true'

- name: Install Larch
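The same pin-and-install block is repeated in each job of this workflow. One way to confirm that a local environment actually matches these pins is to compare installed versions against the list above; a minimal sketch (the PINS mapping is transcribed from the workflow step, but the script itself is illustrative and not part of this PR — conda names such as pytables differ from their PyPI names, so only identically named distributions are listed):

    from importlib.metadata import PackageNotFoundError, version

    # Pins transcribed from the "Update environment" step above.
    PINS = {
        "psutil": "5.9.5",
        "pydantic": "1.10.13",
        "pypyr": "5.8.0",
        "pytest-regressions": "2.5.0",
        "scikit-learn": "1.2.2",
        "simwrapper": "1.8.5",
        "xarray": "2023.2.0",
        "zarr": "2.14.2",
        "zstandard": "0.21.0",
    }

    def check_pins(pins):
        """Yield a message for each pinned package that is missing or mismatched."""
        for name, expected in pins.items():
            try:
                installed = version(name)
            except PackageNotFoundError:
                yield f"{name}: not installed"
                continue
            if installed != expected:
                yield f"{name}: installed {installed}, pinned {expected}"

    for problem in check_pins(PINS):
        print(problem)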
1 change: 0 additions & 1 deletion .gitignore
@@ -5,7 +5,6 @@ sandbox/
.pytest_cache
.vagrant

-
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
17 changes: 9 additions & 8 deletions activitysim/abm/misc.py
@@ -4,6 +4,7 @@

import logging

+import numpy as np
import pandas as pd

from activitysim.core import workflow
@@ -16,16 +17,16 @@


@workflow.cached_object
-def households_sample_size(state: workflow.State, override_hh_ids):
+def households_sample_size(state: workflow.State, override_hh_ids) -> int:

    if override_hh_ids is None:
-        return state.settings, households_sample_size
+        return state.settings.households_sample_size
    else:
-        return 0 if override_hh_ids is None else len(override_hh_ids)
+        return len(override_hh_ids)


@workflow.cached_object
-def override_hh_ids(state: workflow.State):
+def override_hh_ids(state: workflow.State) -> np.ndarray | None:

hh_ids_filename = state.settings.hh_ids
if hh_ids_filename is None:
@@ -63,12 +64,12 @@ def override_hh_ids(state: workflow.State):


@workflow.cached_object
-def trace_od(state: workflow.State):
+def trace_od(state: workflow.State) -> tuple[int, int] | None:

od = state.settings.trace_od

if od and not (
-        isinstance(od, (list, tuple))
+        isinstance(od, list | tuple)
and len(od) == 2
and all(isinstance(x, int) for x in od)
):
@@ -81,12 +82,12 @@ def override_hh_ids(state: workflow.State):


@workflow.cached_object
-def chunk_size(state: workflow.State):
+def chunk_size(state: workflow.State) -> int:
_chunk_size = int(state.settings.chunk_size or 0)

return _chunk_size


@workflow.cached_object
-def check_for_variability(state: workflow.State):
+def check_for_variability(state: workflow.State) -> bool:
return bool(state.settings.check_for_variability)
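The revised trace_od above also switches to the PEP 604 union form isinstance(od, list | tuple), which requires Python 3.10 or newer. A standalone sketch of the same validation with plain arguments in place of workflow.State (validate_trace_od is a hypothetical helper, not part of this PR; the original's handling of an invalid value is hidden in this diff, so the fallback here simply returns None):

    def validate_trace_od(od):
        """Return (orig, dest) if od is a two-element list/tuple of ints, else None."""
        if od and not (
            isinstance(od, list | tuple)  # PEP 604 union; Python >= 3.10
            and len(od) == 2
            and all(isinstance(x, int) for x in od)
        ):
            return None
        return tuple(od) if od else None

    assert validate_trace_od([5, 9]) == (5, 9)
    assert validate_trace_od("5,9") is None  # wrong type
    assert validate_trace_od(None) is None   # tracing disabled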
1 change: 1 addition & 0 deletions activitysim/abm/models/__init__.py
@@ -15,6 +15,7 @@
initialize,
initialize_los,
initialize_tours,
+    input_checker,
joint_tour_composition,
joint_tour_destination,
joint_tour_frequency,
101 changes: 78 additions & 23 deletions activitysim/abm/models/accessibility.py
@@ -3,39 +3,83 @@
from __future__ import annotations

import logging
+from typing import Any

import numba as nb
import numpy as np
import pandas as pd

from activitysim.core import assign, chunk, los, workflow
+from activitysim.core.configuration.base import PydanticReadable

logger = logging.getLogger(__name__)


+class AccessibilitySettings(PydanticReadable):
+    """
+    Settings for aggregate accessibility component.
+    """
+
+    CONSTANTS: dict[str, Any] = {}
+
+    land_use_columns: list[str] = []
+    """Only include these columns in the computational tables.
+
+    Memory usage is reduced by listing only the minimum columns needed by
+    the SPEC, and nothing extra.
+    """
+
+    SPEC: str = "accessibility.csv"
+    """Filename for the accessibility specification (csv) file."""
+
+    explicit_chunk: int = 0
+    """If > 0, use this chunk size instead of adaptive chunking."""


@nb.njit
def _accumulate_accessibility(arr, orig_zone_count, dest_zone_count):
    assert arr.size == orig_zone_count * dest_zone_count
-    arr2 = arr.reshape((orig_zone_count, dest_zone_count))
+    assert arr.ndim == 1
+    i = 0
    result = np.empty((orig_zone_count,), dtype=arr.dtype)
    for o in range(orig_zone_count):
        x = 0
        for d in range(dest_zone_count):
-            x += arr2[o, d]
+            x += arr[i]
+            i += 1
        result[o] = np.log1p(x)
    return result
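
# For reference: given a flat, row-major array of orig x dest values, the
# loop above computes the same quantity as the vectorized expression
#
#     np.log1p(arr.reshape((orig_zone_count, dest_zone_count)).sum(axis=1))
#
# The flat-index form stays allocation-free inside the loop and compiles
# cleanly under @nb.njit.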


def compute_accessibilities_for_zones(
-    state,
-    accessibility_df,
-    land_use_df,
-    assignment_spec,
-    constants,
-    network_los,
-    trace_label,
-    chunk_sizer,
+    state: workflow.State,
+    accessibility_df: pd.DataFrame,
+    land_use_df: pd.DataFrame,
+    assignment_spec: dict,
+    constants: dict,
+    network_los: los.Network_LOS,
+    trace_label: str,
+    chunk_sizer: chunk.ChunkSizer,
):
"""
Compute accessibility for each zone in land use file using expressions from accessibility_spec.
Parameters
----------
state : workflow.State
accessibility_df : pd.DataFrame
land_use_df : pd.DataFrame
assignment_spec : dict
constants : dict
network_los : los.Network_LOS
trace_label : str
chunk_sizer : chunk.ChunkSizer
Returns
-------
accessibility_df : pd.DataFrame
The accessibility_df is updated in place.
"""
orig_zones = accessibility_df.index.values
dest_zones = land_use_df.index.values

@@ -144,6 +188,10 @@ def compute_accessibility(
    land_use: pd.DataFrame,
    accessibility: pd.DataFrame,
    network_los: los.Network_LOS,
+    model_settings: AccessibilitySettings | None = None,
+    model_settings_file_name: str = "accessibility.yaml",
+    trace_label: str = "compute_accessibility",
+    output_table_name: str = "accessibility",
) -> None:
"""
Compute accessibility for each zone in land use file using expressions from accessibility_spec
@@ -160,40 +208,47 @@
product mutes large differences. The decay function on the walk accessibility measure is
steeper than automobile or transit. The minimum accessibility is zero.
"""
+    if model_settings is None:
+        model_settings = AccessibilitySettings.read_settings_file(
+            state.filesystem, model_settings_file_name
+        )

-    trace_label = "compute_accessibility"
-    model_settings = state.filesystem.read_model_settings("accessibility.yaml")
    assignment_spec = assign.read_assignment_spec(
-        state.filesystem.get_config_file_path("accessibility.csv")
+        state.filesystem.get_config_file_path(model_settings.SPEC)
    )

    accessibility_df = accessibility
    if len(accessibility_df.columns) > 0:
        logger.warning(
-            f"accessibility table is not empty. Columns:{list(accessibility_df.columns)}"
+            f"accessibility table is not empty. "
+            f"Columns:{list(accessibility_df.columns)}"
        )
        raise RuntimeError("accessibility table is not empty.")

-    constants = model_settings.get("CONSTANTS", {})
+    constants = model_settings.CONSTANTS

-    # only include the land_use columns needed by spec, as specified by land_use_columns model_setting
-    land_use_columns = model_settings.get("land_use_columns", [])
+    # only include the land_use columns needed by spec,
+    # as specified by land_use_columns model_setting
+    land_use_columns = model_settings.land_use_columns
    land_use_df = land_use
    land_use_df = land_use_df[land_use_columns]

    logger.info(
-        f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones"
+        f"Running {trace_label} with {len(accessibility_df.index)} orig zones "
+        f"{len(land_use_df)} dest zones"
    )

    accessibilities_list = []
+    explicit_chunk_size = model_settings.explicit_chunk

    for (
-        i,
+        _i,
        chooser_chunk,
-        chunk_trace_label,
+        _chunk_trace_label,
        chunk_sizer,
-    ) in chunk.adaptive_chunked_choosers(state, accessibility_df, trace_label):
+    ) in chunk.adaptive_chunked_choosers(
+        state, accessibility_df, trace_label, explicit_chunk_size=explicit_chunk_size
+    ):
accessibilities = compute_accessibilities_for_zones(
state,
chooser_chunk,
Expand All @@ -211,4 +266,4 @@ def compute_accessibility(
logger.info(f"{trace_label} computed accessibilities {accessibility_df.shape}")

# - write table to pipeline
-    state.add_table("accessibility", accessibility_df)
+    state.add_table(output_table_name, accessibility_df)
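
With settings injected through the new keyword parameters, the component can also be configured programmatically rather than only via accessibility.yaml. A minimal sketch, assuming a prepared state, land_use, accessibility, and network_los (in a full model run this step is normally driven through the pipeline rather than called directly, and the land-use column names below are hypothetical):

    from activitysim.abm.models.accessibility import (
        AccessibilitySettings,
        compute_accessibility,
    )

    settings = AccessibilitySettings(
        land_use_columns=["TOTEMP", "TOTHH"],  # hypothetical land-use columns
        explicit_chunk=5000,                   # fixed chunk size; 0 = adaptive
    )

    compute_accessibility(
        state,                    # workflow.State with tables loaded
        land_use,                 # land use DataFrame
        accessibility,            # empty table to be filled
        network_los,              # los.Network_LOS skims
        model_settings=settings,  # bypasses reading accessibility.yaml
        output_table_name="accessibility",
    )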