NREL
diff --git a/‎.github/workflows/tests.yaml
+13-12 b/‎.github/workflows/tests.yaml
+13-12
diff --git a/‎docs/source/developer/design.md
+28 b/‎docs/source/developer/design.md
+28
diff --git a/‎docs/source/developer/index.rst
+1-1 b/‎docs/source/developer/index.rst
+1-1
diff --git a/‎examples/test_for_determinism.py
+82 b/‎examples/test_for_determinism.py
+82
diff --git a/‎nrel/hive/app/hive_cosim.py
+7 b/‎nrel/hive/app/hive_cosim.py
+7
diff --git a/‎nrel/hive/app/run.py
+6 b/‎nrel/hive/app/run.py
+6
diff --git a/‎nrel/hive/config/sim.py
+1 b/‎nrel/hive/config/sim.py
+1
diff --git a/‎nrel/hive/dispatcher/instruction_generator/assignment_ops.py
+3-4 b/‎nrel/hive/dispatcher/instruction_generator/assignment_ops.py
+3-4
diff --git a/‎nrel/hive/dispatcher/instruction_generator/dispatcher.py
+1-3 b/‎nrel/hive/dispatcher/instruction_generator/dispatcher.py
+1-3
diff --git a/‎nrel/hive/dispatcher/instruction_generator/instruction_generator_ops.py
+2-4 b/‎nrel/hive/dispatcher/instruction_generator/instruction_generator_ops.py
+2-4
diff --git a/‎nrel/hive/initialization/initialize_simulation_with_sampling.py
+2-3 b/‎nrel/hive/initialization/initialize_simulation_with_sampling.py
+2-3
diff --git a/‎nrel/hive/initialization/sample_requests.py
+1-1 b/‎nrel/hive/initialization/sample_requests.py
+1-1
diff --git a/‎nrel/hive/model/roadnetwork/roadnetwork.py
+1-1 b/‎nrel/hive/model/roadnetwork/roadnetwork.py
+1-1
diff --git a/‎nrel/hive/model/station/station.py
+2-1 b/‎nrel/hive/model/station/station.py
+2-1
diff --git a/‎nrel/hive/reporting/handler/stateful_handler.py
+3-3 b/‎nrel/hive/reporting/handler/stateful_handler.py
+3-3
diff --git a/‎nrel/hive/reporting/handler/summary_stats.py
+3-3 b/‎nrel/hive/reporting/handler/summary_stats.py
+3-3
@@ -2,23 +2,23 @@ name: tests
 
 on:
   push:
-      branches: [ main ]
+    branches: [main]
   pull_request:
 
 jobs:
   test:
-      if: github.event.pull_request.merged == false
-      runs-on: ubuntu-latest
+    if: github.event.pull_request.merged == false
+    runs-on: ubuntu-latest
 
-      strategy:
-        fail-fast: false
-        matrix:
-          python-version: ['3.8', '3.9', '3.10', '3.11']
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
-      env:
-        PYTHON: ${{ matrix.python-version }}
+    env:
+      PYTHON: ${{ matrix.python-version }}
 
-      steps:
+    steps:
       - uses: actions/checkout@v3
 
       - name: set up python ${{ matrix.python-version }}
@@ -29,6 +29,7 @@ jobs:
       - name: Install package
         run: |
           pip install ".[dev]"
+          pip install pandas # determinism test dependency
 
       - name: Run mypy
         run: mypy . --ignore-missing-imports
@@ -45,6 +46,6 @@ jobs:
         run: |
           pytest tests/ -v
 
-      - name: HIVE Denver Demo test
+      - name: HIVE determinism test
         run: |
-          hive denver_demo.yaml
+          python examples/test_for_determinism.py --iterations 2
@@ -0,0 +1,28 @@
+# Design
+
+This page describes details specific to HIVE for new developers interacting with the library.
+
+## table of contents
+
+- **[determinism](#determinism)**: details related to keeping HIVE runs deterministic
+
+### determinism
+
+#### immutables.Map does not iterate based on insertion order
+
+Most of the HIVE state is stored in hash maps. A [3rd party library](https://github.com/MagicStack/immutables) provides an immutable hash map via the Hash Array Mapped Trie (HAMT) data structure. While it is, for the most part, a drop-in replacement for a Python `dict`, it has one caveat: iteration order is not guaranteed to follow insertion order. This has determinism implications for HIVE. For this reason, any iteration over a HAMT data structure must first be _sorted_. This is the default behavior when accessing the entity collections on a `SimulationState` through accessors such as `sim.get_vehicles()`, which first sort by `EntityId`.
+
+Deeper within HIVE, whenever the HAMT data structure is interacted with, we must take care. There are two possible situations:
+  1. the iteration order is irrelevant (for example, when iterating on a collection in order to write reports, or when updating a collection)
+    - here, use of `.items()` iteration is acceptable
+  2. the iteration order matters and must be sorted (exclusively when retrieving a Map as an _iterator_)
+    - here, prefer `DictOps.iterate_vals()` or `DictOps.iterate_items()` which first sort by key
+    - if key sorting is not preferred, write a specialized sort 
+
+When making a specialized sort function over a set of entities, consider bundling the cost value with the entity id. If two entities have the same value, the id can be used to "break the tie" in a deterministic way. Example:
+
+```python
+vs: List[Vehicle] = ... #
+sorted(vs, key=lambda v: v.distance_traveled_km)          # bad
+sorted(vs, key=lambda v: (v.distance_traveled_km, v.id))  # good
+```
@@ -6,4 +6,4 @@ Developer Docs
 
   release 
   contributing
-
+  design
@@ -0,0 +1,82 @@
+from pathlib import Path
+from typing import Dict, List
+from pkg_resources import resource_filename
+from nrel.hive.initialization.load import load_config, load_simulation
+import random
+import numpy
+import pandas
+import argparse
+import sys
+
+from nrel.hive.runner.local_simulation_runner import LocalSimulationRunner
+
+# this utility demonstrates that a set of runs has the same high-level results;
+# it exits non-zero when any tested summary statistic differs between runs
+if __name__ == "__main__":
+    denver_pkg = "nrel.hive.resources.scenarios.denver_downtown"
+    denver = Path(resource_filename(denver_pkg, "denver_demo.yaml"))
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--scenario', type=Path, default=denver)
+    parser.add_argument('--iterations', type=int, default=5)
+    parser.add_argument('--outfile', type=Path, required=False)
+    args = parser.parse_args()
+    if args.iterations < 1:
+        parser.error('--iterations must be at least 1')  # nothing to compare otherwise
+
+    data: List[Dict] = []
+    for i in range(args.iterations):
+        # set up config with scenario + limited (stats only) logging
+        config_no_log = load_config(args.scenario).suppress_logging()
+        config = config_no_log._replace(
+            global_config=config_no_log.global_config._replace(log_stats=True)
+        )
+        # re-seed both RNGs on every iteration so each run starts from the same state
+        if config.sim.seed is not None:
+            random.seed(config.sim.seed)
+            numpy.random.seed(config.sim.seed)
+
+        rp0 = load_simulation(config)
+        rp1 = LocalSimulationRunner.run(rp0)
+        stats = rp1.e.reporter.get_summary_stats(rp1)
+        if stats is None:
+            raise Exception("hive result missing stats object")
+        stats['iteration'] = i
+        # flatten the nested vehicle state stats into top-level columns
+        vs = stats['vehicle_state'].copy()
+        del stats['vehicle_state']
+        for k, v in vs.items():
+            stats[f'{k}StatePct'] = v['observed_percent']
+            stats[f'{k}StateVkt'] = v['vkt']
+
+        data.append(stats)
+        print(f"finished iteration {i}")
+
+    df = pandas.DataFrame(data)
+
+    test_cols = [
+        'mean_final_soc',
+        'requests_served_percent',
+        'total_vkt',
+        'total_kwh_expended',
+        'total_gge_expended',
+        'total_kwh_dispensed',
+        'total_gge_dispensed',
+    ]
+
+    print(f'testing for determinism between {args.iterations} runs')
+    exit_code = 0
+    for col in test_cols:
+        # count NaN as a value: a run yielding NaN must not be silently ignored
+        n = df[col].nunique(dropna=False)
+        if n == 1:
+            print(f'{col} is good, all values match')
+        else:
+            exit_code = 1
+            # str.join requires str items; the stats values are not guaranteed to be strings
+            entries = '[' + ', '.join(str(x) for x in df[col].unique()) + ']'
+            print(f'{col} no good, has {n} unique entries (should be one): {entries}')
+
+    if args.outfile:
+        df.to_csv(args.outfile)
+
+    sys.exit(exit_code)
@@ -3,6 +3,8 @@
 from typing import Iterable, Tuple, NamedTuple, Optional, TypeVar
 
 from tqdm import tqdm
+import random
+import numpy
 
 from nrel.hive.dispatcher.instruction_generator.instruction_generator import InstructionGenerator
 from nrel.hive.initialization.initialize_simulation import InitFunction
@@ -27,6 +29,11 @@ def load_scenario(
     :raises: Error when issues with files
     """
     config = load_config(scenario_file, output_suffix)
+
+    if config.sim.seed is not None:
+        random.seed(config.sim.seed)
+        numpy.random.seed(config.sim.seed)
+
     initial_payload = load_simulation(config, custom_instruction_generators, custom_init_functions)
 
     # add a specialized Reporter handler that catches vehicle charge events
 
@@ -8,6 +8,8 @@
 
 import pkg_resources
 import yaml
+import random
+import numpy
 
 from nrel.hive.dispatcher.instruction_generator.instruction_generator import InstructionGenerator
 from nrel.hive.initialization.initialize_simulation import InitFunction
@@ -52,6 +54,10 @@ def run_sim(
 
     config = load_config(scenario_file)
 
+    if config.sim.seed is not None:
+        random.seed(config.sim.seed)
+        numpy.random.seed(config.sim.seed)
+
     initial_payload = load_simulation(
         config,
         custom_instruction_generators=custom_instruction_generators,
 
@@ -19,6 +19,7 @@ class Sim(NamedTuple):
     request_cancel_time_seconds: int
     schedule_type: ScheduleType
     min_delta_energy_change: Ratio = 0.0001
+    seed: Optional[int] = 0
 
     @classmethod
     def default_config(cls) -> Dict:
 
@@ -303,15 +303,15 @@ def _time_to_full(v: Vehicle) -> Seconds:
 
             return _time_to_full
 
-        def _sort_enqueue_time(v: Vehicle) -> int:
+        def _sort_enqueue_time(v: Vehicle) -> Tuple[int, str]:
             if isinstance(v.vehicle_state, ChargeQueueing):
                 enqueue_time = int(v.vehicle_state.enqueue_time)
             else:
                 log.error(
                     "calling _sort_enqueue_time on a vehicle state that is not ChargeQueueing"
                 )
                 enqueue_time = 0
-            return enqueue_time
+            return (enqueue_time, v.id)
 
         def _greedy_assignment(
             _charging: Tuple[Seconds, ...],
@@ -374,12 +374,11 @@ def _greedy_assignment(
         vehicles_at_station = sim.get_vehicles(filter_function=_veh_at_station)
         vehicles_enqueued = sim.get_vehicles(
             filter_function=_veh_enqueued,
-            sort=True,
             sort_key=_sort_enqueue_time,
         )
 
         estimates: Dict[ChargerId, int] = {}
-        for charger_id in station.state.keys():
+        for charger_id in sorted(station.state.keys()):
             charger_state = station.state.get(charger_id)
             charger = charger_state.charger if charger_state is not None else None
 
 
@@ -96,9 +96,7 @@ def _valid_request(r: Request) -> bool:
             )
 
             unassigned_requests = simulation_state.get_requests(
-                sort=True,
-                sort_key=lambda r: r.value,
-                sort_reversed=True,
+                sort_key=lambda r: -r.value,
                 filter_function=_valid_request,
             )
 
 
@@ -16,8 +16,6 @@
 
 log = logging.getLogger(__name__)
 
-random.seed(123)
-
 if TYPE_CHECKING:
     from nrel.hive.model.vehicle.vehicle import Vehicle
     from nrel.hive.state.simulation_state.simulation_state import SimulationState
@@ -85,7 +83,7 @@ def add_driver_instructions(self, simulation_state, environment):
                 ),
             )
             + acc,
-            simulation_state.vehicles.values(),
+            simulation_state.get_vehicles(),
             (),
         )
 
@@ -313,7 +311,7 @@ def get_nearest_valid_station_distance(
 
     nearest_station = H3Ops.nearest_entity(
         geoid=geoid,
-        entities=simulation_state.stations.values(),
+        entities=simulation_state.get_stations(),
         entity_search=simulation_state.s_search,
         sim_h3_search_resolution=simulation_state.sim_h3_search_resolution,
         max_search_distance_km=max_search_radius_km,
 
@@ -186,8 +186,7 @@ def _add_row_unsafe(
         )
 
     # add all stations to the simulation once we know they are complete
-    sim_with_stations = simulation_state_ops.add_entities(
-        simulation_state, stations_builder.values()
-    )
+    stations = DictOps.iterate_vals(stations_builder)
+    sim_with_stations = simulation_state_ops.add_entities(simulation_state, stations)
 
     return sim_with_stations
@@ -67,6 +67,6 @@ def default_request_sampler(
 
         id_counter += 1
 
-    sorted_reqeusts = sorted(requests, key=lambda r: r.departure_time)
+    sorted_reqeusts = sorted(requests, key=lambda r: (r.departure_time, r.id))
 
     return tuple(sorted_reqeusts)
@@ -79,7 +79,7 @@ def position_from_geoid(self, geoid: GeoId) -> Optional[EntityPosition]:
                 position = EntityPosition(link.link_id, geoid)
                 return position
             else:
-                hexes_by_dist = sorted(hexes_on_link, key=lambda h: h3.h3_distance(geoid, h))
+                hexes_by_dist = sorted(hexes_on_link, key=lambda h: (h3.h3_distance(geoid, h), h))
                 closest_hex_to_query = hexes_by_dist[0]
                 position = EntityPosition(link.link_id, closest_hex_to_query)
                 return position
 
@@ -23,6 +23,7 @@
     station_state_updates,
 )
 from nrel.hive.runner.environment import Environment
+from nrel.hive.util.dict_ops import DictOps
 from nrel.hive.util.error_or_result import ErrorOr
 from nrel.hive.util.exception import H3Error, SimulationStateError
 from nrel.hive.util.typealiases import *
@@ -109,7 +110,7 @@ def _chargers(acc, charger_data):
                     return None, updated_builder
 
         initial = None, immutables.Map[ChargerId, ChargerState]()
-        error, charger_states = ft.reduce(_chargers, chargers.items(), initial)
+        error, charger_states = ft.reduce(_chargers, DictOps.iterate_items(chargers), initial)
         if error is not None:
             raise error
         if charger_states is None:
 
@@ -35,23 +35,23 @@ def handle(self, reports: List[Report], runner_payload: RunnerPayload):
         sim_state = runner_payload.s
         if ReportType.DRIVER_STATE in self.global_config.log_sim_config:
             self._report_entities(
-                entities=sim_state.vehicles.values(),
+                entities=sim_state.get_vehicles(),
                 asdict=self.driver_asdict,
                 sim_time=sim_state.sim_time,
                 report_type=ReportType.DRIVER_STATE,
             )
 
         if ReportType.VEHICLE_STATE in self.global_config.log_sim_config:
             self._report_entities(
-                entities=sim_state.vehicles.values(),
+                entities=sim_state.get_vehicles(),
                 asdict=self.vehicle_asdict,
                 sim_time=sim_state.sim_time,
                 report_type=ReportType.VEHICLE_STATE,
             )
 
         if ReportType.STATION_STATE in self.global_config.log_sim_config:
             self._report_entities(
-                entities=sim_state.stations.values(),
+                entities=sim_state.get_stations(),
                 asdict=self.station_asdict,
                 sim_time=sim_state.sim_time,
                 report_type=ReportType.STATION_STATE,
 
@@ -48,19 +48,19 @@ def compile_stats(self, rp: RunnerPayload) -> Dict[str, Any]:
         self.mean_final_soc = mean(
             [
                 env.mechatronics[v.mechatronics_id].fuel_source_soc(v)
-                for v in sim_state.vehicles.values()
+                for v in sim_state.get_vehicles()
             ]
         )
 
         self.station_revenue = reduce(
             lambda income, station: income + station.balance,
-            sim_state.stations.values(),
+            sim_state.get_stations(),
             0.0,
         )
 
         self.fleet_revenue = reduce(
             lambda income, vehicle: income + vehicle.balance,
-            sim_state.vehicles.values(),
+            sim_state.get_vehicles(),
             0.0,
         )
-Original file line number
+Diff line change
   release
   contributing
+-
 +  design
Original file line number	Diff line number	Diff line change
`@@ -96,9 +96,7 @@ def _valid_request(r: Request) -> bool:`
`96`	`96`	`)`
`97`	`97`
`98`	`98`	`unassigned_requests = simulation_state.get_requests(`
`99`		`- sort=True,`
`100`		`- sort_key=lambda r: r.value,`
`101`		`- sort_reversed=True,`
	`99`	`+ sort_key=lambda r: -r.value,`
`102`	`100`	`filter_function=_valid_request,`
`103`	`101`	`)`
`104`	`102`
Original file line number	Diff line number	Diff line change
`@@ -186,8 +186,7 @@ def _add_row_unsafe(`
`186`	`186`	`)`
`187`	`187`
`188`	`188`	`# add all stations to the simulation once we know they are complete`
`189`		`- sim_with_stations = simulation_state_ops.add_entities(`
`190`		`- simulation_state, stations_builder.values()`
`191`		`- )`
	`189`	`+ stations = DictOps.iterate_vals(stations_builder)`
	`190`	`+ sim_with_stations = simulation_state_ops.add_entities(simulation_state, stations)`
`192`	`191`
`193`	`192`	`return sim_with_stations`
Original file line number	Diff line number	Diff line change
`@@ -48,19 +48,19 @@ def compile_stats(self, rp: RunnerPayload) -> Dict[str, Any]:`
`48`	`48`	`self.mean_final_soc = mean(`
`49`	`49`	`[`
`50`	`50`	`env.mechatronics[v.mechatronics_id].fuel_source_soc(v)`
`51`		`- for v in sim_state.vehicles.values()`
	`51`	`+ for v in sim_state.get_vehicles()`
`52`	`52`	`]`
`53`	`53`	`)`
`54`	`54`
`55`	`55`	`self.station_revenue = reduce(`
`56`	`56`	`lambda income, station: income + station.balance,`
`57`		`- sim_state.stations.values(),`
	`57`	`+ sim_state.get_stations(),`
`58`	`58`	`0.0,`
`59`	`59`	`)`
`60`	`60`
`61`	`61`	`self.fleet_revenue = reduce(`
`62`	`62`	`lambda income, vehicle: income + vehicle.balance,`
`63`		`- sim_state.vehicles.values(),`
	`63`	`+ sim_state.get_vehicles(),`
`64`	`64`	`0.0,`
`65`	`65`	`)`
`66`	`66`