OpenSourceEconomics · hmgaudecker · May 18, 2026 · May 19, 2026 · May 20, 2026 · May 20, 2026
diff --git a/.github/workflows/benchmark-pr.yml b/.github/workflows/benchmark-pr.yml
@@ -7,6 +7,10 @@ on:
   pull_request:
     branches:
       - '**'
+    paths-ignore:
+      - docs/**
+      - '**.md'
+      - '**.ipynb'
   workflow_dispatch: null
 jobs:
   run-benchmarks:

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -11,6 +11,11 @@ on:
   pull_request:
     branches:
       - '**'
+    paths-ignore:
+      - docs/**
+      - benchmarks/**
+      - '**.md'
+      - '**.ipynb'
 jobs:
   run-tests:
     name: Run tests for ${{ matrix.os }} on ${{ matrix.python-version }}

diff --git a/AGENTS.md b/AGENTS.md
@@ -221,11 +221,15 @@ Model(
 
 ### Core Methods
 
-- `model.solve(params=params)` - Solve the model and return value function arrays per
-  period and regime
-- `model.simulate(params=params, initial_conditions=initial_conditions, period_to_regime_to_V_arr=period_to_regime_to_V_arr)`
+- `model.solve(params=params, log_level="debug")` - Solve the model and return value
+  function arrays per period and regime
+- `model.simulate(params=params, initial_conditions=initial_conditions, period_to_regime_to_V_arr=period_to_regime_to_V_arr, log_level="debug")`
   \- Simulate forward given solution. `period_to_regime_to_V_arr` is optional; when
   `None`, the model is solved automatically before simulating.
+- `log_level` is **required** on both `solve()` and `simulate()`
+  (`off < warning < progress < debug`). It governs all runtime validation: `"off"` skips
+  it, `"warning"` / `"progress"` warn and continue, `"debug"` raises. Start projects at
+  `"debug"`.
 
 ### Derived Categoricals
 
@@ -245,6 +249,7 @@ result = model.simulate(
     params=params,
     initial_conditions=initial_conditions,
     period_to_regime_to_V_arr=None,
+    log_level="debug",
 )
 
 # Convert to DataFrame (deferred computation)
@@ -562,6 +567,16 @@ Code structure should be self-evident from function names and ordering.
 - Always use **plotly** for visualizations, never matplotlib. Use `plotly.graph_objects`
   and `plotly.subplots.make_subplots`.
 
+### Notebooks
+
+Explanation notebooks live in `docs/explanations/*.ipynb`. After editing one, verify:
+
+- Each cell's `source` is a JSON array of lines (one array element per line), never a
+  single multi-line string — a one-string `source` produces an unreadable diff.
+- Outputs and execution counts are stripped (`pixi run nbstripout <file>`).
+- Markdown and code use literal UTF-8 characters (`—`, `→`, `μ`), never `\u`-style
+  escape sequences.
+
 ### Key Dependencies
 
 - **jax**: Numerical computation

diff --git a/benchmarks/bench_aca_baseline.py b/benchmarks/bench_aca_baseline.py
@@ -8,6 +8,15 @@
 over 19 regimes, DAG resolution, pref_type batching) while shrinking
 per-call numerical work so the benchmark fits in an asv invocation.
 
+Two simulate variants run as separate benchmark classes:
+
+- `AcaBaseline` — `log_level="off"`, `log_path=None`: runtime validation
+  and diagnostic logging disabled.
+- `AcaBaselineDebugLog` — `log_level="debug"` with snapshots written to a
+  temporary directory: the slow path that runs every validation check
+  and persists diagnostic snapshots. The gap to `AcaBaseline` is the
+  validation + logging overhead.
+
 Requires the `aca_model` package to be importable. Use the
 `benchmarks-cuda12` pixi environment, which pulls aca-model from its
 public git URL. Inside the aca-dev monorepo the editable path install
@@ -28,15 +37,20 @@
   compilation is still per-method — the JIT cache is process-local —
   but the persistent XLA disk cache keeps second and third compiles
   fast.
-- `AcaBaselineGpuPeakMem` runs in a separate subprocess via `_gpu_mem`
-  that does not go through ASV's `setup_cache` pipeline. It calls
-  `setup_for_gpu_measurement()` (rebuild fresh, no warm-up) then
-  `time_execution()` to measure cold peak memory. Both methods
-  accept `cache=None` so the same callable serves ASV (cache passed
-  in) and the subprocess (cache omitted).
+- `AcaBaselineDebugLog` subclasses `AcaBaseline`, overriding only the
+  `log_level` and the per-run temporary `log_path`; it reuses the same
+  `setup_cache` / metric methods.
+- `AcaBaselineGpuPeakMem` and `AcaBaselineDebugLogGpuPeakMem` run in a
+  separate subprocess via `_gpu_mem` that does not go through ASV's
+  `setup_cache` pipeline. They call `setup_for_gpu_measurement()`
+  (rebuild fresh, no warm-up) then `time_execution()` to measure cold
+  peak memory. `time_execution()` accepts `cache=None` so the same
+  callable serves ASV (cache passed in) and the subprocess (cache omitted).
 """
 
 import gc
+import shutil
+import tempfile
 import time
 
 import cloudpickle
@@ -79,6 +93,8 @@ def _build() -> tuple[object, object, object]:
 
 
 class AcaBaseline:
+    """aca-baseline simulate with runtime validation and logging off."""
+
     timeout = 3600
     # Pin every ASV sample knob to 1 so setup runs once per subprocess
     # and one warm call is timed. `timeout=3600` gives headroom for the
@@ -88,6 +104,10 @@ class AcaBaseline:
     number = 1
     warmup_time = 0
 
+    # Simulate logging configuration; `AcaBaselineDebugLog` overrides both.
+    log_level = "off"
+    log_path: str | None = None
+
     def setup_cache(self) -> bytes:
         # Build once per ASV benchmark class run and hand the result to
         # every method via ASV's setup_cache mechanism. ASV pickles the
@@ -103,13 +123,7 @@ def setup(self, cache: bytes) -> None:
         )
         # Warm-trigger compilation so time_execution runs on a hot kernel.
         start = time.perf_counter()
-        self.model.simulate(
-            params=self.model_params,
-            initial_conditions=self.initial_conditions,
-            period_to_regime_to_V_arr=None,
-            log_level="off",
-            check_initial_conditions=False,
-        )
+        self._simulate()
         self._compile_time = time.perf_counter() - start
 
     def setup_for_gpu_measurement(self) -> None:
@@ -118,23 +132,20 @@ def setup_for_gpu_measurement(self) -> None:
         # (build + compile + run, no warm-up).
         self.model, self.model_params, self.initial_conditions = _build()
 
-    def time_execution(self, cache: bytes | None = None) -> None:
+    def _simulate(self) -> None:
         self.model.simulate(
             params=self.model_params,
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
-            log_level="off",
-            check_initial_conditions=False,
+            log_level=self.log_level,
+            log_path=self.log_path,
         )
 
+    def time_execution(self, cache: bytes | None = None) -> None:
+        self._simulate()
+
     def peakmem_execution(self, cache: bytes | None = None) -> None:
-        self.model.simulate(
-            params=self.model_params,
-            initial_conditions=self.initial_conditions,
-            period_to_regime_to_V_arr=None,
-            log_level="off",
-            check_initial_conditions=False,
-        )
+        self._simulate()
 
     def teardown(self, cache: bytes | None = None) -> None:
         import jax
@@ -148,7 +159,42 @@ def track_compilation_time(self, cache: bytes | None = None) -> float:
     track_compilation_time.unit = "seconds"
 
 
+class AcaBaselineDebugLog(AcaBaseline):
+    """aca-baseline simulate at `log_level="debug"` with snapshot logging.
+
+    Runs every runtime-validation check and persists diagnostic
+    snapshots to a temporary directory. Measured against `AcaBaseline`
+    (`log_level="off"`), the difference is the validation + logging
+    overhead.
+    """
+
+    log_level = "debug"
+
+    def setup(self, cache: bytes) -> None:
+        self.log_path = tempfile.mkdtemp(prefix="aca-bench-debug-log-")
+        super().setup(cache)
+
+    def setup_for_gpu_measurement(self) -> None:
+        # Mirror `setup`'s log_path setup so the cold-measurement
+        # subprocess exercises snapshot writing too. The tmpdir leaks
+        # when the subprocess exits — acceptable since /tmp is OS-cleaned.
+        self.log_path = tempfile.mkdtemp(prefix="aca-bench-debug-log-")
+        super().setup_for_gpu_measurement()
+
+    def teardown(self, cache: bytes | None = None) -> None:
+        super().teardown(cache)
+        if self.log_path is not None:
+            shutil.rmtree(self.log_path, ignore_errors=True)
+            self.log_path = None
+
+
 class AcaBaselineGpuPeakMem(_gpu_mem.GpuPeakMem):
     bench_module = "benchmarks.bench_aca_baseline"
     bench_class = "AcaBaseline"
     timeout = 3600
+
+
+class AcaBaselineDebugLogGpuPeakMem(_gpu_mem.GpuPeakMem):
+    bench_module = "benchmarks.bench_aca_baseline"
+    bench_class = "AcaBaselineDebugLog"
+    timeout = 3600
diff --git a/benchmarks/bench_mahler_yum.py b/benchmarks/bench_mahler_yum.py
@@ -44,7 +44,6 @@ def setup(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
         self._compile_time = time.perf_counter() - start
 
@@ -57,7 +56,6 @@ def time_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def peakmem_execution(self):
@@ -66,7 +64,6 @@ def peakmem_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def teardown(self):

diff --git a/benchmarks/bench_precautionary_savings.py b/benchmarks/bench_precautionary_savings.py
@@ -100,7 +100,6 @@ def setup(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=self.period_to_regime_to_V_arr,
             log_level="off",
-            check_initial_conditions=False,
         )
         self._compile_time = time.perf_counter() - start
 
@@ -113,7 +112,6 @@ def time_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=self.period_to_regime_to_V_arr,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def peakmem_execution(self):
@@ -122,7 +120,6 @@ def peakmem_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=self.period_to_regime_to_V_arr,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def teardown(self):
@@ -157,7 +154,6 @@ def setup(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
         self._compile_time = time.perf_counter() - start
 
@@ -170,7 +166,6 @@ def time_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def peakmem_execution(self):
@@ -179,7 +174,6 @@ def peakmem_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def teardown(self):
@@ -215,7 +209,6 @@ def setup(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
         self._compile_time = time.perf_counter() - start
 
@@ -228,7 +221,6 @@ def time_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def peakmem_execution(self):
@@ -237,7 +229,6 @@ def peakmem_execution(self):
             initial_conditions=self.initial_conditions,
             period_to_regime_to_V_arr=None,
             log_level="off",
-            check_initial_conditions=False,
         )
 
     def teardown(self):

diff --git a/benchmarks/pr_comment.py b/benchmarks/pr_comment.py
@@ -35,6 +35,8 @@
 _CLASS_DISPLAY = {
     "AcaBaseline": "aca-baseline",
     "AcaBaselineGpuPeakMem": "aca-baseline",
+    "AcaBaselineDebugLog": "aca-baseline-debug",
+    "AcaBaselineDebugLogGpuPeakMem": "aca-baseline-debug",
     "MahlerYum": "Mahler-Yum",
     "MahlerYumGpuPeakMem": "Mahler-Yum",
     "PrecautionarySavingsSolve": "Precautionary Savings - Solve",

diff --git a/docs/examples/mahler_yum_2024.md b/docs/examples/mahler_yum_2024.md
@@ -46,6 +46,7 @@ result = MAHLER_YUM_MODEL.simulate(
         ),
     },
     period_to_regime_to_V_arr=None,
+    log_level="debug",
     seed=8295,
 )
 ```
diff --git a/docs/examples/mortality.md b/docs/examples/mortality.md
@@ -29,6 +29,7 @@ result = model.simulate(
         "regime_id": jnp.full(100, model.regime_names_to_ids["working_life"]),
     },
     period_to_regime_to_V_arr=None,
+    log_level="debug",
     seed=1234,
 )
 

diff --git a/docs/examples/precautionary_savings.md b/docs/examples/precautionary_savings.md
@@ -30,6 +30,7 @@ result = model.simulate(
         "regime_id": jnp.full(100, model.regime_names_to_ids["alive"]),
     },
     period_to_regime_to_V_arr=None,
+    log_level="debug",
 )
 
 df = result.to_dataframe(additional_targets="all")

diff --git a/docs/examples/precautionary_savings_health.md b/docs/examples/precautionary_savings_health.md
@@ -35,6 +35,7 @@ result = model.simulate(
         "regime_id": jnp.full(1_000, model.regime_names_to_ids["working_life"]),
     },
     period_to_regime_to_V_arr=None,
+    log_level="debug",
 )
 
 df = result.to_dataframe(additional_targets="all")

diff --git a/docs/examples/tiny.md b/docs/examples/tiny.md
@@ -35,6 +35,7 @@ result = model.simulate(
     params=params,
     initial_conditions=initial_df,
     period_to_regime_to_V_arr=None,
+    log_level="debug",
 )
 
 df = result.to_dataframe(additional_targets="all")

diff --git a/docs/explanations/beta_delta.ipynb b/docs/explanations/beta_delta.ipynb
@@ -319,6 +319,7 @@
     "    params={\"working\": {\"H\": {\"discount_factor\": DELTA}}},\n",
     "    initial_conditions=initial_conditions,\n",
     "    period_to_regime_to_V_arr=None,\n",
+    "    log_level=\"debug\",\n",
     ")\n",
     "\n",
     "df_exp = result_exp.to_dataframe().query('regime == \"working\"')\n",
@@ -363,6 +364,7 @@
     "    },\n",
     "    initial_conditions=initial_conditions,\n",
     "    period_to_regime_to_V_arr=None,\n",
+    "    log_level=\"debug\",\n",
     ")\n",
     "\n",
     "df_naive = result_naive.to_dataframe().query('regime == \"working\"')\n",