Commit 44b5dd3

speedups and bugfixes

1 parent 3eb7965 commit 44b5dd3

File tree

9 files changed: +165 -164 lines changed

tests/test_kalman_filter.py

Lines changed: 4 additions & 4 deletions
@@ -28,17 +28,17 @@ def test_nans(self, ndim: int = 1, n_step: int = 1):
         data[2, 2, 0] = float('nan')

         # test critical helper fun:
-        get_nan_groups2 = torch.jit.script(get_nan_groups)
         nan_groups = {2}
         if ndim > 1:
             nan_groups.add(0)
         for t in range(ntimes):
-            for group_idx, valid_idx in get_nan_groups2(torch.isnan(data[:, t])):
+            for group_idx, masks in get_nan_groups(torch.isnan(data[:, t])):
                 if t == 2:
-                    if valid_idx is None:
+                    if masks is None:
                         self.assertEqual(len(group_idx), data.shape[0] - len(nan_groups))
                         self.assertFalse(bool(set(group_idx.tolist()).intersection(nan_groups)))
                     else:
+                        valid_idx, m1d, m2d = masks
                         self.assertLess(len(valid_idx), ndim)
                         self.assertGreater(len(valid_idx), 0)
                         if len(valid_idx) == 1:
@@ -52,7 +52,7 @@ def test_nans(self, ndim: int = 1, n_step: int = 1):
                             self.assertSetEqual(set(valid_idx.tolist()), {1, 2})
                             self.assertSetEqual(set(group_idx.tolist()), {2})
                 else:
-                    self.assertIsNone(valid_idx)
+                    self.assertIsNone(masks)

         # test `update`
         # TODO: measure dim vs. state-dim
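
For context on the test change above: `get_nan_groups` now returns `(group_idx, None)` when every measure is valid for a group, and `(group_idx, (valid_idx, m1d, m2d))` otherwise, so callers unpack a masks tuple rather than a bare `valid_idx`. A minimal sketch of the consumption pattern, using a toy tensor rather than the test-suite fixtures:

    import torch
    from torchcast.internals.utils import get_nan_groups

    data = torch.randn(4, 3)           # 4 groups x 3 measures (toy)
    data[2, 0] = float('nan')          # group 2 is missing its first measure

    for group_idx, masks in get_nan_groups(torch.isnan(data)):
        if masks is None:
            print(group_idx.tolist(), '-> fully observed')
        else:
            valid_idx, m1d, m2d = masks   # valid indices plus precomputed meshgrids
            print(group_idx.tolist(), '-> observed measures:', valid_idx.tolist())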

torchcast/exp_smooth/exp_smooth.py

Lines changed: 12 additions & 11 deletions
@@ -10,7 +10,7 @@

 from torchcast.exp_smooth.smoothing_matrix import SmoothingMatrix
 from torchcast.covariance import Covariance
-from torchcast.internals.utils import update_tensor, get_meshgrids, transpose_last_dims
+from torchcast.internals.utils import update_tensor, transpose_last_dims
 from torchcast.process import Process
 from torchcast.state_space import StateSpaceModel

@@ -43,14 +43,14 @@ def initial_covariance(self, inputs: dict, num_groups: int, num_times: int, _ign

     def _mask_mats(self,
                    groups: torch.Tensor,
-                   val_idx: Optional[torch.Tensor],
+                   masks: Optional[tuple[torch.Tensor, torch.Tensor, torch.Tensor]],
                    **kwargs) -> dict[str, torch.Tensor]:
-        out = super()._mask_mats(groups, val_idx, **kwargs)
-        if val_idx is None:
+        out = super()._mask_mats(groups, masks, **kwargs)
+        if masks is None:
             return out
-        m1d, _ = get_meshgrids(groups, val_idx)
+        val_id, m1d, m2d = masks
         Kt = transpose_last_dims(kwargs['K'])
-        out['K'] = Kt[m1d]  # K is always a 2D matrix, so we can use m1d
+        out['K'] = transpose_last_dims(Kt[m1d])
         return out

     def _parse_kwargs(self,
@@ -70,13 +70,14 @@ def _parse_kwargs(self,
         if self.smoothing_matrix.expected_kwargs:
             smat_kwargs = {k: kwargs[k] for k in self.smoothing_matrix.expected_kwargs}
             used_keys |= set(smat_kwargs)
-        Ks = self.smoothing_matrix(smat_kwargs, num_groups=num_groups, num_times=num_timesteps)
-        update_kwargs['K'] = Ks.unbind(1)
-
-        if self.smoothing_matrix.expected_kwargs or self.measure_covariance.expected_kwargs:
+        if smat_kwargs:
+            Ks = self.smoothing_matrix(smat_kwargs, num_groups=num_groups, num_times=num_timesteps)
+            update_kwargs['K'] = Ks.unbind(1)
             predict_kwargs['cov1step'] = Ks @ torch.stack(measure_covs, 1) @ Ks.transpose(-1, -2)
         else:
-            K1 = update_kwargs['K'][0]
+            # faster if not time-varying:
+            K1 = self.smoothing_matrix(smat_kwargs, num_groups=num_groups, num_times=1).squeeze(1)
+            update_kwargs['K'] = [K1] * num_timesteps
             measure_cov = measure_covs[0]
             cov1step = K1 @ measure_cov @ K1.transpose(-1, -2)
             predict_kwargs['cov1step'] = [cov1step] * num_timesteps
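
The `_mask_mats` change above also fixes a bug: the old code returned `Kt[m1d]`, which is still in transposed (measure, state) layout, while the fix transposes back after indexing. A sketch of that pattern with illustrative shapes: it assumes K is (groups, state_rank, n_measures), consistent with `cov1step = K1 @ measure_cov @ K1.transpose(-1, -2)` above, and the local `transpose_last_dims` stands in for the library helper of the same name:

    import torch

    def transpose_last_dims(x: torch.Tensor) -> torch.Tensor:
        # stand-in for torchcast.internals.utils.transpose_last_dims
        return x.transpose(-1, -2)

    G, S, M = 5, 3, 2                     # groups, state-rank, measures (toy sizes)
    K = torch.randn(G, S, M)
    groups = torch.tensor([0, 2])         # groups sharing this nan-pattern
    valid_idx = torch.tensor([1])         # only measure 1 observed

    m1d = torch.meshgrid(groups, valid_idx, indexing='ij')
    Kt = transpose_last_dims(K)               # (G, M, S): measures become rows
    K_masked = transpose_last_dims(Kt[m1d])   # (2, S, 1): back to (state, measure)
    assert K_masked.shape == (2, S, 1)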

torchcast/internals/batch_design/measurement_model.py

Lines changed: 1 addition & 1 deletion
@@ -161,7 +161,7 @@ def _adjust_measure_mat(self,
             # apply measure-wide adjustment
             measure_mat[i] = self.measure_funs[measure].adjust_measure_mat(measure_mat[i], measured_mean[i])

-        return torch.stack(measure_mat, dim=-1)
+        return torch.stack(measure_mat, dim=-2)

     @cached_property
     def measure2idx(self) -> dict[str, int]:
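
The one-character change above fixes the orientation of the stacked measurement matrix: stacking the per-measure rows on `dim=-1` makes measures the columns, while `dim=-2` makes them the rows. A sketch with assumed toy shapes (the library's actual tensors carry extra leading dims):

    import torch

    num_groups, state_rank = 4, 3
    rows = [torch.randn(num_groups, state_rank) for _ in range(2)]   # one row per measure

    H_wrong = torch.stack(rows, dim=-1)   # (4, 3, 2): measures land in the columns
    H_right = torch.stack(rows, dim=-2)   # (4, 2, 3): one row per measure
    assert H_right.shape == (num_groups, 2, state_rank)
    assert torch.equal(H_wrong.transpose(-1, -2), H_right)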

torchcast/internals/batch_design/transition_model.py

Lines changed: 3 additions & 8 deletions
@@ -20,23 +20,18 @@ def __init__(self,
         )
         self.measures = measures

-        zeros = torch.zeros(
+        F = torch.zeros(
             (self.num_groups, self.num_timesteps, self.state_rank, self.state_rank),
             device=self.device,
             dtype=self.dtype
         )
-        F = []
         for pid, process in self.processes.items():
             if process.linear_transition:
                 pidx = self.process2slice[pid]
-                # note: as in other parts, assuming autograd makes it more efficient to create clones then sum vs.
-                # repeated masks on the same tensor. should verify that
-                thisF = zeros.clone()
-                thisF[:, :, pidx, pidx] = process.get_transition_matrix()
-                F.append(thisF)
+                F[:, :, pidx, pidx] = process.get_transition_matrix()
             else:
                 raise NotImplementedError
-        self._transition_mats = torch.stack(F, dim=0).sum(0)
+        self._transition_mats = F

     @cached_property
     def transition_mats(self) -> Sequence[torch.Tensor]:
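
The rewrite above drops the clone-a-zeros-template-per-process-then-sum pattern (and its "should verify that" caveat) in favor of writing each process's block directly into one zeros tensor. A sketch of the new pattern, with hypothetical slices standing in for `process2slice` and the per-process transition matrices:

    import torch

    G, T, R = 2, 5, 4                      # groups, timesteps, state-rank (toy sizes)
    F = torch.zeros(G, T, R, R)

    # hypothetical per-process slices and transition blocks
    blocks = {slice(0, 2): torch.eye(2), slice(2, 4): 0.9 * torch.eye(2)}
    for pidx, tmat in blocks.items():
        F[:, :, pidx, pidx] = tmat         # block-diagonal write in place, no clones

    assert torch.equal(F[0, 0, :2, :2], torch.eye(2))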

torchcast/internals/utils.py

Lines changed: 16 additions & 39 deletions
@@ -12,40 +12,6 @@ def get_subclasses(cls: Type) -> Iterable[Type]:
         yield subclass


-@functools.lru_cache(maxsize=100)
-def get_meshgrids(groups: torch.Tensor,
-                  val_idx: torch.Tensor) -> tuple[tuple[torch.Tensor, ...], tuple[torch.Tensor, ...]]:
-    """
-    Returns meshgrids for the given groups and val_idx.
-    """
-    m1d = torch.meshgrid(groups, val_idx, indexing='ij')
-    m2d = torch.meshgrid(groups, val_idx, val_idx, indexing='ij')
-    return m1d, m2d
-
-
-def mask_mats(groups: torch.Tensor,
-              val_idx: Optional[torch.Tensor],
-              mats: Sequence[tuple[str, torch.Tensor, Collection[int]]]) -> dict[str, torch.Tensor]:
-    out = {}
-    if val_idx is None:
-        for nm, mat, _ in mats:
-            out[nm] = mat[groups]
-    else:
-        m1d, m2d = get_meshgrids(groups, val_idx)
-        for nm, mat, dim in mats:
-            dim = set(dim)
-            if dim == {-2}:
-                mat = transpose_last_dims(mat)
-                out[nm] = transpose_last_dims(mat[m1d])
-            elif dim == {-1}:
-                out[nm] = mat[m1d]
-            elif dim == {-2, -1}:
-                out[nm] = mat[m2d]
-            else:
-                raise ValueError(f"Invalid dim ({dim}), must be 0, 1, or 2")
-    return out
-
-
 def normalize_index(index: tuple) -> tuple:
     # Special-case early check for the batched pattern
     if isinstance(index, tuple) and _is_special_batched_pattern(index):
@@ -182,30 +148,41 @@ def transpose_last_dims(x: torch.Tensor) -> torch.Tensor:
     return x.permute(*args)


-def get_nan_groups(isnan: torch.Tensor) -> List[Tuple[torch.Tensor, Optional[torch.Tensor]]]:
+def get_nan_groups(isnan: torch.Tensor) -> List[Tuple[torch.Tensor, Optional[tuple[torch.Tensor, torch.Tensor, torch.Tensor]]]]:
     """
     Iterable of (group_idx, valid_idx) tuples that can be passed to torch.meshgrid. If no valid, then not returned; if
     all valid then (group_idx, None) is returned; can skip call to meshgrid.
     """
     assert len(isnan.shape) == 2
     state_dim = isnan.shape[-1]
-    out: List[Tuple[torch.Tensor, Optional[torch.Tensor]]] = []
+
+    out = []
     if state_dim == 1:
         # shortcut for univariate
         group_idx = (~isnan.squeeze(-1)).nonzero().view(-1)
         out.append((group_idx, None))
         return out
-    for nan_combo in torch.unique(isnan, dim=0):
+
+    nan_combos = torch.unique(isnan, dim=0)
+    if len(nan_combos) == 1 and nan_combos[0].sum() == 0:
+        # shortcut for no nans
+        out.append((torch.arange(isnan.shape[0]), None))
+        return out
+
+    for nan_combo in nan_combos:
         num_nan = nan_combo.sum()
         if num_nan < state_dim:
             c1 = (isnan * nan_combo[None, :]).sum(1) == num_nan
             c2 = (~isnan * ~nan_combo[None, :]).sum(1) == (state_dim - num_nan)
             group_idx = (c1 & c2).nonzero().view(-1)
             if num_nan == 0:
-                valid_idx = None
+                out.append((group_idx, None))
             else:
                 valid_idx = (~nan_combo).nonzero().view(-1)
-            out.append((group_idx, valid_idx))
+                m1d = torch.meshgrid(group_idx, valid_idx, indexing='ij')
+                m2d = torch.meshgrid(group_idx, valid_idx, valid_idx, indexing='ij')
                masks = (valid_idx, m1d, m2d)
+                out.append((group_idx, masks))
     return out

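`get_meshgrids` and `mask_mats` are deleted because `get_nan_groups` now builds the meshgrids itself and ships them in the masks tuple. For reference, what the two meshgrids index, shown on toy tensors: `m1d` subsets tensors whose trailing dim is per-measure, while `m2d` pulls the square valid-by-valid block out of a per-group matrix:

    import torch

    group_idx = torch.tensor([0, 3])       # groups sharing a nan-pattern
    valid_idx = torch.tensor([1, 2])       # measures observed for those groups

    m1d = torch.meshgrid(group_idx, valid_idx, indexing='ij')
    m2d = torch.meshgrid(group_idx, valid_idx, valid_idx, indexing='ij')

    mean = torch.randn(5, 3)               # toy (groups, measures)
    cov = torch.randn(5, 3, 3)             # toy (groups, measures, measures)

    assert mean[m1d].shape == (2, 2)       # selected groups x valid measures
    assert cov[m2d].shape == (2, 2, 2)     # valid-by-valid block per selected group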

torchcast/kalman_filter/binomial_filter.py

Lines changed: 28 additions & 23 deletions
@@ -5,7 +5,6 @@
 from typing import Sequence, TYPE_CHECKING, Optional, Union

 from torchcast.covariance import Covariance
-from torchcast.internals.utils import get_meshgrids
 from torchcast.kalman_filter import KalmanFilter
 from torchcast.state_space import Predictions
 from torchcast.internals.batch_design import MeasurementModel, Sigmoid
@@ -101,20 +100,21 @@ def _generate_predictions(self,
             updates=updates,
             mc_white_noise=mc_white_noise,
             num_obs=num_obs,
-            observed_counts=observed_counts
+            observed_counts=observed_counts,
         )

     def _mask_mats(self,
                    groups: torch.Tensor,
-                   val_idx: Optional[torch.Tensor],
+                   masks: Optional[tuple[torch.Tensor, torch.Tensor, torch.Tensor]],
                    binary_idx: Optional[Sequence[int]] = None,
                    **kwargs) -> dict:
-        out = super()._mask_mats(groups, val_idx, **kwargs)
-        if val_idx is None or binary_idx is None:
+        out = super()._mask_mats(groups, masks, **kwargs)
+        if masks is None or binary_idx is None:
             return out
+        val_idx = masks[0]
         out['binary_idx'] = [i for i in binary_idx if i in val_idx]
         _binary_subset_idx = torch.tensor([i1 for i1, i2 in enumerate(binary_idx) if i2 in val_idx], dtype=torch.long)
-        m1d, _ = get_meshgrids(groups, _binary_subset_idx)
+        m1d = torch.meshgrid(groups, _binary_subset_idx, indexing='ij')
         out['num_obs'] = kwargs['num_obs'][m1d]
         return out

@@ -271,15 +271,13 @@ def __init__(self,
                  measure_covs: Union[Sequence[torch.Tensor], torch.Tensor],
                  num_obs: Sequence[torch.Tensor],
                  observed_counts: Optional[bool] = None,
-                 updates: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
-                 mc_white_noise: Optional[torch.Tensor] = None):
+                 **kwargs):

         super().__init__(
             measurement_model=measurement_model,
             states=states,
             measure_covs=measure_covs,
-            updates=updates,
-            mc_white_noise=mc_white_noise
+            **kwargs
         )

         self.observed_counts = observed_counts
@@ -381,17 +379,17 @@ def _get_posterior_predict_samples(self) -> torch.Tensor:
         return samples


-def main(num_groups: int = 100, num_timesteps: int = 100, bias: float = -1, prop_common: float = 1.):
-    from torchcast.process import LocalLevel
+def main(num_groups: int = 50, num_timesteps: int = 365, bias: float = -1, prop_common: float = 1.):
+    from torchcast.process import LocalLevel, Season
     from torchcast.utils import TimeSeriesDataset
     from scipy.special import expit
     import pandas as pd
     from plotnine import geom_line, aes, ggtitle
     torch.manual_seed(1234)

     TOTAL_COUNT = 4
-    measures = ['dim1', 'dim2', 'dim3']
-    binary_measures = ['dim1']
+    measures = ['dim1', 'dim2']
+    binary_measures = []
     latent_common = torch.cumsum(.05 * torch.randn((num_groups, num_timesteps, 1)), dim=1)
     latent_ind = torch.cumsum(.05 * torch.randn((num_groups, num_timesteps, len(measures))), dim=1)
     assert 0 <= prop_common <= 1
@@ -424,22 +422,29 @@ def main(num_groups: int = 100, num_timesteps: int = 100, bias: float = -1, prop
     )

     bf = BinomialFilter(
-        processes=[LocalLevel(id=f'level_{m}', measure=m) for m in measures],
+        processes=[LocalLevel(id=f'level_{m}', measure=m) for m in measures]
+                  + [Season(id=f'season_{m}', measure=m, dt_unit='D', period=7, K=2) for m in measures],
         measures=measures,
         binary_measures=binary_measures,
         observed_counts=False
     )

     y = dataset.tensors[0]
     bf.fit(y,
-           stopping={'monitor_params': True},
-           num_obs=TOTAL_COUNT,
-           mc_samples=32)
+           stopping={
+               # 'max_iter': 10
+               # 'monitor_params': True
+           },
+           start_offsets=dataset.start_offsets,
+           mc_samples=32
+           )
     _kwargs = {}
-    if TOTAL_COUNT != 1:
-        _kwargs['num_obs'] = TOTAL_COUNT
+    # if TOTAL_COUNT != 1:
+    #     _kwargs['num_obs'] = TOTAL_COUNT
     preds = bf(
-        dataset.tensors[0], **_kwargs,
+        dataset.tensors[0],
+        start_offsets=dataset.start_offsets,
+        **_kwargs,
     )
     df_preds = preds.to_dataframe(dataset)
     if bf.observed_counts:
@@ -458,8 +463,8 @@ def main(num_groups: int = 100, num_timesteps: int = 100, bias: float = -1, prop
         + geom_line(aes(y='latent'), color='purple')
         + ggtitle(g)
     ).show()
-    # preds._white_noise = torch.zeros((1, len(binary_measures)))
-    # print(preds.log_prob(y).mean())
+    preds._white_noise = torch.zeros((1, len(binary_measures)))
+    print(preds.log_prob(y).mean())


 if __name__ == '__main__':
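
The `_mask_mats` override above re-maps `binary_idx` twice: once to the measure indices that survive masking, and once to their positions within the original `binary_idx`, which is the layout `num_obs` uses. A sketch with toy indices (not a fitted model):

    import torch

    binary_idx = [0, 2]                    # measures modeled as binomial (toy)
    val_idx = torch.tensor([1, 2])         # measures observed this timestep
    groups = torch.tensor([0, 3])

    kept = [i for i in binary_idx if i in val_idx]      # [2]: still-observed binary measures
    _subset = torch.tensor([i1 for i1, i2 in enumerate(binary_idx) if i2 in val_idx],
                           dtype=torch.long)            # [1]: their positions in binary_idx
    m1d = torch.meshgrid(groups, _subset, indexing='ij')

    num_obs = torch.randint(1, 5, (5, len(binary_idx)))   # toy (groups, binary measures)
    assert num_obs[m1d].shape == (2, 1)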

torchcast/kalman_filter/kalman_filter.py

Lines changed: 11 additions & 4 deletions
@@ -100,13 +100,20 @@ def _parse_kwargs(self,
         )

         # process-variance:
+        measure_scaling = torch.diag_embed(self._get_measure_scaling().unsqueeze(0))
         pcov_kwargs = {}
         if self.process_covariance.expected_kwargs:
             pcov_kwargs = {k: kwargs[k] for k in self.process_covariance.expected_kwargs}
             used_keys |= set(pcov_kwargs)
-        pcov_raw = self.process_covariance(pcov_kwargs, num_groups=num_groups, num_times=num_timesteps)
-        measure_scaling = torch.diag_embed(self._get_measure_scaling().unsqueeze(0).unsqueeze(0))
-        Qs = measure_scaling @ pcov_raw @ measure_scaling
-        predict_kwargs['Q'] = Qs.unbind(1)
+        if pcov_kwargs:
+            measure_scaling = measure_scaling.unsqueeze(0)
+            pcov_raw = self.process_covariance(pcov_kwargs, num_groups=num_groups, num_times=num_timesteps)
+            Qs = measure_scaling @ pcov_raw @ measure_scaling
+            predict_kwargs['Q'] = Qs.unbind(1)
+        else:
+            # faster if not time-varying
+            pcov_raw = self.process_covariance(pcov_kwargs, num_groups=num_groups, num_times=1)
+            Qs = measure_scaling @ pcov_raw.squeeze(1) @ measure_scaling
+            predict_kwargs['Q'] = [Qs] * num_timesteps

         return predict_kwargs, update_kwargs, used_keys
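
This is the same fast-path shape as in exp_smooth.py: when the process covariance takes no per-timestep inputs, build one Q and reuse it, rather than materializing a (groups, times, rank, rank) stack and unbinding it. A sketch with toy tensors (not the `Covariance` module API):

    import torch

    num_groups, num_timesteps, rank = 8, 200, 4                 # toy sizes
    scaling = torch.diag_embed(torch.rand(rank).unsqueeze(0))   # (1, rank, rank)
    pcov = torch.eye(rank).expand(num_groups, rank, rank)       # one covariance, no time dim

    Q = scaling @ pcov @ scaling           # built once
    Qs = [Q] * num_timesteps               # T references to one tensor,
                                           # vs. unbinding a (G, T, R, R) stack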
