derivativeGP gpu support (facebookresearch#444)

JasonKChow · facebook-github-bot · commit b1c9f0422d58 · 2024-11-08T21:25:23.000-08:00
Summary:

Add GPU support for the derivative GP model.

I noticed that this model isn’t a normal model that can show up in a live experiment with a config, but we should still make it work on GPU. Most of the change was straightforward, but it did require some fairly arcane overriding of GPyTorch’s underlying handling of train_inputs, which in turn required some equally arcane mypy workarounds.

Differential Revision: D65515631
diff --git a/aepsych/means/constant_partial_grad.py b/aepsych/means/constant_partial_grad.py
@@ -26,6 +26,6 @@ def forward(self, input: torch.Tensor) -> torch.Tensor:
         idx = input[..., -1].to(dtype=torch.long) > 0
         mean_fit = super(ConstantMeanPartialObsGrad, self).forward(input[..., ~idx, :])
         sz = mean_fit.shape[:-1] + torch.Size([input.shape[-2]])
-        mean = torch.zeros(sz)
+        mean = torch.zeros(sz).to(input)
         mean[~idx] = mean_fit
         return mean
diff --git a/aepsych/models/base.py b/aepsych/models/base.py
@@ -116,7 +116,7 @@ class AEPsychMixin(GPyTorchModel):
 
     extremum_solver = "Nelder-Mead"
     outcome_types: List[str] = []
-    train_inputs: Optional[Tuple[torch.Tensor]]
+    train_inputs: Optional[Tuple[torch.Tensor, ...]]
     train_targets: Optional[torch.Tensor]
 
     @property
@@ -398,7 +398,7 @@ def p_below_threshold(
 
 
 class AEPsychModelDeviceMixin(AEPsychMixin):
-    _train_inputs: Optional[Tuple[torch.Tensor]]
+    _train_inputs: Optional[Tuple[torch.Tensor, ...]]
     _train_targets: Optional[torch.Tensor]
 
     def set_train_data(self, inputs=None, targets=None, strict=False):
@@ -423,13 +423,17 @@ def device(self) -> torch.device:
         return next(self.parameters()).device
 
     @property
-    def train_inputs(self) -> Optional[Tuple[torch.Tensor]]:
+    def train_inputs(self) -> Optional[Tuple[torch.Tensor, ...]]:
         if self._train_inputs is None:
             return None
 
         # makes sure the tensors are on the right device, move in place
+        _train_inputs = []
         for input in self._train_inputs:
-            input.to(self.device)
+            _train_inputs.append(input.to(self.device))
+
+        _tuple_inputs: Tuple[torch.Tensor, ...] = tuple(_train_inputs)
+        self._train_inputs = _tuple_inputs
 
         return self._train_inputs
 
diff --git a/aepsych/models/derivative_gp.py b/aepsych/models/derivative_gp.py
@@ -13,6 +13,7 @@
 import torch
 from aepsych.kernels.rbf_partial_grad import RBFKernelPartialObsGrad
 from aepsych.means.constant_partial_grad import ConstantMeanPartialObsGrad
+from aepsych.models.base import AEPsychModelDeviceMixin
 from botorch.models.gpytorch import GPyTorchModel
 from gpytorch.distributions import MultivariateNormal
 from gpytorch.kernels import Kernel
@@ -22,7 +23,9 @@
 from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy
 
 
-class MixedDerivativeVariationalGP(gpytorch.models.ApproximateGP, GPyTorchModel):
+class MixedDerivativeVariationalGP(
+    gpytorch.models.ApproximateGP, AEPsychModelDeviceMixin, GPyTorchModel
+):
     """A variational GP with mixed derivative observations.
 
     For more on GPs with derivative observations, see e.g. Riihimaki & Vehtari 2010.
diff --git a/tests_gpu/models/test_derivative_gp.py b/tests_gpu/models/test_derivative_gp.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from aepsych import Config, SequentialStrategy
+from aepsych.models.derivative_gp import MixedDerivativeVariationalGP
+from botorch.fit import fit_gpytorch_mll
+from botorch.utils.testing import BotorchTestCase
+from gpytorch.likelihoods import BernoulliLikelihood
+from gpytorch.mlls.variational_elbo import VariationalELBO
+
+
+class TestDerivativeGP(BotorchTestCase):
+
+    def test_MixedDerivativeVariationalGP_gpu(self):
+        train_x = torch.cat(
+            (torch.tensor([1.0, 2.0, 3.0, 4.0]).unsqueeze(1), torch.zeros(4, 1)), dim=1
+        )
+        train_y = torch.tensor([1.0, 2.0, 3.0, 4.0])
+        m = MixedDerivativeVariationalGP(
+            train_x=train_x,
+            train_y=train_y,
+            inducing_points=train_x,
+            fixed_prior_mean=0.5,
+        ).cuda()
+
+        self.assertEqual(m.mean_module.constant.item(), 0.5)
+        self.assertEqual(
+            m.covar_module.base_kernel.raw_lengthscale.shape, torch.Size([1, 1])
+        )
+        mll = VariationalELBO(
+            likelihood=BernoulliLikelihood(), model=m, num_data=train_y.numel()
+        ).cuda()
+        mll = fit_gpytorch_mll(mll)
+        test_x = torch.tensor([[1.0, 0], [3.0, 1.0]]).cuda()
+        m(test_x)