probcomp · horizon-blue · Dec 2, 2025 · Nov 26, 2025 · Nov 26, 2025 · Nov 27, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -26,6 +26,7 @@ add_library(genmetaballs_core
   genmetaballs/src/cuda/core/geometry.cu
   genmetaballs/src/cuda/core/confidence.cuh
   genmetaballs/src/cuda/core/image.cuh
+  genmetaballs/src/cuda/core/intersector.cuh
 )
 
 # Set include directories for the core library

diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu
@@ -4,13 +4,15 @@
 #include <nanobind/operators.h>
 #include <nanobind/stl/tuple.h>
 #include <nanobind/stl/vector.h>
+#include <tuple>
 
 #include "core/blender.cuh"
 #include "core/camera.cuh"
 #include "core/confidence.cuh"
 #include "core/fmb.cuh"
 #include "core/geometry.cuh"
 #include "core/image.cuh"
+#include "core/intersector.cuh"
 #include "core/utils.cuh"
 
 namespace nb = nanobind;
@@ -24,6 +26,28 @@ void bind_image_view(nb::module_& m, const char* name);
 
 NB_MODULE(_genmetaballs_bindings, m) {
 
+    /*
+     * Confidence module bindings
+     */
+
+    nb::module_ confidence = m.def_submodule("confidence");
+    nb::class_<ZeroParameterConfidence>(confidence, "ZeroParameterConfidence")
+        .def(nb::init<>())
+        .def("get_confidence", &ZeroParameterConfidence::get_confidence, nb::arg("sumexpd"),
+             "Get the confidence value for a given sumexpd")
+        .def("__repr__",
+             [](const ZeroParameterConfidence& c) { return nb::str("ZeroParameterConfidence()"); });
+
+    nb::class_<TwoParameterConfidence>(confidence, "TwoParameterConfidence")
+        .def(nb::init<float, float>())
+        .def_ro("beta4", &TwoParameterConfidence::beta4)
+        .def_ro("beta5", &TwoParameterConfidence::beta5)
+        .def("get_confidence", &TwoParameterConfidence::get_confidence, nb::arg("sumexpd"),
+             "Get the confidence value for a given sumexpd")
+        .def("__repr__", [](const TwoParameterConfidence& c) {
+            return nb::str("TwoParameterConfidence(beta4={}, beta5={})").format(c.beta4, c.beta5);
+        });
+
     /*
      * FMB module bindings
      */
@@ -38,6 +62,8 @@ NB_MODULE(_genmetaballs_bindings, m) {
                          auto extent = self.get_extent();
                          return std::tuple{extent.x, extent.y, extent.z};
                      })
+        .def("cov_inv_apply", &FMB::cov_inv_apply,
+             "apply the inverse covariance matrix to the given vector", nb::arg("vec"))
         .def("quadratic_form", &FMB::quadratic_form,
              "Evaluate the associated quadratic form at the given vector", nb::arg("vec"));
 
@@ -85,9 +111,9 @@ NB_MODULE(_genmetaballs_bindings, m) {
         .def("inv", &Pose::inv, "Inverse pose");
 
     nb::class_<Ray>(geometry, "Ray")
-        .def(nb::init<>())
-        .def_rw("start", &Ray::start)
-        .def_rw("direction", &Ray::direction);
+        .def(nb::init<Vec3D, Vec3D>())
+        .def_ro("start", &Ray::start)
+        .def_ro("direction", &Ray::direction);
 
     /*
      * Camera module bindings
@@ -116,26 +142,17 @@ NB_MODULE(_genmetaballs_bindings, m) {
     bind_image<MemoryLocation::DEVICE>(image, "GPUImage");
 
     /*
-     * Confidence module bindings
+     * Intersector module bindings
      */
 
-    nb::module_ confidence = m.def_submodule("confidence");
-    nb::class_<ZeroParameterConfidence>(confidence, "ZeroParameterConfidence")
-        .def(nb::init<>())
-        .def("get_confidence", &ZeroParameterConfidence::get_confidence, nb::arg("sumexpd"),
-             "Get the confidence value for a given sumexpd")
-        .def("__repr__",
-             [](const ZeroParameterConfidence& c) { return nb::str("ZeroParameterConfidence()"); });
-
-    nb::class_<TwoParameterConfidence>(confidence, "TwoParameterConfidence")
-        .def(nb::init<float, float>())
-        .def_ro("beta4", &TwoParameterConfidence::beta4)
-        .def_ro("beta5", &TwoParameterConfidence::beta5)
-        .def("get_confidence", &TwoParameterConfidence::get_confidence, nb::arg("sumexpd"),
-             "Get the confidence value for a given sumexpd")
-        .def("__repr__", [](const TwoParameterConfidence& c) {
-            return nb::str("TwoParameterConfidence(beta4={}, beta5={})").format(c.beta4, c.beta5);
-        });
+    nb::module_ intersector = m.def_submodule("intersector");
+    intersector.def(
+        "linear_intersect",
+        [](const FMB& fmb, const Ray& ray, const Pose& cam_pose) {
+            auto [t, d] = LinearIntersector::intersect(fmb, ray, cam_pose);
+            return std::make_tuple(t, d);
+        },
+        "Linear intersection of ray and FMB.", nb::arg("fmb"), nb::arg("ray"), nb::arg("cam_pose"));
 
     /*
      * Utils module bindings

diff --git a/genmetaballs/src/cuda/core/fmb.cu b/genmetaballs/src/cuda/core/fmb.cu
@@ -2,12 +2,18 @@
 #include "geometry.cuh"
 #include "utils.cuh"
 
+CUDA_CALLABLE __forceinline__ Vec3D vecdiv(const Vec3D u, const Vec3D v) {
+    return {u.x / v.x, u.y / v.y, u.z / v.z};
+}
+
+CUDA_CALLABLE Vec3D FMB::cov_inv_apply(const Vec3D vec) const {
+    const auto rot = pose_.get_rot();
+    return rot.inv().apply(vecdiv(rot.apply(vec), extent_));
+}
+
 CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const {
-    const auto shftd_vec = vec - pose_.get_tran();
-    const auto rot_shftd_vec = pose_.get_rot().apply(shftd_vec);
-    const auto scaled_rot_shftd_vec = Vec3D(
-        rot_shftd_vec.x / extent_.x, rot_shftd_vec.y / extent_.y, rot_shftd_vec.z / extent_.z);
-    return dot(rot_shftd_vec, scaled_rot_shftd_vec);
+    const auto shifted_vec = vec - get_mean();
+    return dot(shifted_vec, cov_inv_apply(shifted_vec));
 }
 
 template <>

diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh
@@ -12,6 +12,9 @@ private:
     // In Gaussian terms:
     // - mean: pose.tran
     // - cov: pose.rot.mat().inv() * diag(extent) * pose.rot.mat()
+    // note: note how the inverse is on the left. This means that pose
+    // corresponds to the precision matrix, i.e.
+    // prec = pose.rot.mat() * diag(1/extent) * pose.rot.mat().inv()
     Pose pose_;
     float3 extent_;
 
@@ -31,6 +34,11 @@ public:
     CUDA_CALLABLE float3 get_extent() const {
         return extent_;
     }
+    CUDA_CALLABLE float3 get_mean() const {
+        return pose_.get_tran();
+    }
+
+    CUDA_CALLABLE Vec3D cov_inv_apply(const Vec3D) const;
 
     CUDA_CALLABLE float quadratic_form(const Vec3D) const;
 };

diff --git a/genmetaballs/src/cuda/core/forward.cu b/genmetaballs/src/cuda/core/forward.cu
@@ -33,7 +33,7 @@ __global__ render_kernel(const Getter fmb_getter, const Blender blender,
     for (const auto& [pixel_coords, ray] : pixel_coords_and_rays) {
         float w0 = 0.0f, tf = 0.0f, sumexpd = 0.0f;
         for (const auto& fmb : fmb_getter->get_metaballs(ray)) {
-            const auto& [t, d] = Intersector::intersect(fmb, ray);
+            const auto& [t, d] = Intersector::intersect(fmb, ray, extr);
             w = blender->blend(t, d, fmb, ray);
             sumexpd += exp(d); // numerically unstable. use logsumexp
             tf += t;

diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh
@@ -102,4 +102,6 @@ public:
 struct Ray {
     Vec3D start;
     Vec3D direction;
+
+    CUDA_CALLABLE Ray(const Vec3D _start, const Vec3D _dir) : start{_start}, direction{_dir} {}
 };
diff --git a/genmetaballs/src/cuda/core/intersector.cuh b/genmetaballs/src/cuda/core/intersector.cuh
@@ -1,12 +1,22 @@
 #pragma once
 
-#include <utility>
+#include <cuda/std/tuple>
 
-#include "fmb.h"
-#include "geometry.h"
+#include "fmb.cuh"
+#include "geometry.cuh"
 
 // implement equation (6) in the paper
 class LinearIntersector {
-
-    static CUDA_CALLABLE std::pair<float, float> intersect(const FMB& fmb, const Ray& ray) const;
+public:
+    /*
+     * Ray should be in camera frame
+     */
+    CUDA_CALLABLE static cuda::std::tuple<float, float> intersect(const FMB& fmb, const Ray& ray,
+                                                                  const Pose& cam_pose) {
+        const auto v = cam_pose.get_rot().apply(ray.direction);
+        const auto cov_inv_v = fmb.cov_inv_apply(v);
+        const auto cam_tran = cam_pose.get_tran();
+        const auto t = dot(fmb.get_mean() - cam_tran, cov_inv_v) / dot(v, cov_inv_v);
+        return {t, fmb.quadratic_form(cam_tran + t * v)};
+    }
 };
diff --git a/genmetaballs/src/genmetaballs/core/__init__.py b/genmetaballs/src/genmetaballs/core/__init__.py
@@ -1,6 +1,6 @@
 from typing import Literal
 
-from genmetaballs._genmetaballs_bindings import fmb, geometry
+from genmetaballs._genmetaballs_bindings import fmb, geometry, intersector
 from genmetaballs._genmetaballs_bindings.blender import (
     FourParameterBlender,
     ThreeParameterBlender,
@@ -55,6 +55,7 @@ def make_image(height: int, width: int, device: DeviceType) -> CPUImage | GPUIma
     "geometry",
     "Camera",
     "Intrinsics",
+    "intersector",
     "sigmoid",
     "FourParameterBlender",
     "ThreeParameterBlender",

diff --git a/tests/python_tests/test_fmb.py b/tests/python_tests/test_fmb.py
@@ -14,6 +14,19 @@ def rng() -> np.random.Generator:
     return np.random.default_rng(0)
 
 
+def test_fmb_cov_inv_apply(rng):
+    for _ in range(100):
+        quat = rng.uniform(size=4).astype(np.float32)
+        tran, extent, vec = rng.uniform(size=(3, 3)).astype(np.float32)
+        pose = Pose.from_components(Rotation.from_quat(*quat), Vec3D(*tran))
+        scipy_rot_mat = Rot.from_quat(quat).as_matrix()
+        cov = scipy_rot_mat.T @ np.diag(extent) @ scipy_rot_mat
+        theirs = np.linalg.solve(cov, vec)
+        ours = FMB(pose, *extent).cov_inv_apply(Vec3D(*vec))
+        ourvec = np.array([ours.x, ours.y, ours.z], dtype=np.float32)
+        assert np.allclose(theirs, ourvec, atol=1e-6)
+
+
 def test_fmb_quadratic_form(rng):
     for _ in range(100):
         quat = rng.uniform(size=4)

diff --git a/tests/python_tests/test_intersector.py b/tests/python_tests/test_intersector.py
@@ -0,0 +1,36 @@
+import numpy as np
+import pytest
+from scipy.spatial.distance import mahalanobis
+from scipy.spatial.transform import Rotation as Rot
+
+from genmetaballs.core import fmb, geometry, intersector
+
+FMB = fmb.FMB
+Pose, Vec3D, Rotation, Ray = geometry.Pose, geometry.Vec3D, geometry.Rotation, geometry.Ray
+
+
+@pytest.fixture
+def rng() -> np.random.Generator:
+    return np.random.default_rng(0)
+
+
+def test_linear_intersect(rng):
+    for _ in range(100):
+        # sample
+        cam_quat, fmb_quat = rng.uniform(size=(2, 4)).astype(np.float32)
+        fmb_extent, fmb_mu = rng.uniform(size=(2, 3)).astype(np.float32)
+        cam_tran, ray_dir = rng.uniform(size=(2, 3)).astype(np.float32)
+        ray_start = np.zeros(3, dtype=np.float32)  # in camera frame
+        # ground truth computation
+        v = Rot.from_quat(cam_quat).apply(ray_dir)
+        fmb_rotmat = Rot.from_quat(fmb_quat).as_matrix()
+        cov_inv = fmb_rotmat.T @ np.diag(1 / fmb_extent) @ fmb_rotmat
+        t = ((fmb_mu - cam_tran) @ cov_inv @ v) / (v @ cov_inv @ v)
+        d = mahalanobis(fmb_mu, cam_tran + t * v, cov_inv) ** 2
+        # cuda computation
+        fmb_pose = Pose.from_components(Rotation.from_quat(*fmb_quat), Vec3D(*fmb_mu))
+        cam_pose = Pose.from_components(Rotation.from_quat(*cam_quat), Vec3D(*cam_tran))
+        fmb = FMB(fmb_pose, *fmb_extent)
+        ray = Ray(Vec3D(*ray_start), Vec3D(*ray_dir))
+        t_, d_ = intersector.linear_intersect(fmb, ray, cam_pose)
+        assert np.isclose(t, t_) and np.isclose(d, d_)