microsoft · sarlinpe · Mar 3, 2024 · Mar 3, 2024 · Mar 3, 2024 · Mar 3, 2024
@@ -33,6 +33,15 @@ class FeatureMatching:
                 },
             },
         },
+        'lightglue': {
+            'name': 'lightglue',
+            'hloc': {
+                'model': {
+                    'name': 'lightglue',
+                    'features': 'superpoint',
+                },
+            },
+        },
         'mnn': {
             'name': 'mnn',
             'hloc': {

@@ -17,6 +17,7 @@
 
 
 logger = logging.getLogger(__name__)
+invalid_point3d_id = pycolmap.Point2D().point3D_id
 
 
 class MappingPaths:
@@ -94,6 +95,10 @@ def __init__(self, config, outputs, capture, session_id,
             self.name2key[image.name]: image.image_id
             for image in self.reconstruction.images.values()
         }
+        self.imageid_to_point3dids = {
+            image_id: np.array([p.point3D_id for p in image.points2D], np.uint64)
+            for image_id, image in self.reconstruction.images.items()
+        }
 
     def run(self, capture):
         run_capture_to_empty_colmap.run(capture, [self.session_id], self.paths.sfm_empty)
@@ -107,18 +112,20 @@ def run(self, capture):
         )
 
     def get_points3D(self, key, point2D_indices):
-        image = self.reconstruction.images[self.key2imageid[key]]
-        valid = []
-        xyz = []
-        ids = []
-        if len(image.points2D) > 0:
-            for idx in point2D_indices:
-                p = image.points2D[idx]
-                valid.append(p.has_point3D())
-                if valid[-1]:
-                    ids.append(p.point3D_id)
-                    xyz.append(self.reconstruction.points3D[ids[-1]].xyz)
-        return np.array(valid, bool), xyz, ids
+        image_id = self.key2imageid[key]
+        all_ids = self.imageid_to_point3dids[image_id]
+        if len(all_ids) > 0:
+            ids = all_ids[point2D_indices]
+            valid = ids != invalid_point3d_id
+            ids = ids[valid]
+        else:
+            valid = np.empty(0, bool)
+            ids = np.empty(0, int)
+        if len(ids) > 0:
+            xyz = np.stack([self.reconstruction.points3D[i].xyz for i in ids])
+        else:
+            xyz = np.empty((0, 3), float)
+        return valid, xyz, ids
 
 
 class MeshLifting(Mapping):

@@ -33,6 +33,7 @@ def __init__(self, root, config, query_id, ref_id, override_workdir_root=None):
             root / 'pose_estimation' / query_id / ref_id
             / config['features']['name'] / config['matches']['name']
             / config['pairs']['name'] / config['mapping']['name'] / config['name']
+            / config['estimator']
         )
         self.poses = self.workdir / 'poses.txt'
         self.config = self.workdir / 'configuration.json'
@@ -145,6 +146,7 @@ def convert_poses_for_eval(self, T_c2w):
 class SingleImagePoseEstimation(PoseEstimation):
     method = {
         'name': 'single_image',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 3.0,
     }
 
@@ -167,7 +169,7 @@ def _worker_fn(idx: int):
                 camera,
                 ref_key_names,
                 self.recover_matches_2d3d,
-                self.config['pnp_error_multiplier'],
+                self.config,
                 return_covariance=self.return_covariance
             )
             if pose is not None:
@@ -185,6 +187,7 @@ def convert_poses_for_eval(self, T_c2w):
 class RigPoseEstimation(PoseEstimation):
     method = {
         'name': 'rig',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 1.0
     }
 
@@ -211,7 +214,7 @@ def _worker_fn(idx: int):
                 query_names, cameras, T_cams2rig,
                 ref_key_names,
                 self.recover_matches_2d3d,
-                self.config['pnp_error_multiplier'],
+                self.config,
                 return_covariance=self.return_covariance
             )
             if pose is not None:
@@ -229,6 +232,7 @@ def convert_poses_for_eval(self, T_c2w):
 class RigSinglePoseEstimation(SingleImagePoseEstimation):
     method = {
         'name': 'rig_single',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 1.0
     }
 
@@ -303,11 +307,13 @@ def recover_matches_2d3d(self, query: str, ref_key_names: List[Tuple[KeyType, st
 class SingleImageDensePoseEstimation(DensePoseEstimation, SingleImagePoseEstimation):
     method = {
         'name': 'dense_single_image',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 3.0,
     }
 
 class RigDensePoseEstimation(DensePoseEstimation, RigPoseEstimation):
     method = {
         'name': 'dense_rig',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 1.0,
     }
@@ -1,9 +1,12 @@
 from pathlib import Path
-from typing import List, Tuple, Callable
-from collections import defaultdict
+from typing import Any, Dict, List, Tuple, Callable
 import numpy as np
 
 import pycolmap
+try:
+    import poselib
+except ImportError:
+    poselib = None
 
 from scantools.capture import Camera, Pose, Trajectories
 from scantools.proc.alignment.image_matching import get_keypoints, get_matches
@@ -14,8 +17,6 @@
 def recover_matches_2d3d(query: str, ref_key_names: List[Tuple[KeyType, str]],
                          mapping, query_features: Path, match_file: Path):
     (p2d,), (noise,) = get_keypoints(query_features, [query])
-    p2d_to_p3d = defaultdict(list)
-    num_matches = 0
 
     if len(ref_key_names) == 0:
         ref_keys = ref_names = []
@@ -29,32 +30,30 @@ def recover_matches_2d3d(query: str, ref_key_names: List[Tuple[KeyType, str]],
         'indices': [np.empty((0,), int)],
         'node_ids_ref': [np.empty((0, 2), object)]
     }
+    p2d_to_p3d = set()
     for idx, (ref_key, matches) in enumerate(zip(ref_keys, all_matches)):
         if len(matches) == 0:
             continue
         valid, p3ds, p3d_ids = mapping.get_points3D(ref_key, matches[:, 1])
         matches = matches[valid]
-        num_matches += len(matches)
 
-        p2d_q = []
-        p3d = []
-        indices = []
+        p2d_ids = []
+        match_indices = []
         node_ids_ref = []
-        for (i, j), p3d_id, xyz in zip(matches, p3d_ids, p3ds):
-            # avoid duplicate observations
-            if p3d_id != -1 and p3d_id in p2d_to_p3d[i]:
+        for match_idx, ((i, j), p3d_id) in enumerate(zip(matches, p3d_ids)):
+            if p3d_id != -1 and (i, p3d_id) in p2d_to_p3d:
+                # Avoid duplicate observations.
                 continue
-            p2d_to_p3d[i].append(p3d_id)
-            p2d_q.append(p2d[i])
-            p3d.append(xyz)
-            indices.append(idx)
+            p2d_to_p3d.add((i, p3d_id))
+            p2d_ids.append(i)
+            match_indices.append(match_idx)
             node_ids_ref.append((ref_key, j))
-        if len(p2d_q) == 0:
+        if len(p2d_ids) == 0:
             continue
 
-        ret['kp_q'].append(np.array(p2d_q))
-        ret['p3d'].append(np.array(p3d))
-        ret['indices'].append(np.array(indices))
+        ret['kp_q'].append(p2d[p2d_ids])
+        ret['p3d'].append(p3ds[match_indices])
+        ret['indices'].append(np.full(len(match_indices), idx))
         ret['node_ids_ref'].append(np.array(node_ids_ref, dtype=object))
     ret = {k: np.concatenate(v, 0) for k, v in ret.items()}
 
@@ -64,26 +63,39 @@ def recover_matches_2d3d(query: str, ref_key_names: List[Tuple[KeyType, str]],
 def estimate_camera_pose(query: str, camera: Camera,
                          ref_key_names: List[Tuple[KeyType, str]],
                          recover_matches: Callable,
-                         pnp_error_multiplier: float,
-                         return_covariance: bool) -> Pose:
+                         config: Dict[str, Any],
+                         return_covariance: bool) -> Tuple[Pose, Dict[str, Any]]:
     matches_2d3d = recover_matches(query, ref_key_names)
     keypoint_noise = matches_2d3d['keypoint_noise']
-
-    ret = pycolmap.absolute_pose_estimation(
-        matches_2d3d['kp_q'], matches_2d3d['p3d'],
-        camera.asdict, pnp_error_multiplier * keypoint_noise,
-        return_covariance=return_covariance)
-
-    if ret['success']:
-        if return_covariance:
-            ret['covariance'] *= keypoint_noise ** 2
-            # the covariance returned by pycolmap is on the left side,
-            # which is the right side of the inverse.
-            pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+    points_2d = matches_2d3d['kp_q']
+    points_3d = matches_2d3d['p3d']
+    inlier_threshold = config['pnp_error_multiplier'] * keypoint_noise
+
+    if config['estimator'] == 'pycolmap':
+        ret = pycolmap.absolute_pose_estimation(
+            points_2d, points_3d, camera.asdict, inlier_threshold,
+            return_covariance=return_covariance)
+        if ret['success']:
+            if return_covariance:
+                ret['covariance'] *= keypoint_noise ** 2
+                # the covariance returned by pycolmap is on the left side,
+                # which is the right side of the inverse.
+                pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+            else:
+                pose = Pose(ret['qvec'], ret['tvec']).inv
         else:
-            pose = Pose(ret['qvec'], ret['tvec']).inv
+            pose = None
+    elif config['estimator'] == 'poselib':
+        if poselib is None:
+            raise ImportError('Could not import PoseLib - did you forget to install it?')
+        if return_covariance:
+            raise ValueError('PoseLib does not support return_covariance=True')
+        pose, ret = poselib.estimate_absolute_pose(
+            points_2d, points_3d, camera.asdict,
+            {'max_reproj_error': inlier_threshold}, {})
+        pose = Pose(pose.q, pose.t).inv
     else:
-        pose = None
+        raise NotImplementedError(f'Unknown estimator: {config["estimator"]}')
 
     ret = {**ret, 'matches_2d3d_list': [matches_2d3d]}
     return pose, ret
@@ -92,37 +104,55 @@ def estimate_camera_pose(query: str, camera: Camera,
 def estimate_camera_pose_rig(queries: List[str], cameras: List[Camera], T_cams2rig: List[Pose],
                              refs_key_names: List[List[Tuple[KeyType, str]]],
                              recover_matches: Callable,
-                             pnp_error_multiplier: float, return_covariance: bool) -> Pose:
+                             config: Dict[str, Any],
+                             return_covariance: bool) -> Tuple[Pose, Dict[str, Any]]:
     matches_2d3d_list = []
     keypoint_noises = []
     for query, ref_key_names in zip(queries, refs_key_names):
         matches_2d3d = recover_matches(query, ref_key_names)
         matches_2d3d_list.append(matches_2d3d)
         keypoint_noises.append(matches_2d3d['keypoint_noise'])
 
-    p2d_m_list = [m['kp_q'] for m in matches_2d3d_list]
-    p3d_m_list = [m['p3d'] for m in matches_2d3d_list]
+    points_2d = [m['kp_q'] for m in matches_2d3d_list]
+    points_3d = [m['p3d'] for m in matches_2d3d_list]
     camera_dicts = [camera.asdict for camera in cameras]
     rel_poses = [T.inverse() for T in T_cams2rig]
     qvecs = [p.qvec for p in rel_poses]
     tvecs = [p.t for p in rel_poses]
     keypoint_noise = np.mean(keypoint_noises)
-
-    ret = pycolmap.rig_absolute_pose_estimation(
-        p2d_m_list, p3d_m_list, camera_dicts, qvecs,
-        tvecs, pnp_error_multiplier * keypoint_noise,
-        return_covariance=return_covariance)
-
-    if ret['success']:
-        if return_covariance:
-            ret['covariance'] *= keypoint_noise ** 2
-            # the covariance returned by pycolmap is on the left side,
-            # which is the right side of the inverse.
-            pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+    inlier_threshold = config['pnp_error_multiplier'] * keypoint_noise
+
+    if config['estimator'] == 'pycolmap':
+        ret = pycolmap.rig_absolute_pose_estimation(
+            points_2d, points_3d, camera_dicts, qvecs, tvecs, inlier_threshold,
+            return_covariance=return_covariance)
+        if ret['success']:
+            if return_covariance:
+                ret['covariance'] *= keypoint_noise ** 2
+                # the covariance returned by pycolmap is on the left side,
+                # which is the right side of the inverse.
+                pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+            else:
+                pose = Pose(ret['qvec'], ret['tvec']).inv
         else:
-            pose = Pose(ret['qvec'], ret['tvec']).inv
+            pose = None
+    elif config['estimator'] == 'poselib':
+        if poselib is None:
+            raise ImportError('Could not import PoseLib - did you forget to install it?')
+        if return_covariance:
+            raise ValueError('PoseLib does not support return_covariance=True')
+        rel_poses_poselib = []
+        for q, t in zip(qvecs, tvecs):
+            p = poselib.CameraPose()
+            p.q = q
+            p.t = t
+            rel_poses_poselib.append(p)
+        pose, ret = poselib.estimate_generalized_absolute_pose(
+            points_2d, points_3d, rel_poses_poselib, camera_dicts,
+            {'max_reproj_error': inlier_threshold}, {})
+        pose = Pose(pose.q, pose.t).inv
     else:
-        pose = None
+        raise NotImplementedError(f'Unknown estimator: {config["estimator"]}')
 
     ret = {**ret, 'matches_2d3d_list': matches_2d3d_list}
     return pose, ret