diff --git a/gtsfm/configs/synthetic_front_end.yaml b/gtsfm/configs/synthetic_front_end.yaml new file mode 100644 index 000000000..ff2e50f2f --- /dev/null +++ b/gtsfm/configs/synthetic_front_end.yaml @@ -0,0 +1,93 @@ +# Synthetic front-end configuration specifically for the Tanks & Temples dataset. + +SceneOptimizer: + _target_: gtsfm.scene_optimizer.SceneOptimizer + save_gtsfm_data: True + save_two_view_correspondences_viz: False + save_3d_viz: True + pose_angular_error_thresh: 5 # degrees + + image_pairs_generator: + _target_: gtsfm.retriever.image_pairs_generator.ImagePairsGenerator + global_descriptor: + _target_: gtsfm.frontend.cacher.global_descriptor_cacher.GlobalDescriptorCacher + global_descriptor_obj: + _target_: gtsfm.frontend.global_descriptor.netvlad_global_descriptor.NetVLADGlobalDescriptor + + # NOTE: May not converge with SequentialRetriever only. + retriever: + _target_: gtsfm.retriever.joint_netvlad_sequential_retriever.JointNetVLADSequentialRetriever + num_matched: 2 + min_score: 0.2 + max_frame_lookahead: 3 + + correspondence_generator: + _target_: gtsfm.frontend.correspondence_generator.synthetic_correspondence_generator.SyntheticCorrespondenceGenerator + dataset_root: /usr/local/gtsfm-data/Tanks_and_Temples_Barn_410 + scene_name: Barn + + two_view_estimator: + _target_: gtsfm.two_view_estimator.TwoViewEstimator + bundle_adjust_2view: False + eval_threshold_px: 4 # in px + ba_reproj_error_thresholds: [0.5] + bundle_adjust_2view_maxiters: 100 + + verifier: + _target_: gtsfm.frontend.verifier.loransac.LoRansac + use_intrinsics_in_verification: True + estimation_threshold_px: 0.5 # for H/E/F estimators + + triangulation_options: + _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions + mode: + _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode + value: NO_RANSAC + + inlier_support_processor: + _target_: gtsfm.two_view_estimator.InlierSupportProcessor + min_num_inliers_est_model: 15 + min_inlier_ratio_est_model: 0.1 + + multiview_optimizer: + _target_: gtsfm.multi_view_optimizer.MultiViewOptimizer + + # comment out to not run + view_graph_estimator: + _target_: gtsfm.view_graph_estimator.cycle_consistent_rotation_estimator.CycleConsistentRotationViewGraphEstimator + edge_error_aggregation_criterion: MEDIAN_EDGE_ERROR + + rot_avg_module: + _target_: gtsfm.averaging.rotation.shonan.ShonanRotationAveraging + # Use a very low value. 
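+      # Synthetic correspondences are essentially noise-free, so the two-view relative rotations can be weighted heavily.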
+ two_view_rotation_sigma: 0.1 + + trans_avg_module: + _target_: gtsfm.averaging.translation.averaging_1dsfm.TranslationAveraging1DSFM + robust_measurement_noise: True + projection_sampling_method: SAMPLE_INPUT_MEASUREMENTS + + data_association_module: + _target_: gtsfm.data_association.data_assoc.DataAssociation + min_track_len: 2 + triangulation_options: + _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions + reproj_error_threshold: 10 + mode: + _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode + value: RANSAC_SAMPLE_UNIFORM + max_num_hypotheses: 100 + save_track_patches_viz: False + + bundle_adjustment_module: + _target_: gtsfm.bundle.bundle_adjustment.BundleAdjustmentOptimizer + reproj_error_thresholds: [10, 5, 3] # for (multistage) post-optimization filtering + robust_measurement_noise: True + shared_calib: False + cam_pose3_prior_noise_sigma: 0.01 + calibration_prior_noise_sigma: 1e-5 + measurement_noise_sigma: 1.0 + + # # comment out to not run + # dense_multiview_optimizer: + # _target_: gtsfm.densify.mvs_patchmatchnet.MVSPatchmatchNet diff --git a/gtsfm/frontend/correspondence_generator/synthetic_correspondence_generator.py b/gtsfm/frontend/correspondence_generator/synthetic_correspondence_generator.py new file mode 100644 index 000000000..7332e72ab --- /dev/null +++ b/gtsfm/frontend/correspondence_generator/synthetic_correspondence_generator.py @@ -0,0 +1,309 @@ +"""Correspondence generator that creates synthetic keypoint correspondences using a 3d mesh. + +Authors: John Lambert +""" +import tempfile +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +from dask.distributed import Client, Future +import numpy as np +import open3d +import trimesh +from gtsam import Pose3 + +import gtsfm.common.types as gtsfm_types +import gtsfm.utils.logger as logger_utils +import gtsfm.visualization.open3d_vis_utils as open3d_vis_utils +from gtsfm.common.keypoints import Keypoints +from gtsfm.common.types import CAMERA_TYPE +from gtsfm.frontend.correspondence_generator.correspondence_generator_base import CorrespondenceGeneratorBase +from gtsfm.frontend.correspondence_generator.keypoint_aggregator.keypoint_aggregator_base import KeypointAggregatorBase +from gtsfm.frontend.correspondence_generator.keypoint_aggregator.keypoint_aggregator_dedup import ( + KeypointAggregatorDedup, +) +from gtsfm.frontend.correspondence_generator.keypoint_aggregator.keypoint_aggregator_unique import ( + KeypointAggregatorUnique, +) +from gtsfm.loader.loader_base import LoaderBase +from gtsfm.loader.tanks_and_temples_loader import TanksAndTemplesLoader + + +logger = logger_utils.get_logger() + + +class SyntheticCorrespondenceGenerator(CorrespondenceGeneratorBase): + """Pair-wise synthetic keypoint correspondence generator.""" + + def __init__(self, dataset_root: str, scene_name: str, deduplicate: bool = True) -> None: + """ + Args: + dataset_root: Path to where Tanks & Temples dataset is stored. + scene_name: Name of scene from Tanks & Temples dataset. + deduplicate: Whether to de-duplicate with a single image the detections received from each image pair. 
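+                When enabled, duplicate detections received from different image pairs are merged, so that a
+                single consolidated keypoint list is produced per image.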
+ """ + self._dataset_root = dataset_root + self._scene_name = scene_name + self._aggregator: KeypointAggregatorBase = ( + KeypointAggregatorDedup() if deduplicate else KeypointAggregatorUnique() + ) + + def generate_correspondences( + self, + client: Client, + images: List[Future], + image_pairs: List[Tuple[int, int]], + num_sampled_3d_points: int = 5000, + ) -> Tuple[List[Keypoints], Dict[Tuple[int, int], np.ndarray]]: + """Apply the correspondence generator to generate putative correspondences (in parallel). + + Args: + client: Dask client, used to execute the front-end as futures. + images: List of all images, as futures. + image_pairs: Indices of the pairs of images to estimate two-view pose and correspondences. + num_sampled_3d_points: Number of 3d points to sample from the mesh surface and to project. + + Returns: + List of keypoints, with one entry for each input image. + Putative correspondences as indices of keypoints (N,2), for pairs of images (i1,i2). + """ + dataset_root = self._dataset_root + scene_name = self._scene_name + + img_dir = f"{dataset_root}/{scene_name}" + poses_fpath = f"{dataset_root}/{scene_name}_COLMAP_SfM.log" + lidar_ply_fpath = f"{dataset_root}/{scene_name}.ply" + colmap_ply_fpath = f"{dataset_root}/{scene_name}_COLMAP.ply" + ply_alignment_fpath = f"{dataset_root}/{scene_name}_trans.txt" + bounding_polyhedron_json_fpath = f"{dataset_root}/{scene_name}.json" + loader = TanksAndTemplesLoader( + img_dir=img_dir, + poses_fpath=poses_fpath, + lidar_ply_fpath=lidar_ply_fpath, + ply_alignment_fpath=ply_alignment_fpath, + bounding_polyhedron_json_fpath=bounding_polyhedron_json_fpath, + colmap_ply_fpath=colmap_ply_fpath, + ) + + mesh = loader.reconstruct_mesh() + + # Sample random 3d points. This sampling must occur only once, to avoid clusters from repeated sampling. + pcd = mesh.sample_points_uniformly(number_of_points=num_sampled_3d_points) + pcd = mesh.sample_points_poisson_disk(number_of_points=num_sampled_3d_points, pcl=pcd) + sampled_points = np.asarray(pcd.points) + + # TODO(johnwlambert): File Open3d bug to add pickle support for TriangleMesh. + open3d_mesh_path = tempfile.NamedTemporaryFile(suffix=".obj").name + open3d.io.write_triangle_mesh(filename=open3d_mesh_path, mesh=mesh) + + loader_future = client.scatter(loader, broadcast=False) + + # TODO(johnwlambert): Remove assumption that image pair shares the same image shape. 
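+        # For now, the shape of image 0 is used for all images in the dataset.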
+ image_height_px, image_width_px, _ = loader.get_image(0).shape + + def apply_synthetic_corr_generator( + loader_: LoaderBase, + camera_i1: CAMERA_TYPE, + camera_i2: CAMERA_TYPE, + open3d_mesh_fpath: str, + points: np.ndarray, + ) -> Tuple[Keypoints, Keypoints]: + return generate_synthetic_correspondences_for_image_pair( + camera_i1, + camera_i2, + open3d_mesh_fpath, + points, + image_height_px=image_height_px, + image_width_px=image_width_px, + ) + + pairwise_correspondence_futures = { + (i1, i2): client.submit( + apply_synthetic_corr_generator, + loader_future, + loader.get_camera(index=i1), + loader.get_camera(index=i2), + open3d_mesh_path, + sampled_points, + ) + for i1, i2 in image_pairs + } + + pairwise_correspondences: Dict[Tuple[int, int], Tuple[Keypoints, Keypoints]] = client.gather( + pairwise_correspondence_futures + ) + + keypoints_list, putative_corr_idxs_dict = self._aggregator.aggregate(keypoints_dict=pairwise_correspondences) + return keypoints_list, putative_corr_idxs_dict + + +def generate_synthetic_correspondences_for_image_pair( + camera_i1: gtsfm_types.CAMERA_TYPE, + camera_i2: gtsfm_types.CAMERA_TYPE, + open3d_mesh_fpath: str, + points: np.ndarray, + image_height_px: int, + image_width_px: int, +) -> Tuple[Keypoints, Keypoints]: + """Generates synthetic correspondences for image pair. + + Args: + camera_i1: First camera. + camera_i2: Second camera. + open3d_mesh_fpath: Path to saved Open3d mesh. + points: 3d points sampled from mesh surface. + image_height_px: Image height, in pixels. + image_width_px: Image width, in pixels. + + Returns: + Tuple of `Keypoints` objects, one for each image in the input image pair. + """ + mesh = open3d.io.read_triangle_mesh(filename=open3d_mesh_fpath) + trimesh_mesh = load_from_trimesh(open3d_mesh_fpath) + + wTi_list = [camera_i1.pose(), camera_i2.pose()] + calibrations = [camera_i1.calibration(), camera_i2.calibration()] + + keypoints_i1 = [] + keypoints_i2 = [] + + # TODO(johnwlambert): Vectorize this code. On CPU, rays cannot be simultaneously cast against mesh + # due to RAM limitations. + for point in points: + # Try projecting point into each camera. If inside FOV of both and unoccluded by mesh, keep. + uv_i1 = verify_camera_fov_and_occlusion(camera_i1, point, trimesh_mesh, image_height_px, image_width_px) + uv_i2 = verify_camera_fov_and_occlusion(camera_i2, point, trimesh_mesh, image_height_px, image_width_px) + if uv_i1 is not None and uv_i2 is not None: + keypoints_i1.append(uv_i1) + keypoints_i2.append(uv_i2) + + visualize = False + if visualize: + visualize_ray_to_sampled_mesh_point(camera_i1, point, wTi_list, calibrations, mesh) + + keypoints_i1 = Keypoints(coordinates=np.array(keypoints_i1)) + keypoints_i2 = Keypoints(coordinates=np.array(keypoints_i2)) + print(f"Generated {len(keypoints_i1)} keypoints from sampled {points.shape[0]} 3d points.") + return keypoints_i1, keypoints_i2 + + +def visualize_ray_to_sampled_mesh_point( + camera: gtsfm_types.CAMERA_TYPE, + point: np.ndarray, + wTi_list: List[Pose3], + calibrations: List[gtsfm_types.CALIBRATION_TYPE], + mesh: open3d.geometry.TriangleMesh, +) -> None: + """Visualizes ray from camera center to 3d point, along with camera frustum, mesh, and 3d point as ball. + + Args: + camera: Camera to use. + point: 3d point as (3,) array. + wTi_list: All camera poses. + calibrations: Calibration for each camera. + mesh: 3d surface mesh. 
+ """ + frustums = open3d_vis_utils.create_all_frustums_open3d(wTi_list, calibrations, frustum_ray_len=0.3) + + # Create line segment to represent ray. + cam_center = camera.pose().translation() + ray_dirs = point - camera.pose().translation() + line_set = _make_line_plot(cam_center, cam_center + ray_dirs) + # line_set = _make_line_plot(cam_center, camera.backproject(uv_reprojected, depth=1.0)) + + # Plot 3d point as red sphere. + point_cloud = np.reshape(point, (1, 3)) + rgb = np.array([255, 0, 0]).reshape(1, 3).astype(np.uint8) + spheres = open3d_vis_utils.create_colored_spheres_open3d(point_cloud, rgb, sphere_radius=0.5) + + # Plot all camera frustums and mesh, with sphere and ray line segment. + open3d.visualization.draw_geometries([mesh] + frustums + spheres + [line_set]) + + +def verify_camera_fov_and_occlusion( + camera: gtsfm_types.CAMERA_TYPE, + point: np.ndarray, + trimesh_mesh: trimesh.Trimesh, + image_height_px: int, + image_width_px: int, +) -> Optional[np.ndarray]: + """Verifies point lies within camera FOV and is unoccluded by mesh faces. + + Args: + camera: Camera to use. + point: 3d point as (3,) array. + trimesh_mesh: Trimesh mesh object to raycast against. + image_height_px: Image height, in pixels. + image_width_px: Image width, in pixels. + + Returns: + 2d keypoint as (2,) array. + """ + # Project to camera. + uv_reprojected, success_flag = camera.projectSafe(point) + # Check for projection error in camera. + if not success_flag: + return None + + if ( + (uv_reprojected[0] < 0) + or (uv_reprojected[0] > image_width_px) + or (uv_reprojected[1] < 0) + or (uv_reprojected[1] > image_height_px) + ): + # Outside of synthetic camera's FOV. + return None + + # Cast ray through keypoint back towards scene. + cam_center = camera.pose().translation() + # Choose an arbitrary depth for the ray direction. + ray_dirs = point - camera.pose().translation() + + # Returns the location of where a ray hits a surface mesh. + # keypoint_ind: (M,) array of keypoint indices whose corresponding ray intersected the ground truth mesh. + # intersections_locations: (M, 3), array of ray intersection locations. + intersections, keypoint_ind, _ = trimesh_mesh.ray.intersects_location( + ray_origins=cam_center.reshape(1, 3), ray_directions=ray_dirs.reshape(1, 3), multiple_hits=False + ) + + if intersections.shape[0] > 1 or intersections.shape[0] == 0: + print(f"Unknown failure: intersections= {intersections} with shape {intersections.shape}") + return None + + # TODO(johnwlambert): Tune this tolerance threshold to allow looser matches. + eps = 0.1 + if not np.linalg.norm(intersections[0] - point) < eps: + # print("Skip occluded: ", intersections[0], ", vs. : ", point) + # There was a closer intersection along the ray than `point`, meaning `point` is occluded by the mesh. 
+ return None + + return uv_reprojected + + +def load_from_trimesh(mesh_path: str) -> trimesh.Trimesh: + """Read in scene mesh as Trimesh object.""" + if not Path(mesh_path).exists(): + raise FileNotFoundError(f"No mesh found at {mesh_path}") + mesh = trimesh.load(mesh_path, process=False, maintain_order=True) + logger.info( + "Tanks & Temples loader read in mesh with %d vertices and %d faces.", + mesh.vertices.shape[0], + mesh.faces.shape[0], + ) + return mesh + + +def _make_line_plot(point1: np.ndarray, point2: np.ndarray) -> open3d.geometry.LineSet: + """Plot a line segment from `point1` to `point2` using Open3D.""" + verts_worldfr = np.array([point1, point2]) + lines = [[0, 1]] + # Color is in range [0,1] + color = (0, 0, 1) + colors = [color for i in range(len(lines))] + + line_set = open3d.geometry.LineSet( + points=open3d.utility.Vector3dVector(verts_worldfr), + lines=open3d.utility.Vector2iVector(lines), + ) + line_set.colors = open3d.utility.Vector3dVector(colors) + return line_set diff --git a/gtsfm/loader/tanks_and_temples_loader.py b/gtsfm/loader/tanks_and_temples_loader.py index 84deade39..9f36e57fa 100644 --- a/gtsfm/loader/tanks_and_temples_loader.py +++ b/gtsfm/loader/tanks_and_temples_loader.py @@ -2,23 +2,24 @@ See https://www.tanksandtemples.org/download/ for more information. -Authors: John Lambert +Author: John Lambert """ +from enum import auto, Enum from pathlib import Path from typing import Dict, List, Optional import numpy as np import open3d -import trimesh from gtsam import Cal3Bundler, Rot3, Pose3 +import gtsfm.utils.geometry_comparisons as geom_comp_utils import gtsfm.utils.io as io_utils import gtsfm.utils.logger as logger_utils +import gtsfm.visualization.open3d_vis_utils as open3d_vis_utils from gtsfm.common.image import Image from gtsfm.loader.loader_base import LoaderBase -import gtsfm.utils.geometry_comparisons as geom_comp_utils -import gtsfm.visualization.open3d_vis_utils as open3d_vis_utils + logger = logger_utils.get_logger() @@ -27,6 +28,26 @@ _DEFAULT_IMAGE_WIDTH_PX = 1920 +class MeshReconstructionType(Enum): + """Mesh reconstruction algorithm choices. + + The alpha shape [Edelsbrunner1983] is a generalization of a convex hull. + See Edelsbrunner and D. G. Kirkpatrick and R. Seidel: On the shape of a set of points in the plane, 1983. + https://www.cs.jhu.edu/~misha/Fall13b/Papers/Edelsbrunner93.pdf + + The ball pivoting algorithm (BPA) [Bernardini1999]. + See https://www.cs.jhu.edu/~misha/Fall13b/Papers/Bernardini99.pdf + + The Poisson surface reconstruction method [Kazhdan2006]. + See: M.Kazhdan and M. Bolitho and H. Hoppe: Poisson surface reconstruction, Eurographics, 2006. + See https://hhoppe.com/poissonrecon.pdf + """ + + ALPHA_SHAPE = auto() + BALL_PIVOTING = auto() + POISSON_SURFACE = auto() + + class TanksAndTemplesLoader(LoaderBase): def __init__( self, @@ -131,19 +152,8 @@ def get_camera_intrinsics_full_res(self, index: int) -> Optional[Cal3Bundler]: if index < 0 or index >= len(self): raise IndexError("Image index is invalid") - # Use synthetic camera. - H = _DEFAULT_IMAGE_HEIGHT_PX - W = _DEFAULT_IMAGE_WIDTH_PX - # Principal point offset: - cx = W / 2 - cy = H / 2 - # Focal length: - fx = 0.7 * W - - # TODO(johnwlambert): Add Sony A7SM2 to sensor DB (35.6 x 23.8 mm), and get intrinsics from exif. 
- # intrinsics = io_utils.load_image(self._image_paths[index]).get_intrinsics_from_exif() - - intrinsics = Cal3Bundler(fx, k1=0.0, k2=0.0, u0=cx, v0=cy) + # Retrieve focal length from EXIF, and principal point will be `cx = IMG_W / 2`, `cy = IMG_H / 2`. + intrinsics = io_utils.load_image(self._image_paths[index]).get_intrinsics_from_exif() return intrinsics def get_camera_pose(self, index: int) -> Optional[Pose3]: @@ -178,9 +188,10 @@ def get_lidar_point_cloud(self, downsample_factor: int = 10) -> open3d.geometry. Point cloud captured by laser scanner, in the COLMAP world frame. """ if not Path(self.lidar_ply_fpath).exists(): - raise ValueError("") + raise ValueError("Cannot retrieve LiDAR scanned point cloud if `lidar_ply_fpath` not provided.") pcd = open3d.io.read_point_cloud(self.lidar_ply_fpath) points, rgb = open3d_vis_utils.convert_colored_open3d_point_cloud_to_numpy(pointcloud=pcd) + if downsample_factor > 1: points = points[::downsample_factor] rgb = rgb[::downsample_factor] @@ -201,14 +212,61 @@ def get_colmap_point_cloud(self, downsample_factor: int = 1) -> open3d.geometry. Point cloud reconstructed by COLMAP, in the COLMAP world frame. """ if not Path(self.colmap_ply_fpath).exists(): - raise ValueError("") + raise ValueError("Cannot retrieve COLMAP-reconstructed point cloud if `colmap_ply_fpath` not provided.") pcd = open3d.io.read_point_cloud(self.colmap_ply_fpath) points, rgb = open3d_vis_utils.convert_colored_open3d_point_cloud_to_numpy(pointcloud=pcd) + if downsample_factor > 1: points = points[::downsample_factor] rgb = rgb[::downsample_factor] return open3d_vis_utils.create_colored_point_cloud_open3d(point_cloud=points, rgb=rgb) + def reconstruct_mesh( + self, + crop_by_polyhedron: bool = True, + reconstruction_algorithm: MeshReconstructionType = MeshReconstructionType.ALPHA_SHAPE, + ) -> open3d.geometry.TriangleMesh: + """Reconstructs mesh from LiDAR PLY file. + + Args: + crop_by_polyhedron: Whether to crop by a manually specified polyhedron, vs. simply + by range from global origin. + reconstruction_algorithm: Mesh reconstruction algorithm to use, given input point cloud. + + Returns: + Reconstructed mesh. + """ + # Get LiDAR point cloud, in camera coordinate frame. 
+ pcd = self.get_lidar_point_cloud() + if crop_by_polyhedron: + pass + # pcd = crop_points_to_bounding_polyhedron(pcd, self.bounding_polyhedron_json_fpath) + + points, rgb = open3d_vis_utils.convert_colored_open3d_point_cloud_to_numpy(pcd) + if not crop_by_polyhedron: + max_radius = 4.0 + valid = np.linalg.norm(points, axis=1) < max_radius + points = points[valid] + rgb = rgb[valid] + pcd = open3d_vis_utils.create_colored_point_cloud_open3d(points, rgb) + pcd.estimate_normals() + + if reconstruction_algorithm == MeshReconstructionType.ALPHA_SHAPE: + alpha = 0.1 # 0.03 + print(f"alpha={alpha:.3f}") + mesh = open3d.geometry.TriangleMesh.create_from_point_cloud_alpha_shape(pcd, alpha) + mesh.compute_vertex_normals() + + elif reconstruction_algorithm == MeshReconstructionType.BALL_PIVOTING: + radii = [0.005, 0.01, 0.02, 0.04] + mesh = open3d.geometry.TriangleMesh.create_from_point_cloud_ball_pivoting( + pcd, open3d.utility.DoubleVector(radii) + ) + + elif reconstruction_algorithm == MeshReconstructionType.POISSON_SURFACE: + mesh, densities = open3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=9) + return mesh + def crop_points_to_bounding_polyhedron(pcd: open3d.geometry.PointCloud, json_fpath: str) -> open3d.geometry.PointCloud: """Crops a point cloud according to JSON-specified polyhedron crop bounds. @@ -264,19 +322,6 @@ def _parse_redwood_data_log_file(log_fpath: str) -> Dict[int, Pose3]: return wTi_gt_dict -def load_from_trimesh(mesh_path: str) -> trimesh.Trimesh: - """Read in scene mesh as Trimesh object.""" - if not Path(mesh_path).exists(): - raise FileNotFoundError(f"No mesh found at {mesh_path}") - mesh = trimesh.load(mesh_path, process=False, maintain_order=True) - logger.info( - "Tanks & Temples loader read in mesh with %d vertices and %d faces.", - mesh.vertices.shape[0], - mesh.faces.shape[0], - ) - return mesh - - def _create_Sim3_from_tt_dataset_alignment_transform(lidar_Sim3_colmap: np.ndarray) -> np.ndarray: """Create member of Sim(3) matrix group given Tanks & Temples dataset alignment transform. 
diff --git a/gtsfm/multi_view_optimizer.py b/gtsfm/multi_view_optimizer.py index c14ba3f1b..03b14a1a9 100644 --- a/gtsfm/multi_view_optimizer.py +++ b/gtsfm/multi_view_optimizer.py @@ -123,7 +123,6 @@ def create_computation_graph( pruned_i2Ri1_graph, pruned_i2Ui1_graph = dask.delayed(graph_utils.prune_to_largest_connected_component, nout=2)( viewgraph_i2Ri1_graph, viewgraph_i2Ui1_graph, relative_pose_priors ) - delayed_wRi, rot_avg_metrics = self.rot_avg_module.create_computation_graph( num_images, pruned_i2Ri1_graph, i1Ti2_priors=relative_pose_priors, gt_wTi_list=gt_wTi_list ) diff --git a/gtsfm/runner/gtsfm_runner_base.py b/gtsfm/runner/gtsfm_runner_base.py index 018715d21..b58f6bb8a 100644 --- a/gtsfm/runner/gtsfm_runner_base.py +++ b/gtsfm/runner/gtsfm_runner_base.py @@ -344,7 +344,9 @@ def run(self) -> GtsfmData: ) two_view_estimation_duration_sec = time.time() - two_view_estimation_start_time - i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, two_view_reports_dict = unzip_two_view_results(two_view_results_dict) + i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, _, two_view_reports_dict = unzip_two_view_results( + two_view_results_dict + ) if self.scene_optimizer._save_two_view_correspondences_viz: for i1, i2 in v_corr_idxs_dict.keys(): @@ -419,25 +421,31 @@ def unzip_two_view_results( Dict[Tuple[int, int], Unit3], Dict[Tuple[int, int], np.ndarray], Dict[Tuple[int, int], TwoViewEstimationReport], + Dict[Tuple[int, int], TwoViewEstimationReport], ]: """Unzip the tuple TWO_VIEW_OUTPUT into 1 dictionary for 1 element in the tuple.""" i2Ri1_dict: Dict[Tuple[int, int], Rot3] = {} i2Ui1_dict: Dict[Tuple[int, int], Unit3] = {} v_corr_idxs_dict: Dict[Tuple[int, int], np.ndarray] = {} - two_view_reports_dict: Dict[Tuple[int, int], TwoViewEstimationReport] = {} + pre_ba_two_view_reports_dict: Dict[Tuple[int, int], TwoViewEstimationReport] = {} + post_isp_two_view_reports_dict: Dict[Tuple[int, int], TwoViewEstimationReport] = {} for (i1, i2), two_view_output in two_view_results.items(): + # Value is ordered as (post_isp_i2Ri1, post_isp_i2Ui1, post_isp_v_corr_idxs, + # pre_ba_report, post_ba_report, post_isp_report). i2Ri1 = two_view_output[0] i2Ui1 = two_view_output[1] if i2Ri1 is None or i2Ui1 is None: + print(f"Skip {i1},{i2} since None") continue i2Ri1_dict[(i1, i2)] = i2Ri1 i2Ui1_dict[(i1, i2)] = i2Ui1 v_corr_idxs_dict[(i1, i2)] = two_view_output[2] - two_view_reports_dict[(i1, i2)] = two_view_output[5] + pre_ba_two_view_reports_dict[(i1, i2)] = two_view_output[3] + post_isp_two_view_reports_dict[(i1, i2)] = two_view_output[5] - return i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, two_view_reports_dict + return i2Ri1_dict, i2Ui1_dict, v_corr_idxs_dict, pre_ba_two_view_reports_dict, post_isp_two_view_reports_dict def save_metrics_reports(metrics_group_list: List[GtsfmMetricsGroup], metrics_path: str) -> None: diff --git a/gtsfm/runner/run_scene_optimizer_synthetic_tanks_and_temples.py b/gtsfm/runner/run_scene_optimizer_synthetic_tanks_and_temples.py new file mode 100644 index 000000000..bd647d4b3 --- /dev/null +++ b/gtsfm/runner/run_scene_optimizer_synthetic_tanks_and_temples.py @@ -0,0 +1,63 @@ +"""Runs GTSfM on a Tanks and Temples dataset, using synthetic correspondences. 
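+
+Correspondences are generated by sampling points from a mesh reconstructed from the ground-truth LiDAR scan and
+projecting them into each image pair.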
+ +Author: John Lambert +""" + +import argparse + +import gtsfm.utils.logger as logger_utils +from gtsfm.loader.tanks_and_temples_loader import TanksAndTemplesLoader +from gtsfm.loader.loader_base import LoaderBase +from gtsfm.runner.gtsfm_runner_base import GtsfmRunnerBase + +logger = logger_utils.get_logger() + + +_TANKS_AND_TEMPLES_RESOLUTION_PX = 1080 + + +# TODO(johnwlambert,travisdriver): Make this generic for any dataset with a GT mesh. +class GtsfmRunnerSyntheticTanksAndTemplesLoader(GtsfmRunnerBase): + tag = "GTSFM with LiDAR scans, COLMAP camera poses, and image names stored in Tanks and Temples format" + + def construct_argparser(self) -> argparse.ArgumentParser: + parser = super().construct_argparser() + + parser.add_argument( + "--dataset_root", type=str, required=True, help="Path to zip file containing packaged data." + ) + parser.add_argument("--scene_name", type=str, required=True, help="Name of dataset scene.") + parser.add_argument( + "--max_num_images", + type=int, + default=None, + help="Optionally specifies the maximum number of images from the dataset to use for reconstruction.", + ) + return parser + + def construct_loader(self) -> LoaderBase: + dataset_root = self.parsed_args.dataset_root + scene_name = self.parsed_args.scene_name + + img_dir = f"{dataset_root}/{scene_name}" + poses_fpath = f"{dataset_root}/{scene_name}_COLMAP_SfM.log" + lidar_ply_fpath = f"{dataset_root}/{scene_name}.ply" + colmap_ply_fpath = f"{dataset_root}/{scene_name}_COLMAP.ply" + ply_alignment_fpath = f"{dataset_root}/{scene_name}_trans.txt" + bounding_polyhedron_json_fpath = f"{dataset_root}/{scene_name}.json" + loader = TanksAndTemplesLoader( + img_dir=img_dir, + poses_fpath=poses_fpath, + lidar_ply_fpath=lidar_ply_fpath, + ply_alignment_fpath=ply_alignment_fpath, + bounding_polyhedron_json_fpath=bounding_polyhedron_json_fpath, + colmap_ply_fpath=colmap_ply_fpath, + max_resolution=_TANKS_AND_TEMPLES_RESOLUTION_PX, + max_num_images=self.parsed_args.max_num_images, + ) + return loader + + +if __name__ == "__main__": + runner = GtsfmRunnerSyntheticTanksAndTemplesLoader() + runner.run() diff --git a/gtsfm/utils/graph.py b/gtsfm/utils/graph.py index 9b1f4f6f6..ec03bbe7c 100644 --- a/gtsfm/utils/graph.py +++ b/gtsfm/utils/graph.py @@ -10,9 +10,12 @@ import numpy as np from gtsam import PinholeCameraCal3Bundler, Rot3, Unit3 +import gtsfm.utils.logger as logger_utils from gtsfm.common.pose_prior import PosePrior from gtsfm.common.two_view_estimation_report import TwoViewEstimationReport +logger = logger_utils.get_logger() + GREEN = [0, 1, 0] RED = [1, 0, 0] @@ -32,9 +35,13 @@ def get_nodes_in_largest_connected_component(edges: List[Tuple[int, int]]) -> Li input_graph = nx.Graph() input_graph.add_edges_from(edges) - # get the largest connected component - largest_cc = max(nx.connected_components(input_graph), key=len) - subgraph = input_graph.subgraph(largest_cc).copy() + # Log the sizes of the connected components. + cc_sizes = [len(x) for x in sorted(list(nx.connected_components(input_graph)))] + logger.info("Connected component sizes: %s nodes.", str(cc_sizes)) + + # Get the largest connected component. 
+ largest_cc_nodes = max(nx.connected_components(input_graph), key=len) + subgraph = input_graph.subgraph(largest_cc_nodes).copy() return list(subgraph.nodes()) diff --git a/gtsfm/visualization/open3d_vis_utils.py b/gtsfm/visualization/open3d_vis_utils.py index b7c0ea58e..806751074 100644 --- a/gtsfm/visualization/open3d_vis_utils.py +++ b/gtsfm/visualization/open3d_vis_utils.py @@ -203,3 +203,37 @@ def draw_scene_open3d( geometries = frustums + spheres open3d.visualization.draw_geometries(geometries) + + +def draw_scene_with_gt_open3d( + point_cloud: np.ndarray, + rgb: np.ndarray, + wTi_list: List[Pose3], + calibrations: List[Cal3Bundler], + gt_wTi_list: List[Pose3], + gt_calibrations: List[Cal3Bundler], + args: argparse.Namespace, +) -> None: + """Render GT camera frustums, estimated camera frustums, and a 3d point cloud, using Open3d. + + GT frustums are shown in a blue-purple colormap, whereas estimated frustums are shown in a red-green colormap. + + Args: + point_cloud: Array of shape (N,3) representing 3d points. + rgb: Uint8 array of shape (N,3) representing colors in RGB order, in the range [0,255]. + wTi_list: List of camera poses for each image. + calibrations: Calibration object for each camera. + gt_wTi_list: List of ground truth camera poses for each image. + gt_calibrations: Ground truth calibration object for each camera. + args: Rendering options. + """ + frustums = create_all_frustums_open3d(wTi_list, calibrations, args.frustum_ray_len, color_names=("red", "green")) + gt_frustums = create_all_frustums_open3d( + wTi_list, calibrations, args.frustum_ray_len, color_names=("blue", "purple") + ) + + # spheres = create_colored_spheres_open3d(point_cloud, rgb, args.sphere_radius) + pcd = create_colored_point_cloud_open3d(point_cloud, rgb) + geometries = frustums + gt_frustums + [pcd] # + spheres + + open3d.visualization.draw_geometries(geometries) diff --git a/gtsfm/visualization/view_scene_tanks_and_temples.py b/gtsfm/visualization/view_scene_tanks_and_temples.py index 14ff1786c..2619fc63d 100644 --- a/gtsfm/visualization/view_scene_tanks_and_temples.py +++ b/gtsfm/visualization/view_scene_tanks_and_temples.py @@ -7,12 +7,19 @@ import os from pathlib import Path +import matplotlib.pyplot as plt import numpy as np import open3d +from gtsam import Cal3Bundler +from dask.distributed import Client, LocalCluster import gtsfm.visualization.open3d_vis_utils as open3d_vis_utils +from gtsfm.frontend.correspondence_generator.synthetic_correspondence_generator import SyntheticCorrespondenceGenerator from gtsfm.loader.tanks_and_temples_loader import TanksAndTemplesLoader +import gtsfm.utils.viz as correspondence_viz_utils + + TEST_DATA_ROOT = Path(__file__).resolve().parent.parent.parent / "tests" / "data" @@ -31,7 +38,6 @@ def view_scene(args: argparse.Namespace) -> None: lidar_ply_fpath = data_root / f"{args.scene_name}.ply" colmap_ply_fpath = data_root / f"{args.scene_name}_COLMAP.ply" - # Note: PLY files are not provided here, as they are too large to include as test data (300 MB each). loader = TanksAndTemplesLoader( img_dir=str(img_dir), poses_fpath=str(poses_fpath), @@ -42,14 +48,14 @@ def view_scene(args: argparse.Namespace) -> None: ) # Both are loaded in the COLMAP world coordinate frame. 
- lidar_pcd = loader.get_lidar_point_cloud(downsample_factor=1000) + lidar_pcd = loader.get_lidar_point_cloud(downsample_factor=1) overlay_point_clouds = False if overlay_point_clouds: red = np.zeros_like(lidar_pcd.colors) red[:, 0] = 1.0 lidar_pcd.colors = open3d.utility.Vector3dVector(red) - colmap_pcd = loader.get_colmap_point_cloud(downsample_factor=100) + colmap_pcd = loader.get_colmap_point_cloud(downsample_factor=1) calibrations = [loader.get_camera_intrinsics_full_res(0)] * len(loader) wTi_list = loader.get_gt_poses() @@ -58,11 +64,89 @@ def view_scene(args: argparse.Namespace) -> None: geometries = frustums + [colmap_pcd] + [lidar_pcd] open3d.visualization.draw_geometries(geometries) + visualize_mesh = True + if visualize_mesh: + mesh = loader.reconstruct_mesh() + open3d.visualization.draw_geometries([mesh], mesh_show_back_face=True) + + visualize_mesh_sampled_points = True + if visualize_mesh_sampled_points: + num_sampled_3d_points = 20000 + pcd = mesh.sample_points_uniformly(number_of_points=num_sampled_3d_points) + # pcd = mesh.sample_points_poisson_disk(number_of_points=num_sampled_3d_points, pcl=pcd) + open3d.visualization.draw_geometries([pcd]) + + visualize_projected_points = False + if visualize_projected_points: + i1 = 0 + camera_i1 = loader.get_camera(index=i1) + img_i1 = loader.get_image_full_res(index=i1) + + # Project LiDAR point cloud into image 1. + pcd = loader.get_lidar_point_cloud(downsample_factor=10) + lidar_points = np.asarray(pcd.points) + keypoints_i1 = _project_points_onto_image(lidar_points, camera_i1) + + # Plot projected LiDAR points. + plt.imshow(img_i1.value_array.astype(np.uint8)) + plt.scatter(keypoints_i1[:, 0], keypoints_i1[:, 1], 10, color="r", marker=".", alpha=0.007) + plt.show() + + # Project mesh vertices into image 1. + mesh = loader.reconstruct_mesh() + mesh_points = np.asarray(mesh.vertices) + keypoints_i1_ = _project_points_onto_image(mesh_points, camera_i1) + # Plot projected mesh points. 
+ plt.imshow(img_i1.value_array.astype(np.uint8)) + plt.scatter(keypoints_i1_[:, 0], keypoints_i1_[:, 1], 10, color="g", marker=".", alpha=0.007) + plt.show() + + visualize_synthetic_correspondences = True + if visualize_synthetic_correspondences: + cluster = LocalCluster(n_workers=1, threads_per_worker=1) + client = Client(cluster) + + i1 = 0 + i2 = 4 + + synthetic_generator = SyntheticCorrespondenceGenerator(args.data_root, args.scene_name) + images = [loader.get_image_full_res(index=i) for i in range(i2 + 1)] + keypoints_list, corr_idx_dict = synthetic_generator.generate_correspondences( + client=client, + images=images, + image_pairs=[[i1, i2]], + num_sampled_3d_points=500, + ) + + plot_img = correspondence_viz_utils.plot_twoview_correspondences( + images[i1], + images[i2], + keypoints_list[i1], + keypoints_list[i2], + corr_idx_dict[(i1, i2)], + inlier_mask=np.ones(len(corr_idx_dict[(i1, i2)]), dtype=bool), + ) + plt.imshow(plot_img.value_array) + plt.axis("off") + plt.show() + + +def _project_points_onto_image(points: np.ndarray, camera: Cal3Bundler) -> np.ndarray: + """Returns (N,2) array of keypoint coordinates.""" + keypoints = [] + for point in points: + keypoints.append(camera.projectSafe(point)[0]) + + return np.array(keypoints) + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Visualize Tanks & Temples dataset w/ Open3d.") parser.add_argument( - "--data_root", type=str, default=os.path.join(TEST_DATA_ROOT, "tanks_and_temples_barn"), help="" + "--data_root", + type=str, + default=os.path.join(TEST_DATA_ROOT, "tanks_and_temples_barn"), + help="Path to data for a specific Tanks & Temples scene.", ) parser.add_argument("--scene_name", type=str, default="Barn") parser.add_argument( diff --git a/tests/loader/test_tanks_and_temples_loader.py b/tests/loader/test_tanks_and_temples_loader.py index 83a34e8ec..6fd162516 100644 --- a/tests/loader/test_tanks_and_temples_loader.py +++ b/tests/loader/test_tanks_and_temples_loader.py @@ -1,13 +1,17 @@ -"""Unit tests for Tanks & Temple dataset loader.""" +"""Unit tests for Tanks & Temple dataset loader. +Author: John Lambert +""" import unittest from pathlib import Path import numpy as np -from gtsam import Cal3Bundler +from gtsam import Cal3Bundler, Unit3 +import gtsfm.utils.geometry_comparisons as geom_comp_utils from gtsfm.common.image import Image from gtsfm.loader.tanks_and_temples_loader import TanksAndTemplesLoader +from gtsfm.frontend.verifier.loransac import LoRansac _TEST_DATA_ROOT = Path(__file__).resolve().parent.parent / "data" / "tanks_and_temples_barn" @@ -88,3 +92,57 @@ def test_get_image_full_res(self) -> None: assert image.height == 1080 assert image.width == 1920 + + def test_synthetic_correspondences_have_zero_two_view_error(self) -> None: + # Skip this test in the CI, and only uncomment it to run it locally, since it requires PLY. + return + # Compute 2-view error using a front-end. 
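+        # With noise-free synthetic correspondences, the recovered relative pose should match GT almost exactly.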
+ verifier = LoRansac(use_intrinsics_in_verification=True, estimation_threshold_px=0.5) + + i1 = 0 + i2 = 1 + + keypoints_list, match_indices_dict = self.loader.generate_synthetic_correspondences( + images=[], image_pairs=[(i1, i2)] + ) + keypoints_i1, keypoints_i2 = keypoints_list + + wTi1 = self.loader.get_camera_pose(index=i1) + wTi2 = self.loader.get_camera_pose(index=i2) + + i2Ti1 = wTi2.between(wTi1) + i2Ri1_expected = i2Ti1.rotation() + i2Ui1_expected = Unit3(i2Ti1.translation()) + + camera_intrinsics_i1 = self.loader.get_camera_intrinsics_full_res(index=i1) + camera_intrinsics_i2 = self.loader.get_camera_intrinsics_full_res(index=i2) + + i2Ri1_computed, i2Ui1_computed, verified_indices_computed, _ = verifier.verify( + keypoints_i1, + keypoints_i2, + match_indices_dict[(i1, i2)], + camera_intrinsics_i1, + camera_intrinsics_i2, + ) + + rot_angular_err = geom_comp_utils.compute_relative_rotation_angle(i2Ri1_expected, i2Ri1_computed) + direction_angular_err = geom_comp_utils.compute_relative_unit_translation_angle(i2Ui1_expected, i2Ui1_computed) + + print(f"Errors: rotation {rot_angular_err:.2f}, direction {direction_angular_err:.2f}") + # if i2Ri1_expected is None: + # self.assertIsNone(i2Ri1_computed) + # else: + # angular_err = geom_comp_utils.compute_relative_rotation_angle(i2Ri1_expected, i2Ri1_computed) + # self.assertLess( + # angular_err, + # ROTATION_ANGULAR_ERROR_DEG_THRESHOLD, + # msg=f"Angular error {angular_err:.1f} vs. tol. {ROTATION_ANGULAR_ERROR_DEG_THRESHOLD:.1f}", + # ) + # if i2Ui1_expected is None: + # self.assertIsNone(i2Ui1_computed) + # else: + # self.assertLess( + # geom_comp_utils.compute_relative_unit_translation_angle(i2Ui1_expected, i2Ui1_computed), + # DIRECTION_ANGULAR_ERROR_DEG_THRESHOLD, + # ) +