Hi, great effort open-sourcing the dataset. I'm wondering how I should get the world coordinates of the gripper from the raw data.
I have the following now:
```python
import json
import os
from typing import Tuple

import cv2
import h5py
import numpy as np
from scipy.spatial.transform import Rotation


def extract_extrinsics(pose: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Split a 6-vector pose into a translation vector and a rotation matrix."""
    translation = pose[:3]
    rotation = Rotation.from_euler("xyz", np.array(pose[3:])).as_matrix()
    return (translation, rotation)


# Load the episode metadata.
json_metadata = "/mnt/disks/data/droid_raw/1.0.1/AUTOLab/failure/2023-07-31/Mon_Jul_31_19:06:17_2023/metadata_AUTOLab+5d05c5aa+2023-07-31-19h-06m-17s.json"
with open(json_metadata, 'r') as f:
    data = json.load(f)

wrist_cam_serial = data['wrist_cam_serial']
ext1_cam_serial = data['ext1_cam_serial']
ext2_cam_serial = data['ext2_cam_serial']
wrist_cam_extrinsics = data['wrist_cam_extrinsics']
ext1_cam_extrinsics = data['ext1_cam_extrinsics']
ext2_cam_extrinsics = data['ext2_cam_extrinsics']
wrist_svo_path = f"recordings/SVO/{wrist_cam_serial}.svo"
ext1_svo_path = f"recordings/SVO/{ext1_cam_serial}.svo"
ext2_svo_path = f"recordings/SVO/{ext2_cam_serial}.svo"

# Load the trajectory: gripper cartesian poses and per-timestep camera extrinsics.
trajectory_path = "/mnt/disks/data/droid_raw/1.0.1/AUTOLab/failure/2023-07-31/Mon_Jul_31_19:06:17_2023/trajectory.h5"
with h5py.File(trajectory_path, 'r') as f:
    action = f['action']
    observation = f['observation']
    cartesian_position = action['cartesian_position'][:]
    camera_extrinsics = observation['camera_extrinsics']
    gripper_offset = camera_extrinsics[f"{wrist_cam_serial}_left_gripper_offset"][:]
    wrist_camera_extrinsics_all = camera_extrinsics[f"{wrist_cam_serial}_left"][:]

timestep = 0

# Build the gripper-to-world homogeneous transform from the 6-DoF cartesian pose.
trans, rot = extract_extrinsics(cartesian_position[timestep])
gripper_extrinsics_mat = np.zeros((4, 4))  # gripper to world
gripper_extrinsics_mat[:3, :3] = rot
gripper_extrinsics_mat[:3, 3] = trans
gripper_extrinsics_mat[3, 3] = 1.0

# Get camera intrinsics.
wrist_cam_intrinsics = np.loadtxt("wrist_left_intrinsic_mat.txt", delimiter=',')

# Build the wrist-cam-to-world homogeneous transform for this timestep.
# wrist_cam_trans, wrist_cam_rot = extract_extrinsics(np.array(wrist_cam_extrinsics))
wrist_cam_trans, wrist_cam_rot = extract_extrinsics(wrist_camera_extrinsics_all[timestep])
wrist_cam_extrinsics_mat = np.zeros((4, 4))  # wrist cam to world
wrist_cam_extrinsics_mat[:3, :3] = wrist_cam_rot
wrist_cam_extrinsics_mat[:3, 3] = wrist_cam_trans
wrist_cam_extrinsics_mat[3, 3] = 1.0

# Convert robot base coordinates to camera coordinates.
gripper_camera_pos = (
    wrist_cam_extrinsics_mat
    @ np.linalg.inv(gripper_extrinsics_mat)
    @ np.array([0.0, 0.0, 0.0, 1.0])
)
gripper_camera_pos = gripper_camera_pos[:3] / gripper_camera_pos[3]

# Project into the image plane with the intrinsics matrix.
pixel_coords = wrist_cam_intrinsics @ gripper_camera_pos
# Normalize the homogeneous coordinates.
pixel_coords = pixel_coords[:2] / pixel_coords[-1]
print(f"Pixel coordinates: {pixel_coords}")

# Get the first frame of the wrist cam.
wrist_video_path = os.path.join("/mnt/disks/data/droid_raw/1.0.1/AUTOLab", data["wrist_mp4_path"])
cap = cv2.VideoCapture(wrist_video_path)
ret, frame = cap.read()
if ret:
    # Convert BGR to RGB (OpenCV uses BGR by default).
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_rgb = frame_rgb.astype(np.uint8)
else:
    print("Failed to read the first frame from the video.")
cap.release()
print(frame_rgb.shape)

# Draw the projected gripper position onto the frame and save it.
cv2.circle(frame_rgb, (int(pixel_coords[0]), int(pixel_coords[1])), 10, (255, 0, 0), 2)
cv2.imwrite("frame_rgb.png", cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR))
```
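For clarity, the chain I *think* I want (writing $T_{A \leftarrow B}$ for the transform that maps points from frame $B$ into frame $A$, and assuming the stored extrinsics are camera-to-base, which is exactly the convention I'm unsure about) is:

$$
\tilde{p}_{\mathrm{cam}} \;=\; T_{\mathrm{base}\leftarrow\mathrm{cam}}^{-1} \cdot T_{\mathrm{base}\leftarrow\mathrm{gripper}} \cdot \begin{bmatrix} 0 \\ 0 \\ 0 \\ 1 \end{bmatrix}
$$

In the code above I composed it the other way around (`wrist_cam_extrinsics_mat @ np.linalg.inv(gripper_extrinsics_mat)`), so a flipped convention on either transform may well be the culprit.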
However, the pixel coordinates I get are clearly off:

```
Pixel coordinates: [-19944.78101899 14879.94500748]
```
I can confirm that the camera-coordinate → pixel-coordinate projection is correct, so the issue must be in the gripper position expressed in camera coordinates.
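For completeness, here is a minimal version of the sanity check I used for the projection step (a hypothetical point on the optical axis, assuming the intrinsics are a standard 3×3 pinhole matrix):

```python
# Sanity check for the projection step (hypothetical point, assumes 3x3 pinhole intrinsics).
# A point 1 m in front of the camera on the optical axis should project to the
# principal point (cx, cy) of the intrinsics matrix.
test_point_cam = np.array([0.0, 0.0, 1.0])   # (x, y, z) in camera frame, metres
uvw = wrist_cam_intrinsics @ test_point_cam  # homogeneous pixel coordinates
uv = uvw[:2] / uvw[2]                        # perspective divide
print(uv, wrist_cam_intrinsics[:2, 2])       # uv matches (cx, cy), as expected
```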
What might I have done wrong? Many thanks in advance!