diff --git a/airo-dataset-tools/airo_dataset_tools/coco_tools/merge_datasets.py b/airo-dataset-tools/airo_dataset_tools/coco_tools/merge_datasets.py index 37ecc4b0..fb9b41b8 100644 --- a/airo-dataset-tools/airo_dataset_tools/coco_tools/merge_datasets.py +++ b/airo-dataset-tools/airo_dataset_tools/coco_tools/merge_datasets.py @@ -77,22 +77,37 @@ def merge_coco_image_folders(dataset1_base_dir: str, dataset2_base_dir: str, tar dataset2_base_dir_path = pathlib.Path(dataset2_base_dir) target_dir_path = pathlib.Path(target_dir) - dataset1_image_paths = [image_path for image_path in dataset1_base_dir_path.iterdir()] - dataset2_image_paths = [image_path for image_path in dataset2_base_dir_path.iterdir()] - - target_image_dir = target_dir_path / "images" + # find all images in the base dirs + # iteratively search for all images in the base dirs and subdirs + import glob + dataset1_image_paths = [pathlib.Path(image_path) for image_path in glob.glob(str(dataset1_base_dir_path / "**" / "*"), recursive=True)] + dataset2_image_paths = [pathlib.Path(image_path) for image_path in glob.glob(str(dataset2_base_dir_path / "**" / "*"), recursive=True)] + + print(len(dataset1_image_paths), len(dataset2_image_paths)) + # remove all non image files + dataset1_image_paths = [image_path for image_path in dataset1_image_paths if image_path.suffix in [".jpg", ".jpeg", ".png"]] + dataset2_image_paths = [image_path for image_path in dataset2_image_paths if image_path.suffix in [".jpg", ".jpeg", ".png"]] + + target_image_dir = target_dir_path target_image_dir.mkdir(parents=True, exist_ok=True) for image_path in tqdm.tqdm( dataset1_image_paths, desc=f"copying images from {dataset1_base_dir_path.name} to {target_dir_path.name}" ): - shutil.copy(image_path, target_image_dir / image_path.name) + # ensure directory exists + if not (target_image_dir / image_path.relative_to(dataset1_base_dir_path)).parent.exists(): + (target_image_dir / image_path.relative_to(dataset1_base_dir_path)).parent.mkdir(parents=True, exist_ok=True) + + shutil.copy(image_path, target_image_dir / image_path.relative_to(dataset1_base_dir_path)) for image_path in tqdm.tqdm( - dataset2_image_paths, desc=f"copying images from {dataset2_base_dir_path.name} to {target_dir_path.name}" - ): - if not (target_image_dir / image_path.name).exists(): - shutil.copy(image_path, target_image_dir / image_path.name) + dataset2_image_paths, desc=f"copying images from {dataset2_base_dir_path.name} to {target_dir_path}" + ): # ensure directory exists + if not (target_image_dir / image_path.relative_to(dataset2_base_dir_path)).parent.exists(): + (target_image_dir / image_path.relative_to(dataset2_base_dir_path)).parent.mkdir(parents=True, exist_ok=True) + + if not (target_image_dir / image_path.relative_to(dataset2_base_dir_path)).exists(): + shutil.copy(image_path, target_image_dir / image_path.relative_to(dataset2_base_dir_path)) def merge_coco_datasets(json_path_1: str, json_path_2: str, target_json_path: str) -> None: @@ -102,8 +117,14 @@ def merge_coco_datasets(json_path_1: str, json_path_2: str, target_json_path: st Annotation IDs will be changed to avoid conflicts and their image IDs will be updated if needed.""" - image_path_1 = pathlib.Path(json_path_1).parent / "images" - image_path_2 = pathlib.Path(json_path_2).parent / "images" + # find the base image dir + # take image path from coco, first dir after parent dir of json is the base image dir + # load the jsons + + + image_path_1 = pathlib.Path(json_path_1).parent + image_path_2 = pathlib.Path(json_path_2).parent + print(image_path_1, image_path_2) merge_coco_image_folders(str(image_path_1), str(image_path_2), str(pathlib.Path(target_json_path).parent)) diff --git a/airo-dataset-tools/airo_dataset_tools/coco_tools/transform_dataset.py b/airo-dataset-tools/airo_dataset_tools/coco_tools/transform_dataset.py index 1a193f29..623f7e12 100644 --- a/airo-dataset-tools/airo_dataset_tools/coco_tools/transform_dataset.py +++ b/airo-dataset-tools/airo_dataset_tools/coco_tools/transform_dataset.py @@ -41,11 +41,23 @@ def apply_transform_to_coco_dataset( # type: ignore # noqa: C901 coco_dataset = CocoKeypointsDataset(**coco_dataset.model_dump(exclude_none=False)) transform_keypoints = all(annotation.keypoints is not None for annotation in coco_dataset.annotations) - except PydanticValidationError: + except PydanticValidationError as e: + print("not transforming keypoints due to pydantic validation error") + print(e) transform_keypoints = False + # check if bboxes and masks are present transform_bbox = all(annotation.bbox is not None for annotation in coco_dataset.annotations) transform_segmentation = all(annotation.segmentation is not None for annotation in coco_dataset.annotations) + + # check if seg masks are not empty arrays + for annotation in coco_dataset.annotations: + print(annotation.segmentation) + if isinstance(annotation.segmentation, list) and len(annotation.segmentation) == 0: + print("Empty segmentation mask found. Skipping segmentation transformation.") + transform_segmentation = False + break + print(f"Transforming keypoints = {transform_keypoints}") print(f"Transforming bbox = {transform_bbox}") print(f"Transforming segmentation = {transform_segmentation}") diff --git a/airo-dataset-tools/airo_dataset_tools/data_parsers/coco.py b/airo-dataset-tools/airo_dataset_tools/data_parsers/coco.py index 555018a7..0b5a4925 100644 --- a/airo-dataset-tools/airo_dataset_tools/data_parsers/coco.py +++ b/airo-dataset-tools/airo_dataset_tools/data_parsers/coco.py @@ -147,10 +147,17 @@ def keypoints_coordinates_must_be_in_pixel_space(cls, v: Keypoints) -> Keypoints @model_validator(mode="after") def num_keypoints_matches_amount_of_labeled_keypoints(self) -> "CocoKeypointAnnotation": + + labeled_keypoints = 0 for v in self.keypoints[2::3]: if v > 0: labeled_keypoints += 1 + + # if num_keypoints is not set, set it to the number of labeled keypoints + if self.num_keypoints is None: + self.num_keypoints = labeled_keypoints + assert ( labeled_keypoints == self.num_keypoints ), f"num_keypoints {self.num_keypoints} does not match number of labeled of keypoints {labeled_keypoints} for annotation {self.id}"