Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions airo-dataset-tools/airo_dataset_tools/coco_tools/merge_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,37 @@ def merge_coco_image_folders(dataset1_base_dir: str, dataset2_base_dir: str, tar
dataset2_base_dir_path = pathlib.Path(dataset2_base_dir)
target_dir_path = pathlib.Path(target_dir)

dataset1_image_paths = [image_path for image_path in dataset1_base_dir_path.iterdir()]
dataset2_image_paths = [image_path for image_path in dataset2_base_dir_path.iterdir()]

target_image_dir = target_dir_path / "images"
# find all images in the base dirs
# recursively search for all files in the base dirs and their subdirs
import glob
dataset1_image_paths = [pathlib.Path(image_path) for image_path in glob.glob(str(dataset1_base_dir_path / "**" / "*"), recursive=True)]
dataset2_image_paths = [pathlib.Path(image_path) for image_path in glob.glob(str(dataset2_base_dir_path / "**" / "*"), recursive=True)]

print(len(dataset1_image_paths), len(dataset2_image_paths))
# remove all non-image files (keep only .jpg, .jpeg and .png)
dataset1_image_paths = [image_path for image_path in dataset1_image_paths if image_path.suffix in [".jpg", ".jpeg", ".png"]]
dataset2_image_paths = [image_path for image_path in dataset2_image_paths if image_path.suffix in [".jpg", ".jpeg", ".png"]]

target_image_dir = target_dir_path
target_image_dir.mkdir(parents=True, exist_ok=True)

for image_path in tqdm.tqdm(
dataset1_image_paths, desc=f"copying images from {dataset1_base_dir_path.name} to {target_dir_path.name}"
):
shutil.copy(image_path, target_image_dir / image_path.name)
# ensure directory exists
if not (target_image_dir / image_path.relative_to(dataset1_base_dir_path)).parent.exists():
(target_image_dir / image_path.relative_to(dataset1_base_dir_path)).parent.mkdir(parents=True, exist_ok=True)

shutil.copy(image_path, target_image_dir / image_path.relative_to(dataset1_base_dir_path))

for image_path in tqdm.tqdm(
dataset2_image_paths, desc=f"copying images from {dataset2_base_dir_path.name} to {target_dir_path.name}"
):
if not (target_image_dir / image_path.name).exists():
shutil.copy(image_path, target_image_dir / image_path.name)
dataset2_image_paths, desc=f"copying images from {dataset2_base_dir_path.name} to {target_dir_path}"
): # ensure directory exists
if not (target_image_dir / image_path.relative_to(dataset2_base_dir_path)).parent.exists():
(target_image_dir / image_path.relative_to(dataset2_base_dir_path)).parent.mkdir(parents=True, exist_ok=True)

if not (target_image_dir / image_path.relative_to(dataset2_base_dir_path)).exists():
shutil.copy(image_path, target_image_dir / image_path.relative_to(dataset2_base_dir_path))


def merge_coco_datasets(json_path_1: str, json_path_2: str, target_json_path: str) -> None:
Expand All @@ -102,8 +117,14 @@ def merge_coco_datasets(json_path_1: str, json_path_2: str, target_json_path: st

Annotation IDs will be changed to avoid conflicts and their image IDs will be updated if needed."""

image_path_1 = pathlib.Path(json_path_1).parent / "images"
image_path_2 = pathlib.Path(json_path_2).parent / "images"
# find the base image dir
# take image path from coco, first dir after parent dir of json is the base image dir
# load the jsons


image_path_1 = pathlib.Path(json_path_1).parent
image_path_2 = pathlib.Path(json_path_2).parent
print(image_path_1, image_path_2)

merge_coco_image_folders(str(image_path_1), str(image_path_2), str(pathlib.Path(target_json_path).parent))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,23 @@ def apply_transform_to_coco_dataset( # type: ignore # noqa: C901
coco_dataset = CocoKeypointsDataset(**coco_dataset.model_dump(exclude_none=False))
transform_keypoints = all(annotation.keypoints is not None for annotation in coco_dataset.annotations)

except PydanticValidationError:
except PydanticValidationError as e:
print("not transforming keypoints due to pydantic validation error")
print(e)
transform_keypoints = False

# check if bboxes and masks are present
transform_bbox = all(annotation.bbox is not None for annotation in coco_dataset.annotations)
transform_segmentation = all(annotation.segmentation is not None for annotation in coco_dataset.annotations)

# skip the segmentation transformation if any annotation has an empty segmentation list
for annotation in coco_dataset.annotations:
print(annotation.segmentation)
if isinstance(annotation.segmentation, list) and len(annotation.segmentation) == 0:
print("Empty segmentation mask found. Skipping segmentation transformation.")
transform_segmentation = False
break

print(f"Transforming keypoints = {transform_keypoints}")
print(f"Transforming bbox = {transform_bbox}")
print(f"Transforming segmentation = {transform_segmentation}")
Expand Down
7 changes: 7 additions & 0 deletions airo-dataset-tools/airo_dataset_tools/data_parsers/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,17 @@ def keypoints_coordinates_must_be_in_pixel_space(cls, v: Keypoints) -> Keypoints

@model_validator(mode="after")
def num_keypoints_matches_amount_of_labeled_keypoints(self) -> "CocoKeypointAnnotation":


labeled_keypoints = 0
for v in self.keypoints[2::3]:
if v > 0:
labeled_keypoints += 1

# if num_keypoints is not set, set it to the number of labeled keypoints
if self.num_keypoints is None:
self.num_keypoints = labeled_keypoints

assert (
labeled_keypoints == self.num_keypoints
), f"num_keypoints {self.num_keypoints} does not match number of labeled of keypoints {labeled_keypoints} for annotation {self.id}"
Expand Down
Loading