diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index c19c580..c20585b 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -44,6 +44,6 @@ jobs:
           --no-site-packages \
           --cache-dir=/dev/null
 
-      # - name: Run tests
-      #   run: |
-      #     poetry run pytest --color=yes -rf
+      - name: Run tests
+        run: |
+          poetry run pytest --color=yes -rf
diff --git a/MSCOCO.py b/MSCOCO.py
index 80b1b8a..7b8d034 100644
--- a/MSCOCO.py
+++ b/MSCOCO.py
@@ -1,3 +1,4 @@
+import abc
 import json
 import logging
 import os
@@ -89,6 +90,104 @@
         },
     }
 
+CATEGORIES: Final[List[str]] = [
+    "person",
+    "bicycle",
+    "car",
+    "motorcycle",
+    "airplane",
+    "bus",
+    "train",
+    "truck",
+    "boat",
+    "traffic light",
+    "fire hydrant",
+    "stop sign",
+    "parking meter",
+    "bench",
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+    "backpack",
+    "umbrella",
+    "handbag",
+    "tie",
+    "suitcase",
+    "frisbee",
+    "skis",
+    "snowboard",
+    "sports ball",
+    "kite",
+    "baseball bat",
+    "baseball glove",
+    "skateboard",
+    "surfboard",
+    "tennis racket",
+    "bottle",
+    "wine glass",
+    "cup",
+    "fork",
+    "knife",
+    "spoon",
+    "bowl",
+    "banana",
+    "apple",
+    "sandwich",
+    "orange",
+    "broccoli",
+    "carrot",
+    "hot dog",
+    "pizza",
+    "donut",
+    "cake",
+    "chair",
+    "couch",
+    "potted plant",
+    "bed",
+    "dining table",
+    "toilet",
+    "tv",
+    "laptop",
+    "mouse",
+    "remote",
+    "keyboard",
+    "cell phone",
+    "microwave",
+    "oven",
+    "toaster",
+    "sink",
+    "refrigerator",
+    "book",
+    "clock",
+    "vase",
+    "scissors",
+    "teddy bear",
+    "hair drier",
+    "toothbrush",
+]
+
+SUPER_CATEGORIES: Final[List[str]] = [
+    "person",
+    "vehicle",
+    "outdoor",
+    "animal",
+    "accessory",
+    "sports",
+    "kitchen",
+    "food",
+    "furniture",
+    "electronic",
+    "appliance",
+    "indoor",
+]
+
 
 @dataclass
 class AnnotationInfo(object):
@@ -250,21 +349,21 @@ def from_dict(
         image_data = images[image_id]
 
         iscrowd = bool(json_dict["iscrowd"])
-        if decode_rle:
-            segmentation_mask = cls.rle_segmentation_to_mask(
+        segmentation_mask = (
+            cls.rle_segmentation_to_mask(
                 segmentation=segmentation,
                 iscrowd=iscrowd,
                 height=image_data.height,
                 width=image_data.width,
             )
-            assert segmentation_mask.shape == image_data.shape
-        else:
-            segmentation_mask = cls.compress_rle(
+            if decode_rle
+            else cls.compress_rle(
                 segmentation=segmentation,
                 iscrowd=iscrowd,
                 height=image_data.height,
                 width=image_data.width,
             )
+        )
         return cls(
             #
             # for AnnotationData
@@ -327,22 +426,21 @@ def from_dict(
         image_data = images[image_id]
 
         iscrowd = bool(json_dict["iscrowd"])
-        if decode_rle:
-            segmentation_mask = cls.rle_segmentation_to_mask(
+        segmentation_mask = (
+            cls.rle_segmentation_to_mask(
                 segmentation=segmentation,
                 iscrowd=iscrowd,
                 height=image_data.height,
                 width=image_data.width,
             )
-            assert segmentation_mask.shape == image_data.shape
-        else:
-            segmentation_mask = cls.compress_rle(
+            if decode_rle
+            else cls.compress_rle(
                 segmentation=segmentation,
                 iscrowd=iscrowd,
                 height=image_data.height,
                 width=image_data.width,
             )
-
+        )
         flatten_keypoints = json_dict["keypoints"]
         num_keypoints = json_dict["num_keypoints"]
         keypoints = cls.get_person_keypoints(flatten_keypoints, num_keypoints)
@@ -397,32 +495,6 @@ class CaptionExample(BaseExample):
     annotations: List[CaptionAnnotationDict]
 
 
-def generate_captions_examples(
-    image_dir: str,
-    images: Dict[ImageId, ImageData],
-    annotations: Dict[ImageId, List[CaptionsAnnotationData]],
-    licenses: Dict[LicenseId, LicenseData],
-) -> Iterator[Tuple[int, CaptionExample]]:
-    for idx, image_id in enumerate(images.keys()):
-        image_data = images[image_id]
-        image_anns = annotations[image_id]
-
-        assert len(image_anns) > 0
-
-        image = _load_image(
-            image_path=os.path.join(image_dir, image_data.file_name),
-        )
-        example = asdict(image_data)
-        example["image"] = image
-        example["license"] = asdict(licenses[image_data.license_id])
-
-        example["annotations"] = []
-        for ann in image_anns:
-            example["annotations"].append(asdict(ann))
-
-        yield idx, example  # type: ignore
-
-
 class CategoryDict(TypedDict):
     category_id: CategoryId
     name: str
@@ -444,38 +516,6 @@ class InstanceExample(BaseExample):
     annotations: List[InstanceAnnotationDict]
 
 
-def generate_instances_examples(
-    image_dir: str,
-    images: Dict[ImageId, ImageData],
-    annotations: Dict[ImageId, List[InstancesAnnotationData]],
-    licenses: Dict[LicenseId, LicenseData],
-    categories: Dict[CategoryId, CategoryData],
-) -> Iterator[Tuple[int, InstanceExample]]:
-    for idx, image_id in enumerate(images.keys()):
-        image_data = images[image_id]
-        image_anns = annotations[image_id]
-
-        if len(image_anns) < 1:
-            logger.warning(f"No annotation found for image id: {image_id}.")
-            continue
-
-        image = _load_image(
-            image_path=os.path.join(image_dir, image_data.file_name),
-        )
-        example = asdict(image_data)
-        example["image"] = image
-        example["license"] = asdict(licenses[image_data.license_id])
-
-        example["annotations"] = []
-        for ann in image_anns:
-            ann_dict = asdict(ann)
-            category = categories[ann.category_id]
-            ann_dict["category"] = asdict(category)
-            example["annotations"].append(ann_dict)
-
-        yield idx, example  # type: ignore
-
-
 class KeypointDict(TypedDict):
     x: int
     y: int
@@ -492,37 +532,300 @@ class PersonKeypointExample(BaseExample):
     annotations: List[PersonKeypointAnnotationDict]
 
 
-def generate_person_keypoints_examples(
-    image_dir: str,
-    images: Dict[ImageId, ImageData],
-    annotations: Dict[ImageId, List[PersonKeypointsAnnotationData]],
-    licenses: Dict[LicenseId, LicenseData],
-    categories: Dict[CategoryId, CategoryData],
-) -> Iterator[Tuple[int, PersonKeypointExample]]:
-    for idx, image_id in enumerate(images.keys()):
-        image_data = images[image_id]
-        image_anns = annotations[image_id]
+class MsCocoProcessor(object, metaclass=abc.ABCMeta):
+    def load_image(self, image_path: str) -> PilImage:
+        return Image.open(image_path)
+
+    def load_annotation_json(self, ann_file_path: str) -> JsonDict:
+        logger.info(f"Load annotation json from {ann_file_path}")
+        with open(ann_file_path, "r") as rf:
+            ann_json = json.load(rf)
+        return ann_json
 
-        if len(image_anns) < 1:
-            # If there are no persons in the image,
-            # no keypoint annotations will be assigned.
-            continue
+    def load_licenses_data(
+        self, license_dicts: List[JsonDict]
+    ) -> Dict[LicenseId, LicenseData]:
+        licenses = {}
+        for license_dict in license_dicts:
+            license_data = LicenseData.from_dict(license_dict)
+            licenses[license_data.license_id] = license_data
+        return licenses
+
+    def load_images_data(
+        self,
+        image_dicts: List[JsonDict],
+        tqdm_desc: str = "Load images",
+    ) -> Dict[ImageId, ImageData]:
+        images = {}
+        for image_dict in tqdm(image_dicts, desc=tqdm_desc):
+            image_data = ImageData.from_dict(image_dict)
+            images[image_data.image_id] = image_data
+        return images
+
+    def load_categories_data(
+        self,
+        category_dicts: List[JsonDict],
+        tqdm_desc: str = "Load categories",
+    ) -> Dict[CategoryId, CategoryData]:
+        categories = {}
+        for category_dict in tqdm(category_dicts, desc=tqdm_desc):
+            category_data = CategoryData.from_dict(category_dict)
+            categories[category_data.category_id] = category_data
+        return categories
+
+    def get_features_base_dict(self):
+        return {
+            "image_id": ds.Value("int64"),
+            "image": ds.Image(),
+            "file_name": ds.Value("string"),
+            "coco_url": ds.Value("string"),
+            "height": ds.Value("int32"),
+            "width": ds.Value("int32"),
+            "date_captured": ds.Value("string"),
+            "flickr_url": ds.Value("string"),
+            "license_id": ds.Value("int32"),
+            "license": {
+                "url": ds.Value("string"),
+                "license_id": ds.Value("int8"),
+                "name": ds.Value("string"),
+            },
+        }
+
+    @abc.abstractmethod
+    def get_features(self, *args, **kwargs) -> ds.Features:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def load_data(
+        self, ann_dicts: List[JsonDict], tqdm_desc: Optional[str] = None, **kwargs
+    ):
+        raise NotImplementedError
 
-        image = _load_image(
-            image_path=os.path.join(image_dir, image_data.file_name),
+    @abc.abstractmethod
+    def generate_examples(
+        self,
+        image_dir: str,
+        images: Dict[ImageId, ImageData],
+        annotations: Dict[ImageId, List[CaptionsAnnotationData]],
+        licenses: Dict[LicenseId, LicenseData],
+        **kwargs,
+    ):
+        raise NotImplementedError
+
+
+class CaptionsProcessor(MsCocoProcessor):
+    def get_features(self, *args, **kwargs) -> ds.Features:
+        features_dict = self.get_features_base_dict()
+        annotations = ds.Sequence(
+            {
+                "annotation_id": ds.Value("int64"),
+                "image_id": ds.Value("int64"),
+                "caption": ds.Value("string"),
+            }
         )
-        example = asdict(image_data)
-        example["image"] = image
-        example["license"] = asdict(licenses[image_data.license_id])
+        features_dict.update({"annotations": annotations})
+        return ds.Features(features_dict)
 
-        example["annotations"] = []
-        for ann in image_anns:
-            ann_dict = asdict(ann)
-            category = categories[ann.category_id]
-            ann_dict["category"] = asdict(category)
-            example["annotations"].append(ann_dict)
+    def load_data(
+        self,
+        ann_dicts: List[JsonDict],
+        tqdm_desc: str = "Load captions data",
+        **kwargs,
+    ) -> Dict[ImageId, List[CaptionsAnnotationData]]:
+        annotations = defaultdict(list)
+        for ann_dict in tqdm(ann_dicts, desc=tqdm_desc):
+            ann_data = CaptionsAnnotationData.from_dict(ann_dict)
+            annotations[ann_data.image_id].append(ann_data)
+        return annotations
+
+    def generate_examples(
+        self,
+        image_dir: str,
+        images: Dict[ImageId, ImageData],
+        annotations: Dict[ImageId, List[CaptionsAnnotationData]],
+        licenses: Dict[LicenseId, LicenseData],
+        **kwargs,
+    ) -> Iterator[Tuple[int, CaptionExample]]:
+        for idx, image_id in enumerate(images.keys()):
+            image_data = images[image_id]
+            image_anns = annotations[image_id]
+
+            assert len(image_anns) > 0
+
+            image = self.load_image(
+                image_path=os.path.join(image_dir, image_data.file_name),
+            )
+            example = asdict(image_data)
+            example["image"] = image
+            example["license"] = asdict(licenses[image_data.license_id])
+
+            example["annotations"] = []
+            for ann in image_anns:
+                example["annotations"].append(asdict(ann))
+
+            yield idx, example  # type: ignore
+
+
+class InstancesProcessor(MsCocoProcessor):
+    def get_features_instance_dict(self, decode_rle: bool):
+        segmentation_feature = (
+            ds.Image()
+            if decode_rle
+            else {
+                "counts": ds.Sequence(ds.Value("int64")),
+                "size": ds.Sequence(ds.Value("int32")),
+            }
+        )
+        return {
+            "annotation_id": ds.Value("int64"),
+            "image_id": ds.Value("int64"),
+            "segmentation": segmentation_feature,
+            "area": ds.Value("float32"),
+            "iscrowd": ds.Value("bool"),
+            "bbox": ds.Sequence(ds.Value("float32"), length=4),
+            "category_id": ds.Value("int32"),
+            "category": {
+                "category_id": ds.Value("int32"),
+                "name": ds.ClassLabel(
+                    num_classes=len(CATEGORIES),
+                    names=CATEGORIES,
+                ),
+                "supercategory": ds.ClassLabel(
+                    num_classes=len(SUPER_CATEGORIES),
+                    names=SUPER_CATEGORIES,
+                ),
+            },
+        }
 
-        yield idx, example  # type: ignore
+    def get_features(self, decode_rle: bool) -> ds.Features:
+        features_dict = self.get_features_base_dict()
+        annotations = ds.Sequence(
+            self.get_features_instance_dict(decode_rle=decode_rle)
+        )
+        features_dict.update({"annotations": annotations})
+        return ds.Features(features_dict)
+
+    def load_data(  # type: ignore[override]
+        self,
+        ann_dicts: List[JsonDict],
+        images: Dict[ImageId, ImageData],
+        decode_rle: bool,
+        tqdm_desc: str = "Load instances data",
+    ) -> Dict[ImageId, List[InstancesAnnotationData]]:
+        annotations = defaultdict(list)
+        ann_dicts = sorted(ann_dicts, key=lambda d: d["image_id"])
+
+        for ann_dict in tqdm(ann_dicts, desc=tqdm_desc):
+            ann_data = InstancesAnnotationData.from_dict(
+                ann_dict, images=images, decode_rle=decode_rle
+            )
+            annotations[ann_data.image_id].append(ann_data)
+
+        return annotations
+
+    def generate_examples(  # type: ignore[override]
+        self,
+        image_dir: str,
+        images: Dict[ImageId, ImageData],
+        annotations: Dict[ImageId, List[InstancesAnnotationData]],
+        licenses: Dict[LicenseId, LicenseData],
+        categories: Dict[CategoryId, CategoryData],
+    ) -> Iterator[Tuple[int, InstanceExample]]:
+        for idx, image_id in enumerate(images.keys()):
+            image_data = images[image_id]
+            image_anns = annotations[image_id]
+
+            if len(image_anns) < 1:
+                logger.warning(f"No annotation found for image id: {image_id}.")
+                continue
+
+            image = self.load_image(
+                image_path=os.path.join(image_dir, image_data.file_name),
+            )
+            example = asdict(image_data)
+            example["image"] = image
+            example["license"] = asdict(licenses[image_data.license_id])
+
+            example["annotations"] = []
+            for ann in image_anns:
+                ann_dict = asdict(ann)
+                category = categories[ann.category_id]
+                ann_dict["category"] = asdict(category)
+                example["annotations"].append(ann_dict)
+
+            yield idx, example  # type: ignore
+
+
+class PersonKeypointsProcessor(InstancesProcessor):
+    def get_features(self, decode_rle: bool) -> ds.Features:
+        features_dict = self.get_features_base_dict()
+        features_instance_dict = self.get_features_instance_dict(decode_rle=decode_rle)
+        features_instance_dict.update(
+            {
+                "keypoints": ds.Sequence(
+                    {
+                        "state": ds.Value("string"),
+                        "x": ds.Value("int32"),
+                        "y": ds.Value("int32"),
+                        "v": ds.Value("int32"),
+                    }
+                ),
+                "num_keypoints": ds.Value("int32"),
+            }
+        )
+        annotations = ds.Sequence(features_instance_dict)
+        features_dict.update({"annotations": annotations})
+        return ds.Features(features_dict)
+
+    def load_data(  # type: ignore[override]
+        self,
+        ann_dicts: List[JsonDict],
+        images: Dict[ImageId, ImageData],
+        decode_rle: bool,
+        tqdm_desc: str = "Load person keypoints data",
+    ) -> Dict[ImageId, List[PersonKeypointsAnnotationData]]:
+        annotations = defaultdict(list)
+        ann_dicts = sorted(ann_dicts, key=lambda d: d["image_id"])
+
+        for ann_dict in tqdm(ann_dicts, desc=tqdm_desc):
+            ann_data = PersonKeypointsAnnotationData.from_dict(
+                ann_dict, images=images, decode_rle=decode_rle
+            )
+            annotations[ann_data.image_id].append(ann_data)
+        return annotations
+
+    def generate_examples(  # type: ignore[override]
+        self,
+        image_dir: str,
+        images: Dict[ImageId, ImageData],
+        annotations: Dict[ImageId, List[PersonKeypointsAnnotationData]],
+        licenses: Dict[LicenseId, LicenseData],
+        categories: Dict[CategoryId, CategoryData],
+    ) -> Iterator[Tuple[int, PersonKeypointExample]]:
+        for idx, image_id in enumerate(images.keys()):
+            image_data = images[image_id]
+            image_anns = annotations[image_id]
+
+            if len(image_anns) < 1:
+                # If there are no persons in the image,
+                # no keypoint annotations will be assigned.
+                continue
+
+            image = self.load_image(
+                image_path=os.path.join(image_dir, image_data.file_name),
+            )
+            example = asdict(image_data)
+            example["image"] = image
+            example["license"] = asdict(licenses[image_data.license_id])
+
+            example["annotations"] = []
+            for ann in image_anns:
+                ann_dict = asdict(ann)
+                category = categories[ann.category_id]
+                ann_dict["category"] = asdict(category)
+                example["annotations"].append(ann_dict)
+
+            yield idx, example  # type: ignore
 
 
 class MsCocoConfig(ds.BuilderConfig):
@@ -558,6 +861,7 @@ def __init__(
         self._year = year
         self._task = coco_task
+        self.processor = self.get_processor()
         self.decode_rle = decode_rle
 
     def _check_year(self, year: int) -> None:
@@ -568,7 +872,7 @@ def _check_task(self, task: Union[str, Sequence[str]]) -> None:
             assert task in self.TASKS, task
         elif isinstance(task, list) or isinstance(task, tuple):
            for t in task:
-                assert self.TASKS, task
+                assert t in self.TASKS, task
         else:
             raise ValueError(f"Invalid task: {task}")
 
@@ -585,6 +889,16 @@ def task(self) -> str:
         else:
             raise ValueError(f"Invalid task: {self._task}")
 
+    def get_processor(self) -> MsCocoProcessor:
+        if self.task == "captions":
+            return CaptionsProcessor()
+        elif self.task == "instances":
+            return InstancesProcessor()
+        elif self.task == "person_keypoints":
+            return PersonKeypointsProcessor()
+        else:
+            raise ValueError(f"Invalid task: {self.task}")
+
     @classmethod
     def config_name(cls, year: int, task: Union[str, Sequence[str]]) -> str:
         if isinstance(task, str):
@@ -596,178 +910,6 @@ def config_name(cls, year: int, task: Union[str, Sequence[str]]) -> str:
         raise ValueError(f"Invalid task: {task}")
 
 
-def _load_image(image_path: str) -> PilImage:
-    return Image.open(image_path)
-
-
-def _load_annotation_json(ann_file_path: str) -> JsonDict:
-    logger.info(f"Load annotation json from {ann_file_path}")
-    with open(ann_file_path, "r") as rf:
-        ann_json = json.load(rf)
-    return ann_json
-
-
-def _load_licenses_data(license_dicts: List[JsonDict]) -> Dict[LicenseId, LicenseData]:
-    licenses = {}
-    for license_dict in license_dicts:
-        license_data = LicenseData.from_dict(license_dict)
-        licenses[license_data.license_id] = license_data
-    return licenses
-
-
-def _load_images_data(
-    image_dicts: List[JsonDict],
-    tqdm_desc: str = "Load images",
-) -> Dict[ImageId, ImageData]:
-    images = {}
-    for image_dict in tqdm(image_dicts, desc=tqdm_desc):
-        image_data = ImageData.from_dict(image_dict)
-        images[image_data.image_id] = image_data
-    return images
-
-
-def _load_categories_data(
-    category_dicts: List[JsonDict],
-    tqdm_desc: str = "Load categories",
-) -> Dict[CategoryId, CategoryData]:
-    categories = {}
-    for category_dict in tqdm(category_dicts, desc=tqdm_desc):
-        category_data = CategoryData.from_dict(category_dict)
-        categories[category_data.category_id] = category_data
-    return categories
-
-
-def _load_captions_data(
-    ann_dicts: List[JsonDict],
-    tqdm_desc: str = "Load captions data",
-) -> Dict[ImageId, List[CaptionsAnnotationData]]:
-    annotations = defaultdict(list)
-    for ann_dict in tqdm(ann_dicts, desc=tqdm_desc):
-        ann_data = CaptionsAnnotationData.from_dict(ann_dict)
-        annotations[ann_data.image_id].append(ann_data)
-    return annotations
-
-
-def _load_instances_data(
-    ann_dicts: List[JsonDict],
-    images: Dict[ImageId, ImageData],
-    decode_rle: bool,
-    tqdm_desc: str = "Load instances data",
-) -> Dict[ImageId, List[InstancesAnnotationData]]:
-    annotations = defaultdict(list)
-    ann_dicts = sorted(ann_dicts, key=lambda d: d["image_id"])
-
-    for ann_dict in tqdm(ann_dicts, desc=tqdm_desc):
-        ann_data = InstancesAnnotationData.from_dict(
-            ann_dict, images=images, decode_rle=decode_rle
-        )
-        annotations[ann_data.image_id].append(ann_data)
-
-    return annotations
-
-
-def _load_person_keypoints_data(
-    ann_dicts: List[JsonDict],
-    images: Dict[ImageId, ImageData],
-    decode_rle: bool,
-    tqdm_desc: str = "Load person keypoints data",
-) -> Dict[ImageId, List[PersonKeypointsAnnotationData]]:
-    annotations = defaultdict(list)
-    ann_dicts = sorted(ann_dicts, key=lambda d: d["image_id"])
-
-    for ann_dict in tqdm(ann_dicts, desc=tqdm_desc):
-        ann_data = PersonKeypointsAnnotationData.from_dict(
-            ann_dict, images=images, decode_rle=decode_rle
-        )
-        annotations[ann_data.image_id].append(ann_data)
-    return annotations
-
-
-def get_features_base_dict():
-    return {
-        "image_id": ds.Value("int64"),
-        "image": ds.Image(),
-        "file_name": ds.Value("string"),
-        "coco_url": ds.Value("string"),
-        "height": ds.Value("int32"),
-        "width": ds.Value("int32"),
-        "date_captured": ds.Value("string"),
-        "flickr_url": ds.Value("string"),
-        "license_id": ds.Value("int32"),
-        "license": {
-            "url": ds.Value("string"),
-            "license_id": ds.Value("int8"),
-            "name": ds.Value("string"),
-        },
-    }
-
-
-def get_features_instance_dict(decode_rle: bool):
-    if decode_rle:
-        segmentation_feature = ds.Image()
-    else:
-        segmentation_feature = {
-            "counts": ds.Sequence(ds.Value("int64")),
-            "size": ds.Sequence(ds.Value("int32")),
-        }
-    return {
-        "annotation_id": ds.Value("int64"),
-        "image_id": ds.Value("int64"),
-        "segmentation": segmentation_feature,
-        "area": ds.Value("float32"),
-        "iscrowd": ds.Value("bool"),
-        "bbox": ds.Sequence(ds.Value("float32"), length=4),
-        "category_id": ds.Value("int32"),
-        "category": {
-            "category_id": ds.Value("int32"),
-            "name": ds.Value("string"),
-            "supercategory": ds.Value("string"),
-        },
-    }
-
-
-def get_features_captions() -> ds.Features:
-    features_dict = get_features_base_dict()
-    annotations = ds.Sequence(
-        {
-            "annotation_id": ds.Value("int64"),
-            "image_id": ds.Value("int64"),
-            "caption": ds.Value("string"),
-        }
-    )
-    features_dict.update({"annotations": annotations})
-
-    return ds.Features(features_dict)
-
-
-def get_features_instances(decode_rle: bool) -> ds.Features:
-    features_dict = get_features_base_dict()
-    annotations = ds.Sequence(get_features_instance_dict(decode_rle=decode_rle))
-    features_dict.update({"annotations": annotations})
-    return ds.Features(features_dict)
-
-
-def get_features_person_keypoints(decode_rle: bool) -> ds.Features:
-    features_dict = get_features_base_dict()
-    features_instance_dict = get_features_instance_dict(decode_rle=decode_rle)
-    features_instance_dict.update(
-        {
-            "keypoints": ds.Sequence(
-                {
-                    "state": ds.Value("string"),
-                    "x": ds.Value("int32"),
-                    "y": ds.Value("int32"),
-                    "v": ds.Value("int32"),
-                }
-            ),
-            "num_keypoints": ds.Value("int32"),
-        }
-    )
-    annotations = ds.Sequence(features_instance_dict)
-    features_dict.update({"annotations": annotations})
-    return ds.Features(features_dict)
-
-
 def dataset_configs(year: int, version: ds.Version) -> List[MsCocoConfig]:
     return [
         MsCocoConfig(
@@ -785,16 +927,16 @@ def dataset_configs(year: int, version: ds.Version) -> List[MsCocoConfig]:
             coco_task="person_keypoints",
             version=version,
         ),
-        MsCocoConfig(
-            year=year,
-            coco_task=("captions", "instances"),
-            version=version,
-        ),
-        MsCocoConfig(
-            year=year,
-            coco_task=("captions", "person_keypoints"),
-            version=version,
-        ),
+        # MsCocoConfig(
+        #     year=year,
+        #     coco_task=("captions", "instances"),
+        #     version=version,
+        # ),
+        # MsCocoConfig(
+        #     year=year,
+        #     coco_task=("captions", "person_keypoints"),
+        #     version=version,
+        # ),
     ]
 
 
@@ -822,19 +964,8 @@ def task(self) -> str:
         return config.task
 
     def _info(self) -> ds.DatasetInfo:
-        if self.task == "captions":
-            features = get_features_captions()
-        elif self.task == "instances":
-            features = get_features_instances(
-                decode_rle=self.config.decode_rle,  # type: ignore
-            )
-        elif self.task == "person_keypoints":
-            features = get_features_person_keypoints(
-                decode_rle=self.config.decode_rle,  # type: ignore
-            )
-        else:
-            raise ValueError(f"Invalid task: {self.task}")
-
+        processor: MsCocoProcessor = self.config.processor
+        features = processor.get_features(decode_rle=self.config.decode_rle)
         return ds.DatasetInfo(
             description=_DESCRIPTION,
             citation=_CITATION,
@@ -884,57 +1015,33 @@ def _generate_train_val_examples(
         ann_dir = os.path.join(base_annotation_dir, "annotations")
         ann_file_path = os.path.join(ann_dir, f"{self.task}_{split}{self.year}.json")
 
-        ann_json = _load_annotation_json(ann_file_path=ann_file_path)
+        processor: MsCocoProcessor = self.config.processor
+
+        ann_json = processor.load_annotation_json(ann_file_path=ann_file_path)
 
         # info = AnnotationInfo.from_dict(ann_json["info"])
-        licenses = _load_licenses_data(license_dicts=ann_json["licenses"])
-        images = _load_images_data(image_dicts=ann_json["images"])
+        licenses = processor.load_licenses_data(license_dicts=ann_json["licenses"])
+        images = processor.load_images_data(image_dicts=ann_json["images"])
 
         category_dicts = ann_json.get("categories")
         categories = (
-            _load_categories_data(category_dicts=category_dicts)
+            processor.load_categories_data(category_dicts=category_dicts)
             if category_dicts is not None
             else None
        )
 
-        config: MsCocoConfig = self.config  # type: ignore
-        if config.task == "captions":
-            yield from generate_captions_examples(
-                annotations=_load_captions_data(
-                    ann_dicts=ann_json["annotations"],
-                ),
-                image_dir=image_dir,
-                images=images,
-                licenses=licenses,
-            )
-        elif config.task == "instances":
-            assert categories is not None
-            yield from generate_instances_examples(
-                annotations=_load_instances_data(
-                    images=images,
-                    ann_dicts=ann_json["annotations"],
-                    decode_rle=self.config.decode_rle,  # type: ignore
-                ),
-                categories=categories,
-                image_dir=image_dir,
-                images=images,
-                licenses=licenses,
-            )
-        elif config.task == "person_keypoints":
-            assert categories is not None
-            yield from generate_person_keypoints_examples(
-                annotations=_load_person_keypoints_data(
-                    images=images,
-                    ann_dicts=ann_json["annotations"],
-                    decode_rle=self.config.decode_rle,  # type: ignore
-                ),
-                categories=categories,
-                image_dir=image_dir,
-                images=images,
-                licenses=licenses,
-            )
-        else:
-            raise ValueError(f"Invalid task: {config.task}")
+        config: MsCocoConfig = self.config
+        yield from processor.generate_examples(
+            annotations=processor.load_data(
+                ann_dicts=ann_json["annotations"],
+                images=images,
+                decode_rle=config.decode_rle,
+            ),
+            categories=categories,
+            image_dir=image_dir,
+            images=images,
+            licenses=licenses,
+        )
 
     def _generate_test_examples(self, test_image_info_path: str):
         raise NotImplementedError
diff --git a/README.md b/README.md
index f3f43ce..a1a1d39 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,276 @@
+---
+annotations_creators:
+- crowdsourced
+language:
+- en
+language_creators:
+- found
+license:
+- cc-by-4.0
+multilinguality:
+- monolingual
+pretty_name: MSCOCO
+size_categories: []
+source_datasets:
+- original
+tags:
+- image-captioning
+- object-detection
+- keypoint-detection
+- stuff-segmentation
+- panoptic-segmentation
+task_categories:
+- image-segmentation
+- object-detection
+- other
+task_ids:
+- instance-segmentation
+- semantic-segmentation
+- panoptic-segmentation
+---
+
 # Dataset Card for MSCOCO
 
 [![CI](https://github.com/shunk031/huggingface-datasets_MSCOCO/actions/workflows/ci.yaml/badge.svg)](https://github.com/shunk031/huggingface-datasets_MSCOCO/actions/workflows/ci.yaml)
+
+## Table of Contents
+- [Dataset Card for MSCOCO](#dataset-card-for-mscoco)
+  - [Table of Contents](#table-of-contents)
+  - [Dataset Description](#dataset-description)
+    - [Dataset Summary](#dataset-summary)
+    - [Supported Tasks and Leaderboards](#supported-tasks-and-leaderboards)
+    - [Languages](#languages)
+  - [Dataset Structure](#dataset-structure)
+    - [Data Instances](#data-instances)
+    - [Data Fields](#data-fields)
+    - [Data Splits](#data-splits)
+  - [Dataset Creation](#dataset-creation)
+    - [Curation Rationale](#curation-rationale)
+    - [Source Data](#source-data)
+      - [Initial Data Collection and Normalization](#initial-data-collection-and-normalization)
+      - [Who are the source language producers?](#who-are-the-source-language-producers)
+    - [Annotations](#annotations)
+      - [Annotation process](#annotation-process)
+      - [Who are the annotators?](#who-are-the-annotators)
+    - [Personal and Sensitive Information](#personal-and-sensitive-information)
+  - [Considerations for Using the Data](#considerations-for-using-the-data)
+    - [Social Impact of Dataset](#social-impact-of-dataset)
+    - [Discussion of Biases](#discussion-of-biases)
+    - [Other Known Limitations](#other-known-limitations)
+  - [Additional Information](#additional-information)
+    - [Dataset Curators](#dataset-curators)
+    - [Licensing Information](#licensing-information)
+    - [Citation Information](#citation-information)
+    - [Contributions](#contributions)
+
+## Dataset Description
+
+- **Homepage:** https://cocodataset.org/#home
+- **Repository:** https://github.com/shunk031/huggingface-datasets_MSCOCO
+- **Paper (Preprint):** https://arxiv.org/abs/1405.0312
+- **Paper (ECCV2014):** https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48
+- **Leaderboard (Detection):** https://cocodataset.org/#detection-leaderboard
+- **Leaderboard (Keypoint):** https://cocodataset.org/#keypoints-leaderboard
+- **Leaderboard (Stuff):** https://cocodataset.org/#stuff-leaderboard
+- **Leaderboard (Panoptic):** https://cocodataset.org/#panoptic-leaderboard
+- **Leaderboard (Captioning):** https://cocodataset.org/#captions-leaderboard
+- **Point of Contact:** info@cocodataset.org
+
+### Dataset Summary
+
+> COCO is a large-scale object detection, segmentation, and captioning dataset. COCO has several features:
+> - Object segmentation
+> - Recognition in context
+> - Superpixel stuff segmentation
+> - 330K images (>200K labeled)
+> - 1.5 million object instances
+> - 80 object categories
+> - 91 stuff categories
+> - 5 captions per image
+> - 250,000 people with keypoints
+
+### Supported Tasks and Leaderboards
+
+[More Information Needed]
+
+### Languages
+
+[More Information Needed]
+
+## Dataset Structure
+
+### Data Instances
+
+#### 2014
+
+- captioning dataset
+
+```python
+import datasets as ds
+
+dataset = ds.load_dataset(
+    "shunk031/MSCOCO",
+    year=2014,
+    coco_task="captions",
+)
+```
+
+- instances dataset
+
+```python
+import datasets as ds
+
+dataset = ds.load_dataset(
+    "shunk031/MSCOCO",
+    year=2014,
+    coco_task="instances",
+    decode_rle=True,  # Set to True to decode the Run-length Encoding (RLE) into a binary mask.
+)
+```
+
+- person keypoints dataset
+
+```python
+import datasets as ds
+
+dataset = ds.load_dataset(
+    "shunk031/MSCOCO",
+    year=2014,
+    coco_task="person_keypoints",
+    decode_rle=True,  # Set to True to decode the Run-length Encoding (RLE) into a binary mask.
+)
+```
+
+#### 2017
+
+- captioning dataset
+
+```python
+import datasets as ds
+
+dataset = ds.load_dataset(
+    "shunk031/MSCOCO",
+    year=2017,
+    coco_task="captions",
+)
+```
+
+- instances dataset
+
+```python
+import datasets as ds
+
+dataset = ds.load_dataset(
+    "shunk031/MSCOCO",
+    year=2017,
+    coco_task="instances",
+    decode_rle=True,  # Set to True to decode the Run-length Encoding (RLE) into a binary mask.
+)
+```
+
+- person keypoints dataset
+
+```python
+import datasets as ds
+
+dataset = ds.load_dataset(
+    "shunk031/MSCOCO",
+    year=2017,
+    coco_task="person_keypoints",
+    decode_rle=True,  # Set to True to decode the Run-length Encoding (RLE) into a binary mask.
+)
+```
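+
+A brief sketch of what `decode_rle` changes (not part of the official card): the `segmentation` field of each annotation switches type with this flag, following the feature definitions in `MSCOCO.py`. Since `datasets` exposes a `Sequence` of dicts column-wise (as a dict of lists), annotations are accessed as below, assuming one of the `instances` datasets loaded above.
+
+```python
+example = dataset["train"][0]
+
+# decode_rle=True  -> each segmentation is decoded into a binary mask image.
+# decode_rle=False -> each segmentation stays in compressed RLE form,
+#                     i.e. {"counts": [...], "size": [height, width]}.
+segmentation = example["annotations"]["segmentation"][0]
+print(type(segmentation))
+
+# Category names are ClassLabel ids; CATEGORIES[id] recovers the string name.
+print(example["annotations"]["category"][0]["name"])
+```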
+
+### Data Fields
+
+[More Information Needed]
+
+### Data Splits
+
+[More Information Needed]
+
+## Dataset Creation
+
+### Curation Rationale
+
+[More Information Needed]
+
+### Source Data
+
+[More Information Needed]
+
+#### Initial Data Collection and Normalization
+
+[More Information Needed]
+
+#### Who are the source language producers?
+
+[More Information Needed]
+
+### Annotations
+
+[More Information Needed]
+
+#### Annotation process
+
+[More Information Needed]
+
+#### Who are the annotators?
+
+[More Information Needed]
+
+### Personal and Sensitive Information
+
+[More Information Needed]
+
+## Considerations for Using the Data
+
+### Social Impact of Dataset
+
+[More Information Needed]
+
+### Discussion of Biases
+
+[More Information Needed]
+
+### Other Known Limitations
+
+[More Information Needed]
+
+## Additional Information
+
+### Dataset Curators
+
+[More Information Needed]
+
+### Licensing Information
+
+> The annotations in this dataset along with this website belong to the COCO Consortium and are licensed under a [Creative Commons Attribution 4.0 License](https://creativecommons.org/licenses/by/4.0/legalcode).
+>
+> ## Images
+> The COCO Consortium does not own the copyright of the images. Use of the images must abide by the Flickr Terms of Use. The users of the images accept full responsibility for the use of the dataset, including but not limited to the use of any copies of copyrighted images that they may create from the dataset.
+>
+> ## Software
+> Copyright (c) 2015, COCO Consortium. All rights reserved. Redistribution and use software in source and binary form, with or without modification, are permitted provided that the following conditions are met:
+> - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+> - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+> - Neither the name of the COCO Consortium nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+>
+> THIS SOFTWARE AND ANNOTATIONS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+### Citation Information
+
+```bibtex
+@inproceedings{lin2014microsoft,
+  title={Microsoft coco: Common objects in context},
+  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+  booktitle={Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13},
+  pages={740--755},
+  year={2014},
+  organization={Springer}
+}
+```
+
+### Contributions
+
+Thanks to the [COCO Consortium](https://cocodataset.org/#people) for creating this dataset.
diff --git a/tests/MSCOCO_test.py b/tests/MSCOCO_test.py
index 426b3cb..56fd48d 100644
--- a/tests/MSCOCO_test.py
+++ b/tests/MSCOCO_test.py
@@ -1,12 +1,23 @@
+import os
+
 import datasets as ds
 import pytest
 
+from MSCOCO import CATEGORIES, SUPER_CATEGORIES
+
 
 @pytest.fixture
 def dataset_path() -> str:
     return "MSCOCO.py"
 
 
+@pytest.mark.skipif(
+    condition=bool(os.environ.get("CI", False)),
+    reason=(
+        "Because this loading script downloads a large dataset, "
+        "it is skipped on CI."
+    ),
+)
 @pytest.mark.parametrize(
     argnames="decode_rle,",
     argvalues=(
@@ -46,3 +57,8 @@ def test_load_dataset(
     )
     assert dataset["train"].num_rows == expected_num_train
     assert dataset["validation"].num_rows == expected_num_validation
+
+
+def test_consts():
+    assert len(CATEGORIES) == 80
+    assert len(SUPER_CATEGORIES) == 12