-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathpycococreatortools.py
executable file
·136 lines (109 loc) · 3.74 KB
/
pycococreatortools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
import os
import re
import datetime
import numpy as np
from itertools import groupby
from skimage import measure
from PIL import Image
from pycocotools import mask
def convert(text):
    """Return ``int(text)`` for an all-decimal token, else ``text.lower()``.

    ``str.isdecimal`` is used instead of ``str.isdigit`` because ``isdigit``
    is also true for characters such as superscript digits, which ``int``
    rejects with ``ValueError``.
    """
    return int(text) if text.isdecimal() else text.lower()


def natrual_key(key):
    """Build a sort key that orders embedded ASCII digit runs numerically.

    ``key`` is split on runs of ``0-9`` (the capturing group keeps those runs
    in the result) and each piece goes through ``convert``, so
    ``"img12.png"`` becomes ``["img", 12, ".png"]``.

    Args:
        key: the string to derive a sort key from.

    Returns:
        A list alternating lower-cased text pieces and integers.
    """
    return [convert(piece) for piece in re.split(r"([0-9]+)", key)]
def resize_binary_mask(array, new_size):
    """Resize a binary mask and return the result as a boolean array.

    Args:
        array: mask array; it is cast to ``uint8`` and scaled to 0/255 before
            being handed to PIL.
        new_size: target size forwarded unchanged to ``Image.resize``
            (presumably ``(width, height)`` - confirm against Pillow docs).

    Returns:
        A ``np.bool_`` array in which every non-zero resized pixel is True.
    """
    # NOTE(review): resize() runs with its default resampling filter and any
    # non-zero output pixel becomes True, so edges may grow slightly - confirm
    # this is intended.
    scaled_pixels = array.astype(np.uint8) * 255
    resized = Image.fromarray(scaled_pixels).resize(new_size)
    return np.asarray(resized).astype(np.bool_)
def close_contour(contour):
    """Return ``contour`` with its first point repeated at the end if needed.

    Args:
        contour: sequence of points, indexed by position.

    Returns:
        ``contour`` itself when its first and last points already match,
        otherwise a new array with the first point stacked onto the end.
    """
    first_point = contour[0]
    if np.array_equal(first_point, contour[-1]):
        # Already closed: hand back the input untouched.
        return contour
    return np.vstack((contour, first_point))
def binary_mask_to_rle(binary_mask):
    """Run-length encode a mask in column-major (Fortran) order.

    Args:
        binary_mask: numpy array whose values are grouped into runs.

    Returns:
        A dict with ``"counts"`` (list of run lengths) and ``"size"`` (the
        mask shape as a list). When the first run has value 1, a leading 0
        is emitted so the counts always start with a run of zeros.
    """
    run_lengths = []
    column_major = binary_mask.ravel(order="F")
    for pixel_value, run in groupby(column_major):
        # Only the very first run can find the list still empty.
        if not run_lengths and pixel_value == 1:
            run_lengths.append(0)
        run_lengths.append(sum(1 for _ in run))
    return {"counts": run_lengths, "size": list(binary_mask.shape)}
def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Converts a binary mask to COCO polygon representation

    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to approximated
            polygonal chain. If tolerance is 0, the original coordinate array is returned.

    Returns:
        A list of polygons, one per contour that keeps at least three points
        after approximation. Each polygon is a flat list of coordinates with
        the two columns of every contour point swapped and negative values
        clamped to 0.
    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(
        binary_mask, pad_width=1, mode="constant", constant_values=0
    )
    contours = measure.find_contours(padded_binary_mask, 0.5)
    for contour in contours:
        # Undo the padding offset one contour at a time. Subtracting from the
        # whole list at once makes NumPy build a single array out of contours
        # of different lengths, which raises ValueError on recent NumPy.
        contour = close_contour(contour - 1)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        # swap the two coordinate columns before flattening
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # after padding and subtracting 1 we may get -0.5 points in our segmentation
        segmentation = [0 if i < 0 else i for i in segmentation]
        polygons.append(segmentation)
    return polygons
def create_image_info(
    image_id,
    file_name,
    image_size,
    date_captured=None,
    license_id=1,
    coco_url="",
    flickr_url="",
):
    """Build the COCO ``images`` entry for one image.

    Args:
        image_id: value stored under ``"id"``.
        file_name: value stored under ``"file_name"``.
        image_size: indexable pair; element 0 is stored as ``"width"`` and
            element 1 as ``"height"``.
        date_captured: timestamp string to store. When ``None`` (the
            default) the current UTC time is used, formatted as a naive ISO
            string with a space separator.
        license_id: value stored under ``"license"``.
        coco_url: value stored under ``"coco_url"``.
        flickr_url: value stored under ``"flickr_url"``.

    Returns:
        A dict with the image metadata.
    """
    if date_captured is None:
        # Resolve the timestamp per call: a default computed in the signature
        # is evaluated only once, at import. The tzinfo is dropped so the
        # string keeps the naive-UTC format without an offset suffix.
        date_captured = (
            datetime.datetime.now(datetime.timezone.utc)
            .replace(tzinfo=None)
            .isoformat(" ")
        )

    image_info = {
        "id": image_id,
        "file_name": file_name,
        "width": image_size[0],
        "height": image_size[1],
        "date_captured": date_captured,
        "license": license_id,
        "coco_url": coco_url,
        "flickr_url": flickr_url,
    }
    return image_info
def create_annotation_info(
    annotation_id,
    image_id,
    category_info,
    binary_mask,
    image_size=None,
    tolerance=2,
    bounding_box=None,
):
    """Build the COCO ``annotations`` entry for one object mask.

    Args:
        annotation_id: value stored under ``"id"``.
        image_id: value stored under ``"image_id"``.
        category_info: mapping providing ``"id"`` and ``"is_crowd"``.
        binary_mask: numpy mask of the object.
        image_size: when given, the mask is first resized to this size with
            ``resize_binary_mask``.
        tolerance: polygon approximation tolerance passed to
            ``binary_mask_to_polygon`` (non-crowd annotations only).
        bounding_box: optional precomputed box (ndarray, list or tuple). When
            ``None`` it is derived from the encoded mask with ``mask.toBbox``.

    Returns:
        The annotation dict, or ``None`` when the mask area is below 1 or a
        non-crowd mask yields no polygon.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    # mask.encode is given a Fortran-ordered uint8 array.
    binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(binary_mask_encoded)
    if area < 1:
        return None

    if bounding_box is None:
        bounding_box = mask.toBbox(binary_mask_encoded)

    if category_info["is_crowd"]:
        # Crowd regions are stored as run-length encoding.
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        # Individual objects are stored as polygons.
        is_crowd = 0
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area.tolist(),
        # np.asarray lets callers pass a plain list/tuple box as well as an
        # ndarray; for ndarrays the result is unchanged.
        "bbox": np.asarray(bounding_box).tolist(),
        "segmentation": segmentation,
        "width": binary_mask.shape[1],
        "height": binary_mask.shape[0],
    }
    return annotation_info