Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Fat dataset loader #312

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 31 additions & 10 deletions paz/datasets/fat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

class FAT(Loader):
""" Dataset loader for the falling things dataset (FAT).

# Arguments
path: String indicating full path to dataset
e.g. /home/user/fat/
Expand All @@ -25,16 +24,26 @@ class FAT(Loader):
Estimation (DOPE)](https://github.com/NVlabs/Deep_Object_Pose)
"""
# TODO: Allow selection of class_names.
def __init__(self, path, split='train', class_names='all'):
    # The 'all' shortcut expands to every class name registered for FAT.
    if class_names == 'all':
        class_names = get_class_names('FAT')
    # Each class name maps to its position inside ``class_names``.
    self.class_to_arg = {
        name: arg for arg, name in enumerate(class_names)}
    super(FAT, self).__init__(path, split, class_names, 'FAT')
def __init__(self, path, split='train', class_type='all'):
    # ``class_type`` is either 'all' or the name of one single object.
    self.class_type = class_type
    if class_type != 'all':
        # NOTE(review): ``class_names`` is a bare string in this branch
        # (not a list); ``load_data`` concatenates it with '_16k'.
        self.class_names = class_type
        self.class_to_arg = {class_type: 0}
    else:
        self.class_names = get_class_names('FAT')
        # Each class name maps to its position inside ``class_names``.
        self.class_to_arg = {
            name: arg for arg, name in enumerate(self.class_names)}
    self.split = split
    super(FAT, self).__init__(path, split, self.class_names, 'FAT')

def load_data(self):
scene_names = glob(self.path + 'mixed/*')
if self.class_type == 'all':
scene_names = glob(self.path + 'mixed/*')
print(self.path)
else:
object_name = self.class_names + '_16k'
scene_names = glob(self.path + 'single/' + object_name + '/*')
image_paths, label_paths = [], []
for scene_name in scene_names:
scene_image_paths, scene_label_paths = [], []
Expand All @@ -47,7 +56,6 @@ def load_data(self):
scene_label_paths = scene_label_paths + side_label_paths
image_paths = image_paths + scene_image_paths
label_paths = label_paths + scene_label_paths

self.data = []
progress_bar = Progbar(len(image_paths))
for sample_arg, sample in enumerate(zip(image_paths, label_paths)):
Expand All @@ -59,8 +67,17 @@ def load_data(self):
continue
self.data.append({'image': image_path, 'boxes': boxes})
progress_bar.update(sample_arg + 1)
train_split = int(len(self.data) * 0.4)
test_split = int(len(self.data) * 0.4)
if self.split == 'train':
self.data = self.data[:train_split]
if self.split == 'test':
self.data = self.data[train_split:(train_split + test_split)]
if self.split == 'validation':
self.data = self.data[(train_split + test_split):]
return self.data


def _extract_boxes(self, json_filename):
json_data = json.load(open(json_filename, 'r'))
num_objects = len(json_data['objects'])
Expand All @@ -78,13 +95,17 @@ def _extract_boxes(self, json_filename):
box_data[object_arg, -1] = self.class_to_arg[class_name]
return box_data


def _base_number(self, filename):
order = os.path.basename(filename)
order = order.split('.')[0]
order = float(order)
return order


def _valid_name_match(self, image_path, label_path):
image_name = os.path.basename(image_path)
label_name = os.path.basename(label_path)
return image_name[:-3] == label_name[:-4]


194 changes: 191 additions & 3 deletions paz/models/segmentation/unet.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from tensorflow.keras.applications import ConvNeXtTiny, ConvNeXtSmall
from tensorflow.keras.applications import ConvNeXtBase, ConvNeXtLarge
from tensorflow.keras.applications import ConvNeXtXLarge
from tensorflow.keras.layers import Conv2DTranspose, Concatenate, UpSampling2D
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation
from tensorflow.keras.layers import MaxPooling2D, Input
Expand All @@ -6,6 +9,20 @@
from tensorflow.keras.applications import ResNet50V2


def compute_upsampling_size(first_layer, second_layer):
    """Computes the integer upsampling factor between two tensors.

    The factor is the size of axis 1 of ``first_layer`` divided by the
    size of axis 1 of ``second_layer``, rounded down.

    # Arguments
        first_layer: branch layer (object with a ``shape`` attribute).
        second_layer: decoder layer (object with a ``shape`` attribute).

    # Returns
        Integer upsampling size.

    # Raises
        ValueError: if axis 1 of either shape is undefined (``None``).
    """
    branch_size = first_layer.shape[1]
    decoder_size = second_layer.shape[1]
    if branch_size is None or decoder_size is None:
        # Without a static size the ratio cannot be computed; fail with
        # an explicit message instead of an arithmetic ``TypeError``.
        raise ValueError(
            'Upsampling size requires defined dimensions on axis 1, '
            'got {} and {}'.format(branch_size, decoder_size))
    # Floor division keeps the computation in integers.
    return int(branch_size // decoder_size)


def convolution_block(inputs, filters, kernel_size=3, activation='relu'):
"""UNET convolution block containing Conv2D -> BatchNorm -> Activation

Expand All @@ -25,7 +42,7 @@ def convolution_block(inputs, filters, kernel_size=3, activation='relu'):
return x


def upsample_block(x, filters, branch):
def upsample_block(x, filters, branch, size):
"""UNET upsample block. This block upsamples ``x``, concatenates a
``branch`` tensor and applies two convolution blocks:
Upsample -> Concatenate -> 2 x ConvBlock.
Expand All @@ -38,7 +55,7 @@ def upsample_block(x, filters, branch):
# Returns
A Keras tensor.
"""
x = UpSampling2D(size=2)(x)
x = UpSampling2D(size)(x)
x = Concatenate(axis=3)([x, branch])
x = convolution_block(x, filters)
x = convolution_block(x, filters)
Expand Down Expand Up @@ -146,7 +163,8 @@ def build_UNET(num_classes, backbone, branch_tensors,
x = convolution_block(x, 512)

for branch, filters in zip(branch_tensors, decoder_filters):
x = decoder(x, filters, branch)
size = compute_upsampling_size(branch, x)
x = decoder(x, filters, branch, size)

kwargs = {'use_bias': True, 'kernel_initializer': 'glorot_uniform'}
x = Conv2D(num_classes, 3, (1, 1), 'same', **kwargs)(x)
Expand Down Expand Up @@ -289,3 +307,173 @@ def UNET_RESNET50(num_classes=1, input_shape=(224, 224, 3), weights='imagenet',
return UNET(input_shape, num_classes, RESNET50_branches, ResNet50V2,
weights, freeze_backbone, activation, decoder_type,
decode_filters, input_tensor, 'UNET-RESNET50')


def UNET_ConvNeXtTiny(num_classes=1, input_shape=(224, 224, 3),
                      weights='imagenet', freeze_backbone=False,
                      activation='sigmoid', decoder_type='upsample',
                      decode_filters=[256, 128, 64, 32, 16]):
    """Build a UNET model with a ``ConvNeXtTiny`` backbone.

    # Arguments
        num_classes: Integer used for output number of channels.
        input_shape: List of integers: ``(H, W, num_channels)``.
        weights: String indicating backbone weights e.g.
            ``'imagenet'``, ``None``.
        freeze_backbone: Boolean. If True backbone updates are frozen.
        activation: Output activation of the model.
        decoder_type: String indicating decoding function e.g.
            ``'upsample'``, ``'transpose'``.
        decode_filters: List of integers used in each application of
            decoder.

    # Returns
        A UNET-ConvNeXtTiny Keras/tensorflow model.
    """
    # Backbone layers used as branches, listed from stage 2 down to the
    # pre-stem normalization layer.
    branch_names = ['convnext_tiny_stage_2_block_8_identity',
                    'convnext_tiny_stage_1_block_2_identity',
                    'convnext_tiny_stage_0_block_2_identity',
                    'convnext_tiny_prestem_normalization']
    return UNET(input_shape, num_classes, branch_names, ConvNeXtTiny,
                weights, freeze_backbone, activation, decoder_type,
                decode_filters, name='UNET-ConvNeXtTiny')


def UNET_ConvNeXtSmall(num_classes=1, input_shape=(224, 224, 3),
                       weights='imagenet', freeze_backbone=False,
                       activation='sigmoid', decoder_type='upsample',
                       decode_filters=[256, 128, 64, 32, 16]):
    """Build a UNET model with a ``ConvNeXtSmall`` backbone.

    # Arguments
        num_classes: Integer used for output number of channels.
        input_shape: List of integers: ``(H, W, num_channels)``.
        weights: String indicating backbone weights e.g.
            ``'imagenet'``, ``None``.
        freeze_backbone: Boolean. If True backbone updates are frozen.
        activation: Output activation of the model.
        decoder_type: String indicating decoding function e.g.
            ``'upsample'``, ``'transpose'``.
        decode_filters: List of integers used in each application of
            decoder.

    # Returns
        A UNET-ConvNeXtSmall Keras/tensorflow model.
    """
    # Backbone layers used as branches, listed from stage 2 down to the
    # pre-stem normalization layer. Stage 2 is tapped at block 26 to
    # match the Base/Large/XLarge variants; the previous 'block_8' name
    # looked copied from the Tiny variant.
    # NOTE(review): assumes stage 2 of ConvNeXtSmall has 27 blocks --
    # confirm against the Keras model summary.
    branch_names = ['convnext_small_stage_2_block_26_identity',
                    'convnext_small_stage_1_block_2_identity',
                    'convnext_small_stage_0_block_2_identity',
                    'convnext_small_prestem_normalization']
    return UNET(input_shape, num_classes, branch_names,
                ConvNeXtSmall, weights, freeze_backbone, activation,
                decoder_type, decode_filters, name='UNET-ConvNeXtSmall')


def UNET_ConvNeXtBase(num_classes=1, input_shape=(224, 224, 3),
                      weights='imagenet', freeze_backbone=False,
                      activation='sigmoid', decoder_type='upsample',
                      decode_filters=[256, 128, 64, 32, 16]):
    """Build a UNET model with a ``ConvNeXtBase`` backbone.

    # Arguments
        num_classes: Integer used for output number of channels.
        input_shape: List of integers: ``(H, W, num_channels)``.
        weights: String indicating backbone weights e.g.
            ``'imagenet'``, ``None``.
        freeze_backbone: Boolean. If True backbone updates are frozen.
        activation: Output activation of the model.
        decoder_type: String indicating decoding function e.g.
            ``'upsample'``, ``'transpose'``.
        decode_filters: List of integers used in each application of
            decoder.

    # Returns
        A UNET-ConvNeXtBase Keras/tensorflow model.
    """
    # Backbone layers used as branches, listed from stage 2 down to the
    # pre-stem normalization layer.
    branch_names = ['convnext_base_stage_2_block_26_identity',
                    'convnext_base_stage_1_block_2_identity',
                    'convnext_base_stage_0_block_2_identity',
                    'convnext_base_prestem_normalization']
    return UNET(input_shape, num_classes, branch_names, ConvNeXtBase,
                weights, freeze_backbone, activation, decoder_type,
                decode_filters, name='UNET-ConvNeXtBase')


def UNET_ConvNeXtLarge(num_classes=1, input_shape=(224, 224, 3),
                       weights='imagenet', freeze_backbone=False,
                       activation='sigmoid', decoder_type='upsample',
                       decode_filters=[256, 128, 64, 32, 16]):
    """Build a UNET model with a ``ConvNeXtLarge`` backbone.

    # Arguments
        num_classes: Integer used for output number of channels.
        input_shape: List of integers: ``(H, W, num_channels)``.
        weights: String indicating backbone weights e.g.
            ``'imagenet'``, ``None``.
        freeze_backbone: Boolean. If True backbone updates are frozen.
        activation: Output activation of the model.
        decoder_type: String indicating decoding function e.g.
            ``'upsample'``, ``'transpose'``.
        decode_filters: List of integers used in each application of
            decoder.

    # Returns
        A UNET-ConvNeXtLarge Keras/tensorflow model.
    """
    # Backbone layers used as branches, listed from stage 2 down to the
    # pre-stem normalization layer.
    branch_names = ['convnext_large_stage_2_block_26_identity',
                    'convnext_large_stage_1_block_2_identity',
                    'convnext_large_stage_0_block_2_identity',
                    'convnext_large_prestem_normalization']
    return UNET(input_shape, num_classes, branch_names,
                ConvNeXtLarge, weights, freeze_backbone, activation,
                decoder_type, decode_filters, name='UNET-ConvNeXtLarge')


def UNET_ConvNeXtXLarge(num_classes=1, input_shape=(224, 224, 3),
                        weights='imagenet', freeze_backbone=False,
                        activation='sigmoid', decoder_type='upsample',
                        decode_filters=[256, 128, 64, 32, 16]):
    """Build a UNET model with a ``ConvNeXtXLarge`` backbone.

    # Arguments
        num_classes: Integer used for output number of channels.
        input_shape: List of integers: ``(H, W, num_channels)``.
        weights: String indicating backbone weights e.g.
            ``'imagenet'``, ``None``.
        freeze_backbone: Boolean. If True backbone updates are frozen.
        activation: Output activation of the model.
        decoder_type: String indicating decoding function e.g.
            ``'upsample'``, ``'transpose'``.
        decode_filters: List of integers used in each application of
            decoder.

    # Returns
        A UNET-ConvNeXtXLarge Keras/tensorflow model.
    """
    # Backbone layers used as branches, listed from stage 2 down to the
    # pre-stem normalization layer.
    branch_names = ['convnext_xlarge_stage_2_block_26_identity',
                    'convnext_xlarge_stage_1_block_2_identity',
                    'convnext_xlarge_stage_0_block_2_identity',
                    'convnext_xlarge_prestem_normalization']
    return UNET(input_shape, num_classes, branch_names,
                ConvNeXtXLarge, weights, freeze_backbone, activation,
                decoder_type, decode_filters, name='UNET-ConvNeXtXLarge')