From d41eeb3657fbd4604b094ec855490834483056ba Mon Sep 17 00:00:00 2001 From: amine789 Date: Tue, 13 Jun 2023 22:42:51 +0200 Subject: [PATCH 1/3] This commit will add ConvNext backbones to the UNET model --- paz/models/segmentation/unet.py | 194 +++++++++++++++++++++++++++++++- 1 file changed, 191 insertions(+), 3 deletions(-) diff --git a/paz/models/segmentation/unet.py b/paz/models/segmentation/unet.py index 4fb7d92a4..378c62e1f 100644 --- a/paz/models/segmentation/unet.py +++ b/paz/models/segmentation/unet.py @@ -1,3 +1,6 @@ +from tensorflow.keras.applications import ConvNeXtTiny, ConvNeXtSmall +from tensorflow.keras.applications import ConvNeXtBase, ConvNeXtLarge +from tensorflow.keras.applications import ConvNeXtXLarge from tensorflow.keras.layers import Conv2DTranspose, Concatenate, UpSampling2D from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation from tensorflow.keras.layers import MaxPooling2D, Input @@ -6,6 +9,20 @@ from tensorflow.keras.applications import ResNet50V2 +def compute_upsampling_size(first_layer, second_layer): + """Function to compute the upsampling size + + # Arguments + first_layer: branch layer + second_layer: decoder layer + + # Returns + upsampling size + """ + size = int(first_layer.shape[1]/second_layer.shape[1]) + return size + + def convolution_block(inputs, filters, kernel_size=3, activation='relu'): """UNET convolution block containing Conv2D -> BatchNorm -> Activation @@ -25,7 +42,7 @@ def convolution_block(inputs, filters, kernel_size=3, activation='relu'): return x -def upsample_block(x, filters, branch): +def upsample_block(x, filters, branch, size): """UNET upsample block. This block upsamples ``x``, concatenates a ``branch`` tensor and applies two convolution blocks: Upsample -> Concatenate -> 2 x ConvBlock. @@ -38,7 +55,7 @@ def upsample_block(x, filters, branch): # Returns A Keras tensor. """ - x = UpSampling2D(size=2)(x) + x = UpSampling2D(size)(x) x = Concatenate(axis=3)([x, branch]) x = convolution_block(x, filters) x = convolution_block(x, filters) @@ -146,7 +163,8 @@ def build_UNET(num_classes, backbone, branch_tensors, x = convolution_block(x, 512) for branch, filters in zip(branch_tensors, decoder_filters): - x = decoder(x, filters, branch) + size = compute_upsampling_size(branch, x) + x = decoder(x, filters, branch, size) kwargs = {'use_bias': True, 'kernel_initializer': 'glorot_uniform'} x = Conv2D(num_classes, 3, (1, 1), 'same', **kwargs)(x) @@ -289,3 +307,173 @@ def UNET_RESNET50(num_classes=1, input_shape=(224, 224, 3), weights='imagenet', return UNET(input_shape, num_classes, RESNET50_branches, ResNet50V2, weights, freeze_backbone, activation, decoder_type, decode_filters, input_tensor, 'UNET-RESNET50') + + +def UNET_ConvNeXtTiny(num_classes=1, input_shape=(224, 224, 3), + weights='imagenet', freeze_backbone=False, + activation='sigmoid', decoder_type='upsample', + decode_filters=[256, 128, 64, 32, 16]): + """Build a UNET model with a ``ConvNeXtTiny`` backbone. + + # Arguments + input_shape: List of integers: ``(H, W, num_channels)``. + num_classes: Integer used for output number of channels. + branch_names: List of strings containing layer names of ``BACKBONE()``. + BACKBONE: Class for instantiating a backbone model + weights: String indicating backbone weights e.g. + ''imagenet'', ``None``. + freeze_backbone: Boolean. If True ``BACKBONE()`` updates are frozen. + decoder_type: String indicating decoding function e.g. + ''upsample ''transpose''. + decoder_filters: List of integers used in each application of decoder. + activation: Output activation of the model. + input_tensor: Input tensor. If given ``shape`` is overwritten and this + tensor is used instead as input. + name: String. indicating the name of the model. + + # Returns + A UNET-VGG16 Keras/tensorflow model. + """ + ConvNeXtTiny_branches = ['convnext_tiny_stage_2_block_8_identity', + 'convnext_tiny_stage_1_block_2_identity', + 'convnext_tiny_stage_0_block_2_identity', + 'convnext_tiny_prestem_normalization'] + return UNET(input_shape, num_classes, ConvNeXtTiny_branches, ConvNeXtTiny, + weights, freeze_backbone, activation, decoder_type, + decode_filters, name='UNET-ConvNeXtTiny') + + +def UNET_ConvNeXtSmall(num_classes=1, input_shape=(224, 224, 3), + weights='imagenet', freeze_backbone=False, + activation='sigmoid', decoder_type='upsample', + decode_filters=[256, 128, 64, 32, 16]): + """Build a UNET model with a ``ConvNeXtSmall`` backbone. + + # Arguments + input_shape: List of integers: ``(H, W, num_channels)``. + num_classes: Integer used for output number of channels. + branch_names: List of strings containing layer names of ``BACKBONE()``. + BACKBONE: Class for instantiating a backbone model + weights: String indicating backbone weights e.g. + ''imagenet'', ``None``. + freeze_backbone: Boolean. If True ``BACKBONE()`` updates are frozen. + decoder_type: String indicating decoding function e.g. + ''upsample ''transpose''. + decoder_filters: List of integers used in each application of decoder. + activation: Output activation of the model. + input_tensor: Input tensor. If given ``shape`` is overwritten and this + tensor is used instead as input. + name: String. indicating the name of the model. + + # Returns + A UNET-VGG16 Keras/tensorflow model. + """ + ConvNeXtSmall_branches = ['convnext_small_stage_2_block_8_identity', + 'convnext_small_stage_1_block_2_identity', + 'convnext_small_stage_0_block_2_identity', + 'convnext_small_prestem_normalization'] + return UNET(input_shape, num_classes, ConvNeXtSmall_branches, + ConvNeXtSmall, weights, freeze_backbone, activation, + decoder_type, decode_filters, name='UNET-ConvNeXtSmall') + + +def UNET_ConvNeXtBase(num_classes=1, input_shape=(224, 224, 3), + weights='imagenet', freeze_backbone=False, + activation='sigmoid', decoder_type='upsample', + decode_filters=[256, 128, 64, 32, 16]): + """Build a UNET model with a ``ConvNeXtBase`` backbone. + + # Arguments + input_shape: List of integers: ``(H, W, num_channels)``. + num_classes: Integer used for output number of channels. + branch_names: List of strings containing layer names of ``BACKBONE()``. + BACKBONE: Class for instantiating a backbone model + weights: String indicating backbone weights e.g. + ''imagenet'', ``None``. + freeze_backbone: Boolean. If True ``BACKBONE()`` updates are frozen. + decoder_type: String indicating decoding function e.g. + ''upsample ''transpose''. + decoder_filters: List of integers used in each application of decoder. + activation: Output activation of the model. + input_tensor: Input tensor. If given ``shape`` is overwritten and this + tensor is used instead as input. + name: String. indicating the name of the model. + + # Returns + A UNET-VGG16 Keras/tensorflow model. + """ + ConvNeXtBase_branches = ['convnext_base_stage_2_block_26_identity', + 'convnext_base_stage_1_block_2_identity', + 'convnext_base_stage_0_block_2_identity', + 'convnext_base_prestem_normalization'] + return UNET(input_shape, num_classes, ConvNeXtBase_branches, ConvNeXtBase, + weights, freeze_backbone, activation, decoder_type, + decode_filters, name='UNET-ConvNeXtBase') + + +def UNET_ConvNeXtLarge(num_classes=1, input_shape=(224, 224, 3), + weights='imagenet', freeze_backbone=False, + activation='sigmoid', decoder_type='upsample', + decode_filters=[256, 128, 64, 32, 16]): + """Build a UNET model with a ``ConvNeXtLarge`` backbone. + + # Arguments + input_shape: List of integers: ``(H, W, num_channels)``. + num_classes: Integer used for output number of channels. + branch_names: List of strings containing layer names of ``BACKBONE()``. + BACKBONE: Class for instantiating a backbone model + weights: String indicating backbone weights e.g. + ''imagenet'', ``None``. + freeze_backbone: Boolean. If True ``BACKBONE()`` updates are frozen. + decoder_type: String indicating decoding function e.g. + ''upsample ''transpose''. + decoder_filters: List of integers used in each application of decoder. + activation: Output activation of the model. + input_tensor: Input tensor. If given ``shape`` is overwritten and this + tensor is used instead as input. + name: String. indicating the name of the model. + + # Returns + A UNET-VGG16 Keras/tensorflow model. + """ + ConvNeXtLarge_branches = ['convnext_large_stage_2_block_26_identity', + 'convnext_large_stage_1_block_2_identity', + 'convnext_large_stage_0_block_2_identity', + 'convnext_large_prestem_normalization'] + return UNET(input_shape, num_classes, ConvNeXtLarge_branches, + ConvNeXtLarge, weights, freeze_backbone, activation, + decoder_type, decode_filters, name='UNET-ConvNeXtLarge') + + +def UNET_ConvNeXtXLarge(num_classes=1, input_shape=(224, 224, 3), + weights='imagenet', freeze_backbone=False, + activation='sigmoid', decoder_type='upsample', + decode_filters=[256, 128, 64, 32, 16]): + """Build a UNET model with a ``ConvNeXtXLarge`` backbone. + + # Arguments + input_shape: List of integers: ``(H, W, num_channels)``. + num_classes: Integer used for output number of channels. + branch_names: List of strings containing layer names of ``BACKBONE()``. + BACKBONE: Class for instantiating a backbone model + weights: String indicating backbone weights e.g. + ''imagenet'', ``None``. + freeze_backbone: Boolean. If True ``BACKBONE()`` updates are frozen. + decoder_type: String indicating decoding function e.g. + ''upsample ''transpose''. + decoder_filters: List of integers used in each application of decoder. + activation: Output activation of the model. + input_tensor: Input tensor. If given ``shape`` is overwritten and this + tensor is used instead as input. + name: String. indicating the name of the model. + + # Returns + A UNET-VGG16 Keras/tensorflow model. + """ + ConvNeXtXLarge_branches = ['convnext_xlarge_stage_2_block_26_identity', + 'convnext_xlarge_stage_1_block_2_identity', + 'convnext_xlarge_stage_0_block_2_identity', + 'convnext_xlarge_prestem_normalization'] + return UNET(input_shape, num_classes, ConvNeXtXLarge_branches, + ConvNeXtXLarge, weights, freeze_backbone, activation, + decoder_type, decode_filters, name='UNET-ConvNeXtXLarge') From e59737e7a03fa243603f2e8567a2fea97a06a3f6 Mon Sep 17 00:00:00 2001 From: amine789 Date: Thu, 31 Aug 2023 16:18:30 +0200 Subject: [PATCH 2/3] fix fat loader dataset --- paz/datasets/fat.py | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/paz/datasets/fat.py b/paz/datasets/fat.py index 2f71c7d5e..f775e5b8c 100644 --- a/paz/datasets/fat.py +++ b/paz/datasets/fat.py @@ -5,13 +5,12 @@ import numpy as np from tensorflow.keras.utils import Progbar -from ..abstract import Loader -from .utils import get_class_names +from paz.abstract import Loader +from utils import get_class_names class FAT(Loader): """ Dataset loader for the falling things dataset (FAT). - # Arguments path: String indicating full path to dataset e.g. /home/user/fat/ @@ -25,16 +24,23 @@ class FAT(Loader): Estimation (DOPE)](https://github.com/NVlabs/Deep_Object_Pose) """ # TODO: Allow selection of class_names. - def __init__(self, path, split='train', class_names='all'): + def __init__(self, path, split=(0.4, 0.4, 0.2), class_names = 'all'): if class_names == 'all': - class_names = get_class_names('FAT') - self.class_to_arg = dict( - zip(class_names, list(range(len(class_names))))) - - super(FAT, self).__init__(path, split, class_names, 'FAT') + self.class_names = get_class_names('FAT') + self.class_to_arg = dict( + zip(class_names, list(range(len(class_names))))) + else: + self.class_names = class_names + self.class_to_arg = {class_names: 0} + self.split = split + super(FAT, self).__init__(path, split, self.class_names, 'FAT') def load_data(self): - scene_names = glob(self.path + 'mixed/*') + if self.class_names == 'all': + scene_names = glob(self.path + 'mixed/*') + else: + object_name = self.class_names + '_16k' + scene_names = glob(self.path + 'single/' + object_name + '/*') image_paths, label_paths = [], [] for scene_name in scene_names: scene_image_paths, scene_label_paths = [], [] @@ -59,7 +65,16 @@ def load_data(self): continue self.data.append({'image': image_path, 'boxes': boxes}) progress_bar.update(sample_arg + 1) - return self.data + data_dict = {} + train_split = int(len(self.data) * self.split[0]) + test_split = int(len(self.data) * self.split[1]) + validation_split = int(len(self.data) * self.split[2]) + data_dict['train'] = self.data[:train_split] + data_dict['test'] = self.data[ + train_split:(train_split + test_split)] + data_dict['validation'] = self.data[(train_split + test_split):] + return data_dict + def _extract_boxes(self, json_filename): json_data = json.load(open(json_filename, 'r')) @@ -78,13 +93,18 @@ def _extract_boxes(self, json_filename): box_data[object_arg, -1] = self.class_to_arg[class_name] return box_data + def _base_number(self, filename): order = os.path.basename(filename) order = order.split('.')[0] order = float(order) return order + def _valid_name_match(self, image_path, label_path): image_name = os.path.basename(image_path) label_name = os.path.basename(label_path) return image_name[:-3] == label_name[:-4] + +fat = Fat('/home/ramit/git/paz') + From 2ade213d6ad99a720ec3dc658d67e71715554026 Mon Sep 17 00:00:00 2001 From: amine789 Date: Mon, 4 Sep 2023 16:36:35 +0200 Subject: [PATCH 3/3] Fix Fat dataset loader --- paz/datasets/fat.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/paz/datasets/fat.py b/paz/datasets/fat.py index f775e5b8c..52f1d2f26 100644 --- a/paz/datasets/fat.py +++ b/paz/datasets/fat.py @@ -5,8 +5,8 @@ import numpy as np from tensorflow.keras.utils import Progbar -from paz.abstract import Loader -from utils import get_class_names +from ..abstract import Loader +from .utils import get_class_names class FAT(Loader): @@ -24,20 +24,23 @@ class FAT(Loader): Estimation (DOPE)](https://github.com/NVlabs/Deep_Object_Pose) """ # TODO: Allow selection of class_names. - def __init__(self, path, split=(0.4, 0.4, 0.2), class_names = 'all'): - if class_names == 'all': + def __init__(self, path, split='train', class_type = 'all'): + self.class_type = class_type + if class_type == 'all': self.class_names = get_class_names('FAT') self.class_to_arg = dict( - zip(class_names, list(range(len(class_names))))) + zip(self.class_names, list(range(len( + self.class_names))))) else: - self.class_names = class_names - self.class_to_arg = {class_names: 0} + self.class_names = class_type + self.class_to_arg = {class_type: 0} self.split = split super(FAT, self).__init__(path, split, self.class_names, 'FAT') def load_data(self): - if self.class_names == 'all': + if self.class_type == 'all': scene_names = glob(self.path + 'mixed/*') + print(self.path) else: object_name = self.class_names + '_16k' scene_names = glob(self.path + 'single/' + object_name + '/*') @@ -53,7 +56,6 @@ def load_data(self): scene_label_paths = scene_label_paths + side_label_paths image_paths = image_paths + scene_image_paths label_paths = label_paths + scene_label_paths - self.data = [] progress_bar = Progbar(len(image_paths)) for sample_arg, sample in enumerate(zip(image_paths, label_paths)): @@ -65,15 +67,15 @@ def load_data(self): continue self.data.append({'image': image_path, 'boxes': boxes}) progress_bar.update(sample_arg + 1) - data_dict = {} - train_split = int(len(self.data) * self.split[0]) - test_split = int(len(self.data) * self.split[1]) - validation_split = int(len(self.data) * self.split[2]) - data_dict['train'] = self.data[:train_split] - data_dict['test'] = self.data[ - train_split:(train_split + test_split)] - data_dict['validation'] = self.data[(train_split + test_split):] - return data_dict + train_split = int(len(self.data) * 0.4) + test_split = int(len(self.data) * 0.4) + if self.split == 'train': + self.data = self.data[:train_split] + if self.split == 'test': + self.data = self.data[train_split:(train_split + test_split)] + if self.split == 'validation': + self.data = self.data[(train_split + test_split):] + return self.data def _extract_boxes(self, json_filename): @@ -105,6 +107,5 @@ def _valid_name_match(self, image_path, label_path): image_name = os.path.basename(image_path) label_name = os.path.basename(label_path) return image_name[:-3] == label_name[:-4] - -fat = Fat('/home/ramit/git/paz') +