From 241c74a976657b28c7379243d545a11038358a90 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Fri, 6 Dec 2024 18:54:03 +0100 Subject: [PATCH 01/12] Added necessary requirements. Added integration with pytorch-metric-learning (loss). Added custom reID/embedding metrics. Implemented a test to verify trainability, exportability and inference. Removed GhostFaceNetsV2 from the backbone tests as it only generates embeddings instead of the usual features. --- .../attached_modules/losses/__init__.py | 2 + .../attached_modules/losses/pml_loss.py | 119 ++++ .../attached_modules/metrics/__init__.py | 3 + .../attached_modules/metrics/pml_metrics.py | 248 ++++++++ .../attached_modules/visualizers/__init__.py | 2 + .../visualizers/embeddings_visualizer.py | 95 ++++ luxonis_train/loaders/utils.py | 1 + luxonis_train/nodes/backbones/__init__.py | 2 + luxonis_train/nodes/backbones/ghostfacenet.py | 534 ++++++++++++++++++ requirements.txt | 2 + tests/configs/reid.yaml | 60 ++ tests/integration/test_detection.py | 4 +- tests/integration/test_reid.py | 91 +++ tests/integration/test_segmentation.py | 4 +- 14 files changed, 1165 insertions(+), 2 deletions(-) create mode 100644 luxonis_train/attached_modules/losses/pml_loss.py create mode 100644 luxonis_train/attached_modules/metrics/pml_metrics.py create mode 100644 luxonis_train/attached_modules/visualizers/embeddings_visualizer.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet.py create mode 100644 tests/configs/reid.yaml create mode 100644 tests/integration/test_reid.py diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index ff0bafc8..b320fada 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -7,6 +7,7 @@ from .ohem_bce_with_logits import OHEMBCEWithLogitsLoss from .ohem_cross_entropy import OHEMCrossEntropyLoss from .ohem_loss import OHEMLoss +from .pml_loss import MetricLearningLoss from .reconstruction_segmentation_loss import ReconstructionSegmentationLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss @@ -26,4 +27,5 @@ "OHEMCrossEntropyLoss", "OHEMBCEWithLogitsLoss", "FOMOLocalizationLoss", + "MetricLearningLoss", ] diff --git a/luxonis_train/attached_modules/losses/pml_loss.py b/luxonis_train/attached_modules/losses/pml_loss.py new file mode 100644 index 00000000..aacd667b --- /dev/null +++ b/luxonis_train/attached_modules/losses/pml_loss.py @@ -0,0 +1,119 @@ +import warnings + +from pytorch_metric_learning.losses import ( + AngularLoss, + ArcFaceLoss, + CircleLoss, + ContrastiveLoss, + CosFaceLoss, + CrossBatchMemory, + DynamicSoftMarginLoss, + FastAPLoss, + GeneralizedLiftedStructureLoss, + HistogramLoss, + InstanceLoss, + IntraPairVarianceLoss, + LargeMarginSoftmaxLoss, + LiftedStructureLoss, + ManifoldLoss, + MarginLoss, + MultiSimilarityLoss, + NCALoss, + NormalizedSoftmaxLoss, + NPairsLoss, + NTXentLoss, + P2SGradLoss, + PNPLoss, + ProxyAnchorLoss, + ProxyNCALoss, + RankedListLoss, + SignalToNoiseRatioContrastiveLoss, + SoftTripleLoss, + SphereFaceLoss, + SubCenterArcFaceLoss, + SupConLoss, + TripletMarginLoss, + TupletMarginLoss, +) +from torch import Tensor + +from .base_loss import BaseLoss + +# Dictionary mapping string keys to loss classes +loss_dict = { + "AngularLoss": AngularLoss, + "ArcFaceLoss": ArcFaceLoss, + "CircleLoss": CircleLoss, + "ContrastiveLoss": ContrastiveLoss, + "CosFaceLoss": CosFaceLoss, + 
"DynamicSoftMarginLoss": DynamicSoftMarginLoss, + "FastAPLoss": FastAPLoss, + "GeneralizedLiftedStructureLoss": GeneralizedLiftedStructureLoss, + "InstanceLoss": InstanceLoss, + "HistogramLoss": HistogramLoss, + "IntraPairVarianceLoss": IntraPairVarianceLoss, + "LargeMarginSoftmaxLoss": LargeMarginSoftmaxLoss, + "LiftedStructureLoss": LiftedStructureLoss, + "ManifoldLoss": ManifoldLoss, + "MarginLoss": MarginLoss, + "MultiSimilarityLoss": MultiSimilarityLoss, + "NCALoss": NCALoss, + "NormalizedSoftmaxLoss": NormalizedSoftmaxLoss, + "NPairsLoss": NPairsLoss, + "NTXentLoss": NTXentLoss, + "P2SGradLoss": P2SGradLoss, + "PNPLoss": PNPLoss, + "ProxyAnchorLoss": ProxyAnchorLoss, + "ProxyNCALoss": ProxyNCALoss, + "RankedListLoss": RankedListLoss, + "SignalToNoiseRatioContrastiveLoss": SignalToNoiseRatioContrastiveLoss, + "SoftTripleLoss": SoftTripleLoss, + "SphereFaceLoss": SphereFaceLoss, + "SubCenterArcFaceLoss": SubCenterArcFaceLoss, + "SupConLoss": SupConLoss, + "TripletMarginLoss": TripletMarginLoss, + "TupletMarginLoss": TupletMarginLoss, +} + + +class MetricLearningLoss(BaseLoss): + def __init__( + self, + loss_name: str, + embedding_size: int = 512, + cross_batch_memory_size=0, + loss_kwargs: dict | None = None, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + if loss_kwargs is None: + loss_kwargs = {} + self.loss_func = loss_dict[loss_name]( + **loss_kwargs + ) # Instantiate the loss object + if cross_batch_memory_size > 0: + if loss_name in CrossBatchMemory.supported_losses(): + self.loss_func = CrossBatchMemory( + self.loss_func, embedding_size=embedding_size + ) + else: + # Warn that cross_batch_memory_size is ignored + warnings.warn( + f"Cross batch memory is not supported for {loss_name}. Ignoring cross_batch_memory_size" + ) + + # self.miner_func = miner_func + + def prepare(self, inputs, labels): + embeddings = inputs["features"][0] + + IDs = labels["id"][0][:, 0] + return embeddings, IDs + + def forward(self, inputs: Tensor, target: Tensor): + # miner_output = self.miner_func(inputs, target) + + loss = self.loss_func(inputs, target) + + return loss diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index b1dc40ea..c43f32b4 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -2,6 +2,7 @@ from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints from .object_keypoint_similarity import ObjectKeypointSimilarity +from .pml_metrics import ClosestIsPositiveAccuracy, MedianDistances from .torchmetrics import Accuracy, F1Score, JaccardIndex, Precision, Recall __all__ = [ @@ -14,4 +15,6 @@ "ObjectKeypointSimilarity", "Precision", "Recall", + "ClosestIsPositiveAccuracy", + "MedianDistances", ] diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py new file mode 100644 index 00000000..b280742d --- /dev/null +++ b/luxonis_train/attached_modules/metrics/pml_metrics.py @@ -0,0 +1,248 @@ +import torch +from torch import Tensor + +from .base_metric import BaseMetric + +# Converted from https://omoindrot.github.io/triplet-loss#offline-and-online-triplet-mining +# to PyTorch from TensorFlow + + +def _pairwise_distances(embeddings, squared=False): + """Compute the 2D matrix of distances between all the embeddings. + + Args: + embeddings: tensor of shape (batch_size, embed_dim) + squared: Boolean. 
If true, output is the pairwise squared euclidean distance matrix.
+            If false, output is the pairwise euclidean distance matrix.
+
+    Returns:
+        pairwise_distances: tensor of shape (batch_size, batch_size)
+    """
+    # Get the dot product between all embeddings
+    # shape (batch_size, batch_size)
+    dot_product = torch.matmul(embeddings, embeddings.t())
+
+    # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`.
+    # This also provides more numerical stability (the diagonal of the result will be exactly 0).
+    # shape (batch_size,)
+    square_norm = torch.diag(dot_product)
+
+    # Compute the pairwise distance matrix as we have:
+    # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2
+    # shape (batch_size, batch_size)
+    distances = (
+        square_norm.unsqueeze(0) - 2.0 * dot_product + square_norm.unsqueeze(1)
+    )
+
+    # Because of computation errors, some distances might be negative so we put everything >= 0.0
+    distances = torch.max(distances, torch.tensor(0.0))
+
+    if not squared:
+        # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal)
+        # we need to add a small epsilon where distances == 0.0
+        mask = (distances == 0.0).float()
+        distances = distances + mask * 1e-16
+
+        distances = torch.sqrt(distances)
+
+        # Correct the epsilon added: set the distances on the mask to be exactly 0.0
+        distances = distances * (1.0 - mask)
+
+    return distances
+
+
+def _get_anchor_positive_triplet_mask(labels):
+    indices_equal = torch.eye(
+        labels.shape[0], dtype=torch.uint8, device=labels.device
+    )
+    indices_not_equal = ~indices_equal
+    labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1)
+    mask = indices_not_equal & labels_equal
+    return mask
+
+
+class ClosestIsPositiveAccuracy(BaseMetric):
+    def __init__(self, cross_batch_memory_size=0, **kwargs):
+        super().__init__(**kwargs)
+        self.cross_batch_memory_size = cross_batch_memory_size
+        self.add_state("cross_batch_memory", default=[], dist_reduce_fx="cat")
+        self.add_state(
+            "correct_predictions",
+            default=torch.tensor(0),
+            dist_reduce_fx="sum",
+        )
+        self.add_state(
+            "total_predictions", default=torch.tensor(0), dist_reduce_fx="sum"
+        )
+
+    def prepare(self, inputs, labels):
+        embeddings = inputs["features"][0]
+        IDs = labels["id"][0][:, 0]
+        return embeddings, IDs
+
+    def update(self, inputs: Tensor, target: Tensor):
+        embeddings, labels = inputs, target
+
+        if self.cross_batch_memory_size > 0:
+            # Append embedding and labels to the memory
+            self.cross_batch_memory.extend(list(zip(embeddings, labels)))
+
+            # If the memory is full, remove the oldest elements
+            if len(self.cross_batch_memory) > self.cross_batch_memory_size:
+                self.cross_batch_memory = self.cross_batch_memory[
+                    -self.cross_batch_memory_size :
+                ]
+
+            # If the memory is not full, return
+            if len(self.cross_batch_memory) < self.cross_batch_memory_size:
+                return
+
+            # Get the embeddings and labels from the memory
+            embeddings, labels = zip(*self.cross_batch_memory)
+            embeddings = torch.stack(embeddings)
+            labels = torch.stack(labels)
+
+        # print(f"Calculating accuracy for {len(embeddings)} embeddings")
+
+        # Get the pairwise distances between all embeddings
+        pairwise_distances = _pairwise_distances(embeddings)
+
+        # Set diagonal to infinity so that the closest embedding is not the same embedding
+        pairwise_distances.fill_diagonal_(float("inf"))
+
+        # Find the closest embedding for each query embedding
+        closest_indices = torch.argmin(pairwise_distances, dim=1)
+
+        # Get the labels of the closest embeddings
+        closest_labels = 
labels[closest_indices] + + # Filter out embeddings that don't have both positive and negative examples + positive_mask = _get_anchor_positive_triplet_mask(labels) + num_positives = positive_mask.sum(dim=1) + has_at_least_one_positive_and_negative = (num_positives > 0) & ( + num_positives < len(labels) + ) + + # Filter embeddings, labels, and closest indices based on valid indices + filtered_labels = labels[has_at_least_one_positive_and_negative] + filtered_closest_labels = closest_labels[ + has_at_least_one_positive_and_negative + ] + + # Calculate the number of correct predictions where the closest is positive + correct_predictions = ( + filtered_labels == filtered_closest_labels + ).sum() + + # Update the metric state + self.correct_predictions += correct_predictions + self.total_predictions += len(filtered_labels) + + def compute(self): + return self.correct_predictions / self.total_predictions + + +class MedianDistances(BaseMetric): + def __init__(self, cross_batch_memory_size=0, **kwargs): + super().__init__(**kwargs) + self.cross_batch_memory_size = cross_batch_memory_size + self.add_state("cross_batch_memory", default=[], dist_reduce_fx="cat") + self.add_state("all_distances", default=[], dist_reduce_fx="cat") + self.add_state("closest_distances", default=[], dist_reduce_fx="cat") + self.add_state("positive_distances", default=[], dist_reduce_fx="cat") + self.add_state( + "closest_vs_positive_distances", default=[], dist_reduce_fx="cat" + ) + + def prepare(self, inputs, labels): + embeddings = inputs["features"][0] + IDs = labels["id"][0][:, 0] + return embeddings, IDs + + def update(self, inputs: Tensor, target: Tensor): + embeddings, labels = inputs, target + + if self.cross_batch_memory_size > 0: + # Append embedding and labels to the memory + self.cross_batch_memory.extend(list(zip(embeddings, labels))) + + # If the memory is full, remove the oldest elements + if len(self.cross_batch_memory) > self.cross_batch_memory_size: + self.cross_batch_memory = self.cross_batch_memory[ + -self.cross_batch_memory_size : + ] + + # If the memory is not full, return + if len(self.cross_batch_memory) < self.cross_batch_memory_size: + return + + # Get the embeddings and labels from the memory + embeddings, labels = zip(*self.cross_batch_memory) + embeddings = torch.stack(embeddings) + labels = torch.stack(labels) + + # Get the pairwise distances between all embeddings + pairwise_distances = _pairwise_distances(embeddings) + # Append only upper triangular part of the matrix + self.all_distances.append( + pairwise_distances[ + torch.triu(torch.ones_like(pairwise_distances), diagonal=1) + == 1 + ].flatten() + ) + + # Set diagonal to infinity so that the closest embedding is not the same embedding + pairwise_distances.fill_diagonal_(float("inf")) + + # Get the closest distance for each query embedding + closest_distances, _ = torch.min(pairwise_distances, dim=1) + self.closest_distances.append(closest_distances) + + # Get the positive mask and convert it to boolean + positive_mask = _get_anchor_positive_triplet_mask(labels).bool() + + only_positive_distances = pairwise_distances.clone() + only_positive_distances[~positive_mask] = float("inf") + + closest_positive_distances, _ = torch.min( + only_positive_distances, dim=1 + ) + + non_inf_mask = closest_positive_distances != float("inf") + difference = closest_positive_distances - closest_distances + difference = difference[non_inf_mask] + + # Update the metric state + self.closest_vs_positive_distances.append(difference) + 
self.positive_distances.append( + closest_positive_distances[non_inf_mask] + ) + + def compute(self): + if len(self.all_distances) == 0: + # Return NaN tensor if no distances were calculated + return { + "MedianDistance": torch.tensor(float("nan")), + "MedianClosestDistance": torch.tensor(float("nan")), + "MedianClosestPositiveDistance": torch.tensor(float("nan")), + "MedianClosestVsClosestPositiveDistance": torch.tensor( + float("nan") + ), + } + + all_distances = torch.cat(self.all_distances) + closest_distances = torch.cat(self.closest_distances) + positive_distances = torch.cat(self.positive_distances) + closest_vs_positive_distances = torch.cat( + self.closest_vs_positive_distances + ) + + # Return medians + return { + "MedianDistance": torch.median(all_distances), + "MedianClosestDistance": torch.median(closest_distances), + "MedianClosestPositiveDistance": torch.median(positive_distances), + "MedianClosestVsClosestPositiveDistance": torch.median( + closest_vs_positive_distances + ), + } diff --git a/luxonis_train/attached_modules/visualizers/__init__.py b/luxonis_train/attached_modules/visualizers/__init__.py index 50b90471..69ecc3c4 100644 --- a/luxonis_train/attached_modules/visualizers/__init__.py +++ b/luxonis_train/attached_modules/visualizers/__init__.py @@ -1,6 +1,7 @@ from .base_visualizer import BaseVisualizer from .bbox_visualizer import BBoxVisualizer from .classification_visualizer import ClassificationVisualizer +from .embeddings_visualizer import EmbeddingsVisualizer from .keypoint_visualizer import KeypointVisualizer from .multi_visualizer import MultiVisualizer from .segmentation_visualizer import SegmentationVisualizer @@ -23,6 +24,7 @@ "KeypointVisualizer", "MultiVisualizer", "SegmentationVisualizer", + "EmbeddingsVisualizer", "combine_visualizations", "draw_bounding_box_labels", "draw_keypoint_labels", diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py new file mode 100644 index 00000000..b5fb5f0e --- /dev/null +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -0,0 +1,95 @@ +import logging + +from matplotlib import pyplot as plt +from sklearn.manifold import TSNE +from torch import Tensor + +from luxonis_train.utils import Labels, Packet + +from .base_visualizer import BaseVisualizer +from .utils import ( + figure_to_torch, +) + +logger = logging.getLogger(__name__) +log_disable = False + + +class EmbeddingsVisualizer(BaseVisualizer[Tensor, Tensor]): + # supported_tasks: list[TaskType] = [TaskType.LABEL] + + def __init__( + self, + **kwargs, + ): + """Visualizer for embedding tasks like reID.""" + super().__init__(**kwargs) + + def prepare( + self, inputs: Packet[Tensor], labels: Labels | None + ) -> tuple[Tensor, Tensor]: + embeddings = inputs["features"][0] + IDs = labels["id"][0] + return embeddings, IDs + + def forward( + self, + label_canvas: Tensor, + prediction_canvas: Tensor, + embeddings: Tensor, + IDs: Tensor | None, + **kwargs, + ) -> Tensor: + """Creates a visualization of the embeddings. + + @type label_canvas: Tensor + @param label_canvas: The canvas to draw the labels on. + @type prediction_canvas: Tensor + @param prediction_canvas: The canvas to draw the predictions on. + @type embeddings: Tensor + @param embeddings: The embeddings to visualize. + @type IDs: Tensor + @param IDs: The IDs to visualize. + @rtype: Tensor + @return: An embedding space projection. + """ + + # Embeddings: [B, D], D = e.g. 
512 + # IDs: [B, 1], corresponding to the embeddings + + # Convert embeddings to numpy array + embeddings_np = embeddings.detach().cpu().numpy() + + # Perplexity must be less than the number of samples + perplexity = min(30, embeddings_np.shape[0] - 1) + + # Reduce dimensionality to 2D using t-SNE + tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity) + embeddings_2d = tsne.fit_transform(embeddings_np) + + # Plot the embeddings + fig, ax = plt.subplots(figsize=(10, 10)) + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + c=IDs.detach().cpu().numpy(), + cmap="viridis", + s=5, + ) + fig.colorbar(scatter, ax=ax) + ax.set_title("Embeddings Visualization") + ax.set_xlabel("Dimension 1") + ax.set_ylabel("Dimension 2") + + # Convert figure to tensor + image_tensor = figure_to_torch( + fig, width=label_canvas.shape[3], height=label_canvas.shape[2] + ) + + # Close the figure to free memory + plt.close(fig) + + # Add fake batch dimension + image_tensor = image_tensor.unsqueeze(0) + + return image_tensor diff --git a/luxonis_train/loaders/utils.py b/luxonis_train/loaders/utils.py index b030e218..2782500e 100644 --- a/luxonis_train/loaders/utils.py +++ b/luxonis_train/loaders/utils.py @@ -38,6 +38,7 @@ def collate_fn( TaskType.CLASSIFICATION, TaskType.SEGMENTATION, TaskType.ARRAY, + TaskType.LABEL, ]: out_labels[task] = torch.stack(annos, 0), task_type diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index cc621625..f5319981 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -2,6 +2,7 @@ from .ddrnet import DDRNet from .efficientnet import EfficientNet from .efficientrep import EfficientRep +from .ghostfacenet import GhostFaceNetsV2 from .micronet import MicroNet from .mobilenetv2 import MobileNetV2 from .mobileone import MobileOne @@ -22,4 +23,5 @@ "ResNet", "DDRNet", "RecSubNet", + "GhostFaceNetsV2", ] diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py new file mode 100644 index 00000000..b4b17758 --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet.py @@ -0,0 +1,534 @@ +# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py + + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from luxonis_train.nodes.base_node import BaseNode + + +def _make_divisible(v, divisor, min_value=None): + """This function is taken from the original tf repo. + + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
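+    # e.g. v=10, divisor=8 first rounds to 8; since 8 < 0.9 * 10, the
+    # result is bumped up by one divisor step to 16.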
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def hard_sigmoid(x, inplace: bool = False): + if inplace: + return x.add_(3.0).clamp_(0.0, 6.0).div_(6.0) + else: + return F.relu6(x + 3.0) / 6.0 + + +class SqueezeExcite(nn.Module): + def __init__( + self, + in_chs, + se_ratio=0.25, + reduced_base_chs=None, + act_layer=nn.PReLU, + gate_fn=hard_sigmoid, + divisor=4, + **_, + ): + super(SqueezeExcite, self).__init__() + self.gate_fn = gate_fn + reduced_chs = _make_divisible( + (reduced_base_chs or in_chs) * se_ratio, divisor + ) + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) + self.act1 = act_layer() + self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) + + def forward(self, x): + x_se = self.avg_pool(x) + x_se = self.conv_reduce(x_se) + x_se = self.act1(x_se) + x_se = self.conv_expand(x_se) + x = x * self.gate_fn(x_se) + return x + + +class ConvBnAct(nn.Module): + def __init__( + self, in_chs, out_chs, kernel_size, stride=1, act_layer=nn.PReLU + ): + super(ConvBnAct, self).__init__() + self.conv = nn.Conv2d( + in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False + ) + self.bn1 = nn.BatchNorm2d(out_chs) + self.act1 = act_layer() + + def forward(self, x): + x = self.conv(x) + x = self.bn1(x) + x = self.act1(x) + return x + + +class ModifiedGDC(nn.Module): + def __init__( + self, image_size, in_chs, num_classes, dropout, emb=512 + ): # dropout implementation is in the original code but not in the paper + super(ModifiedGDC, self).__init__() + + if image_size % 32 == 0: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32), + groups=in_chs, + bias=False, + ) + else: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32 + 1), + groups=in_chs, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(in_chs) + self.dropout = nn.Dropout(dropout) + + self.conv = nn.Conv2d(in_chs, emb, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm1d(emb) + self.linear = ( + nn.Linear(emb, num_classes) if num_classes else nn.Identity() + ) + + def forward(self, inps): + x = inps + x = self.conv_dw(x) + x = self.bn1(x) + x = self.dropout(x) + # # Add spots to the features + # x = torch.cat([x, spots.view(spots.size(0), -1, 1, 1)], dim=1) + x = self.conv(x) + x = x.view(x.size(0), -1) # Flatten + x = self.bn2(x) + x = self.linear(x) + return x + + +class GhostModuleV2(nn.Module): + def __init__( + self, + inp, + oup, + kernel_size=1, + ratio=2, + dw_size=3, + stride=1, + prelu=True, + mode=None, + args=None, + ): + super(GhostModuleV2, self).__init__() + self.mode = mode + self.gate_fn = nn.Sigmoid() + + if self.mode in ["original"]: + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + elif self.mode in ["attn"]: # DFC + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + 
nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.short_conv = nn.Sequential( + nn.Conv2d( + inp, oup, kernel_size, stride, kernel_size // 2, bias=False + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(1, 5), + stride=1, + padding=(0, 2), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(5, 1), + stride=1, + padding=(2, 0), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.mode in ["original"]: + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] + elif self.mode in ["attn"]: + res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2)) + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] * F.interpolate( + self.gate_fn(res), + size=(out.shape[-2], out.shape[-1]), + mode="nearest", + ) + + +class GhostBottleneckV2(nn.Module): + def __init__( + self, + in_chs, + mid_chs, + out_chs, + dw_kernel_size=3, + stride=1, + act_layer=nn.PReLU, + se_ratio=0.0, + layer_id=None, + args=None, + ): + super(GhostBottleneckV2, self).__init__() + has_se = se_ratio is not None and se_ratio > 0.0 + self.stride = stride + + # Point-wise expansion + if layer_id <= 1: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="original", args=args + ) + else: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="attn", args=args + ) + + # Depth-wise convolution + if self.stride > 1: + self.conv_dw = nn.Conv2d( + mid_chs, + mid_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=mid_chs, + bias=False, + ) + self.bn_dw = nn.BatchNorm2d(mid_chs) + + # Squeeze-and-excitation + if has_se: + self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio) + else: + self.se = None + + self.ghost2 = GhostModuleV2( + mid_chs, out_chs, prelu=False, mode="original", args=args + ) + + # shortcut + if in_chs == out_chs and self.stride == 1: + self.shortcut = nn.Sequential() + else: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_chs, + in_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=in_chs, + bias=False, + ), + nn.BatchNorm2d(in_chs), + nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_chs), + ) + + def forward(self, x): + residual = x + x = self.ghost1(x) + if self.stride > 1: + x = self.conv_dw(x) + x = self.bn_dw(x) + if self.se is not None: + x = self.se(x) + x = self.ghost2(x) + x += self.shortcut(residual) + return x + + +# NODES.register_module() +class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): + def unwrap(self, inputs): + return [inputs[0]["features"][0]] + + def wrap(self, outputs): + return {"features": [outputs]} + + def set_export_mode(self, mode: bool = True): + self.export_mode = mode + self.train(not mode) + + def __init__( + self, + cfgs=None, + embedding_size=512, + num_classes=0, + width=1.0, + dropout=0.2, + block=GhostBottleneckV2, + add_pointwise_conv=False, + bn_momentum=0.9, + bn_epsilon=1e-5, + init_kaiming=True, + block_args=None, + *args, + **kwargs, + ): + # kwargs['_tasks'] = {TaskType.LABEL: 'features'} + 
super().__init__(*args, **kwargs) + + inp_shape = kwargs["input_shapes"][0]["features"][0] + # spots_shape = kwargs['input_shapes'][0]['features'][1] + + image_size = inp_shape[2] + channels = inp_shape[1] + if cfgs is None: + self.cfgs = [ + # k, t, c, SE, s + [[3, 16, 16, 0, 1]], + [[3, 48, 24, 0, 2]], + [[3, 72, 24, 0, 1]], + [[5, 72, 40, 0.25, 2]], + [[5, 120, 40, 0.25, 1]], + [[3, 240, 80, 0, 2]], + [ + [3, 200, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 480, 112, 0.25, 1], + [3, 672, 112, 0.25, 1], + ], + [[5, 672, 160, 0.25, 2]], + [ + [5, 960, 160, 0, 1], + [5, 960, 160, 0.25, 1], + [5, 960, 160, 0, 1], + [5, 960, 160, 0.25, 1], + ], + ] + else: + self.cfgs = cfgs + + # building first layer + output_channel = _make_divisible(16 * width, 4) + self.conv_stem = nn.Conv2d( + channels, output_channel, 3, 2, 1, bias=False + ) + self.bn1 = nn.BatchNorm2d(output_channel) + self.act1 = nn.PReLU() + input_channel = output_channel + + # building inverted residual blocks + stages = [] + layer_id = 0 + for cfg in self.cfgs: + layers = [] + for k, exp_size, c, se_ratio, s in cfg: + output_channel = _make_divisible(c * width, 4) + hidden_channel = _make_divisible(exp_size * width, 4) + if block == GhostBottleneckV2: + layers.append( + block( + input_channel, + hidden_channel, + output_channel, + k, + s, + se_ratio=se_ratio, + layer_id=layer_id, + args=block_args, + ) + ) + input_channel = output_channel + layer_id += 1 + stages.append(nn.Sequential(*layers)) + + output_channel = _make_divisible(exp_size * width, 4) + stages.append( + nn.Sequential(ConvBnAct(input_channel, output_channel, 1)) + ) + + self.blocks = nn.Sequential(*stages) + + # building last several layers + pointwise_conv = [] + if add_pointwise_conv: + pointwise_conv.append( + nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) + ) + pointwise_conv.append(nn.BatchNorm2d(output_channel)) + pointwise_conv.append(nn.PReLU()) + else: + pointwise_conv.append(nn.Sequential()) + + self.pointwise_conv = nn.Sequential(*pointwise_conv) + self.classifier = ModifiedGDC( + image_size, output_channel, num_classes, dropout, embedding_size + ) + + # Initialize weights + for m in self.modules(): + if init_kaiming: + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight) + negative_slope = 0.25 # Default value for PReLU in PyTorch, change it if you use custom value + m.weight.data.normal_( + 0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2))) + ) + if isinstance(m, nn.BatchNorm2d): + m.momentum, m.eps = bn_momentum, bn_epsilon + + def forward(self, inps): + x = inps[0] + x = self.conv_stem(x) + x = self.bn1(x) + x = self.act1(x) + x = self.blocks(x) + x = self.pointwise_conv(x) + x = self.classifier(x) + return x + + # @property + # def task(self) -> str: + # return "label" + + # @property + # def tasks(self) -> dict: + # return [TaskType.LABEL] + + +if __name__ == "__main__": + W, H = 256, 256 + model = GhostFaceNetsV2(image_size=W) + model.eval() # Set the model to evaluation mode + + # Create a dummy input tensor of the appropriate size + x = torch.randn(1, 3, H, W) + + # Export the model + onnx_path = "ghostfacenet.onnx" + torch.onnx.export( + model, # model being run + x, # model input (or a tuple for multiple inputs) + onnx_path, # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=12, # the ONNX version to export the model to + 
do_constant_folding=True, # whether to execute constant folding for optimization + input_names=["input"], # the model's input names + output_names=["output"], # the model's output names + # dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes + # 'output' : {0 : 'batch_size'}} + ) + import os + + import numpy as np + import onnx + import onnxsim + + # logger.info("Simplifying ONNX model...") + model_onnx = onnx.load(onnx_path) + onnx_model, check = onnxsim.simplify(model_onnx) + if not check: + raise RuntimeError("Onnx simplify failed.") + onnx.save(onnx_model, onnx_path) + + # Add calibration data + dir = "shared_with_container/calibration_data/" + for file in os.listdir(dir): + os.remove(dir + file) + for i in range(20): + np_array = np.random.rand(1, 3, H, W).astype(np.float32) + np.save(f"{dir}{i:02d}.npy", np_array) + np_array.tofile(f"{dir}{i:02d}.raw") + + # Test backpropagation on the model + # Create a dummy target tensor of the appropriate size + Y = model(x) + target = torch.randn(1, 512) + loss_fn = torch.nn.MSELoss() + loss = loss_fn(Y, target) + model.zero_grad() + loss.backward() + print("Backpropagation test successful") diff --git a/requirements.txt b/requirements.txt index 5d0fcb28..94badc1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,5 @@ mlflow>=2.10.0 psutil>=5.0.0 tabulate>=0.9.0 grad-cam>=1.5.4 +pytorch_metric_learning>=2.7.0 +scikit-learn>=1.5.0 \ No newline at end of file diff --git a/tests/configs/reid.yaml b/tests/configs/reid.yaml new file mode 100644 index 00000000..d9c0ec11 --- /dev/null +++ b/tests/configs/reid.yaml @@ -0,0 +1,60 @@ +loader: + name: CustomReIDLoader + +model: + name: reid_test + nodes: + - name: GhostFaceNetsV2 + input_sources: + - image + params: + embedding_size: &embedding_size 512 + + losses: + - name: MetricLearningLoss + params: + loss_name: SupConLoss + embedding_size: *embedding_size + cross_batch_memory_size: &memory_size 200 + attached_to: GhostFaceNetsV2 + + metrics: + - name: ClosestIsPositiveAccuracy + params: + cross_batch_memory_size: *memory_size + attached_to: GhostFaceNetsV2 + is_main_metric: True + - name: MedianDistances + params: + cross_batch_memory_size: *memory_size + attached_to: GhostFaceNetsV2 + is_main_metric: False + + visualizers: + - name: EmbeddingsVisualizer + attached_to: GhostFaceNetsV2 + +trainer: + preprocessing: + train_image_size: [256, 256] + + batch_size: 16 + epochs: 10 + n_workers: 0 + validation_interval: 10 + + callbacks: + - name: ExportOnTrainEnd + + optimizer: + name: Adam + params: + lr: 0.01 + +tracker: + project_name: reid_example + is_tensorboard: True + +exporter: + onnx: + opset_version: 11 \ No newline at end of file diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py index 45e83f0a..060e84e2 100644 --- a/tests/integration/test_detection.py +++ b/tests/integration/test_detection.py @@ -103,7 +103,9 @@ def train_and_test( assert value > 0.8, f"{name} = {value} (expected > 0.8)" -@pytest.mark.parametrize("backbone", BACKBONES) +@pytest.mark.parametrize( + "backbone", [b for b in BACKBONES if b != "GhostFaceNetsV2"] +) def test_backbones( backbone: str, config: dict[str, Any], diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py new file mode 100644 index 00000000..9ed4e867 --- /dev/null +++ b/tests/integration/test_reid.py @@ -0,0 +1,91 @@ +import shutil +from pathlib import Path +from typing import Any + +import pytest +import torch + +from luxonis_train.core import LuxonisModel +from 
luxonis_train.enums import TaskType +from luxonis_train.loaders import BaseLoaderTorch + +from .multi_input_modules import * + +INFER_PATH = Path("tests/integration/infer-save-directory") +ONNX_PATH = Path("tests/integration/_model.onnx") +STUDY_PATH = Path("study_local.db") + + +class CustomReIDLoader(BaseLoaderTorch): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def input_shapes(self): + return { + "image": torch.Size([3, 256, 256]), + "id": torch.Size([1]), + } + + def __getitem__(self, _): # pragma: no cover + # Fake data + image = torch.rand(3, 256, 256, dtype=torch.float32) + inputs = { + "image": image, + } + + # Fake labels + id = torch.randint(0, 1000, (1,), dtype=torch.int64) + labels = { + "id": (id, TaskType.LABEL), + } + + return inputs, labels + + def __len__(self): + return 10 + + def get_classes(self) -> dict[TaskType, list[str]]: + return {TaskType.LABEL: ["id"]} + + +@pytest.fixture +def infer_path() -> Path: + if INFER_PATH.exists(): + shutil.rmtree(INFER_PATH) + INFER_PATH.mkdir() + return INFER_PATH + + +@pytest.fixture +def opts(test_output_dir: Path) -> dict[str, Any]: + return { + "trainer.epochs": 1, + "trainer.batch_size": 2, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(test_output_dir), + "tuner.n_trials": 4, + } + + +@pytest.fixture(scope="function", autouse=True) +def clear_files(): + yield + STUDY_PATH.unlink(missing_ok=True) + ONNX_PATH.unlink(missing_ok=True) + + +def test_reid(opts: dict[str, Any], infer_path: Path): + config_file = "tests/configs/reid.yaml" + model = LuxonisModel(config_file, opts) + model.train() + model.test(view="val") + + assert not ONNX_PATH.exists() + model.export(str(ONNX_PATH)) + assert ONNX_PATH.exists() + + assert len(list(infer_path.iterdir())) == 0 + model.infer(view="val", save_dir=infer_path) + assert infer_path.exists() diff --git a/tests/integration/test_segmentation.py b/tests/integration/test_segmentation.py index a8b4df91..4ab4478a 100644 --- a/tests/integration/test_segmentation.py +++ b/tests/integration/test_segmentation.py @@ -123,7 +123,9 @@ def train_and_test( assert value > 0.8, f"{name} = {value} (expected > 0.8)" -@pytest.mark.parametrize("backbone", BACKBONES) +@pytest.mark.parametrize( + "backbone", [b for b in BACKBONES if b != "GhostFaceNetsV2"] +) def test_backbones( backbone: str, config: dict[str, Any], From be4c2d23f3039496c0e220f5efea7e6f9256fbdc Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Fri, 6 Dec 2024 19:05:22 +0100 Subject: [PATCH 02/12] Add detailed docstring for GhostFaceNetsV2 backbone class --- luxonis_train/nodes/backbones/ghostfacenet.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py index b4b17758..c242633f 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet.py @@ -356,6 +356,44 @@ def __init__( *args, **kwargs, ): + """GhostFaceNetsV2 backbone. + + GhostFaceNetsV2 is a convolutional neural network architecture focused on face recognition, but it is + adaptable to generic embedding tasks. It is based on the GhostNet architecture and uses Ghost BottleneckV2 blocks. 
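+        With the default num_classes=0, the forward pass returns one embedding vector of size embedding_size per image;
+        the input is downsampled by a factor of 32 before the depth-wise embedding head.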
+
+        Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py}
+
+        @license: MIT License
+
+        @see: GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations
+
+        @type cfgs: list[list[list[int]]] | None
+        @param cfgs: List of Ghost BottleneckV2 configurations. Defaults to None, which uses the original GhostFaceNetsV2 configuration.
+        @type embedding_size: int
+        @param embedding_size: Size of the embedding. Defaults to 512.
+        @type num_classes: int
+        @param num_classes: Number of classes. Defaults to 0, which makes the network output the raw embeddings. A positive value
+            adds a final linear layer to the network, which is useful for training with ArcFace or similar classification-based
+            losses that require the last layer to be dropped for inference.
+        @type width: float
+        @param width: Width multiplier. Increases complexity and number of parameters. Defaults to 1.0.
+        @type dropout: float
+        @param dropout: Dropout rate. Defaults to 0.2.
+        @type block: nn.Module
+        @param block: Ghost BottleneckV2 block. Defaults to GhostBottleneckV2.
+        @type add_pointwise_conv: bool
+        @param add_pointwise_conv: If True, adds a pointwise convolution layer at the end of the network. Defaults to False.
+        @type bn_momentum: float
+        @param bn_momentum: Batch normalization momentum. Defaults to 0.9.
+        @type bn_epsilon: float
+        @param bn_epsilon: Batch normalization epsilon. Defaults to 1e-5.
+        @type init_kaiming: bool
+        @param init_kaiming: If True, initializes the weights using the Kaiming initialization. Defaults to True.
+        @type block_args: dict | None
+        @param block_args: Arguments to pass to the block. Defaults to None.
+        """
         # kwargs['_tasks'] = {TaskType.LABEL: 'features'}
         super().__init__(*args, **kwargs)
 

From c360f15bc953405cf1ea15c6787ced733e479e41 Mon Sep 17 00:00:00 2001
From: Michal Sejak
Date: Fri, 6 Dec 2024 19:08:01 +0100
Subject: [PATCH 03/12] fix: update docstring for pairwise_distances function in pml_metrics.py

---
 .../attached_modules/metrics/pml_metrics.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py
index b280742d..fdd66a41 100644
--- a/luxonis_train/attached_modules/metrics/pml_metrics.py
+++ b/luxonis_train/attached_modules/metrics/pml_metrics.py
@@ -10,13 +10,15 @@
 def _pairwise_distances(embeddings, squared=False):
     """Compute the 2D matrix of distances between all the embeddings.
 
-    Args:
-        embeddings: tensor of shape (batch_size, embed_dim)
-        squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
-            If false, output is the pairwise euclidean distance matrix.
-
-    Returns:
-        pairwise_distances: tensor of shape (batch_size, batch_size)
+    @param embeddings: tensor of shape (batch_size, embed_dim)
+    @type embeddings: torch.Tensor
+    @param squared: If true, output is the pairwise squared euclidean
+        distance matrix. If false, output is the pairwise euclidean
+        distance matrix.
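+        Example (illustrative): embeddings [[0., 0.], [3., 4.]] give
+        [[0., 5.], [5., 0.]], or [[0., 25.], [25., 0.]] when squared.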
+ @type squared: bool + @return: pairwise_distances: tensor of shape (batch_size, + batch_size) + @rtype: torch.Tensor """ # Get the dot product between all embeddings # shape (batch_size, batch_size) From c360f15bc953405cf1ea15c6787ced733e479e41 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Fri, 6 Dec 2024 19:12:30 +0100 Subject: [PATCH 04/12] Fixed type errors --- .../attached_modules/losses/pml_loss.py | 3 + .../attached_modules/metrics/pml_metrics.py | 8 +++ .../visualizers/embeddings_visualizer.py | 25 +++++-- luxonis_train/nodes/backbones/ghostfacenet.py | 65 +------------------ 4 files changed, 31 insertions(+), 70 deletions(-) diff --git a/luxonis_train/attached_modules/losses/pml_loss.py b/luxonis_train/attached_modules/losses/pml_loss.py index aacd667b..1727d091 100644 --- a/luxonis_train/attached_modules/losses/pml_loss.py +++ b/luxonis_train/attached_modules/losses/pml_loss.py @@ -108,6 +108,9 @@ def __init__( def prepare(self, inputs, labels): embeddings = inputs["features"][0] + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0][:, 0] return embeddings, IDs diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py index fdd66a41..a6d4effa 100644 --- a/luxonis_train/attached_modules/metrics/pml_metrics.py +++ b/luxonis_train/attached_modules/metrics/pml_metrics.py @@ -79,6 +79,10 @@ def __init__(self, cross_batch_memory_size=0, **kwargs): def prepare(self, inputs, labels): embeddings = inputs["features"][0] + + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0][:, 0] return embeddings, IDs @@ -158,6 +162,10 @@ def __init__(self, cross_batch_memory_size=0, **kwargs): def prepare(self, inputs, labels): embeddings = inputs["features"][0] + + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0][:, 0] return embeddings, IDs diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py index b5fb5f0e..d1096bfa 100644 --- a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -29,6 +29,10 @@ def prepare( self, inputs: Packet[Tensor], labels: Labels | None ) -> tuple[Tensor, Tensor]: embeddings = inputs["features"][0] + + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0] return embeddings, IDs @@ -69,13 +73,20 @@ def forward( # Plot the embeddings fig, ax = plt.subplots(figsize=(10, 10)) - scatter = ax.scatter( - embeddings_2d[:, 0], - embeddings_2d[:, 1], - c=IDs.detach().cpu().numpy(), - cmap="viridis", - s=5, - ) + if IDs is not None: + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + c=IDs.detach().cpu().numpy(), + cmap="viridis", + s=5, + ) + else: + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + s=5, + ) fig.colorbar(scatter, ax=ax) ax.set_title("Embeddings Visualization") ax.set_xlabel("Dimension 1") diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py index c242633f..9641596d 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet.py @@ -263,6 +263,8 @@ def __init__( has_se = 
se_ratio is not None and se_ratio > 0.0 self.stride = stride + assert layer_id is not None, "Layer ID must be explicitly provided" + # Point-wise expansion if layer_id <= 1: self.ghost1 = GhostModuleV2( @@ -507,66 +509,3 @@ def forward(self, inps): x = self.pointwise_conv(x) x = self.classifier(x) return x - - # @property - # def task(self) -> str: - # return "label" - - # @property - # def tasks(self) -> dict: - # return [TaskType.LABEL] - - -if __name__ == "__main__": - W, H = 256, 256 - model = GhostFaceNetsV2(image_size=W) - model.eval() # Set the model to evaluation mode - - # Create a dummy input tensor of the appropriate size - x = torch.randn(1, 3, H, W) - - # Export the model - onnx_path = "ghostfacenet.onnx" - torch.onnx.export( - model, # model being run - x, # model input (or a tuple for multiple inputs) - onnx_path, # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=12, # the ONNX version to export the model to - do_constant_folding=True, # whether to execute constant folding for optimization - input_names=["input"], # the model's input names - output_names=["output"], # the model's output names - # dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes - # 'output' : {0 : 'batch_size'}} - ) - import os - - import numpy as np - import onnx - import onnxsim - - # logger.info("Simplifying ONNX model...") - model_onnx = onnx.load(onnx_path) - onnx_model, check = onnxsim.simplify(model_onnx) - if not check: - raise RuntimeError("Onnx simplify failed.") - onnx.save(onnx_model, onnx_path) - - # Add calibration data - dir = "shared_with_container/calibration_data/" - for file in os.listdir(dir): - os.remove(dir + file) - for i in range(20): - np_array = np.random.rand(1, 3, H, W).astype(np.float32) - np.save(f"{dir}{i:02d}.npy", np_array) - np_array.tofile(f"{dir}{i:02d}.raw") - - # Test backpropagation on the model - # Create a dummy target tensor of the appropriate size - Y = model(x) - target = torch.randn(1, 512) - loss_fn = torch.nn.MSELoss() - loss = loss_fn(Y, target) - model.zero_grad() - loss.backward() - print("Backpropagation test successful") From 6eda12af3fc0a591b95adf498434f28aa6863c09 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 11:51:08 +0100 Subject: [PATCH 05/12] Implemented improvements and suggestions. Separated GFN into class, blocks and variants. Added tests for all supported pytorch metric learning losses. 
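
For reference, a minimal sketch of what the loss wrapper builds for the reid
test config (SupConLoss with cross-batch memory; the tensor shapes and label
values below are illustrative, not part of this patch):

    import torch
    from pytorch_metric_learning.losses import CrossBatchMemory, SupConLoss

    # SupConLoss is listed in CrossBatchMemory.supported_losses(), so the
    # wrapper wraps it; unsupported losses are used directly with a warning.
    loss_fn = CrossBatchMemory(SupConLoss(), embedding_size=512)
    embeddings = torch.randn(16, 512)  # one 512-d embedding per sample
    ids = torch.randint(0, 4, (16,))   # integer identity labels
    loss = loss_fn(embeddings, ids)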
--- .../attached_modules/losses/__init__.py | 4 +- .../attached_modules/losses/pml_loss.py | 183 +++---- .../attached_modules/metrics/pml_metrics.py | 125 +++-- .../visualizers/embeddings_visualizer.py | 16 +- luxonis_train/nodes/backbones/__init__.py | 2 +- luxonis_train/nodes/backbones/ghostfacenet.py | 511 ------------------ .../nodes/backbones/ghostfacenet/__init__.py | 3 + .../nodes/backbones/ghostfacenet/blocks.py | 256 +++++++++ .../backbones/ghostfacenet/ghostfacenet.py | 159 ++++++ .../nodes/backbones/ghostfacenet/variants.py | 214 ++++++++ .../nodes/backbones/micronet/blocks.py | 23 +- tests/configs/reid.yaml | 2 +- tests/integration/test_reid.py | 28 +- 13 files changed, 837 insertions(+), 689 deletions(-) delete mode 100644 luxonis_train/nodes/backbones/ghostfacenet.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/__init__.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/blocks.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/variants.py diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index b320fada..2d0c77e1 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -7,7 +7,7 @@ from .ohem_bce_with_logits import OHEMBCEWithLogitsLoss from .ohem_cross_entropy import OHEMCrossEntropyLoss from .ohem_loss import OHEMLoss -from .pml_loss import MetricLearningLoss +from .pml_loss import EmbeddingLossWrapper from .reconstruction_segmentation_loss import ReconstructionSegmentationLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss @@ -27,5 +27,5 @@ "OHEMCrossEntropyLoss", "OHEMBCEWithLogitsLoss", "FOMOLocalizationLoss", - "MetricLearningLoss", + "EmbeddingLossWrapper", ] diff --git a/luxonis_train/attached_modules/losses/pml_loss.py b/luxonis_train/attached_modules/losses/pml_loss.py index 1727d091..959a5f68 100644 --- a/luxonis_train/attached_modules/losses/pml_loss.py +++ b/luxonis_train/attached_modules/losses/pml_loss.py @@ -1,87 +1,67 @@ -import warnings - -from pytorch_metric_learning.losses import ( - AngularLoss, - ArcFaceLoss, - CircleLoss, - ContrastiveLoss, - CosFaceLoss, - CrossBatchMemory, - DynamicSoftMarginLoss, - FastAPLoss, - GeneralizedLiftedStructureLoss, - HistogramLoss, - InstanceLoss, - IntraPairVarianceLoss, - LargeMarginSoftmaxLoss, - LiftedStructureLoss, - ManifoldLoss, - MarginLoss, - MultiSimilarityLoss, - NCALoss, - NormalizedSoftmaxLoss, - NPairsLoss, - NTXentLoss, - P2SGradLoss, - PNPLoss, - ProxyAnchorLoss, - ProxyNCALoss, - RankedListLoss, - SignalToNoiseRatioContrastiveLoss, - SoftTripleLoss, - SphereFaceLoss, - SubCenterArcFaceLoss, - SupConLoss, - TripletMarginLoss, - TupletMarginLoss, -) +import logging + +import pytorch_metric_learning.losses as pml_losses +from pytorch_metric_learning.losses import CrossBatchMemory from torch import Tensor from .base_loss import BaseLoss -# Dictionary mapping string keys to loss classes -loss_dict = { - "AngularLoss": AngularLoss, - "ArcFaceLoss": ArcFaceLoss, - "CircleLoss": CircleLoss, - "ContrastiveLoss": ContrastiveLoss, - "CosFaceLoss": CosFaceLoss, - "DynamicSoftMarginLoss": DynamicSoftMarginLoss, - "FastAPLoss": FastAPLoss, - "GeneralizedLiftedStructureLoss": GeneralizedLiftedStructureLoss, - "InstanceLoss": InstanceLoss, - "HistogramLoss": HistogramLoss, - "IntraPairVarianceLoss": 
IntraPairVarianceLoss, - "LargeMarginSoftmaxLoss": LargeMarginSoftmaxLoss, - "LiftedStructureLoss": LiftedStructureLoss, - "ManifoldLoss": ManifoldLoss, - "MarginLoss": MarginLoss, - "MultiSimilarityLoss": MultiSimilarityLoss, - "NCALoss": NCALoss, - "NormalizedSoftmaxLoss": NormalizedSoftmaxLoss, - "NPairsLoss": NPairsLoss, - "NTXentLoss": NTXentLoss, - "P2SGradLoss": P2SGradLoss, - "PNPLoss": PNPLoss, - "ProxyAnchorLoss": ProxyAnchorLoss, - "ProxyNCALoss": ProxyNCALoss, - "RankedListLoss": RankedListLoss, - "SignalToNoiseRatioContrastiveLoss": SignalToNoiseRatioContrastiveLoss, - "SoftTripleLoss": SoftTripleLoss, - "SphereFaceLoss": SphereFaceLoss, - "SubCenterArcFaceLoss": SubCenterArcFaceLoss, - "SupConLoss": SupConLoss, - "TripletMarginLoss": TripletMarginLoss, - "TupletMarginLoss": TupletMarginLoss, -} - - -class MetricLearningLoss(BaseLoss): +logger = logging.getLogger(__name__) + +ALL_EMBEDDING_LOSSES = [ + "AngularLoss", + "ArcFaceLoss", + "CircleLoss", + "ContrastiveLoss", + "CosFaceLoss", + "DynamicSoftMarginLoss", + "FastAPLoss", + "HistogramLoss", + "InstanceLoss", + "IntraPairVarianceLoss", + "LargeMarginSoftmaxLoss", + "GeneralizedLiftedStructureLoss", + "LiftedStructureLoss", + "MarginLoss", + "MultiSimilarityLoss", + "NPairsLoss", + "NCALoss", + "NormalizedSoftmaxLoss", + "NTXentLoss", + "PNPLoss", + "ProxyAnchorLoss", + "ProxyNCALoss", + "RankedListLoss", + "SignalToNoiseRatioContrastiveLoss", + "SoftTripleLoss", + "SphereFaceLoss", + "SubCenterArcFaceLoss", + "SupConLoss", + "ThresholdConsistentMarginLoss", + "TripletMarginLoss", + "TupletMarginLoss", +] + +CLASS_EMBEDDING_LOSSES = [ + "ArcFaceLoss", + "CosFaceLoss", + "LargeMarginSoftmaxLoss", + "NormalizedSoftmaxLoss", + "ProxyAnchorLoss", + "ProxyNCALoss", + "SoftTripleLoss", + "SphereFaceLoss", + "SubCenterArcFaceLoss", +] + + +class EmbeddingLossWrapper(BaseLoss): def __init__( self, loss_name: str, embedding_size: int = 512, cross_batch_memory_size=0, + num_classes: int = 0, loss_kwargs: dict | None = None, *args, **kwargs, @@ -89,34 +69,51 @@ def __init__( super().__init__(*args, **kwargs) if loss_kwargs is None: loss_kwargs = {} - self.loss_func = loss_dict[loss_name]( - **loss_kwargs - ) # Instantiate the loss object + + try: + loss_cls = getattr(pml_losses, loss_name) + except AttributeError as e: + raise ValueError( + f"Loss {loss_name} not found in pytorch_metric_learning" + ) from e + + if loss_name in CLASS_EMBEDDING_LOSSES: + if num_classes < 0: + raise ValueError( + f"Loss {loss_name} requires num_classes to be set to a positive value" + ) + loss_kwargs["num_classes"] = num_classes + loss_kwargs["embedding_size"] = embedding_size + + # If we wanted to support these losses, we would need to add a separate optimizer for them. + # They may be useful in some scenarios, so leaving this here for future reference. + raise ValueError( + f"Loss {loss_name} requires its own optimizer, and that is not currently supported." + ) + + self.loss_func = loss_cls(**loss_kwargs) + if cross_batch_memory_size > 0: if loss_name in CrossBatchMemory.supported_losses(): self.loss_func = CrossBatchMemory( self.loss_func, embedding_size=embedding_size ) else: - # Warn that cross_batch_memory_size is ignored - warnings.warn( - f"Cross batch memory is not supported for {loss_name}. Ignoring cross_batch_memory_size" + logger.warning( + f"Cross batch memory is not supported for {loss_name}. Ignoring cross_batch_memory_size." 
) - # self.miner_func = miner_func - - def prepare(self, inputs, labels): - embeddings = inputs["features"][0] + def prepare( + self, inputs: dict[str, list[Tensor]], labels: dict[str, list[Tensor]] + ) -> tuple[Tensor, Tensor]: + embeddings = self.get_input_tensors(inputs, "features")[0] - assert ( - labels is not None and "id" in labels - ), "ID labels are required for metric learning losses" - IDs = labels["id"][0][:, 0] - return embeddings, IDs + if labels is None or "id" not in labels: + raise ValueError("Labels must contain 'id' key") - def forward(self, inputs: Tensor, target: Tensor): - # miner_output = self.miner_func(inputs, target) + ids = labels["id"][0][:, 0] + return embeddings, ids + def forward(self, inputs: Tensor, target: Tensor) -> Tensor: loss = self.loss_func(inputs, target) - return loss diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py index a6d4effa..ad8b0d88 100644 --- a/luxonis_train/attached_modules/metrics/pml_metrics.py +++ b/luxonis_train/attached_modules/metrics/pml_metrics.py @@ -7,62 +7,6 @@ # to PyTorch from TensorFlow -def _pairwise_distances(embeddings, squared=False): - """Compute the 2D matrix of distances between all the embeddings. - - @param embeddings: tensor of shape (batch_size, embed_dim) - @type embeddings: torch.Tensor - @param squared: If true, output is the pairwise squared euclidean - distance matrix. If false, output is the pairwise euclidean - distance matrix. - @type squared: bool - @return: pairwise_distances: tensor of shape (batch_size, - batch_size) - @rtype: torch.Tensor - """ - # Get the dot product between all embeddings - # shape (batch_size, batch_size) - dot_product = torch.matmul(embeddings, embeddings.t()) - - # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`. - # This also provides more numerical stability (the diagonal of the result will be exactly 0). 
- # shape (batch_size,) - square_norm = torch.diag(dot_product) - - # Compute the pairwise distance matrix as we have: - # ||a - b||^2 = ||a||^2 - 2 + ||b||^2 - # shape (batch_size, batch_size) - distances = ( - square_norm.unsqueeze(0) - 2.0 * dot_product + square_norm.unsqueeze(1) - ) - - # Because of computation errors, some distances might be negative so we put everything >= 0.0 - distances = torch.max(distances, torch.tensor(0.0)) - - if not squared: - # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal) - # we need to add a small epsilon where distances == 0.0 - mask = (distances == 0.0).float() - distances = distances + mask * 1e-16 - - distances = torch.sqrt(distances) - - # Correct the epsilon added: set the distances on the mask to be exactly 0.0 - distances = distances * (1.0 - mask) - - return distances - - -def _get_anchor_positive_triplet_mask(labels): - indices_equal = torch.eye( - labels.shape[0], dtype=torch.uint8, device=labels.device - ) - indices_not_equal = ~indices_equal - labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1) - mask = indices_not_equal & labels_equal - return mask - - class ClosestIsPositiveAccuracy(BaseMetric): def __init__(self, cross_batch_memory_size=0, **kwargs): super().__init__(**kwargs) @@ -83,8 +27,8 @@ def prepare(self, inputs, labels): assert ( labels is not None and "id" in labels ), "ID labels are required for metric learning losses" - IDs = labels["id"][0][:, 0] - return embeddings, IDs + ids = labels["id"][0][:, 0] + return embeddings, ids def update(self, inputs: Tensor, target: Tensor): embeddings, labels = inputs, target @@ -166,8 +110,8 @@ def prepare(self, inputs, labels): assert ( labels is not None and "id" in labels ), "ID labels are required for metric learning losses" - IDs = labels["id"][0][:, 0] - return embeddings, IDs + ids = labels["id"][0][:, 0] + return embeddings, ids def update(self, inputs: Tensor, target: Tensor): embeddings, labels = inputs, target @@ -211,13 +155,18 @@ def update(self, inputs: Tensor, target: Tensor): # Get the positive mask and convert it to boolean positive_mask = _get_anchor_positive_triplet_mask(labels).bool() + # Filter out distances to negative elements w.r.t. each query embedding only_positive_distances = pairwise_distances.clone() only_positive_distances[~positive_mask] = float("inf") + # From the positive distances, get the closest positive distance for each query embedding closest_positive_distances, _ = torch.min( only_positive_distances, dim=1 ) + # Calculate the difference between the closest distance (any) and closest positive distances + # - this tells us how much closer should the closest positive be in order for the embedding + # to be considered correct non_inf_mask = closest_positive_distances != float("inf") difference = closest_positive_distances - closest_distances difference = difference[non_inf_mask] @@ -256,3 +205,59 @@ def compute(self): closest_vs_positive_distances ), } + + +def _pairwise_distances(embeddings, squared=False): + """Compute the 2D matrix of distances between all the embeddings. + + @param embeddings: tensor of shape (batch_size, embed_dim) + @type embeddings: torch.Tensor + @param squared: If true, output is the pairwise squared euclidean + distance matrix. If false, output is the pairwise euclidean + distance matrix. 
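+        Example (illustrative): embeddings [[0., 0.], [3., 4.]] give
+        [[0., 5.], [5., 0.]], or [[0., 25.], [25., 0.]] when squared.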
+ @type squared: bool + @return: pairwise_distances: tensor of shape (batch_size, + batch_size) + @rtype: torch.Tensor + """ + # Get the dot product between all embeddings + # shape (batch_size, batch_size) + dot_product = torch.matmul(embeddings, embeddings.t()) + + # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`. + # This also provides more numerical stability (the diagonal of the result will be exactly 0.). + # shape (batch_size,) + square_norm = torch.diag(dot_product) + + # Compute the pairwise distance matrix as we have: + # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2 + # shape (batch_size, batch_size) + distances = ( + square_norm.unsqueeze(0) - 2.0 * dot_product + square_norm.unsqueeze(1) + ) + + # Because of computation errors, some distances might be negative so we put everything >= 0.0 + distances = torch.max(distances, torch.tensor(0.0)) + + if not squared: + # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal) + # we need to add a small epsilon where distances == 0.0 + mask = (distances == 0.0).float() + distances = distances + mask * 1e-16 + + distances = torch.sqrt(distances) + + # Correct the epsilon added: set the distances on the mask to be exactly 0.0 + distances = distances * (1.0 - mask) + + return distances + + +def _get_anchor_positive_triplet_mask(labels): + indices_equal = torch.eye( + labels.shape[0], dtype=torch.uint8, device=labels.device + ) + indices_not_equal = ~indices_equal + labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1) + mask = indices_not_equal & labels_equal + return mask diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py index d1096bfa..d8e5c940 100644 --- a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -33,15 +33,15 @@ def prepare( assert ( labels is not None and "id" in labels ), "ID labels are required for metric learning losses" - IDs = labels["id"][0] - return embeddings, IDs + ids = labels["id"][0] + return embeddings, ids def forward( self, label_canvas: Tensor, prediction_canvas: Tensor, embeddings: Tensor, - IDs: Tensor | None, + ids: Tensor | None, **kwargs, ) -> Tensor: """Creates a visualization of the embeddings. @@ -52,14 +52,14 @@ def forward( @param prediction_canvas: The canvas to draw the predictions on. @type embeddings: Tensor @param embeddings: The embeddings to visualize. - @type IDs: Tensor - @param IDs: The IDs to visualize. + @type ids: Tensor + @param ids: The ids to visualize. @rtype: Tensor @return: An embedding space projection. """ # Embeddings: [B, D], D = e.g.
512 - # IDs: [B, 1], corresponding to the embeddings + # ids: [B, 1], corresponding to the embeddings # Convert embeddings to numpy array embeddings_np = embeddings.detach().cpu().numpy() @@ -73,11 +73,11 @@ def forward( # Plot the embeddings fig, ax = plt.subplots(figsize=(10, 10)) - if IDs is not None: + if ids is not None: scatter = ax.scatter( embeddings_2d[:, 0], embeddings_2d[:, 1], - c=IDs.detach().cpu().numpy(), + c=ids.detach().cpu().numpy(), cmap="viridis", s=5, ) diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index f5319981..da063a5e 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -2,7 +2,7 @@ from .ddrnet import DDRNet from .efficientnet import EfficientNet from .efficientrep import EfficientRep -from .ghostfacenet import GhostFaceNetsV2 +from .ghostfacenet.ghostfacenet import GhostFaceNetsV2 from .micronet import MicroNet from .mobilenetv2 import MobileNetV2 from .mobileone import MobileOne diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py deleted file mode 100644 index 9641596d..00000000 --- a/luxonis_train/nodes/backbones/ghostfacenet.py +++ /dev/null @@ -1,511 +0,0 @@ -# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py - - -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from luxonis_train.nodes.base_node import BaseNode - - -def _make_divisible(v, divisor, min_value=None): - """This function is taken from the original tf repo. - - It ensures that all layers have a channel number that is divisible by 8 - It can be seen here: - https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - """ - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. 
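The channel-rounding helper deleted here survives as the shared micronet version imported later in this patch. A self-contained check of its behaviour, copying the definition as given:

def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v

assert _make_divisible(22, 8) == 24  # rounds to the nearest multiple of 8
assert _make_divisible(10, 8) == 16  # 8 would be a >10% drop, so bump up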
- if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def hard_sigmoid(x, inplace: bool = False): - if inplace: - return x.add_(3.0).clamp_(0.0, 6.0).div_(6.0) - else: - return F.relu6(x + 3.0) / 6.0 - - -class SqueezeExcite(nn.Module): - def __init__( - self, - in_chs, - se_ratio=0.25, - reduced_base_chs=None, - act_layer=nn.PReLU, - gate_fn=hard_sigmoid, - divisor=4, - **_, - ): - super(SqueezeExcite, self).__init__() - self.gate_fn = gate_fn - reduced_chs = _make_divisible( - (reduced_base_chs or in_chs) * se_ratio, divisor - ) - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) - self.act1 = act_layer() - self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) - - def forward(self, x): - x_se = self.avg_pool(x) - x_se = self.conv_reduce(x_se) - x_se = self.act1(x_se) - x_se = self.conv_expand(x_se) - x = x * self.gate_fn(x_se) - return x - - -class ConvBnAct(nn.Module): - def __init__( - self, in_chs, out_chs, kernel_size, stride=1, act_layer=nn.PReLU - ): - super(ConvBnAct, self).__init__() - self.conv = nn.Conv2d( - in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False - ) - self.bn1 = nn.BatchNorm2d(out_chs) - self.act1 = act_layer() - - def forward(self, x): - x = self.conv(x) - x = self.bn1(x) - x = self.act1(x) - return x - - -class ModifiedGDC(nn.Module): - def __init__( - self, image_size, in_chs, num_classes, dropout, emb=512 - ): # dropout implementation is in the original code but not in the paper - super(ModifiedGDC, self).__init__() - - if image_size % 32 == 0: - self.conv_dw = nn.Conv2d( - in_chs, - in_chs, - kernel_size=(image_size // 32), - groups=in_chs, - bias=False, - ) - else: - self.conv_dw = nn.Conv2d( - in_chs, - in_chs, - kernel_size=(image_size // 32 + 1), - groups=in_chs, - bias=False, - ) - self.bn1 = nn.BatchNorm2d(in_chs) - self.dropout = nn.Dropout(dropout) - - self.conv = nn.Conv2d(in_chs, emb, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm1d(emb) - self.linear = ( - nn.Linear(emb, num_classes) if num_classes else nn.Identity() - ) - - def forward(self, inps): - x = inps - x = self.conv_dw(x) - x = self.bn1(x) - x = self.dropout(x) - # # Add spots to the features - # x = torch.cat([x, spots.view(spots.size(0), -1, 1, 1)], dim=1) - x = self.conv(x) - x = x.view(x.size(0), -1) # Flatten - x = self.bn2(x) - x = self.linear(x) - return x - - -class GhostModuleV2(nn.Module): - def __init__( - self, - inp, - oup, - kernel_size=1, - ratio=2, - dw_size=3, - stride=1, - prelu=True, - mode=None, - args=None, - ): - super(GhostModuleV2, self).__init__() - self.mode = mode - self.gate_fn = nn.Sigmoid() - - if self.mode in ["original"]: - self.oup = oup - init_channels = math.ceil(oup / ratio) - new_channels = init_channels * (ratio - 1) - self.primary_conv = nn.Sequential( - nn.Conv2d( - inp, - init_channels, - kernel_size, - stride, - kernel_size // 2, - bias=False, - ), - nn.BatchNorm2d(init_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - self.cheap_operation = nn.Sequential( - nn.Conv2d( - init_channels, - new_channels, - dw_size, - 1, - dw_size // 2, - groups=init_channels, - bias=False, - ), - nn.BatchNorm2d(new_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - elif self.mode in ["attn"]: # DFC - self.oup = oup - init_channels = math.ceil(oup / ratio) - new_channels = init_channels * (ratio - 1) - self.primary_conv = nn.Sequential( - nn.Conv2d( - inp, - init_channels, - kernel_size, - stride, - kernel_size // 2, - bias=False, - ), - 
nn.BatchNorm2d(init_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - self.cheap_operation = nn.Sequential( - nn.Conv2d( - init_channels, - new_channels, - dw_size, - 1, - dw_size // 2, - groups=init_channels, - bias=False, - ), - nn.BatchNorm2d(new_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - self.short_conv = nn.Sequential( - nn.Conv2d( - inp, oup, kernel_size, stride, kernel_size // 2, bias=False - ), - nn.BatchNorm2d(oup), - nn.Conv2d( - oup, - oup, - kernel_size=(1, 5), - stride=1, - padding=(0, 2), - groups=oup, - bias=False, - ), - nn.BatchNorm2d(oup), - nn.Conv2d( - oup, - oup, - kernel_size=(5, 1), - stride=1, - padding=(2, 0), - groups=oup, - bias=False, - ), - nn.BatchNorm2d(oup), - ) - - def forward(self, x): - if self.mode in ["original"]: - x1 = self.primary_conv(x) - x2 = self.cheap_operation(x1) - out = torch.cat([x1, x2], dim=1) - return out[:, : self.oup, :, :] - elif self.mode in ["attn"]: - res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2)) - x1 = self.primary_conv(x) - x2 = self.cheap_operation(x1) - out = torch.cat([x1, x2], dim=1) - return out[:, : self.oup, :, :] * F.interpolate( - self.gate_fn(res), - size=(out.shape[-2], out.shape[-1]), - mode="nearest", - ) - - -class GhostBottleneckV2(nn.Module): - def __init__( - self, - in_chs, - mid_chs, - out_chs, - dw_kernel_size=3, - stride=1, - act_layer=nn.PReLU, - se_ratio=0.0, - layer_id=None, - args=None, - ): - super(GhostBottleneckV2, self).__init__() - has_se = se_ratio is not None and se_ratio > 0.0 - self.stride = stride - - assert layer_id is not None, "Layer ID must be explicitly provided" - - # Point-wise expansion - if layer_id <= 1: - self.ghost1 = GhostModuleV2( - in_chs, mid_chs, prelu=True, mode="original", args=args - ) - else: - self.ghost1 = GhostModuleV2( - in_chs, mid_chs, prelu=True, mode="attn", args=args - ) - - # Depth-wise convolution - if self.stride > 1: - self.conv_dw = nn.Conv2d( - mid_chs, - mid_chs, - dw_kernel_size, - stride=stride, - padding=(dw_kernel_size - 1) // 2, - groups=mid_chs, - bias=False, - ) - self.bn_dw = nn.BatchNorm2d(mid_chs) - - # Squeeze-and-excitation - if has_se: - self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio) - else: - self.se = None - - self.ghost2 = GhostModuleV2( - mid_chs, out_chs, prelu=False, mode="original", args=args - ) - - # shortcut - if in_chs == out_chs and self.stride == 1: - self.shortcut = nn.Sequential() - else: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_chs, - in_chs, - dw_kernel_size, - stride=stride, - padding=(dw_kernel_size - 1) // 2, - groups=in_chs, - bias=False, - ), - nn.BatchNorm2d(in_chs), - nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(out_chs), - ) - - def forward(self, x): - residual = x - x = self.ghost1(x) - if self.stride > 1: - x = self.conv_dw(x) - x = self.bn_dw(x) - if self.se is not None: - x = self.se(x) - x = self.ghost2(x) - x += self.shortcut(residual) - return x - - -# NODES.register_module() -class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): - def unwrap(self, inputs): - return [inputs[0]["features"][0]] - - def wrap(self, outputs): - return {"features": [outputs]} - - def set_export_mode(self, mode: bool = True): - self.export_mode = mode - self.train(not mode) - - def __init__( - self, - cfgs=None, - embedding_size=512, - num_classes=0, - width=1.0, - dropout=0.2, - block=GhostBottleneckV2, - add_pointwise_conv=False, - bn_momentum=0.9, - bn_epsilon=1e-5, - init_kaiming=True, - block_args=None, - *args, - **kwargs, - 
): - """GhostFaceNetsV2 backbone. - - GhostFaceNetsV2 is a convolutional neural network architecture focused on face recognition, but it is - adaptable to generic embedding tasks. It is based on the GhostNet architecture and uses Ghost BottleneckV2 blocks. - - Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py} - - @license: U{MIT License - } - - @see: U{GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations - } - - @type cfgs: list[list[list[int]]] | None - @param cfgs: List of Ghost BottleneckV2 configurations. Defaults to None, which uses the original GhostFaceNetsV2 configuration. - @type embedding_size: int - @param embedding_size: Size of the embedding. Defaults to 512. - @type num_classes: int - @param num_classes: Number of classes. Defaults to 0, which makes the network output the raw embeddings. Otherwise it can be used to - add another linear layer to the network, which is useful for training using ArcFace or similar classification-based losses that - require the user to drop the last layer of the network. - @type width: float - @param width: Width multiplier. Increases complexity and number of parameters. Defaults to 1.0. - @type dropout: float - @param dropout: Dropout rate. Defaults to 0.2. - @type block: nn.Module - @param block: Ghost BottleneckV2 block. Defaults to GhostBottleneckV2. - @type add_pointwise_conv: bool - @param add_pointwise_conv: If True, adds a pointwise convolution layer at the end of the network. Defaults to False. - @type bn_momentum: float - @param bn_momentum: Batch normalization momentum. Defaults to 0.9. - @type bn_epsilon: float - @param bn_epsilon: Batch normalization epsilon. Defaults to 1e-5. - @type init_kaiming: bool - @param init_kaiming: If True, initializes the weights using the Kaiming initialization. Defaults to True. - @type block_args: dict - @param block_args: Arguments to pass to the block. Defaults to None. 
- """ - # kwargs['_tasks'] = {TaskType.LABEL: 'features'} - super().__init__(*args, **kwargs) - - inp_shape = kwargs["input_shapes"][0]["features"][0] - # spots_shape = kwargs['input_shapes'][0]['features'][1] - - image_size = inp_shape[2] - channels = inp_shape[1] - if cfgs is None: - self.cfgs = [ - # k, t, c, SE, s - [[3, 16, 16, 0, 1]], - [[3, 48, 24, 0, 2]], - [[3, 72, 24, 0, 1]], - [[5, 72, 40, 0.25, 2]], - [[5, 120, 40, 0.25, 1]], - [[3, 240, 80, 0, 2]], - [ - [3, 200, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 480, 112, 0.25, 1], - [3, 672, 112, 0.25, 1], - ], - [[5, 672, 160, 0.25, 2]], - [ - [5, 960, 160, 0, 1], - [5, 960, 160, 0.25, 1], - [5, 960, 160, 0, 1], - [5, 960, 160, 0.25, 1], - ], - ] - else: - self.cfgs = cfgs - - # building first layer - output_channel = _make_divisible(16 * width, 4) - self.conv_stem = nn.Conv2d( - channels, output_channel, 3, 2, 1, bias=False - ) - self.bn1 = nn.BatchNorm2d(output_channel) - self.act1 = nn.PReLU() - input_channel = output_channel - - # building inverted residual blocks - stages = [] - layer_id = 0 - for cfg in self.cfgs: - layers = [] - for k, exp_size, c, se_ratio, s in cfg: - output_channel = _make_divisible(c * width, 4) - hidden_channel = _make_divisible(exp_size * width, 4) - if block == GhostBottleneckV2: - layers.append( - block( - input_channel, - hidden_channel, - output_channel, - k, - s, - se_ratio=se_ratio, - layer_id=layer_id, - args=block_args, - ) - ) - input_channel = output_channel - layer_id += 1 - stages.append(nn.Sequential(*layers)) - - output_channel = _make_divisible(exp_size * width, 4) - stages.append( - nn.Sequential(ConvBnAct(input_channel, output_channel, 1)) - ) - - self.blocks = nn.Sequential(*stages) - - # building last several layers - pointwise_conv = [] - if add_pointwise_conv: - pointwise_conv.append( - nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) - ) - pointwise_conv.append(nn.BatchNorm2d(output_channel)) - pointwise_conv.append(nn.PReLU()) - else: - pointwise_conv.append(nn.Sequential()) - - self.pointwise_conv = nn.Sequential(*pointwise_conv) - self.classifier = ModifiedGDC( - image_size, output_channel, num_classes, dropout, embedding_size - ) - - # Initialize weights - for m in self.modules(): - if init_kaiming: - if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): - fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight) - negative_slope = 0.25 # Default value for PReLU in PyTorch, change it if you use custom value - m.weight.data.normal_( - 0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2))) - ) - if isinstance(m, nn.BatchNorm2d): - m.momentum, m.eps = bn_momentum, bn_epsilon - - def forward(self, inps): - x = inps[0] - x = self.conv_stem(x) - x = self.bn1(x) - x = self.act1(x) - x = self.blocks(x) - x = self.pointwise_conv(x) - x = self.classifier(x) - return x diff --git a/luxonis_train/nodes/backbones/ghostfacenet/__init__.py b/luxonis_train/nodes/backbones/ghostfacenet/__init__.py new file mode 100644 index 00000000..85ed4447 --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/__init__.py @@ -0,0 +1,3 @@ +from .ghostfacenet import GhostFaceNetsV2 + +__all__ = ["GhostFaceNetsV2"] diff --git a/luxonis_train/nodes/backbones/ghostfacenet/blocks.py b/luxonis_train/nodes/backbones/ghostfacenet/blocks.py new file mode 100644 index 00000000..46a9ba27 --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/blocks.py @@ -0,0 +1,256 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from 
luxonis_train.nodes.backbones.micronet.blocks import _make_divisible +from luxonis_train.nodes.blocks import SqueezeExciteBlock + + +class ModifiedGDC(nn.Module): + def __init__(self, image_size, in_chs, num_classes, dropout, emb=512): + super().__init__() + + if image_size % 32 == 0: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32), + groups=in_chs, + bias=False, + ) + else: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32 + 1), + groups=in_chs, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(in_chs) + self.dropout = nn.Dropout(dropout) + + self.conv = nn.Conv2d(in_chs, emb, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm1d(emb) + self.linear = ( + nn.Linear(emb, num_classes) if num_classes else nn.Identity() + ) + + def forward(self, inps): + x = inps + x = self.conv_dw(x) + x = self.bn1(x) + x = self.dropout(x) + x = self.conv(x) + x = x.view(x.size(0), -1) + x = self.bn2(x) + x = self.linear(x) + return x + + +class GhostModuleV2(nn.Module): + def __init__( + self, + inp, + oup, + kernel_size=1, + ratio=2, + dw_size=3, + stride=1, + prelu=True, + mode=None, + args=None, + ): + super(GhostModuleV2, self).__init__() + self.mode = mode + self.gate_fn = nn.Sigmoid() + + if self.mode in ["original"]: + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + elif self.mode in ["attn"]: # DFC + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.short_conv = nn.Sequential( + nn.Conv2d( + inp, oup, kernel_size, stride, kernel_size // 2, bias=False + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(1, 5), + stride=1, + padding=(0, 2), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(5, 1), + stride=1, + padding=(2, 0), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.mode in ["original"]: + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] + elif self.mode in ["attn"]: + res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2)) + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] * F.interpolate( + self.gate_fn(res), + size=(out.shape[-2], out.shape[-1]), + mode="nearest", + ) + + +class GhostBottleneckV2(nn.Module): + def __init__( + self, + in_chs, + mid_chs, + out_chs, + dw_kernel_size=3, + stride=1, + act_layer=nn.PReLU, + se_ratio=0.0, + layer_id=None, 
+ args=None, + ): + super(GhostBottleneckV2, self).__init__() + has_se = se_ratio is not None and se_ratio > 0.0 + self.stride = stride + + assert layer_id is not None, "Layer ID must be explicitly provided" + + # Point-wise expansion + if layer_id <= 1: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="original", args=args + ) + else: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="attn", args=args + ) + + # Depth-wise convolution + if self.stride > 1: + self.conv_dw = nn.Conv2d( + mid_chs, + mid_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=mid_chs, + bias=False, + ) + self.bn_dw = nn.BatchNorm2d(mid_chs) + + # Squeeze-and-excitation + if has_se: + reduced_chs = _make_divisible(mid_chs * se_ratio, 4) + self.se = SqueezeExciteBlock( + mid_chs, reduced_chs, True, activation=nn.PReLU() + ) + else: + self.se = None + + self.ghost2 = GhostModuleV2( + mid_chs, out_chs, prelu=False, mode="original", args=args + ) + + # shortcut + if in_chs == out_chs and self.stride == 1: + self.shortcut = nn.Sequential() + else: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_chs, + in_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=in_chs, + bias=False, + ), + nn.BatchNorm2d(in_chs), + nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_chs), + ) + + def forward(self, x): + residual = x + x = self.ghost1(x) + if self.stride > 1: + x = self.conv_dw(x) + x = self.bn_dw(x) + if self.se is not None: + x = self.se(x) + x = self.ghost2(x) + x += self.shortcut(residual) + return x diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py new file mode 100644 index 00000000..8bb61fee --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -0,0 +1,159 @@ +# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py +import math +from typing import Literal + +import torch +import torch.nn as nn + +from luxonis_train.nodes.backbones.ghostfacenet.blocks import ( + GhostBottleneckV2, + ModifiedGDC, +) +from luxonis_train.nodes.backbones.ghostfacenet.variants import get_variant +from luxonis_train.nodes.backbones.micronet.blocks import _make_divisible +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ConvModule + + +class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): + in_channels: list[int] + in_width: list[int] + + def __init__( + self, + embedding_size=512, + num_classes=-1, + variant: Literal["V2"] = "V2", + *args, + **kwargs, + ): + """GhostFaceNetsV2 backbone. + + GhostFaceNetsV2 is a convolutional neural network architecture focused on face recognition, but it is + adaptable to generic embedding tasks. It is based on the GhostNet architecture and uses Ghost BottleneckV2 blocks. + + Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py} + + @license: U{MIT License + } + + @see: U{GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations + } + + @type embedding_size: int + @param embedding_size: Size of the embedding. Defaults to 512. + @type num_classes: int + @param num_classes: Number of classes. Defaults to -1, which leaves the default variant value in. 
Otherwise it can be used to + have the network return raw embeddings (=0) or add another linear layer to the network, which is useful for training using + ArcFace or similar classification-based losses that require the user to drop the last layer of the network. + @type variant: Literal["V2"] + @param variant: Variant of the GhostFaceNets embedding model. Defaults to "V2" (which is the only variant available). + """ + super().__init__(*args, **kwargs) + + image_size = self.in_width[0] + channels = self.in_channels[0] + var = get_variant(variant) + if num_classes >= 0: + var.num_classes = num_classes + self.cfgs = var.cfgs + + # Building first layer + output_channel = _make_divisible(int(16 * var.width), 4) + self.conv_stem = nn.Conv2d( + channels, output_channel, 3, 2, 1, bias=False + ) + self.bn1 = nn.BatchNorm2d(output_channel) + self.act1 = nn.PReLU() + input_channel = output_channel + + # Building Ghost BottleneckV2 blocks + stages = [] + layer_id = 0 + for cfg in self.cfgs: + layers = [] + for b_cfg in cfg: + output_channel = _make_divisible( + b_cfg.output_channels * var.width, 4 + ) + hidden_channel = _make_divisible( + b_cfg.expand_size * var.width, 4 + ) + if var.block == GhostBottleneckV2: + layers.append( + var.block( + input_channel, + hidden_channel, + output_channel, + b_cfg.kernel_size, + b_cfg.stride, + se_ratio=b_cfg.se_ratio, + layer_id=layer_id, + args=var.block_args, + ) + ) + input_channel = output_channel + layer_id += 1 + stages.append(nn.Sequential(*layers)) + + output_channel = _make_divisible(b_cfg.expand_size * var.width, 4) + stages.append( + nn.Sequential( + ConvModule( + input_channel, + output_channel, + kernel_size=1, + activation=nn.PReLU(), + ) + ) + ) + + self.blocks = nn.Sequential(*stages) + + # Building pointwise convolution + pointwise_conv = [] + if var.add_pointwise_conv: + pointwise_conv.append( + nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) + ) + pointwise_conv.append(nn.BatchNorm2d(output_channel)) + pointwise_conv.append(nn.PReLU()) + else: + pointwise_conv.append(nn.Sequential()) + + self.pointwise_conv = nn.Sequential(*pointwise_conv) + self.classifier = ModifiedGDC( + image_size, + output_channel, + var.num_classes, + var.dropout, + embedding_size, + ) + + # Initializing weights + for m in self.modules(): + if var.init_kaiming: + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight) + negative_slope = 0.25 + m.weight.data.normal_( + 0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2))) + ) + if isinstance(m, nn.BatchNorm2d): + m.momentum, m.eps = var.bn_momentum, var.bn_epsilon + + def unwrap(self, inputs): + return [inputs[0]["features"][0]] + + def wrap(self, outputs): + return {"features": [outputs]} + + def forward(self, inps): + x = inps[0] + x = self.conv_stem(x) + x = self.bn1(x) + x = self.act1(x) + x = self.blocks(x) + x = self.pointwise_conv(x) + x = self.classifier(x) + return x diff --git a/luxonis_train/nodes/backbones/ghostfacenet/variants.py b/luxonis_train/nodes/backbones/ghostfacenet/variants.py new file mode 100644 index 00000000..0e88ecfc --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/variants.py @@ -0,0 +1,214 @@ +from typing import List, Literal + +from pydantic import BaseModel +from torch import nn + +from luxonis_train.nodes.backbones.ghostfacenet.blocks import GhostBottleneckV2 + + +class BlockConfig(BaseModel): + kernel_size: int + expand_size: int + output_channels: int + se_ratio: float + stride: int + + +class 
GhostFaceNetsVariant(BaseModel): + """Variant of the GhostFaceNets embedding model. + + @type cfgs: List[List[BlockConfig]] + @param cfgs: List of Ghost BottleneckV2 configurations. + @type num_classes: int + @param num_classes: Number of classes. Defaults to 0, which makes + the network output the raw embeddings. Otherwise it can be used + to add another linear layer to the network, which is useful for + training using ArcFace or similar classification-based losses + that require the user to drop the last layer of the network. + @type width: int + @param width: Width multiplier. Increases complexity and number of + parameters. Defaults to 1.0. + @type dropout: float + @param dropout: Dropout rate. Defaults to 0.2. + @type block: nn.Module + @param block: Ghost BottleneckV2 block. Defaults to + GhostBottleneckV2. + @type add_pointwise_conv: bool + @param add_pointwise_conv: If True, adds a pointwise convolution + layer at the end of the network. Defaults to False. + @type bn_momentum: float + @param bn_momentum: Batch normalization momentum. Defaults to 0.9. + @type bn_epsilon: float + @param bn_epsilon: Batch normalization epsilon. Defaults to 1e-5. + @type init_kaiming: bool + @param init_kaiming: If True, initializes the weights using the + Kaiming initialization. Defaults to True. + @type block_args: dict + @param block_args: Arguments to pass to the block. Defaults to None. + """ + + num_classes: int + width: int + dropout: float + block: type[nn.Module] + add_pointwise_conv: bool + bn_momentum: float + bn_epsilon: float + init_kaiming: bool + block_args: dict | None + cfgs: List[List[BlockConfig]] + + +V2 = GhostFaceNetsVariant( + num_classes=0, + width=1, + dropout=0.2, + block=GhostBottleneckV2, + add_pointwise_conv=False, + bn_momentum=0.9, + bn_epsilon=1e-5, + init_kaiming=True, + block_args=None, + cfgs=[ + [ + BlockConfig( + kernel_size=3, + expand_size=16, + output_channels=16, + se_ratio=0.0, + stride=1, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=48, + output_channels=24, + se_ratio=0.0, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=72, + output_channels=24, + se_ratio=0.0, + stride=1, + ) + ], + [ + BlockConfig( + kernel_size=5, + expand_size=72, + output_channels=40, + se_ratio=0.25, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=5, + expand_size=120, + output_channels=40, + se_ratio=0.25, + stride=1, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=240, + output_channels=80, + se_ratio=0.0, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=200, + output_channels=80, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=184, + output_channels=80, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=184, + output_channels=80, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=480, + output_channels=112, + se_ratio=0.25, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=672, + output_channels=112, + se_ratio=0.25, + stride=1, + ), + ], + [ + BlockConfig( + kernel_size=5, + expand_size=672, + output_channels=160, + se_ratio=0.25, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=5, + expand_size=960, + output_channels=160, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=5, + expand_size=960, + output_channels=160, + se_ratio=0.25, + stride=1, + ), + BlockConfig( + kernel_size=5, + expand_size=960, + output_channels=160, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=5, + 
expand_size=960, + output_channels=160, + se_ratio=0.25, + stride=1, + ), + ], + ], +) + + +def get_variant(variant: Literal["V2"]) -> GhostFaceNetsVariant: + variants = {"V2": V2} + if variant not in variants: # pragma: no cover + raise ValueError( + "GhostFaceNets model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py index 3da5e15e..b29082cf 100644 --- a/luxonis_train/nodes/backbones/micronet/blocks.py +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -357,7 +357,7 @@ def __init__( self.avg_pool = nn.AdaptiveAvgPool2d(1) - squeeze_channels = self._make_divisible(in_channels // reduction, 4) + squeeze_channels = _make_divisible(in_channels // reduction, 4) self.fc = nn.Sequential( nn.Linear(in_channels, squeeze_channels), @@ -413,16 +413,17 @@ def forward(self, x: Tensor) -> Tensor: return out - def _make_divisible( - self, value: int, divisor: int, min_value: int | None = None - ) -> int: - if min_value is None: - min_value = divisor - new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * value: - new_v += divisor - return new_v + +def _make_divisible( + value: int, divisor: int, min_value: int | None = None +) -> int: + if min_value is None: + min_value = divisor + new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * value: + new_v += divisor + return new_v class SpatialSepConvSF(nn.Module): diff --git a/tests/configs/reid.yaml b/tests/configs/reid.yaml index d9c0ec11..21ca2748 100644 --- a/tests/configs/reid.yaml +++ b/tests/configs/reid.yaml @@ -11,7 +11,7 @@ model: embedding_size: &embedding_size 512 losses: - - name: MetricLearningLoss + - name: EmbeddingLossWrapper params: loss_name: SupConLoss embedding_size: *embedding_size diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py index 9ed4e867..0d006072 100644 --- a/tests/integration/test_reid.py +++ b/tests/integration/test_reid.py @@ -5,6 +5,10 @@ import pytest import torch +from luxonis_train.attached_modules.losses.pml_loss import ( + ALL_EMBEDDING_LOSSES, + CLASS_EMBEDDING_LOSSES, +) from luxonis_train.core import LuxonisModel from luxonis_train.enums import TaskType from luxonis_train.loaders import BaseLoaderTorch @@ -15,6 +19,8 @@ ONNX_PATH = Path("tests/integration/_model.onnx") STUDY_PATH = Path("study_local.db") +NUM_INDIVIDUALS = 100 + class CustomReIDLoader(BaseLoaderTorch): def __init__(self, *args, **kwargs): @@ -35,7 +41,7 @@ def __getitem__(self, _): # pragma: no cover } # Fake labels - id = torch.randint(0, 1000, (1,), dtype=torch.int64) + id = torch.randint(0, NUM_INDIVIDUALS, (1,), dtype=torch.int64) labels = { "id": (id, TaskType.LABEL), } @@ -76,8 +82,26 @@ def clear_files(): ONNX_PATH.unlink(missing_ok=True) -def test_reid(opts: dict[str, Any], infer_path: Path): +not_class_based_losses = ALL_EMBEDDING_LOSSES.copy() +for loss in CLASS_EMBEDDING_LOSSES: + not_class_based_losses.remove(loss) + + +@pytest.mark.parametrize("loss_name", not_class_based_losses) +def test_reid(opts: dict[str, Any], infer_path: Path, loss_name: str): config_file = "tests/configs/reid.yaml" + opts["model.losses.0.params.loss_name"] = loss_name + + # if loss_name in CLASS_EMBEDDING_LOSSES: + # opts["model.losses.0.params.num_classes"] = 
NUM_INDIVIDUALS + # opts["model.nodes.0.params.num_classes"] = NUM_INDIVIDUALS + # else: + # opts["model.losses.0.params.num_classes"] = 0 + # opts["model.nodes.0.params.num_classes"] = 0 + + if loss_name == "RankedListLoss": + opts["model.losses.0.params.loss_kwargs"] = {"margin": 1.0, "Tn": 0.5} + model = LuxonisModel(config_file, opts) model.train() model.test(view="val") From 06899357c0ba236114f778a3300c92d79b426a36 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 11:54:55 +0100 Subject: [PATCH 06/12] refactor: update type hint for GhostFaceNetsV2 class to use Tensor from torch --- luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py index 8bb61fee..2188645f 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -2,8 +2,8 @@ import math from typing import Literal -import torch import torch.nn as nn +from torch import Tensor from luxonis_train.nodes.backbones.ghostfacenet.blocks import ( GhostBottleneckV2, @@ -15,7 +15,7 @@ from luxonis_train.nodes.blocks import ConvModule -class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): +class GhostFaceNetsV2(BaseNode[Tensor, list[Tensor]]): in_channels: list[int] in_width: list[int] From 94639972c720172a27505fbe4439480bc640d824 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 11:55:52 +0100 Subject: [PATCH 07/12] refactor: remove unused unwrap and wrap methods from GhostFaceNetsV2 class --- luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py index 2188645f..cb065c43 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -142,12 +142,6 @@ def __init__( if isinstance(m, nn.BatchNorm2d): m.momentum, m.eps = var.bn_momentum, var.bn_epsilon - def unwrap(self, inputs): - return [inputs[0]["features"][0]] - - def wrap(self, outputs): - return {"features": [outputs]} - def forward(self, inps): x = inps[0] x = self.conv_stem(x) From 555fe2aa483d2493e1a07bc4e99df5c05954be00 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 12:17:50 +0100 Subject: [PATCH 08/12] fix: correct formatting in __all__ list in metrics module --- luxonis_train/attached_modules/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index 10f993ee..59e9cc57 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -17,6 +17,6 @@ "Precision", "Recall", "ClosestIsPositiveAccuracy", - "ConfusionMatrix", + "ConfusionMatrix", "MedianDistances", ] From 9fe0b798a121d480b067fe9cb5f4b9c939e1afe8 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 14:02:28 +0100 Subject: [PATCH 09/12] Improved coverage, explicitly set mdformat github version --- .pre-commit-config.yaml | 2 +- tests/configs/reid.yaml | 2 +- tests/integration/test_reid.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml 
b/.pre-commit-config.yaml index c7779beb..226a18b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,4 +20,4 @@ repos: hooks: - id: mdformat additional_dependencies: - - mdformat-gfm + - mdformat-gfm==0.3.7 diff --git a/tests/configs/reid.yaml b/tests/configs/reid.yaml index 21ca2748..c79e4f8e 100644 --- a/tests/configs/reid.yaml +++ b/tests/configs/reid.yaml @@ -15,7 +15,7 @@ model: params: loss_name: SupConLoss embedding_size: *embedding_size - cross_batch_memory_size: &memory_size 200 + cross_batch_memory_size: &memory_size 4 attached_to: GhostFaceNetsV2 metrics: diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py index 0d006072..8094dd80 100644 --- a/tests/integration/test_reid.py +++ b/tests/integration/test_reid.py @@ -88,7 +88,9 @@ def clear_files(): @pytest.mark.parametrize("loss_name", not_class_based_losses) -def test_reid(opts: dict[str, Any], infer_path: Path, loss_name: str): +def test_available_losses( + opts: dict[str, Any], infer_path: Path, loss_name: str +): config_file = "tests/configs/reid.yaml" opts["model.losses.0.params.loss_name"] = loss_name @@ -113,3 +115,28 @@ def test_reid(opts: dict[str, Any], infer_path: Path, loss_name: str): assert len(list(infer_path.iterdir())) == 0 model.infer(view="val", save_dir=infer_path) assert infer_path.exists() + + +@pytest.mark.parametrize("loss_name", CLASS_EMBEDDING_LOSSES) +@pytest.mark.parametrize("num_classes", [-2, NUM_INDIVIDUALS]) +def test_unsupported_class_based_losses( + opts: dict[str, Any], loss_name: str, num_classes: int +): + config_file = "tests/configs/reid.yaml" + opts["model.losses.0.params.loss_name"] = loss_name + opts["model.losses.0.params.num_classes"] = num_classes + opts["model.nodes.0.params.num_classes"] = num_classes + + with pytest.raises(ValueError): + model = LuxonisModel(config_file, opts) + model.train() + + +@pytest.mark.parametrize("loss_name", ["NonExistentLoss"]) +def test_nonexistent_losses(opts: dict[str, Any], loss_name: str): + config_file = "tests/configs/reid.yaml" + opts["model.losses.0.params.loss_name"] = loss_name + + with pytest.raises(ValueError): + model = LuxonisModel(config_file, opts) + model.train() From b47e79ea82771758745f0ef6c84605e5e7e72e4f Mon Sep 17 00:00:00 2001 From: CaptainTrojan <49991681+CaptainTrojan@users.noreply.github.com> Date: Tue, 17 Dec 2024 18:20:51 +0100 Subject: [PATCH 10/12] Reduced mdformat-gfm version to 0.3.6 to support Python 3.8 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 226a18b8..c9355abb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,4 +20,4 @@ repos: hooks: - id: mdformat additional_dependencies: - - mdformat-gfm==0.3.7 + - mdformat-gfm==0.3.6 From 8e376a0367e85081d77227a8890ef5fde3bfa11a Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Wed, 1 Jan 2025 22:58:59 +0100 Subject: [PATCH 11/12] Coverage fixes --- .../visualizers/embeddings_visualizer.py | 24 ++++------ .../backbones/ghostfacenet/ghostfacenet.py | 11 +---- tests/integration/test_reid.py | 48 +++++++++++++++++-- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py index d8e5c940..f3591c83 100644 --- a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -41,7 +41,7 @@ 
def forward( label_canvas: Tensor, prediction_canvas: Tensor, embeddings: Tensor, - ids: Tensor | None, + ids: Tensor, **kwargs, ) -> Tensor: """Creates a visualization of the embeddings. @@ -73,20 +73,14 @@ def forward( # Plot the embeddings fig, ax = plt.subplots(figsize=(10, 10)) - if ids is not None: - scatter = ax.scatter( - embeddings_2d[:, 0], - embeddings_2d[:, 1], - c=ids.detach().cpu().numpy(), - cmap="viridis", - s=5, - ) - else: - scatter = ax.scatter( - embeddings_2d[:, 0], - embeddings_2d[:, 1], - s=5, - ) + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + c=ids.detach().cpu().numpy(), + cmap="viridis", + s=5, + ) + fig.colorbar(scatter, ax=ax) ax.set_title("Embeddings Visualization") ax.set_xlabel("Dimension 1") diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py index cb065c43..5a99ae28 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -111,16 +111,7 @@ def __init__( self.blocks = nn.Sequential(*stages) # Building pointwise convolution - pointwise_conv = [] - if var.add_pointwise_conv: - pointwise_conv.append( - nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) - ) - pointwise_conv.append(nn.BatchNorm2d(output_channel)) - pointwise_conv.append(nn.PReLU()) - else: - pointwise_conv.append(nn.Sequential()) - + pointwise_conv = [nn.Sequential()] self.pointwise_conv = nn.Sequential(*pointwise_conv) self.classifier = ModifiedGDC( image_size, diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py index 8094dd80..53355025 100644 --- a/tests/integration/test_reid.py +++ b/tests/integration/test_reid.py @@ -35,7 +35,7 @@ def input_shapes(self): def __getitem__(self, _): # pragma: no cover # Fake data - image = torch.rand(3, 256, 256, dtype=torch.float32) + image = torch.rand(self.input_shapes["image"], dtype=torch.float32) inputs = { "image": image, } @@ -55,6 +55,24 @@ def get_classes(self) -> dict[TaskType, list[str]]: return {TaskType.LABEL: ["id"]} +class CustomReIDLoaderNoID(CustomReIDLoader): + def __getitem__(self, _): + inputs, labels = super().__getitem__(_) + labels["something_else"] = labels["id"] + del labels["id"] + + return inputs, labels + + +class CustomReIDLoaderImageSize2(CustomReIDLoader): + @property + def input_shapes(self): + return { + "image": torch.Size([3, 200, 200]), + "id": torch.Size([1]), + } + + @pytest.fixture def infer_path() -> Path: if INFER_PATH.exists(): @@ -128,8 +146,7 @@ def test_unsupported_class_based_losses( opts["model.nodes.0.params.num_classes"] = num_classes with pytest.raises(ValueError): - model = LuxonisModel(config_file, opts) - model.train() + LuxonisModel(config_file, opts) @pytest.mark.parametrize("loss_name", ["NonExistentLoss"]) @@ -137,6 +154,31 @@ def test_nonexistent_losses(opts: dict[str, Any], loss_name: str): config_file = "tests/configs/reid.yaml" opts["model.losses.0.params.loss_name"] = loss_name + with pytest.raises(ValueError): + LuxonisModel(config_file, opts) + + +def test_bad_loader(opts: dict[str, Any]): + config_file = "tests/configs/reid.yaml" + opts["loader.name"] = "CustomReIDLoaderNoID" + with pytest.raises(ValueError): model = LuxonisModel(config_file, opts) model.train() + + +def test_not_enough_samples_for_metrics(opts: dict[str, Any]): + config_file = "tests/configs/reid.yaml" + opts["model.metrics.1.params.cross_batch_memory_size"] = 100 + + model = LuxonisModel(config_file, opts) + 
model.train() + + +def test_image_size_not_divisible_by_32(opts: dict[str, Any]): + config_file = "tests/configs/reid.yaml" + opts["loader.name"] = "CustomReIDLoaderImageSize2" + + # with pytest.raises(ValueError): + model = LuxonisModel(config_file, opts) + model.train() From 23e75001a56b810a2047aeac69e4c2faacd79860 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Thu, 2 Jan 2025 00:44:48 +0100 Subject: [PATCH 12/12] fix: return a model copy for the specified GhostFaceNets variant --- luxonis_train/nodes/backbones/ghostfacenet/variants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/nodes/backbones/ghostfacenet/variants.py b/luxonis_train/nodes/backbones/ghostfacenet/variants.py index 0e88ecfc..aa78daf8 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/variants.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/variants.py @@ -211,4 +211,4 @@ def get_variant(variant: Literal["V2"]) -> GhostFaceNetsVariant: "GhostFaceNets model variant should be in " f"{list(variants.keys())}, got {variant}." ) - return variants[variant] + return variants[variant].model_copy()
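The model_copy() fix matters because GhostFaceNetsV2.__init__ mutates num_classes on the variant it receives; returning the shared V2 instance would let that mutation leak into every later model. A minimal pydantic sketch of the difference, with an illustrative Variant class:

from pydantic import BaseModel

class Variant(BaseModel):
    num_classes: int = 0

V2 = Variant()

# Handing out the shared instance lets a caller mutate global state:
shared = V2
shared.num_classes = 100
assert V2.num_classes == 100  # the module-level default changed under us

# Handing out a copy isolates the mutation, which is what the fix does:
V2 = Variant()
copied = V2.model_copy()
copied.num_classes = 100
assert V2.num_classes == 0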