From 241c74a976657b28c7379243d545a11038358a90 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Fri, 6 Dec 2024 18:54:03 +0100 Subject: [PATCH 01/12] Added necessary requirements. Added integration with pytorch-metric-learning (loss). Added custom reID/embedding metrics. Implemented a test to verify trainability, exportability and inference. Removed GhostFaceNetsV2 from the backbone tests as it only generates embeddings instead of the usual features. --- .../attached_modules/losses/__init__.py | 2 + .../attached_modules/losses/pml_loss.py | 119 ++++ .../attached_modules/metrics/__init__.py | 3 + .../attached_modules/metrics/pml_metrics.py | 248 ++++++++ .../attached_modules/visualizers/__init__.py | 2 + .../visualizers/embeddings_visualizer.py | 95 ++++ luxonis_train/loaders/utils.py | 1 + luxonis_train/nodes/backbones/__init__.py | 2 + luxonis_train/nodes/backbones/ghostfacenet.py | 534 ++++++++++++++++++ requirements.txt | 2 + tests/configs/reid.yaml | 60 ++ tests/integration/test_detection.py | 4 +- tests/integration/test_reid.py | 91 +++ tests/integration/test_segmentation.py | 4 +- 14 files changed, 1165 insertions(+), 2 deletions(-) create mode 100644 luxonis_train/attached_modules/losses/pml_loss.py create mode 100644 luxonis_train/attached_modules/metrics/pml_metrics.py create mode 100644 luxonis_train/attached_modules/visualizers/embeddings_visualizer.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet.py create mode 100644 tests/configs/reid.yaml create mode 100644 tests/integration/test_reid.py diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index ff0bafc8..b320fada 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -7,6 +7,7 @@ from .ohem_bce_with_logits import OHEMBCEWithLogitsLoss from .ohem_cross_entropy import OHEMCrossEntropyLoss from .ohem_loss import OHEMLoss +from .pml_loss import MetricLearningLoss from .reconstruction_segmentation_loss import ReconstructionSegmentationLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss @@ -26,4 +27,5 @@ "OHEMCrossEntropyLoss", "OHEMBCEWithLogitsLoss", "FOMOLocalizationLoss", + "MetricLearningLoss", ] diff --git a/luxonis_train/attached_modules/losses/pml_loss.py b/luxonis_train/attached_modules/losses/pml_loss.py new file mode 100644 index 00000000..aacd667b --- /dev/null +++ b/luxonis_train/attached_modules/losses/pml_loss.py @@ -0,0 +1,119 @@ +import warnings + +from pytorch_metric_learning.losses import ( + AngularLoss, + ArcFaceLoss, + CircleLoss, + ContrastiveLoss, + CosFaceLoss, + CrossBatchMemory, + DynamicSoftMarginLoss, + FastAPLoss, + GeneralizedLiftedStructureLoss, + HistogramLoss, + InstanceLoss, + IntraPairVarianceLoss, + LargeMarginSoftmaxLoss, + LiftedStructureLoss, + ManifoldLoss, + MarginLoss, + MultiSimilarityLoss, + NCALoss, + NormalizedSoftmaxLoss, + NPairsLoss, + NTXentLoss, + P2SGradLoss, + PNPLoss, + ProxyAnchorLoss, + ProxyNCALoss, + RankedListLoss, + SignalToNoiseRatioContrastiveLoss, + SoftTripleLoss, + SphereFaceLoss, + SubCenterArcFaceLoss, + SupConLoss, + TripletMarginLoss, + TupletMarginLoss, +) +from torch import Tensor + +from .base_loss import BaseLoss + +# Dictionary mapping string keys to loss classes +loss_dict = { + "AngularLoss": AngularLoss, + "ArcFaceLoss": ArcFaceLoss, + "CircleLoss": CircleLoss, + "ContrastiveLoss": ContrastiveLoss, + "CosFaceLoss": CosFaceLoss, + 
"DynamicSoftMarginLoss": DynamicSoftMarginLoss, + "FastAPLoss": FastAPLoss, + "GeneralizedLiftedStructureLoss": GeneralizedLiftedStructureLoss, + "InstanceLoss": InstanceLoss, + "HistogramLoss": HistogramLoss, + "IntraPairVarianceLoss": IntraPairVarianceLoss, + "LargeMarginSoftmaxLoss": LargeMarginSoftmaxLoss, + "LiftedStructureLoss": LiftedStructureLoss, + "ManifoldLoss": ManifoldLoss, + "MarginLoss": MarginLoss, + "MultiSimilarityLoss": MultiSimilarityLoss, + "NCALoss": NCALoss, + "NormalizedSoftmaxLoss": NormalizedSoftmaxLoss, + "NPairsLoss": NPairsLoss, + "NTXentLoss": NTXentLoss, + "P2SGradLoss": P2SGradLoss, + "PNPLoss": PNPLoss, + "ProxyAnchorLoss": ProxyAnchorLoss, + "ProxyNCALoss": ProxyNCALoss, + "RankedListLoss": RankedListLoss, + "SignalToNoiseRatioContrastiveLoss": SignalToNoiseRatioContrastiveLoss, + "SoftTripleLoss": SoftTripleLoss, + "SphereFaceLoss": SphereFaceLoss, + "SubCenterArcFaceLoss": SubCenterArcFaceLoss, + "SupConLoss": SupConLoss, + "TripletMarginLoss": TripletMarginLoss, + "TupletMarginLoss": TupletMarginLoss, +} + + +class MetricLearningLoss(BaseLoss): + def __init__( + self, + loss_name: str, + embedding_size: int = 512, + cross_batch_memory_size=0, + loss_kwargs: dict | None = None, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + if loss_kwargs is None: + loss_kwargs = {} + self.loss_func = loss_dict[loss_name]( + **loss_kwargs + ) # Instantiate the loss object + if cross_batch_memory_size > 0: + if loss_name in CrossBatchMemory.supported_losses(): + self.loss_func = CrossBatchMemory( + self.loss_func, embedding_size=embedding_size + ) + else: + # Warn that cross_batch_memory_size is ignored + warnings.warn( + f"Cross batch memory is not supported for {loss_name}. Ignoring cross_batch_memory_size" + ) + + # self.miner_func = miner_func + + def prepare(self, inputs, labels): + embeddings = inputs["features"][0] + + IDs = labels["id"][0][:, 0] + return embeddings, IDs + + def forward(self, inputs: Tensor, target: Tensor): + # miner_output = self.miner_func(inputs, target) + + loss = self.loss_func(inputs, target) + + return loss diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index b1dc40ea..c43f32b4 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -2,6 +2,7 @@ from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints from .object_keypoint_similarity import ObjectKeypointSimilarity +from .pml_metrics import ClosestIsPositiveAccuracy, MedianDistances from .torchmetrics import Accuracy, F1Score, JaccardIndex, Precision, Recall __all__ = [ @@ -14,4 +15,6 @@ "ObjectKeypointSimilarity", "Precision", "Recall", + "ClosestIsPositiveAccuracy", + "MedianDistances", ] diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py new file mode 100644 index 00000000..b280742d --- /dev/null +++ b/luxonis_train/attached_modules/metrics/pml_metrics.py @@ -0,0 +1,248 @@ +import torch +from torch import Tensor + +from .base_metric import BaseMetric + +# Converted from https://omoindrot.github.io/triplet-loss#offline-and-online-triplet-mining +# to PyTorch from TensorFlow + + +def _pairwise_distances(embeddings, squared=False): + """Compute the 2D matrix of distances between all the embeddings. + + Args: + embeddings: tensor of shape (batch_size, embed_dim) + squared: Boolean. 
If true, output is the pairwise squared euclidean distance matrix.
+            If false, output is the pairwise euclidean distance matrix.
+
+    Returns:
+        pairwise_distances: tensor of shape (batch_size, batch_size)
+    """
+    # Get the dot product between all embeddings
+    # shape (batch_size, batch_size)
+    dot_product = torch.matmul(embeddings, embeddings.t())
+
+    # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`.
+    # This also provides more numerical stability (the diagonal of the result will be exactly 0).
+    # shape (batch_size,)
+    square_norm = torch.diag(dot_product)
+
+    # Compute the pairwise distance matrix as we have:
+    # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2
+    # shape (batch_size, batch_size)
+    distances = (
+        square_norm.unsqueeze(0) - 2.0 * dot_product + square_norm.unsqueeze(1)
+    )
+
+    # Because of computation errors, some distances might be negative so we put everything >= 0.0
+    distances = torch.max(distances, torch.tensor(0.0))
+
+    if not squared:
+        # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal)
+        # we need to add a small epsilon where distances == 0.0
+        mask = (distances == 0.0).float()
+        distances = distances + mask * 1e-16
+
+        distances = torch.sqrt(distances)
+
+        # Correct the epsilon added: set the distances on the mask to be exactly 0.0
+        distances = distances * (1.0 - mask)
+
+    return distances
+
+
+def _get_anchor_positive_triplet_mask(labels):
+    indices_equal = torch.eye(
+        labels.shape[0], dtype=torch.uint8, device=labels.device
+    )
+    indices_not_equal = ~indices_equal
+    labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1)
+    mask = indices_not_equal & labels_equal
+    return mask
+
+
+class ClosestIsPositiveAccuracy(BaseMetric):
+    def __init__(self, cross_batch_memory_size=0, **kwargs):
+        super().__init__(**kwargs)
+        self.cross_batch_memory_size = cross_batch_memory_size
+        self.add_state("cross_batch_memory", default=[], dist_reduce_fx="cat")
+        self.add_state(
+            "correct_predictions",
+            default=torch.tensor(0),
+            dist_reduce_fx="sum",
+        )
+        self.add_state(
+            "total_predictions", default=torch.tensor(0), dist_reduce_fx="sum"
+        )
+
+    def prepare(self, inputs, labels):
+        embeddings = inputs["features"][0]
+        IDs = labels["id"][0][:, 0]
+        return embeddings, IDs
+
+    def update(self, inputs: Tensor, target: Tensor):
+        embeddings, labels = inputs, target
+
+        if self.cross_batch_memory_size > 0:
+            # Append embedding and labels to the memory
+            self.cross_batch_memory.extend(list(zip(embeddings, labels)))
+
+            # If the memory is full, remove the oldest elements
+            if len(self.cross_batch_memory) > self.cross_batch_memory_size:
+                self.cross_batch_memory = self.cross_batch_memory[
+                    -self.cross_batch_memory_size :
+                ]
+
+            # If the memory is not full, return
+            if len(self.cross_batch_memory) < self.cross_batch_memory_size:
+                return
+
+            # Get the embeddings and labels from the memory
+            embeddings, labels = zip(*self.cross_batch_memory)
+            embeddings = torch.stack(embeddings)
+            labels = torch.stack(labels)
+
+        # print(f"Calculating accuracy for {len(embeddings)} embeddings")
+
+        # Get the pairwise distances between all embeddings
+        pairwise_distances = _pairwise_distances(embeddings)
+
+        # Set diagonal to infinity so that the closest embedding is not the same embedding
+        pairwise_distances.fill_diagonal_(float("inf"))
+
+        # Find the closest embedding for each query embedding
+        closest_indices = torch.argmin(pairwise_distances, dim=1)
+
+        # Get the labels of the closest embeddings
+        closest_labels = 
labels[closest_indices] + + # Filter out embeddings that don't have both positive and negative examples + positive_mask = _get_anchor_positive_triplet_mask(labels) + num_positives = positive_mask.sum(dim=1) + has_at_least_one_positive_and_negative = (num_positives > 0) & ( + num_positives < len(labels) + ) + + # Filter embeddings, labels, and closest indices based on valid indices + filtered_labels = labels[has_at_least_one_positive_and_negative] + filtered_closest_labels = closest_labels[ + has_at_least_one_positive_and_negative + ] + + # Calculate the number of correct predictions where the closest is positive + correct_predictions = ( + filtered_labels == filtered_closest_labels + ).sum() + + # Update the metric state + self.correct_predictions += correct_predictions + self.total_predictions += len(filtered_labels) + + def compute(self): + return self.correct_predictions / self.total_predictions + + +class MedianDistances(BaseMetric): + def __init__(self, cross_batch_memory_size=0, **kwargs): + super().__init__(**kwargs) + self.cross_batch_memory_size = cross_batch_memory_size + self.add_state("cross_batch_memory", default=[], dist_reduce_fx="cat") + self.add_state("all_distances", default=[], dist_reduce_fx="cat") + self.add_state("closest_distances", default=[], dist_reduce_fx="cat") + self.add_state("positive_distances", default=[], dist_reduce_fx="cat") + self.add_state( + "closest_vs_positive_distances", default=[], dist_reduce_fx="cat" + ) + + def prepare(self, inputs, labels): + embeddings = inputs["features"][0] + IDs = labels["id"][0][:, 0] + return embeddings, IDs + + def update(self, inputs: Tensor, target: Tensor): + embeddings, labels = inputs, target + + if self.cross_batch_memory_size > 0: + # Append embedding and labels to the memory + self.cross_batch_memory.extend(list(zip(embeddings, labels))) + + # If the memory is full, remove the oldest elements + if len(self.cross_batch_memory) > self.cross_batch_memory_size: + self.cross_batch_memory = self.cross_batch_memory[ + -self.cross_batch_memory_size : + ] + + # If the memory is not full, return + if len(self.cross_batch_memory) < self.cross_batch_memory_size: + return + + # Get the embeddings and labels from the memory + embeddings, labels = zip(*self.cross_batch_memory) + embeddings = torch.stack(embeddings) + labels = torch.stack(labels) + + # Get the pairwise distances between all embeddings + pairwise_distances = _pairwise_distances(embeddings) + # Append only upper triangular part of the matrix + self.all_distances.append( + pairwise_distances[ + torch.triu(torch.ones_like(pairwise_distances), diagonal=1) + == 1 + ].flatten() + ) + + # Set diagonal to infinity so that the closest embedding is not the same embedding + pairwise_distances.fill_diagonal_(float("inf")) + + # Get the closest distance for each query embedding + closest_distances, _ = torch.min(pairwise_distances, dim=1) + self.closest_distances.append(closest_distances) + + # Get the positive mask and convert it to boolean + positive_mask = _get_anchor_positive_triplet_mask(labels).bool() + + only_positive_distances = pairwise_distances.clone() + only_positive_distances[~positive_mask] = float("inf") + + closest_positive_distances, _ = torch.min( + only_positive_distances, dim=1 + ) + + non_inf_mask = closest_positive_distances != float("inf") + difference = closest_positive_distances - closest_distances + difference = difference[non_inf_mask] + + # Update the metric state + self.closest_vs_positive_distances.append(difference) + 
self.positive_distances.append( + closest_positive_distances[non_inf_mask] + ) + + def compute(self): + if len(self.all_distances) == 0: + # Return NaN tensor if no distances were calculated + return { + "MedianDistance": torch.tensor(float("nan")), + "MedianClosestDistance": torch.tensor(float("nan")), + "MedianClosestPositiveDistance": torch.tensor(float("nan")), + "MedianClosestVsClosestPositiveDistance": torch.tensor( + float("nan") + ), + } + + all_distances = torch.cat(self.all_distances) + closest_distances = torch.cat(self.closest_distances) + positive_distances = torch.cat(self.positive_distances) + closest_vs_positive_distances = torch.cat( + self.closest_vs_positive_distances + ) + + # Return medians + return { + "MedianDistance": torch.median(all_distances), + "MedianClosestDistance": torch.median(closest_distances), + "MedianClosestPositiveDistance": torch.median(positive_distances), + "MedianClosestVsClosestPositiveDistance": torch.median( + closest_vs_positive_distances + ), + } diff --git a/luxonis_train/attached_modules/visualizers/__init__.py b/luxonis_train/attached_modules/visualizers/__init__.py index 50b90471..69ecc3c4 100644 --- a/luxonis_train/attached_modules/visualizers/__init__.py +++ b/luxonis_train/attached_modules/visualizers/__init__.py @@ -1,6 +1,7 @@ from .base_visualizer import BaseVisualizer from .bbox_visualizer import BBoxVisualizer from .classification_visualizer import ClassificationVisualizer +from .embeddings_visualizer import EmbeddingsVisualizer from .keypoint_visualizer import KeypointVisualizer from .multi_visualizer import MultiVisualizer from .segmentation_visualizer import SegmentationVisualizer @@ -23,6 +24,7 @@ "KeypointVisualizer", "MultiVisualizer", "SegmentationVisualizer", + "EmbeddingsVisualizer", "combine_visualizations", "draw_bounding_box_labels", "draw_keypoint_labels", diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py new file mode 100644 index 00000000..b5fb5f0e --- /dev/null +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -0,0 +1,95 @@ +import logging + +from matplotlib import pyplot as plt +from sklearn.manifold import TSNE +from torch import Tensor + +from luxonis_train.utils import Labels, Packet + +from .base_visualizer import BaseVisualizer +from .utils import ( + figure_to_torch, +) + +logger = logging.getLogger(__name__) +log_disable = False + + +class EmbeddingsVisualizer(BaseVisualizer[Tensor, Tensor]): + # supported_tasks: list[TaskType] = [TaskType.LABEL] + + def __init__( + self, + **kwargs, + ): + """Visualizer for embedding tasks like reID.""" + super().__init__(**kwargs) + + def prepare( + self, inputs: Packet[Tensor], labels: Labels | None + ) -> tuple[Tensor, Tensor]: + embeddings = inputs["features"][0] + IDs = labels["id"][0] + return embeddings, IDs + + def forward( + self, + label_canvas: Tensor, + prediction_canvas: Tensor, + embeddings: Tensor, + IDs: Tensor | None, + **kwargs, + ) -> Tensor: + """Creates a visualization of the embeddings. + + @type label_canvas: Tensor + @param label_canvas: The canvas to draw the labels on. + @type prediction_canvas: Tensor + @param prediction_canvas: The canvas to draw the predictions on. + @type embeddings: Tensor + @param embeddings: The embeddings to visualize. + @type IDs: Tensor + @param IDs: The IDs to visualize. + @rtype: Tensor + @return: An embedding space projection. + """ + + # Embeddings: [B, D], D = e.g. 
512 + # IDs: [B, 1], corresponding to the embeddings + + # Convert embeddings to numpy array + embeddings_np = embeddings.detach().cpu().numpy() + + # Perplexity must be less than the number of samples + perplexity = min(30, embeddings_np.shape[0] - 1) + + # Reduce dimensionality to 2D using t-SNE + tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity) + embeddings_2d = tsne.fit_transform(embeddings_np) + + # Plot the embeddings + fig, ax = plt.subplots(figsize=(10, 10)) + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + c=IDs.detach().cpu().numpy(), + cmap="viridis", + s=5, + ) + fig.colorbar(scatter, ax=ax) + ax.set_title("Embeddings Visualization") + ax.set_xlabel("Dimension 1") + ax.set_ylabel("Dimension 2") + + # Convert figure to tensor + image_tensor = figure_to_torch( + fig, width=label_canvas.shape[3], height=label_canvas.shape[2] + ) + + # Close the figure to free memory + plt.close(fig) + + # Add fake batch dimension + image_tensor = image_tensor.unsqueeze(0) + + return image_tensor diff --git a/luxonis_train/loaders/utils.py b/luxonis_train/loaders/utils.py index b030e218..2782500e 100644 --- a/luxonis_train/loaders/utils.py +++ b/luxonis_train/loaders/utils.py @@ -38,6 +38,7 @@ def collate_fn( TaskType.CLASSIFICATION, TaskType.SEGMENTATION, TaskType.ARRAY, + TaskType.LABEL, ]: out_labels[task] = torch.stack(annos, 0), task_type diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index cc621625..f5319981 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -2,6 +2,7 @@ from .ddrnet import DDRNet from .efficientnet import EfficientNet from .efficientrep import EfficientRep +from .ghostfacenet import GhostFaceNetsV2 from .micronet import MicroNet from .mobilenetv2 import MobileNetV2 from .mobileone import MobileOne @@ -22,4 +23,5 @@ "ResNet", "DDRNet", "RecSubNet", + "GhostFaceNetsV2", ] diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py new file mode 100644 index 00000000..b4b17758 --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet.py @@ -0,0 +1,534 @@ +# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py + + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from luxonis_train.nodes.base_node import BaseNode + + +def _make_divisible(v, divisor, min_value=None): + """This function is taken from the original tf repo. + + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
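+    # e.g. v=10, divisor=8 first rounds to 8; since 8 < 0.9 * 10, the
+    # result is bumped up by one divisor step to 16.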
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def hard_sigmoid(x, inplace: bool = False): + if inplace: + return x.add_(3.0).clamp_(0.0, 6.0).div_(6.0) + else: + return F.relu6(x + 3.0) / 6.0 + + +class SqueezeExcite(nn.Module): + def __init__( + self, + in_chs, + se_ratio=0.25, + reduced_base_chs=None, + act_layer=nn.PReLU, + gate_fn=hard_sigmoid, + divisor=4, + **_, + ): + super(SqueezeExcite, self).__init__() + self.gate_fn = gate_fn + reduced_chs = _make_divisible( + (reduced_base_chs or in_chs) * se_ratio, divisor + ) + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) + self.act1 = act_layer() + self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) + + def forward(self, x): + x_se = self.avg_pool(x) + x_se = self.conv_reduce(x_se) + x_se = self.act1(x_se) + x_se = self.conv_expand(x_se) + x = x * self.gate_fn(x_se) + return x + + +class ConvBnAct(nn.Module): + def __init__( + self, in_chs, out_chs, kernel_size, stride=1, act_layer=nn.PReLU + ): + super(ConvBnAct, self).__init__() + self.conv = nn.Conv2d( + in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False + ) + self.bn1 = nn.BatchNorm2d(out_chs) + self.act1 = act_layer() + + def forward(self, x): + x = self.conv(x) + x = self.bn1(x) + x = self.act1(x) + return x + + +class ModifiedGDC(nn.Module): + def __init__( + self, image_size, in_chs, num_classes, dropout, emb=512 + ): # dropout implementation is in the original code but not in the paper + super(ModifiedGDC, self).__init__() + + if image_size % 32 == 0: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32), + groups=in_chs, + bias=False, + ) + else: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32 + 1), + groups=in_chs, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(in_chs) + self.dropout = nn.Dropout(dropout) + + self.conv = nn.Conv2d(in_chs, emb, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm1d(emb) + self.linear = ( + nn.Linear(emb, num_classes) if num_classes else nn.Identity() + ) + + def forward(self, inps): + x = inps + x = self.conv_dw(x) + x = self.bn1(x) + x = self.dropout(x) + # # Add spots to the features + # x = torch.cat([x, spots.view(spots.size(0), -1, 1, 1)], dim=1) + x = self.conv(x) + x = x.view(x.size(0), -1) # Flatten + x = self.bn2(x) + x = self.linear(x) + return x + + +class GhostModuleV2(nn.Module): + def __init__( + self, + inp, + oup, + kernel_size=1, + ratio=2, + dw_size=3, + stride=1, + prelu=True, + mode=None, + args=None, + ): + super(GhostModuleV2, self).__init__() + self.mode = mode + self.gate_fn = nn.Sigmoid() + + if self.mode in ["original"]: + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + elif self.mode in ["attn"]: # DFC + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + 
nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.short_conv = nn.Sequential( + nn.Conv2d( + inp, oup, kernel_size, stride, kernel_size // 2, bias=False + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(1, 5), + stride=1, + padding=(0, 2), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(5, 1), + stride=1, + padding=(2, 0), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.mode in ["original"]: + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] + elif self.mode in ["attn"]: + res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2)) + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] * F.interpolate( + self.gate_fn(res), + size=(out.shape[-2], out.shape[-1]), + mode="nearest", + ) + + +class GhostBottleneckV2(nn.Module): + def __init__( + self, + in_chs, + mid_chs, + out_chs, + dw_kernel_size=3, + stride=1, + act_layer=nn.PReLU, + se_ratio=0.0, + layer_id=None, + args=None, + ): + super(GhostBottleneckV2, self).__init__() + has_se = se_ratio is not None and se_ratio > 0.0 + self.stride = stride + + # Point-wise expansion + if layer_id <= 1: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="original", args=args + ) + else: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="attn", args=args + ) + + # Depth-wise convolution + if self.stride > 1: + self.conv_dw = nn.Conv2d( + mid_chs, + mid_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=mid_chs, + bias=False, + ) + self.bn_dw = nn.BatchNorm2d(mid_chs) + + # Squeeze-and-excitation + if has_se: + self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio) + else: + self.se = None + + self.ghost2 = GhostModuleV2( + mid_chs, out_chs, prelu=False, mode="original", args=args + ) + + # shortcut + if in_chs == out_chs and self.stride == 1: + self.shortcut = nn.Sequential() + else: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_chs, + in_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=in_chs, + bias=False, + ), + nn.BatchNorm2d(in_chs), + nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_chs), + ) + + def forward(self, x): + residual = x + x = self.ghost1(x) + if self.stride > 1: + x = self.conv_dw(x) + x = self.bn_dw(x) + if self.se is not None: + x = self.se(x) + x = self.ghost2(x) + x += self.shortcut(residual) + return x + + +# NODES.register_module() +class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): + def unwrap(self, inputs): + return [inputs[0]["features"][0]] + + def wrap(self, outputs): + return {"features": [outputs]} + + def set_export_mode(self, mode: bool = True): + self.export_mode = mode + self.train(not mode) + + def __init__( + self, + cfgs=None, + embedding_size=512, + num_classes=0, + width=1.0, + dropout=0.2, + block=GhostBottleneckV2, + add_pointwise_conv=False, + bn_momentum=0.9, + bn_epsilon=1e-5, + init_kaiming=True, + block_args=None, + *args, + **kwargs, + ): + # kwargs['_tasks'] = {TaskType.LABEL: 'features'} + 
super().__init__(*args, **kwargs) + + inp_shape = kwargs["input_shapes"][0]["features"][0] + # spots_shape = kwargs['input_shapes'][0]['features'][1] + + image_size = inp_shape[2] + channels = inp_shape[1] + if cfgs is None: + self.cfgs = [ + # k, t, c, SE, s + [[3, 16, 16, 0, 1]], + [[3, 48, 24, 0, 2]], + [[3, 72, 24, 0, 1]], + [[5, 72, 40, 0.25, 2]], + [[5, 120, 40, 0.25, 1]], + [[3, 240, 80, 0, 2]], + [ + [3, 200, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 480, 112, 0.25, 1], + [3, 672, 112, 0.25, 1], + ], + [[5, 672, 160, 0.25, 2]], + [ + [5, 960, 160, 0, 1], + [5, 960, 160, 0.25, 1], + [5, 960, 160, 0, 1], + [5, 960, 160, 0.25, 1], + ], + ] + else: + self.cfgs = cfgs + + # building first layer + output_channel = _make_divisible(16 * width, 4) + self.conv_stem = nn.Conv2d( + channels, output_channel, 3, 2, 1, bias=False + ) + self.bn1 = nn.BatchNorm2d(output_channel) + self.act1 = nn.PReLU() + input_channel = output_channel + + # building inverted residual blocks + stages = [] + layer_id = 0 + for cfg in self.cfgs: + layers = [] + for k, exp_size, c, se_ratio, s in cfg: + output_channel = _make_divisible(c * width, 4) + hidden_channel = _make_divisible(exp_size * width, 4) + if block == GhostBottleneckV2: + layers.append( + block( + input_channel, + hidden_channel, + output_channel, + k, + s, + se_ratio=se_ratio, + layer_id=layer_id, + args=block_args, + ) + ) + input_channel = output_channel + layer_id += 1 + stages.append(nn.Sequential(*layers)) + + output_channel = _make_divisible(exp_size * width, 4) + stages.append( + nn.Sequential(ConvBnAct(input_channel, output_channel, 1)) + ) + + self.blocks = nn.Sequential(*stages) + + # building last several layers + pointwise_conv = [] + if add_pointwise_conv: + pointwise_conv.append( + nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) + ) + pointwise_conv.append(nn.BatchNorm2d(output_channel)) + pointwise_conv.append(nn.PReLU()) + else: + pointwise_conv.append(nn.Sequential()) + + self.pointwise_conv = nn.Sequential(*pointwise_conv) + self.classifier = ModifiedGDC( + image_size, output_channel, num_classes, dropout, embedding_size + ) + + # Initialize weights + for m in self.modules(): + if init_kaiming: + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight) + negative_slope = 0.25 # Default value for PReLU in PyTorch, change it if you use custom value + m.weight.data.normal_( + 0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2))) + ) + if isinstance(m, nn.BatchNorm2d): + m.momentum, m.eps = bn_momentum, bn_epsilon + + def forward(self, inps): + x = inps[0] + x = self.conv_stem(x) + x = self.bn1(x) + x = self.act1(x) + x = self.blocks(x) + x = self.pointwise_conv(x) + x = self.classifier(x) + return x + + # @property + # def task(self) -> str: + # return "label" + + # @property + # def tasks(self) -> dict: + # return [TaskType.LABEL] + + +if __name__ == "__main__": + W, H = 256, 256 + model = GhostFaceNetsV2(image_size=W) + model.eval() # Set the model to evaluation mode + + # Create a dummy input tensor of the appropriate size + x = torch.randn(1, 3, H, W) + + # Export the model + onnx_path = "ghostfacenet.onnx" + torch.onnx.export( + model, # model being run + x, # model input (or a tuple for multiple inputs) + onnx_path, # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=12, # the ONNX version to export the model to + 
do_constant_folding=True, # whether to execute constant folding for optimization + input_names=["input"], # the model's input names + output_names=["output"], # the model's output names + # dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes + # 'output' : {0 : 'batch_size'}} + ) + import os + + import numpy as np + import onnx + import onnxsim + + # logger.info("Simplifying ONNX model...") + model_onnx = onnx.load(onnx_path) + onnx_model, check = onnxsim.simplify(model_onnx) + if not check: + raise RuntimeError("Onnx simplify failed.") + onnx.save(onnx_model, onnx_path) + + # Add calibration data + dir = "shared_with_container/calibration_data/" + for file in os.listdir(dir): + os.remove(dir + file) + for i in range(20): + np_array = np.random.rand(1, 3, H, W).astype(np.float32) + np.save(f"{dir}{i:02d}.npy", np_array) + np_array.tofile(f"{dir}{i:02d}.raw") + + # Test backpropagation on the model + # Create a dummy target tensor of the appropriate size + Y = model(x) + target = torch.randn(1, 512) + loss_fn = torch.nn.MSELoss() + loss = loss_fn(Y, target) + model.zero_grad() + loss.backward() + print("Backpropagation test successful") diff --git a/requirements.txt b/requirements.txt index 5d0fcb28..94badc1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,5 @@ mlflow>=2.10.0 psutil>=5.0.0 tabulate>=0.9.0 grad-cam>=1.5.4 +pytorch_metric_learning>=2.7.0 +scikit-learn>=1.5.0 \ No newline at end of file diff --git a/tests/configs/reid.yaml b/tests/configs/reid.yaml new file mode 100644 index 00000000..d9c0ec11 --- /dev/null +++ b/tests/configs/reid.yaml @@ -0,0 +1,60 @@ +loader: + name: CustomReIDLoader + +model: + name: reid_test + nodes: + - name: GhostFaceNetsV2 + input_sources: + - image + params: + embedding_size: &embedding_size 512 + + losses: + - name: MetricLearningLoss + params: + loss_name: SupConLoss + embedding_size: *embedding_size + cross_batch_memory_size: &memory_size 200 + attached_to: GhostFaceNetsV2 + + metrics: + - name: ClosestIsPositiveAccuracy + params: + cross_batch_memory_size: *memory_size + attached_to: GhostFaceNetsV2 + is_main_metric: True + - name: MedianDistances + params: + cross_batch_memory_size: *memory_size + attached_to: GhostFaceNetsV2 + is_main_metric: False + + visualizers: + - name: EmbeddingsVisualizer + attached_to: GhostFaceNetsV2 + +trainer: + preprocessing: + train_image_size: [256, 256] + + batch_size: 16 + epochs: 10 + n_workers: 0 + validation_interval: 10 + + callbacks: + - name: ExportOnTrainEnd + + optimizer: + name: Adam + params: + lr: 0.01 + +tracker: + project_name: reid_example + is_tensorboard: True + +exporter: + onnx: + opset_version: 11 \ No newline at end of file diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py index 45e83f0a..060e84e2 100644 --- a/tests/integration/test_detection.py +++ b/tests/integration/test_detection.py @@ -103,7 +103,9 @@ def train_and_test( assert value > 0.8, f"{name} = {value} (expected > 0.8)" -@pytest.mark.parametrize("backbone", BACKBONES) +@pytest.mark.parametrize( + "backbone", [b for b in BACKBONES if b != "GhostFaceNetsV2"] +) def test_backbones( backbone: str, config: dict[str, Any], diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py new file mode 100644 index 00000000..9ed4e867 --- /dev/null +++ b/tests/integration/test_reid.py @@ -0,0 +1,91 @@ +import shutil +from pathlib import Path +from typing import Any + +import pytest +import torch + +from luxonis_train.core import LuxonisModel +from 
luxonis_train.enums import TaskType +from luxonis_train.loaders import BaseLoaderTorch + +from .multi_input_modules import * + +INFER_PATH = Path("tests/integration/infer-save-directory") +ONNX_PATH = Path("tests/integration/_model.onnx") +STUDY_PATH = Path("study_local.db") + + +class CustomReIDLoader(BaseLoaderTorch): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def input_shapes(self): + return { + "image": torch.Size([3, 256, 256]), + "id": torch.Size([1]), + } + + def __getitem__(self, _): # pragma: no cover + # Fake data + image = torch.rand(3, 256, 256, dtype=torch.float32) + inputs = { + "image": image, + } + + # Fake labels + id = torch.randint(0, 1000, (1,), dtype=torch.int64) + labels = { + "id": (id, TaskType.LABEL), + } + + return inputs, labels + + def __len__(self): + return 10 + + def get_classes(self) -> dict[TaskType, list[str]]: + return {TaskType.LABEL: ["id"]} + + +@pytest.fixture +def infer_path() -> Path: + if INFER_PATH.exists(): + shutil.rmtree(INFER_PATH) + INFER_PATH.mkdir() + return INFER_PATH + + +@pytest.fixture +def opts(test_output_dir: Path) -> dict[str, Any]: + return { + "trainer.epochs": 1, + "trainer.batch_size": 2, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(test_output_dir), + "tuner.n_trials": 4, + } + + +@pytest.fixture(scope="function", autouse=True) +def clear_files(): + yield + STUDY_PATH.unlink(missing_ok=True) + ONNX_PATH.unlink(missing_ok=True) + + +def test_reid(opts: dict[str, Any], infer_path: Path): + config_file = "tests/configs/reid.yaml" + model = LuxonisModel(config_file, opts) + model.train() + model.test(view="val") + + assert not ONNX_PATH.exists() + model.export(str(ONNX_PATH)) + assert ONNX_PATH.exists() + + assert len(list(infer_path.iterdir())) == 0 + model.infer(view="val", save_dir=infer_path) + assert infer_path.exists() diff --git a/tests/integration/test_segmentation.py b/tests/integration/test_segmentation.py index a8b4df91..4ab4478a 100644 --- a/tests/integration/test_segmentation.py +++ b/tests/integration/test_segmentation.py @@ -123,7 +123,9 @@ def train_and_test( assert value > 0.8, f"{name} = {value} (expected > 0.8)" -@pytest.mark.parametrize("backbone", BACKBONES) +@pytest.mark.parametrize( + "backbone", [b for b in BACKBONES if b != "GhostFaceNetsV2"] +) def test_backbones( backbone: str, config: dict[str, Any], From be4c2d23f3039496c0e220f5efea7e6f9256fbdc Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Fri, 6 Dec 2024 19:05:22 +0100 Subject: [PATCH 02/12] Add detailed docstring for GhostFaceNetsV2 backbone class --- luxonis_train/nodes/backbones/ghostfacenet.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py index b4b17758..c242633f 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet.py @@ -356,6 +356,44 @@ def __init__( *args, **kwargs, ): + """GhostFaceNetsV2 backbone. + + GhostFaceNetsV2 is a convolutional neural network architecture focused on face recognition, but it is + adaptable to generic embedding tasks. It is based on the GhostNet architecture and uses Ghost BottleneckV2 blocks. 
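+        With the default num_classes=0, the forward pass returns one embedding vector of size embedding_size per image;
+        the input is downsampled by a factor of 32 before the depth-wise embedding head.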
+
+        Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py}
+
+        @license: MIT License
+
+        @see: GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations
+
+        @type cfgs: list[list[list[int]]] | None
+        @param cfgs: List of Ghost BottleneckV2 configurations. Defaults to None, which uses the original GhostFaceNetsV2 configuration.
+        @type embedding_size: int
+        @param embedding_size: Size of the embedding. Defaults to 512.
+        @type num_classes: int
+        @param num_classes: Number of classes. Defaults to 0, which makes the network output the raw embeddings. A positive value
+            adds a final linear layer to the network, which is useful for training with ArcFace or similar classification-based
+            losses that require the last layer to be dropped for inference.
+        @type width: float
+        @param width: Width multiplier. Increases complexity and number of parameters. Defaults to 1.0.
+        @type dropout: float
+        @param dropout: Dropout rate. Defaults to 0.2.
+        @type block: nn.Module
+        @param block: Ghost BottleneckV2 block. Defaults to GhostBottleneckV2.
+        @type add_pointwise_conv: bool
+        @param add_pointwise_conv: If True, adds a pointwise convolution layer at the end of the network. Defaults to False.
+        @type bn_momentum: float
+        @param bn_momentum: Batch normalization momentum. Defaults to 0.9.
+        @type bn_epsilon: float
+        @param bn_epsilon: Batch normalization epsilon. Defaults to 1e-5.
+        @type init_kaiming: bool
+        @param init_kaiming: If True, initializes the weights using the Kaiming initialization. Defaults to True.
+        @type block_args: dict | None
+        @param block_args: Arguments to pass to the block. Defaults to None.
+        """
         # kwargs['_tasks'] = {TaskType.LABEL: 'features'}
         super().__init__(*args, **kwargs)
 

From c360f15bc953405cf1ea15c6787ced733e479e41 Mon Sep 17 00:00:00 2001
From: Michal Sejak
Date: Fri, 6 Dec 2024 19:08:01 +0100
Subject: [PATCH 03/12] fix: update docstring for pairwise_distances function in pml_metrics.py

---
 .../attached_modules/metrics/pml_metrics.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py
index b280742d..fdd66a41 100644
--- a/luxonis_train/attached_modules/metrics/pml_metrics.py
+++ b/luxonis_train/attached_modules/metrics/pml_metrics.py
@@ -10,13 +10,15 @@
 def _pairwise_distances(embeddings, squared=False):
     """Compute the 2D matrix of distances between all the embeddings.
 
-    Args:
-        embeddings: tensor of shape (batch_size, embed_dim)
-        squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
-            If false, output is the pairwise euclidean distance matrix.
-
-    Returns:
-        pairwise_distances: tensor of shape (batch_size, batch_size)
+    @param embeddings: tensor of shape (batch_size, embed_dim)
+    @type embeddings: torch.Tensor
+    @param squared: If true, output is the pairwise squared euclidean
+        distance matrix. If false, output is the pairwise euclidean
+        distance matrix.
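+        Example (illustrative): embeddings [[0., 0.], [3., 4.]] give
+        [[0., 5.], [5., 0.]], or [[0., 25.], [25., 0.]] when squared.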
+ @type squared: bool + @return: pairwise_distances: tensor of shape (batch_size, + batch_size) + @rtype: torch.Tensor """ # Get the dot product between all embeddings # shape (batch_size, batch_size) From c360f15bc953405cf1ea15c6787ced733e479e41 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Fri, 6 Dec 2024 19:12:30 +0100 Subject: [PATCH 04/12] Fixed type errors --- .../attached_modules/losses/pml_loss.py | 3 + .../attached_modules/metrics/pml_metrics.py | 8 +++ .../visualizers/embeddings_visualizer.py | 25 +++++-- luxonis_train/nodes/backbones/ghostfacenet.py | 65 +------------------ 4 files changed, 31 insertions(+), 70 deletions(-) diff --git a/luxonis_train/attached_modules/losses/pml_loss.py b/luxonis_train/attached_modules/losses/pml_loss.py index aacd667b..1727d091 100644 --- a/luxonis_train/attached_modules/losses/pml_loss.py +++ b/luxonis_train/attached_modules/losses/pml_loss.py @@ -108,6 +108,9 @@ def __init__( def prepare(self, inputs, labels): embeddings = inputs["features"][0] + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0][:, 0] return embeddings, IDs diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py index fdd66a41..a6d4effa 100644 --- a/luxonis_train/attached_modules/metrics/pml_metrics.py +++ b/luxonis_train/attached_modules/metrics/pml_metrics.py @@ -79,6 +79,10 @@ def __init__(self, cross_batch_memory_size=0, **kwargs): def prepare(self, inputs, labels): embeddings = inputs["features"][0] + + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0][:, 0] return embeddings, IDs @@ -158,6 +162,10 @@ def __init__(self, cross_batch_memory_size=0, **kwargs): def prepare(self, inputs, labels): embeddings = inputs["features"][0] + + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0][:, 0] return embeddings, IDs diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py index b5fb5f0e..d1096bfa 100644 --- a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -29,6 +29,10 @@ def prepare( self, inputs: Packet[Tensor], labels: Labels | None ) -> tuple[Tensor, Tensor]: embeddings = inputs["features"][0] + + assert ( + labels is not None and "id" in labels + ), "ID labels are required for metric learning losses" IDs = labels["id"][0] return embeddings, IDs @@ -69,13 +73,20 @@ def forward( # Plot the embeddings fig, ax = plt.subplots(figsize=(10, 10)) - scatter = ax.scatter( - embeddings_2d[:, 0], - embeddings_2d[:, 1], - c=IDs.detach().cpu().numpy(), - cmap="viridis", - s=5, - ) + if IDs is not None: + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + c=IDs.detach().cpu().numpy(), + cmap="viridis", + s=5, + ) + else: + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + s=5, + ) fig.colorbar(scatter, ax=ax) ax.set_title("Embeddings Visualization") ax.set_xlabel("Dimension 1") diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py index c242633f..9641596d 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet.py @@ -263,6 +263,8 @@ def __init__( has_se = 
se_ratio is not None and se_ratio > 0.0 self.stride = stride + assert layer_id is not None, "Layer ID must be explicitly provided" + # Point-wise expansion if layer_id <= 1: self.ghost1 = GhostModuleV2( @@ -507,66 +509,3 @@ def forward(self, inps): x = self.pointwise_conv(x) x = self.classifier(x) return x - - # @property - # def task(self) -> str: - # return "label" - - # @property - # def tasks(self) -> dict: - # return [TaskType.LABEL] - - -if __name__ == "__main__": - W, H = 256, 256 - model = GhostFaceNetsV2(image_size=W) - model.eval() # Set the model to evaluation mode - - # Create a dummy input tensor of the appropriate size - x = torch.randn(1, 3, H, W) - - # Export the model - onnx_path = "ghostfacenet.onnx" - torch.onnx.export( - model, # model being run - x, # model input (or a tuple for multiple inputs) - onnx_path, # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=12, # the ONNX version to export the model to - do_constant_folding=True, # whether to execute constant folding for optimization - input_names=["input"], # the model's input names - output_names=["output"], # the model's output names - # dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes - # 'output' : {0 : 'batch_size'}} - ) - import os - - import numpy as np - import onnx - import onnxsim - - # logger.info("Simplifying ONNX model...") - model_onnx = onnx.load(onnx_path) - onnx_model, check = onnxsim.simplify(model_onnx) - if not check: - raise RuntimeError("Onnx simplify failed.") - onnx.save(onnx_model, onnx_path) - - # Add calibration data - dir = "shared_with_container/calibration_data/" - for file in os.listdir(dir): - os.remove(dir + file) - for i in range(20): - np_array = np.random.rand(1, 3, H, W).astype(np.float32) - np.save(f"{dir}{i:02d}.npy", np_array) - np_array.tofile(f"{dir}{i:02d}.raw") - - # Test backpropagation on the model - # Create a dummy target tensor of the appropriate size - Y = model(x) - target = torch.randn(1, 512) - loss_fn = torch.nn.MSELoss() - loss = loss_fn(Y, target) - model.zero_grad() - loss.backward() - print("Backpropagation test successful") From 6eda12af3fc0a591b95adf498434f28aa6863c09 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 11:51:08 +0100 Subject: [PATCH 05/12] Implemented improvements and suggestions. Separated GFN into class, blocks and variants. Added tests for all supported pytorch metric learning losses. 
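
For reference, a minimal sketch of what the loss wrapper builds for the reid
test config (SupConLoss with cross-batch memory; the tensor shapes and label
values below are illustrative, not part of this patch):

    import torch
    from pytorch_metric_learning.losses import CrossBatchMemory, SupConLoss

    # SupConLoss is listed in CrossBatchMemory.supported_losses(), so the
    # wrapper wraps it; unsupported losses are used directly with a warning.
    loss_fn = CrossBatchMemory(SupConLoss(), embedding_size=512)
    embeddings = torch.randn(16, 512)  # one 512-d embedding per sample
    ids = torch.randint(0, 4, (16,))   # integer identity labels
    loss = loss_fn(embeddings, ids)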
--- .../attached_modules/losses/__init__.py | 4 +- .../attached_modules/losses/pml_loss.py | 183 +++---- .../attached_modules/metrics/pml_metrics.py | 125 +++-- .../visualizers/embeddings_visualizer.py | 16 +- luxonis_train/nodes/backbones/__init__.py | 2 +- luxonis_train/nodes/backbones/ghostfacenet.py | 511 ------------------ .../nodes/backbones/ghostfacenet/__init__.py | 3 + .../nodes/backbones/ghostfacenet/blocks.py | 256 +++++++++ .../backbones/ghostfacenet/ghostfacenet.py | 159 ++++++ .../nodes/backbones/ghostfacenet/variants.py | 214 ++++++++ .../nodes/backbones/micronet/blocks.py | 23 +- tests/configs/reid.yaml | 2 +- tests/integration/test_reid.py | 28 +- 13 files changed, 837 insertions(+), 689 deletions(-) delete mode 100644 luxonis_train/nodes/backbones/ghostfacenet.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/__init__.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/blocks.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py create mode 100644 luxonis_train/nodes/backbones/ghostfacenet/variants.py diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index b320fada..2d0c77e1 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -7,7 +7,7 @@ from .ohem_bce_with_logits import OHEMBCEWithLogitsLoss from .ohem_cross_entropy import OHEMCrossEntropyLoss from .ohem_loss import OHEMLoss -from .pml_loss import MetricLearningLoss +from .pml_loss import EmbeddingLossWrapper from .reconstruction_segmentation_loss import ReconstructionSegmentationLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss @@ -27,5 +27,5 @@ "OHEMCrossEntropyLoss", "OHEMBCEWithLogitsLoss", "FOMOLocalizationLoss", - "MetricLearningLoss", + "EmbeddingLossWrapper", ] diff --git a/luxonis_train/attached_modules/losses/pml_loss.py b/luxonis_train/attached_modules/losses/pml_loss.py index 1727d091..959a5f68 100644 --- a/luxonis_train/attached_modules/losses/pml_loss.py +++ b/luxonis_train/attached_modules/losses/pml_loss.py @@ -1,87 +1,67 @@ -import warnings - -from pytorch_metric_learning.losses import ( - AngularLoss, - ArcFaceLoss, - CircleLoss, - ContrastiveLoss, - CosFaceLoss, - CrossBatchMemory, - DynamicSoftMarginLoss, - FastAPLoss, - GeneralizedLiftedStructureLoss, - HistogramLoss, - InstanceLoss, - IntraPairVarianceLoss, - LargeMarginSoftmaxLoss, - LiftedStructureLoss, - ManifoldLoss, - MarginLoss, - MultiSimilarityLoss, - NCALoss, - NormalizedSoftmaxLoss, - NPairsLoss, - NTXentLoss, - P2SGradLoss, - PNPLoss, - ProxyAnchorLoss, - ProxyNCALoss, - RankedListLoss, - SignalToNoiseRatioContrastiveLoss, - SoftTripleLoss, - SphereFaceLoss, - SubCenterArcFaceLoss, - SupConLoss, - TripletMarginLoss, - TupletMarginLoss, -) +import logging + +import pytorch_metric_learning.losses as pml_losses +from pytorch_metric_learning.losses import CrossBatchMemory from torch import Tensor from .base_loss import BaseLoss -# Dictionary mapping string keys to loss classes -loss_dict = { - "AngularLoss": AngularLoss, - "ArcFaceLoss": ArcFaceLoss, - "CircleLoss": CircleLoss, - "ContrastiveLoss": ContrastiveLoss, - "CosFaceLoss": CosFaceLoss, - "DynamicSoftMarginLoss": DynamicSoftMarginLoss, - "FastAPLoss": FastAPLoss, - "GeneralizedLiftedStructureLoss": GeneralizedLiftedStructureLoss, - "InstanceLoss": InstanceLoss, - "HistogramLoss": HistogramLoss, - "IntraPairVarianceLoss": 
IntraPairVarianceLoss, - "LargeMarginSoftmaxLoss": LargeMarginSoftmaxLoss, - "LiftedStructureLoss": LiftedStructureLoss, - "ManifoldLoss": ManifoldLoss, - "MarginLoss": MarginLoss, - "MultiSimilarityLoss": MultiSimilarityLoss, - "NCALoss": NCALoss, - "NormalizedSoftmaxLoss": NormalizedSoftmaxLoss, - "NPairsLoss": NPairsLoss, - "NTXentLoss": NTXentLoss, - "P2SGradLoss": P2SGradLoss, - "PNPLoss": PNPLoss, - "ProxyAnchorLoss": ProxyAnchorLoss, - "ProxyNCALoss": ProxyNCALoss, - "RankedListLoss": RankedListLoss, - "SignalToNoiseRatioContrastiveLoss": SignalToNoiseRatioContrastiveLoss, - "SoftTripleLoss": SoftTripleLoss, - "SphereFaceLoss": SphereFaceLoss, - "SubCenterArcFaceLoss": SubCenterArcFaceLoss, - "SupConLoss": SupConLoss, - "TripletMarginLoss": TripletMarginLoss, - "TupletMarginLoss": TupletMarginLoss, -} - - -class MetricLearningLoss(BaseLoss): +logger = logging.getLogger(__name__) + +ALL_EMBEDDING_LOSSES = [ + "AngularLoss", + "ArcFaceLoss", + "CircleLoss", + "ContrastiveLoss", + "CosFaceLoss", + "DynamicSoftMarginLoss", + "FastAPLoss", + "HistogramLoss", + "InstanceLoss", + "IntraPairVarianceLoss", + "LargeMarginSoftmaxLoss", + "GeneralizedLiftedStructureLoss", + "LiftedStructureLoss", + "MarginLoss", + "MultiSimilarityLoss", + "NPairsLoss", + "NCALoss", + "NormalizedSoftmaxLoss", + "NTXentLoss", + "PNPLoss", + "ProxyAnchorLoss", + "ProxyNCALoss", + "RankedListLoss", + "SignalToNoiseRatioContrastiveLoss", + "SoftTripleLoss", + "SphereFaceLoss", + "SubCenterArcFaceLoss", + "SupConLoss", + "ThresholdConsistentMarginLoss", + "TripletMarginLoss", + "TupletMarginLoss", +] + +CLASS_EMBEDDING_LOSSES = [ + "ArcFaceLoss", + "CosFaceLoss", + "LargeMarginSoftmaxLoss", + "NormalizedSoftmaxLoss", + "ProxyAnchorLoss", + "ProxyNCALoss", + "SoftTripleLoss", + "SphereFaceLoss", + "SubCenterArcFaceLoss", +] + + +class EmbeddingLossWrapper(BaseLoss): def __init__( self, loss_name: str, embedding_size: int = 512, cross_batch_memory_size=0, + num_classes: int = 0, loss_kwargs: dict | None = None, *args, **kwargs, @@ -89,34 +69,51 @@ def __init__( super().__init__(*args, **kwargs) if loss_kwargs is None: loss_kwargs = {} - self.loss_func = loss_dict[loss_name]( - **loss_kwargs - ) # Instantiate the loss object + + try: + loss_cls = getattr(pml_losses, loss_name) + except AttributeError as e: + raise ValueError( + f"Loss {loss_name} not found in pytorch_metric_learning" + ) from e + + if loss_name in CLASS_EMBEDDING_LOSSES: + if num_classes < 0: + raise ValueError( + f"Loss {loss_name} requires num_classes to be set to a positive value" + ) + loss_kwargs["num_classes"] = num_classes + loss_kwargs["embedding_size"] = embedding_size + + # If we wanted to support these losses, we would need to add a separate optimizer for them. + # They may be useful in some scenarios, so leaving this here for future reference. + raise ValueError( + f"Loss {loss_name} requires its own optimizer, and that is not currently supported." + ) + + self.loss_func = loss_cls(**loss_kwargs) + if cross_batch_memory_size > 0: if loss_name in CrossBatchMemory.supported_losses(): self.loss_func = CrossBatchMemory( self.loss_func, embedding_size=embedding_size ) else: - # Warn that cross_batch_memory_size is ignored - warnings.warn( - f"Cross batch memory is not supported for {loss_name}. Ignoring cross_batch_memory_size" + logger.warning( + f"Cross batch memory is not supported for {loss_name}. Ignoring cross_batch_memory_size." 
) - # self.miner_func = miner_func - - def prepare(self, inputs, labels): - embeddings = inputs["features"][0] + def prepare( + self, inputs: dict[str, list[Tensor]], labels: dict[str, list[Tensor]] + ) -> tuple[Tensor, Tensor]: + embeddings = self.get_input_tensors(inputs, "features")[0] - assert ( - labels is not None and "id" in labels - ), "ID labels are required for metric learning losses" - IDs = labels["id"][0][:, 0] - return embeddings, IDs + if labels is None or "id" not in labels: + raise ValueError("Labels must contain 'id' key") - def forward(self, inputs: Tensor, target: Tensor): - # miner_output = self.miner_func(inputs, target) + ids = labels["id"][0][:, 0] + return embeddings, ids + def forward(self, inputs: Tensor, target: Tensor) -> Tensor: loss = self.loss_func(inputs, target) - return loss diff --git a/luxonis_train/attached_modules/metrics/pml_metrics.py b/luxonis_train/attached_modules/metrics/pml_metrics.py index a6d4effa..ad8b0d88 100644 --- a/luxonis_train/attached_modules/metrics/pml_metrics.py +++ b/luxonis_train/attached_modules/metrics/pml_metrics.py @@ -7,62 +7,6 @@ # to PyTorch from TensorFlow -def _pairwise_distances(embeddings, squared=False): - """Compute the 2D matrix of distances between all the embeddings. - - @param embeddings: tensor of shape (batch_size, embed_dim) - @type embeddings: torch.Tensor - @param squared: If true, output is the pairwise squared euclidean - distance matrix. If false, output is the pairwise euclidean - distance matrix. - @type squared: bool - @return: pairwise_distances: tensor of shape (batch_size, - batch_size) - @rtype: torch.Tensor - """ - # Get the dot product between all embeddings - # shape (batch_size, batch_size) - dot_product = torch.matmul(embeddings, embeddings.t()) - - # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`. - # This also provides more numerical stability (the diagonal of the result will be exactly 0). 
- # shape (batch_size,) - square_norm = torch.diag(dot_product) - - # Compute the pairwise distance matrix as we have: - # ||a - b||^2 = ||a||^2 - 2 + ||b||^2 - # shape (batch_size, batch_size) - distances = ( - square_norm.unsqueeze(0) - 2.0 * dot_product + square_norm.unsqueeze(1) - ) - - # Because of computation errors, some distances might be negative so we put everything >= 0.0 - distances = torch.max(distances, torch.tensor(0.0)) - - if not squared: - # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal) - # we need to add a small epsilon where distances == 0.0 - mask = (distances == 0.0).float() - distances = distances + mask * 1e-16 - - distances = torch.sqrt(distances) - - # Correct the epsilon added: set the distances on the mask to be exactly 0.0 - distances = distances * (1.0 - mask) - - return distances - - -def _get_anchor_positive_triplet_mask(labels): - indices_equal = torch.eye( - labels.shape[0], dtype=torch.uint8, device=labels.device - ) - indices_not_equal = ~indices_equal - labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1) - mask = indices_not_equal & labels_equal - return mask - - class ClosestIsPositiveAccuracy(BaseMetric): def __init__(self, cross_batch_memory_size=0, **kwargs): super().__init__(**kwargs) @@ -83,8 +27,8 @@ def prepare(self, inputs, labels): assert ( labels is not None and "id" in labels ), "ID labels are required for metric learning losses" - IDs = labels["id"][0][:, 0] - return embeddings, IDs + ids = labels["id"][0][:, 0] + return embeddings, ids def update(self, inputs: Tensor, target: Tensor): embeddings, labels = inputs, target @@ -166,8 +110,8 @@ def prepare(self, inputs, labels): assert ( labels is not None and "id" in labels ), "ID labels are required for metric learning losses" - IDs = labels["id"][0][:, 0] - return embeddings, IDs + ids = labels["id"][0][:, 0] + return embeddings, ids def update(self, inputs: Tensor, target: Tensor): embeddings, labels = inputs, target @@ -211,13 +155,18 @@ def update(self, inputs: Tensor, target: Tensor): # Get the positive mask and convert it to boolean positive_mask = _get_anchor_positive_triplet_mask(labels).bool() + # Filter out distances to negative elements w.r.t. each query embedding only_positive_distances = pairwise_distances.clone() only_positive_distances[~positive_mask] = float("inf") + # From the positive distances, get the closest positive distance for each query embedding closest_positive_distances, _ = torch.min( only_positive_distances, dim=1 ) + # Calculate the difference between the closest distance (any) and closest positive distances + # - this tells us how much closer should the closest positive be in order for the embedding + # to be considered correct non_inf_mask = closest_positive_distances != float("inf") difference = closest_positive_distances - closest_distances difference = difference[non_inf_mask] @@ -256,3 +205,59 @@ def compute(self): closest_vs_positive_distances ), } + + +def _pairwise_distances(embeddings, squared=False): + """Compute the 2D matrix of distances between all the embeddings. + + @param embeddings: tensor of shape (batch_size, embed_dim) + @type embeddings: torch.Tensor + @param squared: If true, output is the pairwise squared euclidean + distance matrix. If false, output is the pairwise euclidean + distance matrix. 
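+        Example (illustrative): embeddings [[0., 0.], [3., 4.]] give
+        [[0., 5.], [5., 0.]], or [[0., 25.], [25., 0.]] when squared.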
+ @type squared: bool + @return: pairwise_distances: tensor of shape (batch_size, + batch_size) + @rtype: torch.Tensor + """ + # Get the dot product between all embeddings + # shape (batch_size, batch_size) + dot_product = torch.matmul(embeddings, embeddings.t()) + + # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`. + # This also provides more numerical stability (the diagonal of the result will be exactly 0.). + # shape (batch_size,) + square_norm = torch.diag(dot_product) + + # Compute the pairwise distance matrix as we have: + # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2 + # shape (batch_size, batch_size) + distances = ( + square_norm.unsqueeze(0) - 2.0 * dot_product + square_norm.unsqueeze(1) + ) + + # Because of computation errors, some distances might be negative so we put everything >= 0.0 + distances = torch.max(distances, torch.tensor(0.0)) + + if not squared: + # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal) + # we need to add a small epsilon where distances == 0.0 + mask = (distances == 0.0).float() + distances = distances + mask * 1e-16 + + distances = torch.sqrt(distances) + + # Correct the epsilon added: set the distances on the mask to be exactly 0.0 + distances = distances * (1.0 - mask) + + return distances + + +def _get_anchor_positive_triplet_mask(labels): + indices_equal = torch.eye( + labels.shape[0], dtype=torch.uint8, device=labels.device + ) + indices_not_equal = ~indices_equal + labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1) + mask = indices_not_equal & labels_equal + return mask diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py index d1096bfa..d8e5c940 100644 --- a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -33,15 +33,15 @@ def prepare( assert ( labels is not None and "id" in labels ), "ID labels are required for metric learning losses" - IDs = labels["id"][0] - return embeddings, IDs + ids = labels["id"][0] + return embeddings, ids def forward( self, label_canvas: Tensor, prediction_canvas: Tensor, embeddings: Tensor, - IDs: Tensor | None, + ids: Tensor | None, **kwargs, ) -> Tensor: """Creates a visualization of the embeddings. @@ -52,14 +52,14 @@ def forward( @param prediction_canvas: The canvas to draw the predictions on. @type embeddings: Tensor @param embeddings: The embeddings to visualize. - @type IDs: Tensor - @param IDs: The IDs to visualize. + @type ids: Tensor + @param ids: The ids to visualize. @rtype: Tensor @return: An embedding space projection. """ # Embeddings: [B, D], D = e.g.
512 - # IDs: [B, 1], corresponding to the embeddings + # ids: [B, 1], corresponding to the embeddings # Convert embeddings to numpy array embeddings_np = embeddings.detach().cpu().numpy() @@ -73,11 +73,11 @@ def forward( # Plot the embeddings fig, ax = plt.subplots(figsize=(10, 10)) - if IDs is not None: + if ids is not None: scatter = ax.scatter( embeddings_2d[:, 0], embeddings_2d[:, 1], - c=IDs.detach().cpu().numpy(), + c=ids.detach().cpu().numpy(), cmap="viridis", s=5, ) diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index f5319981..da063a5e 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -2,7 +2,7 @@ from .ddrnet import DDRNet from .efficientnet import EfficientNet from .efficientrep import EfficientRep -from .ghostfacenet import GhostFaceNetsV2 +from .ghostfacenet.ghostfacenet import GhostFaceNetsV2 from .micronet import MicroNet from .mobilenetv2 import MobileNetV2 from .mobileone import MobileOne diff --git a/luxonis_train/nodes/backbones/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet.py deleted file mode 100644 index 9641596d..00000000 --- a/luxonis_train/nodes/backbones/ghostfacenet.py +++ /dev/null @@ -1,511 +0,0 @@ -# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py - - -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from luxonis_train.nodes.base_node import BaseNode - - -def _make_divisible(v, divisor, min_value=None): - """This function is taken from the original tf repo. - - It ensures that all layers have a channel number that is divisible by 8 - It can be seen here: - https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - """ - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. 
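The channel-rounding helper deleted here survives as the shared micronet version imported later in this patch. A self-contained check of its behaviour, copying the definition as given:

def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v

assert _make_divisible(22, 8) == 24  # rounds to the nearest multiple of 8
assert _make_divisible(10, 8) == 16  # 8 would be a >10% drop, so bump up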
- if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def hard_sigmoid(x, inplace: bool = False): - if inplace: - return x.add_(3.0).clamp_(0.0, 6.0).div_(6.0) - else: - return F.relu6(x + 3.0) / 6.0 - - -class SqueezeExcite(nn.Module): - def __init__( - self, - in_chs, - se_ratio=0.25, - reduced_base_chs=None, - act_layer=nn.PReLU, - gate_fn=hard_sigmoid, - divisor=4, - **_, - ): - super(SqueezeExcite, self).__init__() - self.gate_fn = gate_fn - reduced_chs = _make_divisible( - (reduced_base_chs or in_chs) * se_ratio, divisor - ) - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) - self.act1 = act_layer() - self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) - - def forward(self, x): - x_se = self.avg_pool(x) - x_se = self.conv_reduce(x_se) - x_se = self.act1(x_se) - x_se = self.conv_expand(x_se) - x = x * self.gate_fn(x_se) - return x - - -class ConvBnAct(nn.Module): - def __init__( - self, in_chs, out_chs, kernel_size, stride=1, act_layer=nn.PReLU - ): - super(ConvBnAct, self).__init__() - self.conv = nn.Conv2d( - in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False - ) - self.bn1 = nn.BatchNorm2d(out_chs) - self.act1 = act_layer() - - def forward(self, x): - x = self.conv(x) - x = self.bn1(x) - x = self.act1(x) - return x - - -class ModifiedGDC(nn.Module): - def __init__( - self, image_size, in_chs, num_classes, dropout, emb=512 - ): # dropout implementation is in the original code but not in the paper - super(ModifiedGDC, self).__init__() - - if image_size % 32 == 0: - self.conv_dw = nn.Conv2d( - in_chs, - in_chs, - kernel_size=(image_size // 32), - groups=in_chs, - bias=False, - ) - else: - self.conv_dw = nn.Conv2d( - in_chs, - in_chs, - kernel_size=(image_size // 32 + 1), - groups=in_chs, - bias=False, - ) - self.bn1 = nn.BatchNorm2d(in_chs) - self.dropout = nn.Dropout(dropout) - - self.conv = nn.Conv2d(in_chs, emb, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm1d(emb) - self.linear = ( - nn.Linear(emb, num_classes) if num_classes else nn.Identity() - ) - - def forward(self, inps): - x = inps - x = self.conv_dw(x) - x = self.bn1(x) - x = self.dropout(x) - # # Add spots to the features - # x = torch.cat([x, spots.view(spots.size(0), -1, 1, 1)], dim=1) - x = self.conv(x) - x = x.view(x.size(0), -1) # Flatten - x = self.bn2(x) - x = self.linear(x) - return x - - -class GhostModuleV2(nn.Module): - def __init__( - self, - inp, - oup, - kernel_size=1, - ratio=2, - dw_size=3, - stride=1, - prelu=True, - mode=None, - args=None, - ): - super(GhostModuleV2, self).__init__() - self.mode = mode - self.gate_fn = nn.Sigmoid() - - if self.mode in ["original"]: - self.oup = oup - init_channels = math.ceil(oup / ratio) - new_channels = init_channels * (ratio - 1) - self.primary_conv = nn.Sequential( - nn.Conv2d( - inp, - init_channels, - kernel_size, - stride, - kernel_size // 2, - bias=False, - ), - nn.BatchNorm2d(init_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - self.cheap_operation = nn.Sequential( - nn.Conv2d( - init_channels, - new_channels, - dw_size, - 1, - dw_size // 2, - groups=init_channels, - bias=False, - ), - nn.BatchNorm2d(new_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - elif self.mode in ["attn"]: # DFC - self.oup = oup - init_channels = math.ceil(oup / ratio) - new_channels = init_channels * (ratio - 1) - self.primary_conv = nn.Sequential( - nn.Conv2d( - inp, - init_channels, - kernel_size, - stride, - kernel_size // 2, - bias=False, - ), - 
nn.BatchNorm2d(init_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - self.cheap_operation = nn.Sequential( - nn.Conv2d( - init_channels, - new_channels, - dw_size, - 1, - dw_size // 2, - groups=init_channels, - bias=False, - ), - nn.BatchNorm2d(new_channels), - nn.PReLU() if prelu else nn.Sequential(), - ) - self.short_conv = nn.Sequential( - nn.Conv2d( - inp, oup, kernel_size, stride, kernel_size // 2, bias=False - ), - nn.BatchNorm2d(oup), - nn.Conv2d( - oup, - oup, - kernel_size=(1, 5), - stride=1, - padding=(0, 2), - groups=oup, - bias=False, - ), - nn.BatchNorm2d(oup), - nn.Conv2d( - oup, - oup, - kernel_size=(5, 1), - stride=1, - padding=(2, 0), - groups=oup, - bias=False, - ), - nn.BatchNorm2d(oup), - ) - - def forward(self, x): - if self.mode in ["original"]: - x1 = self.primary_conv(x) - x2 = self.cheap_operation(x1) - out = torch.cat([x1, x2], dim=1) - return out[:, : self.oup, :, :] - elif self.mode in ["attn"]: - res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2)) - x1 = self.primary_conv(x) - x2 = self.cheap_operation(x1) - out = torch.cat([x1, x2], dim=1) - return out[:, : self.oup, :, :] * F.interpolate( - self.gate_fn(res), - size=(out.shape[-2], out.shape[-1]), - mode="nearest", - ) - - -class GhostBottleneckV2(nn.Module): - def __init__( - self, - in_chs, - mid_chs, - out_chs, - dw_kernel_size=3, - stride=1, - act_layer=nn.PReLU, - se_ratio=0.0, - layer_id=None, - args=None, - ): - super(GhostBottleneckV2, self).__init__() - has_se = se_ratio is not None and se_ratio > 0.0 - self.stride = stride - - assert layer_id is not None, "Layer ID must be explicitly provided" - - # Point-wise expansion - if layer_id <= 1: - self.ghost1 = GhostModuleV2( - in_chs, mid_chs, prelu=True, mode="original", args=args - ) - else: - self.ghost1 = GhostModuleV2( - in_chs, mid_chs, prelu=True, mode="attn", args=args - ) - - # Depth-wise convolution - if self.stride > 1: - self.conv_dw = nn.Conv2d( - mid_chs, - mid_chs, - dw_kernel_size, - stride=stride, - padding=(dw_kernel_size - 1) // 2, - groups=mid_chs, - bias=False, - ) - self.bn_dw = nn.BatchNorm2d(mid_chs) - - # Squeeze-and-excitation - if has_se: - self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio) - else: - self.se = None - - self.ghost2 = GhostModuleV2( - mid_chs, out_chs, prelu=False, mode="original", args=args - ) - - # shortcut - if in_chs == out_chs and self.stride == 1: - self.shortcut = nn.Sequential() - else: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_chs, - in_chs, - dw_kernel_size, - stride=stride, - padding=(dw_kernel_size - 1) // 2, - groups=in_chs, - bias=False, - ), - nn.BatchNorm2d(in_chs), - nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(out_chs), - ) - - def forward(self, x): - residual = x - x = self.ghost1(x) - if self.stride > 1: - x = self.conv_dw(x) - x = self.bn_dw(x) - if self.se is not None: - x = self.se(x) - x = self.ghost2(x) - x += self.shortcut(residual) - return x - - -# NODES.register_module() -class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): - def unwrap(self, inputs): - return [inputs[0]["features"][0]] - - def wrap(self, outputs): - return {"features": [outputs]} - - def set_export_mode(self, mode: bool = True): - self.export_mode = mode - self.train(not mode) - - def __init__( - self, - cfgs=None, - embedding_size=512, - num_classes=0, - width=1.0, - dropout=0.2, - block=GhostBottleneckV2, - add_pointwise_conv=False, - bn_momentum=0.9, - bn_epsilon=1e-5, - init_kaiming=True, - block_args=None, - *args, - **kwargs, - 
): - """GhostFaceNetsV2 backbone. - - GhostFaceNetsV2 is a convolutional neural network architecture focused on face recognition, but it is - adaptable to generic embedding tasks. It is based on the GhostNet architecture and uses Ghost BottleneckV2 blocks. - - Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py} - - @license: U{MIT License - } - - @see: U{GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations - } - - @type cfgs: list[list[list[int]]] | None - @param cfgs: List of Ghost BottleneckV2 configurations. Defaults to None, which uses the original GhostFaceNetsV2 configuration. - @type embedding_size: int - @param embedding_size: Size of the embedding. Defaults to 512. - @type num_classes: int - @param num_classes: Number of classes. Defaults to 0, which makes the network output the raw embeddings. Otherwise it can be used to - add another linear layer to the network, which is useful for training using ArcFace or similar classification-based losses that - require the user to drop the last layer of the network. - @type width: float - @param width: Width multiplier. Increases complexity and number of parameters. Defaults to 1.0. - @type dropout: float - @param dropout: Dropout rate. Defaults to 0.2. - @type block: nn.Module - @param block: Ghost BottleneckV2 block. Defaults to GhostBottleneckV2. - @type add_pointwise_conv: bool - @param add_pointwise_conv: If True, adds a pointwise convolution layer at the end of the network. Defaults to False. - @type bn_momentum: float - @param bn_momentum: Batch normalization momentum. Defaults to 0.9. - @type bn_epsilon: float - @param bn_epsilon: Batch normalization epsilon. Defaults to 1e-5. - @type init_kaiming: bool - @param init_kaiming: If True, initializes the weights using the Kaiming initialization. Defaults to True. - @type block_args: dict - @param block_args: Arguments to pass to the block. Defaults to None. 
- """ - # kwargs['_tasks'] = {TaskType.LABEL: 'features'} - super().__init__(*args, **kwargs) - - inp_shape = kwargs["input_shapes"][0]["features"][0] - # spots_shape = kwargs['input_shapes'][0]['features'][1] - - image_size = inp_shape[2] - channels = inp_shape[1] - if cfgs is None: - self.cfgs = [ - # k, t, c, SE, s - [[3, 16, 16, 0, 1]], - [[3, 48, 24, 0, 2]], - [[3, 72, 24, 0, 1]], - [[5, 72, 40, 0.25, 2]], - [[5, 120, 40, 0.25, 1]], - [[3, 240, 80, 0, 2]], - [ - [3, 200, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 480, 112, 0.25, 1], - [3, 672, 112, 0.25, 1], - ], - [[5, 672, 160, 0.25, 2]], - [ - [5, 960, 160, 0, 1], - [5, 960, 160, 0.25, 1], - [5, 960, 160, 0, 1], - [5, 960, 160, 0.25, 1], - ], - ] - else: - self.cfgs = cfgs - - # building first layer - output_channel = _make_divisible(16 * width, 4) - self.conv_stem = nn.Conv2d( - channels, output_channel, 3, 2, 1, bias=False - ) - self.bn1 = nn.BatchNorm2d(output_channel) - self.act1 = nn.PReLU() - input_channel = output_channel - - # building inverted residual blocks - stages = [] - layer_id = 0 - for cfg in self.cfgs: - layers = [] - for k, exp_size, c, se_ratio, s in cfg: - output_channel = _make_divisible(c * width, 4) - hidden_channel = _make_divisible(exp_size * width, 4) - if block == GhostBottleneckV2: - layers.append( - block( - input_channel, - hidden_channel, - output_channel, - k, - s, - se_ratio=se_ratio, - layer_id=layer_id, - args=block_args, - ) - ) - input_channel = output_channel - layer_id += 1 - stages.append(nn.Sequential(*layers)) - - output_channel = _make_divisible(exp_size * width, 4) - stages.append( - nn.Sequential(ConvBnAct(input_channel, output_channel, 1)) - ) - - self.blocks = nn.Sequential(*stages) - - # building last several layers - pointwise_conv = [] - if add_pointwise_conv: - pointwise_conv.append( - nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) - ) - pointwise_conv.append(nn.BatchNorm2d(output_channel)) - pointwise_conv.append(nn.PReLU()) - else: - pointwise_conv.append(nn.Sequential()) - - self.pointwise_conv = nn.Sequential(*pointwise_conv) - self.classifier = ModifiedGDC( - image_size, output_channel, num_classes, dropout, embedding_size - ) - - # Initialize weights - for m in self.modules(): - if init_kaiming: - if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): - fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight) - negative_slope = 0.25 # Default value for PReLU in PyTorch, change it if you use custom value - m.weight.data.normal_( - 0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2))) - ) - if isinstance(m, nn.BatchNorm2d): - m.momentum, m.eps = bn_momentum, bn_epsilon - - def forward(self, inps): - x = inps[0] - x = self.conv_stem(x) - x = self.bn1(x) - x = self.act1(x) - x = self.blocks(x) - x = self.pointwise_conv(x) - x = self.classifier(x) - return x diff --git a/luxonis_train/nodes/backbones/ghostfacenet/__init__.py b/luxonis_train/nodes/backbones/ghostfacenet/__init__.py new file mode 100644 index 00000000..85ed4447 --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/__init__.py @@ -0,0 +1,3 @@ +from .ghostfacenet import GhostFaceNetsV2 + +__all__ = ["GhostFaceNetsV2"] diff --git a/luxonis_train/nodes/backbones/ghostfacenet/blocks.py b/luxonis_train/nodes/backbones/ghostfacenet/blocks.py new file mode 100644 index 00000000..46a9ba27 --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/blocks.py @@ -0,0 +1,256 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from 
luxonis_train.nodes.backbones.micronet.blocks import _make_divisible +from luxonis_train.nodes.blocks import SqueezeExciteBlock + + +class ModifiedGDC(nn.Module): + def __init__(self, image_size, in_chs, num_classes, dropout, emb=512): + super().__init__() + + if image_size % 32 == 0: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32), + groups=in_chs, + bias=False, + ) + else: + self.conv_dw = nn.Conv2d( + in_chs, + in_chs, + kernel_size=(image_size // 32 + 1), + groups=in_chs, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(in_chs) + self.dropout = nn.Dropout(dropout) + + self.conv = nn.Conv2d(in_chs, emb, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm1d(emb) + self.linear = ( + nn.Linear(emb, num_classes) if num_classes else nn.Identity() + ) + + def forward(self, inps): + x = inps + x = self.conv_dw(x) + x = self.bn1(x) + x = self.dropout(x) + x = self.conv(x) + x = x.view(x.size(0), -1) + x = self.bn2(x) + x = self.linear(x) + return x + + +class GhostModuleV2(nn.Module): + def __init__( + self, + inp, + oup, + kernel_size=1, + ratio=2, + dw_size=3, + stride=1, + prelu=True, + mode=None, + args=None, + ): + super(GhostModuleV2, self).__init__() + self.mode = mode + self.gate_fn = nn.Sigmoid() + + if self.mode in ["original"]: + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + elif self.mode in ["attn"]: # DFC + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels * (ratio - 1) + self.primary_conv = nn.Sequential( + nn.Conv2d( + inp, + init_channels, + kernel_size, + stride, + kernel_size // 2, + bias=False, + ), + nn.BatchNorm2d(init_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.cheap_operation = nn.Sequential( + nn.Conv2d( + init_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=init_channels, + bias=False, + ), + nn.BatchNorm2d(new_channels), + nn.PReLU() if prelu else nn.Sequential(), + ) + self.short_conv = nn.Sequential( + nn.Conv2d( + inp, oup, kernel_size, stride, kernel_size // 2, bias=False + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(1, 5), + stride=1, + padding=(0, 2), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + nn.Conv2d( + oup, + oup, + kernel_size=(5, 1), + stride=1, + padding=(2, 0), + groups=oup, + bias=False, + ), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.mode in ["original"]: + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] + elif self.mode in ["attn"]: + res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2)) + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, : self.oup, :, :] * F.interpolate( + self.gate_fn(res), + size=(out.shape[-2], out.shape[-1]), + mode="nearest", + ) + + +class GhostBottleneckV2(nn.Module): + def __init__( + self, + in_chs, + mid_chs, + out_chs, + dw_kernel_size=3, + stride=1, + act_layer=nn.PReLU, + se_ratio=0.0, + layer_id=None, 
+ args=None, + ): + super(GhostBottleneckV2, self).__init__() + has_se = se_ratio is not None and se_ratio > 0.0 + self.stride = stride + + assert layer_id is not None, "Layer ID must be explicitly provided" + + # Point-wise expansion + if layer_id <= 1: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="original", args=args + ) + else: + self.ghost1 = GhostModuleV2( + in_chs, mid_chs, prelu=True, mode="attn", args=args + ) + + # Depth-wise convolution + if self.stride > 1: + self.conv_dw = nn.Conv2d( + mid_chs, + mid_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=mid_chs, + bias=False, + ) + self.bn_dw = nn.BatchNorm2d(mid_chs) + + # Squeeze-and-excitation + if has_se: + reduced_chs = _make_divisible(mid_chs * se_ratio, 4) + self.se = SqueezeExciteBlock( + mid_chs, reduced_chs, True, activation=nn.PReLU() + ) + else: + self.se = None + + self.ghost2 = GhostModuleV2( + mid_chs, out_chs, prelu=False, mode="original", args=args + ) + + # shortcut + if in_chs == out_chs and self.stride == 1: + self.shortcut = nn.Sequential() + else: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_chs, + in_chs, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=in_chs, + bias=False, + ), + nn.BatchNorm2d(in_chs), + nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_chs), + ) + + def forward(self, x): + residual = x + x = self.ghost1(x) + if self.stride > 1: + x = self.conv_dw(x) + x = self.bn_dw(x) + if self.se is not None: + x = self.se(x) + x = self.ghost2(x) + x += self.shortcut(residual) + return x diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py new file mode 100644 index 00000000..8bb61fee --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -0,0 +1,159 @@ +# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py +import math +from typing import Literal + +import torch +import torch.nn as nn + +from luxonis_train.nodes.backbones.ghostfacenet.blocks import ( + GhostBottleneckV2, + ModifiedGDC, +) +from luxonis_train.nodes.backbones.ghostfacenet.variants import get_variant +from luxonis_train.nodes.backbones.micronet.blocks import _make_divisible +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ConvModule + + +class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): + in_channels: list[int] + in_width: list[int] + + def __init__( + self, + embedding_size=512, + num_classes=-1, + variant: Literal["V2"] = "V2", + *args, + **kwargs, + ): + """GhostFaceNetsV2 backbone. + + GhostFaceNetsV2 is a convolutional neural network architecture focused on face recognition, but it is + adaptable to generic embedding tasks. It is based on the GhostNet architecture and uses Ghost BottleneckV2 blocks. + + Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py} + + @license: U{MIT License + } + + @see: U{GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations + } + + @type embedding_size: int + @param embedding_size: Size of the embedding. Defaults to 512. + @type num_classes: int + @param num_classes: Number of classes. Defaults to -1, which leaves the default variant value in. 
Otherwise it can be used to + have the network return raw embeddings (=0) or add another linear layer to the network, which is useful for training using + ArcFace or similar classification-based losses that require the user to drop the last layer of the network. + @type variant: Literal["V2"] + @param variant: Variant of the GhostFaceNets embedding model. Defaults to "V2" (which is the only variant available). + """ + super().__init__(*args, **kwargs) + + image_size = self.in_width[0] + channels = self.in_channels[0] + var = get_variant(variant) + if num_classes >= 0: + var.num_classes = num_classes + self.cfgs = var.cfgs + + # Building first layer + output_channel = _make_divisible(int(16 * var.width), 4) + self.conv_stem = nn.Conv2d( + channels, output_channel, 3, 2, 1, bias=False + ) + self.bn1 = nn.BatchNorm2d(output_channel) + self.act1 = nn.PReLU() + input_channel = output_channel + + # Building Ghost BottleneckV2 blocks + stages = [] + layer_id = 0 + for cfg in self.cfgs: + layers = [] + for b_cfg in cfg: + output_channel = _make_divisible( + b_cfg.output_channels * var.width, 4 + ) + hidden_channel = _make_divisible( + b_cfg.expand_size * var.width, 4 + ) + if var.block == GhostBottleneckV2: + layers.append( + var.block( + input_channel, + hidden_channel, + output_channel, + b_cfg.kernel_size, + b_cfg.stride, + se_ratio=b_cfg.se_ratio, + layer_id=layer_id, + args=var.block_args, + ) + ) + input_channel = output_channel + layer_id += 1 + stages.append(nn.Sequential(*layers)) + + output_channel = _make_divisible(b_cfg.expand_size * var.width, 4) + stages.append( + nn.Sequential( + ConvModule( + input_channel, + output_channel, + kernel_size=1, + activation=nn.PReLU(), + ) + ) + ) + + self.blocks = nn.Sequential(*stages) + + # Building pointwise convolution + pointwise_conv = [] + if var.add_pointwise_conv: + pointwise_conv.append( + nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) + ) + pointwise_conv.append(nn.BatchNorm2d(output_channel)) + pointwise_conv.append(nn.PReLU()) + else: + pointwise_conv.append(nn.Sequential()) + + self.pointwise_conv = nn.Sequential(*pointwise_conv) + self.classifier = ModifiedGDC( + image_size, + output_channel, + var.num_classes, + var.dropout, + embedding_size, + ) + + # Initializing weights + for m in self.modules(): + if var.init_kaiming: + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight) + negative_slope = 0.25 + m.weight.data.normal_( + 0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2))) + ) + if isinstance(m, nn.BatchNorm2d): + m.momentum, m.eps = var.bn_momentum, var.bn_epsilon + + def unwrap(self, inputs): + return [inputs[0]["features"][0]] + + def wrap(self, outputs): + return {"features": [outputs]} + + def forward(self, inps): + x = inps[0] + x = self.conv_stem(x) + x = self.bn1(x) + x = self.act1(x) + x = self.blocks(x) + x = self.pointwise_conv(x) + x = self.classifier(x) + return x diff --git a/luxonis_train/nodes/backbones/ghostfacenet/variants.py b/luxonis_train/nodes/backbones/ghostfacenet/variants.py new file mode 100644 index 00000000..0e88ecfc --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/variants.py @@ -0,0 +1,214 @@ +from typing import List, Literal + +from pydantic import BaseModel +from torch import nn + +from luxonis_train.nodes.backbones.ghostfacenet.blocks import GhostBottleneckV2 + + +class BlockConfig(BaseModel): + kernel_size: int + expand_size: int + output_channels: int + se_ratio: float + stride: int + + +class 
GhostFaceNetsVariant(BaseModel): + """Variant of the GhostFaceNets embedding model. + + @type cfgs: List[List[BlockConfig]] + @param cfgs: List of Ghost BottleneckV2 configurations. + @type num_classes: int + @param num_classes: Number of classes. Defaults to 0, which makes + the network output the raw embeddings. Otherwise it can be used + to add another linear layer to the network, which is useful for + training using ArcFace or similar classification-based losses + that require the user to drop the last layer of the network. + @type width: int + @param width: Width multiplier. Increases complexity and number of + parameters. Defaults to 1.0. + @type dropout: float + @param dropout: Dropout rate. Defaults to 0.2. + @type block: nn.Module + @param block: Ghost BottleneckV2 block. Defaults to + GhostBottleneckV2. + @type add_pointwise_conv: bool + @param add_pointwise_conv: If True, adds a pointwise convolution + layer at the end of the network. Defaults to False. + @type bn_momentum: float + @param bn_momentum: Batch normalization momentum. Defaults to 0.9. + @type bn_epsilon: float + @param bn_epsilon: Batch normalization epsilon. Defaults to 1e-5. + @type init_kaiming: bool + @param init_kaiming: If True, initializes the weights using the + Kaiming initialization. Defaults to True. + @type block_args: dict + @param block_args: Arguments to pass to the block. Defaults to None. + """ + + num_classes: int + width: int + dropout: float + block: type[nn.Module] + add_pointwise_conv: bool + bn_momentum: float + bn_epsilon: float + init_kaiming: bool + block_args: dict | None + cfgs: List[List[BlockConfig]] + + +V2 = GhostFaceNetsVariant( + num_classes=0, + width=1, + dropout=0.2, + block=GhostBottleneckV2, + add_pointwise_conv=False, + bn_momentum=0.9, + bn_epsilon=1e-5, + init_kaiming=True, + block_args=None, + cfgs=[ + [ + BlockConfig( + kernel_size=3, + expand_size=16, + output_channels=16, + se_ratio=0.0, + stride=1, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=48, + output_channels=24, + se_ratio=0.0, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=72, + output_channels=24, + se_ratio=0.0, + stride=1, + ) + ], + [ + BlockConfig( + kernel_size=5, + expand_size=72, + output_channels=40, + se_ratio=0.25, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=5, + expand_size=120, + output_channels=40, + se_ratio=0.25, + stride=1, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=240, + output_channels=80, + se_ratio=0.0, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=3, + expand_size=200, + output_channels=80, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=184, + output_channels=80, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=184, + output_channels=80, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=480, + output_channels=112, + se_ratio=0.25, + stride=1, + ), + BlockConfig( + kernel_size=3, + expand_size=672, + output_channels=112, + se_ratio=0.25, + stride=1, + ), + ], + [ + BlockConfig( + kernel_size=5, + expand_size=672, + output_channels=160, + se_ratio=0.25, + stride=2, + ) + ], + [ + BlockConfig( + kernel_size=5, + expand_size=960, + output_channels=160, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=5, + expand_size=960, + output_channels=160, + se_ratio=0.25, + stride=1, + ), + BlockConfig( + kernel_size=5, + expand_size=960, + output_channels=160, + se_ratio=0.0, + stride=1, + ), + BlockConfig( + kernel_size=5, + 
expand_size=960, + output_channels=160, + se_ratio=0.25, + stride=1, + ), + ], + ], +) + + +def get_variant(variant: Literal["V2"]) -> GhostFaceNetsVariant: + variants = {"V2": V2} + if variant not in variants: # pragma: no cover + raise ValueError( + "GhostFaceNets model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py index 3da5e15e..b29082cf 100644 --- a/luxonis_train/nodes/backbones/micronet/blocks.py +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -357,7 +357,7 @@ def __init__( self.avg_pool = nn.AdaptiveAvgPool2d(1) - squeeze_channels = self._make_divisible(in_channels // reduction, 4) + squeeze_channels = _make_divisible(in_channels // reduction, 4) self.fc = nn.Sequential( nn.Linear(in_channels, squeeze_channels), @@ -413,16 +413,17 @@ def forward(self, x: Tensor) -> Tensor: return out - def _make_divisible( - self, value: int, divisor: int, min_value: int | None = None - ) -> int: - if min_value is None: - min_value = divisor - new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * value: - new_v += divisor - return new_v + +def _make_divisible( + value: int, divisor: int, min_value: int | None = None +) -> int: + if min_value is None: + min_value = divisor + new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * value: + new_v += divisor + return new_v class SpatialSepConvSF(nn.Module): diff --git a/tests/configs/reid.yaml b/tests/configs/reid.yaml index d9c0ec11..21ca2748 100644 --- a/tests/configs/reid.yaml +++ b/tests/configs/reid.yaml @@ -11,7 +11,7 @@ model: embedding_size: &embedding_size 512 losses: - - name: MetricLearningLoss + - name: EmbeddingLossWrapper params: loss_name: SupConLoss embedding_size: *embedding_size diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py index 9ed4e867..0d006072 100644 --- a/tests/integration/test_reid.py +++ b/tests/integration/test_reid.py @@ -5,6 +5,10 @@ import pytest import torch +from luxonis_train.attached_modules.losses.pml_loss import ( + ALL_EMBEDDING_LOSSES, + CLASS_EMBEDDING_LOSSES, +) from luxonis_train.core import LuxonisModel from luxonis_train.enums import TaskType from luxonis_train.loaders import BaseLoaderTorch @@ -15,6 +19,8 @@ ONNX_PATH = Path("tests/integration/_model.onnx") STUDY_PATH = Path("study_local.db") +NUM_INDIVIDUALS = 100 + class CustomReIDLoader(BaseLoaderTorch): def __init__(self, *args, **kwargs): @@ -35,7 +41,7 @@ def __getitem__(self, _): # pragma: no cover } # Fake labels - id = torch.randint(0, 1000, (1,), dtype=torch.int64) + id = torch.randint(0, NUM_INDIVIDUALS, (1,), dtype=torch.int64) labels = { "id": (id, TaskType.LABEL), } @@ -76,8 +82,26 @@ def clear_files(): ONNX_PATH.unlink(missing_ok=True) -def test_reid(opts: dict[str, Any], infer_path: Path): +not_class_based_losses = ALL_EMBEDDING_LOSSES.copy() +for loss in CLASS_EMBEDDING_LOSSES: + not_class_based_losses.remove(loss) + + +@pytest.mark.parametrize("loss_name", not_class_based_losses) +def test_reid(opts: dict[str, Any], infer_path: Path, loss_name: str): config_file = "tests/configs/reid.yaml" + opts["model.losses.0.params.loss_name"] = loss_name + + # if loss_name in CLASS_EMBEDDING_LOSSES: + # opts["model.losses.0.params.num_classes"] = 
NUM_INDIVIDUALS + # opts["model.nodes.0.params.num_classes"] = NUM_INDIVIDUALS + # else: + # opts["model.losses.0.params.num_classes"] = 0 + # opts["model.nodes.0.params.num_classes"] = 0 + + if loss_name == "RankedListLoss": + opts["model.losses.0.params.loss_kwargs"] = {"margin": 1.0, "Tn": 0.5} + model = LuxonisModel(config_file, opts) model.train() model.test(view="val") From 06899357c0ba236114f778a3300c92d79b426a36 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 11:54:55 +0100 Subject: [PATCH 06/12] refactor: update type hint for GhostFaceNetsV2 class to use Tensor from torch --- luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py index 8bb61fee..2188645f 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -2,8 +2,8 @@ import math from typing import Literal -import torch import torch.nn as nn +from torch import Tensor from luxonis_train.nodes.backbones.ghostfacenet.blocks import ( GhostBottleneckV2, @@ -15,7 +15,7 @@ from luxonis_train.nodes.blocks import ConvModule -class GhostFaceNetsV2(BaseNode[torch.Tensor, list[torch.Tensor]]): +class GhostFaceNetsV2(BaseNode[Tensor, list[Tensor]]): in_channels: list[int] in_width: list[int] From 94639972c720172a27505fbe4439480bc640d824 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 11:55:52 +0100 Subject: [PATCH 07/12] refactor: remove unused unwrap and wrap methods from GhostFaceNetsV2 class --- luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py index 2188645f..cb065c43 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -142,12 +142,6 @@ def __init__( if isinstance(m, nn.BatchNorm2d): m.momentum, m.eps = var.bn_momentum, var.bn_epsilon - def unwrap(self, inputs): - return [inputs[0]["features"][0]] - - def wrap(self, outputs): - return {"features": [outputs]} - def forward(self, inps): x = inps[0] x = self.conv_stem(x) From 555fe2aa483d2493e1a07bc4e99df5c05954be00 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 12:17:50 +0100 Subject: [PATCH 08/12] fix: correct formatting in __all__ list in metrics module --- luxonis_train/attached_modules/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index 10f993ee..59e9cc57 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -17,6 +17,6 @@ "Precision", "Recall", "ClosestIsPositiveAccuracy", - "ConfusionMatrix", + "ConfusionMatrix", "MedianDistances", ] From 9fe0b798a121d480b067fe9cb5f4b9c939e1afe8 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Mon, 16 Dec 2024 14:02:28 +0100 Subject: [PATCH 09/12] Improved coverage, explicitly set mdformat github version --- .pre-commit-config.yaml | 2 +- tests/configs/reid.yaml | 2 +- tests/integration/test_reid.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml 
b/.pre-commit-config.yaml index c7779beb..226a18b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,4 +20,4 @@ repos: hooks: - id: mdformat additional_dependencies: - - mdformat-gfm + - mdformat-gfm==0.3.7 diff --git a/tests/configs/reid.yaml b/tests/configs/reid.yaml index 21ca2748..c79e4f8e 100644 --- a/tests/configs/reid.yaml +++ b/tests/configs/reid.yaml @@ -15,7 +15,7 @@ model: params: loss_name: SupConLoss embedding_size: *embedding_size - cross_batch_memory_size: &memory_size 200 + cross_batch_memory_size: &memory_size 4 attached_to: GhostFaceNetsV2 metrics: diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py index 0d006072..8094dd80 100644 --- a/tests/integration/test_reid.py +++ b/tests/integration/test_reid.py @@ -88,7 +88,9 @@ def clear_files(): @pytest.mark.parametrize("loss_name", not_class_based_losses) -def test_reid(opts: dict[str, Any], infer_path: Path, loss_name: str): +def test_available_losses( + opts: dict[str, Any], infer_path: Path, loss_name: str +): config_file = "tests/configs/reid.yaml" opts["model.losses.0.params.loss_name"] = loss_name @@ -113,3 +115,28 @@ def test_reid(opts: dict[str, Any], infer_path: Path, loss_name: str): assert len(list(infer_path.iterdir())) == 0 model.infer(view="val", save_dir=infer_path) assert infer_path.exists() + + +@pytest.mark.parametrize("loss_name", CLASS_EMBEDDING_LOSSES) +@pytest.mark.parametrize("num_classes", [-2, NUM_INDIVIDUALS]) +def test_unsupported_class_based_losses( + opts: dict[str, Any], loss_name: str, num_classes: int +): + config_file = "tests/configs/reid.yaml" + opts["model.losses.0.params.loss_name"] = loss_name + opts["model.losses.0.params.num_classes"] = num_classes + opts["model.nodes.0.params.num_classes"] = num_classes + + with pytest.raises(ValueError): + model = LuxonisModel(config_file, opts) + model.train() + + +@pytest.mark.parametrize("loss_name", ["NonExistentLoss"]) +def test_nonexistent_losses(opts: dict[str, Any], loss_name: str): + config_file = "tests/configs/reid.yaml" + opts["model.losses.0.params.loss_name"] = loss_name + + with pytest.raises(ValueError): + model = LuxonisModel(config_file, opts) + model.train() From b47e79ea82771758745f0ef6c84605e5e7e72e4f Mon Sep 17 00:00:00 2001 From: CaptainTrojan <49991681+CaptainTrojan@users.noreply.github.com> Date: Tue, 17 Dec 2024 18:20:51 +0100 Subject: [PATCH 10/12] Reduced mdformat-gfm version to 0.3.6 to support Python 3.8 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 226a18b8..c9355abb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,4 +20,4 @@ repos: hooks: - id: mdformat additional_dependencies: - - mdformat-gfm==0.3.7 + - mdformat-gfm==0.3.6 From 8e376a0367e85081d77227a8890ef5fde3bfa11a Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Wed, 1 Jan 2025 22:58:59 +0100 Subject: [PATCH 11/12] Coverage fixes --- .../visualizers/embeddings_visualizer.py | 24 ++++------ .../backbones/ghostfacenet/ghostfacenet.py | 11 +---- tests/integration/test_reid.py | 48 +++++++++++++++++-- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py index d8e5c940..f3591c83 100644 --- a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py @@ -41,7 +41,7 @@ 
def forward( label_canvas: Tensor, prediction_canvas: Tensor, embeddings: Tensor, - ids: Tensor | None, + ids: Tensor, **kwargs, ) -> Tensor: """Creates a visualization of the embeddings. @@ -73,20 +73,14 @@ def forward( # Plot the embeddings fig, ax = plt.subplots(figsize=(10, 10)) - if ids is not None: - scatter = ax.scatter( - embeddings_2d[:, 0], - embeddings_2d[:, 1], - c=ids.detach().cpu().numpy(), - cmap="viridis", - s=5, - ) - else: - scatter = ax.scatter( - embeddings_2d[:, 0], - embeddings_2d[:, 1], - s=5, - ) + scatter = ax.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + c=ids.detach().cpu().numpy(), + cmap="viridis", + s=5, + ) + fig.colorbar(scatter, ax=ax) ax.set_title("Embeddings Visualization") ax.set_xlabel("Dimension 1") diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py index cb065c43..5a99ae28 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py @@ -111,16 +111,7 @@ def __init__( self.blocks = nn.Sequential(*stages) # Building pointwise convolution - pointwise_conv = [] - if var.add_pointwise_conv: - pointwise_conv.append( - nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) - ) - pointwise_conv.append(nn.BatchNorm2d(output_channel)) - pointwise_conv.append(nn.PReLU()) - else: - pointwise_conv.append(nn.Sequential()) - + pointwise_conv = [nn.Sequential()] self.pointwise_conv = nn.Sequential(*pointwise_conv) self.classifier = ModifiedGDC( image_size, diff --git a/tests/integration/test_reid.py b/tests/integration/test_reid.py index 8094dd80..53355025 100644 --- a/tests/integration/test_reid.py +++ b/tests/integration/test_reid.py @@ -35,7 +35,7 @@ def input_shapes(self): def __getitem__(self, _): # pragma: no cover # Fake data - image = torch.rand(3, 256, 256, dtype=torch.float32) + image = torch.rand(self.input_shapes["image"], dtype=torch.float32) inputs = { "image": image, } @@ -55,6 +55,24 @@ def get_classes(self) -> dict[TaskType, list[str]]: return {TaskType.LABEL: ["id"]} +class CustomReIDLoaderNoID(CustomReIDLoader): + def __getitem__(self, _): + inputs, labels = super().__getitem__(_) + labels["something_else"] = labels["id"] + del labels["id"] + + return inputs, labels + + +class CustomReIDLoaderImageSize2(CustomReIDLoader): + @property + def input_shapes(self): + return { + "image": torch.Size([3, 200, 200]), + "id": torch.Size([1]), + } + + @pytest.fixture def infer_path() -> Path: if INFER_PATH.exists(): @@ -128,8 +146,7 @@ def test_unsupported_class_based_losses( opts["model.nodes.0.params.num_classes"] = num_classes with pytest.raises(ValueError): - model = LuxonisModel(config_file, opts) - model.train() + LuxonisModel(config_file, opts) @pytest.mark.parametrize("loss_name", ["NonExistentLoss"]) @@ -137,6 +154,31 @@ def test_nonexistent_losses(opts: dict[str, Any], loss_name: str): config_file = "tests/configs/reid.yaml" opts["model.losses.0.params.loss_name"] = loss_name + with pytest.raises(ValueError): + LuxonisModel(config_file, opts) + + +def test_bad_loader(opts: dict[str, Any]): + config_file = "tests/configs/reid.yaml" + opts["loader.name"] = "CustomReIDLoaderNoID" + with pytest.raises(ValueError): model = LuxonisModel(config_file, opts) model.train() + + +def test_not_enough_samples_for_metrics(opts: dict[str, Any]): + config_file = "tests/configs/reid.yaml" + opts["model.metrics.1.params.cross_batch_memory_size"] = 100 + + model = LuxonisModel(config_file, opts) + 
model.train() + + +def test_image_size_not_divisible_by_32(opts: dict[str, Any]): + config_file = "tests/configs/reid.yaml" + opts["loader.name"] = "CustomReIDLoaderImageSize2" + + # with pytest.raises(ValueError): + model = LuxonisModel(config_file, opts) + model.train() From 23e75001a56b810a2047aeac69e4c2faacd79860 Mon Sep 17 00:00:00 2001 From: Michal Sejak Date: Thu, 2 Jan 2025 00:44:48 +0100 Subject: [PATCH 12/12] fix: return a model copy for the specified GhostFaceNets variant --- luxonis_train/nodes/backbones/ghostfacenet/variants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/nodes/backbones/ghostfacenet/variants.py b/luxonis_train/nodes/backbones/ghostfacenet/variants.py index 0e88ecfc..aa78daf8 100644 --- a/luxonis_train/nodes/backbones/ghostfacenet/variants.py +++ b/luxonis_train/nodes/backbones/ghostfacenet/variants.py @@ -211,4 +211,4 @@ def get_variant(variant: Literal["V2"]) -> GhostFaceNetsVariant: "GhostFaceNets model variant should be in " f"{list(variants.keys())}, got {variant}." ) - return variants[variant] + return variants[variant].model_copy()
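The model_copy() fix matters because GhostFaceNetsV2.__init__ mutates num_classes on the variant it receives; returning the shared V2 instance would let that mutation leak into every later model. A minimal pydantic sketch of the difference, with an illustrative Variant class:

from pydantic import BaseModel

class Variant(BaseModel):
    num_classes: int = 0

V2 = Variant()

# Handing out the shared instance lets a caller mutate global state:
shared = V2
shared.num_classes = 100
assert V2.num_classes == 100  # the module-level default changed under us

# Handing out a copy isolates the mutation, which is what the fix does:
V2 = Variant()
copied = V2.model_copy()
copied.num_classes = 100
assert V2.num_classes == 0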