diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..a6eef919 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @luxonis/ML-Reviewers diff --git a/.github/labeler.yaml b/.github/labeler.yaml new file mode 100644 index 00000000..33749bd5 --- /dev/null +++ b/.github/labeler.yaml @@ -0,0 +1,32 @@ +tests: + - changed-files: + - any-glob-to-any-file: 'tests/*' + - head-branch: + - 'test/*' + - 'tests/*' + +DevOps: + - changed-files: + - any-glob-to-any-file: '.github/*' + +CLI: + - changed-files: + - any-glob-to-any-file: '**/__main__.py' + +release: + - base-branch: 'main' + +enhancement: + - head-branch: + - 'feature/*' + - 'feat/*' + - 'enhancement/*' + +fix: + - head-branch: + - 'fix/*' + - 'bug/*' + - 'hotfix/*' + - 'issue/*' + - 'bugfix/*' + - 'patch/*' diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..6dbf1a87 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,175 @@ +name: CI + +on: + pull_request: + branches: [ dev, main ] + paths: + - 'luxonis_train/**' + - 'tests/**' + - .github/workflows/ci.yaml + - '!**/*.md' + - '!luxonis_train/__main__.py' + +permissions: + pull-requests: write + contents: write + checks: write + +jobs: + assigner: + runs-on: ubuntu-latest + steps: + - name: Auto-assign + uses: toshimaru/auto-author-assign@v2.1.1 + + labeler: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Labeler + uses: actions/labeler@v5 + with: + configuration-path: .github/labeler.yaml + + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Run pre-commit + uses: pre-commit/action@v3.0.1 + + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y pandoc + pip install pydoctor + curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" + + - name: Build docs + run: python gen-docs.py luxonis_train + + type-check: + needs: + - pre-commit + - docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: pip install -e .[dev] + + - name: Type check + uses: jakebailey/pyright-action@v2 + with: + version: '1.1.380' + level: warning + warnings: true + python-version: '3.10' + project: pyproject.toml + + tests: + needs: + - type-check + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: pip install -e .[dev] + + - name: Authenticate to Google Cloud + id: google-auth + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + create_credentials_file: true + export_environment_variables: true + token_format: access_token + + - name: Run pytest + uses: pavelzw/pytest-action@v2 + env: + LUXONISML_BUCKET: luxonis-test-bucket + PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 + with: + emoji: 
false + custom-arguments: --junit-xml pytest.xml --cov luxonis_train --cov-report xml + + - name: Create Test Report + uses: EnricoMi/publish-unit-test-result-action@v2 + if: matrix.os == 'ubuntu-latest' + with: + files: pytest.xml + + - name: Generate coverage badge + uses: tj-actions/coverage-badge-py@v2 + if: matrix.os == 'ubuntu-latest' + with: + output: media/coverage_badge.svg + + - name: Generate coverage report + uses: orgoro/coverage@v3.2 + if: matrix.os == 'ubuntu-latest' + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + thresholdAll: 0.9 + thresholdNew: 0.8 + + - name: Commit coverage badge + if: matrix.os == 'ubuntu-latest' + run: | + git config --global user.name 'GitHub Actions' + git config --global user.email 'actions@github.com' + git diff --quiet media/coverage_badge.svg || { + git add media/coverage_badge.svg + git commit -m "[Automated] Updated coverage badge" + } + + - name: Push changes + uses: ad-m/github-push-action@master + if: matrix.os == 'ubuntu-latest' + with: + branch: ${{ github.head_ref }} + diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index f3c69761..00000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Docs - -on: - pull_request: - branches: [ dev, main ] - paths: - - 'luxonis_train/**' - - .github/workflows/docs.yaml - -jobs: - docs: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Install dependencies - run: | - pip install pydoctor - curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" - - - name: Build docs - run: | - python gen-docs.py luxonis_train diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml deleted file mode 100644 index ce6b816b..00000000 --- a/.github/workflows/pre-commit.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: pre-commit - -on: - pull_request: - branches: [dev, main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index b5c0e44f..00000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,115 +0,0 @@ -name: Tests - -on: - pull_request: - branches: [ dev, main ] - paths: - - 'luxonis_train/**/**.py' - - 'tests/**/**.py' - - .github/workflows/tests.yaml - -jobs: - run_tests: - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macOS-latest] - version: ['3.10', '3.11'] - - runs-on: ${{ matrix.os }} - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.version }} - cache: pip - - - name: Install dependencies [Ubuntu] - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y pandoc - pip install -e .[dev] - - - name: Install dependencies [Windows] - if: matrix.os == 'windows-latest' - run: pip install -e .[dev] - - - name: Install dependencies [macOS] - if: matrix.os == 'macOS-latest' - run: pip install -e .[dev] - - - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml - - - name: Run tests [Windows, macOS] - if: matrix.os != 'ubuntu-latest' || 
matrix.version != '3.10' - run: pytest tests --junit-xml pytest.xml - - - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: coverage-badge -o media/coverage_badge.svg -f - - - name: Generate coverage report [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: orgoro/coverage@v3.1 - with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Commit coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: | - git config --global user.name 'GitHub Actions' - git config --global user.email 'actions@github.com' - git diff --quiet media/coverage_badge.svg || { - git add media/coverage_badge.svg - git commit -m "[Automated] Updated coverage badge" - } - - - name: Push changes [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: ad-m/github-push-action@master - with: - branch: ${{ github.head_ref }} - - - name: Upload Test Results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Test Results [${{ matrix.os }}] (Python ${{ matrix.version }}) - path: pytest.xml - retention-days: 10 - if-no-files-found: error - - publish-test-results: - name: "Publish Tests Results" - needs: run_tests - runs-on: ubuntu-latest - permissions: - checks: write - pull-requests: write - if: always() - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Download Artifacts - uses: actions/download-artifact@v4 - with: - path: artifacts - - - name: Publish Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - with: - files: "artifacts/**/*.xml" diff --git a/.gitignore b/.gitignore index 1204d2e2..03ba884c 100644 --- a/.gitignore +++ b/.gitignore @@ -147,3 +147,10 @@ models_venv/* # vscode settings .vscode +tests/data +mlartifacts +mlruns +wandb +tests/_data +tests/integration/save-directory +data diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f95fc26..3d68c872 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,11 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.6.4 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] types_or: [python, pyi, jupyter] - id: ruff-format - args: [--line-length, '88'] types_or: [python, pyi, jupyter] - repo: https://github.com/PyCQA/docformatter @@ -14,7 +13,7 @@ repos: hooks: - id: docformatter additional_dependencies: [tomli] - args: [--in-place, --black, --style=epytext] + args: [--in-place, --style=epytext] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 @@ -28,4 +27,3 @@ repos: - id: mdformat additional_dependencies: - mdformat-gfm - - mdformat-toc diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d113518b..20fd3607 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,25 +3,45 @@ **This guide is intended for our internal development team.** It outlines our workflow and standards for contributing to this project. 
-## Table of Contents +## Table Of Contents +- [Pre-requisites](#pre-requisites) - [Pre-commit Hooks](#pre-commit-hooks) - [Documentation](#documentation) +- [Type Checking](#type-checking) - [Editor Support](#editor-support) - [Tests](#tests) - [GitHub Actions](#github-actions) - [Making and Reviewing Changes](#making-and-reviewing-changes) -- [Notes](#notes) + +## Pre-requisites + +Clone the repository and navigate to the root directory: + +```bash +git clone git@github.com:luxonis/luxonis-train.git +cd luxonis-train +``` + +Install the development dependencies by running `pip install -r requirements-dev.txt` or install the package with the `dev` extra flag: + +```bash +pip install -e .[dev] +``` + +> \[!NOTE\] +> This will install the package in editable mode (`-e`), +> so you can make changes to the code and run them immediately. ## Pre-commit Hooks We use pre-commit hooks to ensure code quality and consistency: -1. Install pre-commit (see [pre-commit.com](https://pre-commit.com/#install)). +1. Install `pre-commit` (see [pre-commit.com](https://pre-commit.com/#install)). 1. Clone the repository and run `pre-commit install` in the root directory. -1. The pre-commit hook will now run automatically on `git commit`. +1. The `pre-commit` hook will now run automatically on `git commit`. - If the hook fails, it will print an error message and abort the commit. - - It will also modify the files in-place to fix any issues it can. + - Some hooks will also modify the files in-place to fix found issues. ## Documentation @@ -29,52 +49,75 @@ We use the [Epytext](https://epydoc.sourceforge.net/epytext.html) markup languag To verify that your documentation is formatted correctly, follow these steps: 1. Download [`get-docs.py`](https://github.com/luxonis/python-api-analyzer-to-json/blob/main/gen-docs.py) script -1. Run `python3 get-docs.py luxonis_ml` in the root directory. +1. Run `python3 get-docs.py luxonis_train` in the root directory. - If the script runs successfully and produces `docs.json` file, your documentation is formatted correctly. - - **NOTE:** If the script fails, it might not give the specific error message. In that case, you can run - the script for each file individually until you find the one that is causing the error. -### Editor Support +> \[!NOTE\] +> If the script fails, it might not give a specific error message. +> In that case, you can run the script for each file individually +> until you find the one that is causing the error. + +**Editor Support:** - **PyCharm** - built in support for generating `epytext` docstrings -- **Visual Studie Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` +- **Visual Studio Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` - **NeoVim** - [vim-python-docstring](https://github.com/pixelneo/vim-python-docstring) supports `epytext` style +## Type Checking + +The codebase is type-checked using [pyright](https://github.com/microsoft/pyright) `v1.1.380`. 
To run type checking, use the following command in the root project directory: + +```bash +pyright --warnings --level warning --pythonversion 3.10 luxonis_train +``` + +**Editor Support:** + +- **PyCharm** - [Pyright](https://plugins.jetbrains.com/plugin/24145-pyright) extension +- **Visual Studio Code** - [Pyright](https://marketplace.visualstudio.com/items?itemName=ms-pyright.pyright) extension +- **NeoVim** - [LSP-Config](https://github.com/neovim/nvim-lspconfig) plugin with the [pyright configuration](https://github.com/neovim/nvim-lspconfig/blob/master/doc/server_configurations.md#pyright) + ## Tests We use [pytest](https://docs.pytest.org/en/stable/) for testing. -The tests are located in the `tests` directory. You can run the tests locally with: +The tests are located in the `tests` directory. To run the tests with coverage, use the following command: ```bash -pytest tests --cov=luxonis_train +pytest --cov=luxonis_train --cov-report=html ``` -This command will run all tests and print a coverage report. The coverage report -is only informational for now, but we may enforce a minimum coverage in the future. +This command will run all tests and generate HTML coverage report. + +> \[!TIP\] +> The coverage report will be saved to `htmlcov` directory. +> If you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. + +> \[!TIP\] +> You can choose to run only the unit-tests or only the integration tests by adding `-m unit` or `-m integration` to the `pytest` command. -**If a new feature is added, a new test should be added to cover it.** +> \[!IMPORTANT\] +> If a new feature is added, a new test should be added to cover it. +> The minimum overall test coverage for a PR to be merged is 90%. +> The minimum coverage for new files is 80%. ## GitHub Actions Our GitHub Actions workflow is run when a new PR is opened. -It first checks that the pre-commit hook passes and that the documentation builds successfully. -The tests are run only if the pre-commit hook and documentation build pass. -Successful tests are required for merging a PR. -1. Checks and tests are run automatically when you open a pull request. -1. For the tests to run, the [pre-commit](#pre-commit-hooks) hook must pass and - the [documentation](#documentation) must be built successfully. -1. Review the GitHub Actions output if your PR fails. -1. Fix any issues to ensure that all checks and tests pass. +1. First, the [pre-commit](#pre-commit-hooks) hooks must pass and the [documentation](#documentation) must be built successfully. +1. Next, the [type checking](#type-checking) is run. +1. If all previous checks pass, the [tests](#tests) are run. + +> \[!TIP\] +> Review the GitHub Actions output if your PR fails. + +> \[!IMPORTANT\] +> Successful completion of all the workflow checks is required for merging a PR. -## Making and Reviewing Changes +## Making and Submitting Changes 1. Make changes in a new branch. 1. Test your changes locally. -1. Commit (pre-commit hook will run). -1. Push to your branch and create a pull request. Always request a review from: - - [Martin Kozlovský](https://github.com/kozlov721) - - [Matija Teršek](https://github.com/tersekmatija) - - [Conor Simmons](https://github.com/conorsim) -1. Any other relevant team members can be added as reviewers as well. +1. Commit your changes (pre-commit hooks will run). +1. Push your branch and create a pull request. 1. The team will review and merge your PR. 
diff --git a/README.md b/README.md index a612b59e..8b645c06 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![PyBadge](https://github.com/luxonis/luxonis-train/blob/main/media/pybadge.svg) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) -![UnitTests](https://github.com/luxonis/luxonis-train/actions/workflows/tests.yaml/badge.svg) +![CI](https://github.com/luxonis/luxonis-train/actions/workflows/ci.yaml/badge.svg) ![Docs](https://github.com/luxonis/luxonis-train/actions/workflows/docs.yaml/badge.svg) [![Coverage](media/coverage_badge.svg)](https://github.com/luxonis/luxonis-train/actions) @@ -50,6 +50,12 @@ For instructions on how to create a dataset in the LDF, follow the [examples](https://github.com/luxonis/luxonis-ml/tree/main/examples) in the [luxonis-ml](https://github.com/luxonis/luxonis-ml) repository. +To inspect dataset images by split (train, val, test), use the command: + +```bash +luxonis_train data inspect --config --view +``` + ## Training Once you've created your `config.yaml` file you can train the model using this command: @@ -66,6 +72,14 @@ luxonis_train train --config config.yaml trainer.batch_size 8 trainer.epochs 10 where key and value are space separated and sub-keys are dot (`.`) separated. If the configuration field is a list, then key/sub-key should be a number (e.g. `trainer.preprocessing.augmentations.0.name RotateCustom`). +## Evaluating + +To evaluate the model on a specific dataset split (train, test, or val), use the following command: + +```bash +luxonis_train eval --config --view +``` + ## Tuning To improve training performance you can use `Tuner` for hyperparameter optimization. diff --git a/configs/README.md b/configs/README.md index 27e2fb6e..96444f66 100644 --- a/configs/README.md +++ b/configs/README.md @@ -29,16 +29,15 @@ You can create your own config or use/edit one of the examples. ## Top-level Options -| Key | Type | Default value | Description | -| ------------- | --------------------- | ------------- | --------------------------------------------- | -| use_rich_text | bool | True | whether to use rich text for console printing | -| model | [Model](#model) | | model section | -| dataset | [dataset](#dataset) | | dataset section | -| train | [train](#train) | | train section | -| tracker | [tracker](#tracker) | | tracker section | -| trainer | [trainer](#trainer) | | trainer section | -| exporter | [exporter](#exporter) | | exporter section | -| tuner | [tuner](#tuner) | | tuner section | +| Key | Type | Default value | Description | +| -------- | --------------------- | ------------- | ---------------- | +| model | [Model](#model) | | model section | +| dataset | [dataset](#dataset) | | dataset section | +| train | [train](#train) | | train section | +| tracker | [tracker](#tracker) | | tracker section | +| trainer | [trainer](#trainer) | | trainer section | +| exporter | [exporter](#exporter) | | exporter section | +| tuner | [tuner](#tuner) | | tuner section | ## Model @@ -126,52 +125,53 @@ You can configure it like this: To store and load the data we use LuxonisDataset and LuxonisLoader. For specific config parameters refer to [LuxonisML](https://github.com/luxonis/luxonis-ml). 
-| Key | Type | Default value | Description | -| -------------- | ---------------------------------------- | ------------------- | ---------------------------------------------- | -| name | str \| None | None | name of the dataset | -| id | str \| None | None | id of the dataset | -| team_id | str \| None | None | team under which you can find all datasets | -| bucket_type | Literal\["intenal", "external"\] | internal | type of underlying storage | -| bucket_storage | Literal\["local", "s3", "gcc", "azure"\] | BucketStorage.LOCAL | underlying object storage for a bucket | -| train_view | str | train | view to use for training | -| val_view | str | val | view to use for validation | -| test_view | str | test | view to use for testing | -| json_mode | bool | False | load using JSON annotations instead of MongoDB | +| Key | Type | Default value | Description | +| -------------- | ---------------------------------------- | ------------------- | ------------------------------------------ | +| name | str \| None | None | name of the dataset | +| id | str \| None | None | id of the dataset | +| team_id | str \| None | None | team under which you can find all datasets | +| bucket_type | Literal\["intenal", "external"\] | internal | type of underlying storage | +| bucket_storage | Literal\["local", "s3", "gcc", "azure"\] | BucketStorage.LOCAL | underlying object storage for a bucket | +| train_view | str \| list\[str\] | train | splits to use for training | +| val_view | str \| list\[str\] | val | splits to use for validation | +| test_view | str \| list\[str\] | test | splits to use for testing | ## Trainer Here you can change everything related to actual training of the model. -| Key | Type | Default value | Description | -| ----------------------- | --------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -| batch_size | int | 32 | batch size used for training | -| accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | -| use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | -| epochs | int | 100 | number of training epochs | -| num_workers | int | 2 | number of workers for data loading | -| train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform | -| validation_interval | int | 1 | frequency of computing metrics on validation data | -| num_log_images | int | 4 | maximum number of images to visualize and log | -| skip_last_batch | bool | True | whether to skip last batch while training | -| accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. | -| devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | -| strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | -| num_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | -| profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | -| verbose | bool | True | Print all intermediate results to console. 
| +| Key | Type | Default value | Description | +| ----------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| seed | int | None | seed for reproducibility | +| batch_size | int | 32 | batch size used for training | +| accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | +| use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | +| epochs | int | 100 | number of training epochs | +| n_workers | int | 2 | number of workers for data loading | +| train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform | +| validation_interval | int | 1 | frequency of computing metrics on validation data | +| n_log_images | int | 4 | maximum number of images to visualize and log | +| skip_last_batch | bool | True | whether to skip last batch while training | +| accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. | +| devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | +| matmul_precision | Literal\["medium", "high", "highest"\] \| None | None | Sets the internal precision of float32 matrix multiplications. | +| strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | +| n_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | +| profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | +| verbose | bool | True | Print all intermediate results to console. | ### Preprocessing We use [Albumentations](https://albumentations.ai/docs/) library for `augmentations`. [Here](https://albumentations.ai/docs/api_reference/full_reference/#pixel-level-transforms) you can see a list of all pixel level augmentations supported, and [here](https://albumentations.ai/docs/api_reference/full_reference/#spatial-level-transforms) you see all spatial level transformations. In config you can specify any augmentation from this lists and their params. Additionaly we support `Mosaic4` batch augmentation and letterbox resizing if `keep_aspect_ratio: True`. 
-| Key | Type | Default value | Description | -| ----------------- | ------------------------------------------------------------------------------------ | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| train_image_size | list\[int\] | \[256, 256\] | image size used for training \[height, width\] | -| keep_aspect_ratio | bool | True | bool if keep aspect ration while resizing | -| train_rgb | bool | True | bool if train on rgb or bgr | -| normalize.active | bool | True | bool if use normalization | -| normalize.params | dict | {} | params for normalization, see [documentation](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.Normalize) | -| augmentations | list\[{"name": Name of the augmentation, "params": Parameters of the augmentation}\] | \[\] | list of Albumentations augmentations | +| Key | Type | Default value | Description | +| ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| train_image_size | list\[int\] | \[256, 256\] | image size used for training \[height, width\] | +| keep_aspect_ratio | bool | True | bool if keep aspect ration while resizing | +| train_rgb | bool | True | bool if train on rgb or bgr | +| normalize.active | bool | True | bool if use normalization | +| normalize.params | dict | {} | params for normalization, see [documentation](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.Normalize) | +| augmentations | list\[{"name": Name of the augmentation, "active": Bool if aug is active, by default set to True, "params": Parameters of the augmentation}\] | \[\] | list of Albumentations augmentations | ### Optimizer @@ -212,9 +212,9 @@ Here you can define configuration for exporting. | ---------------------- | --------------------------------- | --------------- | ----------------------------------------------------------------------------------------------- | | export_save_directory | str | "output_export" | Where to save the exported files. | | input_shape | list\[int\] \| None | None | Input shape of the model. If not provided, inferred from the dataset. | -| export_model_name | str | "model" | Name of the exported model. | -| data_type | Literal\["INT8", "FP16", "FP32"\] | "FP16" | Data type of the exported model. | +| data_type | Literal\["INT8", "FP16", "FP32"\] | "FP16" | Data type of the exported model. Only used for conversion to BLOB. | | reverse_input_channels | bool | True | Whether to reverse the image channels in the exported model. Relevant for `.blob` export | +| upload | bool | True | Whether to upload the files created during export to the current tracker. | | scale_values | list\[float\] \| None | None | What scale values to use for input normalization. If not provided, inferred from augmentations. | | mean_values | list\[float\] \| None | None | What mean values to use for input normalizations. If not provided, inferred from augmentations. | | upload_directory | str \| None | None | Where to upload the exported models. | @@ -230,22 +230,25 @@ Option specific for ONNX export. 
### Blob -| Key | Type | Default value | Description | -| ------ | ---- | ------------- | ------------------------------------ | -| active | bool | False | Whether to export to `.blob` format. | -| shaves | int | 6 | How many shaves. | +| Key | Type | Default value | Description | +| ------- | ---------------------------------------------------------------- | ------------- | --------------------------------------- | +| active | bool | False | Whether to export to `.blob` format. | +| shaves | int | 6 | How many shaves. | +| version | Literal\["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"\] | "2022.1" | OpenVINO version to use for conversion. | ## Tuner Here you can specify options for tuning. -| Key | Type | Default value | Description | -| ---------- | ----------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| study_name | str | "test-study" | Name of the study. | -| use_pruner | bool | True | Whether to use the MedianPruner. | -| n_trials | int \| None | 15 | Number of trials for each process. `None` represents no limit in terms of numbner of trials. | -| timeout | int \| None | None | Stop study after the given number of seconds. | -| params | dict\[str, list\] | {} | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform]`. For more information about the types, visit [Optuna documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html). | +| Key | Type | Default value | Description | +| ---------- | ----------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| study_name | str | "test-study" | Name of the study. | +| use_pruner | bool | True | Whether to use the MedianPruner. | +| n_trials | int \| None | 15 | Number of trials for each process. `None` represents no limit in terms of numbner of trials. | +| timeout | int \| None | None | Stop study after the given number of seconds. | +| params | dict\[str, list\] | {} | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform, subset]`. For more information about the types, visit [Optuna documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html). | + +**Note**: "subset" sampling is currently only supported for augmentations. You can specify a set of augmentations defined in `trainer` to choose from and every run subset of random N augmentations will be active (`is_active` parameter will be True for chosen ones and False for the rest in the set). 
Example of params for tuner block: @@ -255,6 +258,7 @@ tuner: trainer.optimizer.name_categorical: ["Adam", "SGD"] trainer.optimizer.params.lr_float: [0.0001, 0.001] trainer.batch_size_int: [4, 16, 4] + trainer.preprocessing.augmentations_subset: [["Defocus", "Sharpen", "Flip"], 2] ``` ### Storage diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml old mode 100755 new mode 100644 index 62c1014e..4db7a9b1 --- a/configs/classification_model.yaml +++ b/configs/classification_model.yaml @@ -1,8 +1,5 @@ # Example configuration for training a predefined segmentation model - -use_rich_text: True - model: name: cifar10_classification predefined_model: @@ -15,8 +12,9 @@ model: thickness: 2 include_plot: True -dataset: - name: cifar10_test +loader: + params: + dataset_name: cifar10_test trainer: preprocessing: @@ -27,9 +25,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml old mode 100755 new mode 100644 index 491152ce..23516bea --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -7,7 +7,7 @@ model: - name: EfficientRep params: channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] + n_repeats: [1, 6, 12, 18, 6] depth_mul: 0.33 width_mul: 0.33 @@ -16,7 +16,7 @@ model: - EfficientRep params: channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] + n_repeats: [12, 12, 12, 12] depth_mul: 0.33 width_mul: 0.33 @@ -46,7 +46,7 @@ model: - name: ImplicitKeypointBBoxLoss attached_to: ImplicitKeypointBBoxHead params: - keypoint_distance_loss_weight: 0.5 + keypoint_regression_loss_weight: 0.5 keypoint_visibility_loss_weight: 0.7 bbox_loss_weight: 0.05 objectness_loss_weight: 0.2 @@ -95,29 +95,30 @@ tracker: wandb_entity: luxonis is_mlflow: False -dataset: - name: coco_test +loader: train_view: train val_view: val test_view: test + params: + dataset_name: coco_test + trainer: accelerator: auto devices: auto strategy: auto - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 profiler: null verbose: True batch_size: 4 accumulate_grad_batches: 1 epochs: &epochs 200 - num_workers: 8 + n_workers: 8 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 skip_last_batch: True - main_head_index: 0 log_sub_losses: True save_top_k: 3 @@ -154,7 +155,6 @@ trainer: monitor: val/loss mode: min verbose: true - - name: DeviceStatsMonitor - name: ExportOnTrainEnd - name: TestOnTrainEnd diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml new file mode 100644 index 00000000..2bd3b7e8 --- /dev/null +++ b/configs/ddrnet_segmentation_model.yaml @@ -0,0 +1,45 @@ +# DDRNet-23-slim model for segmentation +# Refer to here for optimal hyperparameters for this model: https://github.com/Deci-AI/super-gradients/blob/4797c974c7c445d12e2575c468848d9c3e04becd/src/super_gradients/recipes/cityscapes_ddrnet.yaml#L4 + +model: + name: ddrnet_segmentation + predefined_model: + name: DDRNetSegmentationModel + params: + task: binary + backbone_params: + use_aux_heads: True # set to False to disable auxiliary heads (for export) + variant: '23-slim' + +loader: + params: + dataset_name: coco_test + +trainer: + preprocessing: + train_image_size: [&height 256, &width 320] + keep_aspect_ratio: False + normalize: + active: True + + batch_size: 4 + epochs: &epochs 500 + num_workers: 4 + validation_interval: 10 + 
num_log_images: 8 + + callbacks: + - name: TestOnTrainEnd + - name: ExportOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.01 + momentum: 0.9 + weight_decay: 0.0005 + + scheduler: + name: CosineAnnealingLR + params: + T_max: *epochs diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml old mode 100755 new mode 100644 index 8d7f9c25..7bc87eef --- a/configs/detection_model.yaml +++ b/configs/detection_model.yaml @@ -1,8 +1,5 @@ # Example configuration for training a predefined detection model - -use_rich_text: True - model: name: coco_detection predefined_model: @@ -10,8 +7,9 @@ model: params: use_neck: True -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: @@ -22,9 +20,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/efficient_coco_model.yaml b/configs/efficient_coco_model.yaml new file mode 100644 index 00000000..f2c9db5d --- /dev/null +++ b/configs/efficient_coco_model.yaml @@ -0,0 +1,114 @@ + +model: + name: coco_test + nodes: + - name: EfficientRep + params: + channels_list: [64, 128, 256, 512, 1024] + n_repeats: [1, 6, 12, 18, 6] + depth_mul: 0.33 + width_mul: 0.33 + + - name: RepPANNeck + inputs: + - EfficientRep + params: + channels_list: [256, 128, 128, 256, 256, 512] + n_repeats: [12, 12, 12, 12] + depth_mul: 0.33 + width_mul: 0.33 + + - name: EfficientKeypointBBoxHead + inputs: + - RepPANNeck + params: + conf_thres: 0.25 + iou_thres: 0.45 + + - name: SegmentationHead + inputs: + - RepPANNeck + + - name: EfficientBBoxHead + inputs: + - RepPANNeck + params: + conf_thres: 0.75 + iou_thres: 0.45 + + losses: + - name: AdaptiveDetectionLoss + attached_to: EfficientBBoxHead + - name: BCEWithLogitsLoss + attached_to: SegmentationHead + - name: EfficientKeypointBBoxLoss + attached_to: EfficientKeypointBBoxHead + + metrics: + - name: ObjectKeypointSimilarity + is_main_metric: true + attached_to: EfficientKeypointBBoxHead + - name: MeanAveragePrecisionKeypoints + attached_to: EfficientKeypointBBoxHead + - name: MeanAveragePrecision + attached_to: EfficientBBoxHead + - name: F1Score + attached_to: SegmentationHead + params: + task: binary + - name: JaccardIndex + attached_to: SegmentationHead + params: + task: binary + + visualizers: + - name: MultiVisualizer + attached_to: EfficientKeypointBBoxHead + params: + visualizers: + - name: KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + params: + colors: + person: "#FF5055" + - name: SegmentationVisualizer + attached_to: SegmentationHead + params: + colors: "#FF5055" + - name: BBoxVisualizer + attached_to: EfficientBBoxHead + +tracker: + project_name: coco_test + save_directory: output + is_tensorboard: True + +loader: + params: + dataset_name: coco_test + +trainer: + + n_sanity_val_steps: 1 + batch_size: 4 + accumulate_grad_batches: 1 + epochs: 200 + n_workers: 4 + train_metrics_interval: -1 + validation_interval: 10 + n_log_images: 8 + save_top_k: 3 + + preprocessing: + train_image_size: [&height 256, &width 320] + keep_aspect_ratio: False + train_rgb: True + normalize: + active: True + + callbacks: + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + diff --git a/configs/example_export.yaml b/configs/example_export.yaml old mode 100755 new mode 100644 index a999a2bd..51f768dc --- a/configs/example_export.yaml +++ b/configs/example_export.yaml @@ -1,8 +1,5 @@ # Example configuration for 
exporting a predefined segmentation model - -use_rich_text: True - model: name: coco_segmentation weights: null # specify a path to the weights here @@ -12,8 +9,9 @@ model: backbone: MicroNet task: binary -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: @@ -24,9 +22,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 optimizer: name: SGD diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml new file mode 100644 index 00000000..9632ed43 --- /dev/null +++ b/configs/example_multi_input.yaml @@ -0,0 +1,114 @@ +loader: + + # Yields "left", "right", "disparity", and "pointcloud" inputs. + # See implementation in `tests/integration/test_multi_input.py`. + name: CustomMultiInputLoader + + # Name of the key in the batch that contains image-like data. + # Needs to be set for visualizers and evaluators to work. + image_source: left + +model: + name: example_multi_input + nodes: + - name: FullBackbone + alias: full_backbone + + - name: RGBDBackbone + alias: rgbd_backbone + input_sources: + - left + - right + - disparity + + - name: PointcloudBackbone + alias: pointcloud_backbone + input_sources: + - pointcloud + + - name: FusionNeck + alias: fusion_neck + inputs: + - rgbd_backbone + - pointcloud_backbone + input_sources: + - disparity + + - name: FusionNeck2 + alias: fusion_neck_2 + inputs: + - rgbd_backbone + - pointcloud_backbone + - full_backbone + + - name: CustomSegHead1 + alias: head_1 + inputs: + - fusion_neck + + - name: CustomSegHead2 + alias: head_2 + inputs: + - fusion_neck + - fusion_neck_2 + input_sources: + - disparity + + losses: + - name: BCEWithLogitsLoss + alias: loss_1 + attached_to: head_1 + + - name: CrossEntropyLoss + alias: loss_2 + attached_to: head_2 + + metrics: + - name: JaccardIndex + alias: jaccard_index_1 + attached_to: head_1 + is_main_metric: True + params: + task: binary + + - name: JaccardIndex + alias: jaccard_index_2 + attached_to: head_2 + params: + task: binary + + visualizers: + - name: SegmentationVisualizer + alias: seg_vis_1 + attached_to: head_1 + params: + colors: "#FF5055" + + - name: SegmentationVisualizer + alias: seg_vis_2 + attached_to: head_2 + params: + colors: "#55AAFF" + +tracker: + project_name: multi_input_example + is_tensorboard: True + +trainer: + batch_size: 1 + epochs: 10 + n_workers: 4 + validation_interval: 10 + n_log_images: 4 + + callbacks: + - name: ExportOnTrainEnd + + optimizer: + name: Adam + params: + lr: 0.01 + +exporter: + onnx: + opset_version: 11 diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml old mode 100755 new mode 100644 index 980036ae..d8c9027d --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -1,8 +1,5 @@ # Example configuration for tuning a predefined segmentation model - -use_rich_text: True - model: name: coco_segmentation predefined_model: @@ -11,8 +8,9 @@ model: backbone: MicroNet task: binary -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: @@ -20,11 +18,19 @@ trainer: keep_aspect_ratio: False normalize: active: True + augmentations: + - name: Defocus + params: + p: 0.1 + - name: Sharpen + params: + p: 0.1 + - name: Flip batch_size: 4 - epochs: &epochs 1 - validation_interval: 1 - num_log_images: 8 + epochs: &epochs 100 + validation_interval: 10 + n_log_images: 8 scheduler: name: CosineAnnealingLR @@ -37,3 +43,4 @@ tuner: trainer.optimizer.name_categorical: ["Adam", 
"SGD"] trainer.optimizer.params.lr_float: [0.0001, 0.001] trainer.batch_size_int: [4, 16, 4] + trainer.preprocessing.augmentations_subset: [["Defocus", "Sharpen", "Flip"], 2] diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml old mode 100755 new mode 100644 index dc4fe3d7..51554f73 --- a/configs/keypoint_bbox_model.yaml +++ b/configs/keypoint_bbox_model.yaml @@ -1,15 +1,13 @@ # Example configuration for training a predefined keypoint-detection model - -use_rich_text: True - model: name: coco_keypoints predefined_model: name: KeypointDetectionModel -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: @@ -20,9 +18,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/obb_detection_model.yaml b/configs/obb_detection_model.yaml new file mode 100644 index 00000000..316bcfd1 --- /dev/null +++ b/configs/obb_detection_model.yaml @@ -0,0 +1,48 @@ +# Example configuration for training a predefined obb (oriented bounding box) detection model + +model: + name: obb_detection + predefined_model: + name: OBBDetectionModel + params: + use_neck: True + +loader: + # name: OBBLoaderTorch + train_view: train + val_view: val + # test_view: train + + params: + dataset_name: obb_test + # dataset_dir: "../dota8" + dataset_dir: "../DOTA" + dataset_type: YOLOV6OBB + +trainer: + # preprocessing: + # train_image_size: [&height 512, &width 512] + # normalize: + # active: True + # keep_aspect_ratio: False + # normalize: + # active: True + + batch_size: 8 + epochs: &epochs 10 + num_workers: 4 + validation_interval: 10 + num_log_images: 8 + + callbacks: + # - name: ExportOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: Adam + params: + lr: 0.001 + + # scheduler: + # name: ConstantLR + diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml new file mode 100644 index 00000000..bb9f8f62 --- /dev/null +++ b/configs/resnet_model.yaml @@ -0,0 +1,59 @@ + +model: + name: resnet50_classification + nodes: + - name: ResNet + params: + variant: "50" + download_weights: True + + - name: ClassificationHead + inputs: + - ResNet + + losses: + - name: CrossEntropyLoss + attached_to: ClassificationHead + + metrics: + - name: Accuracy + is_main_metric: true + attached_to: ClassificationHead + + visualizers: + - name: ClassificationVisualizer + attached_to: ClassificationHead + params: + font_scale: 0.5 + color: [255, 0, 0] + thickness: 2 + include_plot: True + +loader: + params: + dataset_name: cifar10_test + +trainer: + batch_size: 4 + epochs: &epochs 200 + n_workers: 4 + validation_interval: 10 + n_log_images: 8 + + preprocessing: + train_image_size: [&height 224, &width 224] + keep_aspect_ratio: False + normalize: + active: True + + callbacks: + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.02 + + scheduler: + name: ConstantLR diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml old mode 100755 new mode 100644 index c26fb0cc..b403a75e --- a/configs/segmentation_model.yaml +++ b/configs/segmentation_model.yaml @@ -1,8 +1,5 @@ # Example configuration for training a predefined segmentation model - -use_rich_text: True - model: name: coco_segmentation predefined_model: @@ -11,8 +8,9 @@ model: backbone: MicroNet task: binary -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: @@ -23,9 
+21,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index 59ec7367..ebc4a719 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -1,5 +1,11 @@ +__version__ = "0.0.1" + + from .attached_modules import * +from .core import * +from .loaders import * from .models import * +from .nodes import * +from .optimizers import * +from .schedulers import * from .utils import * - -__version__ = "0.0.1" diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 73843593..c3164227 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -1,30 +1,35 @@ -import os +import tempfile from enum import Enum from importlib.metadata import version from pathlib import Path from typing import Annotated, Optional -import cv2 -import torch import typer +import yaml +from luxonis_ml.utils import setup_logging -app = typer.Typer(help="Luxonis Train CLI", add_completion=False) +setup_logging(use_rich=True) -class View(str, Enum): - train = "train" - val = "val" - test = "test" +class _ViewType(str, Enum): + TRAIN = "train" + VAL = "val" + TEST = "test" - def __str__(self): - return self.value + +app = typer.Typer( + help="Luxonis Train CLI", + add_completion=False, + pretty_exceptions_show_locals=False, +) ConfigType = Annotated[ - Optional[Path], + Optional[str], typer.Option( help="Path to the configuration file.", show_default=False, + metavar="FILE", ), ] @@ -36,7 +41,9 @@ def __str__(self): ), ] -ViewType = Annotated[View, typer.Option(help="Which dataset view to use.")] +ViewType = Annotated[ + _ViewType, typer.Option(help="Which dataset view to use.") +] SaveDirType = Annotated[ Optional[Path], @@ -45,164 +52,140 @@ def __str__(self): @app.command() -def train(config: ConfigType = None, opts: OptsType = None): +def train( + config: ConfigType = None, + resume: Annotated[ + Optional[str], + typer.Option(help="Resume training from this checkpoint."), + ] = None, + opts: OptsType = None, +): """Start training.""" - from luxonis_train.core import Trainer + from luxonis_train.core import LuxonisModel - Trainer(str(config), opts).train() + LuxonisModel(config, opts).train(resume_weights=resume) @app.command() -def eval(config: ConfigType = None, view: ViewType = View.val, opts: OptsType = None): +def test( + config: ConfigType = None, + view: ViewType = _ViewType.VAL, + opts: OptsType = None, +): """Evaluate model.""" - from luxonis_train.core import Trainer + from luxonis_train.core import LuxonisModel - Trainer(str(config), opts).test(view=view.name) + LuxonisModel(config, opts).test(view=view.value) @app.command() def tune(config: ConfigType = None, opts: OptsType = None): """Start hyperparameter tuning.""" - from luxonis_train.core import Tuner + from luxonis_train.core import LuxonisModel - Tuner(str(config), opts).tune() + LuxonisModel(config, opts).tune() @app.command() def export(config: ConfigType = None, opts: OptsType = None): """Export model.""" - from luxonis_train.core import Exporter + from luxonis_train.core import LuxonisModel - Exporter(str(config), opts).export() + LuxonisModel(config, opts).export() @app.command() def infer( config: ConfigType = None, - view: ViewType = View.val, + view: ViewType = _ViewType.VAL, save_dir: SaveDirType = None, opts: OptsType = None, ): """Run inference.""" - from luxonis_train.core import Inferer + from luxonis_train.core 
import LuxonisModel - Inferer(str(config), opts, view=view.name, save_dir=save_dir).infer() + LuxonisModel(config, opts).infer(view=view.value, save_dir=save_dir) @app.command() def inspect( config: ConfigType = None, - view: ViewType = View.val, - save_dir: SaveDirType = None, + view: Annotated[ + str, + typer.Option( + ..., + "--view", + "-v", + help="Which split of the dataset to inspect.", + case_sensitive=False, + ), + ] = "train", # type: ignore + size_multiplier: Annotated[ + float, + typer.Option( + ..., + "--size-multiplier", + "-s", + help=( + "Multiplier for the image size. " + "By default the images are shown in their original size." + ), + show_default=False, + ), + ] = 1.0, opts: OptsType = None, ): """Inspect dataset.""" - from luxonis_ml.data import ( - LuxonisDataset, - TrainAugmentations, - ValAugmentations, - ) - - from luxonis_train.attached_modules.visualizers.utils import ( - draw_bounding_box_labels, - draw_keypoint_labels, - draw_segmentation_labels, - get_unnormalized_images, - ) - from luxonis_train.utils.config import Config - from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn - from luxonis_train.utils.types import LabelType - - overrides = {} - if opts: - if len(opts) % 2 != 0: - raise ValueError("Override options should be a list of key-value pairs") - - for i in range(0, len(opts), 2): - overrides[opts[i]] = opts[i + 1] - - cfg = Config.get_config(str(config), overrides) - - image_size = cfg.trainer.preprocessing.train_image_size - - dataset = LuxonisDataset( - dataset_name=cfg.dataset.name, - team_id=cfg.dataset.team_id, - dataset_id=cfg.dataset.id, - bucket_type=cfg.dataset.bucket_type, - bucket_storage=cfg.dataset.bucket_storage, - ) - augmentations = ( - TrainAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations + from lightning.pytorch import seed_everything + from luxonis_ml.data.__main__ import inspect as lxml_inspect + + from luxonis_train.utils import Config + + cfg = Config.get_config(config, opts) + if cfg.trainer.seed is not None: + seed_everything(cfg.trainer.seed, workers=True) + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as f: + yaml.dump( + [ + a.model_dump() + for a in cfg.trainer.preprocessing.get_active_augmentations() + if a.name != "Normalize" ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, + f, ) - if view == "train" - else ValAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, + + if "dataset_name" not in cfg.loader.params: + raise ValueError("dataset_name is not set in the config") + + lxml_inspect( + name=cfg.loader.params["dataset_name"], + view=[view], + aug_config=f.name, + size_multiplier=size_multiplier, ) - ) - - loader_train = LuxonisLoaderTorch( - dataset, - view=view, - augmentations=augmentations, - ) - - pytorch_loader_train = torch.utils.data.DataLoader( - loader_train, - batch_size=4, - num_workers=1, - collate_fn=collate_fn, - ) - - if save_dir is not None: - os.makedirs(save_dir, exist_ok=True) - - counter = 0 - for data in pytorch_loader_train: - imgs, label_dict = data - images = get_unnormalized_images(cfg, imgs) - for i, img in enumerate(images): - for label_type, labels in label_dict.items(): - if label_type == LabelType.CLASSIFICATION: - continue - elif 
label_type == LabelType.BOUNDINGBOX: - img = draw_bounding_box_labels( - img, labels[labels[:, 0] == i][:, 2:], colors="yellow", width=1 - ) - elif label_type == LabelType.KEYPOINT: - img = draw_keypoint_labels( - img, labels[labels[:, 0] == i][:, 1:], colors="red" - ) - elif label_type == LabelType.SEGMENTATION: - img = draw_segmentation_labels( - img, labels[i], alpha=0.8, colors="#5050FF" - ) - - img_arr = img.permute(1, 2, 0).numpy() - img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR) - if save_dir is not None: - counter += 1 - cv2.imwrite(os.path.join(save_dir, f"{counter}.png"), img_arr) - else: - cv2.imshow("img", img_arr) - if cv2.waitKey() == ord("q"): - exit() + + +@app.command() +def archive( + executable: Annotated[ + str, + typer.Option( + help="Path to the model file.", show_default=False, metavar="FILE" + ), + ], + config: ConfigType = None, + opts: OptsType = None, +): + """Generate NN archive.""" + from luxonis_train.core import LuxonisModel + + LuxonisModel(str(config), opts).archive(executable) def version_callback(value: bool): if value: - typer.echo(f"LuxonisTrain Version: {version(__package__)}") + typer.echo(f"LuxonisTrain Version: {version('luxonis_train')}") raise typer.Exit() @@ -211,16 +194,23 @@ def common( _: Annotated[ bool, typer.Option( - "--version", callback=version_callback, help="Show version and exit." + "--version", + callback=version_callback, + help="Show version and exit.", ), ] = False, + source: Annotated[ + Optional[Path], + typer.Option( + help="Path to a python file with custom components. " + "Will be sourced before running the command.", + metavar="FILE", + ), + ] = None, ): - ... - - -def main(): - app() + if source: + exec(source.read_text(), globals(), globals()) if __name__ == "__main__": - main() + app() diff --git a/luxonis_train/assigners/__init__.py b/luxonis_train/assigners/__init__.py new file mode 100644 index 00000000..fb7d5fdd --- /dev/null +++ b/luxonis_train/assigners/__init__.py @@ -0,0 +1,4 @@ +from .atts_assigner import ATSSAssigner +from .tal_assigner import RotatedTaskAlignedAssigner, TaskAlignedAssigner + +__all__ = ["ATSSAssigner", "TaskAlignedAssigner", "RotatedTaskAlignedAssigner"] diff --git a/luxonis_train/utils/assigners/atts_assigner.py b/luxonis_train/assigners/atts_assigner.py similarity index 81% rename from luxonis_train/utils/assigners/atts_assigner.py rename to luxonis_train/assigners/atts_assigner.py index 26b4dc23..269496fa 100644 --- a/luxonis_train/utils/assigners/atts_assigner.py +++ b/luxonis_train/assigners/atts_assigner.py @@ -2,12 +2,7 @@ import torch.nn.functional as F from torch import Tensor, nn -from .utils import ( - batch_iou, - bbox_iou, - candidates_in_gt, - fix_collisions, -) +from .utils import batch_iou, bbox_iou, candidates_in_gt, fix_collisions class ATSSAssigner(nn.Module): @@ -38,7 +33,7 @@ def forward( gt_bboxes: Tensor, mask_gt: Tensor, pred_bboxes: Tensor, - ) -> tuple[Tensor, Tensor, Tensor, Tensor]: + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Assigner's forward method which generates final assignments. @type anchor_bboxes: Tensor @@ -53,10 +48,11 @@ def forward( @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] @type pred_bboxes: Tensor @param pred_bboxes: Predicted bboxes of shape [bs, n_anchors, 4] - @rtype: tuple[Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output positive mask of shape [bs, n_anchors]. 
+ @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output positive mask of shape + [bs, n_anchors]. """ self.n_anchors = anchor_bboxes.size(0) @@ -66,9 +62,14 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - torch.full([self.bs, self.n_anchors], self.n_classes).to(device), + torch.full([self.bs, self.n_anchors], self.n_classes).to( + device + ), torch.zeros([self.bs, self.n_anchors, 4]).to(device), - torch.zeros([self.bs, self.n_anchors, self.n_classes]).to(device), + torch.zeros([self.bs, self.n_anchors, self.n_classes]).to( + device + ), + torch.zeros([self.bs, self.n_anchors]).to(device), torch.zeros([self.bs, self.n_anchors]).to(device), ) @@ -82,7 +83,10 @@ def forward( gt_centers = self._get_bbox_center(gt_bboxes_flat) anchor_centers = self._get_bbox_center(anchor_bboxes) distances = ( - (gt_centers[:, None, :] - anchor_centers[None, :, :]).pow(2).sum(-1).sqrt() + (gt_centers[:, None, :] - anchor_centers[None, :, :]) + .pow(2) + .sum(-1) + .sqrt() ) distances = distances.reshape([self.bs, -1, self.n_anchors]) @@ -107,15 +111,18 @@ def forward( ) # Generate final assignments based on masks - assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) # Soft label with IoU - if pred_bboxes is not None: - ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos - ious = ious.max(dim=-2)[0].unsqueeze(-1) - assigned_scores *= ious + ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos + ious = ious.max(dim=-2)[0].unsqueeze(-1) + assigned_scores *= ious out_mask_positive = mask_pos_sum.bool() @@ -124,6 +131,7 @@ def forward( assigned_bboxes, assigned_scores, out_mask_positive, + assigned_gt_idx, ) def _get_bbox_center(self, bbox: Tensor) -> Tensor: @@ -144,12 +152,13 @@ def _select_topk_candidates( @type mask_gt: Tensor @param mask_gt: Mask for valid GT per image. @rtype: tuple[Tensor, Tensor] - @return: Mask of selected anchors and indices of selected anchors. + @return: Mask of selected anchors and indices of selected + anchors. 
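For reference, a minimal standalone sketch of the broadcasting used in the centre-distance computation above; the shapes below are invented for the example and are not taken from this diff.

import torch

# Toy shapes only; the assigner works on flattened GT centres and FPN anchor centres.
gt_centers = torch.rand(6, 2)        # (n_gts_total, 2)
anchor_centers = torch.rand(100, 2)  # (n_anchors, 2)

# (n_gts_total, 1, 2) - (1, n_anchors, 2) -> (n_gts_total, n_anchors, 2)
distances = (
    (gt_centers[:, None, :] - anchor_centers[None, :, :]).pow(2).sum(-1).sqrt()
)
assert distances.shape == (6, 100)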
""" mask_gt = mask_gt.repeat(1, 1, self.topk).bool() level_distances = torch.split(distances, n_level_bboxes, dim=-1) - is_in_topk_list = [] - topk_idxs = [] + is_in_topk_list: list[Tensor] = [] + topk_idxs: list[Tensor] = [] start_idx = 0 for per_level_distances, per_level_boxes in zip( level_distances, n_level_bboxes @@ -161,18 +170,20 @@ def _select_topk_candidates( ) topk_idxs.append(per_level_topk_idxs + start_idx) per_level_topk_idxs = torch.where( - mask_gt, per_level_topk_idxs, torch.zeros_like(per_level_topk_idxs) + mask_gt, + per_level_topk_idxs, + torch.zeros_like(per_level_topk_idxs), + ) + is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum( + dim=-2 ) - is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) is_in_topk_list.append(is_in_topk.to(distances.dtype)) start_idx = end_idx - is_in_topk_list = torch.cat(is_in_topk_list, dim=-1) - topk_idxs = torch.cat(topk_idxs, dim=-1) - return is_in_topk_list, topk_idxs + return torch.cat(is_in_topk_list, dim=-1), torch.cat(topk_idxs, dim=-1) def _get_positive_samples( self, @@ -180,14 +191,18 @@ def _get_positive_samples( topk_idxs: Tensor, overlaps: Tensor, ) -> Tensor: - """Computes threshold and returns mask for samples over threshold. + """Computes threshold and returns mask for samples over + threshold. @type is_in_topk: Tensor - @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, n_anchors] + @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, + n_anchors] @type topk_idxs: Tensor - @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, topK * n_levels] + @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, + topK * n_levels] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @rtype: Tensor @return: Mask of positive samples [bx, n_max_boxes, n_anchors] """ @@ -202,14 +217,17 @@ def _get_positive_samples( assist_idxs = assist_idxs[:, None] flatten_idxs = topk_idxs + assist_idxs candidate_overlaps = _candidate_overlaps.reshape(-1)[flatten_idxs] - candidate_overlaps = candidate_overlaps.reshape([self.bs, self.n_max_boxes, -1]) + candidate_overlaps = candidate_overlaps.reshape( + [self.bs, self.n_max_boxes, -1] + ) overlaps_mean_per_gt = candidate_overlaps.mean(dim=-1, keepdim=True) overlaps_std_per_gt = candidate_overlaps.std(dim=-1, keepdim=True) overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt is_pos = torch.where( - _candidate_overlaps > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), + _candidate_overlaps + > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), is_in_topk, torch.zeros_like(is_in_topk), ) @@ -233,15 +251,18 @@ def _get_final_assignments( @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes]. 
""" # assigned target labels batch_idx = torch.arange( self.bs, dtype=gt_labels.dtype, device=gt_labels.device ) batch_idx = batch_idx[..., None] - assigned_gt_idx = (assigned_gt_idx + batch_idx * self.n_max_boxes).long() + assigned_gt_idx = ( + assigned_gt_idx + batch_idx * self.n_max_boxes + ).long() assigned_labels = gt_labels.flatten()[assigned_gt_idx.flatten()] assigned_labels = assigned_labels.reshape([self.bs, self.n_anchors]) assigned_labels = torch.where( @@ -255,7 +276,9 @@ def _get_final_assignments( assigned_bboxes = assigned_bboxes.reshape([self.bs, self.n_anchors, 4]) # assigned target scores - assigned_scores = F.one_hot(assigned_labels.long(), self.n_classes + 1).float() + assigned_scores = F.one_hot( + assigned_labels.long(), self.n_classes + 1 + ).float() assigned_scores = assigned_scores[:, :, : self.n_classes] return assigned_labels, assigned_bboxes, assigned_scores diff --git a/luxonis_train/utils/assigners/tal_assigner.py b/luxonis_train/assigners/tal_assigner.py similarity index 53% rename from luxonis_train/utils/assigners/tal_assigner.py rename to luxonis_train/assigners/tal_assigner.py index 0765ad6a..4d2fa6da 100644 --- a/luxonis_train/utils/assigners/tal_assigner.py +++ b/luxonis_train/assigners/tal_assigner.py @@ -2,7 +2,13 @@ import torch.nn.functional as F from torch import Tensor, nn -from .utils import batch_iou, candidates_in_gt, fix_collisions +from .utils import ( + batch_iou, + batch_iou_obb, + candidates_in_gt, + candidates_in_gt_obb, + fix_collisions, +) class TaskAlignedAssigner(nn.Module): @@ -50,7 +56,7 @@ def forward( gt_labels: Tensor, gt_bboxes: Tensor, mask_gt: Tensor, - ) -> tuple[Tensor, Tensor, Tensor, Tensor]: + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Assigner's forward method which generates final assignments. 
@type pred_scores: Tensor @@ -65,10 +71,11 @@ def forward( @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 4] @type mask_gt: Tensor @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] - @rtype: tuple[Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output mask of shape [bs, n_anchors] + @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output mask of shape [bs, + n_anchors] """ self.bs = pred_scores.size(0) self.n_max_boxes = gt_bboxes.size(1) @@ -76,10 +83,13 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - torch.full_like(pred_scores[..., 0], self.n_classes).to(device), + torch.full_like(pred_scores[..., 0], self.n_classes).to( + device + ), torch.zeros_like(pred_bboxes).to(device), torch.zeros_like(pred_scores).to(device), torch.zeros_like(pred_scores[..., 0]).to(device), + torch.zeros_like(pred_scores[..., 0]).to(device), ) # Compute alignment metric between all bboxes (bboxes of all pyramid levels) and GT @@ -104,7 +114,11 @@ def forward( ) # Generate final targets based on masks - assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) @@ -121,7 +135,13 @@ def forward( out_mask_positive = mask_pos_sum.bool() - return assigned_labels, assigned_bboxes, assigned_scores, out_mask_positive + return ( + assigned_labels, + assigned_bboxes, + assigned_scores, + out_mask_positive, + assigned_gt_idx, + ) def _get_alignment_metric( self, @@ -130,7 +150,8 @@ def _get_alignment_metric( gt_labels: Tensor, gt_bboxes: Tensor, ): - """Calculates anchor alignment metric and IoU between GTs and predicted bboxes. + """Calculates anchor alignment metric and IoU between GTs and + predicted bboxes. @type pred_scores: Tensor @param pred_scores: Predicted scores [bs, n_anchors, 1] @@ -144,7 +165,9 @@ def _get_alignment_metric( pred_scores = pred_scores.permute(0, 2, 1) gt_labels = gt_labels.to(torch.long) ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) - ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ind[0] = ( + torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ) ind[1] = gt_labels.squeeze(-1) bbox_scores = pred_scores[ind[0], ind[1]] @@ -162,24 +185,30 @@ def _select_topk_candidates( """Selects k anchors based on provided metrics tensor. @type metrics: Tensor - @param metrics: Metrics tensor of shape [bs, n_max_boxes, n_anchors] + @param metrics: Metrics tensor of shape [bs, n_max_boxes, + n_anchors] @type largest: bool - @param largest: Flag if should keep largest topK. Defaults to True. + @param largest: Flag if should keep largest topK. Defaults to + True. 
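For context, a minimal sketch of the task-aligned metric the assigner ranks candidates by, `score ** alpha * IoU ** beta`; the alpha/beta values below are the commonly used defaults and are assumed here, not taken from this diff.

import torch

alpha, beta = 1.0, 6.0
bbox_scores = torch.tensor([0.9, 0.6, 0.9])  # predicted score of the GT class per anchor
overlaps = torch.tensor([0.8, 0.9, 0.3])     # IoU between prediction and GT
align_metric = bbox_scores.pow(alpha) * overlaps.pow(beta)
# a confident prediction with poor IoU (last entry) is ranked far below the others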
@type topk_mask: Tensor - @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, topk] + @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, + topk] @rtype: Tensor - @return: Mask of selected anchors of shape [bs, n_max_boxes, n_anchors] + @return: Mask of selected anchors of shape [bs, n_max_boxes, + n_anchors] """ - num_anchors = metrics.shape[-1] + n_anchors = metrics.shape[-1] topk_metrics, topk_idxs = torch.topk( metrics, self.topk, dim=-1, largest=largest ) if topk_mask is None: - topk_mask = (topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps).tile( - [1, 1, self.topk] - ) - topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs)) - is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(dim=-2) + topk_mask = ( + topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps + ).tile([1, 1, self.topk]) + topk_idxs = torch.where( + topk_mask, topk_idxs, torch.zeros_like(topk_idxs) + ) + is_in_topk = F.one_hot(topk_idxs, n_anchors).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) @@ -197,14 +226,17 @@ def _get_final_assignments( @type gt_labels: Tensor @param gt_labels: Initial GT labels [bs, n_max_boxes, 1] @type gt_bboxes: Tensor - @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 4] + @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 4] or [bs, n_max_boxes, 5] + for obb @type assigned_gt_idx: Tensor @param assigned_gt_idx: Indices of matched GTs [bs, n_anchors] @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: A tuple containing: + - Tensor of assigned labels of shape [bs, n_anchors] + - Tensor of assigned bboxes of shape [bs, n_anchors, 4] or [bs, n_max_boxes, 5] for rotated boxes + - Tensor of assigned scores of shape [bs, n_anchors, n_classes]. """ # assigned target labels batch_ind = torch.arange( @@ -214,14 +246,18 @@ def _get_final_assignments( assigned_labels = gt_labels.long().flatten()[assigned_gt_idx] # assigned target boxes - assigned_bboxes = gt_bboxes.reshape([-1, 4])[assigned_gt_idx] + assigned_bboxes = gt_bboxes.reshape([-1, gt_bboxes.shape[-1]])[ + assigned_gt_idx + ] # assigned target scores assigned_labels[assigned_labels < 0] = 0 assigned_scores = F.one_hot(assigned_labels, self.n_classes) mask_pos_scores = mask_pos_sum[:, :, None].repeat(1, 1, self.n_classes) assigned_scores = torch.where( - mask_pos_scores > 0, assigned_scores, torch.full_like(assigned_scores, 0) + mask_pos_scores > 0, + assigned_scores, + torch.full_like(assigned_scores, 0), ) assigned_labels = torch.where( @@ -231,3 +267,135 @@ def _get_final_assignments( ) return assigned_labels, assigned_bboxes, assigned_scores + + +class RotatedTaskAlignedAssigner(TaskAlignedAssigner): + """Assigns ground-truth objects to rotated bounding boxes using a + task-aligned metric.""" + + @torch.no_grad() + def forward( + self, + pred_scores: Tensor, + pred_bboxes: Tensor, + anchor_points: Tensor, + gt_labels: Tensor, + gt_bboxes: Tensor, + mask_gt: Tensor, + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + """Assigner's forward method which generates final assignments. 
+ + @type pred_scores: Tensor + @param pred_scores: Predicted scores [bs, n_anchors, 1] + @type pred_bboxes: Tensor + @param pred_bboxes: Predicted bboxes [bs, n_anchors, 5] + @type anchor_points: Tensor + @param anchor_points: Anchor points [n_anchors, 2] + @type gt_labels: Tensor + @param gt_labels: Initial GT labels [bs, n_max_boxes, 1] + @type gt_bboxes: Tensor + @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 5] + @type mask_gt: Tensor + @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] + @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 5], assigned scores of shape + [bs, n_anchors, n_classes] and output mask of shape [bs, + n_anchors] + """ + self.bs = pred_scores.size(0) + self.n_max_boxes = gt_bboxes.size(1) + + if self.n_max_boxes == 0: + device = gt_bboxes.device + return ( + torch.full_like(pred_scores[..., 0], self.n_classes).to( + device + ), + torch.zeros_like(pred_bboxes).to(device), + torch.zeros_like(pred_scores).to(device), + torch.zeros_like(pred_scores[..., 0]).to(device), + torch.zeros_like(pred_scores[..., 0]).to(device), + ) + + # Compute alignment metric between all bboxes (bboxes of all pyramid levels) and GT + align_metric, overlaps = self._get_alignment_metric( + pred_scores, pred_bboxes, gt_labels, gt_bboxes + ) + + # Select top-k bboxes as candidates for each GT + is_in_gts = candidates_in_gt_obb(anchor_points, gt_bboxes) + is_in_gts = torch.reshape(is_in_gts, (self.bs, self.n_max_boxes, -1)) + is_in_topk = self._select_topk_candidates( + align_metric * is_in_gts, + topk_mask=mask_gt.repeat([1, 1, self.topk]).bool(), + ) + + # Final positive candidates + mask_pos = is_in_topk * is_in_gts * mask_gt + + # If an anchor box is assigned to multiple gts, the one with the highest IoU is selected + assigned_gt_idx, mask_pos_sum, mask_pos = fix_collisions( + mask_pos, overlaps, self.n_max_boxes + ) + + # Generate final targets based on masks + assigned_labels, assigned_bboxes, assigned_scores = ( + self._get_final_assignments( + gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum + ) + ) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.max(dim=-1, keepdim=True)[0] + pos_overlaps = (overlaps * mask_pos).max(dim=-1, keepdim=True)[0] + norm_align_metric = ( + (align_metric * pos_overlaps / (pos_align_metrics + self.eps)) + .max(-2)[0] + .unsqueeze(-1) + ) + assigned_scores = assigned_scores * norm_align_metric + + out_mask_positive = mask_pos_sum.bool() + + return ( + assigned_labels, + assigned_bboxes, + assigned_scores, + out_mask_positive, + assigned_gt_idx, + ) + + def _get_alignment_metric( + self, + pred_scores: Tensor, + pred_bboxes: Tensor, + gt_labels: Tensor, + gt_bboxes: Tensor, + ): + """Calculates anchor alignment metric and IoU between GTs and + predicted oriented bboxes. 
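The rotated assigner relies on a corner representation of oriented boxes (via `xywhr2xyxyxyxy`, imported from `luxonis_train.utils`). The helper below is a standalone sketch of one common formulation of that conversion, written only for illustration and not the actual implementation.

import torch

def xywhr_to_corners(box: torch.Tensor) -> torch.Tensor:
    # (xc, yc, w, h, r) -> four corners, by rotating the half-extent vectors by r
    xc, yc, w, h, r = box.unbind(-1)
    cos, sin = torch.cos(r), torch.sin(r)
    dx = torch.stack([cos * w / 2, sin * w / 2], dim=-1)   # half-width direction
    dy = torch.stack([-sin * h / 2, cos * h / 2], dim=-1)  # half-height direction
    center = torch.stack([xc, yc], dim=-1)
    return torch.stack(
        [center - dx - dy, center + dx - dy, center + dx + dy, center - dx + dy],
        dim=-2,
    )  # (..., 4, 2)

corners = xywhr_to_corners(torch.tensor([0.0, 0.0, 2.0, 1.0, 0.0]))
# axis-aligned case: the corners of a 2x1 box centred at the origin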
+ + @type pred_scores: Tensor + @param pred_scores: Predicted scores [bs, n_anchors, 1] + @type pred_bboxes: Tensor + @param pred_bboxes: Predicted bboxes [bs, n_anchors, 5] + @type gt_labels: Tensor + @param gt_labels: Initial GT labels [bs, n_max_boxes, 1] + @type gt_bboxes: Tensor + @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 5] + """ + pred_scores = pred_scores.permute(0, 2, 1) + gt_labels = gt_labels.to(torch.long) + ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) + ind[0] = ( + torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ) + ind[1] = gt_labels.squeeze(-1) + bbox_scores = pred_scores[ind[0], ind[1]] + + overlaps = batch_iou_obb(gt_bboxes, pred_bboxes) + align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) + + return align_metric, overlaps diff --git a/luxonis_train/utils/assigners/utils.py b/luxonis_train/assigners/utils.py similarity index 57% rename from luxonis_train/utils/assigners/utils.py rename to luxonis_train/assigners/utils.py index fadf5f8e..3c3af4ff 100644 --- a/luxonis_train/utils/assigners/utils.py +++ b/luxonis_train/assigners/utils.py @@ -2,7 +2,7 @@ import torch.nn.functional as F from torch import Tensor -from luxonis_train.utils.boxutils import bbox_iou +from luxonis_train.utils import batch_probiou, bbox_iou, xywhr2xyxyxyxy def candidates_in_gt( @@ -20,7 +20,9 @@ def candidates_in_gt( @return: Mask for anchors inside any GT bbox """ n_anchors = anchor_centers.size(0) - anchor_centers = anchor_centers.unsqueeze(0).repeat(gt_bboxes.size(0), 1, 1) + anchor_centers = anchor_centers.unsqueeze(0).repeat( + gt_bboxes.size(0), 1, 1 + ) gt_bboxes_lt = gt_bboxes[:, :2].unsqueeze(1).repeat(1, n_anchors, 1) gt_bboxes_rb = gt_bboxes[:, 2:].unsqueeze(1).repeat(1, n_anchors, 1) bbox_delta_lt = anchor_centers - gt_bboxes_lt @@ -30,15 +32,49 @@ def candidates_in_gt( return candidates +def candidates_in_gt_obb(xy_centers: Tensor, gt_bboxes: Tensor) -> Tensor: + """Select the positive anchor center in ground truth for rotated + bounding boxes. + + @type xy_centers: Tensor + @param xy_centers: Shape (h*w, 2). + @type gt_bboxes: Tensor + @param gt_bboxes: Shape (b, n_boxes, 5). + @rtype: Tensor + @return: Shape (b, n_boxes, h*w). + """ + corners = xywhr2xyxyxyxy( + gt_bboxes + ) # (b, n_boxes, 5) --> (b, n_boxes, 4, 2) + a, b, _, d = corners.split(1, dim=-2) # (b, n_boxes, 1, 2) + ab = b - a + ad = d - a + + ap = xy_centers - a # (b, n_boxes, h*w, 2) + norm_ab = (ab * ab).sum(dim=-1) + norm_ad = (ad * ad).sum(dim=-1) + ap_dot_ab = (ap * ab).sum(dim=-1) + ap_dot_ad = (ap * ad).sum(dim=-1) + return ( + (ap_dot_ab >= 0) + & (ap_dot_ab <= norm_ab) + & (ap_dot_ad >= 0) + & (ap_dot_ad <= norm_ad) + ) # is_in_box + + def fix_collisions( mask_pos: Tensor, overlaps: Tensor, n_max_boxes: int ) -> tuple[Tensor, Tensor, Tensor]: - """If an anchor is assigned to multiple GTs, the one with highest IoU is selected. + """If an anchor is assigned to multiple GTs, the one with highest + IoU is selected. 
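A minimal numeric check of the projection test used in `candidates_in_gt_obb` above: a point P lies inside box ABCD iff the projections of AP onto AB and AD fall within [0, |AB|^2] and [0, |AD|^2]. The corners of a single unrotated 2x1 box are written out by hand here instead of going through `xywhr2xyxyxyxy`.

import torch

a = torch.tensor([0.0, 0.0])
b = torch.tensor([2.0, 0.0])
d = torch.tensor([0.0, 1.0])
ab, ad = b - a, d - a

def inside(p: torch.Tensor) -> bool:
    ap = p - a
    return bool(
        (ap @ ab >= 0) & (ap @ ab <= ab @ ab) & (ap @ ad >= 0) & (ap @ ad <= ad @ ad)
    )

assert inside(torch.tensor([1.0, 0.5]))      # centre of the box
assert not inside(torch.tensor([3.0, 0.5]))  # outside along AB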
@type mask_pos: Tensor - @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, n_anchors] + @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, + n_anchors] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @type n_max_boxes: int @param n_max_boxes: Number of maximum boxes per image @rtype: tuple[Tensor, Tensor, Tensor] @@ -46,7 +82,9 @@ def fix_collisions( """ mask_pos_sum = mask_pos.sum(dim=-2) if mask_pos_sum.max() > 1: - mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) + mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat( + [1, n_max_boxes, 1] + ) max_overlaps_idx = overlaps.argmax(dim=1) is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) @@ -57,8 +95,8 @@ def fix_collisions( def batch_iou(batch1: Tensor, batch2: Tensor) -> Tensor: - """Calculates IoU for each pair of bboxes in the batch. Bboxes must be in xyxy - format. + """Calculates IoU for each pair of bboxes in the batch. Bboxes must + be in xyxy format. @type batch1: Tensor @param batch1: Tensor of shape C{[bs, N, 4]} @@ -71,3 +109,24 @@ def batch_iou(batch1: Tensor, batch2: Tensor) -> Tensor: [bbox_iou(batch1[i], batch2[i]) for i in range(batch1.size(0))], dim=0 ) return ious + + +def batch_iou_obb(batch1: Tensor, batch2: Tensor) -> Tensor: + """Calculates IoU for each pair of oriented bboxes in the batch. + Bboxes must be in xcycwhr format. + + @type batch1: Tensor + @param batch1: Tensor of shape C{[bs, N, 5]} + @type batch2: Tensor + @param batch2: Tensor of shape C{[bs, M, 5]} + @rtype: Tensor + @return: Per image box IoU of shape C{[bs, N, M]} + """ + ious = torch.stack( + [ + batch_probiou(batch1[i], batch2[i]).squeeze(-1).clamp_(0) + for i in range(batch1.size(0)) + ], + dim=0, + ) + return ious diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index a015e09f..904120a2 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -1,26 +1,27 @@ +import logging from abc import ABC +from contextlib import suppress from typing import Generic +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from pydantic import ValidationError -from torch import Tensor, nn +from torch import Size, Tensor, nn from typing_extensions import TypeVarTuple, Unpack from luxonis_train.nodes import BaseNode -from luxonis_train.utils.general import validate_packet -from luxonis_train.utils.types import ( - BaseProtocol, - IncompatibleException, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils import IncompatibleException, Labels, Packet + +logger = logging.getLogger(__name__) Ts = TypeVarTuple("Ts") class BaseAttachedModule( - nn.Module, Generic[Unpack[Ts]], ABC, metaclass=AutoRegisterMeta, register=False + nn.Module, + Generic[Unpack[Ts]], + ABC, + metaclass=AutoRegisterMeta, + register=False, ): """Base class for all modules that are attached to a L{LuxonisNode}. @@ -30,42 +31,83 @@ class BaseAttachedModule( should be sufficient for most simple cases. More complex modules should override the `prepare` method. + When subclassing, the following methods can be overridden: + - L{prepare}: Prepares node outputs for the forward pass of the module. 
+ Override this method if the default implementation is not sufficient. + + Additionally, the following attributes can be overridden: + - L{supported_labels}: List of label types that the module supports. + Used to determine which labels to extract from the dataset and to validate + compatibility with the node based on the node's tasks. + @type node: BaseNode - @ivar node: Reference to the node that this module is attached to. - @type protocol: type[BaseProtocol] - @ivar protocol: Schema for validating inputs to the module. - @type required_labels: list[LabelType] - @ivar required_labels: List of labels required by this model. + @param node: Reference to the node that this module is attached to. + + @type supported_labels: list[LabelType | tuple[LabelType, ...]] | None + @ivar supported_labels: List of label types that the module supports. + Elements of the list can be either a single label type or a tuple of + label types. In case of the latter, the module requires all of the + specified labels in the tuple to be present. + + Example: + - C{[LabelType.CLASSIFICATION, LabelType.SEGMENTATION]} means that the + module requires either classification or segmentation labels. + - C{[(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS), LabelType.SEGMENTATION]} + means that the module requires either both bounding box I{and} keypoint + labels I{or} segmentation labels. """ - def __init__( - self, - *, - node: BaseNode | None = None, - protocol: type[BaseProtocol] | None = None, - required_labels: list[LabelType] | None = None, - ): - """Base class for all modules that are attached to a L{LuxonisNode}. - - @type node: L{BaseNode} - @param node: Reference to the node that this module is attached to. - @type protocol: type[BaseProtocol] - @param protocol: Schema for validating inputs to the module. - @type required_labels: list[LabelType] - @param required_labels: List of labels required by this model. - """ + supported_labels: list[LabelType | tuple[LabelType, ...]] | None = None + + def __init__(self, *, node: BaseNode | None = None): super().__init__() - self.required_labels = required_labels or [] - self.protocol = protocol self._node = node self._epoch = 0 + self.required_labels: list[LabelType] = [] + if self._node and self.supported_labels: + module_supported = [ + label.value + if isinstance(label, LabelType) + else f"({' + '.join(label)})" + for label in self.supported_labels + ] + module_supported = f"[{', '.join(module_supported)}]" + if not self.node.tasks: + raise IncompatibleException( + f"Module '{self.name}' requires one of the following " + f"labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not specify any tasks." + ) + node_tasks = set(self.node.tasks) + for required_labels in self.supported_labels: + if isinstance(required_labels, LabelType): + required_labels = [required_labels] + else: + required_labels = list(required_labels) + if set(required_labels) <= node_tasks: + self.required_labels = required_labels + break + else: + node_supported = [task.value for task in self.node.tasks] + raise IncompatibleException( + f"Module '{self.name}' requires one of the following labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not support any of them. " + f"{self.node.name} supports {node_supported}." 
+ ) + self._check_node_type_override() + + @property + def name(self) -> str: + return self.__class__.__name__ + @property def node(self) -> BaseNode: """Reference to the node that this module is attached to. @type: L{BaseNode} - @raises RuntimeError: If the node was not provided during initialization. + @raises RuntimeError: If the node was not provided during + initialization. """ if self._node is None: raise RuntimeError( @@ -74,11 +116,171 @@ def node(self) -> BaseNode: ) return self._node - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: + @property + def n_keypoints(self) -> int: + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. + """ + return self.node.n_keypoints + + @property + def n_classes(self) -> int: + """Getter for the number of classes. + + @type: int + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the number of classes is different for + different tasks. In that case, use the L{get_n_classes} + method. + """ + return self.node.n_classes + + @property + def original_in_shape(self) -> Size: + """Getter for the original input shape as [N, H, W]. + + @type: Size + """ + return self.node.original_in_shape + + @property + def class_names(self) -> list[str]: + """Getter for the class names. + + @type: list[str] + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. + """ + return self.node.class_names + + @property + def node_tasks(self) -> dict[LabelType, str]: + """Getter for the tasks of the attached node. + + @type: dict[LabelType, str] + @raises RuntimeError: If the node does not have the `tasks` attribute set. + """ + if self.node._tasks is None: + raise RuntimeError( + "Node must have the `tasks` attribute specified." + ) + return self.node._tasks + + def get_label( + self, labels: Labels, label_type: LabelType | None = None + ) -> Tensor: + """Extracts a specific label from the labels dictionary. + + If the label type is not provided, the first label that matches the + required label type is returned. + + Example:: + >>> # supported_labels = [LabelType.SEGMENTATION] + >>> labels = {"segmentation": seg_tensor, "boundingbox": bbox_tensor} + >>> get_label(labels) + seg_tensor # returns the first matching label + >>> get_label(labels, LabelType.BOUNDINGBOX) + bbox_tensor # returns the bounding box label + >>> get_label(labels, LabelType.CLASSIFICATION) + IncompatibleException: Label 'classification' is missing from the dataset. + + @type labels: L{Labels} + @param labels: Labels from the dataset. + @type label_type: LabelType | None + @param label_type: Type of the label to extract. + + @rtype: Tensor + @return: Extracted label + + @raises ValueError: If the module requires multiple labels and the C{label_type} is not provided. + @raises IncompatibleException: If the label is not found in the labels dictionary. 
+ """ + return self._get_label(labels, label_type)[0] + + def _get_label( + self, labels: Labels, label_type: LabelType | None = None + ) -> tuple[Tensor, LabelType]: + if label_type is None: + if len(self.required_labels) == 1: + label_type = self.required_labels[0] + + if label_type is not None: + task_name = self.node.get_task_name(label_type) + if task_name not in labels: + raise IncompatibleException.from_missing_task( + label_type.value, list(labels.keys()), self.name + ) + return labels[task_name] + + raise ValueError( + f"{self.name} requires multiple labels. You must provide the " + "`label_type` argument to extract the desired label." + ) + + def get_input_tensors( + self, inputs: Packet[Tensor], task_type: LabelType | str | None = None + ) -> list[Tensor]: + """Extracts the input tensors from the packet. + + Example:: + >>> # supported_labels = [LabelType.SEGMENTATION] + >>> # node.tasks = {LabelType.SEGMENTATION: "segmentation-task"} + >>> inputs = [{"segmentation-task": [seg_tensor]}, {"features": [feat_tensor]}] + >>> get_input_tensors(inputs) # matches supported labels to node's tasks + [seg_tensor] + >>> get_input_tensors(inputs, "features") + [feat_tensor] + >>> get_input_tensors(inputs, LabelType.CLASSIFICATION) + ValueError: Task 'classification' is not supported by the node. + + @type inputs: L{Packet}[Tensor] + @param inputs: Output from the node this module is attached to. + @type task_type: LabelType | str | None + @param task_type: Type of the task to extract. Must be provided when the node + supports multiple tasks or if the module doesn't require any tasks. + @rtype: list[Tensor] + @return: Extracted input tensors + + @raises IncompatibleException: If the task type is not supported by the node. + @raises IncompatibleException: If the task is not present in the inputs. + + @raises ValueError: If the module requires multiple labels. + For such cases, the `prepare` method should be overridden. + """ + if task_type is not None: + if isinstance(task_type, LabelType): + if task_type not in self.node_tasks: + raise IncompatibleException( + f"Task {task_type.value} is not supported by the node " + f"{self.node.name}." + ) + return inputs[self.node_tasks[task_type]] + else: + if task_type not in inputs: + raise IncompatibleException( + f"Task {task_type} is not present in the inputs." + ) + return inputs[task_type] + + if len(self.required_labels) > 1: + raise ValueError( + f"{self.name} requires multiple labels, " + "you must provide the `task_type` argument to extract the desired input." + ) + return inputs[self.node_tasks[self.required_labels[0]]] + + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[Unpack[Ts]]: """Prepares node outputs for the forward pass of the module. This default implementation selects the output and label based on - C{required_labels} attribute. If not set, then it returns the first + C{supported_labels} attribute. If not set, then it returns the first matching output and label. That is the first pair of outputs and labels that have the same type. For more complex modules this method should be overridden. @@ -90,52 +292,63 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: @rtype: tuple[Unpack[Ts]] @return: Prepared inputs. Should allow the following usage with the - L{forward} method: + L{forward} method:: >>> loss.forward(*loss.prepare(outputs, labels)) - @raises NotImplementedError: If the module requires multiple labels. 
- @raises IncompatibleException: If the inputs are not compatible with the module. + @raises RuntimeError: If the module requires multiple labels and + is connected to a multi-task node. In this case, the default + implementation cannot be used and the C{prepare} method should be overridden. + + @raises RuntimeError: If the C{tasks} attribute is not set on the node. + @raises RuntimeError: If the C{supported_labels} attribute is not set on the module. """ - if len(self.required_labels) > 1: - raise NotImplementedError( - "This module requires multiple labels, the default `prepare` " - "implementation does not support this." + if self.node._tasks is None: + raise RuntimeError( + f"{self.node.name} must have the `tasks` attribute specified " + f"for {self.name} to make use of the default `prepare` method." ) - if not self.required_labels: - if "boxes" in inputs and LabelType.BOUNDINGBOX in labels: - return inputs["boxes"], labels[LabelType.BOUNDINGBOX] # type: ignore - if "classes" in inputs and LabelType.CLASSIFICATION in labels: - return inputs["classes"][0], labels[LabelType.CLASSIFICATION] # type: ignore - if "keypoints" in inputs and LabelType.KEYPOINT in labels: - return inputs["keypoints"], labels[LabelType.KEYPOINT] # type: ignore - if "segmentation" in inputs and LabelType.SEGMENTATION in labels: - return inputs["segmentation"][0], labels[LabelType.SEGMENTATION] # type: ignore - raise IncompatibleException( - f"No matching labels and outputs found for {self.__class__.__name__}" + if self.supported_labels is None: + raise RuntimeError( + f"{self.name} must have the `supported_labels` attribute " + "specified in order to use the default `prepare` method." ) - label_type = self.required_labels[0] - return inputs[label_type.value], labels[label_type] # type: ignore + if len(self.supported_labels) > 1: + if len(self.node_tasks) > 1: + raise RuntimeError( + f"{self.name} supports more than one label type" + f"and is connected to {self.node.name} node " + "which is a multi-task node. The default `prepare` " + "implementation cannot be used in this case." + ) + self.supported_labels = list( + set(self.supported_labels) & set(self.node_tasks) + ) + x = self.get_input_tensors(inputs) + label, label_type = self._get_label(labels) + if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: + if len(x) == 1: + x = x[0] + else: + logger.warning( + f"Module {self.name} expects a single tensor as input, " + f"but got {len(x)} tensors. Using the last tensor. " + f"If this is not the desired behavior, please override the " + "`prepare` method of the attached module or the `wrap` " + f"method of {self.node.name}." + ) + x = x[-1] - def validate(self, inputs: Packet[Tensor], labels: Labels) -> None: - """Validates that the inputs and labels are compatible with the module. + return x, label # type: ignore - @type inputs: L{Packet}[Tensor] - @param inputs: Output from the node, inputs to the attached module. - @type labels: L{Labels} - @param labels: Labels from the dataset. @raises L{IncompatibleException}: If the - inputs are not compatible with the module. 
- """ - for label in self.required_labels: - if label not in labels: - raise IncompatibleException.from_missing_label( - label, list(labels.keys()), self.__class__.__name__ - ) + def _check_node_type_override(self) -> None: + if "node" not in self.__annotations__: + return - if self.protocol is not None: - try: - validate_packet(inputs, self.protocol) - except ValidationError as e: - raise IncompatibleException.from_validation_error( - e, self.__class__.__name__ - ) from e + node_type = self.__annotations__["node"] + with suppress(RuntimeError): + if not isinstance(self.node, node_type): + raise IncompatibleException( + f"Module '{self.name}' is attached to the '{self.node.name}' node, " + f"but '{self.name}' is only compatible with nodes of type '{node_type.__name__}'." + ) diff --git a/luxonis_train/attached_modules/losses/README.md b/luxonis_train/attached_modules/losses/README.md index aafbc440..c5b1d348 100644 --- a/luxonis_train/attached_modules/losses/README.md +++ b/luxonis_train/attached_modules/losses/README.md @@ -11,6 +11,7 @@ List of all the available loss functions. - [SoftmaxFocalLoss](#softmaxfocalloss) - [AdaptiveDetectionLoss](#adaptivedetectionloss) - [ImplicitKeypointBBoxLoss](#implicitkeypointbboxloss) +- [EfficientKeypointBBoxLoss](#efficientkeypointbboxloss) ## CrossEntropyLoss @@ -97,10 +98,25 @@ Keypoint Similarity Loss](https://arxiv.org/ftp/arxiv/papers/2204/2204.06806.pdf | label_smoothing | float | 0.0 | Smoothing for [SmothBCEWithLogitsLoss](#smoothbcewithlogitsloss) for classification loss. | | min_objectness_iou | float | 0.0 | Minimum objectness IoU. | | bbox_loss_weight | float | 0.05 | Weight for bbox detection sub-loss. | -| keypoint_distance_loss_weight | float | 0.10 | Weight for keypoint distance sub-loss. | +| keypoint_regression_loss_weight | float | 0.5 | Weight for OKS sub-loss. | | keypoint_visibility_loss_weight | float | 0.6 | Weight for keypoint visibility sub-loss. | | class_loss_weight | float | 0.6 | Weight for classification sub-loss. | | objectness_loss_weight | float | 0.7 | Weight for objectness sub-loss. | | anchor_threshold | float | 4.0 | Threshold for matching anchors to targets. | | bias | float | 0.5 | Bias for matchinf anchors to targets. | | balance | list\[float\] | \[4.0, 1.0, 0.4\] | Balance for objectness loss. | + +## EfficientKeypointBBoxLoss + +Adapted from [YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object +Keypoint Similarity Loss](https://arxiv.org/ftp/arxiv/papers/2204/2204.06806.pdf). + +| Key | Type | Default value | Description | +| --------------------- | ------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------- | +| viz_pw | float | 1.0 | Power for [BCEWithLogitsLoss](#bcewithlogitsloss) for keypoint visibility. | +| n_warmup_epochs | int | 4 | Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. | +| iou_type | Literal\["none", "giou", "diou", "ciou", "siou"\] | "giou" | IoU type used for bbox regression sub-loss | +| class_loss_weight | float | 1.0 | Weight used for the classification sub-loss. | +| iou_loss_weight | float | 2.5 | Weight used for the IoU sub-loss. | +| regr_kpts_loss_weight | float | 1.5 | Weight used for the OKS sub-loss. | +| vis_kpts_loss_weight | float | 1.0 | Weight used for the keypoint visibility sub-loss. 
| diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index 737373d2..25f0b9e9 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -2,16 +2,20 @@ from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss from .cross_entropy import CrossEntropyLoss +from .efficient_keypoint_bbox_loss import EfficientKeypointBBoxLoss from .implicit_keypoint_bbox_loss import ImplicitKeypointBBoxLoss from .keypoint_loss import KeypointLoss +from .obb_detection_loss import OBBDetectionLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss from .softmax_focal_loss import SoftmaxFocalLoss __all__ = [ "AdaptiveDetectionLoss", + "OBBDetectionLoss", "BCEWithLogitsLoss", "CrossEntropyLoss", + "EfficientKeypointBBoxLoss", "ImplicitKeypointBBoxLoss", "KeypointLoss", "BaseLoss", diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index af1a7e6a..d25825cb 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -1,44 +1,39 @@ -from typing import Literal, cast +import logging +from typing import Any, Literal, cast import torch import torch.nn.functional as F -from pydantic import Field +from luxonis_ml.data import LabelType from torch import Tensor, nn from torchvision.ops import box_convert -from typing_extensions import Annotated +from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner from luxonis_train.nodes import EfficientBBoxHead -from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, +from luxonis_train.utils import ( + Labels, + Packet, anchors_for_fpn_features, compute_iou_loss, dist2bbox, ) -from luxonis_train.utils.types import ( - BaseProtocol, - IncompatibleException, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.boundingbox import IoUType from .base_loss import BaseLoss - -class Protocol(BaseProtocol): - features: list[Tensor] - class_scores: Annotated[list[Tensor], Field(min_length=1, max_length=1)] - distributions: Annotated[list[Tensor], Field(min_length=1, max_length=1)] +logger = logging.getLogger(__name__) -class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): +class AdaptiveDetectionLoss( + BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] +): node: EfficientBBoxHead + supported_labels = [LabelType.BOUNDINGBOX] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - class_scores: Tensor - distributions: Tensor + anchors: Tensor + anchor_points: Tensor + n_anchors_list: list[int] + stride_tensor: Tensor + gt_bboxes_scale: Tensor def __init__( self, @@ -47,7 +42,7 @@ def __init__( reduction: Literal["sum", "mean"] = "mean", class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, - **kwargs, + **kwargs: Any, ): """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. It combines IoU based bbox regression loss and varifocal loss @@ -64,25 +59,15 @@ def __init__( @param class_loss_weight: Weight of classification loss. @type iou_loss_weight: float @param iou_loss_weight: Weight of IoU loss. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. 
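A minimal sketch of the warm-up behaviour described by `n_warmup_epochs` above: the ATSS assigner provides the targets for the first few epochs, after which the loss switches to the task-aligned assigner. Plain strings stand in for the real assigner objects here.

n_warmup_epochs = 4

def pick_assigner(epoch: int) -> str:
    # mirrors the epoch check in `_run_assigner` further down in this file
    return "ATSS" if epoch < n_warmup_epochs else "TAL"

assert pick_assigner(0) == "ATSS"
assert pick_assigner(4) == "TAL"  # switch happens once warm-up is over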
""" - super().__init__( - required_labels=[LabelType.BOUNDINGBOX], protocol=Protocol, **kwargs - ) + super().__init__(**kwargs) - if not isinstance(self.node, EfficientBBoxHead): - raise IncompatibleException( - f"Loss `{self.__class__.__name__}` is only " - "compatible with nodes of type `EfficientBBoxHead`." - ) self.iou_type: IoUType = iou_type self.reduction = reduction - self.n_classes = self.node.n_classes self.stride = self.node.stride self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[2:] + self.original_img_size = self.original_in_shape[1:] self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) @@ -94,82 +79,46 @@ def __init__( self.class_loss_weight = class_loss_weight self.iou_loss_weight = iou_loss_weight + self._logged_assigner_change = False + def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = outputs["features"] - pred_scores = outputs["class_scores"][0] - pred_distri = outputs["distributions"][0] + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] + + target = self.get_label(labels) batch_size = pred_scores.shape[0] - device = pred_scores.device - - target = labels[LabelType.BOUNDINGBOX].to(device) - gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - anchors, - anchor_points, - n_anchors_list, - stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) - anchor_points_strided = anchor_points / stride_tensor - pred_bboxes = dist2bbox(pred_distri, anchor_points_strided) + self._init_parameters(feats) - target = self._preprocess_target(target, batch_size, gt_bboxes_scale) + target = self._preprocess_bbox_target(target, batch_size) + pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) gt_labels = target[:, :, :1] gt_xyxy = target[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - ) = self.atts_assigner( - anchors, - n_anchors_list, - gt_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * stride_tensor, - ) - else: - # TODO: log change of assigner (once common Logger) - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - ) = self.tal_assigner.forward( - pred_scores.detach(), - pred_bboxes.detach() * stride_tensor, - anchor_points, - gt_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + _, + ) = self._run_assigner( + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) return ( pred_bboxes, pred_scores, - assigned_bboxes / stride_tensor, + assigned_bboxes / self.stride_tensor, assigned_labels, assigned_scores, mask_positive, @@ -184,8 +133,12 @@ def forward( assigned_scores: Tensor, mask_positive: Tensor, ): - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + one_hot_label = 
F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -200,17 +153,77 @@ def forward( bbox_format="xyxy", )[0] - loss = self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + loss = ( + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + ) sub_losses = {"class": loss_cls.detach(), "iou": loss_iou.detach()} return loss, sub_losses - def _preprocess_target(self, target: Tensor, batch_size: int, scale_tensor: Tensor): - """Preprocess target in shape [batch_size, N, 5] where N is maximum number of - instances in one image.""" + def _init_parameters(self, features: list[Tensor]): + if not hasattr(self, "gt_bboxes_scale"): + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=features[0].device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = ( + self.anchor_points / self.stride_tensor + ) + + def _run_assigner( + self, + gt_labels: Tensor, + gt_xyxy: Tensor, + mask_gt: Tensor, + pred_bboxes: Tensor, + pred_scores: Tensor, + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + if self._epoch < self.n_warmup_epochs: + return self.atts_assigner( + self.anchors, + self.n_anchors_list, + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes.detach() * self.stride_tensor, + ) + else: + self._log_assigner_change() + return self.tal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * self.stride_tensor, + self.anchor_points, + gt_labels, + gt_xyxy, + mask_gt, + ) + + def _preprocess_bbox_target( + self, target: Tensor, batch_size: int + ) -> Tensor: + """Preprocess target in shape [batch_size, N, 5] where N is the + maximum number of instances in one image.""" sample_ids, counts = cast( - tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True) + tuple[Tensor, Tensor], + torch.unique(target[:, 0].int(), return_counts=True), ) c_max = int(counts.max()) if counts.numel() > 0 else 0 out_target = torch.zeros(batch_size, c_max, 5, device=target.device) @@ -218,10 +231,20 @@ def _preprocess_target(self, target: Tensor, batch_size: int, scale_tensor: Tens for id, count in zip(sample_ids, counts): out_target[id, :count] = target[target[:, 0] == id][:, 1:] - scaled_target = out_target[:, :, 1:5] * scale_tensor + scaled_target = out_target[:, :, 1:5] * self.gt_bboxes_scale out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") return out_target + def _log_assigner_change(self): + if self._logged_assigner_change: + return + + logger.info( + f"Switching to Task Aligned Assigner after {self.n_warmup_epochs} warmup epochs.", + stacklevel=2, + ) + self._logged_assigner_change = True + class VarifocalLoss(nn.Module): def __init__(self, alpha: float = 0.75, gamma: float = 2.0): @@ -244,7 +267,8 @@ def forward( self, pred_score: Tensor, target_score: Tensor, label: Tensor ) -> Tensor: weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label + self.alpha * pred_score.pow(self.gamma) * (1 - label) + + target_score * label ) ce_loss = F.binary_cross_entropy( pred_score.float(), target_score.float(), reduction="none" diff --git 
a/luxonis_train/attached_modules/losses/base_loss.py b/luxonis_train/attached_modules/losses/base_loss.py index 61297f10..7a69d0d8 100644 --- a/luxonis_train/attached_modules/losses/base_loss.py +++ b/luxonis_train/attached_modules/losses/base_loss.py @@ -17,19 +17,23 @@ class BaseLoss( ): """A base class for all loss functions. - This class defines the basic interface for all loss functions. It utilizes automatic - registration of defined subclasses to a L{LOSSES} registry. + This class defines the basic interface for all loss functions. It + utilizes automatic registration of defined subclasses to a L{LOSSES} + registry. """ @abstractmethod - def forward(self, *args: Unpack[Ts]) -> Tensor | tuple[Tensor, dict[str, Tensor]]: + def forward( + self, *args: Unpack[Ts] + ) -> Tensor | tuple[Tensor, dict[str, Tensor]]: """Forward pass of the loss function. @type args: Unpack[Ts] @param args: Prepared inputs from the L{prepare} method. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. """ ... @@ -45,9 +49,10 @@ def run( @type labels: L{Labels} @param labels: Labels from the dataset. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. - @raises IncompatibleException: If the inputs are not compatible with the module. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. + @raises IncompatibleException: If the inputs are not compatible + with the module. """ - self.validate(inputs, labels) return self(*self.prepare(inputs, labels)) diff --git a/luxonis_train/attached_modules/losses/bce_with_logits.py b/luxonis_train/attached_modules/losses/bce_with_logits.py index 5800cbdb..b759d06b 100644 --- a/luxonis_train/attached_modules/losses/bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/bce_with_logits.py @@ -1,46 +1,53 @@ -from typing import Literal +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from .base_loss import BaseLoss class BCEWithLogitsLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, weight: list[float] | None = None, reduction: Literal["none", "mean", "sum"] = "mean", pos_weight: Tensor | None = None, - **kwargs, + **kwargs: Any, ): - """This loss combines a L{nn.Sigmoid} layer and the L{nn.BCELoss} in one single - class. This version is more numerically stable than using a plain C{Sigmoid} - followed by a {BCELoss} as, by combining the operations into one layer, we take - advantage of the log-sum-exp trick for numerical stability. + """This loss combines a L{nn.Sigmoid} layer and the + L{nn.BCELoss} in one single class. This version is more + numerically stable than using a plain C{Sigmoid} followed by a + {BCELoss} as, by combining the operations into one layer, we + take advantage of the log-sum-exp trick for numerical stability. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, has to be a list of length C{nbatch}. Defaults to - C{None}. 
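The `run` convention from `BaseLoss` above boils down to `loss.run(outputs, labels) == loss(*loss.prepare(outputs, labels))`. A toy loss illustrating the pattern; the dictionary keys are invented for the example.

import torch

class ToyLoss(torch.nn.Module):
    def prepare(self, inputs: dict, labels: dict):
        # pick the tensors that `forward` expects
        return inputs["segmentation"][0], labels["segmentation"]

    def forward(self, preds: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        return torch.nn.functional.binary_cross_entropy_with_logits(preds, target)

    def run(self, inputs: dict, labels: dict) -> torch.Tensor:
        return self(*self.prepare(inputs, labels))

loss = ToyLoss()
outputs = {"segmentation": [torch.randn(2, 1, 8, 8)]}
labels = {"segmentation": torch.rand(2, 1, 8, 8).round()}
value = loss.run(outputs, labels)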
+ @param weight: a manual rescaling weight given to the loss of + each batch element. If given, has to be a list of length + C{nbatch}. Defaults to C{None}. @type reduction: Literal["none", "mean", "sum"] - @param reduction: Specifies the reduction to apply to the output: C{"none"} | - C{"mean"} | C{"sum"}. C{"none"}: no reduction will be applied, C{"mean"}: - the sum of the output will be divided by the number of elements in the - output, C{"sum"}: the output will be summed. Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{"mean"}. + @param reduction: Specifies the reduction to apply to the + output: C{"none"} | C{"mean"} | C{"sum"}. C{"none"}: no + reduction will be applied, C{"mean"}: the sum of the output + will be divided by the number of elements in the output, + C{"sum"}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{"mean"}. @type pos_weight: Tensor | None - @param pos_weight: a weight of positive examples to be broadcasted with target. - Must be a tensor with equal size along the class dimension to the number of - classes. Pay close attention to PyTorch's broadcasting semantics in order to - achieve the desired operations. For a target of size [B, C, H, W] (where B - is batch size) pos_weight of size [B, C, H, W] will apply different - pos_weights to each element of the batch or [C, H, W] the same pos_weights - across the batch. To apply the same positive weight along all spacial - dimensions for a 2D multi-class target [C, H, W] use: [C, 1, 1]. Defaults to - C{None}. + @param pos_weight: a weight of positive examples to be + broadcasted with target. Must be a tensor with equal size + along the class dimension to the number of classes. Pay + close attention to PyTorch's broadcasting semantics in order + to achieve the desired operations. For a target of size [B, + C, H, W] (where B is batch size) pos_weight of size [B, C, + H, W] will apply different pos_weights to each element of + the batch or [C, H, W] the same pos_weights across the + batch. To apply the same positive weight along all spacial + dimensions for a 2D multi-class target [C, H, W] use: [C, 1, + 1]. Defaults to C{None}. """ super().__init__(**kwargs) self.criterion = nn.BCEWithLogitsLoss( @@ -50,6 +57,15 @@ def __init__( ) def forward(self, predictions: Tensor, target: Tensor) -> Tensor: + """Computes the BCE loss from logits. + + @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, ...) + @type target: Tensor + @param target: A tensor of the same shape as predictions. + @rtype: Tensor + @return: A scalar tensor. 
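A small numerical illustration of the stability point made in the docstring above; the values are chosen to exaggerate the effect.

import torch
import torch.nn.functional as F

logits = torch.tensor([40.0, -40.0])
target = torch.tensor([0.0, 1.0])  # deliberately wrong labels

naive = F.binary_cross_entropy(torch.sigmoid(logits), target, reduction="none")
fused = F.binary_cross_entropy_with_logits(logits, target, reduction="none")
# naive[0] saturates: sigmoid(40) rounds to 1.0, so the (clamped) loss no longer
# reflects how wrong the logit is; the fused version returns ~40 for both entries.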
+ """ if predictions.shape != target.shape: raise RuntimeError( f"Target tensor dimension ({target.shape}) and preds tensor " diff --git a/luxonis_train/attached_modules/losses/cross_entropy.py b/luxonis_train/attached_modules/losses/cross_entropy.py index f073401e..4be0cfdc 100644 --- a/luxonis_train/attached_modules/losses/cross_entropy.py +++ b/luxonis_train/attached_modules/losses/cross_entropy.py @@ -1,19 +1,21 @@ from logging import getLogger -from typing import Literal +from typing import Any, Literal import torch import torch.nn as nn +from luxonis_ml.data import LabelType from torch import Tensor from .base_loss import BaseLoss logger = getLogger(__name__) -was_logged = False class CrossEntropyLoss(BaseLoss[Tensor, Tensor]): - """This criterion computes the cross entropy loss between input logits and - target.""" + """This criterion computes the cross entropy loss between input + logits and target.""" + + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] def __init__( self, @@ -21,7 +23,7 @@ def __init__( ignore_index: int = -100, reduction: Literal["none", "mean", "sum"] = "mean", label_smoothing: float = 0.0, - **kwargs, + **kwargs: Any, ): super().__init__(**kwargs) @@ -31,19 +33,19 @@ def __init__( reduction=reduction, label_smoothing=label_smoothing, ) + self._was_logged = False def forward(self, preds: Tensor, target: Tensor) -> Tensor: - global was_logged if preds.ndim == target.ndim: ch_dim = 1 if preds.ndim > 1 else 0 if preds.shape[ch_dim] == 1: - if not was_logged: + if not self._was_logged: logger.warning( "`CrossEntropyLoss` expects at least 2 classes. " "Attempting to fix by adding a dummy channel. " "If you want to be sure, use `BCEWithLogitsLoss` instead." ) - was_logged = True + self._was_logged = True preds = torch.cat([torch.zeros_like(preds), preds], dim=ch_dim) if target.shape[ch_dim] == 1: target = torch.cat([1 - target, target], dim=ch_dim) diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py new file mode 100644 index 00000000..d996dcfd --- /dev/null +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -0,0 +1,279 @@ +from typing import Any, Literal + +import torch +import torch.nn.functional as F +from luxonis_ml.data import LabelType +from torch import Tensor + +from luxonis_train.attached_modules.losses import AdaptiveDetectionLoss +from luxonis_train.nodes import EfficientKeypointBBoxHead +from luxonis_train.utils import ( + Labels, + Packet, + compute_iou_loss, + dist2bbox, + get_sigmas, + get_with_default, +) +from luxonis_train.utils.boundingbox import IoUType + +from .bce_with_logits import BCEWithLogitsLoss + + +class EfficientKeypointBBoxLoss(AdaptiveDetectionLoss): + node: EfficientKeypointBBoxHead + supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] + + gt_kpts_scale: Tensor + + def __init__( + self, + n_warmup_epochs: int = 4, + iou_type: IoUType = "giou", + reduction: Literal["sum", "mean"] = "mean", + class_loss_weight: float = 1.0, + iou_loss_weight: float = 2.5, + viz_pw: float = 1.0, + regr_kpts_loss_weight: float = 1.5, + vis_kpts_loss_weight: float = 1.0, + sigmas: list[float] | None = None, + area_factor: float | None = None, + **kwargs: Any, + ): + """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications + }. It combines IoU based bbox regression loss and varifocal loss + for classification. 
+ Code is adapted from U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models}. + + @type n_warmup_epochs: int + @param n_warmup_epochs: Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] + @param iou_type: IoU type used for bbox regression loss. + @type reduction: Literal["sum", "mean"] + @param reduction: Reduction type for loss. + @type class_loss_weight: float + @param class_loss_weight: Weight of classification loss for bounding boxes. + @type regr_kpts_loss_weight: float + @param regr_kpts_loss_weight: Weight of regression loss for keypoints. + @type vis_kpts_loss_weight: float + @param vis_kpts_loss_weight: Weight of visibility loss for keypoints. + @type iou_loss_weight: float + @param iou_loss_weight: Weight of IoU loss. + @type sigmas: list[float] | None + @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. + """ + super().__init__( + n_warmup_epochs=n_warmup_epochs, + iou_type=iou_type, + reduction=reduction, + class_loss_weight=class_loss_weight, + iou_loss_weight=iou_loss_weight, + **kwargs, + ) + + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([viz_pw]) + ) + self.sigmas = get_sigmas( + sigmas=sigmas, + n_keypoints=self.n_keypoints, + caller_name=self.name, + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) + self.regr_kpts_loss_weight = regr_kpts_loss_weight + self.vis_kpts_loss_weight = vis_kpts_loss_weight + + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[ + Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor + ]: + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] + pred_kpts = self.get_input_tensors(inputs, "keypoints_raw")[0] + + target_kpts = self.get_label(labels, LabelType.KEYPOINTS) + target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX) + + batch_size = pred_scores.shape[0] + n_kpts = (target_kpts.shape[1] - 2) // 3 + + self._init_parameters(feats) + + pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) + pred_kpts = self.dist2kpts_noscale( + self.anchor_points_strided, + pred_kpts.view( + batch_size, + -1, + n_kpts, + 3, + ), + ) + + target_bbox = self._preprocess_bbox_target(target_bbox, batch_size) + + gt_bbox_labels = target_bbox[:, :, :1] + gt_xyxy = target_bbox[:, :, 1:] + mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + assigned_gt_idx, + ) = self._run_assigner( + gt_bbox_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) + + batched_kpts = self._preprocess_kpts_target( + target_kpts, batch_size, self.gt_kpts_scale + ) + assigned_gt_idx_expanded = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1) + selected_keypoints = batched_kpts.gather( + 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_keypoints, 3) + ) + xy_components = selected_keypoints[:, :, :, :2] + normalized_xy = xy_components / self.stride_tensor.view(1, -1, 1, 1) + selected_keypoints = torch.cat( + (normalized_xy, selected_keypoints[:, :, :, 2:]), dim=-1 + ) + 
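# Illustrative sketch (shapes are assumptions): the gather above picks, for
# every anchor, the keypoints of the ground-truth instance that the assigner
# matched it to, by expanding `assigned_gt_idx` to the keypoint dimensions.
import torch

batch, n_anchors, max_gt, n_kpts = 2, 5, 3, 4
batched_kpts = torch.randn(batch, max_gt, n_kpts, 3)
assigned_gt_idx = torch.randint(0, max_gt, (batch, n_anchors))

idx = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1)             # (B, A, 1, 1)
selected = batched_kpts.gather(1, idx.expand(-1, -1, n_kpts, 3))
assert selected.shape == (batch, n_anchors, n_kpts, 3)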
gt_kpt = selected_keypoints[mask_positive] + pred_kpts = pred_kpts[mask_positive] + assigned_bboxes = assigned_bboxes / self.stride_tensor + + area = ( + assigned_bboxes[mask_positive][:, 0] + - assigned_bboxes[mask_positive][:, 2] + ) * ( + assigned_bboxes[mask_positive][:, 1] + - assigned_bboxes[mask_positive][:, 3] + ) + + return ( + pred_bboxes, + pred_scores, + assigned_bboxes, + assigned_labels, + assigned_scores, + mask_positive, + gt_kpt, + pred_kpts, + area * self.area_factor, + ) + + def forward( + self, + pred_bboxes: Tensor, + pred_scores: Tensor, + assigned_bboxes: Tensor, + assigned_labels: Tensor, + assigned_scores: Tensor, + mask_positive: Tensor, + gt_kpts: Tensor, + pred_kpts: Tensor, + area: Tensor, + ): + device = pred_bboxes.device + sigmas = self.sigmas.to(device) + d = (gt_kpts[..., 0] - pred_kpts[..., 0]).pow(2) + ( + gt_kpts[..., 1] - pred_kpts[..., 1] + ).pow(2) + e = d / ((2 * sigmas).pow(2) * ((area.view(-1, 1) + 1e-9) * 2)) + mask = (gt_kpts[..., 2] > 0).float() + regression_loss = ( + ((1 - torch.exp(-e)) * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9) + ).mean() + visibility_loss = self.b_cross_entropy.forward(pred_kpts[..., 2], mask) + + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) + + if assigned_scores.sum() > 1: + loss_cls /= assigned_scores.sum() + + loss_iou = compute_iou_loss( + pred_bboxes, + assigned_bboxes, + assigned_scores, + mask_positive, + reduction="sum", + iou_type=self.iou_type, + bbox_format="xyxy", + )[0] + + loss = ( + self.class_loss_weight * loss_cls + + self.iou_loss_weight * loss_iou + + regression_loss * self.regr_kpts_loss_weight + + visibility_loss * self.vis_kpts_loss_weight + ) + + sub_losses = { + "class": loss_cls.detach(), + "iou": loss_iou.detach(), + "regression": regression_loss.detach(), + "visibility": visibility_loss.detach(), + } + + return loss, sub_losses + + def _preprocess_kpts_target( + self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor + ) -> Tensor: + """Preprocesses the target keypoints in shape [batch_size, N, + n_keypoints, 3] where N is the maximum number of keypoints in + one image.""" + + _, counts = torch.unique(kpts_target[:, 0].int(), return_counts=True) + max_kpts = int(counts.max()) if counts.numel() > 0 else 0 + batched_keypoints = torch.zeros( + (batch_size, max_kpts, self.n_keypoints, 3), + device=kpts_target.device, + ) + for i in range(batch_size): + keypoints_i = kpts_target[kpts_target[:, 0] == i] + scaled_keypoints_i = keypoints_i[:, 2:].clone() + batched_keypoints[i, : keypoints_i.shape[0]] = ( + scaled_keypoints_i.view(-1, self.n_keypoints, 3) + ) + batched_keypoints[i, :, :, :2] *= scale_tensor[:2] + + return batched_keypoints + + def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: + """Adjusts and scales predicted keypoints relative to anchor + points without considering image stride.""" + adj_kpts = kpts.clone() + scale = 2.0 + x_adj = anchor_points[:, [0]] - 0.5 + y_adj = anchor_points[:, [1]] - 0.5 + + adj_kpts[..., :2] *= scale + adj_kpts[..., 0] += x_adj + adj_kpts[..., 1] += y_adj + return adj_kpts + + def _init_parameters(self, features: list[Tensor]): + device = features[0].device + super()._init_parameters(features) + self.gt_kpts_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, + ) diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py 
b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index 7169d2a4..8c9230ae 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -1,25 +1,20 @@ -from typing import cast +import logging +from typing import Any, cast import torch -from pydantic import Field +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert -from typing_extensions import Annotated from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss from luxonis_train.nodes import ImplicitKeypointBBoxHead -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Labels, + Packet, compute_iou_loss, match_to_anchor, process_bbox_predictions, ) -from luxonis_train.utils.types import ( - BaseProtocol, - IncompatibleException, - Labels, - LabelType, - Packet, -) from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss @@ -33,9 +28,13 @@ list[Tensor], ] +logger = logging.getLogger(__name__) + +# TODO: BROKEN! class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): node: ImplicitKeypointBBoxHead + supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] def __init__( self, @@ -45,17 +44,19 @@ def __init__( label_smoothing: float = 0.0, min_objectness_iou: float = 0.0, bbox_loss_weight: float = 0.05, - keypoint_distance_loss_weight: float = 0.10, keypoint_visibility_loss_weight: float = 0.6, + keypoint_regression_loss_weight: float = 0.5, + sigmas: list[float] | None = None, + area_factor: float | None = None, class_loss_weight: float = 0.6, objectness_loss_weight: float = 0.7, anchor_threshold: float = 4.0, bias: float = 0.5, balance: list[float] | None = None, - **kwargs, + **kwargs: Any, ): - """Joint loss for keypoint and box predictions for cases where the keypoints and - boxes are inherently linked. + """Joint loss for keypoint and box predictions for cases where + the keypoints and boxes are inherently linked. Based on U{YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object Keypoint Similarity Loss}. @@ -72,10 +73,14 @@ def __init__( @param min_objectness_iou: Minimum objectness iou. Defaults to C{0.0}. @type bbox_loss_weight: float @param bbox_loss_weight: Weight for the bounding box loss. - @type keypoint_distance_loss_weight: float - @param keypoint_distance_loss_weight: Weight for the keypoint distance loss. Defaults to C{0.10}. @type keypoint_visibility_loss_weight: float @param keypoint_visibility_loss_weight: Weight for the keypoint visibility loss. Defaults to C{0.6}. + @type keypoint_regression_loss_weight: float + @param keypoint_regression_loss_weight: Weight for the keypoint regression loss. Defaults to C{0.5}. + @type sigmas: list[float] | None + @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. @type class_loss_weight: float @param class_loss_weight: Weight for the class loss. Defaults to C{0.6}. @type objectness_loss_weight: float @@ -88,56 +93,42 @@ def __init__( @param balance: Balance for the different heads. Defaults to C{None}. 
""" - super().__init__( - required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINT], - **kwargs, - ) + super().__init__(**kwargs) - if not isinstance(self.node, ImplicitKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.__class__.__name__}` is only " - "compatible with nodes of type `ImplicitKeypointBBoxHead`." - ) - self.n_classes = self.node.n_classes - self.n_keypoints = self.node.n_keypoints self.n_anchors = self.node.n_anchors - self.num_heads = self.node.num_heads + self.n_heads = self.node.n_heads self.box_offset = self.node.box_offset self.anchors = self.node.anchors self.balance = balance or [4.0, 1.0, 0.4] - if len(self.balance) < self.num_heads: - raise ValueError( - f"Balance list must have at least {self.num_heads} elements." + if len(self.balance) < self.n_heads: + logger.warning( + f"Balance list must have at least {self.n_heads} elements." + "Filling the rest with 1.0." ) - - class Protocol(BaseProtocol): - features: Annotated[list[Tensor], Field(min_length=self.num_heads)] - - self.protocol = Protocol # type: ignore + self.balance += [1.0] * (self.n_heads - len(self.balance)) self.min_objectness_iou = min_objectness_iou self.bbox_weight = bbox_loss_weight - self.kpt_distance_weight = keypoint_distance_loss_weight self.class_weight = class_loss_weight self.objectness_weight = objectness_loss_weight - self.kpt_visibility_weight = keypoint_visibility_loss_weight self.anchor_threshold = anchor_threshold self.bias = bias self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([obj_pw]), **kwargs + pos_weight=torch.tensor([obj_pw]) ) self.class_loss = SmoothBCEWithLogitsLoss( label_smoothing=label_smoothing, bce_pow=cls_pw, - **kwargs, ) self.keypoint_loss = KeypointLoss( + n_keypoints=self.n_keypoints, bce_power=viz_pw, - distance_weight=keypoint_distance_loss_weight, - visibility_weight=keypoint_visibility_loss_weight, - **kwargs, + sigmas=sigmas, + area_factor=area_factor, + regression_loss_weight=keypoint_regression_loss_weight, + visibility_loss_weight=keypoint_visibility_loss_weight, ) self.positive_smooth_const = 1 - 0.5 * label_smoothing @@ -146,38 +137,46 @@ class Protocol(BaseProtocol): def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[Tensor], KeypointTargetType]: - """Prepares the labels to be in the correct format for loss calculation. + """Prepares the labels to be in the correct format for loss + calculation. @type outputs: Packet[Tensor] @param outputs: Output from the forward pass. @type labels: L{Labels} @param labels: Dictionary containing the labels. - @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], list[Tensor], - list[tuple[Tensor, Tensor, Tensor, Tensor]], list[Tensor]]] - @return: Tuple containing the original output and the postprocessed labels. The - processed labels are a tuple containing the class targets, box targets, - keypoint targets, indices and anchors. Indicies are a tuple containing - vectors of indices for batch, anchor, feature y and feature x dimensions, - respectively. They are all of shape (n_targets,). The indices are used to - index the output tensors of shape (batch_size, n_anchors, feature_height, - feature_width, n_classes + box_offset + n_keypoints * 3) to get a tensor of - shape (n_targets, n_classes + box_offset + n_keypoints * 3). + @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], + list[Tensor], list[tuple[Tensor, Tensor, Tensor, Tensor]], + list[Tensor]]] + @return: Tuple containing the original output and the + postprocessed labels. 
The processed labels are a tuple + containing the class targets, box targets, keypoint targets, + indices and anchors. Indicies are a tuple containing vectors + of indices for batch, anchor, feature y and feature x + dimensions, respectively. They are all of shape + (n_targets,). The indices are used to index the output + tensors of shape (batch_size, n_anchors, feature_height, + feature_width, n_classes + box_offset + n_keypoints * 3) to + get a tensor of shape (n_targets, n_classes + box_offset + + n_keypoints * 3). """ - predictions = outputs["features"] + predictions = self.get_input_tensors(outputs, "features") - kpts = labels[LabelType.KEYPOINT] - boxes = labels[LabelType.BOUNDINGBOX] + kpt_label = self.get_label(labels, LabelType.KEYPOINTS) + bbox_label = self.get_label(labels, LabelType.BOUNDINGBOX) - nkpts = (kpts.shape[1] - 2) // 3 - targets = torch.zeros((len(boxes), nkpts * 2 + self.box_offset + 1)) - targets[:, :2] = boxes[:, :2] + targets = torch.zeros( + (kpt_label.shape[0], self.n_keypoints * 3 + self.box_offset + 1) + ) + targets[:, :2] = kpt_label[:, :2] targets[:, 2 : self.box_offset + 1] = box_convert( - boxes[:, 2:], "xywh", "cxcywh" + bbox_label[:, 2:], "xywh", "cxcywh" ) - targets[:, self.box_offset + 1 :: 2] = kpts[:, 2::3] # insert kp x coordinates - targets[:, self.box_offset + 2 :: 2] = kpts[:, 3::3] # insert kp y coordinates - n_targets = len(targets) + # insert keypoints + for i in range(1, 4): + targets[:, self.box_offset + i :: 3] = kpt_label[:, i + 1 :: 3] + + n_targets = targets.shape[0] class_targets: list[Tensor] = [] box_targets: list[Tensor] = [] @@ -186,24 +185,28 @@ def prepare( anchors: list[Tensor] = [] anchor_indices = ( - torch.arange(self.n_anchors, device=targets.device, dtype=torch.float32) + torch.arange( + self.n_anchors, device=targets.device, dtype=torch.float32 + ) .reshape(self.n_anchors, 1) .repeat(1, n_targets) .unsqueeze(-1) ) - targets = torch.cat((targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2) + targets = torch.cat( + (targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2 + ) xy_deltas = ( torch.tensor( - [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device + [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], + device=targets.device, ).float() * self.bias ) - for i in range(self.num_heads): + for i in range(self.n_heads): anchor = self.anchors[i] feature_height, feature_width = predictions[i].shape[2:4] - scaled_targets, xy_shifts = match_to_anchor( targets, anchor, @@ -259,12 +262,18 @@ def forward( "objectness": torch.tensor(0.0, device=device), "class": torch.tensor(0.0, device=device), "kpt_visibility": torch.tensor(0.0, device=device), - "kpt_distance": torch.tensor(0.0, device=device), + "kpt_regression": torch.tensor(0.0, device=device), } - for pred, class_target, box_target, kpt_target, index, anchor, balance in zip( - predictions, *targets, self.balance - ): + for ( + pred, + class_target, + box_target, + kpt_target, + index, + anchor, + balance, + ) in zip(predictions, *targets, self.balance): obj_targets = torch.zeros_like(pred[..., 0], device=device) n_targets = len(class_target) @@ -284,17 +293,15 @@ def forward( sub_losses["bboxes"] += bbox_loss * self.bbox_weight + area = box_target[:, 2] * box_target[:, 3] + _, kpt_sublosses = self.keypoint_loss.forward( pred_subset[:, self.box_offset + self.n_classes :], kpt_target.to(device), + area.to(device), ) - - sub_losses["kpt_distance"] += ( - kpt_sublosses["distance"] * self.kpt_distance_weight - ) - sub_losses["kpt_visibility"] += ( - 
kpt_sublosses["visibility"] * self.kpt_visibility_weight - ) + for name, kpt_subloss in kpt_sublosses.items(): + sub_losses[name] += kpt_subloss obj_targets[index] = (self.min_objectness_iou) + ( 1 - self.min_objectness_iou @@ -303,11 +310,10 @@ def forward( if self.n_classes > 1: sub_losses["class"] += ( self.class_loss.forward( - [ - pred_subset[ - :, - self.box_offset : self.box_offset + self.n_classes, - ] + pred_subset[ + :, + self.box_offset : self.box_offset + + self.n_classes, ], class_target, ) @@ -323,11 +329,19 @@ def forward( loss = cast(Tensor, sum(sub_losses.values())).reshape([]) return loss, {name: loss.detach() for name, loss in sub_losses.items()} - def _create_keypoint_target(self, scaled_targets: Tensor, box_xy_deltas: Tensor): + def _create_keypoint_target( + self, scaled_targets: Tensor, box_xy_deltas: Tensor + ): keypoint_target = scaled_targets[:, self.box_offset + 1 : -1] for j in range(self.n_keypoints): - low = 2 * j - high = 2 * (j + 1) - keypoint_mask = keypoint_target[:, low:high] != 0 - keypoint_target[:, low:high][keypoint_mask] -= box_xy_deltas[keypoint_mask] + idx = 3 * j + keypoint_coords = keypoint_target[:, idx : idx + 2] + visibility = keypoint_target[:, idx + 2] + + keypoint_mask = visibility != 0 + keypoint_coords[keypoint_mask] -= box_xy_deltas[keypoint_mask] + + keypoint_target[:, idx : idx + 2] = keypoint_coords + keypoint_target[:, idx + 2] = visibility + return keypoint_target diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py index 4728b045..c17ac7a1 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -1,77 +1,112 @@ -from typing import Annotated +from typing import Any import torch -from pydantic import Field +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.boxutils import process_keypoints_predictions -from luxonis_train.utils.types import ( - BaseProtocol, - Labels, - LabelType, - Packet, +from luxonis_train.utils import ( + get_sigmas, + get_with_default, + process_keypoints_predictions, ) from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss -class Protocol(BaseProtocol): - keypoints: Annotated[list[Tensor], Field(min_length=1, max_length=1)] - - +# TODO: Make it work on its own class KeypointLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.KEYPOINTS] + def __init__( self, + n_keypoints: int, bce_power: float = 1.0, - distance_weight: float = 0.1, - visibility_weight: float = 0.6, - **kwargs, + sigmas: list[float] | None = None, + area_factor: float | None = None, + regression_loss_weight: float = 1.0, + visibility_loss_weight: float = 1.0, + **kwargs: Any, ): - super().__init__( - protocol=Protocol, required_labels=[LabelType.KEYPOINT], **kwargs - ) + """Keypoint based loss that is computed from OKS-based + regression and visibility loss. + + @type n_keypoints: int + @param n_keypoints: Number of keypoints. + @type bce_power: float + @param bce_power: Power used for BCE visibility loss. Defaults + to C{1.0}. + @param sigmas: Sigmas used for OKS. If None then use COCO ones + if possible or default ones. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. + @type regression_loss_weight: float + @param regression_loss_weight: Weight of regression loss. + Defaults to C{1.0}. 
+ @type visibility_loss_weight: float + @param visibility_loss_weight: Weight of visibility loss. + Defaults to C{1.0}. + """ + + super().__init__(**kwargs) self.b_cross_entropy = BCEWithLogitsLoss( pos_weight=torch.tensor([bce_power]), **kwargs ) - self.distance_weight = distance_weight - self.visibility_weight = visibility_weight - - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]: - return torch.cat(inputs["keypoints"], dim=0), labels[LabelType.KEYPOINT] + self.sigmas = get_sigmas(sigmas, n_keypoints, caller_name=self.name) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) + self.regression_loss_weight = regression_loss_weight + self.visibility_loss_weight = visibility_loss_weight def forward( - self, prediction: Tensor, target: Tensor + self, prediction: Tensor, target: Tensor, area: Tensor ) -> tuple[Tensor, dict[str, Tensor]]: - """Computes the keypoint loss and visibility loss for a given prediction and - target. + """Computes the keypoint loss and visibility loss for a given + prediction and target. @type prediction: Tensor - @param prediction: Predicted tensor of shape C{[n_detections, n_keypoints * 3]}. + @param prediction: Predicted tensor of shape C{[n_detections, + n_keypoints * 3]}. @type target: Tensor - @param target: Target tensor of shape C{[n_detections, n_keypoints * 2]}. - @rtype: tuple[Tensor, Tensor] - @return: A tuple containing the keypoint loss tensor of shape C{[1,]} and the - visibility loss tensor of shape C{[1,]}. + @param target: Target tensor of shape C{[n_detections, + n_keypoints * 3]}. + @type area: Tensor + @param area: Area tensor of shape C{[n_detections]}. + @rtype: tuple[Tensor, dict[str, Tensor]] + @return: A tuple containing the total loss tensor of shape + C{[1,]} and a dictionary with the regression loss and + visibility loss tensors. 
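A minimal numeric sketch of the OKS-style regression term computed in the forward below; the sigmas, area, and squared distances are made-up values.

import torch

sigmas = torch.tensor([0.026, 0.025, 0.025])       # per-keypoint constants
area = torch.tensor([1000.0])                      # one detection
scales = area * 0.53                               # default bbox area factor

d2 = torch.tensor([[4.0, 9.0, 16.0]])              # squared pixel distances
e = d2 / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2
visibility = torch.tensor([[1.0, 1.0, 0.0]])       # last keypoint not labeled

per_det = ((1 - torch.exp(-e)) * visibility).sum(dim=1) / (
    visibility.sum(dim=1) + 1e-9
)
regression_loss = per_det.mean()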
""" - x, y, visibility_score = process_keypoints_predictions(prediction) - gt_x = target[:, 0::2] - gt_y = target[:, 1::2] + sigmas = self.sigmas.to(prediction.device) + + pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) + target_x = target[:, 0::3] + target_y = target[:, 1::3] + target_visibility = (target[:, 2::3] > 0).float() - mask = target[:, 0::2] != 0 visibility_loss = ( - self.b_cross_entropy.forward(visibility_score, mask.float()) - * self.visibility_weight + self.b_cross_entropy.forward(pred_v, target_visibility) + * self.visibility_loss_weight ) - distance = (x - gt_x) ** 2 + (y - gt_y) ** 2 + scales = area * self.area_factor - loss_factor = (torch.sum(mask != 0) + torch.sum(mask == 0)) / ( - torch.sum(mask != 0) + 1e-9 + distance = (target_x - pred_x) ** 2 + (target_y - pred_y) ** 2 + normalized_distance = ( + distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 ) - distance_loss = ( - loss_factor - * (torch.log(distance + 1 + 1e-9) * mask).mean() - * self.distance_weight + + regression_loss = 1 - torch.exp(-normalized_distance) + regression_loss = (regression_loss * target_visibility).sum(dim=1) / ( + target_visibility.sum(dim=1) + 1e-9 ) - loss = distance_loss + visibility_loss - return loss, {"distance": distance_loss, "visibility": visibility_loss} + regression_loss = regression_loss.mean() + regression_loss *= self.regression_loss_weight + + total_loss = regression_loss + visibility_loss + + return total_loss, { + "kpt_regression": regression_loss, + "kpt_visibility": visibility_loss, + } diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py new file mode 100644 index 00000000..5d9a622c --- /dev/null +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -0,0 +1,391 @@ +from typing import Literal, cast + +import torch +import torch.nn.functional as F +from luxonis_ml.data import LabelType +from torch import Tensor, nn + +from luxonis_train.assigners import RotatedTaskAlignedAssigner +from luxonis_train.nodes.heads import EfficientOBBoxHead +from luxonis_train.utils import ( + IncompatibleException, + Labels, + Packet, + anchors_for_fpn_features, + bbox2dist, + dist2rbbox, + probiou, + xywh2xyxy, + xyxyxyxy2xywhr, +) +from luxonis_train.utils.boundingbox import IoUType + +from .base_loss import BaseLoss + + +class OBBDetectionLoss( + BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] +): + node: EfficientOBBoxHead + supported_labels = [LabelType.OBOUNDINGBOX] + + class NodePacket(Packet[Tensor]): + features: list[Tensor] + class_scores: Tensor + distributions: Tensor + angles: Tensor + + def __init__( + self, + iou_type: IoUType = "giou", + reduction: Literal["sum", "mean"] = "mean", + class_loss_weight: float = 1.0, + iou_loss_weight: float = 2.5, + dfl_loss_weight: float = 1.0, + reg_max: int = 16, + **kwargs, + ): + """OBBox (oriented bounding box) loss partially adapted from U{YOLOv8: + https://github.com/ultralytics/ultralytics/blob/ba438aea5ae4d0e7c28d59ed8408955d16ca71ec/ultralytics/utils/loss.py#L610 + }. It combines IoU based bbox regression, varifocal, and dfl losses. + + @type iou_type: L{IoUType} + @param iou_type: IoU type used for bbox regression loss. + @type reduction: Literal["sum", "mean"] + @param reduction: Reduction type for loss. + @type class_loss_weight: float + @param class_loss_weight: Weight of classification loss. + @type iou_loss_weight: float + @param iou_loss_weight: Weight of IoU loss. 
+ @type dfl_loss_weight: float + @param dfl_loss_weight: Weight of DFL loss. + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of bounding box coordinates. + @type kwargs: dict + @param kwargs: Additional arguments to pass to L{BaseLoss}. + """ + super().__init__(**kwargs) + + if not isinstance(self.node, EfficientOBBoxHead): + raise IncompatibleException( + f"Loss `{self.name}` is only " + "compatible with nodes of type `EfficientOBBoxHead`." + ) + self.iou_type: IoUType = iou_type + self.reduction = reduction + self.stride = self.node.stride + self.grid_cell_size = self.node.grid_cell_size + self.grid_cell_offset = self.node.grid_cell_offset + self.original_img_size = self.node.original_in_shape[1:] + self.reg_max = reg_max + + self.assigner = RotatedTaskAlignedAssigner( + n_classes=self.n_classes, topk=10, alpha=0.5, beta=6.0 + ) + # Bounding box loss + self.bbox_loss = RotatedBboxLoss(self.reg_max) + # Class loss + self.varifocal_loss = VarifocalLoss() + # self.bce = nn.BCEWithLogitsLoss(reduction="none") + + self.class_loss_weight = class_loss_weight + self.iou_loss_weight = iou_loss_weight + self.dfl_loss_weight = dfl_loss_weight + + self.anchors = None + self.anchor_points = None + self.n_anchors_list = None + self.stride_tensor = None + self.gt_bboxes_scale = None + + def prepare( + self, outputs: Packet[Tensor], labels: Labels + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + feats = self.get_input_tensors(outputs, "features") + pred_scores = self.get_input_tensors(outputs, "class_scores")[0] + self.pred_distri = self.get_input_tensors(outputs, "distributions")[0] + pred_angles = self.get_input_tensors(outputs, "angles")[0] + batch_size = pred_scores.shape[0] + device = pred_scores.device + + target = self.get_label(labels)[0] + if self.gt_bboxes_scale is None: + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + feats, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = ( + self.anchor_points / self.stride_tensor + ) + + target = self._preprocess_target( + target, batch_size + ) # [cls, x, y, w, h, r] unnormalized + + proj = torch.arange( + self.reg_max, dtype=torch.float, device=self.pred_distri.device + ) + b, a, c = self.pred_distri.shape # batch, anchors, channels + pred_distri_tensor = ( # we get a tensor of the expected values (mean) of the regression predictions + self.pred_distri.view(b, a, 4, c // 4) + .softmax(3) + .matmul(proj.type(self.pred_distri.dtype)) + ) + pred_bboxes = torch.cat( + ( + dist2rbbox( + pred_distri_tensor, pred_angles, self.anchor_points_strided + ), + pred_angles, + ), + dim=-1, + ) # xywhr unnormalized + + xy_strided = pred_bboxes[..., :2] * self.stride_tensor + pred_bboxes_strided = torch.cat( + [xy_strided, pred_bboxes[..., 2:]], dim=-1 + ) # xywhr unnormalized with xy strided + + gt_cls = target[:, :, :1] + gt_cxcywhr = target[:, :, 1:] + mask_gt = (gt_cxcywhr.sum(-1, keepdim=True) > 0).float() + + # TODO: log change of assigner (once common Logger) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + _, + ) = self.assigner( + pred_scores.detach(), + pred_bboxes_strided.detach(), + self.anchor_points, + gt_cls, + gt_cxcywhr, + mask_gt, + ) + + 
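# Illustrative sketch (shapes are assumptions): the distribution decoding above
# predicts each box side as a categorical distribution over `reg_max` bins and
# collapses it to its expected value via a softmax followed by a dot product
# with the bin indices.
import torch

reg_max, n_anchors = 16, 8
proj = torch.arange(reg_max, dtype=torch.float)
pred_distri = torch.randn(1, n_anchors, 4 * reg_max)

expected = (
    pred_distri.view(1, n_anchors, 4, reg_max).softmax(dim=3).matmul(proj)
)  # (1, n_anchors, 4) expected distances in stride units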
xy_unstrided = assigned_bboxes[..., :2] / self.stride_tensor + assigned_bboxes_unstrided = torch.cat( + [xy_unstrided, assigned_bboxes[..., 2:]], dim=-1 + ) # xywhr unnormalized with xy strided + + return ( + pred_bboxes, + pred_scores, + assigned_bboxes_unstrided, + assigned_labels, + assigned_scores, + mask_positive, + ) + + def forward( + self, + pred_bboxes: Tensor, + pred_scores: Tensor, + assigned_bboxes: Tensor, + assigned_labels: Tensor, + assigned_scores: Tensor, + mask_positive: Tensor, + ): + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + + # CLS loss + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) + # loss_cls = self.bce(pred_scores, assigned_scores) + if assigned_scores.sum() > 1: + loss_cls /= assigned_scores.sum() + + assigned_scores_sum = max(assigned_scores.sum(), 1) + # Bbox loss + self.bbox_loss = self.bbox_loss.to(self.pred_distri.device) + loss_iou, loss_dfl = self.bbox_loss( + self.pred_distri, + pred_bboxes, + self.anchor_points, + assigned_bboxes, + assigned_scores, + assigned_scores_sum, + mask_positive, + ) + + loss = ( + self.class_loss_weight * loss_cls + + self.iou_loss_weight * loss_iou + + self.dfl_loss_weight * loss_dfl + ) + + sub_losses = { + "class": loss_cls.detach(), + "iou": loss_iou.detach(), + "dfl": loss_dfl.detach(), + } + + return loss, sub_losses + + def _preprocess_target(self, target: Tensor, batch_size: int): + """Preprocess target in shape [batch_size, N, 6] where N is + maximum number of instances in one image.""" + idx_cls = target[:, :2] + xyxyxyxy = target[:, 2:] + cxcywhr = xyxyxyxy2xywhr(xyxyxyxy) + if isinstance(cxcywhr, Tensor): + target = torch.cat([idx_cls, cxcywhr.clone().detach()], dim=-1) + else: + target = torch.cat([idx_cls, torch.tensor(cxcywhr)], dim=-1) + sample_ids, counts = cast( + tuple[Tensor, Tensor], + torch.unique(target[:, 0].int(), return_counts=True), + ) + c_max = int(counts.max()) if counts.numel() > 0 else 0 + out_target = torch.zeros(batch_size, c_max, 6, device=target.device) + out_target[:, :, 0] = -1 + for id, count in zip(sample_ids, counts): + out_target[id, :count] = target[target[:, 0] == id][:, 1:] + + scaled_target = out_target[:, :, 1:5] * self.gt_bboxes_scale + scaled_target_angle = torch.cat( + # [scaled_target, out_target[:, :, 5].transpose(0, 1).unsqueeze(0)], dim=-1 + [scaled_target, out_target[:, :, 5].unsqueeze(-1)], + dim=-1, + ) + out_target[..., 1:] = scaled_target_angle + return out_target + + +class VarifocalLoss(nn.Module): + def __init__(self, alpha: float = 0.75, gamma: float = 2.0): + """Varifocal Loss is a loss function for training a dense object detector to predict + the IoU-aware classification score, inspired by focal loss. + Code is adapted from: U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models/losses.py} + + @type alpha: float + @param alpha: alpha parameter in focal loss, default is 0.75. + @type gamma: float + @param gamma: gamma parameter in focal loss, default is 2.0. + """ + + super().__init__() + + self.alpha = alpha + self.gamma = gamma + + def forward( + self, pred_score: Tensor, target_score: Tensor, label: Tensor + ) -> Tensor: + weight = ( + self.alpha * pred_score.pow(self.gamma) * (1 - label) + + target_score * label + ) + ce_loss = F.binary_cross_entropy( + pred_score.float(), target_score.float(), reduction="none" + ) + loss = (ce_loss * weight).sum() + return loss + + +class DFLoss(nn.Module): + """Criterion class for computing DFL losses during training. 
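A worked sketch of the two-bin interpolation this DFL criterion performs; the bin count and target value are illustrative.

import torch
import torch.nn.functional as F

reg_max = 16
pred_dist = torch.randn(1, reg_max)   # predicted distribution for one box side
target = torch.tensor([3.4])          # continuous distance in bin units

tl = target.long()                    # left bin   -> 3
tr = tl + 1                           # right bin  -> 4
wl = tr - target                      # left weight  -> 0.6
wr = 1 - wl                           # right weight -> 0.4

loss = (
    F.cross_entropy(pred_dist, tl, reduction="none") * wl
    + F.cross_entropy(pred_dist, tr, reduction="none") * wr
)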
+ + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of + bounding box coordinates. + """ + + def __init__(self, reg_max=16) -> None: + """Initialize the DFL module.""" + super().__init__() + self.reg_max = reg_max + + def __call__(self, pred_dist, target): + """Return sum of left and right DFL losses. + + Distribution Focal Loss (DFL) proposed in Generalized Focal Loss + https://ieeexplore.ieee.org/document/9792391 + """ + target = target.clamp_(0, self.reg_max - 1 - 0.01) + tl = target.long() # target left + # tl = target # target left + tr = tl + 1 # target right + wl = tr - target # weight left + wr = 1 - wl # weight right + return ( + F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view( + tl.shape + ) + * wl + + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view( + tl.shape + ) + * wr + ).mean(-1, keepdim=True) + + +class RotatedBboxLoss(nn.Module): + def __init__(self, reg_max): + """Criterion class for computing training losses during + training. + + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions + of bounding box coordinates. + """ + super().__init__() + self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None + + def forward( + self, + pred_dist, + pred_bboxes, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask, + ): + """IoU loss.""" + weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1) + iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask]) + loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum + + # DFL loss + if self.dfl_loss: + target_ltrb = bbox2dist( + xywh2xyxy(target_bboxes[..., :4]), + anchor_points, + self.dfl_loss.reg_max - 1, + ) + loss_dfl = ( + self.dfl_loss( + pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), + target_ltrb[fg_mask], + ) + * weight + ) + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl diff --git a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py index 31e16051..884d4863 100644 --- a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py +++ b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py @@ -1,5 +1,6 @@ -from typing import Literal +from typing import Any, Literal +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import sigmoid_focal_loss @@ -7,12 +8,14 @@ class SigmoidFocalLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): """Focal loss from U{Focal Loss for Dense Object Detection }. 
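A minimal usage sketch of torchvision's sigmoid_focal_loss, which the forward below delegates to; alpha scales the rare class and (1 - p_t) ** gamma down-weights easy examples. Tensor sizes are illustrative.

import torch
from torchvision.ops import sigmoid_focal_loss

preds = torch.randn(4, 3)                      # logits
target = torch.randint(0, 2, (4, 3)).float()   # binary targets

loss = sigmoid_focal_loss(
    preds, target, alpha=0.25, gamma=2.0, reduction="mean"
)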
@@ -34,7 +37,11 @@ def __init__( def forward(self, preds: Tensor, target: Tensor) -> Tensor: loss = sigmoid_focal_loss( - preds, target, alpha=self.alpha, gamma=self.gamma, reduction=self.reduction + preds, + target, + alpha=self.alpha, + gamma=self.gamma, + reduction=self.reduction, ) return loss diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index 48f827d6..edc2bf98 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -1,6 +1,7 @@ -from typing import Literal +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor from .base_loss import BaseLoss @@ -8,39 +9,41 @@ class SmoothBCEWithLogitsLoss(BaseLoss[list[Tensor], Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, label_smoothing: float = 0.0, bce_pow: float = 1.0, weight: list[float] | None = None, reduction: Literal["mean", "sum", "none"] = "mean", - **kwargs, + **kwargs: Any, ): """BCE with logits loss and label smoothing. @type label_smoothing: float - @param label_smoothing: Label smoothing factor. Defaults to C{0.0}. + @param label_smoothing: Label smoothing factor. Defaults to + C{0.0}. @type bce_pow: float @param bce_pow: Weight for positive samples. Defaults to C{1.0}. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, it has to be a list of length C{nbatch}. + @param weight: a manual rescaling weight given to the loss of + each batch element. If given, it has to be a list of length + C{nbatch}. @type reduction: Literal["mean", "sum", "none"] - @param reduction: Specifies the reduction to apply to the output: C{'none'} | - C{'mean'} | C{'sum'}. C{'none'}: no reduction will be applied, C{'mean'}: - the sum of the output will be divided by the number of elements in the - output, C{'sum'}: the output will be summed. Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{'mean'}. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. + @param reduction: Specifies the reduction to apply to the + output: C{'none'} | C{'mean'} | C{'sum'}. C{'none'}: no + reduction will be applied, C{'mean'}: the sum of the output + will be divided by the number of elements in the output, + C{'sum'}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{'mean'}. """ super().__init__(**kwargs) - self.negative_smooth_const = 1.0 - 0.5 * label_smoothing - self.positive_smooth_const = 0.5 * label_smoothing + self.positive_smooth_const = 1.0 - label_smoothing + self.negative_smooth_const = label_smoothing self.criterion = BCEWithLogitsLoss( - node=self.node, pos_weight=torch.tensor( [bce_pow], ), @@ -48,22 +51,26 @@ def __init__( reduction=reduction, ) - def forward(self, predictions: list[Tensor], target: Tensor) -> Tensor: + def forward(self, predictions: Tensor, target: Tensor) -> Tensor: """Computes the BCE loss with label smoothing. - @type predictions: list[Tensor] - @param predictions: List of tensors of shape (N, n_classes), containing the - predicted class scores. 
+ @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, ...) @type target: Tensor - @param target: A tensor of shape (N,), containing the ground-truth class labels + @param target: A tensor of the same shape as predictions. @rtype: Tensor @return: A scalar tensor. """ - prediction = predictions[0] - smoothed_target = torch.full_like( - prediction, - self.negative_smooth_const, - device=prediction.device, - ) - smoothed_target[torch.arange(len(target)), target] = self.positive_smooth_const - return self.criterion.forward(prediction, smoothed_target) + if predictions.shape != target.shape: + raise RuntimeError( + f"Target tensor dimension ({target.shape}) and predictions tensor " + f"dimension ({predictions.shape}) should be the same." + ) + + if self.negative_smooth_const != 0.0: + target = ( + target * self.positive_smooth_const + + (1 - target) * self.negative_smooth_const + ) + + return self.criterion(predictions, target) diff --git a/luxonis_train/attached_modules/losses/softmax_focal_loss.py b/luxonis_train/attached_modules/losses/softmax_focal_loss.py index 57b288f3..43c844f3 100644 --- a/luxonis_train/attached_modules/losses/softmax_focal_loss.py +++ b/luxonis_train/attached_modules/losses/softmax_focal_loss.py @@ -1,27 +1,34 @@ -# TODO: document - -from typing import Literal +import logging +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor from luxonis_train.attached_modules.losses import BaseLoss from .cross_entropy import CrossEntropyLoss +logger = logging.getLogger(__name__) + +# TODO: Add support for multi-class tasks class SoftmaxFocalLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, - alpha: float | list[float] = 0.25, + alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): - """Focal loss implementation for multi-class/multi-label tasks using Softmax. + """Focal loss implementation for binary classification and + segmentation tasks using Softmax. - @type alpha: float | list[float] - @param alpha: Weighting factor for the rare class. Defaults to C{0.25}. + @type alpha: float + @param alpha: Weighting factor for the rare class. Defaults to + C{0.25}. @type gamma: float @param gamma: Focusing parameter. Defaults to C{2.0}. @type reduction: Literal["none", "mean", "sum"] @@ -37,13 +44,7 @@ def __init__( def forward(self, predictions: Tensor, target: Tensor) -> Tensor: ce_loss = self.ce_criterion.forward(predictions, target) pt = torch.exp(-ce_loss) - loss = ce_loss * ((1 - pt) ** self.gamma) - - if isinstance(self.alpha, float) and self.alpha >= 0: - loss = self.alpha * loss - elif isinstance(self.alpha, list): - alpha_t = torch.tensor(self.alpha)[target] - loss = alpha_t * loss + loss = ce_loss * ((1 - pt) ** self.gamma) * self.alpha if self.reduction == "mean": loss = loss.mean() diff --git a/luxonis_train/attached_modules/metrics/README.md b/luxonis_train/attached_modules/metrics/README.md index 4e452158..17735540 100644 --- a/luxonis_train/attached_modules/metrics/README.md +++ b/luxonis_train/attached_modules/metrics/README.md @@ -42,3 +42,5 @@ boxes. ## MeanAveragePrecisionKeypoints Similar to [MeanAveragePrecision](#meanaverageprecision), but uses [OKS](#objectkeypointsimilarity) as `IoU` measure. 
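For reference, a sketch of the standard COCO OKS definition this metric relies on:

$$\mathrm{OKS} = \frac{\sum_i \exp\!\left(-\tfrac{d_i^2}{2\,s^2\,k_i^2}\right)\,\delta(v_i > 0)}{\sum_i \delta(v_i > 0)}$$

where $d_i$ is the distance between predicted and ground-truth keypoint $i$, $s^2$ is the object scale (area), $k_i$ is the per-keypoint constant (sigma), and $v_i$ is the ground-truth visibility flag.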
+For a deeper understanding of how OKS works, please refer to the detailed explanation provided [here](https://learnopencv.com/object-keypoint-similarity/). +Evaluation leverages COCO evaluation framework (COCOeval) to assess mAP performance. diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index 9e73e4ac..bef3b62a 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -1,8 +1,9 @@ from .base_metric import BaseMetric -from .common import Accuracy, F1Score, JaccardIndex, Precision, Recall from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints +from .mean_average_precision_obb import MeanAveragePrecisionOBB from .object_keypoint_similarity import ObjectKeypointSimilarity +from .torchmetrics import Accuracy, F1Score, JaccardIndex, Precision, Recall __all__ = [ "Accuracy", @@ -10,6 +11,7 @@ "JaccardIndex", "BaseMetric", "MeanAveragePrecision", + "MeanAveragePrecisionOBB", "MeanAveragePrecisionKeypoints", "ObjectKeypointSimilarity", "Precision", diff --git a/luxonis_train/attached_modules/metrics/base_metric.py b/luxonis_train/attached_modules/metrics/base_metric.py index f2334163..a4109d2d 100644 --- a/luxonis_train/attached_modules/metrics/base_metric.py +++ b/luxonis_train/attached_modules/metrics/base_metric.py @@ -5,8 +5,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import METRICS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") @@ -19,8 +19,9 @@ class BaseMetric( ): """A base class for all metrics. - This class defines the basic interface for all metrics. It utilizes automatic - registration of defined subclasses to a L{METRICS} registry. + This class defines the basic interface for all metrics. It utilizes + automatic registration of defined subclasses to a L{METRICS} + registry. """ @abstractmethod @@ -33,7 +34,9 @@ def update(self, *args: Unpack[Ts]) -> None: ... @abstractmethod - def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: + def compute( + self, + ) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: """Computes the metric. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor] @@ -48,13 +51,14 @@ def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tenso def run_update(self, outputs: Packet[Tensor], labels: Labels) -> None: """Calls the metric's update method. - Validates and prepares the inputs, then calls the metric's update method. + Validates and prepares the inputs, then calls the metric's + update method. @type outputs: Packet[Tensor] @param outputs: The outputs of the model. @type labels: Labels - @param labels: The labels of the model. @raises L{IncompatibleException}: If the - inputs are not compatible with the module. + @param labels: The labels of the model. @raises + L{IncompatibleException}: If the inputs are not compatible + with the module. 
""" - self.validate(outputs, labels) self.update(*self.prepare(outputs, labels)) diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py deleted file mode 100644 index 27d1069a..00000000 --- a/luxonis_train/attached_modules/metrics/common.py +++ /dev/null @@ -1,76 +0,0 @@ -import logging - -import torchmetrics - -from .base_metric import BaseMetric - -logger = logging.getLogger(__name__) - - -class TorchMetricWrapper(BaseMetric): - def __init__(self, **kwargs): - super().__init__( - node=kwargs.pop("node", None), - protocol=kwargs.pop("protocol", None), - required_labels=kwargs.pop("required_labels", None), - ) - task = kwargs.get("task") - - if task is None: - if self.node.n_classes > 1: - task = "multiclass" - else: - task = "binary" - logger.warning( - f"Task type not specified for {self.__class__.__name__}, " - f"assuming {task}." - ) - kwargs["task"] = task - self.task = task - - if self.task == "multiclass": - if "num_classes" not in kwargs: - if self.node is None: - raise ValueError( - "Either `node` or `num_classes` must be provided to " - "multiclass torchmetrics." - ) - kwargs["num_classes"] = self.node.n_classes - elif self.task == "multilabel": - if "num_labels" not in kwargs: - if self.node is None: - raise ValueError( - "Either `node` or `num_labels` must be provided to " - "multilabel torchmetrics." - ) - kwargs["num_labels"] = self.node.n_classes - - self.metric = self.Metric(**kwargs) - - def update(self, preds, target, *args, **kwargs): - if self.task in ["multiclass"]: - target = target.argmax(dim=1) - self.metric.update(preds, target, *args, **kwargs) - - def compute(self): - return self.metric.compute() - - -class Accuracy(TorchMetricWrapper): - Metric = torchmetrics.Accuracy - - -class F1Score(TorchMetricWrapper): - Metric = torchmetrics.F1Score - - -class JaccardIndex(TorchMetricWrapper): - Metric = torchmetrics.JaccardIndex - - -class Precision(TorchMetricWrapper): - Metric = torchmetrics.Precision - - -class Recall(TorchMetricWrapper): - Metric = torchmetrics.Recall diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index 34adbcd9..6d51f55b 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -1,31 +1,30 @@ +from typing import Any + import torchmetrics.detection as detection +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import ( - BBoxProtocol, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils import Labels, Packet from .base_metric import BaseMetric -class MeanAveragePrecision(BaseMetric, detection.MeanAveragePrecision): - """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object - detection predictions. +class MeanAveragePrecision( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) for object detection predictions. - Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) + Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) }. 
""" - def __init__(self, **kwargs): - super().__init__( - protocol=BBoxProtocol, - required_labels=[LabelType.BOUNDINGBOX], - **kwargs, - ) + supported_labels = [LabelType.BOUNDINGBOX] + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) self.metric = detection.MeanAveragePrecision() def update( @@ -36,12 +35,12 @@ def update( self.metric.update(outputs, labels) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - label = labels[LabelType.BOUNDINGBOX] - output_nms = outputs["boxes"] + box_label = self.get_label(labels) + output_nms = self.get_input_tensors(inputs) - image_size = self.node.original_in_shape[2:] + image_size = self.original_in_shape[1:] output_list: list[dict[str, Tensor]] = [] label_list: list[dict[str, Tensor]] = [] @@ -54,20 +53,35 @@ def prepare( } ) - curr_label = label[label[:, 0] == i] + curr_label = box_label[box_label[:, 0] == i] curr_bboxs = box_convert(curr_label[:, 2:], "xywh", "xyxy") curr_bboxs[:, 0::2] *= image_size[1] curr_bboxs[:, 1::2] *= image_size[0] - label_list.append({"boxes": curr_bboxs, "labels": curr_label[:, 1].int()}) + label_list.append( + {"boxes": curr_bboxs, "labels": curr_label[:, 1].int()} + ) return output_list, label_list + def reset(self) -> None: + self.metric.reset() + def compute(self) -> tuple[Tensor, dict[str, Tensor]]: - metric_dict = self.metric.compute() + metric_dict: dict[str, Tensor] = self.metric.compute() del metric_dict["classes"] del metric_dict["map_per_class"] del metric_dict["mar_100_per_class"] + for key in list(metric_dict.keys()): + if "map" in key: + map = metric_dict[key] + mar_key = key.replace("map", "mar") + if mar_key in metric_dict: + mar = metric_dict[mar_key] + metric_dict[key.replace("map", "f1")] = ( + 2 * (map * mar) / (map + mar) + ) + map = metric_dict.pop("map") return map, metric_dict diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 3740f58e..3b34c242 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -3,32 +3,27 @@ from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import ( - BBoxProtocol, - KeypointProtocol, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric -class Protocol(KeypointProtocol, BBoxProtocol): - ... - - -class MeanAveragePrecisionKeypoints(BaseMetric): +class MeanAveragePrecisionKeypoints( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): """Mean Average Precision metric for keypoints. Uses C{OKS} as IoU measure. 
""" + supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] + is_differentiable: bool = False higher_is_better: bool = True full_state_update: bool = True @@ -46,37 +41,39 @@ class MeanAveragePrecisionKeypoints(BaseMetric): def __init__( self, - kpt_sigmas: Tensor | None = None, + sigmas: list[float] | None = None, + area_factor: float | None = None, + max_dets: int = 20, box_format: Literal["xyxy", "xywh", "cxcywh"] = "xyxy", **kwargs, ): - """Implementation of the mean average precision metric for keypoint detections. + """Implementation of the mean average precision metric for + keypoint detections. Adapted from: U{https://github.com/Lightning-AI/torchmetrics/blob/v1.0.1/src/ torchmetrics/detection/mean_ap.py}. - @license: Apache-2.0 License + @license: Apache License, Version 2.0 - @type num_keypoints: int - @param num_keypoints: Number of keypoints. - @type kpt_sigmas: Tensor or None - @param kpt_sigmas: Sigma for each keypoint to weigh its importance, if None use same weights for all. + @type sigmas: list[float] | None + @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then + use COCO if possible otherwise defaults. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area. If None then use default one. Defaults to C{None}. + @type max_dets: int, + @param max_dets: Maximum number of detections to be considered per image. Defaults to C{20}. @type box_format: Literal["xyxy", "xywh", "cxcywh"] @param box_format: Input bbox format. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseMetric}. """ - super().__init__( - protocol=Protocol, - required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINT], - **kwargs, - ) + super().__init__(**kwargs) - self.n_keypoints = self.node.n_keypoints - - if kpt_sigmas is not None and len(kpt_sigmas) != self.n_keypoints: - raise ValueError("Expected kpt_sigmas to be of shape (num_keypoints).") - self.kpt_sigmas = kpt_sigmas or torch.ones(self.n_keypoints) + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) + self.max_dets = max_dets allowed_box_formats = ("xyxy", "xywh", "cxcywh") if box_format not in allowed_box_formats: @@ -94,11 +91,17 @@ def __init__( self.add_state("groundtruth_labels", default=[], dist_reduce_fx=None) self.add_state("groundtruth_area", default=[], dist_reduce_fx=None) self.add_state("groundtruth_crowds", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) + + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: + assert self.node.tasks is not None + kpts = self.get_label(labels, LabelType.KEYPOINTS) + boxes = self.get_label(labels, LabelType.BOUNDINGBOX) - def prepare(self, outputs: Packet[Tensor], labels: Labels): - kpts = labels[LabelType.KEYPOINT] - boxes = labels[LabelType.BOUNDINGBOX] nkpts = (kpts.shape[1] - 2) // 3 label = torch.zeros((len(boxes), nkpts * 3 + 6)) label[:, :2] = boxes[:, :2] @@ -107,19 +110,21 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels): label[:, 7::3] = kpts[:, 3::3] # y label[:, 8::3] = kpts[:, 4::3] # visiblity - output_list_kpt_map = [] - label_list_kpt_map = [] - image_size = self.node.original_in_shape[2:] + output_list_kpt_map: 
list[dict[str, Tensor]] = [] + label_list_kpt_map: list[dict[str, Tensor]] = [] + image_size = self.original_in_shape[1:] - output_kpts: list[Tensor] = outputs["keypoints"] - output_bboxes: list[Tensor] = outputs["boxes"] + output_kpts = self.get_input_tensors(inputs, LabelType.KEYPOINTS) + output_bboxes = self.get_input_tensors(inputs, LabelType.BOUNDINGBOX) for i in range(len(output_kpts)): output_list_kpt_map.append( { "boxes": output_bboxes[i][:, :4], "scores": output_bboxes[i][:, 4], "labels": output_bboxes[i][:, 5].int(), - "keypoints": output_kpts[i].reshape(-1, self.n_keypoints * 3), + "keypoints": output_kpts[i].reshape( + -1, self.n_keypoints * 3 + ), } ) @@ -214,7 +219,7 @@ def compute(self) -> tuple[Tensor, dict[str, Tensor]]: coco_preds.dataset = self._get_coco_format( self.pred_boxes, self.pred_keypoints, - self.groundtruth_labels, + self.pred_labels, scores=self.pred_scores, ) # type: ignore @@ -222,8 +227,11 @@ def compute(self) -> tuple[Tensor, dict[str, Tensor]]: coco_target.createIndex() coco_preds.createIndex() - self.coco_eval = COCOeval(coco_target, coco_preds, iouType="keypoints") - self.coco_eval.params.kpt_oks_sigmas = self.kpt_sigmas.cpu().numpy() + self.coco_eval = COCOeval( + coco_target, coco_preds, iouType="keypoints" + ) + self.coco_eval.params.kpt_oks_sigmas = self.sigmas.cpu().numpy() + self.coco_eval.params.maxDets = [self.max_dets] self.coco_eval.evaluate() self.coco_eval.accumulate() @@ -252,20 +260,24 @@ def _get_coco_format( crowds: list[Tensor] | None = None, area: list[Tensor] | None = None, ) -> dict[str, list[dict[str, Any]]]: - """Transforms and returns all cached targets or predictions in COCO format. + """Transforms and returns all cached targets or predictions in + COCO format. - Format is defined at U{https://cocodataset.org/#format-data}. + Format is defined at U{ + https://cocodataset.org/#format-data}. 
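A sketch of the COCO-style dictionary that _get_coco_format assembles below; all values are illustrative.

coco_like = {
    "images": [{"id": 0}],
    "annotations": [
        {
            "id": 1,                        # must start at 1 for COCOeval
            "image_id": 0,
            "bbox": [10.0, 20.0, 30.0, 40.0],
            "area": 30.0 * 40.0 * 0.53,     # bbox area scaled by `area_factor`
            "category_id": 2,
            "iscrowd": 0,
            "keypoints": [12.0, 22.0, 2.0, 0.0, 0.0, 0.0],
            "num_keypoints": 1,             # keypoints with non-zero coords
            "score": 0.9,                   # present for predictions only
        }
    ],
    "categories": [{"id": 2, "name": "2"}],
}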
""" - images = [] - annotations = [] - annotation_id = 1 # has to start with 1, otherwise COCOEval results are wrong + images: list[dict[str, int]] = [] + annotations: list[dict[str, Any]] = [] + annotation_id = ( + 1 # has to start with 1, otherwise COCOEval results are wrong + ) for image_id, (image_boxes, image_kpts, image_labels) in enumerate( zip(boxes, keypoints, labels) ): - image_boxes_list = image_boxes.cpu().tolist() - image_kpts_list = image_kpts.cpu().tolist() - image_labels_list = image_labels.cpu().tolist() + image_boxes_list: list[list[float]] = image_boxes.cpu().tolist() + image_kpts_list: list[list[float]] = image_kpts.cpu().tolist() + image_labels_list: list[int] = image_labels.cpu().tolist() images.append({"id": image_id}) @@ -293,23 +305,34 @@ def _get_coco_format( if area is not None and area[image_id][k].cpu().item() > 0: area_stat = area[image_id][k].cpu().tolist() else: - area_stat = image_box[2] * image_box[3] - + area_stat = image_box[2] * image_box[3] * self.area_factor + + n_keypoints = len( + [ + i + for i in range(2, len(image_kpt), 3) + if image_kpt[i] != 0 + ] + ) # number of annotated keypoints annotation = { "id": annotation_id, "image_id": image_id, "bbox": image_box, "area": area_stat, "category_id": image_label, - "iscrowd": crowds[image_id][k].cpu().tolist() - if crowds is not None - else 0, + "iscrowd": ( + crowds[image_id][k].cpu().tolist() + if crowds is not None + else 0 + ), "keypoints": image_kpt, - "num_keypoints": self.n_keypoints, + "num_keypoints": n_keypoints, } if scores is not None: score = scores[image_id][k].cpu().tolist() + # `tolist` returns a number for scalar tensors, + # the name is misleading if not isinstance(score, float): raise ValueError( f"Invalid input score of sample {image_id}, element {k}" @@ -320,9 +343,15 @@ def _get_coco_format( annotation_id += 1 classes = [{"id": i, "name": str(i)} for i in self._get_classes()] - return {"images": images, "annotations": annotations, "categories": classes} + return { + "images": images, + "annotations": annotations, + "categories": classes, + } - def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor]: + def _get_safe_item_values( + self, item: dict[str, Tensor] + ) -> tuple[Tensor, Tensor]: """Convert and return the boxes.""" boxes = self._fix_empty_tensors(item["boxes"]) if boxes.numel() > 0: @@ -331,7 +360,8 @@ def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor return boxes, keypoints def _get_classes(self) -> list[int]: - """Return a list of unique classes found in ground truth and detection data.""" + """Return a list of unique classes found in ground truth and + detection data.""" if len(self.pred_labels) > 0 or len(self.groundtruth_labels) > 0: return ( torch.cat(self.pred_labels + self.groundtruth_labels) @@ -343,7 +373,8 @@ def _get_classes(self) -> list[int]: @staticmethod def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" if input_tensor.numel() == 0 and input_tensor.ndim == 1: return input_tensor.unsqueeze(0) return input_tensor diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py new file mode 100644 index 00000000..3421d765 --- /dev/null +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -0,0 +1,462 @@ +import numpy as np 
+import torch +from luxonis_ml.data import LabelType +from torch import Tensor + +from luxonis_train.utils import Labels, Packet, batch_probiou, xyxyxyxy2xywhr + +from .base_metric import BaseMetric + + +class MeanAveragePrecisionOBB(BaseMetric): + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) for object detection predictions using oriented bounding + boxes. + + Partially adapted from U{YOLOv8 OBBMetrics + }. + """ + + supported_labels = [LabelType.OBOUNDINGBOX] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.p = [] # precision for each class. Shape: (nc,) + self.r = [] # recall for each class. Shape: (nc,) + self.f1 = [] # F1 score for each class. Shape: (nc,) + self.all_ap = [] # AP scores for all classes and all IoU thresholds. Shape: (nc, 10) + self.ap_class_index = [] # index of class for each AP score. Shape: (nc,) + self.nc = 0 # number of classes + + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[]) + + self.iouv = torch.linspace( + 0.5, 0.95, 10 + ) # IoU thresholds from 0.50 to 0.95 in spaces of 0.05 mAP@0.5:0.95 + + def update( + self, + outputs: list[Tensor], # preds + labels: list[Tensor], # batch + ): + """Update metrics without erasing stats from the previous batch, + i.e. the metrics are calculated cumulatively. + + @type outputs: list[Tensor] + @param outputs: Network predictions [x1, y1, x2, y2, conf, + cls_idx, r] unnormalized (not in [0, 1] range) + [Tensor(n_bboxes, 7)] + @type labels: list[Tensor] + @param labels: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in + [0, 1] range) [Tensor(n_bboxes, 6)] + """ + for si, output in enumerate(outputs): + self.stats["conf"].append(output[:, 4]) + self.stats["pred_cls"].append(output[:, 5]) + self.stats["target_cls"].append(labels[si][:, 0]) + gt_cls = labels[si][:, :1] # cls_idx + gt_bboxes = labels[si][:, 1:] # [x1, y1, x2, y2, r] + self.stats["tp"].append( + self._process_batch( + detections=output, gt_bboxes=gt_bboxes, gt_cls=gt_cls + ) + ) + + results = self._process( + torch.cat(self.stats["tp"]).cpu().numpy(), + torch.cat(self.stats["conf"]).cpu().numpy(), + torch.cat(self.stats["pred_cls"]).cpu().numpy(), + torch.cat(self.stats["target_cls"]).cpu().numpy(), + ) + + self._update_metrics(results) + + def prepare( + self, outputs: Packet[Tensor], labels: Labels + ) -> tuple[list[Tensor], list[Tensor]]: + # outputs_nms: [x, y, w, h, r, conf, cls_idx] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] + # obb_labels: [img_id, cls_idx, x1, y1, x2, y2, x3, y3, x4, y4] normalized (in [0, 1] range) [Tensor(n_bboxes, 10)] + obb_labels = self.get_label(labels)[0] + output_nms = self.get_input_tensors(outputs) + pred_scores = self.get_input_tensors(outputs, "class_scores")[ + 0 + ] # needed for batch size + + batch_size = pred_scores.shape[0] + img_size = self.node.original_in_shape[1:] + + output_labels = [] + for i in range(len(output_nms)): + output_nms[i][..., [0, 1, 2, 3, 4, 5, 6]] = output_nms[i][ + ..., [0, 1, 2, 3, 5, 6, 4] + ] # move angle to the end + + curr_label = obb_labels[obb_labels[:, 0] == i] + output_labels.append( + self._preprocess_target(curr_label, batch_size, img_size) + ) + + return output_nms, output_labels + + def _preprocess_target( + self, target: Tensor, batch_size: int, img_size + ) -> Tensor: + """Preprocess target in shape [batch_size, N, 6] where N is + maximum number of instances in one image.""" + cls_idx = target[:, 1].unsqueeze(-1) + xyxyxyxy = target[:, 2:] + xyxyxyxy[:, 0::2] *= img_size[1] # scale x + xyxyxyxy[:, 1::2] 
*= img_size[0] # scale y
+ xcycwhr = xyxyxyxy2xywhr(xyxyxyxy)
+ if isinstance(xcycwhr, np.ndarray):
+ xcycwhr = torch.tensor(xcycwhr)
+ out_target = torch.cat([cls_idx, xcycwhr], dim=-1)
+ return out_target
+
+ def reset(self) -> None:
+ self.p = []
+ self.r = []
+ self.f1 = []
+ self.all_ap = []
+ self.ap_class_index = []
+
+ def compute(
+ self,
+ ) -> tuple[Tensor, dict[str, Tensor]]:
+ """Process predicted results for object detection and update
+ metrics."""
+ results = self._process(
+ torch.cat(self.stats["tp"]).cpu().numpy(),
+ torch.cat(self.stats["conf"]).cpu().numpy(),
+ torch.cat(self.stats["pred_cls"]).cpu().numpy(),
+ torch.cat(self.stats["target_cls"]).cpu().numpy(),
+ )
+
+ metrics = {
+ "p": torch.tensor(np.mean(results[0])),
+ "r": torch.tensor(np.mean(results[1])),
+ "f1": torch.tensor(np.mean(results[2])),
+ "all_ap": torch.tensor(np.mean(results[3])),
+ "ap_class_index": torch.tensor(np.mean(results[4])),
+ }
+
+ map = torch.tensor(MeanAveragePrecisionOBB.map(results[5])) # all_ap
+
+ return map, metrics
+
+ def _process_batch(
+ self, detections: Tensor, gt_bboxes: Tensor, gt_cls: Tensor
+ ) -> Tensor:
+ """Perform computation of the correct prediction matrix for a
+ batch of detections and ground truth bounding boxes.
+
+ Example:
+
+ >>> detections = torch.rand(100, 7) # 100 sample detections
+ >>> gt_bboxes = torch.rand(50, 5) # 50 sample ground truth boxes
+ >>> gt_cls = torch.randint(0, 5, (50,)) # 50 ground truth class labels
+ >>> correct_matrix = OBBValidator._process_batch(detections, gt_bboxes, gt_cls)
+
+ @type detections: Tensor
+ @param detections: A tensor of shape (N, 7) representing the detected bounding boxes and associated
+ data. Each detection is represented as (x1, y1, x2, y2, conf, class, angle).
+ @type gt_bboxes: Tensor
+ @param gt_bboxes: A tensor of shape (M, 5) representing the ground truth bounding boxes. Each box is
+ represented as (x1, y1, x2, y2, angle).
+ @type gt_cls: Tensor
+ @param gt_cls: A tensor of shape (M,) representing class labels for the ground truth bounding boxes.
+ @rtype: Tensor
+ @return: The correct prediction matrix with shape (N, 10), which includes 10 IoU (Intersection over
+ Union) levels for each detection, indicating the accuracy of predictions compared to the ground truth.
+
+ @note: This method relies on C{batch_probiou} to calculate IoU between detections and ground truth bounding boxes.
+ """
+ iou = batch_probiou(
+ gt_bboxes,
+ torch.cat([detections[:, :4], detections[:, -1:]], dim=-1),
+ )
+ return self.match_predictions(detections[:, 5], gt_cls, iou)
+
+ def match_predictions(
+ self,
+ pred_classes: Tensor,
+ true_classes: Tensor,
+ iou: Tensor,
+ use_scipy: bool = False,
+ ) -> Tensor:
+ """Matches predictions to ground truth objects (pred_classes,
+ true_classes) using IoU.
+
+ @type pred_classes: Tensor
+ @param pred_classes: Predicted class indices of shape (N,).
+ @type true_classes: Tensor
+ @param true_classes: Target class indices of shape (M,).
+ @type iou: Tensor
+ @param iou: Tensor of pairwise IoU values between the ground
+ truth boxes (rows) and the predictions (columns).
+ @type use_scipy: bool
+ @param use_scipy: Whether to use scipy for matching (more
+ precise).
+ @rtype: Tensor
+ @return: Correct tensor of shape (N, 10) for 10 IoU thresholds.
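+
+ Example (an illustrative sketch only; the C{metric} instance and the
+ shapes and values below are assumed):
+
+ >>> pred_cls = torch.tensor([0.0, 1.0, 0.0]) # 3 detections
+ >>> true_cls = torch.tensor([0.0, 1.0]) # 2 ground truths
+ >>> iou = torch.rand(2, 3) # (n_ground_truths, n_detections)
+ >>> correct = metric.match_predictions(pred_cls, true_cls, iou)
+ >>> correct.shape # one column per IoU threshold
+ torch.Size([3, 10])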
+ """ + # Dx10 matrix, where D - detections, 10 - IoU thresholds + correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype( + bool + ) + # LxD matrix where L - labels (rows), D - detections (columns) + correct_class = true_classes[:, None] == pred_classes + iou = iou * correct_class # zero out the wrong classes + iou = iou.cpu().numpy() + for i, threshold in enumerate(self.iouv.cpu().tolist()): + if use_scipy: + # WARNING: known issue that reduces mAP in https://github.com/ultralytics/ultralytics/pull/4708 + import scipy # scope import to avoid importing for all commands + + cost_matrix = iou * (iou >= threshold) + if cost_matrix.any(): + labels_idx, detections_idx = ( + scipy.optimize.linear_sum_assignment( + cost_matrix, maximize=True + ) + ) + valid = cost_matrix[labels_idx, detections_idx] > 0 + if valid.any(): + correct[detections_idx[valid], i] = True + else: + matches = np.nonzero( + iou >= threshold + ) # IoU > threshold and classes match + matches = np.array(matches).T + if matches.shape[0]: + if matches.shape[0] > 1: + matches = matches[ + iou[matches[:, 0], matches[:, 1]].argsort()[::-1] + ] + matches = matches[ + np.unique(matches[:, 1], return_index=True)[1] + ] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[ + np.unique(matches[:, 0], return_index=True)[1] + ] + correct[matches[:, 1].astype(int), i] = True + return torch.tensor( + correct, dtype=torch.bool, device=pred_classes.device + ) + + def _update_metrics(self, results: tuple[np.ndarray, ...]): + """Updates the evaluation metrics of the model with a new set of + results. + + @type results: tuple[np.ndarray, ...] + @param results: A tuple containing the following evaluation metrics: + - p (list): Precision for each class. Shape: (nc,). + - r (list): Recall for each class. Shape: (nc,). + - f1 (list): F1 score for each class. Shape: (nc,). + - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10). + - ap_class_index (list): Index of class for each AP score. Shape: (nc,). + + @note: Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, + and `self.ap_class_index` based on the values provided in the `results` tuple. + """ + # The following logic impies averaging AP over all classes + self.p = torch.tensor(np.mean(results[0])) + self.r = torch.tensor(np.mean(results[1])) + self.f1 = torch.tensor(np.mean(results[2])) + self.all_ap = torch.tensor(np.mean(results[3])) + self.ap_class_index = torch.tensor(np.mean(results[4])) + + def _process( + self, + tp: np.ndarray, + conf: np.ndarray, + pred_cls: np.ndarray, + target_cls: np.ndarray, + ) -> tuple[np.ndarray, ...]: + """Process predicted results for object detection and update + metrics.""" + results = MeanAveragePrecisionOBB.ap_per_class( + tp, + conf, + pred_cls, + target_cls, + )[2:] + return results + + @staticmethod + def ap_per_class( + tp: np.ndarray, + conf: np.ndarray, + pred_cls: np.ndarray, + target_cls: np.ndarray, + eps: float = 1e-16, + ) -> tuple[np.ndarray, ...]: + """Compute the average precision per class for object detection + evaluation. + + @type tp: np.ndarray + @param tp: Binary array indicating whether the detection is correct (True) or not (False). + @type conf: np.ndarray + @param conf: Array of confidence scores of the detections. + @type pred_cls: np.ndarray + @param pred_cls: Array of predicted classes of the detections. + @type target_cls: np.ndarray + @param target_cls: Array of true classes of the detections. 
+ @type eps: float + @param eps: A small value to avoid division by zero. Defaults to 1e-16. + + @rtype: tuple[np.ndarray, ...] + @return: A tuple of six arrays and one array of unique classes, where: + - tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + - fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + - p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). + - r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). + - f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). + - ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). + - unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). + - p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). + - r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). + - f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). + - x (np.ndarray): X-axis values for the curves. Shape: (1000,). + - prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). + """ + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes, nt = np.unique(target_cls, return_counts=True) + nc = unique_classes.shape[0] # number of classes, number of detections + + # Create Precision-Recall curve and compute AP for each class + x, prec_values = np.linspace(0, 1, 1000), [] + + # Average precision, precision and recall curves + ap, p_curve, r_curve = ( + np.zeros((nc, tp.shape[1])), + np.zeros((nc, 1000)), + np.zeros((nc, 1000)), + ) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = nt[ci] # number of labels + n_p = i.sum() # number of predictions + if n_p == 0 or n_l == 0: + continue + + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + eps) # recall curve + r_curve[ci] = np.interp( + -x, -conf[i], recall[:, 0], left=0 + ) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p_curve[ci] = np.interp( + -x, -conf[i], precision[:, 0], left=1 + ) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = MeanAveragePrecisionOBB.compute_ap( + recall[:, j], precision[:, j] + ) + + prec_values = np.array(prec_values) # (nc, 1000) + + # Compute F1 (harmonic mean of precision and recall) + f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps) + + i = MeanAveragePrecisionOBB.smooth( + f1_curve.mean(0), 0.1 + ).argmax() # max F1 index + p, r, f1 = ( + p_curve[:, i], + r_curve[:, i], + f1_curve[:, i], + ) # max-F1 precision, recall, F1 values + tp = (r * nt).round() # true positives + fp = (tp / (p + eps) - tp).round() # false positives + return ( + tp, + fp, + p, + r, + f1, + ap, + unique_classes.astype(int), + p_curve, + r_curve, + f1_curve, + x, + prec_values, + ) + + @staticmethod + def compute_ap( + recall: list[float], precision: list[float] + ) -> tuple[float, np.ndarray, np.ndarray]: + """Compute average precision (AP) given recall and precision + curves. + + @type recall: list[float] + @param recall: The recall curve. + @type precision: list + @param precision: The precision curve. 
+ + @rtype: tuple[float, np.ndarray, np.ndarray] + @return: A tuple containing: + - (float): Average precision. + - (np.ndarray): Precision envelope curve. + - (np.ndarray): Modified recall curve with sentinel values added at the beginning and end. + """ + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = "interp" # methods: 'continuous', 'interp' + if method == "interp": + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[ + 0 + ] # points where x-axis (recall) changes + ap = np.sum( + (mrec[i + 1] - mrec[i]) * mpre[i + 1] + ) # area under curve + + return ap, mpre, mrec + + @staticmethod + def smooth(y: np.ndarray, f: float = 0.05) -> np.ndarray: + """Box filter of fraction f.""" + nf = ( + round(len(y) * f * 2) // 2 + 1 + ) # number of filter elements (must be odd) + p = np.ones(nf // 2) # ones padding + yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded + return np.convolve(yp, np.ones(nf) / nf, mode="valid") # y-smoothed + + @staticmethod + def map(all_ap: np.ndarray) -> float: + """Return mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05. + + @type all_ap: np.ndarray + @param all_ap: Average Precission for all classes. + + @rtype: float + @return: mAP over IoU thresholds of 0.5 - 0.95 in steps of 0.05. + """ + return all_ap.mean() if len(all_ap) else 0.0 diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index c5e4a19b..503a00ad 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -1,33 +1,22 @@ +import logging +from typing import Any + import torch +from luxonis_ml.data import LabelType from scipy.optimize import linear_sum_assignment from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import ( - KeypointProtocol, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric +logger = logging.getLogger(__name__) + class ObjectKeypointSimilarity( BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] ): - """Object Keypoint Similarity metric for evaluating keypoint predictions. - - @type n_keypoints: int - @param n_keypoints: Number of keypoints. - @type kpt_sigmas: Tensor - @param kpt_sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then - use same weights for all. - @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use the - one from definition. 
- """ - is_differentiable: bool = False higher_is_better: bool = True full_state_update: bool = True @@ -38,39 +27,54 @@ class ObjectKeypointSimilarity( groundtruth_keypoints: list[Tensor] groundtruth_scales: list[Tensor] + supported_labels = [LabelType.KEYPOINTS] + def __init__( self, n_keypoints: int | None = None, - kpt_sigmas: Tensor | None = None, - use_cocoeval_oks: bool = False, - **kwargs, + sigmas: list[float] | None = None, + area_factor: float | None = None, + use_cocoeval_oks: bool = True, + **kwargs: Any, ) -> None: - super().__init__( - required_labels=[LabelType.KEYPOINT], protocol=KeypointProtocol, **kwargs - ) + """Object Keypoint Similarity metric for evaluating keypoint + predictions. + + @type sigmas: list[float] | None + @param sigmas: Sigma for each keypoint to weigh its importance, + if C{None}, then use COCO if possible otherwise defaults. + Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. + @type use_cocoeval_oks: bool + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. Defaults to + C{True}. + """ + super().__init__(**kwargs) - if n_keypoints is None and self.node is None: - raise ValueError( - f"Either `n_keypoints` or `node` must be provided " - f"to {self.__class__.__name__}." - ) - self.n_keypoints = n_keypoints or self.node.n_keypoints - if kpt_sigmas is not None and len(kpt_sigmas) != self.n_keypoints: - raise ValueError("Expected kpt_sigmas to be of shape (num_keypoints).") - self.kpt_sigmas = kpt_sigmas or torch.ones(self.n_keypoints) / self.n_keypoints + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) self.use_cocoeval_oks = use_cocoeval_oks self.add_state("pred_keypoints", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) self.add_state("groundtruth_scales", default=[], dist_reduce_fx=None) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - kpts_labels = labels[LabelType.KEYPOINT] - bbox_labels = labels[LabelType.BOUNDINGBOX] - num_keypoints = (kpts_labels.shape[1] - 2) // 3 - label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6)) + kpts_labels = self.get_label(labels, LabelType.KEYPOINTS) + bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX) + n_keypoints = (kpts_labels.shape[1] - 2) // 3 + label = torch.zeros((len(bbox_labels), n_keypoints * 3 + 6)) label[:, :2] = bbox_labels[:, :2] label[:, 2:6] = box_convert(bbox_labels[:, 2:], "xywh", "xyxy") label[:, 6::3] = kpts_labels[:, 2::3] # insert kp x coordinates @@ -79,9 +83,11 @@ def prepare( output_list_oks = [] label_list_oks = [] - image_size = self.node.original_in_shape[2:] + image_size = self.original_in_shape[1:] - for i, pred_kpt in enumerate(outputs["keypoints"]): + for i, pred_kpt in enumerate( + self.get_input_tensors(inputs, LabelType.KEYPOINTS) + ): output_list_oks.append({"keypoints": pred_kpt}) curr_label = label[label[:, 0] == i].to(pred_kpt.device) @@ -93,8 +99,12 @@ def prepare( curr_kpts[:, 1::3] *= image_size[0] curr_bboxs_widths = curr_bboxs[:, 2] - curr_bboxs[:, 0] curr_bboxs_heights = curr_bboxs[:, 3] - 
curr_bboxs[:, 1] - curr_scales = torch.sqrt(curr_bboxs_widths * curr_bboxs_heights) - label_list_oks.append({"keypoints": curr_kpts, "scales": curr_scales}) + curr_scales = ( + curr_bboxs_widths * curr_bboxs_heights * self.area_factor + ) + label_list_oks.append( + {"keypoints": curr_kpts, "scales": curr_scales} + ) return output_list_oks, label_list_oks @@ -125,79 +135,100 @@ def update( width and height are unnormalized. """ for item in preds: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.pred_keypoints.append(keypoints) for item in target: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.groundtruth_keypoints.append(keypoints) self.groundtruth_scales.append(item["scales"]) def compute(self) -> Tensor: """Computes the OKS metric based on the inner state.""" - self.kpt_sigmas = self.kpt_sigmas.to(self.device) + self.sigmas = self.sigmas.to(self.device) image_mean_oks = torch.zeros(len(self.groundtruth_keypoints)) for i, (pred_kpts, gt_kpts, gt_scales) in enumerate( zip( - self.pred_keypoints, self.groundtruth_keypoints, self.groundtruth_scales + self.pred_keypoints, + self.groundtruth_keypoints, + self.groundtruth_scales, ) ): - gt_kpts = torch.reshape(gt_kpts, (-1, self.n_keypoints, 3)) # [N, K, 3] - - image_ious = self._compute_oks(pred_kpts, gt_kpts, gt_scales) # [M, N] + gt_kpts = torch.reshape( + gt_kpts, (-1, self.n_keypoints, 3) + ) # [N, K, 3] + + image_ious = compute_oks( + pred_kpts, + gt_kpts, + gt_scales, + self.sigmas, + self.use_cocoeval_oks, + ) # [M, N] gt_indices, pred_indices = linear_sum_assignment( image_ious.cpu().numpy(), maximize=True ) - matched_ious = [image_ious[n, m] for n, m in zip(gt_indices, pred_indices)] + matched_ious = [ + image_ious[n, m] for n, m in zip(gt_indices, pred_indices) + ] image_mean_oks[i] = torch.tensor(matched_ious).mean() final_oks = image_mean_oks.nanmean() return final_oks - def _compute_oks(self, pred: Tensor, gt: Tensor, scales: Tensor) -> Tensor: - """Compute Object Keypoint Similarity between every GT and prediction. - - @type pred: Tensor[N, K, 3] - @param pred: Predicted keypoints. - @type gt: Tensor[M, K, 3] - @param gt: Groundtruth keypoints. - @type scales: Tensor[M] - @param scales: Scales of the bounding boxes. 
- @rtype: Tensor - @return: Object Keypoint Similarity every pred and gt [M, N] - """ - eps = 1e-7 - distances = (gt[:, None, :, 0] - pred[..., 0]) ** 2 + ( - gt[:, None, :, 1] - pred[..., 1] - ) ** 2 - kpt_mask = gt[..., 2] != 0 # only compute on visible keypoints - if self.use_cocoeval_oks: - # use same formula as in COCOEval script here: - # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/cocoeval.py#L229 - oks = ( - distances - / (2 * self.kpt_sigmas) ** 2 - / (scales[:, None, None] + eps) - / 2 - ) - else: - # use same formula as defined here: https://cocodataset.org/#keypoints-eval - oks = ( - distances - / ((scales[:, None, None] + eps) * self.kpt_sigmas.to(scales.device)) - ** 2 - / 2 - ) - - return (torch.exp(-oks) * kpt_mask[:, None]).sum(-1) / ( - kpt_mask.sum(-1)[:, None] + eps + @staticmethod + def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" + if input_tensor.numel() == 0 and input_tensor.ndim == 1: + return input_tensor.unsqueeze(0) + return input_tensor + + +def compute_oks( + pred: Tensor, + gt: Tensor, + scales: Tensor, + sigmas: Tensor, + use_cocoeval_oks: bool, +) -> Tensor: + """Compute Object Keypoint Similarity between every GT and + prediction. + + @type pred: Tensor[N, K, 3] + @param pred: Predicted keypoints. + @type gt: Tensor[M, K, 3] + @param gt: Groundtruth keypoints. + @type scales: Tensor[M] + @param scales: Scales of the bounding boxes. + @type sigmas: Tensor + @param sigmas: Sigma for each keypoint to weigh its importance, if + C{None}, then use same weights for all. + @type use_cocoeval_oks: bool + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. 
+ @rtype: Tensor
+ @return: Object Keypoint Similarity between every prediction and ground truth, shape [M, N]
+ """
+ eps = 1e-7
+ distances = (gt[:, None, :, 0] - pred[..., 0]) ** 2 + (
+ gt[:, None, :, 1] - pred[..., 1]
+ ) ** 2
+ kpt_mask = gt[..., 2] != 0 # only compute on visible keypoints
+ if use_cocoeval_oks:
+ # use same formula as in COCOEval script here:
+ # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/cocoeval.py#L229
+ oks = distances / (2 * sigmas) ** 2 / (scales[:, None, None] + eps) / 2
+ else:
+ # use same formula as defined here: https://cocodataset.org/#keypoints-eval
+ oks = (
+ distances
+ / ((scales[:, None, None] + eps) * sigmas.to(scales.device)) ** 2
+ / 2
)
-
-def fix_empty_tensors(input_tensor: Tensor) -> Tensor:
- """Empty tensors can cause problems in DDP mode, this methods corrects them."""
- if input_tensor.numel() == 0 and input_tensor.ndim == 1:
- return input_tensor.unsqueeze(0)
- return input_tensor
+ return (torch.exp(-oks) * kpt_mask[:, None]).sum(-1) / (
+ kpt_mask.sum(-1)[:, None] + eps
+ )
diff --git a/luxonis_train/attached_modules/metrics/torchmetrics.py b/luxonis_train/attached_modules/metrics/torchmetrics.py
new file mode 100644
index 00000000..a8797a13
--- /dev/null
+++ b/luxonis_train/attached_modules/metrics/torchmetrics.py
@@ -0,0 +1,114 @@
+import logging
+from contextlib import suppress
+from typing import Any
+
+import torchmetrics
+from luxonis_ml.data import LabelType
+from torch import Tensor
+
+from .base_metric import BaseMetric
+
+logger = logging.getLogger(__name__)
+
+
+class TorchMetricWrapper(BaseMetric[Tensor]):
+ Metric: type[torchmetrics.Metric]
+
+ def __init__(self, **kwargs: Any):
+ super().__init__(node=kwargs.pop("node", None))
+ task = kwargs.get("task")
+ if task is None:
+ if "num_classes" in kwargs:
+ if kwargs["num_classes"] == 1:
+ task = "binary"
+ else:
+ task = "multiclass"
+ elif "num_labels" in kwargs:
+ task = "multilabel"
+ else:
+ with suppress(RuntimeError, ValueError):
+ if self.n_classes == 1:
+ task = "binary"
+ else:
+ task = "multiclass"
+
+ if task is None:
+ raise ValueError(
+ f"'{self.name}' does not have the 'task' parameter set "
+ "and it is not possible to infer it from the other arguments. "
+ "You can either set the 'task' parameter explicitly, provide either the 'num_classes' or 'num_labels' argument, "
+ "or use this metric with a node. "
+ "The 'task' can be one of 'binary', 'multiclass', or 'multilabel'."
+ )
+ self._task = task
+ kwargs["task"] = task
+
+ n_classes: int | None = kwargs.get(
+ "num_classes", kwargs.get("num_labels")
+ )
+
+ if n_classes is None:
+ with suppress(RuntimeError, ValueError):
+ n_classes = self.n_classes
+
+ if n_classes is None and task != "binary":
+ arg_name = "num_classes" if task == "multiclass" else "num_labels"
+ raise ValueError(
+ f"'{self.name}' metric does not have the '{arg_name}' parameter set "
+ "and it is not possible to infer it from the other arguments. "
+ f"You can either set the '{arg_name}' parameter explicitly, or use this metric with a node."
+ )
+
+ if task == "binary" and n_classes is not None and n_classes > 1:
+ raise ValueError(
+ f"Task type set to '{task}', but the dataset has more than 1 class. "
+ f"Set the `task` argument of '{self.name}' to either 'multiclass' or 'multilabel'."
+ )
+ elif task != "binary" and n_classes == 1:
+ raise ValueError(
+ f"Task type set to '{task}', but the dataset has only 1 class. "
+ f"Set the `task` argument of '{self.name}' to 'binary'."
+ ) + + if task == "multiclass": + kwargs["num_classes"] = n_classes + elif task == "multilabel": + kwargs["num_labels"] = n_classes + + self.metric = self.Metric(**kwargs) + + def update(self, preds: Tensor, target: Tensor) -> None: + if self._task in ["multiclass"]: + target = target.argmax(dim=1) + self.metric.update(preds, target) + + def compute(self) -> Tensor: + return self.metric.compute() + + def reset(self) -> None: + self.metric.reset() + + +class Accuracy(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Accuracy + + +class F1Score(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.F1Score + + +class JaccardIndex(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.JaccardIndex + + +class Precision(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Precision + + +class Recall(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Recall diff --git a/luxonis_train/attached_modules/visualizers/__init__.py b/luxonis_train/attached_modules/visualizers/__init__.py index a5652cb4..5f29e744 100644 --- a/luxonis_train/attached_modules/visualizers/__init__.py +++ b/luxonis_train/attached_modules/visualizers/__init__.py @@ -3,6 +3,7 @@ from .classification_visualizer import ClassificationVisualizer from .keypoint_visualizer import KeypointVisualizer from .multi_visualizer import MultiVisualizer +from .obbox_visualizer import OBBoxVisualizer from .segmentation_visualizer import SegmentationVisualizer from .utils import ( combine_visualizations, @@ -18,6 +19,7 @@ __all__ = [ "BBoxVisualizer", + "OBBoxVisualizer", "BaseVisualizer", "ClassificationVisualizer", "KeypointVisualizer", diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py index 050c9f4a..817a09d5 100644 --- a/luxonis_train/attached_modules/visualizers/base_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py @@ -4,8 +4,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import VISUALIZERS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") @@ -17,8 +17,9 @@ class BaseVisualizer( ): """A base class for all visualizers. - This class defines the basic interface for all visualizers. It utilizes automatic - registration of defined subclasses to the L{VISUALIZERS} registry. + This class defines the basic interface for all visualizers. It + utilizes automatic registration of defined subclasses to the + L{VISUALIZERS} registry. """ @abstractmethod @@ -27,7 +28,12 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, *args: Unpack[Ts], - ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]] | list[Tensor]: + ) -> ( + Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]] + | list[Tensor] + ): """Forward pass of the visualizer. 
Takes an image and the prepared inputs from the `prepare` method and @@ -62,5 +68,6 @@ def run( inputs: Packet[Tensor], labels: Labels, ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]]: - self.validate(inputs, labels) - return self(label_canvas, prediction_canvas, *self.prepare(inputs, labels)) + return self( + label_canvas, prediction_canvas, *self.prepare(inputs, labels) + ) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index 14dd1ab9..e544bf06 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -1,10 +1,9 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import BBoxProtocol, LabelType - from .base_visualizer import BaseVisualizer from .utils import ( Color, @@ -15,6 +14,8 @@ class BBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): + supported_labels = [LabelType.BOUNDINGBOX] + def __init__( self, labels: dict[int, str] | list[str] | None = None, @@ -28,40 +29,50 @@ def __init__( ): """Visualizer for bounding box predictions. - Creates a visualization of the bounding box predictions and labels. + Creates a visualization of the bounding box predictions and + labels. @type labels: dict[int, str] | list[str] | None - @param labels: Either a dictionary mapping class indices to names, or a list of - names. If list is provided, the label mapping is done by index. By default, - no labels are drawn. + @param labels: Either a dictionary mapping class indices to + names, or a list of names. If list is provided, the label + mapping is done by index. By default, no labels are drawn. @type draw_labels: bool - @param draw_labels: Whether or not to draw labels. Defaults to C{True}. + @param draw_labels: Whether or not to draw labels. Defaults to + C{True}. @type colors: dict[int, Color] | list[Color] | None - @param colors: Either a dictionary mapping class indices to colors, or a list of - colors. If list is provided, the color mapping is done by index. By default, - random colors are used. + @param colors: Either a dictionary mapping class indices to + colors, or a list of colors. If list is provided, the color + mapping is done by index. By default, random colors are + used. @type fill: bool - @param fill: Whether or not to fill the bounding boxes. Defaults to C{False}. + @param fill: Whether or not to fill the bounding boxes. Defaults + to C{False}. @type width: int | None - @param width: The width of the bounding box lines. Defaults to C{1}. + @param width: The width of the bounding box lines. Defaults to + C{1}. @type font: str | None - @param font: A filename containing a TrueType font. Defaults to C{None}. + @param font: A filename containing a TrueType font. Defaults to + C{None}. @type font_size: int | None - @param font_size: The font size to use for the labels. Defaults to C{None}. + @param font_size: The font size to use for the labels. Defaults + to C{None}. 
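+
+ Example (an illustrative sketch only; the class names, colors and
+ standalone construction are assumed):
+
+ >>> visualizer = BBoxVisualizer(
+ ...     labels=["person", "car"],
+ ...     colors={"person": "red", "car": (0, 255, 0)},
+ ...     width=2,
+ ... )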
""" - super().__init__( - required_labels=[LabelType.BOUNDINGBOX], protocol=BBoxProtocol, **kwargs - ) + super().__init__(**kwargs) if isinstance(labels, list): labels = {i: label for i, label in enumerate(labels)} - self.labels = labels or { - i: label for i, label in enumerate(self.node.class_names) + self.bbox_labels = labels or { + i: label for i, label in enumerate(self.class_names) } + if colors is None: - colors = {label: get_color(i) for i, label in self.labels.items()} + colors = { + label: get_color(i) for i, label in self.bbox_labels.items() + } if isinstance(colors, list): - colors = {self.labels[i]: color for i, color in enumerate(colors)} + colors = { + self.bbox_labels[i]: color for i, color in enumerate(colors) + } self.colors = colors self.fill = fill self.width = width @@ -163,16 +174,17 @@ def forward( predictions: list[Tensor], targets: Tensor, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the bounding box predictions and labels. + """Creates a visualization of the bounding box predictions and + labels. @type label_canvas: Tensor @param label_canvas: The canvas containing the labels. @type prediction_canvas: Tensor @param prediction_canvas: The canvas containing the predictions. @type prediction: Tensor - @param prediction: The predicted bounding boxes. The shape should be [N, 6], - where N is the number of bounding boxes and the last dimension is [x1, y1, - x2, y2, class, conf]. + @param prediction: The predicted bounding boxes. The shape + should be [N, 6], where N is the number of bounding boxes + and the last dimension is [x1, y1, x2, y2, class, conf]. @type targets: Tensor @param targets: The target bounding boxes. """ @@ -180,7 +192,7 @@ def forward( label_canvas, targets, color_dict=self.colors, - label_dict=self.labels, + label_dict=self.bbox_labels, draw_labels=self.draw_labels, fill=self.fill, font=self.font, @@ -190,7 +202,7 @@ def forward( predictions_viz = self.draw_predictions( prediction_canvas, predictions, - label_dict=self.labels, + label_dict=self.bbox_labels, color_dict=self.colors, draw_labels=self.draw_labels, fill=self.fill, diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index e5920d21..9d26172b 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -2,17 +2,16 @@ import matplotlib.pyplot as plt import numpy as np import torch +from luxonis_ml.data import LabelType from torch import Tensor from .base_visualizer import BaseVisualizer -from .utils import ( - figure_to_torch, - numpy_to_torch_img, - torch_img_to_numpy, -) +from .utils import figure_to_torch, numpy_to_torch_img, torch_img_to_numpy class ClassificationVisualizer(BaseVisualizer[Tensor, Tensor]): + supported_labels = [LabelType.CLASSIFICATION] + def __init__( self, include_plot: bool = True, @@ -24,8 +23,8 @@ def __init__( """Visualizer for classification tasks. @type include_plot: bool - @param include_plot: Whether to include a plot of the class probabilities in the - visualization. Defaults to C{True}. + @param include_plot: Whether to include a plot of the class + probabilities in the visualization. Defaults to C{True}. 
""" super().__init__(**kwargs) self.include_plot = include_plot @@ -35,19 +34,21 @@ def __init__( def _get_class_name(self, pred: Tensor) -> str: idx = int((pred.argmax()).item()) - if self.node.class_names is None: + if self.class_names is None: return str(idx) - return self.node.class_names[idx] + return self.class_names[idx] - def _generate_plot(self, prediction: Tensor, width: int, height: int) -> Tensor: - prediction = prediction.softmax(-1).detach().cpu().numpy() + def _generate_plot( + self, prediction: Tensor, width: int, height: int + ) -> Tensor: + pred = prediction.softmax(-1).detach().cpu().numpy() fig, ax = plt.subplots(figsize=(width / 100, height / 100)) - ax.bar(np.arange(len(prediction)), prediction) - ax.set_xticks(np.arange(len(prediction))) - if self.node.class_names is not None: - ax.set_xticklabels(self.node.class_names, rotation=90) + ax.bar(np.arange(len(pred)), pred) + ax.set_xticks(np.arange(len(pred))) + if self.class_names is not None: + ax.set_xticklabels(self.class_names, rotation=90) else: - ax.set_xticklabels(np.arange(1, len(prediction) + 1)) + ax.set_xticklabels(np.arange(1, len(pred) + 1)) ax.set_ylim(0, 1) ax.set_xlabel("Class") ax.set_ylabel("Probability") @@ -89,7 +90,9 @@ def forward( overlay[i] = numpy_to_torch_img(arr) if self.include_plot: plots[i] = self._generate_plot( - prediction, prediction_canvas.shape[3], prediction_canvas.shape[2] + prediction, + prediction_canvas.shape[3], + prediction_canvas.shape[2], ) if self.include_plot: diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index beebaf3f..53b9cb88 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -1,23 +1,16 @@ from copy import deepcopy import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import ( - Labels, - LabelType, - Packet, -) - from .base_visualizer import BaseVisualizer -from .utils import ( - Color, - draw_keypoint_labels, - draw_keypoints, -) +from .utils import Color, draw_keypoint_labels, draw_keypoints class KeypointVisualizer(BaseVisualizer[list[Tensor], Tensor]): + supported_labels = [LabelType.KEYPOINTS] + def __init__( self, visibility_threshold: float = 0.5, @@ -29,30 +22,27 @@ def __init__( """Visualizer for keypoints. @type visibility_threshold: float - @param visibility_threshold: Threshold for visibility of keypoints. If the - visibility of a keypoint is below this threshold, it is considered as not - visible. Defaults to C{0.5}. + @param visibility_threshold: Threshold for visibility of + keypoints. If the visibility of a keypoint is below this + threshold, it is considered as not visible. Defaults to + C{0.5}. @type connectivity: list[tuple[int, int]] | None - @param connectivity: List of tuples of keypoint indices that define the - connections in the skeleton. Defaults to C{None}. + @param connectivity: List of tuples of keypoint indices that + define the connections in the skeleton. Defaults to C{None}. @type visible_color: L{Color} - @param visible_color: Color of visible keypoints. Either a string or a tuple of - RGB values. Defaults to C{"red"}. + @param visible_color: Color of visible keypoints. Either a + string or a tuple of RGB values. Defaults to C{"red"}. @type nonvisible_color: L{Color} | None - @param nonvisible_color: Color of nonvisible keypoints. If C{None}, nonvisible - keypoints are not drawn. 
Defaults to C{None}. + @param nonvisible_color: Color of nonvisible keypoints. If + C{None}, nonvisible keypoints are not drawn. Defaults to + C{None}. """ - super().__init__(required_labels=[LabelType.KEYPOINT], **kwargs) + super().__init__(**kwargs) self.visibility_threshold = visibility_threshold self.connectivity = connectivity self.visible_color = visible_color self.nonvisible_color = nonvisible_color - def prepare( - self, output: Packet[Tensor], label: Labels - ) -> tuple[list[Tensor], Tensor]: - return output["keypoints"], label[LabelType.KEYPOINT] - @staticmethod def draw_predictions( canvas: Tensor, @@ -63,7 +53,7 @@ def draw_predictions( ) -> Tensor: viz = torch.zeros_like(canvas) for i in range(len(canvas)): - prediction = predictions[i][:, 1:] + prediction = predictions[i] mask = prediction[..., 2] < visibility_threshold visible_kpts = prediction[..., :2] * (~mask).unsqueeze(-1).float() viz[i] = draw_keypoints( @@ -74,7 +64,9 @@ def draw_predictions( if nonvisible_color is not None: _kwargs = deepcopy(kwargs) _kwargs["colors"] = nonvisible_color - nonvisible_kpts = prediction[..., :2] * mask.unsqueeze(-1).float() + nonvisible_kpts = ( + prediction[..., :2] * mask.unsqueeze(-1).float() + ) viz[i] = draw_keypoints( viz[i].clone(), nonvisible_kpts[..., :2], diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index 2fee8e1f..b7ecbfbb 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -1,17 +1,14 @@ from torch import Tensor from luxonis_train.utils.registry import VISUALIZERS -from luxonis_train.utils.types import ( - Kwargs, - Labels, - Packet, -) +from luxonis_train.utils.types import Kwargs, Labels, Packet from .base_visualizer import BaseVisualizer class MultiVisualizer(BaseVisualizer[Packet[Tensor], Labels]): - """Special type of visualizer that combines multiple visualizers together. + """Special type of visualizer that combines multiple visualizers + together. All the visualizers are applied in the order they are provided and they all draw on the same canvas. @@ -29,14 +26,16 @@ def __init__(self, visualizers: list[Kwargs], **kwargs): self.visualizers = [] for item in visualizers: visualizer_params = item.get("params", {}) - visualizer = VISUALIZERS.get(item["name"])(**visualizer_params, **kwargs) + visualizer = VISUALIZERS.get(item["name"])( + **visualizer_params, **kwargs + ) self.visualizers.append(visualizer) def prepare( - self, output: Packet[Tensor], label: Labels, idx: int = 0 + self, inputs: Packet[Tensor], label: Labels, idx: int = 0 ) -> tuple[Packet[Tensor], Labels]: self._idx = idx - return output, label + return inputs, label def forward( self, @@ -46,12 +45,16 @@ def forward( labels: Labels, ) -> tuple[Tensor, Tensor]: for visualizer in self.visualizers: - match visualizer.run(label_canvas, prediction_canvas, outputs, labels): - case Tensor(data=prediction_viz): + match visualizer.run( + label_canvas, prediction_canvas, outputs, labels + ): + case Tensor() as prediction_viz: prediction_canvas = prediction_viz case (Tensor(data=label_viz), Tensor(data=prediction_viz)): label_canvas = label_viz prediction_canvas = prediction_viz case _: - raise NotImplementedError + raise NotImplementedError( + "Unexpected return type from visualizer." 
+ ) return label_canvas, prediction_canvas diff --git a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py new file mode 100644 index 00000000..94557a2a --- /dev/null +++ b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py @@ -0,0 +1,210 @@ +import logging + +import torch +from torch import Tensor + +from luxonis_train.utils.types import LabelType + +from .base_visualizer import BaseVisualizer +from .utils import Color, draw_obounding_box, get_color + + +class OBBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): + supported_labels = [LabelType.OBOUNDINGBOX] + + def __init__( + self, + labels: dict[int, str] | list[str] | None = None, + draw_labels: bool = True, + colors: dict[str, Color] | list[Color] | None = None, + fill: bool = False, + width: int | None = None, + font: str | None = None, + font_size: int | None = None, + **kwargs, + ): + """Visualizer for oriented bounding box predictions. + + Creates a visualization of the oriented bounding box predictions + and labels. + + @type labels: dict[int, str] | list[str] | None + @param labels: Either a dictionary mapping class indices to + names, or a list of names. If list is provided, the label + mapping is done by index. By default, no labels are drawn. + @type draw_labels: bool + @param draw_labels: Whether or not to draw labels. Defaults to + C{True}. + @type colors: dict[int, Color] | list[Color] | None + @param colors: Either a dictionary mapping class indices to + colors, or a list of colors. If list is provided, the color + mapping is done by index. By default, random colors are + used. + @type fill: bool + @param fill: Whether or not to fill the bounding boxes. Defaults + to C{False}. + @type width: int | None + @param width: The width of the bounding box lines. Defaults to + C{1}. + @type font: str | None + @param font: A filename containing a TrueType font. Defaults to + C{None}. + @type font_size: int | None + @param font_size: The font size to use for the labels. Defaults + to C{None}. 
+ """ + super().__init__(**kwargs) + if isinstance(labels, list): + labels = {i: label for i, label in enumerate(labels)} + + self.bbox_labels = labels or { + i: label for i, label in enumerate(self.node.class_names) + } + + if colors is None: + colors = { + label: get_color(i) for i, label in self.bbox_labels.items() + } + if isinstance(colors, list): + colors = { + self.bbox_labels[i]: color for i, color in enumerate(colors) + } + self.colors = colors + self.fill = fill + self.width = width + self.font = font + self.font_size = font_size + self.draw_labels = draw_labels + + @staticmethod + def draw_targets( + canvas: Tensor, + targets: Tensor, + width: int | None = None, + colors: list[Color] | None = None, + labels: list[str] | None = None, + label_dict: dict[int, str] | None = None, + color_dict: dict[str, Color] | None = None, + draw_labels: bool = True, + **kwargs, + ) -> Tensor: + viz = torch.zeros_like(canvas) + + for i in range(len(canvas)): + target = targets[targets[:, 0] == i] + target_classes = target[:, 1].int() + cls_labels = labels or ( + [label_dict[int(c)] for c in target_classes] + if draw_labels and label_dict is not None + else None + ) + cls_colors = colors or ( + [color_dict[label_dict[int(c)]] for c in target_classes] + if color_dict is not None and label_dict is not None + else None + ) + + *_, H, W = canvas.shape + width = width or max(1, int(min(H, W) / 100)) + viz[i] = draw_obounding_box( + canvas[i].clone(), + target[:, 2:], + width=width, + labels=cls_labels, + colors=cls_colors, + **kwargs, + ).to(canvas.device) + + return viz + + @staticmethod + def draw_predictions( + canvas: Tensor, + predictions: list[Tensor], + width: int | None = None, + colors: list[Color] | None = None, + labels: list[str] | None = None, + label_dict: dict[int, str] | None = None, + color_dict: dict[str, Color] | None = None, + draw_labels: bool = True, + **kwargs, + ) -> Tensor: + viz = torch.zeros_like(canvas) + + for i in range(len(canvas)): + prediction = predictions[i] + prediction_classes = prediction[..., 5].int() + cls_labels = labels or ( + [label_dict[int(c)] for c in prediction_classes] + if draw_labels and label_dict is not None + else None + ) + cls_colors = colors or ( + [color_dict[label_dict[int(c)]] for c in prediction_classes] + if color_dict is not None and label_dict is not None + else None + ) + + *_, H, W = canvas.shape + width = width or max(1, int(min(H, W) / 100)) + try: + viz[i] = draw_obounding_box( + canvas[i].clone(), + prediction[:, :5], + width=width, + labels=cls_labels, + colors=cls_colors, + **kwargs, + ) + except ValueError as e: + logging.getLogger(__name__).warning( + f"Failed to draw bounding boxes: {e}. Skipping visualization." + ) + viz = canvas + return viz + + def forward( + self, + label_canvas: Tensor, + prediction_canvas: Tensor, + predictions: list[Tensor], + targets: Tensor, + ) -> tuple[Tensor, Tensor]: + """Creates a visualization of the oriented bounding box + predictions and labels. + + @type label_canvas: Tensor + @param label_canvas: The canvas containing the labels. + @type prediction_canvas: Tensor + @param prediction_canvas: The canvas containing the predictions. + @type predictions: Tensor + @param predictions: The predicted bounding boxes. The shape + should be [N, 7], where N is the number of bounding boxes + and the last dimension is [xc, yc, w, h, conf, class]. # + NOTE: check it + @type targets: Tensor + @param targets: The target bounding boxes. 
+ """ + targets_viz = self.draw_targets( + label_canvas, + targets, + color_dict=self.colors, + label_dict=self.bbox_labels, + draw_labels=self.draw_labels, + fill=self.fill, + font=self.font, + font_size=self.font_size, + width=self.width, + ) + predictions_viz = self.draw_predictions( + prediction_canvas, + predictions, + label_dict=self.bbox_labels, + color_dict=self.colors, + draw_labels=self.draw_labels, + fill=self.fill, + font=self.font, + font_size=self.font_size, + width=self.width, + ) + return targets_viz, predictions_viz.to(targets_viz.device) diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index 6d8f3c79..15e2fd09 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -1,15 +1,13 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import Labels, LabelType, Packet, SegmentationProtocol - from .base_visualizer import BaseVisualizer from .utils import ( Color, draw_segmentation_labels, - draw_segmentation_masks, get_color, seg_output_to_bool, ) @@ -19,10 +17,13 @@ class SegmentationVisualizer(BaseVisualizer[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION] + def __init__( self, colors: Color | list[Color] = "#5050FF", - background_class: int | None = None, + background_class: int | None = 0, + background_color: Color = "#000000", alpha: float = 0.6, **kwargs, ): @@ -30,43 +31,43 @@ def __init__( @type colors: L{Color} | list[L{Color}] @param colors: Color of the segmentation masks. Defaults to C{"#5050FF"}. + @type background_class: int | None + @param background_class: Index of the background class. Defaults to C{0}. + If set, the background class will be drawn with the `background_color`. + @type background_color: L{Color} | None + @param background_color: Color of the background class. + Defaults to C{"#000000"}. @type alpha: float @param alpha: Alpha value of the segmentation masks. Defaults to C{0.6}. 
""" - super().__init__( - protocol=SegmentationProtocol, - required_labels=[LabelType.SEGMENTATION], - **kwargs, - ) + super().__init__(**kwargs) if not isinstance(colors, list): colors = [colors] self.colors = colors self.background_class = background_class + self.background_color = background_color self.alpha = alpha - def prepare(self, output: Packet[Tensor], label: Labels) -> tuple[Tensor, Tensor]: - return output["segmentation"][0], label[LabelType.SEGMENTATION] - @staticmethod def draw_predictions( canvas: Tensor, predictions: Tensor, colors: list[Color] | None = None, background_class: int | None = None, + background_color: Color = "#000000", **kwargs, ) -> Tensor: colors = SegmentationVisualizer._adjust_colors( - predictions, colors, background_class + predictions, colors, background_class, background_color ) viz = torch.zeros_like(canvas) for i in range(len(canvas)): prediction = predictions[i] mask = seg_output_to_bool(prediction) - mask = mask.to(canvas.device) - viz[i] = draw_segmentation_masks( + viz[i] = draw_segmentation_labels( canvas[i].clone(), mask, colors=colors, **kwargs - ) + ).to(canvas.device) return viz @staticmethod @@ -75,10 +76,11 @@ def draw_targets( targets: Tensor, colors: list[Color] | None = None, background_class: int | None = None, + background_color: Color = "#000000", **kwargs, ) -> Tensor: colors = SegmentationVisualizer._adjust_colors( - targets, colors, background_class + targets, colors, background_class, background_color ) viz = torch.zeros_like(canvas) for i in range(len(viz)): @@ -100,7 +102,8 @@ def forward( targets: Tensor, **kwargs, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the segmentation predictions and labels. + """Creates a visualization of the segmentation predictions and + labels. @type label_canvas: Tensor @param label_canvas: The canvas to draw the labels on. @@ -120,6 +123,7 @@ def forward( colors=self.colors, alpha=self.alpha, background_class=self.background_class, + background_color=self.background_color, **kwargs, ) predictions_vis = self.draw_predictions( @@ -128,6 +132,7 @@ def forward( colors=self.colors, alpha=self.alpha, background_class=self.background_class, + background_color=self.background_color, **kwargs, ) return targets_vis, predictions_vis @@ -137,6 +142,7 @@ def _adjust_colors( data: Tensor, colors: list[Color] | None = None, background_class: int | None = None, + background_color: Color = "#000000", ) -> list[Color]: global log_disable n_classes = data.size(1) @@ -145,7 +151,9 @@ def _adjust_colors( if not log_disable: if colors is None: - logger.warning("No colors provided. Using random colors instead.") + logger.warning( + "No colors provided. Using random colors instead." 
+ ) elif data.size(1) != len(colors): logger.warning( f"Number of colors ({len(colors)}) does not match number of " @@ -154,5 +162,5 @@ def _adjust_colors( log_disable = True colors = [get_color(i) for i in range(data.size(1))] if background_class is not None: - colors[background_class] = "#000000" + colors[background_class] = background_color return colors diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index 52431204..7a3b74c4 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -1,6 +1,7 @@ import colorsys import io -from typing import Literal +import warnings +from typing import List, Literal, Optional, Tuple, Union import cv2 import matplotlib.pyplot as plt @@ -10,16 +11,18 @@ import torchvision.transforms.functional as F import torchvision.transforms.functional as TF from matplotlib.figure import Figure -from PIL import Image +from PIL import Image, ImageDraw, ImageFont from torch import Tensor from torchvision.ops import box_convert from torchvision.utils import ( + _log_api_usage_once, + _parse_colors, draw_bounding_boxes, draw_keypoints, draw_segmentation_masks, ) -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config, xywhr2xyxyxyxy, xyxyxyxy2xywhr Color = str | tuple[int, int, int] """Color type alias. @@ -44,13 +47,14 @@ def figure_to_torch(fig: Figure, width: int, height: int) -> Tensor: def torch_img_to_numpy( img: Tensor, reverse_colors: bool = False ) -> npt.NDArray[np.uint8]: - """Converts a torch image (CHW) to a numpy array (HWC). Optionally also converts - colors. + """Converts a torch image (CHW) to a numpy array (HWC). Optionally + also converts colors. @type img: Tensor @param img: Torch image (CHW) @type reverse_colors: bool - @param reverse_colors: Whether to reverse colors (RGB to BGR). Defaults to False. + @param reverse_colors: Whether to reverse colors (RGB to BGR). + Defaults to False. @rtype: npt.NDArray[np.uint8] @return: Numpy image (HWC) """ @@ -129,8 +133,8 @@ def draw_bounding_box_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Bounding box label. The shape should be (n_instances, 4), where the - last dimension is (x, y, w, h). + @param label: Bounding box label. The shape should be (n_instances, + 4), where the last dimension is (x, y, w, h). @type kwargs: dict @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_bounding_boxes}. @@ -144,16 +148,169 @@ def draw_bounding_box_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: return draw_bounding_boxes(img, bboxs, **kwargs) +def draw_obounding_box( + img: Tensor, obbox: Tensor | np.ndarray, **kwargs +) -> Tensor: + """Draws oriented bounding box (obb) labels on an image. + + @type img: Tensor + @param img: Image to draw on. + @type obbox: Tensor + @param obbox: Oriented bounding box. The shape should be + (n_instances, 8) or (n_instances, 5), where the last dimension + is (x1, y1, x2, y2, x3, y3, x4, y4) or (xc, yc, w, h, r). + @type kwargs: dict + @param kwargs: Additional arguments to pass to + L{draw_obounding_boxes}. + @rtype: Tensor + @return: Image with bounding box labels drawn on. 
+ """ + _, H, W = img.shape + # The conversion below is needed for fitting a rectangle to the 4 label points, which can form + # a polygon sometimes + if obbox.shape[-1] > 5: + obbox = xyxyxyxy2xywhr(obbox) # xywhr + bboxs_2 = xywhr2xyxyxyxy(obbox) # shape: (bs, 4, 2) + if isinstance(bboxs_2, np.ndarray): + bboxs_2 = torch.tensor(bboxs_2) + if bboxs_2.numel() == 0: + raise ValueError + bboxs = bboxs_2.view(bboxs_2.size(0), -1) # x1y1x2y2x3y3x4y4 + bboxs[:, 0::2] *= W + bboxs[:, 1::2] *= H + return draw_obounding_boxes(img, bboxs, **kwargs) + + +def draw_obounding_boxes( + image: torch.Tensor, + boxes: torch.Tensor, + labels: Optional[List[str]] = None, + colors: Optional[ + Union[ + List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int] + ] + ] = None, + fill: Optional[bool] = False, + width: int = 1, + font: Optional[str] = None, + font_size: Optional[int] = None, +) -> torch.Tensor: + """Draws oriented bounding boxes (obb) on given RGB image. The image + values should be uint8 in [0, 255] or float in [0, 1]. If fill is + True, Resulting Tensor should be saved as PNG image. + + Args: + image (Tensor): Tensor of shape (C, H, W) and dtype uint8 or float. + boxes (Tensor): Tensor of size (N, 8) containing bounding boxes in (x1, y1, x2, y2, x3, y3, x4, y4) + format. Note that the boxes are absolute coordinates with respect to the image. In other words: `0 <= x < W` and + `0 <= y < H`. + labels (List[str]): List containing the labels of bounding boxes. + colors (color or list of colors, optional): List containing the colors + of the boxes or single color for all boxes. The color can be represented as + PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``. + By default, random colors are generated for boxes. + fill (bool): If `True` fills the bounding box with specified color. + width (int): Width of bounding box. + font (str): A filename containing a TrueType font. If the file is not found in this filename, the loader may + also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`, + `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS. + font_size (int): The requested font size in points. + + Returns: + img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted. + """ + import torchvision.transforms.v2.functional as F # noqa + + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(draw_obounding_boxes) + if not isinstance(image, torch.Tensor): + raise TypeError(f"Tensor expected, got {type(image)}") + elif not (image.dtype == torch.uint8 or image.is_floating_point()): + raise ValueError( + f"The image dtype must be uint8 or float, got {image.dtype}" + ) + elif image.dim() != 3: + raise ValueError("Pass individual images, not batches") + elif image.size(0) not in {1, 3}: + raise ValueError("Only grayscale and RGB images are supported") + # elif (boxes[:, 0] > boxes[:, 2]).any() or (boxes[:, 1] > boxes[:, 3]).any(): + # raise ValueError( + # "Boxes need to be in (xmin, ymin, xmax, ymax) format. Use torchvision.ops.box_convert to convert them" + # ) + + num_boxes = boxes.shape[0] + + if num_boxes == 0: + warnings.warn("boxes doesn't contain any box. No box was drawn") + return image + + if labels is None: + labels: Union[List[str], List[None]] = [None] * num_boxes # type: ignore[no-redef] + elif len(labels) != num_boxes: + raise ValueError( + f"Number of boxes ({num_boxes}) and labels ({len(labels)}) mismatch. Please specify labels for each box." 
+ ) + + colors = _parse_colors(colors, num_objects=num_boxes) + + if font is None: + if font_size is not None: + warnings.warn( + "Argument 'font_size' will be ignored since 'font' is not set." + ) + txt_font = ImageFont.load_default() + else: + txt_font = ImageFont.truetype(font=font, size=font_size or 10) + + # Handle Grayscale images + if image.size(0) == 1: + image = torch.tile(image, (3, 1, 1)) + + original_dtype = image.dtype + if original_dtype.is_floating_point: + image = F.to_dtype(image, dtype=torch.uint8, scale=True) + + img_to_draw = F.to_pil_image(image) + img_boxes = boxes.to(torch.int64).tolist() + + if fill: + draw = ImageDraw.Draw(img_to_draw, "RGBA") + else: + draw = ImageDraw.Draw(img_to_draw) + + for bbox, color, label in zip(img_boxes, colors, labels): # type: ignore[arg-type] + if fill: + fill_color = color + (100,) + draw.polygon(bbox, width=width, outline=color, fill=fill_color) + else: + draw.polygon(bbox, width=width, outline=color) + + if label is not None: + margin = width + 1 + draw.text( + (bbox[0] + margin, bbox[1] + margin), + label, + fill=color, + font=txt_font, + ) + + out = F.pil_to_tensor(img_to_draw) + if original_dtype.is_floating_point: + out = F.to_dtype(out, dtype=original_dtype, scale=True) + return out + + def draw_keypoint_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: """Draws keypoint labels on an image. @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Keypoint label. The shape should be (n_instances, 3), where the last - dimension is (x, y, visibility). + @param label: Keypoint label. The shape should be (n_instances, 3), + where the last dimension is (x, y, visibility). @type kwargs: dict - @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_keypoints}. + @param kwargs: Additional arguments to pass to + L{torchvision.utils.draw_keypoints}. @rtype: Tensor @return: Image with keypoint labels drawn on. """ @@ -191,7 +348,8 @@ def unnormalize( std: list[float] | float | None = None, to_uint8: bool = False, ) -> Tensor: - """Unnormalizes an image back to original values, optionally converts it to uint8. + """Unnormalizes an image back to original values, optionally + converts it to uint8. @type img: Tensor @param img: Image to unnormalize. @@ -220,7 +378,10 @@ def unnormalize( return out_img -def get_unnormalized_images(cfg: Config, images: Tensor) -> Tensor: +def get_unnormalized_images(cfg: Config, inputs: dict[str, Tensor]) -> Tensor: + # Get images from inputs according to config + images = inputs[cfg.loader.image_source] + normalize_params = cfg.trainer.preprocessing.normalize.params mean = std = None if cfg.trainer.preprocessing.normalize.active: @@ -301,9 +462,12 @@ def get_color(seed: int) -> Color: # # TEST: def combine_visualizations( - visualization: Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]], + visualization: Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]], ) -> Tensor: - """Default way of combining multiple visualizations into one final image.""" + """Default way of combining multiple visualizations into one final + image.""" def resize_to_match( fst: Tensor, @@ -312,7 +476,7 @@ def resize_to_match( keep_size: Literal["larger", "smaller", "first", "second"] = "larger", resize_along: Literal["width", "height", "exact"] = "height", keep_aspect_ratio: bool = True, - ): + ) -> tuple[Tensor, Tensor]: """Resizes two images so they have the same size. Resizes two images so they can be concateneted together. 
It's possible to @@ -405,10 +569,12 @@ def resize_to_match( return fst_resized, snd_resized match visualization: - case Tensor(data=viz): + case Tensor() as viz: return viz case (Tensor(data=viz_labels), Tensor(data=viz_predictions)): - viz_labels, viz_predictions = resize_to_match(viz_labels, viz_predictions) + viz_labels, viz_predictions = resize_to_match( + viz_labels, viz_predictions + ) return torch.cat([viz_labels, viz_predictions], dim=-1) case (Tensor(data=_), [*viz]) if isinstance(viz, list) and all( diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md index d8e3da74..eb34b081 100644 --- a/luxonis_train/callbacks/README.md +++ b/luxonis_train/callbacks/README.md @@ -9,17 +9,18 @@ List of all supported callbacks. - [LuxonisProgressBar](#luxonisprogressbar) - [MetadataLogger](#metadatalogger) - [TestOnTrainEnd](#testontrainend) +- [UploadCheckpoint](#uploadcheckpoint) ## PytorchLightning Callbacks List of supported callbacks from `lightning.pytorch`. +- [GPUStatsMonitor](https://pytorch-lightning.readthedocs.io/en/1.5.10/api/pytorch_lightning.callbacks.gpu_stats_monitor.html) - [DeviceStatsMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html#lightning.pytorch.callbacks.DeviceStatsMonitor) -- [ EarlyStopping ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping) -- [ LearningRateMonitor ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor) -- [ ModelCheckpoint ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) -- [ RichModelSummary ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary) - - Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions). +- [EarlyStopping](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping) +- [LearningRateMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor) +- [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) +- [RichModelSummary](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary) ## ExportOnTrainEnd @@ -34,7 +35,6 @@ Performs export on train end with best weights according to the validation loss. ## LuxonisProgressBar Custom rich text progress bar based on RichProgressBar from Pytorch Lightning. -Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions). ## MetadataLogger @@ -51,3 +51,11 @@ Metadata include all defined hyperparameters together with git hashes of `luxoni ## TestOnTrainEnd Callback to perform a test run at the end of the training. + +## UploadCheckpoint + +Callback that uploads currently best checkpoint (based on validation loss) to specified cloud directory after every validation epoch. 
+ +| Key | Type | Default value | Description | +| ---------------- | ---- | ------------- | ----------------------------------------------------------------------------------------------------------------------------- | +| upload_directory | str | / | Path to cloud directory where checkpoints should be uploaded to. If you want to use current mlflow run set it to `mlflow://`. | diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index 4be94600..95f860a1 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -1,32 +1,50 @@ from lightning.pytorch.callbacks import ( DeviceStatsMonitor, EarlyStopping, + GradientAccumulationScheduler, LearningRateMonitor, ModelCheckpoint, + ModelPruning, RichModelSummary, + StochasticWeightAveraging, + Timer, ) from luxonis_train.utils.registry import CALLBACKS +from .archive_on_train_end import ArchiveOnTrainEnd from .export_on_train_end import ExportOnTrainEnd -from .luxonis_progress_bar import LuxonisProgressBar +from .gpu_stats_monitor import GPUStatsMonitor +from .luxonis_progress_bar import ( + BaseLuxonisProgressBar, + LuxonisRichProgressBar, + LuxonisTQDMProgressBar, +) from .metadata_logger import MetadataLogger from .module_freezer import ModuleFreezer from .test_on_train_end import TestOnTrainEnd -from .upload_checkpoint_on_train_end import UploadCheckpointOnTrainEnd +from .upload_checkpoint import UploadCheckpoint CALLBACKS.register_module(module=EarlyStopping) CALLBACKS.register_module(module=LearningRateMonitor) CALLBACKS.register_module(module=ModelCheckpoint) CALLBACKS.register_module(module=RichModelSummary) CALLBACKS.register_module(module=DeviceStatsMonitor) +CALLBACKS.register_module(module=GradientAccumulationScheduler) +CALLBACKS.register_module(module=StochasticWeightAveraging) +CALLBACKS.register_module(module=Timer) +CALLBACKS.register_module(module=ModelPruning) __all__ = [ + "ArchiveOnTrainEnd", "ExportOnTrainEnd", - "LuxonisProgressBar", + "LuxonisTQDMProgressBar", + "LuxonisRichProgressBar", + "BaseLuxonisProgressBar", "MetadataLogger", "ModuleFreezer", "TestOnTrainEnd", - "UploadCheckpointOnTrainEnd", + "UploadCheckpoint", + "GPUStatsMonitor", ] diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py new file mode 100644 index 00000000..30949e4e --- /dev/null +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -0,0 +1,42 @@ +import logging + +import lightning.pytorch as pl + +import luxonis_train +from luxonis_train.utils.registry import CALLBACKS + +from .needs_checkpoint import NeedsCheckpoint + +logger = logging.getLogger(__name__) + + +@CALLBACKS.register_module() +class ArchiveOnTrainEnd(NeedsCheckpoint): + def on_train_end( + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> None: + """Archives the model on train end. + + @type trainer: L{pl.Trainer} + @param trainer: Pytorch Lightning trainer. + @type pl_module: L{pl.LightningModule} + @param pl_module: Pytorch Lightning module. + """ + + path = self.get_checkpoint(pl_module) + if path is None: # pragma: no cover + logger.warning("Skipping model archiving.") + return + + onnx_path = pl_module.core._exported_models.get("onnx") + if onnx_path is None: # pragma: no cover + logger.error( + "Model executable not found. " + "Make sure to run exporter callback before archiver callback. " + "Skipping model archiving." 
+ ) + return + + pl_module.core.archive(onnx_path) diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index de5fde88..e727e81f 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -1,63 +1,32 @@ import logging -from pathlib import Path -from typing import cast import lightning.pytorch as pl -from luxonis_train.utils.config import Config +import luxonis_train from luxonis_train.utils.registry import CALLBACKS -from luxonis_train.utils.tracker import LuxonisTrackerPL +from .needs_checkpoint import NeedsCheckpoint + +logger = logging.getLogger(__name__) -@CALLBACKS.register_module() -class ExportOnTrainEnd(pl.Callback): - def __init__(self, upload_to_mlflow: bool = False): - """Callback that performs export on train end with best weights according to the - validation loss. - - @type upload_to_mlflow: bool - @param upload_to_mlflow: If set to True, overrides the upload url in Exporter - with currently active MLFlow run (if present). - """ - super().__init__() - self.upload_to_mlflow = upload_to_mlflow - def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: +@CALLBACKS.register_module() +class ExportOnTrainEnd(NeedsCheckpoint): + def on_train_end( + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> None: """Exports the model on train end. @type trainer: L{pl.Trainer} @param trainer: Pytorch Lightning trainer. @type pl_module: L{pl.LightningModule} @param pl_module: Pytorch Lightning module. - @raises RuntimeError: If no best model path is found. """ - from luxonis_train.core.exporter import Exporter - - model_checkpoint_callbacks = [ - c - for c in trainer.callbacks # type: ignore - if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore - ] - # NOTE: assume that first checkpoint callback is based on val loss - best_model_path = model_checkpoint_callbacks[0].best_model_path - if not best_model_path: - raise RuntimeError( - "No best model path found. " - "Please make sure that ModelCheckpoint callback is present " - "and at least one validation epoch has been performed." - ) - cfg: Config = pl_module.cfg - cfg.model.weights = best_model_path - if self.upload_to_mlflow: - if cfg.tracker.is_mlflow: - tracker = cast(LuxonisTrackerPL, trainer.logger) - new_upload_directory = f"mlflow://{tracker.project_id}/{tracker.run_id}" - cfg.exporter.upload_directory = new_upload_directory - else: - logging.getLogger(__name__).warning( - "`upload_to_mlflow` is set to True, " - "but there is no MLFlow active run, skipping." - ) - exporter = Exporter(cfg=cfg) - onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx")) - exporter.export(onnx_path=onnx_path) + path = self.get_checkpoint(pl_module) + if path is None: # pragma: no cover + logger.warning("Skipping model export.") + return + + pl_module.core.export(weights=self.get_checkpoint(pl_module)) diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py new file mode 100644 index 00000000..a189ed3f --- /dev/null +++ b/luxonis_train/callbacks/gpu_stats_monitor.py @@ -0,0 +1,296 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +GPU Stats Monitor +================= + +Monitor and logs GPU stats during training. + +""" + +import os +import shutil +import subprocess +import time +from typing import Any, Dict, List, Optional, Tuple + +import pytorch_lightning as pl +import torch +from lightning.pytorch.accelerators.cuda import CUDAAccelerator +from lightning_fabric.utilities.exceptions import ( + MisconfigurationException, # noqa: F401 +) +from pytorch_lightning.utilities import rank_zero_only +from pytorch_lightning.utilities.parsing import AttributeDict +from pytorch_lightning.utilities.types import STEP_OUTPUT + +from luxonis_train.utils.registry import CALLBACKS + + +@CALLBACKS.register_module() +class GPUStatsMonitor(pl.Callback): + def __init__( + self, + memory_utilization: bool = True, + gpu_utilization: bool = True, + intra_step_time: bool = False, + inter_step_time: bool = False, + fan_speed: bool = False, + temperature: bool = False, + ): + """Automatically monitors and logs GPU stats during training + stage. C{GPUStatsMonitor} is a callback and in order to use it + you need to assign a logger in the C{Trainer}. + + GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: + + - C{fan.speed} – The fan speed value is the percent of maximum speed that the device's fan is currently + intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. + If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. + Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. + - C{memory.used} – Total memory allocated by active contexts. + - C{memory.free} – Total free memory. + - C{utilization.gpu} – Percent of time over the past sample period during which one or more kernels was + executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. + - C{utilization.memory} – Percent of time over the past sample period during which global (device) memory was + being read or written. The sample period may be between 1 second and 1/6 second depending on the product. + - C{temperature.gpu} – Core GPU temperature, in degrees C. + - C{temperature.memory} – HBM memory temperature, in degrees C. + + @type memory_utilization: bool + @param memory_utilization: Set to C{True} to monitor used, free and percentage of memory utilization at the start and end of each step. Defaults to C{True}. + @type gpu_utilization: bool + @param gpu_utilization: Set to C{True} to monitor percentage of GPU utilization at the start and end of each step. Defaults to C{True}. + @type intra_step_time: bool + @param intra_step_time: Set to C{True} to monitor the time of each step. Defaults to {False}. + @type inter_step_time: bool + @param inter_step_time: Set to C{True} to monitor the time between the end of one step and the start of the next step. Defaults to C{False}. + @type fan_speed: bool + @param fan_speed: Set to C{True} to monitor percentage of fan speed. Defaults to C{False}. 
+        @type temperature: bool
+        @param temperature: Set to C{True} to monitor the memory and gpu temperature in degrees Celsius. Defaults to C{False}.
+        @raises MisconfigurationException: If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger.
+        """
+
+        super().__init__()
+
+        if shutil.which("nvidia-smi") is None:
+            raise MisconfigurationException(
+                "Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed."
+            )
+
+        self._log_stats = AttributeDict(
+            {
+                "memory_utilization": memory_utilization,
+                "gpu_utilization": gpu_utilization,
+                "intra_step_time": intra_step_time,
+                "inter_step_time": inter_step_time,
+                "fan_speed": fan_speed,
+                "temperature": temperature,
+            }
+        )
+
+        # The logical device IDs for selected devices
+        self._device_ids: List[int] = []  # will be assigned later in setup()
+
+        # The unmasked real GPU IDs
+        self._gpu_ids: List[str] = []  # will be assigned later in setup()
+
+    @staticmethod
+    def is_available() -> bool:
+        if shutil.which("nvidia-smi") is None:
+            return False
+        return CUDAAccelerator.is_available()
+
+    def setup(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        stage: Optional[str] = None,
+    ) -> None:
+        if not trainer.logger:
+            raise MisconfigurationException(
+                "Cannot use GPUStatsMonitor callback with Trainer that has no logger."
+            )
+
+        if not CUDAAccelerator.is_available():
+            raise MisconfigurationException(
+                "You are using GPUStatsMonitor but the CUDA Accelerator is not available."
+            )
+
+        # The logical device IDs for selected devices
+        # ignoring mypy check because `trainer.data_parallel_device_ids` is None when using CPU
+        self._device_ids = sorted(set(trainer.device_ids))
+
+        # The unmasked real GPU IDs
+        self._gpu_ids = self._get_gpu_ids(self._device_ids)
+
+    def on_train_epoch_start(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"
+    ) -> None:
+        self._snap_intra_step_time: Optional[float] = None
+        self._snap_inter_step_time: Optional[float] = None
+
+    @rank_zero_only
+    def on_train_batch_start(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        batch: Any,
+        batch_idx: int,
+    ) -> None:
+        if self._log_stats.intra_step_time:
+            self._snap_intra_step_time = time.time()
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        gpu_stat_keys = self._get_gpu_stat_keys()
+        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
+        logs = self._parse_gpu_stats(
+            self._device_ids, gpu_stats, gpu_stat_keys
+        )
+
+        if self._log_stats.inter_step_time and self._snap_inter_step_time:
+            # First log at beginning of second step
+            logs["batch_time/inter_step (ms)"] = (
+                time.time() - self._snap_inter_step_time
+            ) * 1000
+
+        assert trainer.logger is not None
+        trainer.logger.log_metrics(logs, step=trainer.global_step)
+
+    @rank_zero_only
+    def on_train_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: STEP_OUTPUT,
+        batch: Any,
+        batch_idx: int,
+    ) -> None:
+        if self._log_stats.inter_step_time:
+            self._snap_inter_step_time = time.time()
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        gpu_stat_keys = (
+            self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys()
+        )
+        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
+        logs = self._parse_gpu_stats(
+            self._device_ids, gpu_stats, gpu_stat_keys
+        )
+
+        if self._log_stats.intra_step_time and self._snap_intra_step_time:
+            logs["batch_time/intra_step (ms)"] = (
+                time.time() - self._snap_intra_step_time
+            ) * 1000
+
+        assert
trainer.logger is not None + trainer.logger.log_metrics(logs, step=trainer.global_step) + + @staticmethod + def _get_gpu_ids(device_ids: List[int]) -> List[str]: + """Get the unmasked real GPU IDs.""" + # All devices if `CUDA_VISIBLE_DEVICES` unset + default = ",".join(str(i) for i in range(torch.cuda.device_count())) + cuda_visible_devices: List[str] = os.getenv( + "CUDA_VISIBLE_DEVICES", default=default + ).split(",") + return [ + cuda_visible_devices[device_id].strip() for device_id in device_ids + ] + + def _get_gpu_stats(self, queries: List[str]) -> List[List[float]]: + if not queries: + return [] + + """Run nvidia-smi to get the gpu stats""" + gpu_query = ",".join(queries) + format = "csv,nounits,noheader" + gpu_ids = ",".join(self._gpu_ids) + result = subprocess.run( + [ + # it's ok to supress the warning here since we ensure nvidia-smi exists during init + shutil.which("nvidia-smi"), # type: ignore + f"--query-gpu={gpu_query}", + f"--format={format}", + f"--id={gpu_ids}", + ], + encoding="utf-8", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, # for backward compatibility with python version 3.6 + check=True, + ) + + def _to_float(x: str) -> float: + try: + return float(x) + except ValueError: + return 0.0 + + stats = [ + [_to_float(x) for x in s.split(", ")] + for s in result.stdout.strip().split(os.linesep) + ] + return stats + + @staticmethod + def _parse_gpu_stats( + device_ids: List[int], + stats: List[List[float]], + keys: List[Tuple[str, str]], + ) -> Dict[str, float]: + """Parse the gpu stats into a loggable dict.""" + logs = {} + for i, device_id in enumerate(device_ids): + for j, (x, unit) in enumerate(keys): + if unit == "%": + unit = "percent" + logs[f"GPU_{device_id}/{x} - {unit}"] = stats[i][j] + return logs + + def _get_gpu_stat_keys(self) -> List[Tuple[str, str]]: + """Get the GPU stats keys.""" + stat_keys = [] + + if self._log_stats.gpu_utilization: + stat_keys.append(("utilization.gpu", "%")) + + if self._log_stats.memory_utilization: + stat_keys.extend( + [ + ("memory.used", "MB"), + ("memory.free", "MB"), + ("utilization.memory", "%"), + ] + ) + + return stat_keys + + def _get_gpu_device_stat_keys(self) -> List[Tuple[str, str]]: + """Get the device stats keys.""" + stat_keys = [] + + if self._log_stats.fan_speed: + stat_keys.append(("fan.speed", "%")) + + if self._log_stats.temperature: + stat_keys.extend( + [("temperature.gpu", "°C"), ("temperature.memory", "°C")] + ) + + return stat_keys diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index fcc130cd..b8bf6512 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -1,47 +1,125 @@ +from abc import ABC, abstractmethod from collections.abc import Mapping import lightning.pytorch as pl -import rich -from lightning.pytorch.callbacks import RichProgressBar +import tabulate +from lightning.pytorch.callbacks import ( + ProgressBar, + RichProgressBar, + TQDMProgressBar, +) +from rich.console import Console from rich.table import Table from luxonis_train.utils.registry import CALLBACKS -@CALLBACKS.register_module() -class LuxonisProgressBar(RichProgressBar): - """Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.""" - - _console: rich.console.Console - - def __init__(self): - super().__init__(leave=True) - - def print_single_line(self, text: str, style: str = "magenta") -> None: - """Prints single line of text to the console.""" - self._check_console() - text = 
f"[{style}]{text}[/{style}]" - self._console.print(text) - +class BaseLuxonisProgressBar(ABC, ProgressBar): def get_metrics( self, trainer: pl.Trainer, pl_module: pl.LightningModule ) -> dict[str, int | str | float | dict[str, float]]: - # NOTE: there might be a cleaner way of doing this items = super().get_metrics(trainer, pl_module) - if trainer.training: + items.pop("v_num", None) + if trainer.training and pl_module.training_step_outputs: items["Loss"] = pl_module.training_step_outputs[-1]["loss"].item() return items - def _check_console(self) -> None: - """Checks if console is set. + @abstractmethod + def print_results( + self, + stage: str, + loss: float, + metrics: Mapping[str, Mapping[str, int | str | float]], + ) -> None: + """Prints results to the console. - @raises RuntimeError: If console is not set. + This includes the stage name, loss value, and tables with + metrics. + + @type stage: str + @param stage: Stage name. + @type loss: float + @param loss: Loss value. + @type metrics: Mapping[str, Mapping[str, int | str | float]] + @param metrics: Metrics in format {table_name: table}. + """ + ... + + +@CALLBACKS.register_module() +class LuxonisTQDMProgressBar(TQDMProgressBar, BaseLuxonisProgressBar): + """Custom text progress bar based on TQDMProgressBar from Pytorch + Lightning.""" + + def __init__(self): + super().__init__(leave=True) + + def _rule(self, title: str | None = None) -> None: + if title is not None: + print(f"------{title}-----") + else: + print("-----------------") + + def _print_table( + self, + title: str, + table: Mapping[str, int | str | float], + key_name: str = "Name", + value_name: str = "Value", + ) -> None: + """Prints table to the console using tabulate. + + @type title: str + @param title: Title of the table + @type table: Mapping[str, int | str | float] + @param table: Table to print + @type key_name: str + @param key_name: Name of the key column. Defaults to C{"Name"}. + @type value_name: str + @param value_name: Name of the value column. Defaults to + C{"Value"}. """ - if self._console is None: + self._rule(title) + print( + tabulate.tabulate( + table.items(), + headers=[key_name, value_name], + tablefmt="fancy_grid", + numalign="right", + ) + ) + print() + + def print_results( + self, + stage: str, + loss: float, + metrics: Mapping[str, Mapping[str, int | str | float]], + ) -> None: + self._rule(stage) + print(f"Loss: {loss}") + print("Metrics:") + for table_name, table in metrics.items(): + self._print_table(table_name, table) + self._rule() + + +@CALLBACKS.register_module() +class LuxonisRichProgressBar(RichProgressBar, BaseLuxonisProgressBar): + """Custom rich text progress bar based on RichProgressBar from + Pytorch Lightning.""" + + def __init__(self): + super().__init__(leave=True) + + @property + def console(self) -> Console: + if self._console is None: # pragma: no cover raise RuntimeError( - "Console not set. Set `use_rich_text` to `False` " - "in your configuration file." + "Console is not initialized for the `LuxonisRichProgressBar`. " + "Consider setting `tracker.use_rich_progress_bar` to `False` in the configuration." ) + return self._console def print_table( self, @@ -59,7 +137,8 @@ def print_table( @type key_name: str @param key_name: Name of the key column. Defaults to C{"Name"}. @type value_name: str - @param value_name: Name of the value column. Defaults to C{"Value"}. + @param value_name: Name of the value column. Defaults to + C{"Value"}. 
""" rich_table = Table( title=title, @@ -69,23 +148,8 @@ def print_table( rich_table.add_column(key_name, style="magenta") rich_table.add_column(value_name, style="white") for name, value in table.items(): - if isinstance(value, float): - rich_table.add_row(name, f"{value:.5f}") - else: - rich_table.add_row(name, str(value)) - self._check_console() - self._console.print(rich_table) - - def print_tables( - self, tables: Mapping[str, Mapping[str, int | str | float]] - ) -> None: - """Prints multiple tables to the console using rich text. - - @type tables: Mapping[str, Mapping[str, int | str | float]] - @param tables: Tables to print in format {table_name: table}. - """ - for table_name, table in tables.items(): - self.print_table(table_name, table) + rich_table.add_row(name, f"{value:.5f}") + self.console.print(rich_table) def print_results( self, @@ -93,19 +157,11 @@ def print_results( loss: float, metrics: Mapping[str, Mapping[str, int | str | float]], ) -> None: - """Prints results to the console using rich text. - - @type stage: str - @param stage: Stage name. - @type loss: float - @param loss: Loss value. - @type metrics: Mapping[str, Mapping[str, int | str | float]] - @param metrics: Metrics in format {table_name: table}. - """ - assert self._console is not None - - self._console.print(f"------{stage}-----", style="bold magenta") - self._console.print(f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]") - self._console.print("[bold magenta]Metrics:[/bold magenta]") - self.print_tables(metrics) - self._console.print("---------------", style="bold magenta") + self.console.rule(f"{stage}", style="bold magenta") + self.console.print( + f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]" + ) + self.console.print("[bold magenta]Metrics:[/bold magenta]") + for table_name, table in metrics.items(): + self.print_table(table_name, table) + self.console.rule(style="bold magenta") diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index 5ccf542f..ab29f7d0 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -5,7 +5,8 @@ import pkg_resources import yaml -from luxonis_train.utils.config import Config +import luxonis_train +from luxonis_train.utils import Config from luxonis_train.utils.registry import CALLBACKS @@ -14,8 +15,9 @@ class MetadataLogger(pl.Callback): def __init__(self, hyperparams: list[str]): """Callback that logs training metadata. - Metadata include all defined hyperparameters together with git hashes of - luxonis-ml and luxonis-train packages. Also stores this information locally. + Metadata include all defined hyperparameters together with git + hashes of luxonis-ml and luxonis-train packages. Also stores + this information locally. @type hyperparams: list[str] @param hyperparams: List of hyperparameters to log. 
@@ -23,28 +25,45 @@ def __init__(self, hyperparams: list[str]): super().__init__() self.hyperparams = hyperparams - def on_fit_start(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: + def on_fit_start( + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> None: cfg: Config = pl_module.cfg hparams = {key: cfg.get(key) for key in self.hyperparams} - # try to get luxonis-ml and luxonis-train git commit hashes (if installed as editable) luxonis_ml_hash = self._get_editable_package_git_hash("luxonis_ml") - if luxonis_ml_hash: + if luxonis_ml_hash: # pragma: no cover hparams["luxonis_ml"] = luxonis_ml_hash - luxonis_train_hash = self._get_editable_package_git_hash("luxonis_train") - if luxonis_train_hash: + luxonis_train_hash = self._get_editable_package_git_hash( + "luxonis_train" + ) + if luxonis_train_hash: # pragma: no cover hparams["luxonis_train"] = luxonis_train_hash - trainer.logger.log_hyperparams(hparams) # type: ignore - # also save metadata locally - with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w+") as f: + pl_module.logger.log_hyperparams(hparams) + with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w") as f: yaml.dump(hparams, f, default_flow_style=False) - def _get_editable_package_git_hash(self, package_name: str) -> str | None: + @staticmethod + def _get_editable_package_git_hash( + package_name: str, + ) -> str | None: # pragma: no cover + """Get git hash of an editable package. + + @type package_name: str + @param package_name: Name of the package. + @rtype: str or None + @return: Git hash of the package or None if the package is not + installed in editable mode. + """ try: distribution = pkg_resources.get_distribution(package_name) + if distribution.location is None: + return None package_location = osp.join(distribution.location, package_name) # remove any additional folders in path (e.g. "/src") diff --git a/luxonis_train/callbacks/module_freezer.py b/luxonis_train/callbacks/module_freezer.py index 4f73ff30..de0afa99 100644 --- a/luxonis_train/callbacks/module_freezer.py +++ b/luxonis_train/callbacks/module_freezer.py @@ -13,7 +13,8 @@ def __init__(self, frozen_modules: list[tuple[nn.Module, int]]): """Callback that freezes parts of the model. @type frozen_modules: list[tuple[nn.Module, int]] - @param frozen_modules: List of tuples of modules and epochs to freeze until. + @param frozen_modules: List of tuples of modules and epochs to + freeze until. """ super().__init__() self.frozen_modules = frozen_modules diff --git a/luxonis_train/callbacks/needs_checkpoint.py b/luxonis_train/callbacks/needs_checkpoint.py new file mode 100644 index 00000000..b3de6aed --- /dev/null +++ b/luxonis_train/callbacks/needs_checkpoint.py @@ -0,0 +1,59 @@ +import logging +from typing import Literal + +import lightning.pytorch as pl + +import luxonis_train + +logger = logging.getLogger(__name__) + + +class NeedsCheckpoint(pl.Callback): + def __init__( + self, + preferred_checkpoint: Literal["metric", "loss"] = "metric", + **kwargs, + ): + super().__init__(**kwargs) + self.preferred_checkpoint = preferred_checkpoint + + @staticmethod + def _get_checkpoint( + checkpoint_type: str, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> str | None: + if checkpoint_type == "loss": + path = pl_module.core.get_min_loss_checkpoint_path() + if not path: + logger.error( + "No checkpoint for minimum loss found. 
" + "Make sure that `ModelCheckpoint` callback is present " + "and at least one validation epoch has been performed." + ) + return path + else: + path = pl_module.core.get_best_metric_checkpoint_path() + if not path: + logger.error( + "No checkpoint for best metric found. " + "Make sure that `ModelCheckpoint` callback is present, " + "at least one validation epoch has been performed and " + "the model has at least one metric." + ) + return path + + @staticmethod + def _get_other_type(checkpoint_type: str) -> str: + if checkpoint_type == "loss": + return "metric" + return "loss" + + def get_checkpoint( + self, pl_module: "luxonis_train.models.LuxonisLightningModule" + ) -> str | None: + path = self._get_checkpoint(self.preferred_checkpoint, pl_module) + if path is not None: + return path + other_checkpoint = self._get_other_type(self.preferred_checkpoint) + logger.info(f"Attempting to use {other_checkpoint} checkpoint.") + return self._get_checkpoint(other_checkpoint, pl_module) diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index 8cf23e3c..a60a16dd 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -1,9 +1,7 @@ import lightning.pytorch as pl -from luxonis_ml.data import LuxonisDataset, ValAugmentations -from torch.utils.data import DataLoader +from lightning.pytorch.callbacks import ModelCheckpoint -from luxonis_train.utils.config import Config -from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn +import luxonis_train from luxonis_train.utils.registry import CALLBACKS @@ -11,33 +9,24 @@ class TestOnTrainEnd(pl.Callback): """Callback to perform a test run at the end of the training.""" - def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: - cfg: Config = pl_module.cfg + def on_train_end( + self, + trainer: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> None: + # `trainer.test` would delete the paths so we need to save them + best_paths = { + hash(callback.monitor): callback.best_model_path + for callback in trainer.callbacks # type: ignore + if isinstance(callback, ModelCheckpoint) + } - dataset = LuxonisDataset( - dataset_name=cfg.dataset.name, - team_id=cfg.dataset.team_id, - dataset_id=cfg.dataset.id, - bucket_type=cfg.dataset.bucket_type, - bucket_storage=cfg.dataset.bucket_storage, - ) + trainer.test(pl_module, pl_module.core.pytorch_loaders["test"]) - loader_test = LuxonisLoaderTorch( - dataset, - view=cfg.dataset.test_view, - augmentations=ValAugmentations( - image_size=cfg.trainer.preprocessing.train_image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ), - ) - pytorch_loader_test = DataLoader( - loader_test, - batch_size=cfg.trainer.batch_size, - num_workers=cfg.trainer.num_workers, - collate_fn=collate_fn, - ) - trainer.test(pl_module, pytorch_loader_test) + # Restore the paths + for callback in trainer.callbacks: # type: ignore + if isinstance(callback, ModelCheckpoint): + if hash(callback.monitor) in best_paths: + callback.best_model_path = best_paths[ + hash(callback.monitor) + ] diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py new file mode 100644 index 00000000..b9753e94 --- /dev/null +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -0,0 +1,59 @@ +import 
logging +import os +from pathlib import Path +from typing import Any + +import lightning.pytorch as pl +import torch + +import luxonis_train +from luxonis_train.utils.registry import CALLBACKS + + +@CALLBACKS.register_module() +class UploadCheckpoint(pl.Callback): + """Callback that uploads best checkpoint based on the validation + loss.""" + + def __init__(self): + """Constructs `UploadCheckpoint`. + + @type upload_directory: str + @param upload_directory: Path used as upload directory + """ + super().__init__() + self.logger = logging.getLogger(__name__) + self.last_logged_epoch = None + self.last_best_checkpoints = set() + + def on_save_checkpoint( + self, + trainer: pl.Trainer, + module: "luxonis_train.models.LuxonisLightningModule", + checkpoint: dict[str, Any], + ) -> None: + # Log only once per epoch in case there are multiple ModelCheckpoint callbacks + if not self.last_logged_epoch == trainer.current_epoch: + checkpoint_paths = [ + c.best_model_path + for c in trainer.callbacks # type: ignore + if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore + and c.best_model_path + ] + for curr_best_checkpoint in checkpoint_paths: + if curr_best_checkpoint not in self.last_best_checkpoints: + self.logger.info("Uploading checkpoint...") + temp_filename = ( + Path(curr_best_checkpoint) + .parent.with_suffix(".ckpt") + .name + ) + torch.save(checkpoint, temp_filename) + module.logger.upload_artifact(temp_filename, typ="weights") + + os.remove(temp_filename) + + self.logger.info("Checkpoint upload finished") + self.last_best_checkpoints.add(curr_best_checkpoint) + + self.last_logged_epoch = trainer.current_epoch diff --git a/luxonis_train/callbacks/upload_checkpoint_on_train_end.py b/luxonis_train/callbacks/upload_checkpoint_on_train_end.py deleted file mode 100644 index 86879ec9..00000000 --- a/luxonis_train/callbacks/upload_checkpoint_on_train_end.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging - -import lightning.pytorch as pl -from luxonis_ml.utils.filesystem import LuxonisFileSystem - -from luxonis_train.utils.registry import CALLBACKS - - -@CALLBACKS.register_module() -class UploadCheckpointOnTrainEnd(pl.Callback): - """Callback that uploads best checkpoint based on the validation loss.""" - - def __init__(self, upload_directory: str): - """Constructs `UploadCheckpointOnTrainEnd`. 
- - @type upload_directory: str - @param upload_directory: Path used as upload directory - """ - super().__init__() - self.fs = LuxonisFileSystem( - upload_directory, allow_active_mlflow_run=True, allow_local=False - ) - - def on_train_end(self, trainer: pl.Trainer, _: pl.LightningModule) -> None: - logger = logging.getLogger(__name__) - logger.info(f"Started checkpoint upload to {self.fs.full_path()}...") - model_checkpoint_callbacks = [ - c - for c in trainer.callbacks # type: ignore - if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore - ] - # NOTE: assume that first checkpoint callback is based on val loss - local_path = model_checkpoint_callbacks[0].best_model_path - self.fs.put_file( - local_path=local_path, - remote_path=local_path.split("/")[-1], - mlflow_instance=trainer.logger.experiment.get( # type: ignore - "mlflow", None - ), - ) - logger.info("Checkpoint upload finished") diff --git a/luxonis_train/core/__init__.py b/luxonis_train/core/__init__.py index 6264473b..6d468af2 100644 --- a/luxonis_train/core/__init__.py +++ b/luxonis_train/core/__init__.py @@ -1,6 +1,3 @@ -from .exporter import Exporter -from .inferer import Inferer -from .trainer import Trainer -from .tuner import Tuner +from .core import LuxonisModel -__all__ = ["Exporter", "Trainer", "Tuner", "Inferer"] +__all__ = ["LuxonisModel"] diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 75bd1d2a..cffa3ff1 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -1,35 +1,55 @@ -import os import os.path as osp +import signal +import threading from logging import getLogger -from typing import Any +from pathlib import Path +from typing import Any, Literal, Mapping, overload import lightning.pytorch as pl import lightning_utilities.core.rank_zero as rank_zero_module import rich.traceback import torch -from lightning.pytorch.utilities import rank_zero_only # type: ignore -from luxonis_ml.data import LuxonisDataset, TrainAugmentations, ValAugmentations -from luxonis_ml.utils import reset_logging, setup_logging - -from luxonis_train.callbacks import LuxonisProgressBar -from luxonis_train.utils.config import Config -from luxonis_train.utils.general import DatasetMetadata -from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn -from luxonis_train.utils.tracker import LuxonisTrackerPL +import torch.utils.data as torch_data +import yaml +from lightning.pytorch.utilities import rank_zero_only +from luxonis_ml.data import Augmentations +from luxonis_ml.nn_archive import ArchiveGenerator +from luxonis_ml.nn_archive.config import CONFIG_VERSION +from luxonis_ml.utils import LuxonisFileSystem, reset_logging, setup_logging +from typeguard import typechecked + +from luxonis_train.attached_modules.visualizers import get_unnormalized_images +from luxonis_train.callbacks import ( + LuxonisRichProgressBar, + LuxonisTQDMProgressBar, +) +from luxonis_train.loaders import BaseLoaderTorch, collate_fn +from luxonis_train.models import LuxonisLightningModule +from luxonis_train.utils import Config, DatasetMetadata, LuxonisTrackerPL +from luxonis_train.utils.registry import LOADERS + +from .utils.export_utils import ( + blobconverter_export, + get_preprocessing, + replace_weights, + try_onnx_simplify, +) +from .utils.infer_utils import render_visualizations +from .utils.train_utils import create_trainer logger = getLogger(__name__) -class Core: +class LuxonisModel: """Common logic of the core components. 
- This class contains common logic of the core components (trainer, evaluator, - exporter, etc.). + This class contains common logic of the core components (trainer, + evaluator, exporter, etc.). """ def __init__( self, - cfg: str | dict[str, Any] | Config, + cfg: str | dict[str, Any] | Config | None, opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, ): """Constructs a new Core instance. @@ -44,171 +64,670 @@ def __init__( @param opts: Argument dict provided through command line, used for config overriding """ - overrides = {} - if opts: - if isinstance(opts, dict): - overrides = opts - else: - if len(opts) % 2 != 0: - raise ValueError( - "Override options should be a list of key-value pairs" - ) - - # NOTE: has to be done like this for torchx to work - for i in range(0, len(opts), 2): - overrides[opts[i]] = opts[i + 1] - if isinstance(cfg, Config): self.cfg = cfg else: - self.cfg = Config.get_config(cfg, overrides) - - opts = opts or [] - - if self.cfg.use_rich_text: - rich.traceback.install(suppress=[pl, torch]) + self.cfg = Config.get_config(cfg, opts) - self.rank = rank_zero_only.rank + rich.traceback.install(suppress=[pl, torch], show_locals=False) self.tracker = LuxonisTrackerPL( - rank=self.rank, + rank=rank_zero_only.rank, mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + _auto_finalize=False, **self.cfg.tracker.model_dump(), ) - self.run_save_dir = os.path.join( + self.run_save_dir = osp.join( self.cfg.tracker.save_directory, self.tracker.run_name ) + self.log_file = osp.join(self.run_save_dir, "luxonis_train.log") + self.error_message = None + # NOTE: to add the file handler (we only get the save dir now, # but we want to use the logger before) reset_logging() - setup_logging( - use_rich=self.cfg.use_rich_text, - file=osp.join(self.run_save_dir, "luxonis_train.log"), - ) + setup_logging(file=self.log_file, use_rich=True) # NOTE: overriding logger in pl so it uses our logger to log device info rank_zero_module.log = logger - self.train_augmentations = TrainAugmentations( + if self.cfg.trainer.seed is not None: + pl.seed_everything(self.cfg.trainer.seed, workers=True) + + self.pl_trainer = create_trainer( + self.cfg.trainer, + logger=self.tracker, + callbacks=LuxonisRichProgressBar() + if self.cfg.trainer.use_rich_progress_bar + else LuxonisTQDMProgressBar(), + ) + + self.train_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, augmentations=[ - i.model_dump() for i in self.cfg.trainer.preprocessing.augmentations + i.model_dump() + for i in self.cfg.trainer.preprocessing.get_active_augmentations() ], train_rgb=self.cfg.trainer.preprocessing.train_rgb, keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio, ) - self.val_augmentations = ValAugmentations( + self.val_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, augmentations=[ - i.model_dump() for i in self.cfg.trainer.preprocessing.augmentations + i.model_dump() + for i in self.cfg.trainer.preprocessing.get_active_augmentations() ], train_rgb=self.cfg.trainer.preprocessing.train_rgb, keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio, + only_normalize=True, ) - self.pl_trainer = pl.Trainer( - accelerator=self.cfg.trainer.accelerator, - devices=self.cfg.trainer.devices, - strategy=self.cfg.trainer.strategy, - logger=self.tracker, # type: ignore - max_epochs=self.cfg.trainer.epochs, - accumulate_grad_batches=self.cfg.trainer.accumulate_grad_batches, - 
check_val_every_n_epoch=self.cfg.trainer.validation_interval,
-            num_sanity_val_steps=self.cfg.trainer.num_sanity_val_steps,
-            profiler=self.cfg.trainer.profiler,  # for debugging purposes,
-            # NOTE: this is likely PL bug,
-            # should be configurable inside configure_callbacks(),
-            callbacks=LuxonisProgressBar() if self.cfg.use_rich_text else None,
+        self.loaders: dict[str, BaseLoaderTorch] = {}
+        for view in ["train", "val", "test"]:
+            loader_name = self.cfg.loader.name
+            Loader = LOADERS.get(loader_name)
+            if loader_name == "LuxonisLoaderTorch" and view != "train":
+                self.cfg.loader.params["delete_existing"] = False
+
+            self.loaders[view] = Loader(
+                augmentations=(
+                    self.train_augmentations
+                    if view == "train"
+                    else self.val_augmentations
+                ),
+                view={
+                    "train": self.cfg.loader.train_view,
+                    "val": self.cfg.loader.val_view,
+                    "test": self.cfg.loader.test_view,
+                }[view],
+                image_source=self.cfg.loader.image_source,
+                **self.cfg.loader.params,
+            )
+
+        for name, loader in self.loaders.items():
+            logger.info(
+                f"{name.capitalize()} loader - splits: {loader.view}, size: {len(loader)}"
+            )
+            if len(loader) == 0:
+                logger.warning(f"{name.capitalize()} loader is empty!")
+
+        sampler = None
+        # TODO: implement weighted sampler
+        if self.cfg.trainer.use_weighted_sampler:
+            raise NotImplementedError(
+                "Weighted sampler is not implemented yet."
+            )
+
+        self.pytorch_loaders = {
+            view: torch_data.DataLoader(
+                self.loaders[view],
+                batch_size=self.cfg.trainer.batch_size,
+                num_workers=self.cfg.trainer.n_workers,
+                collate_fn=collate_fn,
+                shuffle=view == "train",
+                drop_last=(
+                    self.cfg.trainer.skip_last_batch
+                    if view == "train"
+                    else False
+                ),
+                pin_memory=self.cfg.trainer.pin_memory,
+                sampler=sampler if view == "train" else None,
+            )
+            for view in ["train", "val", "test"]
+        }
+
+        self.dataset_metadata = DatasetMetadata.from_loader(
+            self.loaders["train"]
         )
-        self.dataset = LuxonisDataset(
-            dataset_name=self.cfg.dataset.name,
-            team_id=self.cfg.dataset.team_id,
-            dataset_id=self.cfg.dataset.id,
-            bucket_type=self.cfg.dataset.bucket_type,
-            bucket_storage=self.cfg.dataset.bucket_storage,
+
+        self.cfg.save_data(osp.join(self.run_save_dir, "config.yaml"))
+
+        self.input_shapes = self.loaders["train"].input_shapes
+
+        self.lightning_module = LuxonisLightningModule(
+            cfg=self.cfg,
+            dataset_metadata=self.dataset_metadata,
+            save_dir=self.run_save_dir,
+            input_shapes=self.input_shapes,
+            _core=self,
         )
-        self.loader_train = LuxonisLoaderTorch(
-            self.dataset,
-            view=self.cfg.dataset.train_view,
-            augmentations=self.train_augmentations,
+        self._exported_models: dict[str, Path] = {}
+
+    def _train(self, resume: str | None, *args, **kwargs):
+        status = "success"
+        try:
+            self.pl_trainer.fit(*args, ckpt_path=resume, **kwargs)
+        except Exception as e:  # pragma: no cover
+            logger.exception("Encountered an exception during training.")
+            status = "failed"
+            raise e
+        finally:
+            self.tracker.upload_artifact(self.log_file, typ="logs")
+            self.tracker._finalize(status)
+
+    def train(
+        self, new_thread: bool = False, resume_weights: str | None = None
+    ) -> None:
+        """Runs training.
+
+        @type new_thread: bool
+        @param new_thread: Runs training in a new thread if set to True.
+        @type resume_weights: str | None
+        @param resume_weights: Path to the checkpoint from which to
+            resume the training.
+ """ + + if self.cfg.trainer.matmul_precision is not None: + logger.info( + f"Setting matmul precision to {self.cfg.trainer.matmul_precision}" + ) + torch.set_float32_matmul_precision( + self.cfg.trainer.matmul_precision + ) + + if resume_weights is not None: + resume_weights = str( + LuxonisFileSystem.download(resume_weights, self.run_save_dir) + ) + + def graceful_exit(signum: int, _): # pragma: no cover + logger.info( + f"{signal.Signals(signum).name} received, stopping training..." + ) + ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") + self.pl_trainer.save_checkpoint(ckpt_path) + self.tracker.upload_artifact( + ckpt_path, typ="checkpoints", name="resume.ckpt" + ) + self.tracker._finalize(status="failed") + exit() + + signal.signal(signal.SIGTERM, graceful_exit) + + if not new_thread: + logger.info(f"Checkpoints will be saved in: {self.run_save_dir}") + logger.info("Starting training...") + self._train( + resume_weights, + self.lightning_module, + self.pytorch_loaders["train"], + self.pytorch_loaders["val"], + ) + logger.info("Training finished") + logger.info(f"Checkpoints saved in: {self.run_save_dir}") + + else: # pragma: no cover + # Every time exception happens in the Thread, this hook will activate + def thread_exception_hook(args): + self.error_message = str(args.exc_value) + + threading.excepthook = thread_exception_hook + + self.thread = threading.Thread( + target=self._train, + args=( + resume_weights, + self.lightning_module, + self.pytorch_loaders["train"], + self.pytorch_loaders["val"], + ), + daemon=True, + ) + self.thread.start() + + def export( + self, + onnx_save_path: str | None = None, + *, + weights: str | Path | None = None, + ) -> None: + """Runs export. + + @type onnx_path: str | None + @param onnx_path: Path to .onnx model. If not specified, model will be saved + to export directory with name specified in config file. + + @raises RuntimeError: If `onnxsim` fails to simplify the model. + """ + + weights = weights or self.cfg.model.weights + + if weights is None: + logger.warning( + "No model weights specified. Exporting model without weights." 
+ ) + + export_save_dir = Path(self.run_save_dir, "export") + export_save_dir.mkdir(parents=True, exist_ok=True) + + export_path = export_save_dir / ( + self.cfg.exporter.name or self.cfg.model.name ) - self.loader_val = LuxonisLoaderTorch( - self.dataset, - view=self.cfg.dataset.val_view, - augmentations=self.val_augmentations, + onnx_save_path = onnx_save_path or str( + export_path.with_suffix(".onnx") ) - self.loader_test = LuxonisLoaderTorch( - self.dataset, - view=self.cfg.dataset.test_view, - augmentations=self.val_augmentations, + + with replace_weights(self.lightning_module, weights): + output_names = self.lightning_module.export_onnx( + onnx_save_path, **self.cfg.exporter.onnx.model_dump() + ) + + try_onnx_simplify(onnx_save_path) + self._exported_models["onnx"] = Path(onnx_save_path) + + scale_values, mean_values, reverse_channels = get_preprocessing( + self.cfg ) - self.pytorch_loader_val = torch.utils.data.DataLoader( - self.loader_val, - batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, - collate_fn=collate_fn, + if self.cfg.exporter.blobconverter.active: + try: + blobconverter_export( + self.cfg.exporter, + scale_values, + mean_values, + reverse_channels, + str(export_save_dir), + onnx_save_path, + ) + self._exported_models["blob"] = export_path.with_suffix( + ".blob" + ) + except ImportError: + logger.error("Failed to import `blobconverter`") + logger.warning( + "`blobconverter` not installed. Skipping .blob model conversion. " + "Ensure `blobconverter` is installed in your environment." + ) + + if len(self.input_shapes) > 1: + logger.error( + "Generating modelconverter config for a model " + "with multiple inputs is not implemented yet." + ) + return + + modelconverter_config = { + "input_model": onnx_save_path, + "scale_values": scale_values, + "mean_values": mean_values, + "reverse_input_channels": reverse_channels, + "shape": [1, *next(iter(self.input_shapes.values()))], + "outputs": [{"name": name} for name in output_names], + } + + for path in self._exported_models.values(): + if self.cfg.exporter.upload_to_run: + self.tracker.upload_artifact(path, typ="export") + if self.cfg.exporter.upload_url is not None: # pragma: no cover + LuxonisFileSystem.upload(path, self.cfg.exporter.upload_url) + + with open(export_path.with_suffix(".yaml"), "w") as f: + yaml.dump(modelconverter_config, f) + if self.cfg.exporter.upload_to_run: + self.tracker.upload_artifact(f.name, name=f.name, typ="export") + if self.cfg.exporter.upload_url is not None: # pragma: no cover + LuxonisFileSystem.upload(f.name, self.cfg.exporter.upload_url) + + @overload + def test( + self, + new_thread: Literal[False] = ..., + view: Literal["train", "test", "val"] = "val", + ) -> Mapping[str, float]: ... + + @overload + def test( + self, + new_thread: Literal[True] = ..., + view: Literal["train", "test", "val"] = "val", + ) -> None: ... + + @typechecked + def test( + self, + new_thread: bool = False, + view: Literal["train", "val", "test"] = "val", + ) -> Mapping[str, float] | None: + """Runs testing. + + @type new_thread: bool + @param new_thread: Runs testing in a new thread if set to True. + @type view: Literal["train", "test", "val"] + @param view: Which view to run the testing on. Defauls to "val". + @rtype: Mapping[str, float] | None + @return: If new_thread is False, returns a dictionary test + results. 
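A typical export call, under the same naming assumptions as above (paths are illustrative):

    core.export(
        "outputs/model.onnx",
        weights="outputs/run_0/best_val_loss.ckpt",
    )
    # When `onnxsim` is installed the ONNX file is simplified in place, a
    # modelconverter YAML is written next to it, and the artifacts are uploaded
    # if `exporter.upload_to_run` or `exporter.upload_url` is configured.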
+ """ + + loader = self.pytorch_loaders[view] + + if not new_thread: + return self.pl_trainer.test(self.lightning_module, loader)[0] + else: # pragma: no cover + self.thread = threading.Thread( + target=self.pl_trainer.test, + args=(self.lightning_module, loader), + daemon=True, + ) + self.thread.start() + + @typechecked + def infer( + self, + view: Literal["train", "val", "test"] = "val", + save_dir: str | Path | None = None, + ) -> None: + """Runs inference. + + @type view: str + @param view: Which split to run the inference on. Valid values + are: 'train', 'val', 'test'. Defaults to "val". + @type save_dir: str | Path | None + @param save_dir: Directory where to save the visualizations. If + not specified, visualizations will be rendered on the + screen. + """ + self.lightning_module.eval() + + for inputs, labels in self.pytorch_loaders[view]: + images = get_unnormalized_images(self.cfg, inputs) + outputs = self.lightning_module.forward( + inputs, labels, images=images, compute_visualizations=True + ) + render_visualizations(outputs.visualizations, save_dir) + + def tune(self) -> None: + """Runs Optuna tunning of hyperparameters.""" + import optuna + from optuna.integration import PyTorchLightningPruningCallback + + from .utils.tune_utils import get_trial_params + + def _objective(trial: optuna.trial.Trial) -> float: + """Objective function used to optimize Optuna study.""" + cfg_tracker = self.cfg.tracker + tracker_params = cfg_tracker.model_dump() + child_tracker = LuxonisTrackerPL( + rank=rank_zero_only.rank, + mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + is_sweep=True, + **tracker_params, + ) + + run_save_dir = osp.join( + cfg_tracker.save_directory, child_tracker.run_name + ) + + assert self.cfg.tuner is not None + curr_params = get_trial_params( + all_augs, self.cfg.tuner.params, trial + ) + curr_params["model.predefined_model"] = None + + cfg_copy = self.cfg.model_copy(deep=True) + # manually remove Normalize so it doesn't + # get duplicated when creating new cfg instance + cfg_copy.trainer.preprocessing.augmentations = [ + a + for a in cfg_copy.trainer.preprocessing.augmentations + if a.name != "Normalize" + ] + cfg = Config.get_config(cfg_copy.model_dump(), curr_params) + + child_tracker.log_hyperparams(curr_params) + + cfg.save_data(osp.join(run_save_dir, "config.yaml")) + + lightning_module = LuxonisLightningModule( + cfg=cfg, + dataset_metadata=self.dataset_metadata, + save_dir=run_save_dir, + input_shapes=self.loaders["train"].input_shapes, + _core=self, + ) + callbacks = [ + LuxonisRichProgressBar() + if cfg.trainer.use_rich_progress_bar + else LuxonisTQDMProgressBar() + ] + + pruner_callback = PyTorchLightningPruningCallback( + trial, monitor="val/loss" + ) + callbacks.append(pruner_callback) + + if self.cfg.trainer.seed is not None: + pl.seed_everything(cfg.trainer.seed, workers=True) + + pl_trainer = create_trainer( + cfg.trainer, logger=child_tracker, callbacks=callbacks + ) + + try: + pl_trainer.fit( + lightning_module, # type: ignore + self.pytorch_loaders["train"], + self.pytorch_loaders["val"], + ) + pruner_callback.check_pruned() + + # Pruning is done by raising an error + except optuna.TrialPruned as e: + logger.info(e) + + if ( + "val/loss" not in pl_trainer.callback_metrics + ): # pragma: no cover + raise ValueError( + "No validation loss found. " + "This can happen if `TestOnTrainEnd` callback is used." 
+ ) + + return pl_trainer.callback_metrics["val/loss"].item() + + cfg_tuner = self.cfg.tuner + if cfg_tuner is None: + raise ValueError( + "You have to specify the `tuner` section in config." + ) + + all_augs = [ + a.name for a in self.cfg.trainer.preprocessing.augmentations + ] + rank = rank_zero_only.rank + cfg_tracker = self.cfg.tracker + tracker_params = cfg_tracker.model_dump() + # NOTE: wandb doesn't allow multiple concurrent runs, handle this separately + tracker_params["is_wandb"] = False + self.parent_tracker = LuxonisTrackerPL( + rank=rank, + mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + is_sweep=False, + **tracker_params, ) - self.pytorch_loader_test = torch.utils.data.DataLoader( - self.loader_test, - batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, - collate_fn=collate_fn, + if self.parent_tracker.is_mlflow: # pragma: no cover + # Experiment needs to be interacted with to create actual MLFlow run + self.parent_tracker.experiment["mlflow"].active_run() + + logger.info("Starting tuning...") + + pruner = ( + optuna.pruners.MedianPruner() + if cfg_tuner.use_pruner + else optuna.pruners.NopPruner() ) - sampler = None - if self.cfg.trainer.use_weighted_sampler: - classes_count = self.dataset.get_classes()[1] - if len(classes_count) == 0: - logger.warning( - "WeightedRandomSampler only available for classification tasks. Using default sampler instead." + + storage = None + if cfg_tuner.storage.active: + if cfg_tuner.storage.storage_type == "local": + storage = "sqlite:///study_local.db" + else: # pragma: no cover + storage = "postgresql://{}:{}@{}:{}/{}".format( + self.cfg.ENVIRON.POSTGRES_USER, + self.cfg.ENVIRON.POSTGRES_PASSWORD, + self.cfg.ENVIRON.POSTGRES_HOST, + self.cfg.ENVIRON.POSTGRES_PORT, + self.cfg.ENVIRON.POSTGRES_DB, ) - else: - weights = [1 / i for i in classes_count.values()] - num_samples = sum(classes_count.values()) - sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples) - - self.pytorch_loader_train = torch.utils.data.DataLoader( - self.loader_train, - shuffle=True, - batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, - collate_fn=collate_fn, - drop_last=self.cfg.trainer.skip_last_batch, - sampler=sampler, + + study = optuna.create_study( + study_name=cfg_tuner.study_name, + storage=storage, + direction="minimize", + pruner=pruner, + load_if_exists=cfg_tuner.continue_existing_study, ) - self.error_message = None - self.dataset_metadata = DatasetMetadata.from_dataset(self.dataset) - self.dataset_metadata.set_loader(self.pytorch_loader_train) + study.optimize( + _objective, n_trials=cfg_tuner.n_trials, timeout=cfg_tuner.timeout + ) + + logger.info(f"Best study parameters: {study.best_params}") + + self.parent_tracker.log_hyperparams(study.best_params) + + if self.cfg.tracker.is_wandb: # pragma: no cover + # If wandb used then init parent tracker separately at the end + wandb_parent_tracker = LuxonisTrackerPL( + rank=rank_zero_only.rank, + **( + self.cfg.tracker.model_dump() + | {"run_name": self.parent_tracker.run_name} + ), + ) + wandb_parent_tracker.log_hyperparams(study.best_params) + + def archive(self, path: str | Path | None = None) -> Path: + """Generates an NN Archive out of a model executable. + + @type path: str | Path | None + @param path: Path to the model executable. If not specified, the + model will be exported first. + @rtype: Path + @return: Path to the generated NN Archive. 
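For reference, an illustrative `tuner` section in dict form (the same structure as the YAML config); the suffix of each key under `params` selects the Optuna sampling strategy handled by `get_trial_params` in `tune_utils.py`. The key paths and augmentation names are only examples:

    tuner_cfg = {
        "study_name": "test_study",
        "n_trials": 10,
        "use_pruner": True,
        "storage": {"active": True, "storage_type": "local"},
        "params": {
            "trainer.optimizer.params.lr_float": [1e-5, 1e-3],
            "trainer.batch_size_int": [4, 16, 4],
            "trainer.preprocessing.augmentations_subset": [["Defocus", "Sharpen"], 1],
        },
    }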
+ """ + from .utils.archive_utils import get_heads, get_inputs, get_outputs + + archive_name = self.cfg.archiver.name or self.cfg.model.name + archive_save_directory = Path(self.run_save_dir, "archive") + archive_save_directory.mkdir(parents=True, exist_ok=True) + inputs = [] + outputs = [] + + if path is None: + if "onnx" not in self._exported_models: + logger.info("Exporting model to ONNX...") + self.export() + path = self._exported_models["onnx"] + + path = Path(path) + + executable_fname = path.name + archive_name += path.suffix + + def _mult(lst: list[float | int]) -> list[float]: + return [round(x * 255.0, 5) for x in lst] + + preprocessing = { # TODO: keep preprocessing same for each input? + "mean": _mult( + self.cfg.trainer.preprocessing.normalize.params["mean"] + ), + "scale": _mult( + self.cfg.trainer.preprocessing.normalize.params["std"] + ), + "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, + "interleaved_to_planar": False, # TODO: make it modifiable? + } + + inputs_dict = get_inputs(path) + for input_name, metadata in inputs_dict.items(): + inputs.append( + { + "name": input_name, + "dtype": metadata["dtype"], + "shape": metadata["shape"], + "preprocessing": preprocessing, + "input_type": "image", + } + ) + + outputs_dict = get_outputs(path) + for output_name, metadata in outputs_dict.items(): + outputs.append( + { + "name": output_name, + "dtype": metadata["dtype"], + "shape": metadata["shape"], + } + ) + + heads = get_heads( + self.cfg, + outputs, + self.loaders["train"].get_classes(), + self.lightning_module.nodes, # type: ignore + ) - self.cfg.save_data(os.path.join(self.run_save_dir, "config.yaml")) + model = { + "metadata": { + "name": self.cfg.model.name, + "path": executable_fname, + }, + "inputs": inputs, + "outputs": outputs, + "heads": heads, + } - def set_train_augmentations(self, aug: TrainAugmentations) -> None: - """Sets augmentations used for training dataset.""" - self.train_augmentations = aug + cfg_dict = { + "config_version": CONFIG_VERSION.__args__[0], # type: ignore + "model": model, + } - def set_val_augmentations(self, aug: ValAugmentations) -> None: - """Sets augmentations used for validation dataset.""" - self.val_augmentations = aug + archive_path = ArchiveGenerator( + archive_name=archive_name, + save_path=str(archive_save_directory), + cfg_dict=cfg_dict, + executables_paths=[str(path)], # TODO: what if more executables? + ).make_archive() - def set_test_augmentations(self, aug: ValAugmentations) -> None: - """Sets augmentations used for test dataset.""" - self.test_augmentations = aug + logger.info(f"NN Archive saved to {archive_path}") + + if self.cfg.archiver.upload_url is not None: # pragma: no cover + LuxonisFileSystem.upload( + archive_path, self.cfg.archiver.upload_url + ) + + if self.cfg.archiver.upload_to_run: + self.tracker.upload_artifact(archive_path, typ="archive") + + return Path(archive_path) @rank_zero_only - def get_save_dir(self) -> str: - """Return path to directory where checkpoints are saved. + def get_status(self) -> tuple[int, int]: + """Get current status of training. - @rtype: str - @return: Save directory path + @rtype: tuple[int, int] + @return: First element is current epoch, second element is total + number of epochs. + """ + return self.lightning_module.get_status() + + @rank_zero_only + def get_status_percentage(self) -> float: + """Return percentage of current training, takes into account + early stopping. + + @rtype: float + @return: Percentage of current training in range 0-100. 
""" - return self.run_save_dir + return self.lightning_module.get_status_percentage() @rank_zero_only def get_error_message(self) -> str | None: - """Return error message if one occurs while running in thread, otherwise None. + """Return error message if one occurs while running in thread, + otherwise None. @rtype: str | None @return: Error message @@ -216,19 +735,27 @@ def get_error_message(self) -> str | None: return self.error_message @rank_zero_only - def get_min_loss_checkpoint_path(self) -> str: - """Return best checkpoint path with respect to minimal validation loss. + def get_min_loss_checkpoint_path(self) -> str | None: + """Return best checkpoint path with respect to minimal + validation loss. @rtype: str - @return: Path to best checkpoint with respect to minimal validation loss + @return: Path to best checkpoint with respect to minimal + validation loss """ + if not self.pl_trainer.checkpoint_callbacks: + return None return self.pl_trainer.checkpoint_callbacks[0].best_model_path # type: ignore @rank_zero_only - def get_best_metric_checkpoint_path(self) -> str: - """Return best checkpoint path with respect to best validation metric. + def get_best_metric_checkpoint_path(self) -> str | None: + """Return best checkpoint path with respect to best validation + metric. @rtype: str - @return: Path to best checkpoint with respect to best validation metric + @return: Path to best checkpoint with respect to best validation + metric """ + if len(self.pl_trainer.checkpoint_callbacks) < 2: + return None return self.pl_trainer.checkpoint_callbacks[1].best_model_path # type: ignore diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py deleted file mode 100644 index ab73ce72..00000000 --- a/luxonis_train/core/exporter.py +++ /dev/null @@ -1,216 +0,0 @@ -import os -import tempfile -from logging import getLogger -from pathlib import Path -from typing import Any - -import onnx -import yaml -from luxonis_ml.utils import LuxonisFileSystem -from torch import Size - -from luxonis_train.models import LuxonisModel -from luxonis_train.utils.config import Config - -from .core import Core - -logger = getLogger(__name__) - - -class Exporter(Core): - """Main API which is used to create the model, setup pytorch lightning environment - and perform training based on provided arguments and config.""" - - def __init__( - self, - cfg: str | dict[str, Any] | Config, - opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, - ): - """Constructs a new Exporter instance. - - @type cfg: str | dict[str, Any] | Config - @param cfg: Path to config file or config dict used to setup training. - - @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None - @param opts: Argument dict provided through command line, - used for config overriding. - """ - - super().__init__(cfg, opts) - - input_shape = self.cfg.exporter.input_shape - if self.cfg.model.weights is None: - raise ValueError( - "Model weights must be specified in config file for export." 
- ) - self.local_path = self.cfg.model.weights - if input_shape is None: - self.input_shape = self.loader_val.input_shape - else: - self.input_shape = Size(input_shape) - - export_path = ( - Path(self.cfg.exporter.export_save_directory) - / self.cfg.exporter.export_model_name - ) - - if not export_path.parent.exists(): - logger.info(f"Creating export directory {export_path.parent}") - export_path.parent.mkdir(parents=True, exist_ok=True) - self.export_path = str(export_path) - - normalize_params = self.cfg.trainer.preprocessing.normalize.params - if self.cfg.exporter.scale_values is not None: - self.scale_values = self.cfg.exporter.scale_values - else: - self.scale_values = normalize_params.get("std", None) - if self.scale_values: - self.scale_values = ( - [i * 255 for i in self.scale_values] - if isinstance(self.scale_values, list) - else self.scale_values * 255 - ) - - if self.cfg.exporter.mean_values is not None: - self.mean_values = self.cfg.exporter.mean_values - else: - self.mean_values = normalize_params.get("mean", None) - if self.mean_values: - self.mean_values = ( - [i * 255 for i in self.mean_values] - if isinstance(self.mean_values, list) - else self.mean_values * 255 - ) - - self.lightning_module = LuxonisModel( - cfg=self.cfg, - save_dir=self.run_save_dir, - input_shape=self.input_shape, - dataset_metadata=self.dataset_metadata, - ) - - def _get_modelconverter_config(self, onnx_path: str) -> dict[str, Any]: - """Generates export config from input config that is compatible with Luxonis - modelconverter tool. - - @type onnx_path: str - @param onnx_path: Path to .onnx model - @rtype: dict[str, Any] - @return: Export config. - """ - return { - "input_model": onnx_path, - "scale_values": self.scale_values, - "mean_values": self.mean_values, - "reverse_input_channels": self.cfg.exporter.reverse_input_channels, - "use_bgr": not self.cfg.trainer.preprocessing.train_rgb, - "input_shape": list(self.input_shape), - "data_type": self.cfg.exporter.data_type, - "output": [{"name": name} for name in self.output_names], - "meta": {"description": self.cfg.model.name}, - } - - def export(self, onnx_path: str | None = None): - """Runs export. - - @type onnx_path: str | None - @param onnx_path: Path to .onnx model. If not specified, model will be saved - to export directory with name specified in config file. - - @raises RuntimeError: If `onnxsim` fails to simplify the model. - """ - onnx_path = onnx_path or self.export_path + ".onnx" - self.output_names = self.lightning_module.export_onnx( - onnx_path, **self.cfg.exporter.onnx.model_dump() - ) - - try: - import onnxsim - - logger.info("Simplifying ONNX model...") - model_onnx = onnx.load(onnx_path) - onnx_model, check = onnxsim.simplify(model_onnx) - if not check: - raise RuntimeError("Onnx simplify failed.") - onnx.save(onnx_model, onnx_path) - logger.info(f"ONNX model saved to {onnx_path}") - - except ImportError: - logger.error("Failed to import `onnxsim`") - logger.warning( - "`onnxsim` not installed. Skipping ONNX model simplification. " - "Ensure `onnxsim` is installed in your environment." 
- ) - - files_to_upload = [self.local_path, onnx_path] - - if self.cfg.exporter.blobconverter.active: - try: - import blobconverter - - logger.info("Converting ONNX to .blob") - - optimizer_params = [] - if self.scale_values: - optimizer_params.append(f"--scale_values={self.scale_values}") - if self.mean_values: - optimizer_params.append(f"--mean_values={self.mean_values}") - if self.cfg.exporter.reverse_input_channels: - optimizer_params.append("--reverse_input_channels") - - blob_path = blobconverter.from_onnx( - model=onnx_path, - optimizer_params=optimizer_params, - data_type=self.cfg.exporter.data_type, - shaves=self.cfg.exporter.blobconverter.shaves, - use_cache=False, - output_dir=self.export_path, - ) - files_to_upload.append(blob_path) - logger.info(f".blob model saved to {blob_path}") - - except ImportError: - logger.error("Failed to import `blobconverter`") - logger.warning( - "`blobconverter` not installed. Skipping .blob model conversion. " - "Ensure `blobconverter` is installed in your environment." - ) - - if self.cfg.exporter.upload_url is not None: - self._upload(files_to_upload) - - def _upload(self, files_to_upload: list[str]): - """Uploads .pt, .onnx and current config.yaml to specified s3 bucket. - - @type files_to_upload: list[str] - @param files_to_upload: List of files to upload. - @raises ValueError: If upload url was not specified in config file. - """ - - if self.cfg.exporter.upload_url is None: - raise ValueError("Upload url must be specified in config file.") - - fs = LuxonisFileSystem(self.cfg.exporter.upload_url, allow_local=False) - logger.info(f"Started upload to {fs.full_path}...") - - for file in files_to_upload: - suffix = Path(file).suffix - fs.put_file( - local_path=file, - remote_path=self.cfg.exporter.export_model_name + suffix, - ) - - with tempfile.TemporaryFile() as f: - self.cfg.save_data(f.name) - fs.put_file(local_path=f.name, remote_path="config.yaml") - - onnx_path = os.path.join( - fs.full_path, f"{self.cfg.exporter.export_model_name}.onnx" - ) - modelconverter_config = self._get_modelconverter_config(onnx_path) - - with tempfile.TemporaryFile() as f: - yaml.dump(modelconverter_config, f, default_flow_style=False) - fs.put_file(local_path=f.name, remote_path="config_export.yaml") - - logger.info("Files upload finished") diff --git a/luxonis_train/core/inferer.py b/luxonis_train/core/inferer.py deleted file mode 100644 index b4d13b77..00000000 --- a/luxonis_train/core/inferer.py +++ /dev/null @@ -1,57 +0,0 @@ -from pathlib import Path -from typing import Literal - -import cv2 - -from luxonis_train.attached_modules.visualizers import ( - get_unnormalized_images, -) - -from .trainer import Trainer - - -class Inferer(Trainer): - def __init__( - self, - cfg: str | dict, - opts: list[str] | tuple[str, ...] 
| None, - view: Literal["train", "test", "val"], - save_dir: Path | None = None, - ): - opts = list(opts or []) - opts += ["trainer.batch_size", "1"] - super().__init__(cfg, opts) - if view == "train": - self.loader = self.pytorch_loader_train - elif view == "test": - self.loader = self.pytorch_loader_test - else: - self.loader = self.pytorch_loader_val - self.save_dir = save_dir - if self.save_dir is not None: - self.save_dir.mkdir(exist_ok=True, parents=True) - - def infer(self) -> None: - self.lightning_module.eval() - k = 0 - for inputs, labels in self.loader: - images = get_unnormalized_images(self.cfg, inputs) - outputs = self.lightning_module.forward( - inputs, labels, images=images, compute_visualizations=True - ) - - for node_name, visualizations in outputs.visualizations.items(): - for viz_name, viz_batch in visualizations.items(): - for i, viz in enumerate(viz_batch): - viz_arr = viz.detach().cpu().numpy().transpose(1, 2, 0) - viz_arr = cv2.cvtColor(viz_arr, cv2.COLOR_RGB2BGR) - name = f"{node_name}/{viz_name}/{i}" - if self.save_dir is not None: - name = name.replace("/", "_") - cv2.imwrite(str(self.save_dir / f"{name}_{k}.png"), viz_arr) - k += 1 - else: - cv2.imshow(name, viz_arr) - if self.save_dir is None: - if cv2.waitKey(0) == ord("q"): - exit() diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py deleted file mode 100644 index cb2c5a2c..00000000 --- a/luxonis_train/core/trainer.py +++ /dev/null @@ -1,119 +0,0 @@ -import threading -from logging import getLogger -from typing import Any, Literal - -from lightning.pytorch.utilities import rank_zero_only # type: ignore - -from luxonis_train.models import LuxonisModel -from luxonis_train.utils.config import Config - -from .core import Core - -logger = getLogger(__name__) - - -class Trainer(Core): - """Main API which is used to create the model, setup pytorch lightning environment - and perform training based on provided arguments and config.""" - - def __init__( - self, - cfg: str | dict[str, Any] | Config, - opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, - ): - """Constructs a new Trainer instance. - - @type cfg: str | dict[str, Any] | Config - @param cfg: Path to config file or config dict used to setup training. - - @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None - @param opts: Argument dict provided through command line, - used for config overriding. - """ - super().__init__(cfg, opts) - - self.lightning_module = LuxonisModel( - cfg=self.cfg, - dataset_metadata=self.dataset_metadata, - save_dir=self.run_save_dir, - input_shape=self.loader_train.input_shape, - ) - - def train(self, new_thread: bool = False) -> None: - """Runs training. - - @type new_thread: bool - @param new_thread: Runs training in new thread if set to True. 
- """ - if not new_thread: - logger.info(f"Checkpoints will be saved in: {self.get_save_dir()}") - logger.info("Starting training...") - self.pl_trainer.fit( - self.lightning_module, - self.pytorch_loader_train, - self.pytorch_loader_val, - ) - logger.info("Training finished") - logger.info(f"Checkpoints saved in: {self.get_save_dir()}") - else: - # Every time exception happens in the Thread, this hook will activate - def thread_exception_hook(args): - self.error_message = str(args.exc_value) - - threading.excepthook = thread_exception_hook - - self.thread = threading.Thread( - target=self.pl_trainer.fit, - args=( - self.lightning_module, - self.pytorch_loader_train, - self.pytorch_loader_val, - ), - daemon=True, - ) - self.thread.start() - - def test( - self, new_thread: bool = False, view: Literal["train", "val", "test"] = "test" - ) -> None: - """Runs testing. - - @type new_thread: bool - @param new_thread: Runs testing in new thread if set to True. - """ - - if view == "test": - loader = self.pytorch_loader_test - elif view == "val": - loader = self.pytorch_loader_val - elif view == "train": - loader = self.pytorch_loader_train - - if not new_thread: - self.pl_trainer.test(self.lightning_module, loader) - else: - self.thread = threading.Thread( - target=self.pl_trainer.test, - args=(self.lightning_module, loader), - daemon=True, - ) - self.thread.start() - - @rank_zero_only - def get_status(self) -> tuple[int, int]: - """Get current status of training. - - @rtype: tuple[int, int] - @return: First element is current epoch, second element is total number of - epochs. - """ - return self.lightning_module.get_status() - - @rank_zero_only - def get_status_percentage(self) -> float: - """Return percentage of current training, takes into account early stopping. - - @rtype: float - @return: Percentage of current training in range 0-100. - """ - return self.lightning_module.get_status_percentage() diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py deleted file mode 100644 index c9f8e151..00000000 --- a/luxonis_train/core/tuner.py +++ /dev/null @@ -1,172 +0,0 @@ -import os.path as osp -from typing import Any - -import lightning.pytorch as pl -import optuna -from lightning.pytorch.utilities import rank_zero_only # type: ignore -from optuna.integration import PyTorchLightningPruningCallback - -from luxonis_train.callbacks import LuxonisProgressBar -from luxonis_train.models import LuxonisModel -from luxonis_train.utils import Config -from luxonis_train.utils.tracker import LuxonisTrackerPL - -from .core import Core - - -class Tuner(Core): - def __init__(self, cfg: str | dict, args: list[str] | tuple[str, ...] | None): - """Main API which is used to perform hyperparameter tunning. - - @type cfg: str | dict[str, Any] | Config - @param cfg: Path to config file or config dict used to setup training. - - @type args: list[str] | tuple[str, ...] | None - @param args: Argument dict provided through command line, - used for config overriding. 
- """ - super().__init__(cfg, args) - if self.cfg.tuner is None: - raise ValueError("You have to specify the `tuner` section in config.") - self.tune_cfg = self.cfg.tuner - - def tune(self) -> None: - """Runs Optuna tunning of hyperparameters.""" - - pruner = ( - optuna.pruners.MedianPruner() - if self.tune_cfg.use_pruner - else optuna.pruners.NopPruner() - ) - - storage = None - if self.tune_cfg.storage.active: - if self.tune_cfg.storage.storage_type == "local": - storage = "sqlite:///study_local.db" - else: - storage = "postgresql://{}:{}@{}:{}/{}".format( - self.cfg.ENVIRON.POSTGRES_USER, - self.cfg.ENVIRON.POSTGRES_PASSWORD, - self.cfg.ENVIRON.POSTGRES_HOST, - self.cfg.ENVIRON.POSTGRES_PORT, - self.cfg.ENVIRON.POSTGRES_DB, - ) - - study = optuna.create_study( - study_name=self.tune_cfg.study_name, - storage=storage, - direction="minimize", - pruner=pruner, - load_if_exists=True, - ) - - study.optimize( - self._objective, - n_trials=self.tune_cfg.n_trials, - timeout=self.tune_cfg.timeout, - ) - - def _objective(self, trial: optuna.trial.Trial) -> float: - """Objective function used to optimize Optuna study.""" - rank = rank_zero_only.rank - cfg_tracker = self.cfg.tracker - tracker_params = cfg_tracker.model_dump() - tracker = LuxonisTrackerPL( - rank=rank, - mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, - is_sweep=True, - **tracker_params, - ) - run_save_dir = osp.join(cfg_tracker.save_directory, tracker.run_name) - - curr_params = self._get_trial_params(trial) - curr_params["model.predefined_model"] = None - Config.clear_instance() - cfg = Config.get_config(self.cfg.model_dump(), curr_params) - - tracker.log_hyperparams(curr_params) - - cfg.save_data(osp.join(run_save_dir, "config.yaml")) - - lightning_module = LuxonisModel( - cfg=cfg, - dataset_metadata=self.dataset_metadata, - save_dir=run_save_dir, - input_shape=self.loader_train.input_shape, - ) - pruner_callback = PyTorchLightningPruningCallback( - trial, monitor="val_loss/loss" - ) - callbacks: list[pl.Callback] = ( - [LuxonisProgressBar()] if self.cfg.use_rich_text else [] - ) - callbacks.append(pruner_callback) - pl_trainer = pl.Trainer( - accelerator=cfg.trainer.accelerator, - devices=cfg.trainer.devices, - strategy=cfg.trainer.strategy, - logger=tracker, # type: ignore - max_epochs=cfg.trainer.epochs, - accumulate_grad_batches=cfg.trainer.accumulate_grad_batches, - check_val_every_n_epoch=cfg.trainer.validation_interval, - num_sanity_val_steps=cfg.trainer.num_sanity_val_steps, - profiler=cfg.trainer.profiler, - callbacks=callbacks, - ) - - pl_trainer.fit( - lightning_module, # type: ignore - self.pytorch_loader_train, - self.pytorch_loader_val, - ) - pruner_callback.check_pruned() - - if "val/loss" not in pl_trainer.callback_metrics: - raise ValueError( - "No validation loss found. " - "This can happen if `TestOnTrainEnd` callback is used." 
- ) - - return pl_trainer.callback_metrics["val/loss"].item() - - def _get_trial_params(self, trial: optuna.trial.Trial) -> dict[str, Any]: - """Get trial params based on specified config.""" - cfg_tuner = self.tune_cfg.params - new_params = {} - for key, value in cfg_tuner.items(): - key_info = key.split("_") - key_name = "_".join(key_info[:-1]) - key_type = key_info[-1] - match key_type, value: - case "categorical", list(lst): - new_value = trial.suggest_categorical(key_name, lst) - case "float", [float(low), float(high), *tail]: - step = tail[0] if tail else None - if step is not None and not isinstance(step, float): - raise ValueError( - f"Step for float type must be float, but got {step}" - ) - new_value = trial.suggest_float(key_name, low, high, step=step) - case "int", [int(low), int(high), *tail]: - step = tail[0] if tail else 1 - if not isinstance(step, int): - raise ValueError( - f"Step for int type must be int, but got {step}" - ) - new_value = trial.suggest_int(key_name, low, high, step=step) - case "loguniform", [float(low), float(high)]: - new_value = trial.suggest_loguniform(key_name, low, high) - case "uniform", [float(low), float(high)]: - new_value = trial.suggest_uniform(key_name, low, high) - case _, _: - raise KeyError( - f"Combination of {key_type} and {value} not supported" - ) - - new_params[key_name] = new_value - - if len(new_params) == 0: - raise ValueError( - "No paramteres to tune. Specify them under `tuner.params`." - ) - return new_params diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py new file mode 100644 index 00000000..96c2bcde --- /dev/null +++ b/luxonis_train/core/utils/archive_utils.py @@ -0,0 +1,265 @@ +import logging +from collections import defaultdict +from pathlib import Path +from typing import TypedDict + +import onnx +from luxonis_ml.nn_archive.config_building_blocks import ( + DataType, + ObjectDetectionSubtypeYOLO, +) +from onnx.onnx_pb import TensorProto + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.enums.head_categorization import ( + ImplementedHeads, + ImplementedHeadsIsSoxtmaxed, +) +from luxonis_train.utils import Config + +logger = logging.getLogger(__name__) + + +class MetadataDict(TypedDict): + shape: list[int] + dtype: DataType + + +def get_inputs(path: Path) -> dict[str, MetadataDict]: + """Get inputs of a model executable. + + @type path: Path + @param path: Path to model executable file. + """ + + if path.suffix == ".onnx": + return _get_onnx_inputs(path) + else: + raise NotImplementedError( + f"Missing input reading function for {path.suffix} models." + ) + + +def get_outputs(path: Path) -> dict[str, MetadataDict]: + """Get outputs of a model executable. + + @type path: Path + @param path: Path to model executable file. + """ + + if path.suffix == ".onnx": + return _get_onnx_outputs(path) + else: + raise NotImplementedError( + f"Missing input reading function for {path.suffix} models." 
+ ) + + +def _from_onnx_dtype(dtype: int) -> DataType: + dtype_map = { + TensorProto.INT8: "int8", + TensorProto.INT32: "int32", + TensorProto.UINT8: "uint8", + TensorProto.FLOAT: "float32", + TensorProto.FLOAT16: "float16", + } + if dtype not in dtype_map: # pragma: no cover + raise ValueError(f"Unsupported ONNX data type: `{dtype}`") + + return DataType(dtype_map[dtype]) + + +def _load_onnx_model(onnx_path: Path) -> onnx.ModelProto: + try: + return onnx.load(str(onnx_path)) + except Exception as e: # pragma: no cover + raise ValueError(f"Failed to load ONNX model: `{onnx_path}`") from e + + +def _get_onnx_outputs(onnx_path: Path) -> dict[str, MetadataDict]: + model = _load_onnx_model(onnx_path) + outputs: dict[str, MetadataDict] = defaultdict(dict) # type: ignore + + for output in model.graph.output: + shape = [dim.dim_value for dim in output.type.tensor_type.shape.dim] + outputs[output.name]["shape"] = shape + outputs[output.name]["dtype"] = _from_onnx_dtype( + output.type.tensor_type.elem_type + ) + + return outputs + + +def _get_onnx_inputs(onnx_path: Path) -> dict[str, MetadataDict]: + model = _load_onnx_model(onnx_path) + + inputs: dict[str, MetadataDict] = defaultdict(dict) # type: ignore + + for inp in model.graph.input: + shape = [dim.dim_value for dim in inp.type.tensor_type.shape.dim] + inputs[inp.name]["shape"] = shape + inputs[inp.name]["dtype"] = _from_onnx_dtype( + inp.type.tensor_type.elem_type + ) + + return inputs + + +def _get_classes( + node_name: str, node_task: str | None, classes: dict[str, list[str]] +) -> list[str]: + if not node_task: + match node_name: + case "ClassificationHead": + node_task = "classification" + case "EfficientBBoxHead": + node_task = "boundingbox" + case "SegmentationHead" | "BiSeNetHead": + node_task = "segmentation" + case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": + node_task = "keypoints" + case _: # pragma: no cover + raise ValueError("Node does not map to a default task.") + + return classes.get(node_task, []) + + +def _get_head_specific_parameters( + nodes: dict[str, BaseNode], head_name: str, head_alias: str +) -> dict: + """Get parameters specific to head. + + @type nodes: dict[str, BaseNode] + @param nodes: Dictionary of nodes. + @type head_name: str + @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). + @type head_alias: str + @param head_alias: Alias of the head (e.g. 'detection_head'). 
+ """ + + parameters = {} + if head_name == "ClassificationHead": + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value + elif head_name == "EfficientBBoxHead": + parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value + head_node = nodes[head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + elif head_name in ["SegmentationHead", "BiSeNetHead"]: + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value + elif head_name == "ImplicitKeypointBBoxHead": + parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value + head_node = nodes[head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + parameters["n_keypoints"] = head_node.n_keypoints + parameters["anchors"] = head_node.anchors.tolist() + elif head_name == "EfficientKeypointBBoxHead": + # or appropriate subtype + head_node = nodes[head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + parameters["n_keypoints"] = head_node.n_keypoints + else: # pragma: no cover + raise ValueError("Unknown head name") + return parameters + + +def _get_head_outputs( + outputs: list[dict], head_name: str, head_type: str +) -> list[str]: + """Get model outputs in a head-specific format. + + @type outputs: list[dict] + @param outputs: List of NN Archive outputs. + @type head_name: str + @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its + custom alias. + @type head_type: str + @param head_name: Type of the head (e.g. 'EfficientBBoxHead'). + @rtype: list[str] + @return: List of output names. + """ + + output_names = [] + for output in outputs: + name = output["name"].split("/")[0] + if name == head_name: + output_names.append(output["name"]) + + if output_names: + return output_names + + # TODO: Fix this, will require refactoring custom ONNX output names + logger.error( + "ONNX model uses custom output names, trying to determine outputs based on the head type. " + "This will likely result in incorrect archive for multi-head models." + ) + + if head_type == "ClassificationHead": + return [outputs[0]["name"]] + elif head_type == "EfficientBBoxHead": + return [output["name"] for output in outputs] + elif head_type in ["SegmentationHead", "BiSeNetHead"]: + return [outputs[0]["name"]] + elif head_type == "ImplicitKeypointBBoxHead": + return [outputs[0]["name"]] + elif head_type == "EfficientKeypointBBoxHead": + return [outputs[0]["name"]] + else: + raise ValueError("Unknown head name") + + +def get_heads( + cfg: Config, + outputs: list[dict], + class_dict: dict[str, list[str]], + nodes: dict[str, BaseNode], +) -> list[dict]: + """Get model heads. + + @type cfg: Config + @param cfg: Configuration object. + @type outputs: list[dict] + @param outputs: List of model outputs. + @type class_dict: dict[str, list[str]] + @param class_dict: Dictionary of classes. + @type nodes: dict[str, BaseNode] + @param nodes: Dictionary of nodes. 
+ """ + heads = [] + + for node in cfg.model.nodes: + node_name = node.name + node_alias = node.alias or node_name + if node_alias in cfg.model.outputs: + if node_name in ImplementedHeads.__members__: + parser = getattr(ImplementedHeads, node_name).value + task = node.task + if isinstance(task, dict): + task = str(next(iter(task.values()))) + + classes = _get_classes(node_name, task, class_dict) + head_outputs = _get_head_outputs( + outputs, node_alias, node_name + ) + head_dict = { + "parser": parser, + "metadata": { + "classes": classes, + "n_classes": len(classes), + }, + "outputs": head_outputs, + } + head_dict["metadata"].update( + _get_head_specific_parameters(nodes, node_name, node_alias) + ) + heads.append(head_dict) + return heads diff --git a/luxonis_train/core/utils/export_utils.py b/luxonis_train/core/utils/export_utils.py new file mode 100644 index 00000000..b4863f1b --- /dev/null +++ b/luxonis_train/core/utils/export_utils.py @@ -0,0 +1,121 @@ +import logging +from contextlib import contextmanager +from pathlib import Path + +import luxonis_train +from luxonis_train.utils.config import Config, ExportConfig + +logger = logging.getLogger(__name__) + + +@contextmanager +def replace_weights( + module: "luxonis_train.models.LuxonisLightningModule", + weights: str | Path | None = None, +): + old_weights = None + if weights is not None: + old_weights = module.state_dict() + module.load_checkpoint(str(weights)) + + yield + + if old_weights is not None: + try: + module.load_state_dict(old_weights) + except RuntimeError: + logger.error( + "Failed to strictly load old weights. The model likey underwent reparametrization, " + "which is a destructive operation. Loading old weights with strict=False." + ) + module.load_state_dict(old_weights, strict=False) + del old_weights + + +def try_onnx_simplify(onnx_path: str) -> None: + import onnx + + try: + import onnxsim + + logger.info("Simplifying ONNX model...") + model_onnx = onnx.load(onnx_path) + onnx_model, check = onnxsim.simplify(model_onnx) + if not check: + raise RuntimeError("ONNX simplify failed.") # pragma: no cover + onnx.save(onnx_model, onnx_path) + logger.info(f"ONNX model saved to {onnx_path}") + + except ImportError: + logger.error("Failed to import `onnxsim`") + logger.warning( + "`onnxsim` not installed. Skipping ONNX model simplification. " + "Ensure `onnxsim` is installed in your environment." + ) + except RuntimeError: # pragma: no cover + logger.error( + "Failed to simplify ONNX model. Proceeding without simplification." 
+ ) + + +def get_preprocessing( + cfg: Config, +) -> tuple[list[float] | None, list[float] | None, bool]: + normalize_params = cfg.trainer.preprocessing.normalize.params + if cfg.exporter.scale_values is not None: + scale_values = cfg.exporter.scale_values + else: + scale_values = normalize_params.get("std", None) + if scale_values: + scale_values = ( + [round(i * 255, 5) for i in scale_values] + if isinstance(scale_values, list) + else round(scale_values * 255, 5) + ) + + if cfg.exporter.mean_values is not None: + mean_values = cfg.exporter.mean_values + else: + mean_values = normalize_params.get("mean", None) + if mean_values: + mean_values = ( + [round(i * 255, 5) for i in mean_values] + if isinstance(mean_values, list) + else round(mean_values * 255, 5) + ) + reverse_channels = cfg.exporter.reverse_input_channels + + return scale_values, mean_values, reverse_channels + + +def blobconverter_export( + cfg: ExportConfig, + scale_values: list[float] | None, + mean_values: list[float] | None, + reverse_channels: bool, + export_path: str, + onnx_path: str, +) -> str: + import blobconverter + + logger.info("Converting ONNX to .blob") + + optimizer_params: list[str] = [] + if scale_values: + optimizer_params.append(f"--scale_values={scale_values}") + if mean_values: + optimizer_params.append(f"--mean_values={mean_values}") + if reverse_channels: + optimizer_params.append("--reverse_input_channels") + + blob_path = blobconverter.from_onnx( + model=onnx_path, + optimizer_params=optimizer_params, + data_type=cfg.data_type.upper(), + shaves=cfg.blobconverter.shaves, + version=cfg.blobconverter.version, + use_cache=False, + output_dir=export_path, + ) + logger.info(f".blob model saved to {blob_path}") + return blob_path diff --git a/luxonis_train/core/utils/infer_utils.py b/luxonis_train/core/utils/infer_utils.py new file mode 100644 index 00000000..17696705 --- /dev/null +++ b/luxonis_train/core/utils/infer_utils.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import cv2 +from torch import Tensor + + +def render_visualizations( + visualizations: dict[str, dict[str, Tensor]], save_dir: str | Path | None +) -> None: + save_dir = Path(save_dir) if save_dir is not None else None + if save_dir is not None: + save_dir.mkdir(exist_ok=True, parents=True) + + i = 0 + for node_name, vzs in visualizations.items(): + for viz_name, viz_batch in vzs.items(): + for i, viz in enumerate(viz_batch): + viz_arr = viz.detach().cpu().numpy().transpose(1, 2, 0) + viz_arr = cv2.cvtColor(viz_arr, cv2.COLOR_RGB2BGR) + name = f"{node_name}/{viz_name}/{i}" + if save_dir is not None: + name = name.replace("/", "_") + cv2.imwrite(str(save_dir / f"{name}_{i}.png"), viz_arr) + i += 1 + else: + cv2.imshow(name, viz_arr) + + if save_dir is None: + if cv2.waitKey(0) == ord("q"): + exit() diff --git a/luxonis_train/core/utils/train_utils.py b/luxonis_train/core/utils/train_utils.py new file mode 100644 index 00000000..73b615cb --- /dev/null +++ b/luxonis_train/core/utils/train_utils.py @@ -0,0 +1,28 @@ +from typing import Any + +import lightning.pytorch as pl + +from luxonis_train.utils.config import TrainerConfig + + +def create_trainer(cfg: TrainerConfig, **kwargs: Any) -> pl.Trainer: + """Creates Pytorch Lightning trainer. + + @type cfg: Config + @param cfg: Configuration object. + @param kwargs: Additional arguments to pass to the trainer. + @rtype: pl.Trainer + @return: Pytorch Lightning trainer. 
+ """ + return pl.Trainer( + accelerator=cfg.accelerator, + devices=cfg.devices, + strategy=cfg.strategy, + max_epochs=cfg.epochs, + accumulate_grad_batches=cfg.accumulate_grad_batches, + check_val_every_n_epoch=cfg.validation_interval, + num_sanity_val_steps=cfg.n_sanity_val_steps, + profiler=cfg.profiler, + deterministic=cfg.deterministic, + **kwargs, + ) diff --git a/luxonis_train/core/utils/tune_utils.py b/luxonis_train/core/utils/tune_utils.py new file mode 100644 index 00000000..d9d6c4c0 --- /dev/null +++ b/luxonis_train/core/utils/tune_utils.py @@ -0,0 +1,83 @@ +import logging +import random +from typing import Any + +import optuna + +logger = logging.getLogger(__name__) + + +def _augs_to_indices(all_augs: list[str], aug_names: list[str]) -> list[int]: + """Maps augmentation names to indices.""" + aug_indices = [] + for aug_name in aug_names: + if aug_name == "Normalize": + logger.warn( + f"'{aug_name}' should be tuned directly by adding '...normalize.active_categorical' to the tuner params, skipping." + ) + continue + try: + index = all_augs.index(aug_name) + aug_indices.append(index) + except ValueError: + logger.warn( + f"Augmentation '{aug_name}' not found under trainer augemntations, skipping." + ) + continue + return aug_indices + + +def get_trial_params( + all_augs: list[str], params: dict[str, Any], trial: optuna.trial.Trial +) -> dict[str, Any]: + """Get trial params based on specified config.""" + new_params = {} + for key, value in params.items(): + key_info = key.split("_") + key_name = "_".join(key_info[:-1]) + key_type = key_info[-1] + match key_type, value: + case "subset", [list(whole_set), int(subset_size)]: + if key_name.split(".")[-1] != "augmentations": + raise ValueError( + "Subset sampling currently only supported for augmentations" + ) + whole_set_indices = _augs_to_indices(all_augs, whole_set) + subset = random.sample(whole_set_indices, subset_size) + for aug_id in whole_set_indices: + new_params[f"{key_name}.{aug_id}.active"] = ( + True if aug_id in subset else False + ) + continue + case "categorical", list(lst): + new_value = trial.suggest_categorical(key_name, lst) + case "float", [float(low), float(high), *tail]: + step = tail[0] if tail else None + if step is not None and not isinstance(step, float): + raise ValueError( + f"Step for float type must be float, but got {step}" + ) + new_value = trial.suggest_float(key_name, low, high, step=step) + case "int", [int(low), int(high), *tail]: + step = tail[0] if tail else 1 + if not isinstance(step, int): + raise ValueError( + f"Step for int type must be int, but got {step}" + ) + new_value = trial.suggest_int(key_name, low, high, step=step) + case "loguniform", [float(low), float(high)]: + new_value = trial.suggest_loguniform(key_name, low, high) + case "uniform", [float(low), float(high)]: + new_value = trial.suggest_uniform(key_name, low, high) + case _, _: + raise KeyError( + f"Combination of {key_type} and {value} not supported" + ) + + new_params[key_name] = new_value + + if len(new_params) == 0: + raise ValueError( + "No paramteres to tune. Specify them under `tuner.params`." 
+ ) + return new_params diff --git a/luxonis_train/loaders/__init__.py b/luxonis_train/loaders/__init__.py new file mode 100644 index 00000000..4b089462 --- /dev/null +++ b/luxonis_train/loaders/__init__.py @@ -0,0 +1,11 @@ +from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput, collate_fn +from .luxonis_loader_torch import LuxonisLoaderTorch +from .obb_tmp_loader import OBBLoaderTorch + +__all__ = [ + "LuxonisLoaderTorch", + "OBBLoaderTorch", + "collate_fn", + "BaseLoaderTorch", + "LuxonisLoaderTorchOutput", +] diff --git a/luxonis_train/loaders/base_loader.py b/luxonis_train/loaders/base_loader.py new file mode 100644 index 00000000..6d6d62dc --- /dev/null +++ b/luxonis_train/loaders/base_loader.py @@ -0,0 +1,170 @@ +from abc import ABC, abstractmethod + +import torch +from luxonis_ml.data import Augmentations, LabelType +from luxonis_ml.utils.registry import AutoRegisterMeta +from torch import Size, Tensor +from torch.utils.data import Dataset + +from luxonis_train.utils.registry import LOADERS +from luxonis_train.utils.types import Labels + +LuxonisLoaderTorchOutput = tuple[dict[str, Tensor], Labels] +"""LuxonisLoaderTorchOutput is a tuple of source tensors and +corresponding labels.""" + + +class BaseLoaderTorch( + Dataset[LuxonisLoaderTorchOutput], + ABC, + metaclass=AutoRegisterMeta, + register=False, + registry=LOADERS, +): + """Base abstract loader class that enforces LuxonisLoaderTorchOutput + output label structure.""" + + def __init__( + self, + view: str | list[str], + augmentations: Augmentations | None = None, + image_source: str | None = None, + ): + self.view = view if isinstance(view, list) else [view] + self.augmentations = augmentations + self._image_source = image_source + + @property + def image_source(self) -> str: + """Name of the input image group. + + Example: 'image' + + @type: str + """ + if self._image_source is None: + raise ValueError("image_source is not set") + return self._image_source + + @property + @abstractmethod + def input_shapes(self) -> dict[str, Size]: + """ + Shape (c, h, w) of each loader group (sub-element), WITHOUT batch dimension. + Examples: + + 1. Single image input:: + { + 'image': torch.Size([3, 224, 224]), + } + + 2. Image and segmentation input:: + { + 'image': torch.Size([3, 224, 224]), + 'segmentation': torch.Size([1, 224, 224]), + } + + 3. Left image, right image and disparity input:: + { + 'left': torch.Size([3, 224, 224]), + 'right': torch.Size([3, 224, 224]), + 'disparity': torch.Size([1, 224, 224]), + } + + 4. Image, keypoints, and point cloud input:: + { + 'image': torch.Size([3, 224, 224]), + 'keypoints': torch.Size([17, 2]), + 'point_cloud': torch.Size([20000, 3]), + } + + @type: dict[str, Size] + """ + ... + + @property + def input_shape(self) -> Size: + """Shape (c, h, w) of the input tensor, WITHOUT batch dimension. + + @type: torch.Size + """ + return self.input_shapes[self.image_source] + + @abstractmethod + def __len__(self) -> int: + """Returns length of the dataset.""" + ... + + @abstractmethod + def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: + """Loads sample from dataset. + + @type idx: int + @param idx: Sample index. + @rtype: L{LuxonisLoaderTorchOutput} + @return: Sample's data in L{LuxonisLoaderTorchOutput} format + """ + ... + + @abstractmethod + def get_classes(self) -> dict[str, list[str]]: + """Gets classes according to computer vision task. + + @rtype: dict[LabelType, list[str]] + @return: A dictionary mapping tasks to their classes. + """ + ... 
+ + def get_n_keypoints(self) -> dict[str, int] | None: + """Returns the dictionary defining the semantic skeleton for + each class using keypoints. + + @rtype: Dict[str, Dict] + @return: A dictionary mapping classes to their skeleton + definitions. + """ + return None + + +def collate_fn( + batch: list[LuxonisLoaderTorchOutput], +) -> tuple[dict[str, Tensor], Labels]: + """Default collate function used for training. + + @type batch: list[LuxonisLoaderTorchOutput] + @param batch: List of loader outputs (dict of Tensors) and labels + (dict of Tensors) in the LuxonisLoaderTorchOutput format. + @rtype: tuple[dict[str, Tensor], dict[LabelType, Tensor]] + @return: Tuple of inputs and annotations in the format expected by + the model. + """ + inputs: tuple[dict[str, Tensor], ...] + labels: tuple[Labels, ...] + inputs, labels = zip(*batch) + + out_inputs = { + k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys() + } + + out_labels: Labels = {} + + for task in labels[0].keys(): + label_type = labels[0][task][1] + annos = [label[task][0] for label in labels] + if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: + out_labels[task] = torch.stack(annos, 0), label_type + + elif label_type in [ + LabelType.KEYPOINTS, + LabelType.BOUNDINGBOX, + LabelType.OBOUNDINGBOX, + ]: + label_box: list[Tensor] = [] + for i, box in enumerate(annos): + l_box = torch.zeros((box.shape[0], box.shape[1] + 1)) + l_box[:, 0] = i # add target image index for build_targets() + l_box[:, 1:] = box + label_box.append(l_box) + out_labels[task] = torch.cat(label_box, 0), label_type + + return out_inputs, out_labels diff --git a/luxonis_train/loaders/luxonis_loader_torch.py b/luxonis_train/loaders/luxonis_loader_torch.py new file mode 100644 index 00000000..8286a7a2 --- /dev/null +++ b/luxonis_train/loaders/luxonis_loader_torch.py @@ -0,0 +1,169 @@ +import logging +from pathlib import Path +from typing import Literal + +import numpy as np +from luxonis_ml.data import ( + Augmentations, + BucketStorage, + BucketType, + LuxonisDataset, + LuxonisLoader, +) +from luxonis_ml.data.parsers import LuxonisParser +from luxonis_ml.enums import DatasetType +from torch import Size, Tensor +from typeguard import typechecked + +from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput + +logger = logging.getLogger(__name__) + + +class LuxonisLoaderTorch(BaseLoaderTorch): + @typechecked + def __init__( + self, + dataset_name: str | None = None, + dataset_dir: str | None = None, + dataset_type: DatasetType | None = None, + team_id: str | None = None, + bucket_type: Literal["internal", "external"] = "internal", + bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local", + stream: bool = False, + delete_existing: bool = True, + view: str | list[str] = "train", + augmentations: Augmentations | None = None, + **kwargs, + ): + """Torch-compatible loader for Luxonis datasets. + + Can either use an already existing dataset or parse a new one from a directory. + + @type dataset_name: str | None + @param dataset_name: Name of the dataset to load. If not provided, the + C{dataset_dir} argument must be provided instead. If both C{dataset_dir} and + C{dataset_name} are provided, the dataset will be parsed from the directory + and saved with the provided name. + @type dataset_dir: str | None + @param dataset_dir: Path to the dataset directory. It can be either a local path + or a URL. The data can be in a zip file. If not provided, C{dataset_name} of + an existing dataset must be provided. 
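As a concrete illustration of the collate function above: bounding-box style labels get an extra leading column holding the image index within the batch, so two samples with two boxes and one box respectively collate into a single (3, 6) tensor. A sketch with made-up values (the task key "boxes" is arbitrary):

    import torch
    from luxonis_ml.data import LabelType

    from luxonis_train.loaders import collate_fn

    sample_0 = (
        {"image": torch.zeros(3, 256, 256)},
        {"boxes": (torch.tensor([[0.0, 0.1, 0.1, 0.5, 0.5],
                                 [1.0, 0.2, 0.2, 0.3, 0.3]]), LabelType.BOUNDINGBOX)},
    )
    sample_1 = (
        {"image": torch.zeros(3, 256, 256)},
        {"boxes": (torch.tensor([[0.0, 0.4, 0.4, 0.2, 0.2]]), LabelType.BOUNDINGBOX)},
    )

    inputs, labels = collate_fn([sample_0, sample_1])
    print(inputs["image"].shape)  # torch.Size([2, 3, 256, 256])
    boxes, label_type = labels["boxes"]
    print(boxes.shape)            # torch.Size([3, 6]); first column is 0, 0, 1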
+ @type dataset_type: str | None + @param dataset_type: Type of the dataset. Only relevant when C{dataset_dir} is + provided. If not provided, the type will be inferred from the directory + structure. + @type team_id: str | None + @param team_id: Optional unique team identifier for the cloud. + @type bucket_type: Literal["internal", "external"] + @param bucket_type: Type of the bucket. Only relevant for remote datasets. + Defaults to 'internal'. + @type bucket_storage: Literal["local", "s3", "gcs", "azure"] + @param bucket_storage: Type of the bucket storage. Defaults to 'local'. + @type stream: bool + @param stream: Flag for data streaming. Defaults to C{False}. + @type delete_existing: bool + @param delete_existing: Only relevant when C{dataset_dir} is provided. By + default, the dataset is parsed again every time the loader is created + because the underlying data might have changed. If C{delete_existing} is set + to C{False} and a dataset of the same name already exists, the existing + dataset will be used instead of re-parsing the data. + @type view: str | list[str] + @param view: A single split or a list of splits that will be used to create a + view of the dataset. Each split is a string that represents a subset of the + dataset. The available splits depend on the dataset, but usually include + 'train', 'val', and 'test'. Defaults to 'train'. + @type augmentations: Augmentations | None + @param augmentations: Augmentations to apply to the data. Defaults to C{None}. + """ + super().__init__(view=view, augmentations=augmentations, **kwargs) + if dataset_dir is not None: + self.dataset = self._parse_dataset( + dataset_dir, dataset_name, dataset_type, delete_existing + ) + else: + if dataset_name is None: + raise ValueError( + "Either `dataset_dir` or `dataset_name` must be provided." + ) + self.dataset = LuxonisDataset( + dataset_name=dataset_name, + team_id=team_id, + bucket_type=BucketType(bucket_type), + bucket_storage=BucketStorage(bucket_storage), + ) + self.base_loader = LuxonisLoader( + dataset=self.dataset, + view=self.view, + stream=stream, + augmentations=self.augmentations, + ) + + def __len__(self) -> int: + return len(self.base_loader) + + @property + def input_shapes(self) -> dict[str, Size]: + img = self[0][0][self.image_source] + return {self.image_source: img.shape} + + def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: + img, labels = self.base_loader[idx] + + img = np.transpose(img, (2, 0, 1)) # HWC to CHW + tensor_img = Tensor(img) + tensor_labels = {} + for task, (array, label_type) in labels.items(): + tensor_labels[task] = (Tensor(array), label_type) + + return {self.image_source: tensor_img}, tensor_labels + + def get_classes(self) -> dict[str, list[str]]: + _, classes = self.dataset.get_classes() + return {task: classes[task] for task in classes} + + def get_n_keypoints(self) -> dict[str, int]: + skeletons = self.dataset.get_skeletons() + return {task: len(skeletons[task][0]) for task in skeletons} + + def _parse_dataset( + self, + dataset_dir: str, + dataset_name: str | None, + dataset_type: DatasetType | None, + delete_existing: bool, + ) -> LuxonisDataset: + if dataset_name is None: + dataset_name = Path(dataset_dir).stem + if dataset_type is not None: + dataset_name += f"_{dataset_type.value}" + + if LuxonisDataset.exists(dataset_name): + if not delete_existing: + return LuxonisDataset(dataset_name=dataset_name) + else: + logger.warning( + f"Dataset {dataset_name} already exists. 
" + "The dataset will be generated again to ensure the latest data are used. " + "If you don't want to regenerate the dataset every time, set `delete_existing=False`'" + ) + + if dataset_type is None: + logger.warning( + "Dataset type is not set. " + "Attempting to infer it from the directory structure. " + "If this fails, please set the dataset type manually. " + f"Supported types are: {', '.join(DatasetType.__members__)}." + ) + + logger.info( + f"Parsing dataset from {dataset_dir} with name '{dataset_name}'" + ) + + return LuxonisParser( + dataset_dir, + dataset_name=dataset_name, + dataset_type=dataset_type, + save_dir="data", + delete_existing=True, + ).parse() diff --git a/luxonis_train/loaders/obb_tmp_loader.py b/luxonis_train/loaders/obb_tmp_loader.py new file mode 100644 index 00000000..d571dd43 --- /dev/null +++ b/luxonis_train/loaders/obb_tmp_loader.py @@ -0,0 +1,340 @@ +import json +import logging +import random +import warnings +from operator import itemgetter +from pathlib import Path +from typing import Dict, List, Literal, Optional, Tuple, Union + +import cv2 +import numpy as np +from luxonis_ml.data import ( + Augmentations, + BucketStorage, + BucketType, + LuxonisDataset, + LuxonisLoader, +) +from luxonis_ml.data.loaders.base_loader import LuxonisLoaderOutput +from luxonis_ml.data.parsers import LuxonisParser +from luxonis_ml.data.utils.enums import LabelType +from luxonis_ml.enums import DatasetType +from torch import Size, Tensor +from typeguard import typechecked + +from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput + +logger = logging.getLogger(__name__) + + +class OBBLoaderTorch(BaseLoaderTorch): + @typechecked + def __init__( + self, + dataset_name: str | None = None, + dataset_dir: str | None = None, + dataset_type: DatasetType | None = None, + team_id: str | None = None, + bucket_type: Literal["internal", "external"] = "internal", + bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local", + stream: bool = False, + delete_existing: bool = True, + view: str | list[str] = "train", + augmentations: Augmentations | None = None, + **kwargs, + ): + """Torch-compatible loader for Luxonis datasets for obb. + + Can either use an already existing dataset or parse a new one from a directory. + + @type dataset_name: str | None + @param dataset_name: Name of the dataset to load. If not provided, the + C{dataset_dir} argument must be provided instead. If both C{dataset_dir} and + C{dataset_name} are provided, the dataset will be parsed from the directory + and saved with the provided name. + @type dataset_dir: str | None + @param dataset_dir: Path to the dataset directory. It can be either a local path + or a URL. The data can be in a zip file. If not provided, C{dataset_name} of + an existing dataset must be provided. + @type dataset_type: str | None + @param dataset_type: Type of the dataset. Only relevant when C{dataset_dir} is + provided. If not provided, the type will be inferred from the directory + structure. + @type team_id: str | None + @param team_id: Optional unique team identifier for the cloud. + @type bucket_type: Literal["internal", "external"] + @param bucket_type: Type of the bucket. Only relevant for remote datasets. + Defaults to 'internal'. + @type bucket_storage: Literal["local", "s3", "gcs", "azure"] + @param bucket_storage: Type of the bucket storage. Defaults to 'local'. + @type stream: bool + @param stream: Flag for data streaming. Defaults to C{False}. 
+ @type delete_existing: bool + @param delete_existing: Only relevant when C{dataset_dir} is provided. By + default, the dataset is parsed again every time the loader is created + because the underlying data might have changed. If C{delete_existing} is set + to C{False} and a dataset of the same name already exists, the existing + dataset will be used instead of re-parsing the data. + @type view: str | list[str] + @param view: A single split or a list of splits that will be used to create a + view of the dataset. Each split is a string that represents a subset of the + dataset. The available splits depend on the dataset, but usually include + 'train', 'val', and 'test'. Defaults to 'train'. + @type augmentations: Augmentations | None + @param augmentations: Augmentations to apply to the data. Defaults to C{None}. + """ + super().__init__(view=view, augmentations=augmentations, **kwargs) + if dataset_dir is not None: + self.dataset = self._parse_dataset( + dataset_dir, dataset_name, dataset_type, delete_existing + ) + else: + if dataset_name is None: + raise ValueError( + "Either `dataset_dir` or `dataset_name` must be provided." + ) + self.dataset = LuxonisDataset( + dataset_name=dataset_name, + team_id=team_id, + bucket_type=BucketType(bucket_type), + bucket_storage=BucketStorage(bucket_storage), + ) + + self.instances = [] + splits_path = self.dataset.metadata_path / "splits.json" + if not splits_path.exists(): + raise RuntimeError( + "Cannot find splits! Ensure you call dataset.make_splits()" + ) + with open(splits_path, "r") as file: + splits = json.load(file) + + for view in self.view: + self.instances.extend(splits[view]) + + self.base_loader = OBBLoader( + dataset=self.dataset, + view=self.view, + stream=stream, + augmentations=self.augmentations, + ) + + def __len__(self) -> int: + return len(self.base_loader) + + @property + def input_shapes(self) -> dict[str, Size]: + img = self[0][0][self.image_source] + return {self.image_source: img.shape} + + def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: + img, labels = self.base_loader[idx] + + img = np.transpose(img, (2, 0, 1)) # HWC to CHW + tensor_img = Tensor(img) + tensor_labels = {} + for task, (array, label_type) in labels.items(): + tensor_labels[task] = (Tensor(array), label_type) + + return {self.image_source: tensor_img}, tensor_labels + + def get_classes(self) -> dict[str, list[str]]: + _, classes = self.dataset.get_classes() + return {task: classes[task] for task in classes} + + def get_n_keypoints(self) -> dict[str, int]: + skeletons = self.dataset.get_skeletons() + return {task: len(skeletons[task][0]) for task in skeletons} + + def _parse_dataset( + self, + dataset_dir: str, + dataset_name: str | None, + dataset_type: DatasetType | None, + delete_existing: bool, + ) -> LuxonisDataset: + if dataset_name is None: + dataset_name = Path(dataset_dir).stem + if dataset_type is not None: + dataset_name += f"_{dataset_type.value}" + + if LuxonisDataset.exists(dataset_name): + if not delete_existing: + return LuxonisDataset(dataset_name=dataset_name) + else: + logger.warning( + f"Dataset {dataset_name} already exists. " + "The dataset will be generated again to ensure the latest data are used. " + "If you don't want to regenerate the dataset every time, set `delete_existing=False`'" + ) + + if dataset_type is None: + logger.warning( + "Dataset type is not set. " + "Attempting to infer it from the directory structure. " + "If this fails, please set the dataset type manually. 
" + f"Supported types are: {', '.join(DatasetType.__members__)}." + ) + + logger.info( + f"Parsing dataset from {dataset_dir} with name '{dataset_name}'" + ) + + return LuxonisParser( + dataset_dir, + dataset_name=dataset_name, + dataset_type=dataset_type, + save_dir="data", + delete_existing=True, + ).parse() + + +class OBBLoader(LuxonisLoader): + def __init__( + self, + dataset: LuxonisDataset, + view: Union[str, List[str]] = "train", + stream: bool = False, + augmentations: Optional[Augmentations] = None, + *, + force_resync: bool = False, + ) -> None: + """A loader class used for loading data from L{LuxonisDataset} + for oriented bounding boxes. + + @type dataset: LuxonisDataset + @param dataset: LuxonisDataset to use + @type view: Union[str, List[str]] + @param view: What splits to use. Can be either a single split or + a list of splits. Defaults to "train". + @type stream: bool + @param stream: Flag for data streaming. Defaults to C{False}. + @type augmentations: Optional[luxonis_ml.loader.Augmentations] + @param augmentations: Augmentation class that performs + augmentations. Defaults to C{None}. + @type force_resync: bool + @param force_resync: Flag to force resync from cloud. Defaults + to C{False}. + """ + super().__init__( + dataset=dataset, + view=view, + stream=stream, + augmentations=augmentations, + force_resync=force_resync, + ) + + def __getitem__(self, idx: int) -> LuxonisLoaderOutput: + """Function to load a sample consisting of an image and its + annotations. + + @type idx: int + @param idx: The (often random) integer index to retrieve a + sample from the dataset. + @rtype: LuxonisLoaderOutput + @return: The loader ouput consisting of the image and a + dictionary defining its annotations. + """ + + if self.augmentations is None: + return self._load_image_with_annotations(idx) + + indices = [idx] + if self.augmentations.is_batched: + other_indices = [i for i in range(len(self)) if i != idx] + if self.augmentations.aug_batch_size > len(self): + warnings.warn( + f"Augmentations batch_size ({self.augmentations.aug_batch_size}) is larger than dataset size ({len(self)}), samples will include repetitions." 
+ ) + random_fun = random.choices + else: + random_fun = random.sample + picked_indices = random_fun( + other_indices, k=self.augmentations.aug_batch_size - 1 + ) + indices.extend(picked_indices) + + out_dict: Dict[str, Tuple[np.ndarray, LabelType]] = {} + loaded_anns = [self._load_image_with_annotations(i) for i in indices] + random_state = random.getstate() + np_random_state = np.random.get_state() + while loaded_anns[0][1]: + aug_input_data = [] + label_to_task = {} + nk = 0 + ns = 0 + for img, annotations in loaded_anns: + label_dict: Dict[LabelType, np.ndarray] = {} + task_dict: Dict[LabelType, str] = {} + for task in sorted(list(annotations.keys())): + array, label_type = annotations[task] + if label_type not in label_dict: + # ensure that bounding box annotations are added to the + # `label_dict` before keypoints + if label_type == LabelType.KEYPOINTS: + if ( + LabelType.BOUNDINGBOX + in map( + itemgetter(1), list(annotations.values()) + ) + and LabelType.BOUNDINGBOX not in label_dict # type: ignore + ): + continue + + if ( + LabelType.BOUNDINGBOX in label_dict # type: ignore + and LabelType.BOUNDINGBOX + in map( + itemgetter(1), list(annotations.values()) + ) + ): + bbox_task = task_dict[LabelType.BOUNDINGBOX] + *_, bbox_suffix = bbox_task.split("-", 1) + *_, kp_suffix = task.split("-", 1) + if bbox_suffix != kp_suffix: + continue + + label_dict[label_type] = array + label_to_task[label_type] = task + task_dict[label_type] = task + annotations.pop(task) + if label_type == LabelType.KEYPOINTS: + nk = (array.shape[1] - 1) // 3 + if label_type == LabelType.SEGMENTATION: + ns = array.shape[0] + + aug_input_data.append((img, label_dict)) + + # NOTE: To ensure the same augmentation is applied to all samples + # in case of multiple tasks per LabelType + random.setstate(random_state) + np.random.set_state(np_random_state) + + # NOTE: consider implementing resizing using the aspect ratio of the original input images + # height, width = img.shape[0], img.shape[1] + # # Determine the larger dimension + # if height > width: + # aspect_ratio = round(height / width, 2) + # new_height = 640 + # new_width = round(int(640 / aspect_ratio), -1) + # else: + # aspect_ratio = round(width / height, 2) + # new_width = 640 + # new_height = round(int(640 / aspect_ratio), -1) + + # img_resized = cv2.resize(img, (new_height, new_width), interpolation=cv2.INTER_AREA) + + # NOTE: Temporary solution, to demonstrate training functionality oh the DOTA dataset. 
+ # If it's needed can be changed to the size from config file + img_resized = cv2.resize( + img, (512, 512), interpolation=cv2.INTER_AREA + ) + img_norm = img_resized / 255 # [0, 1] + + img, aug_annotations = self.augmentations( + aug_input_data, nk=nk, ns=ns + ) + for label_type, array in aug_annotations.items(): + out_dict[label_to_task[label_type]] = (array, label_type) + + return img_norm, out_dict # type: ignore diff --git a/luxonis_train/models/__init__.py b/luxonis_train/models/__init__.py index 1e2f0d91..db71b9e5 100644 --- a/luxonis_train/models/__init__.py +++ b/luxonis_train/models/__init__.py @@ -1,5 +1,5 @@ -from .luxonis_model import LuxonisModel +from .luxonis_lightning import LuxonisLightningModule from .luxonis_output import LuxonisOutput from .predefined_models import * -__all__ = ["LuxonisModel", "LuxonisOutput"] +__all__ = ["LuxonisLightningModule", "LuxonisOutput"] diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_lightning.py similarity index 67% rename from luxonis_train/models/luxonis_model.py rename to luxonis_train/models/luxonis_lightning.py index 80a57d99..2bbf8ca9 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -1,48 +1,56 @@ from collections import defaultdict from collections.abc import Mapping from logging import getLogger +from pathlib import Path from typing import Literal, cast import lightning.pytorch as pl import torch -from lightning.pytorch.callbacks import ( - ModelCheckpoint, - RichModelSummary, -) +from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary from lightning.pytorch.utilities import rank_zero_only # type: ignore +from luxonis_ml.data import LuxonisDataset from torch import Size, Tensor, nn +import luxonis_train from luxonis_train.attached_modules import ( BaseAttachedModule, BaseLoss, BaseMetric, BaseVisualizer, ) +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) from luxonis_train.attached_modules.visualizers import ( combine_visualizations, get_unnormalized_images, ) -from luxonis_train.callbacks import ( - LuxonisProgressBar, - ModuleFreezer, -) +from luxonis_train.callbacks import BaseLuxonisProgressBar, ModuleFreezer from luxonis_train.nodes import BaseNode -from luxonis_train.utils.config import AttachedModuleConfig, Config -from luxonis_train.utils.general import ( +from luxonis_train.utils import ( DatasetMetadata, - get_shape_packet, + Kwargs, + Labels, + LuxonisTrackerPL, + Packet, + to_shape_packet, traverse_graph, ) -from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry -from luxonis_train.utils.tracker import LuxonisTrackerPL -from luxonis_train.utils.types import Kwargs, Labels, Packet +from luxonis_train.utils.config import AttachedModuleConfig, Config +from luxonis_train.utils.graph import Graph +from luxonis_train.utils.registry import ( + CALLBACKS, + OPTIMIZERS, + SCHEDULERS, + Registry, +) from .luxonis_output import LuxonisOutput logger = getLogger(__name__) -class LuxonisModel(pl.LightningModule): +class LuxonisLightningModule(pl.LightningModule): """Class representing the entire model. This class keeps track of the model graph, nodes, and attached modules. 
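Together with the rename, the module now receives a dictionary of named input shapes (one entry per loader output) instead of a single `input_shape`, and its forward pass consumes a dictionary of tensors. Below is a minimal sketch of the new convention; the source name and shapes are chosen purely for illustration.

```python
import torch
from torch import Size

# Named loader outputs replace the old single `input_shape`; keys are loader
# source names and values are (C, H, W) shapes without the batch dimension.
input_shapes = {"image": Size([3, 384, 512])}

# `forward` now expects a dictionary with one batched tensor per source.
dummy_batch = {
    name: torch.zeros(2, *shape) for name, shape in input_shapes.items()
}

# module = LuxonisLightningModule(cfg=cfg, save_dir="output",
#                                 input_shapes=input_shapes)
# outputs = module(dummy_batch)
```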
@@ -96,8 +104,10 @@ def __init__( self, cfg: Config, save_dir: str, - input_shape: list[int] | Size, + input_shapes: dict[str, Size], dataset_metadata: DatasetMetadata | None = None, + *, + _core: "luxonis_train.core.LuxonisModel | None" = None, **kwargs, ): """Constructs an instance of `LuxonisModel` from `Config`. @@ -106,33 +116,43 @@ def __init__( @param cfg: Config object. @type save_dir: str @param save_dir: Directory to save checkpoints. - @type input_shape: list[int] | L{Size} - @param input_shape: Shape of the input tensor. + @type input_shapes: dict[str, Size] + @param input_shapes: Dictionary of input shapes. Keys are input + names, values are shapes. @type dataset_metadata: L{DatasetMetadata} | None @param dataset_metadata: Dataset metadata. @type kwargs: Any - @param kwargs: Additional arguments to pass to the L{LightningModule} - constructor. + @param kwargs: Additional arguments to pass to the + L{LightningModule} constructor. """ super().__init__(**kwargs) self._export: bool = False + self._core = _core self.cfg = cfg - self.original_in_shape = Size(input_shape) + self.original_in_shapes = input_shapes + self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() self.frozen_nodes: list[tuple[nn.Module, int]] = [] - self.graph: dict[str, list[str]] = {} - self.input_shapes: dict[str, list[Size]] = {} + self.graph: Graph = {} + self.loader_input_shapes: dict[str, dict[str, Size]] = {} + self.node_input_sources: dict[str, list[str]] = defaultdict(list) self.loss_weights: dict[str, float] = {} self.main_metric: str | None = None self.save_dir = save_dir self.test_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.training_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.validation_step_outputs: list[Mapping[str, Tensor | float | int]] = [] + self.training_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] + self.validation_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] self.losses: dict[str, dict[str, BaseLoss]] = defaultdict(dict) self.metrics: dict[str, dict[str, BaseMetric]] = defaultdict(dict) - self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict(dict) + self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict( + dict + ) self._logged_images = 0 @@ -141,7 +161,7 @@ def __init__( for node_cfg in self.cfg.model.nodes: node_name = node_cfg.name - Node = BaseNode.REGISTRY.get(node_name) + Node: type[BaseNode] = BaseNode.REGISTRY.get(node_name) node_name = node_cfg.alias or node_name if node_cfg.freezing.active: epochs = self.cfg.trainer.epochs @@ -150,11 +170,66 @@ def __init__( elif isinstance(node_cfg.freezing.unfreeze_after, int): unfreeze_after = node_cfg.freezing.unfreeze_after else: - unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs) + unfreeze_after = int( + node_cfg.freezing.unfreeze_after * epochs + ) frozen_nodes.append((node_name, unfreeze_after)) - nodes[node_name] = (Node, node_cfg.params) - if not node_cfg.inputs: - self.input_shapes[node_name] = [Size(input_shape)] + + if node_cfg.task is not None: + if Node.tasks is None: + raise ValueError( + f"Cannot define tasks for node {node_name}." + "This node doesn't specify any tasks." + ) + if isinstance(node_cfg.task, str): + assert Node.tasks + if len(Node.tasks) > 1: + raise ValueError( + f"Node {node_name} specifies multiple tasks, " + "but only one task is specified in the config. " + "Specify the tasks as a dictionary instead." 
+ ) + + node_cfg.task = {next(iter(Node.tasks)): node_cfg.task} + else: + node_cfg.task = { + **Node._process_tasks(Node.tasks), + **node_cfg.task, + } + nodes[node_name] = ( + Node, + {**node_cfg.params, "_tasks": node_cfg.task}, + ) + + # Handle inputs for this node + if node_cfg.input_sources: + self.node_input_sources[node_name] = node_cfg.input_sources + + if not node_cfg.inputs and not node_cfg.input_sources: + # If no inputs (= preceding nodes) nor any input_sources (= loader outputs) are specified, + # assume the node is the starting node and takes all inputs from the loader. + + self.loader_input_shapes[node_name] = { + k: Size(v) for k, v in input_shapes.items() + } + self.node_input_sources[node_name] = list(input_shapes.keys()) + else: + # For each input_source, check if the loader provides the required output. + # If yes, add the shape to the input_shapes dict. If not, raise an error. + self.loader_input_shapes[node_name] = {} + for input_source in node_cfg.input_sources: + if input_source not in input_shapes: + raise ValueError( + f"Node {node_name} requires input source {input_source}, " + "which is not provided by the loader." + ) + + self.loader_input_shapes[node_name][input_source] = Size( + input_shapes[input_source] + ) + + # Inputs (= preceding nodes) are handled in the _initiate_nodes method. + self.graph[node_name] = node_cfg.inputs self.nodes = self._initiate_nodes(nodes) @@ -189,61 +264,73 @@ def __init__( self.load_checkpoint(self.cfg.model.weights) + @property + def core(self) -> "luxonis_train.core.LuxonisModel": + """Returns the core model.""" + if self._core is None: # pragma: no cover + raise ValueError("Core reference is not set.") + return self._core + def _initiate_nodes( self, nodes: dict[str, tuple[type[BaseNode], Kwargs]], ) -> nn.ModuleDict: """Initializes all the nodes in the model. - Traverses the graph and initiates each node using outputs of the preceding - nodes. + Traverses the graph and initiates each node using outputs of the + preceding nodes. @type nodes: dict[str, tuple[type[LuxonisNode], Kwargs]] - @param nodes: Dictionary of nodes to be initiated. Keys are node names, values - are tuples of node class and node kwargs. + @param nodes: Dictionary of nodes to be initiated. Keys are node + names, values are tuples of node class and node kwargs. @rtype: L{nn.ModuleDict}[str, L{LuxonisNode}] @return: Dictionary of initiated nodes. """ initiated_nodes: dict[str, BaseNode] = {} - dummy_outputs: dict[str, Packet[Tensor]] = { - f"__{node_name}_input__": { - "features": [torch.zeros(2, *shape[1:]) for shape in shapes] - } - for node_name, shapes in self.input_shapes.items() + dummy_inputs: dict[str, Packet[Tensor]] = { + source_name: {"features": [torch.zeros(2, *shape)]} + for shapes in self.loader_input_shapes.values() + for source_name, shape in shapes.items() } - for node_name, (Node, node_kwargs), node_input_names, _ in traverse_graph( - self.graph, nodes - ): - node_input_shapes: list[Packet[Size]] = [] + for node_name, ( + Node, + node_kwargs, + ), node_input_names, _ in traverse_graph(self.graph, nodes): node_dummy_inputs: list[Packet[Tensor]] = [] + """List of dummy input packets for the node. - if not node_input_names: - node_input_names = [f"__{node_name}_input__"] + The first one is always from the loader. 
+ """ + node_input_shapes: list[Packet[Size]] = [] + """Corresponding list of input shapes.""" + node_input_names += self.node_input_sources[node_name] for node_input_name in node_input_names: - dummy_output = dummy_outputs[node_input_name] - shape_packet = get_shape_packet(dummy_output) + dummy_input = dummy_inputs[node_input_name] + + node_dummy_inputs.append(dummy_input) + + shape_packet = to_shape_packet(dummy_input) node_input_shapes.append(shape_packet) - node_dummy_inputs.append(dummy_output) - node = Node( - input_shapes=node_input_shapes, - original_in_shape=self.original_in_shape, - dataset_metadata=self.dataset_metadata, - **node_kwargs, - ) - node_outputs = node.run(node_dummy_inputs) + node = Node( + input_shapes=node_input_shapes, + original_in_shape=self.original_in_shapes[self.image_source], + dataset_metadata=self.dataset_metadata, + **node_kwargs, + ) + node_outputs = node.run(node_dummy_inputs) - dummy_outputs[node_name] = node_outputs - initiated_nodes[node_name] = node + dummy_inputs[node_name] = node_outputs + initiated_nodes[node_name] = node return nn.ModuleDict(initiated_nodes) def forward( self, - inputs: Tensor, + inputs: dict[str, Tensor], labels: Labels | None = None, images: Tensor | None = None, *, @@ -253,55 +340,63 @@ def forward( ) -> LuxonisOutput: """Forward pass of the model. - Traverses the graph and step-by-step computes the outputs of each node. Each - next node is computed only when all of its predecessors are computed. Once the - outputs are not needed anymore, they are removed from the memory. + Traverses the graph and step-by-step computes the outputs of + each node. Each next node is computed only when all of its + predecessors are computed. Once the outputs are not needed + anymore, they are removed from the memory. @type inputs: L{Tensor} @param inputs: Input tensor. - @type labels: L{Labels} | None - @param labels: Labels dictionary. Defaults to C{None}. + @type task_labels: L{TaskLabels} | None + @param task_labels: Labels dictionary. Defaults to C{None}. @type images: L{Tensor} | None - @param images: Canvas tensor for visualizers. Defaults to C{None}. + @param images: Canvas tensor for visualizers. Defaults to + C{None}. @type compute_loss: bool - @param compute_loss: Whether to compute losses. Defaults to C{True}. + @param compute_loss: Whether to compute losses. Defaults to + C{True}. @type compute_metrics: bool - @param compute_metrics: Whether to update metrics. Defaults to C{True}. + @param compute_metrics: Whether to update metrics. Defaults to + C{True}. @type compute_visualizations: bool - @param compute_visualizations: Whether to compute visualizations. Defaults to - C{False}. + @param compute_visualizations: Whether to compute + visualizations. Defaults to C{False}. @rtype: L{LuxonisOutput} @return: Output of the model. """ - input_node_name = list(self.input_shapes.keys())[0] - input_dict = {input_node_name: [inputs]} - losses: dict[ str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]] ] = defaultdict(dict) visualizations: dict[str, dict[str, Tensor]] = defaultdict(dict) - computed: dict[str, Packet[Tensor]] = { - f"__{node_name}_input__": {"features": input_tensors} - for node_name, input_tensors in input_dict.items() - } + computed: dict[str, Packet[Tensor]] = {} for node_name, node, input_names, unprocessed in traverse_graph( self.graph, cast(dict[str, BaseNode], self.nodes) ): - # Special input for the first node. Will be changed when - # multiple inputs will be supported in `luxonis-ml.data`. 
- if not input_names: - input_names = [f"__{node_name}_input__"] + input_names += self.node_input_sources[node_name] - node_inputs = [computed[pred] for pred in input_names] + node_inputs: list[Packet[Tensor]] = [] + for pred in input_names: + if pred in computed: + node_inputs.append(computed[pred]) + else: + node_inputs.append({"features": [inputs[pred]]}) outputs = node.run(node_inputs) computed[node_name] = outputs - if compute_loss and node_name in self.losses and labels is not None: + if ( + compute_loss + and node_name in self.losses + and labels is not None + ): for loss_name, loss in self.losses[node_name].items(): losses[node_name][loss_name] = loss.run(outputs, labels) - if compute_metrics and node_name in self.metrics and labels is not None: + if ( + compute_metrics + and node_name in self.metrics + and labels is not None + ): for metric in self.metrics[node_name].values(): metric.run_update(outputs, labels) @@ -311,7 +406,9 @@ def forward( and images is not None and labels is not None ): - for viz_name, visualizer in self.visualizers[node_name].items(): + for viz_name, visualizer in self.visualizers[ + node_name + ].items(): viz = combine_visualizations( visualizer.run( images, @@ -360,11 +457,11 @@ def compute_metrics(self) -> dict[str, dict[str, Tensor]]: computed_submetrics = { metric_name: metric_value, } | submetrics - case Tensor(data=metric_value): + case Tensor() as metric_value: computed_submetrics = {metric_name: metric_value} case dict(submetrics): computed_submetrics = submetrics - case unknown: + case unknown: # pragma: no cover raise ValueError( f"Metric {metric_name} returned unexpected value of " f"type {type(unknown)}." @@ -379,24 +476,31 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: @type save_path: str @param save_path: Path where the exported model will be saved. @type kwargs: Any - @param kwargs: Additional arguments for the L{torch.onnx.export} method. + @param kwargs: Additional arguments for the L{torch.onnx.export} + method. @rtype: list[str] @return: List of output names. 
""" + self.eval() inputs = { - name: [torch.zeros(shape).to(self.device) for shape in shapes] - for name, shapes in self.input_shapes.items() + input_name: torch.zeros([1, *shape]).to(self.device) + for shapes in self.loader_input_shapes.values() + for input_name, shape in shapes.items() + } + + inputs_deep_clone = { + k: torch.zeros(elem.shape).to(self.device) + for k, elem in inputs.items() } - # TODO: multiple inputs - inp = list(inputs.values())[0][0] + inputs_for_onnx = {"inputs": inputs_deep_clone} for module in self.modules(): if isinstance(module, BaseNode): module.set_export_mode() - outputs = self.forward(inp.clone()).outputs + outputs = self.forward(inputs_deep_clone).outputs output_order = sorted( [ (node_name, output_name, i) @@ -436,10 +540,13 @@ def export_forward(inputs) -> tuple[Tensor, ...]: ) self.forward = export_forward # type: ignore + + if "input_names" not in kwargs: + kwargs["input_names"] = list(inputs.keys()) if "output_names" not in kwargs: kwargs["output_names"] = output_names - self.to_onnx(save_path, inp, **kwargs) + self.to_onnx(save_path, inputs_for_onnx, **kwargs) self.forward = old_forward # type: ignore @@ -448,26 +555,33 @@ def export_forward(inputs) -> tuple[Tensor, ...]: module.set_export_mode(False) logger.info(f"Model exported to {save_path}") + + self.train() + return output_names def process_losses( self, - losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]]], + losses_dict: dict[ + str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]] + ], ) -> tuple[Tensor, dict[str, Tensor]]: """Processes individual losses from the model run. - Goes over the computed losses and computes the final loss as a weighted sum of - all the losses. + Goes over the computed losses and computes the final loss as a + weighted sum of all the losses. - @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, - Tensor]]]] - @param losses_dict: Dictionary of computed losses. Each node can have multiple - losses attached. The first key identifies the node, the second key - identifies the specific loss. Values are either single tensors or tuples of - tensors and sublosses. + @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, + dict[str, Tensor]]]] + @param losses_dict: Dictionary of computed losses. Each node can + have multiple losses attached. The first key identifies the + node, the second key identifies the specific loss. Values + are either single tensors or tuples of tensors and + sublosses. @rtype: tuple[Tensor, dict[str, Tensor]] - @return: Tuple of final loss and dictionary of processed sublosses. The - dictionary is in a format of {loss_name: loss_value}. + @return: Tuple of final loss and dictionary of processed + sublosses. The dictionary is in a format of {loss_name: + loss_value}. 
""" final_loss = torch.zeros(1, device=self.device) training_step_output: dict[str, Tensor] = {} @@ -481,9 +595,9 @@ def process_losses( loss *= self.loss_weights[loss_name] final_loss += loss - training_step_output[ - f"loss/{node_name}/{loss_name}" - ] = loss.detach().cpu() + training_step_output[f"loss/{node_name}/{loss_name}"] = ( + loss.detach().cpu() + ) if self.cfg.trainer.log_sub_losses and sublosses: for subloss_name, subloss_value in sublosses.items(): training_step_output[ @@ -492,20 +606,28 @@ def process_losses( training_step_output["loss"] = final_loss.detach().cpu() return final_loss, training_step_output - def training_step(self, train_batch: tuple[Tensor, Labels]) -> Tensor: + def training_step( + self, train_batch: tuple[dict[str, Tensor], Labels] + ) -> Tensor: """Performs one step of training with provided batch.""" outputs = self.forward(*train_batch) - assert outputs.losses, "Losses are empty, check if you have defined any loss" + assert ( + outputs.losses + ), "Losses are empty, check if you have defined any loss" loss, training_step_output = self.process_losses(outputs.losses) self.training_step_outputs.append(training_step_output) return loss - def validation_step(self, val_batch: tuple[Tensor, Labels]) -> dict[str, Tensor]: + def validation_step( + self, val_batch: tuple[dict[str, Tensor], Labels] + ) -> dict[str, Tensor]: """Performs one step of validation with provided batch.""" return self._evaluation_step("val", val_batch) - def test_step(self, test_batch: tuple[Tensor, Labels]) -> dict[str, Tensor]: + def test_step( + self, test_batch: tuple[dict[str, Tensor], Labels] + ) -> dict[str, Tensor]: """Performs one step of testing with provided batch.""" return self._evaluation_step("test", test_batch) @@ -534,7 +656,8 @@ def get_status(self) -> tuple[int, int]: return self.current_epoch, self.cfg.trainer.epochs def get_status_percentage(self) -> float: - """Returns percentage of current training, takes into account early stopping.""" + """Returns percentage of current training, takes into account + early stopping.""" if self._trainer.early_stopping_callback: # model haven't yet stop from early stopping callback if self._trainer.early_stopping_callback.stopped_epoch == 0: @@ -545,11 +668,13 @@ def get_status_percentage(self) -> float: return (self.current_epoch / self.cfg.trainer.epochs) * 100 def _evaluation_step( - self, mode: Literal["test", "val"], batch: tuple[Tensor, Labels] + self, + mode: Literal["test", "val"], + batch: tuple[dict[str, Tensor], Labels], ) -> dict[str, Tensor]: inputs, labels = batch images = None - if self._logged_images < self.cfg.trainer.num_log_images: + if self._logged_images < self.cfg.trainer.n_log_images: images = get_unnormalized_images(self.cfg, inputs) outputs = self.forward( inputs, @@ -567,7 +692,7 @@ def _evaluation_step( for viz_name, viz_batch in visualizations.items(): logged_images = self._logged_images for viz in viz_batch: - if logged_images >= self.cfg.trainer.num_log_images: + if logged_images >= self.cfg.trainer.n_log_images: break self.logger.log_image( f"{mode}/visualizations/{node_name}/{viz_name}/{logged_images}", @@ -591,7 +716,9 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: logger.info("Metrics computed.") for node_name, metrics in computed_metrics.items(): for metric_name, metric_value in metrics.items(): - metric_results[node_name][metric_name] = metric_value.cpu().item() + metric_results[node_name][metric_name] = ( + metric_value.cpu().item() + ) self.log( 
f"{mode}/metric/{node_name}/{metric_name}", metric_value, @@ -611,12 +738,12 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: def configure_callbacks(self) -> list[pl.Callback]: """Configures Pytorch Lightning callbacks.""" self.min_val_loss_checkpoints_path = f"{self.save_dir}/min_val_loss" - self.best_val_metric_checkpoints_path = f"{self.save_dir}/best_val_metric" + self.best_val_metric_checkpoints_path = ( + f"{self.save_dir}/best_val_metric" + ) model_name = self.cfg.model.name - callbacks: list[pl.Callback] = [] - - callbacks.append( + callbacks: list[pl.Callback] = [ ModelCheckpoint( monitor="val/loss", dirpath=self.min_val_loss_checkpoints_path, @@ -624,8 +751,9 @@ def configure_callbacks(self) -> list[pl.Callback]: auto_insert_metric_name=False, save_top_k=self.cfg.trainer.save_top_k, mode="min", - ) - ) + ), + RichModelSummary(max_depth=2), + ] if self.main_metric is not None: main_metric = self.main_metric.replace("/", "_") @@ -644,18 +772,20 @@ def configure_callbacks(self) -> list[pl.Callback]: if self.frozen_nodes: callbacks.append(ModuleFreezer(self.frozen_nodes)) - if self.cfg.use_rich_text: - callbacks.append(RichModelSummary(max_depth=2)) - for callback in self.cfg.trainer.callbacks: if callback.active: - callbacks.append(CALLBACKS.get(callback.name)(**callback.params)) + callbacks.append( + CALLBACKS.get(callback.name)(**callback.params) + ) return callbacks def configure_optimizers( self, - ) -> tuple[list[torch.optim.Optimizer], list[nn.Module]]: + ) -> tuple[ + list[torch.optim.Optimizer], + list[torch.optim.lr_scheduler._LRScheduler], + ]: """Configures model optimizers and schedulers.""" cfg_optimizer = self.cfg.trainer.optimizer cfg_scheduler = self.cfg.trainer.scheduler @@ -670,18 +800,22 @@ def configure_optimizers( return [optimizer], [scheduler] - def load_checkpoint(self, path: str | None) -> None: + def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. - Loads the checkpoints gracefully, ignoring keys that are not found in the model - state dict or in the checkpoint. + Loads the checkpoints gracefully, ignoring keys that are not + found in the model state dict or in the checkpoint. @type path: str | None - @param path: Path to the checkpoint. If C{None}, no checkpoint will be loaded. + @param path: Path to the checkpoint. If C{None}, no checkpoint + will be loaded. """ if path is None: return + + path = str(path) checkpoint = torch.load(path, map_location=self.device) + if "state_dict" not in checkpoint: raise ValueError("Checkpoint does not contain state_dict.") state_dict = {} @@ -716,12 +850,31 @@ def _init_attached_module( Module = registry.get(cfg.name) module_name = cfg.alias or cfg.name node_name = cfg.attached_to - module = Module(**cfg.params, node=self.nodes[node_name]) + node: BaseNode = self.nodes[node_name] # type: ignore + if issubclass(Module, TorchMetricWrapper): + if "task" not in cfg.params and self._core is not None: + loader = self._core.loaders["train"] + dataset = getattr(loader, "dataset", None) + if isinstance(dataset, LuxonisDataset): + n_classes = len(dataset.get_classes()[1][node.task]) + if n_classes == 1: + cfg.params["task"] = "binary" + else: + cfg.params["task"] = "multiclass" + logger.warning( + f"Parameter 'task' not specified for `TorchMetric` based '{module_name}' metric. " + f"Assuming task type based on the number of classes: {cfg.params['task']}. " + "If this is incorrect, please specify the 'task' parameter in the config." 
+ ) + + module = Module(**cfg.params, node=node) storage[node_name][module_name] = module # type: ignore return module_name, node_name @staticmethod - def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: + def _to_module_dict( + modules: dict[str, dict[str, nn.Module]], + ) -> nn.ModuleDict: return nn.ModuleDict( { node_name: nn.ModuleDict(node_modules) @@ -730,8 +883,10 @@ def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: ) @property - def _progress_bar(self) -> LuxonisProgressBar: - return cast(LuxonisProgressBar, self._trainer.progress_bar_callback) + def _progress_bar(self) -> BaseLuxonisProgressBar: + return cast( + BaseLuxonisProgressBar, self._trainer.progress_bar_callback + ) @rank_zero_only def _print_results( @@ -741,23 +896,20 @@ def _print_results( logger.info(f"{stage} loss: {loss:.4f}") - if self.cfg.use_rich_text: - self._progress_bar.print_results(stage=stage, loss=loss, metrics=metrics) - else: - for node_name, node_metrics in metrics.items(): - for metric_name, metric_value in node_metrics.items(): - logger.info( - f"{stage} metric: {node_name}/{metric_name}: {metric_value:.4f}" - ) + self._progress_bar.print_results( + stage=stage, loss=loss, metrics=metrics + ) if self.main_metric is not None: main_metric_node, main_metric_name = self.main_metric.split("/") main_metric = metrics[main_metric_node][main_metric_name] - logger.info(f"{stage} main metric ({self.main_metric}): {main_metric:.4f}") + logger.info( + f"{stage} main metric ({self.main_metric}): {main_metric:.4f}" + ) def _is_train_eval_epoch(self) -> bool: - """Checks if train eval should be performed on current epoch based on configured - train_metrics_interval.""" + """Checks if train eval should be performed on current epoch + based on configured train_metrics_interval.""" train_metrics_interval = self.cfg.trainer.train_metrics_interval # add +1 to current_epoch because starting epoch is at 0 return ( diff --git a/luxonis_train/models/luxonis_output.py b/luxonis_train/models/luxonis_output.py index e6b8e16c..3cf59329 100644 --- a/luxonis_train/models/luxonis_output.py +++ b/luxonis_train/models/luxonis_output.py @@ -3,8 +3,7 @@ from torch import Tensor -from luxonis_train.utils.general import get_shape_packet -from luxonis_train.utils.types import Packet +from luxonis_train.utils import Packet, to_shape_packet @dataclass @@ -16,7 +15,7 @@ class LuxonisOutput: def __str__(self) -> str: outputs = { - node_name: get_shape_packet(packet) + node_name: to_shape_packet(packet) for node_name, packet in self.outputs.items() } viz = { diff --git a/luxonis_train/models/predefined_models/README.md b/luxonis_train/models/predefined_models/README.md index ddf0b46d..bdf49178 100644 --- a/luxonis_train/models/predefined_models/README.md +++ b/luxonis_train/models/predefined_models/README.md @@ -42,6 +42,7 @@ See an example configuration file using this predefined model [here](../../../co | Key | Type | Default value | Description | | ----------------- | --------------------------------- | ------------- | ------------------------------------------ | | task | Literal\["binary", "multiclass"\] | "binary" | Type of the task of the model. | +| task_name | str \| None | None | Custom task name for the head. | | backbone | str | "MicroNet" | Name of the node to be used as a backbone. | | backbone_params | dict | {} | Additional parameters to the backbone. | | head_params | dict | {} | Additional parameters to the head. 
| @@ -65,14 +66,15 @@ See an example configuration file using this predefined model [here](../../../co **Params** -| Key | Type | Default value | Description | -| ----------------- | ---- | ------------- | ----------------------------------------- | -| use_neck | bool | True | Whether to include the neck in the model. | -| backbone_params | dict | {} | Additional parameters to the backbone. | -| neck_params | dict | {} | Additional parameters to the neck. | -| head_params | dict | {} | Additional parameters to the head. | -| loss_params | dict | {} | Additional parameters to the loss. | -| visualizer_params | dict | {} | Additional parameters to the visualizer. | +| Key | Type | Default value | Description | +| ----------------- | ----------- | ------------- | ----------------------------------------- | +| task_name | str \| None | None | Custom task name for the head. | +| use_neck | bool | True | Whether to include the neck in the model. | +| backbone_params | dict | {} | Additional parameters to the backbone. | +| neck_params | dict | {} | Additional parameters to the neck. | +| head_params | dict | {} | Additional parameters to the head. | +| loss_params | dict | {} | Additional parameters to the loss. | +| visualizer_params | dict | {} | Additional parameters to the visualizer. | ## KeypointDetectionModel @@ -84,8 +86,10 @@ See an example configuration file using this predefined model [here](../../../co | ------------------------------------------------------------------------------------------------------- | ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | | [EfficientRep](../../nodes/README.md#efficientrep) | kpt_detection_backbone | Backbone of the model. | | [RepPANNeck](../../nodes/README.md#reppanneck) | kpt_detection_neck | Neck of the model. | -| [ImplicitKeypointBBoxHead](../../nodes/README.md#implicitkeypointbboxhead) | kpt_detection_head | Head of the model. | -| [ImplicitKeypointBBoxLoss](../../attached_modules/losses/README.md#implicitkeypointbboxloss) | kpt_detection_loss | Loss of the model. | +| [ImplicitKeypointBBoxHead](../../nodes/README.md#implicitkeypointbboxhead) | kpt_detection_head | Possible head of the model, changes depending on the value of `head_type` argument. | +| [EfficientKeypointBBoxHead](../../nodes/README.md#efficientkeypointbboxhead) | kpt_detection_head | Possible head of the model, changes depending on the value of `head_type` argument | +| [ImplicitKeypointBBoxLoss](../../attached_modules/losses/README.md#implicitkeypointbboxloss) | kpt_detection_loss | Loss of the model if the `head_type` is set to "ImplicitKeypointBBoxHead" | +| [EfficientKeypointBBoxLoss](../../attached_modules/losses/README.md#efficientkeypointbboxloss) | kpt_detection_loss | Loss of the model if `head_type` is set to "EfficientKeypointBBoxHead". | | [ObjectKeypointSimilarity](../../attached_modules/metrics/README.md#objectkeypointsimilarity) | kpt_detection_oks | Main metric of the model. | | [MeanAveragePrecisionKeypoints](../../attached_modules/metrics/README.md#meanaverageprecisionkeypoints) | kpt_detection_map | Secondary metric of the model. | | [BBoxVisualizer](../../attached_modules/visualizers/README.md#bboxvisualizer) | | Visualizer for bounding boxes. Combined with keypoint visualizer in [MultiVisualizer](../../attached_modules/visualizers/README.md#multivisualizer). 
| @@ -93,15 +97,18 @@ See an example configuration file using this predefined model [here](../../../co **Params** -| Key | Type | Default value | Description | -| ---------------------- | ---- | ------------- | ------------------------------------------------- | -| use_neck | bool | True | Whether to include the neck in the model. | -| backbone_params | dict | {} | Additional parameters to the backbone. | -| neck_params | dict | {} | Additional parameters to the neck. | -| head_params | dict | {} | Additional parameters to the head. | -| loss_params | dict | {} | Additional parameters to the loss. | -| kpt_visualizer_params | dict | {} | Additional parameters to the keypoint visualizer. | -| bbox_visualizer_params | dict | {} | Additional parameters to the bbox visualizer. | +| Key | Type | Default value | Description | +| ---------------------- | ----------- | --------------------------------------------------------- | ------------------------------------------------- | +| use_neck | bool | True | Whether to include the neck in the model. | +| backbone_params | dict | {} | Additional parameters to the backbone. | +| neck_params | dict | {} | Additional parameters to the neck. | +| head_params | dict | {} | Additional parameters to the head. | +| head_type | str | "ImplicitKeypointBBoxHead" \| "EfficientKeypointBBoxHead" | Type of the head. | +| loss_params | dict | {} | Additional parameters to the loss. | +| kpt_visualizer_params | dict | {} | Additional parameters to the keypoint visualizer. | +| bbox_visualizer_params | dict | {} | Additional parameters to the bbox visualizer. | +| bbox_task_name | str \| None | None | Custom task name for the detection head. | +| kpt_task_name | str \| None | None | Custom task name for the keypoint head. | ## ClassificationModel @@ -125,6 +132,7 @@ See an example configuration file using this predefined model [here](../../../co | Key | Type | Default value | Description | | ----------------- | ------------------------------------- | ------------- | ------------------------------------------ | | task | Literal\["multiclass", "multilabel"\] | "multiclass" | Type of the task of the model. | +| task_name | str \| None | None | Custom task name for the head. | | backbone | str | "MicroNet" | Name of the node to be used as a backbone. | | backbone_params | dict | {} | Additional parameters to the backbone. | | head_params | dict | {} | Additional parameters to the head. 
| diff --git a/luxonis_train/models/predefined_models/__init__.py b/luxonis_train/models/predefined_models/__init__.py index 0e8fe8c0..09a9a338 100644 --- a/luxonis_train/models/predefined_models/__init__.py +++ b/luxonis_train/models/predefined_models/__init__.py @@ -1,6 +1,8 @@ from .base_predefined_model import BasePredefinedModel from .classification_model import ClassificationModel +from .ddrnet_segmentation_model import DDRNetSegmentationModel from .detection_model import DetectionModel +from .detection_model_obb import OBBDetectionModel from .keypoint_detection_model import KeypointDetectionModel from .segmentation_model import SegmentationModel @@ -8,6 +10,8 @@ "BasePredefinedModel", "SegmentationModel", "DetectionModel", + "OBBDetectionModel", "KeypointDetectionModel", "ClassificationModel", + "DDRNetSegmentationModel", ] diff --git a/luxonis_train/models/predefined_models/base_predefined_model.py b/luxonis_train/models/predefined_models/base_predefined_model.py index 33ababdc..9388f345 100644 --- a/luxonis_train/models/predefined_models/base_predefined_model.py +++ b/luxonis_train/models/predefined_models/base_predefined_model.py @@ -1,4 +1,4 @@ -from abc import ABC, abstractproperty +from abc import ABC, abstractmethod from luxonis_ml.utils.registry import AutoRegisterMeta @@ -17,21 +17,21 @@ class BasePredefinedModel( registry=MODELS, register=False, ): - @abstractproperty - def nodes(self) -> list[ModelNodeConfig]: - ... + @property + @abstractmethod + def nodes(self) -> list[ModelNodeConfig]: ... - @abstractproperty - def losses(self) -> list[LossModuleConfig]: - ... + @property + @abstractmethod + def losses(self) -> list[LossModuleConfig]: ... - @abstractproperty - def metrics(self) -> list[MetricModuleConfig]: - ... + @property + @abstractmethod + def metrics(self) -> list[MetricModuleConfig]: ... - @abstractproperty - def visualizers(self) -> list[AttachedModuleConfig]: - ... + @property + @abstractmethod + def visualizers(self) -> list[AttachedModuleConfig]: ... 
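With the properties now declared via `@property` and `@abstractmethod`, a custom predefined model only needs to return the four configuration lists. The sketch below shows the general shape; the node, loss, metric, and visualizer names are assumptions chosen for illustration and may not match the registry entries exactly.

```python
from dataclasses import dataclass, field

from luxonis_train.models.predefined_models import BasePredefinedModel
from luxonis_train.utils import Kwargs
from luxonis_train.utils.config import (
    AttachedModuleConfig,
    LossModuleConfig,
    MetricModuleConfig,
    ModelNodeConfig,
)


@dataclass
class TinySegmentationModel(BasePredefinedModel):
    backbone: str = "MicroNet"
    backbone_params: Kwargs = field(default_factory=dict)
    head_params: Kwargs = field(default_factory=dict)
    task_name: str | None = None

    @property
    def nodes(self) -> list[ModelNodeConfig]:
        return [
            ModelNodeConfig(
                name=self.backbone,
                alias="tiny_backbone",
                params=self.backbone_params,
            ),
            ModelNodeConfig(
                name="SegmentationHead",
                alias="tiny_head",
                inputs=["tiny_backbone"],
                params=self.head_params,
                task=self.task_name,
            ),
        ]

    @property
    def losses(self) -> list[LossModuleConfig]:
        return [
            LossModuleConfig(
                name="CrossEntropyLoss", attached_to="tiny_head", weight=1.0
            )
        ]

    @property
    def metrics(self) -> list[MetricModuleConfig]:
        return [
            MetricModuleConfig(
                name="JaccardIndex", attached_to="tiny_head", is_main_metric=True
            )
        ]

    @property
    def visualizers(self) -> list[AttachedModuleConfig]:
        return [
            AttachedModuleConfig(
                name="SegmentationVisualizer", attached_to="tiny_head"
            )
        ]
```

Because subclasses are auto-registered, such a model can then be referenced by name from a configuration file in the same way as the bundled models.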
def generate_model( self, diff --git a/luxonis_train/models/predefined_models/classification_model.py b/luxonis_train/models/predefined_models/classification_model.py index 33e56242..e390b667 100644 --- a/luxonis_train/models/predefined_models/classification_model.py +++ b/luxonis_train/models/predefined_models/classification_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -15,11 +15,12 @@ @dataclass class ClassificationModel(BasePredefinedModel): backbone: str = "MicroNet" - task: Literal["multiclass", "multilabel"] = "multilabel" + task: Literal["multiclass", "multilabel"] = "multiclass" backbone_params: Kwargs = field(default_factory=dict) head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -36,6 +37,7 @@ def nodes(self) -> list[ModelNodeConfig]: inputs=["classification_backbone"], freezing=self.head_params.pop("freezing", {}), params=self.head_params, + task=self.task_name, ), ] diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py new file mode 100644 index 00000000..beacca5e --- /dev/null +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -0,0 +1,77 @@ +from dataclasses import dataclass, field + +from luxonis_train.utils.config import ( + LossModuleConfig, + ModelNodeConfig, +) +from luxonis_train.utils.types import Kwargs + +from .segmentation_model import SegmentationModel + + +@dataclass +class DDRNetSegmentationModel(SegmentationModel): + backbone: str = "DDRNet" + aux_head_params: Kwargs = field(default_factory=dict) + + @property + def nodes(self) -> list[ModelNodeConfig]: + self.head_params.update({"attach_index": -1}) + + self.aux_head_params.update({"attach_index": -2}) + + node_list = [ + ModelNodeConfig( + name=self.backbone, + alias="ddrnet_backbone", + freezing=self.backbone_params.pop("freezing", {}), + params=self.backbone_params, + ), + ModelNodeConfig( + name="DDRNetSegmentationHead", + alias="segmentation_head", + inputs=["ddrnet_backbone"], + freezing=self.head_params.pop("freezing", {}), + params=self.head_params, + task=self.task_name, + ), + ] + if self.backbone_params.get("use_aux_heads", False): + node_list.append( + ModelNodeConfig( + name="DDRNetSegmentationHead", + alias="aux_segmentation_head", + inputs=["ddrnet_backbone"], + freezing=self.aux_head_params.pop("freezing", {}), + params=self.aux_head_params, + task=self.task_name, + ) + ) + return node_list + + @property + def losses(self) -> list[LossModuleConfig]: + loss_list = [ + LossModuleConfig( + name="BCEWithLogitsLoss" + if self.task == "binary" + else "CrossEntropyLoss", + alias="segmentation_loss", + attached_to="segmentation_head", + params=self.loss_params, + weight=1.0, + ), + ] + if self.backbone_params.get("use_aux_heads", False): + loss_list.append( + LossModuleConfig( + name="BCEWithLogitsLoss" + if self.task == "binary" + else "CrossEntropyLoss", + alias="aux_segmentation_loss", + attached_to="aux_segmentation_head", + params=self.loss_params, + weight=0.4, + ) + ) + return 
loss_list diff --git a/luxonis_train/models/predefined_models/detection_model.py b/luxonis_train/models/predefined_models/detection_model.py index 41a7dfdc..94c4487f 100644 --- a/luxonis_train/models/predefined_models/detection_model.py +++ b/luxonis_train/models/predefined_models/detection_model.py @@ -1,12 +1,12 @@ from dataclasses import dataclass, field +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -19,6 +19,7 @@ class DetectionModel(BasePredefinedModel): head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -46,8 +47,11 @@ def nodes(self) -> list[ModelNodeConfig]: name="EfficientBBoxHead", alias="detection_head", freezing=self.head_params.pop("freezing", {}), - inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], + inputs=["detection_neck"] + if self.use_neck + else ["detection_backbone"], params=self.head_params, + task=self.task_name, ) ) return nodes diff --git a/luxonis_train/models/predefined_models/detection_model_obb.py b/luxonis_train/models/predefined_models/detection_model_obb.py new file mode 100644 index 00000000..dd02901f --- /dev/null +++ b/luxonis_train/models/predefined_models/detection_model_obb.py @@ -0,0 +1,91 @@ +from dataclasses import dataclass, field + +from luxonis_train.utils.config import ( + AttachedModuleConfig, + LossModuleConfig, + MetricModuleConfig, + ModelNodeConfig, +) +from luxonis_train.utils.types import Kwargs + +from .base_predefined_model import BasePredefinedModel + + +@dataclass +class OBBDetectionModel(BasePredefinedModel): + use_neck: bool = True + backbone_params: Kwargs = field(default_factory=dict) + neck_params: Kwargs = field(default_factory=dict) + head_params: Kwargs = field(default_factory=dict) + loss_params: Kwargs = field(default_factory=dict) + visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None + + @property + def nodes(self) -> list[ModelNodeConfig]: + nodes = [ + ModelNodeConfig( + name="EfficientRep", + alias="detection_backbone", + freezing=self.backbone_params.pop("freezing", {}), + params=self.backbone_params, + ), + ] + if self.use_neck: + nodes.append( + ModelNodeConfig( + name="RepPANNeck", + alias="detection_neck", + inputs=["detection_backbone"], + freezing=self.neck_params.pop("freezing", {}), + params=self.neck_params, + ) + ) + + nodes.append( + ModelNodeConfig( + name="EfficientOBBoxHead", + alias="detection_obb_head", + freezing=self.head_params.pop("freezing", {}), + inputs=["detection_neck"] + if self.use_neck + else ["detection_backbone"], + params=self.head_params, + task=self.task_name, + ) + ) + return nodes + + @property + def losses(self) -> list[LossModuleConfig]: + return [ + LossModuleConfig( + name="OBBDetectionLoss", + alias="detection_obb_loss", + attached_to="detection_obb_head", + params=self.loss_params, + weight=1.0, + ) + ] + + @property + def metrics(self) -> list[MetricModuleConfig]: + return [ + MetricModuleConfig( + name="MeanAveragePrecisionOBB", + alias="detection_map_obb", + attached_to="detection_obb_head", + is_main_metric=True, + ), + ] + + @property + def visualizers(self) -> list[AttachedModuleConfig]: + return [ + 
AttachedModuleConfig( + name="OBBoxVisualizer", + alias="detection_visualizer_obb", + attached_to="detection_obb_head", + params=self.visualizer_params, + ) + ] diff --git a/luxonis_train/models/predefined_models/keypoint_detection_model.py b/luxonis_train/models/predefined_models/keypoint_detection_model.py index 96bef885..670b00b1 100644 --- a/luxonis_train/models/predefined_models/keypoint_detection_model.py +++ b/luxonis_train/models/predefined_models/keypoint_detection_model.py @@ -1,12 +1,13 @@ from dataclasses import dataclass, field +from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -18,8 +19,13 @@ class KeypointDetectionModel(BasePredefinedModel): neck_params: Kwargs = field(default_factory=dict) head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) + head_type: Literal[ + "ImplicitKeypointBBoxHead", "EfficientKeypointBBoxHead" + ] = "EfficientKeypointBBoxHead" kpt_visualizer_params: Kwargs = field(default_factory=dict) bbox_visualizer_params: Kwargs = field(default_factory=dict) + bbox_task_name: str | None = None + kpt_task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -42,15 +48,22 @@ def nodes(self) -> list[ModelNodeConfig]: ) ) + task = {} + if self.bbox_task_name is not None: + task["boundingbox"] = self.bbox_task_name + if self.kpt_task_name is not None: + task["keypoints"] = self.kpt_task_name + nodes.append( ModelNodeConfig( - name="ImplicitKeypointBBoxHead", + name=self.head_type, alias="kpt_detection_head", inputs=["kpt_detection_neck"] if self.use_neck else ["kpt_detection_backbone"], freezing=self.head_params.pop("freezing", {}), params=self.head_params, + task=task, ) ) return nodes @@ -59,7 +72,7 @@ def nodes(self) -> list[ModelNodeConfig]: def losses(self) -> list[LossModuleConfig]: return [ LossModuleConfig( - name="ImplicitKeypointBBoxLoss", + name=self.head_type.replace("Head", "Loss"), attached_to="kpt_detection_head", params=self.loss_params, weight=1.0, diff --git a/luxonis_train/models/predefined_models/segmentation_model.py b/luxonis_train/models/predefined_models/segmentation_model.py index 9bc936a7..d1076239 100644 --- a/luxonis_train/models/predefined_models/segmentation_model.py +++ b/luxonis_train/models/predefined_models/segmentation_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -20,6 +20,7 @@ class SegmentationModel(BasePredefinedModel): head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -36,6 +37,7 @@ def nodes(self) -> list[ModelNodeConfig]: inputs=["segmentation_backbone"], freezing=self.head_params.pop("freezing", {}), params=self.head_params, + task=self.task_name, ), ] diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index bd44ac5a..60e5971c 100644 --- a/luxonis_train/nodes/README.md +++ 
b/luxonis_train/nodes/README.md
@@ -5,7 +5,7 @@ arbitrarily as long as the two nodes are compatible with each other.
## Table Of Contents
-- [ResNet18](#resnet18)
+- [ResNet](#resnet)
- [MicroNet](#micronet)
- [RepVGG](#repvgg)
- [EfficientRep](#efficientrep)
@@ -20,25 +20,33 @@ arbitrarily as long as the two nodes are compatible with each other.
- [BiSeNetHead](#bisenethead)
- [EfficientBBoxHead](#efficientbboxhead)
- [ImplicitKeypointBBoxHead](#implicitkeypointbboxhead)
+- [EfficientKeypointBBoxHead](#efficientkeypointbboxhead)
Every node takes these parameters:
-| Key | Type | Default value | Description |
-| ------------ | ----------- | ------------- | --------------------------------------------------------------------------------------------------------------------------- |
-| attach_index | int \| None | None | Index of previous output that the head attaches to. Each node has a sensible default. Usually should not be manually set. |
-| n_classes | int \| None | None | Number of classes in the dataset. Inferred from the dataset if not provided. |
+| Key | Type | Default value | Description |
+| --------- | ----------- | ------------- | ----------------------------------------------------------------------------- |
+| n_classes | int \| None | None | Number of classes in the dataset. Inferred from the dataset if not provided. |
+
+In addition, the following class attributes can be overridden:
+
+| Key | Type | Default value | Description |
+| ------------ | -------------------------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
+| attach_index | int \| "all" \| Tuple\[int, int\] \| Tuple\[int, int, int\] \| None | None | Index of previous output that the head attaches to. Each node has a sensible default. Usually should not be set manually. |
+| tasks | List\[LabelType\] \| Dict\[LabelType, str\] \| None | None | Tasks supported by the node. Should be overridden for head nodes. Either a list of tasks or a dictionary mapping tasks to their default names. |
Additional parameters for specific nodes are listed below.
-## ResNet18
+## ResNet
-Adapted from [here](https://pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html).
+Adapted from [here](https://pytorch.org/vision/main/models/resnet.html).
**Params**
-| Key | Type | Default value | Description |
-| ---------------- | ---- | ------------- | -------------------------------------- |
-| download_weights | bool | False | If True download weights from imagenet |
+| Key | Type | Default value | Description |
+| ---------------- | ----------------------------------------- | ------------- | -------------------------------------- |
+| variant | Literal\["18", "34", "50", "101", "152"\] | "18" | Variant of the network. |
+| download_weights | bool | False | If True download weights from imagenet |
## MicroNet
@@ -69,7 +77,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf).
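The parameters in the table below are passed to the node through its `params` section. As a minimal, hypothetical sketch (assuming the `ModelNodeConfig` fields used by the predefined models), an `EfficientRep` backbone node could be declared programmatically like so:

```python
from luxonis_train.utils.config import ModelNodeConfig

# Hypothetical sketch: an EfficientRep backbone node with explicit repeat counts.
# `variant` and `n_repeats` are EfficientRep parameters; the remaining fields
# belong to ModelNodeConfig. In a YAML config, the same keys would typically go
# under the node's `params` entry.
backbone = ModelNodeConfig(
    name="EfficientRep",
    alias="detection_backbone",
    params={"variant": "nano", "n_repeats": [1, 6, 12, 18, 6]},
)
```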
| Key | Type | Default value | Description | | ------------- | ----------- | --------------------------- | --------------------------------------------------- | | channels_list | List\[int\] | \[64, 128, 256, 512, 1024\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | | in_channels | int | 3 | Number of input channels, should be 3 in most cases | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -137,9 +145,9 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ------------- | ---------------- | ------------------------------------------------------- | ----------------------------------------- | -| num_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | +| n_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | | channels_list | List\[int\] | \[256, 128, 128, 256, 256, 512\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -174,7 +182,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ---------- | ----- | ------------- | -------------------------------------------------- | -| num_heads | bool | 3 | Number of output heads | +| n_heads | bool | 3 | Number of output heads | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | @@ -187,8 +195,21 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). | Key | Type | Default value | Description | | ---------------- | --------------------------- | ------------- | ---------------------------------------------------------------------------------------------------------- | | n_keypoints | int \| None | None | Number of keypoints. | -| num_heads | int | 3 | Number of output heads | +| n_heads | int | 3 | Number of output heads | | anchors | List\[List\[int\]\] \| None | None | Anchors used for object detection. If set to `None`, the anchors are computed at runtime from the dataset. | | init_coco_biases | bool | True | Whether to use COCO bias and weight initialization | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | + +## EfficientKeypointBBoxHead + +Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). + +**Params** + +| Key | Type | Default value | Description | +| ----------- | ----------- | ------------- | -------------------------------------------------- | +| n_keypoints | int \| None | None | Number of keypoints. 
| +| n_heads | int | 3 | Number of output heads | +| conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | +| iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py index d7ec70d0..5b4a889f 100644 --- a/luxonis_train/nodes/__init__.py +++ b/luxonis_train/nodes/__init__.py @@ -1,33 +1,4 @@ -from .base_node import BaseNode -from .bisenet_head import BiSeNetHead -from .classification_head import ClassificationHead -from .contextspatial import ContextSpatial -from .efficient_bbox_head import EfficientBBoxHead -from .efficientrep import EfficientRep -from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead -from .micronet import MicroNet -from .mobilenetv2 import MobileNetV2 -from .mobileone import MobileOne -from .reppan_neck import RepPANNeck -from .repvgg import RepVGG -from .resnet18 import ResNet18 -from .rexnetv1 import ReXNetV1_lite -from .segmentation_head import SegmentationHead - -__all__ = [ - "BiSeNetHead", - "ClassificationHead", - "ContextSpatial", - "EfficientBBoxHead", - "EfficientRep", - "ImplicitKeypointBBoxHead", - "BaseNode", - "MicroNet", - "MobileNetV2", - "MobileOne", - "ReXNetV1_lite", - "RepPANNeck", - "RepVGG", - "ResNet18", - "SegmentationHead", -] +from .backbones import * +from .base_node import * +from .heads import * +from .necks import * diff --git a/luxonis_train/nodes/activations/__init__.py b/luxonis_train/nodes/activations/__init__.py index 37aea0fc..0d3d1e0b 100644 --- a/luxonis_train/nodes/activations/__init__.py +++ b/luxonis_train/nodes/activations/__init__.py @@ -1,3 +1,3 @@ -from .activations import HSigmoid, HSwish +from .activations import HSigmoid -__all__ = ["HSigmoid", "HSwish"] +__all__ = ["HSigmoid"] diff --git a/luxonis_train/nodes/activations/activations.py b/luxonis_train/nodes/activations/activations.py index f3abedd6..93703a1c 100644 --- a/luxonis_train/nodes/activations/activations.py +++ b/luxonis_train/nodes/activations/activations.py @@ -10,14 +10,3 @@ def __init__(self): def forward(self, x: Tensor) -> Tensor: return self.relu(x + 3) / 6 - - -class HSwish(nn.Module): - def __init__(self): - """H-Swish activation function from U{Searching for MobileNetV3 - }.""" - super().__init__() - self.sigmoid = HSigmoid() - - def forward(self, x: Tensor) -> Tensor: - return x * self.sigmoid(x) diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py new file mode 100644 index 00000000..aad94198 --- /dev/null +++ b/luxonis_train/nodes/backbones/__init__.py @@ -0,0 +1,23 @@ +from .contextspatial import ContextSpatial +from .ddrnet import DDRNet +from .efficientnet import EfficientNet +from .efficientrep import EfficientRep +from .micronet import MicroNet +from .mobilenetv2 import MobileNetV2 +from .mobileone import MobileOne +from .repvgg import RepVGG +from .resnet import ResNet +from .rexnetv1 import ReXNetV1_lite + +__all__ = [ + "ContextSpatial", + "EfficientNet", + "EfficientRep", + "MicroNet", + "MobileNetV2", + "MobileOne", + "ReXNetV1_lite", + "RepVGG", + "ResNet", + "DDRNet", +] diff --git a/luxonis_train/nodes/backbones/contextspatial.py b/luxonis_train/nodes/backbones/contextspatial.py new file mode 100644 index 00000000..cf98cd4c --- /dev/null +++ b/luxonis_train/nodes/backbones/contextspatial.py @@ -0,0 +1,143 @@ +from torch import Tensor, nn +from torch.nn import functional as F + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks 
import ( + AttentionRefinmentBlock, + ConvModule, + FeatureFusionBlock, +) +from luxonis_train.utils import Kwargs +from luxonis_train.utils.registry import NODES + + +class ContextSpatial(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + context_backbone: str | nn.Module = "MobileNetV2", + backbone_kwargs: Kwargs | None = None, + **kwargs, + ): + """Context Spatial backbone introduced in BiseNetV1. + + Source: U{BiseNetV1} + + @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } + + @type context_backbone: str + @param context_backbone: Backbone used in the context path. + Can be either a string or a C{torch.nn.Module}. + If a string argument is used, it has to be a name of a module + stored in the L{NODES} registry. Defaults to C{MobileNetV2}. + + @type backbone_kwargs: dict + @param backbone_kwargs: Keyword arguments for the backbone. + Only used when the C{context_backbone} argument is a string. + """ + super().__init__(**kwargs) + + if isinstance(context_backbone, str): + backbone_kwargs = backbone_kwargs or {} + backbone_kwargs |= kwargs + context_backbone = NODES.get(context_backbone)(**backbone_kwargs) + + self.context_path = ContextPath(context_backbone) + self.spatial_path = SpatialPath(3, 128) + self.ffm = FeatureFusionBlock(256, 256) + + def forward(self, inputs: Tensor) -> list[Tensor]: + spatial_out = self.spatial_path(inputs) + context16, _ = self.context_path(inputs) + fm_fuse = self.ffm(spatial_out, context16) + return [fm_fuse] + + +class SpatialPath(nn.Module): + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + intermediate_channels = 64 + self.conv_7x7 = ConvModule( + in_channels, + intermediate_channels, + kernel_size=7, + stride=2, + padding=3, + ) + self.conv_3x3_1 = ConvModule( + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, + ) + self.conv_3x3_2 = ConvModule( + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, + ) + self.conv_1x1 = ConvModule( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + ) + + def forward(self, x: Tensor) -> Tensor: + x = self.conv_7x7(x) + x = self.conv_3x3_1(x) + x = self.conv_3x3_2(x) + return self.conv_1x1(x) + + +class ContextPath(nn.Module): + def __init__(self, backbone: nn.Module): + super().__init__() + self.backbone = backbone + + self.up16 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) + self.up32 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) + + self.refine16 = ConvModule(128, 128, 3, 1, 1) + self.refine32 = ConvModule(128, 128, 3, 1, 1) + + def forward(self, x: Tensor) -> tuple[Tensor, Tensor]: + *_, down16, down32 = self.backbone(x) + + if not hasattr(self, "arm16"): + self.arm16 = AttentionRefinmentBlock(down16.shape[1], 128) + self.arm32 = AttentionRefinmentBlock(down32.shape[1], 128) + + self.global_context = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvModule(down32.shape[1], 128, 1, 1, 0), + ) + + arm_down16 = self.arm16(down16) + arm_down32 = self.arm32(down32) + + global_down32 = self.global_context(down32) + global_down32 = F.interpolate( + global_down32, + size=down32.shape[2:], + mode="bilinear", + align_corners=True, + ) + + arm_down32 += global_down32 + arm_down32 = self.up32(arm_down32) + arm_down32 = self.refine32(arm_down32) + + arm_down16 += arm_down32 + arm_down16 = self.up16(arm_down16) + arm_down16 = self.refine16(arm_down16) + + return arm_down16, 
arm_down32 diff --git a/luxonis_train/nodes/backbones/ddrnet/__init__.py b/luxonis_train/nodes/backbones/ddrnet/__init__.py new file mode 100644 index 00000000..8ecc5814 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/__init__.py @@ -0,0 +1,3 @@ +from .ddrnet import DDRNet + +__all__ = ["DDRNet"] diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py new file mode 100644 index 00000000..59f76b8b --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -0,0 +1,358 @@ +"""DDRNet blocks. + +Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} +Original source: U{https://github.com/ydhongHIT/DDRNet} +Paper: U{https://arxiv.org/pdf/2101.06085.pdf} +@license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} +""" + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.blocks import ConvModule, UpscaleOnline + + +class DAPPMBranch(nn.Module): + def __init__( + self, + kernel_size: int, + stride: int, + in_channels: int, + branch_channels: int, + inter_mode: str = "bilinear", + ): + """A DAPPM branch. + + @type kernel_size: int + @param kernel_size: The kernel size for the average pooling. + When stride=0, this parameter is omitted, and + AdaptiveAvgPool2d over all the input is performed. + @type stride: int + @param stride: Stride for the average pooling. When stride=0, an + AdaptiveAvgPool2d over all the input is performed (output is + 1x1). When stride=1, no average pooling is performed. When + stride>1, average pooling is performed (scaling the input + down and up again). + @type in_channels: int + @param in_channels: Number of input channels. + @type branch_channels: int + @param branch_channels: Width after the first convolution. + @type inter_mode: str + @param inter_mode: Interpolation mode for upscaling. Defaults to + "bilinear". + """ + super().__init__() + + down_list = [] + down_list.append(nn.BatchNorm2d(in_channels)) + if stride == 0: + down_list.append(nn.AdaptiveAvgPool2d((1, 1))) + elif stride > 1: + down_list.append( + nn.AvgPool2d( + kernel_size=kernel_size, stride=stride, padding=stride + ) + ) + + down_list.append(nn.ReLU(inplace=True)) + down_list.append( + nn.Conv2d(in_channels, branch_channels, kernel_size=1, bias=False) + ) + + self.down_scale = nn.Sequential(*down_list) + self.up_scale = UpscaleOnline(inter_mode) + + if stride != 1: + self.process = nn.Sequential( + nn.BatchNorm2d(branch_channels), + nn.ReLU(inplace=True), + nn.Conv2d( + branch_channels, + branch_channels, + kernel_size=3, + padding=1, + bias=False, + ), + ) + + def forward(self, x: Tensor | list[Tensor]) -> Tensor: + """Process input through the DAPPM branch. + + @type x: Tensor or list[Tensor] + @param x: In branch 0 - the original input of the DAPPM. In other branches - a list containing the original + input and the output of the previous branch. + + @return: Processed output tensor. 
+ """ + if isinstance(x, list): + output_of_prev_branch = x[1] + x = x[0] + else: + output_of_prev_branch = None + + in_width = x.shape[-1] + in_height = x.shape[-2] + out = self.down_scale(x) + out = self.up_scale( + out, output_height=in_height, output_width=in_width + ) + + if output_of_prev_branch is not None: + out = self.process(out + output_of_prev_branch) + + return out + + +class DAPPM(nn.Module): + def __init__( + self, + in_channels: int, + branch_channels: int, + out_channels: int, + kernel_sizes: list[int], + strides: list[int], + inter_mode: str = "bilinear", + ): + """DAPPM (Dynamic Attention Pyramid Pooling Module). + + @type in_channels: int + @param in_channels: Number of input channels. + @type branch_channels: int + @param branch_channels: Width after the first convolution in + each branch. + @type out_channels: int + @param out_channels: Number of output channels. + @type kernel_sizes: list[int] + @param kernel_sizes: List of kernel sizes for each branch. + @type strides: list[int] + @param strides: List of strides for each branch. + @type inter_mode: str + @param inter_mode: Interpolation mode for upscaling. Defaults to + "bilinear". + + @raises ValueError: If the lengths of `kernel_sizes` and `strides` + are not the same. + """ + super().__init__() + + if len(kernel_sizes) != len(strides): # pragma: no cover + raise ValueError( + "The lenghts of `kernel_sizes` and `strides` must be the same" + ) + + self.branches = nn.ModuleList( + [ + DAPPMBranch( + kernel_size=kernel_size, + stride=stride, + in_channels=in_channels, + branch_channels=branch_channels, + inter_mode=inter_mode, + ) + for kernel_size, stride in zip(kernel_sizes, strides) + ] + ) + + self.compression = nn.Sequential( + nn.BatchNorm2d(branch_channels * len(self.branches)), + nn.ReLU(inplace=True), + nn.Conv2d( + branch_channels * len(self.branches), + out_channels, + kernel_size=1, + bias=False, + ), + ) + self.shortcut = nn.Sequential( + nn.BatchNorm2d(in_channels), + nn.ReLU(inplace=True), + nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), + ) + + def forward(self, x: Tensor) -> Tensor: + """Forward pass through the DAPPM module. + + @type x: Tensor + @param x: Input tensor. + @return: Output tensor after processing through all branches and + compression. + """ + x_list = [self.branches[0](x)] + + for i in range(1, len(self.branches)): + x_list.append(self.branches[i]([x, x_list[i - 1]])) + + out = self.compression(torch.cat(x_list, dim=1)) + self.shortcut(x) + return out + + +class BasicDDRBackbone(nn.Module): + def __init__( + self, + block: type[nn.Module], + stem_channels: int, + layers: list[int], + in_channels: int, + layer3_repeats: int = 1, + ): + """Initialize the BasicDDRBackBone with specified parameters. + + @type block: Type[nn.Module] + @param block: The block class to use for layers. + @type stem_channels: int + @param stem_channels: Number of output channels in the stem layer. + @type layers: list[int] + @param layers: Number of blocks in each layer. + @type in_channels: int + @param in_channels: Number of input channels. + @type layer3_repeats: int + @param layer3_repeats: Number of repeats for layer3. Defaults to + 1. 
+ """ + super().__init__() + self.input_channels = in_channels + + self.stem = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=stem_channels, + kernel_size=3, + stride=2, + padding=1, + bias=True, + activation=nn.ReLU(inplace=True), + ), + ConvModule( + in_channels=stem_channels, + out_channels=stem_channels, + kernel_size=3, + stride=2, + padding=1, + bias=True, + activation=nn.ReLU(inplace=True), + ), + ) + + self.layer1 = make_layer( + block=block, + in_channels=stem_channels, + channels=stem_channels, + num_blocks=layers[0], + ) + + self.layer2 = make_layer( + block=block, + in_channels=stem_channels, + channels=stem_channels * 2, + num_blocks=layers[1], + stride=2, + ) + + self.layer3 = nn.ModuleList( + [ + make_layer( + block=block, + in_channels=stem_channels * 2, + channels=stem_channels * 4, + num_blocks=layers[2], + stride=2, + ) + ] + + [ + make_layer( + block=block, + in_channels=stem_channels * 4, + channels=stem_channels * 4, + num_blocks=layers[2], + stride=1, + ) + for _ in range(layer3_repeats - 1) + ] + ) + + self.layer4 = make_layer( + block=block, + in_channels=stem_channels * 4, + channels=stem_channels * 8, + num_blocks=layers[3], + stride=2, + ) + + def get_backbone_output_number_of_channels(self) -> dict[str, int]: + """Determine the number of output channels for each layer of the + backbone. + + Returns a dictionary with keys "layer2", "layer3", "layer4" and + their respective number of output channels. + + @return: Dictionary of output channel counts for each layer. + """ + output_shapes = {} + x = torch.randn(1, self.input_channels, 320, 320) + x = self.stem(x) + x = self.layer1(x) + x = self.layer2(x) + output_shapes["layer2"] = x.shape[1] + + for layer in self.layer3: + x = layer(x) + output_shapes["layer3"] = x.shape[1] + + x = self.layer4(x) + output_shapes["layer4"] = x.shape[1] + + return output_shapes + + +def make_layer( + block: type[nn.Module], + in_channels: int, + channels: int, + num_blocks: int, + stride: int = 1, + expansion: int = 1, +) -> nn.Sequential: + """Creates a sequential layer consisting of a series of blocks. + + @type block: Type[nn.Module] + @param block: The block class to be used. + @type in_channels: int + @param in_channels: Number of input channels. + @type channels: int + @param channels: Number of output channels. + @type num_blocks: int + @param num_blocks: Number of blocks in the layer. + @type stride: int + @param stride: Stride for the first block. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the block. Defaults to 1. + @return: A sequential container of the blocks. 
+ """ + layers: list[nn.Module] = [] + + layers.append( + block( + in_channels, + channels, + stride, + final_relu=num_blocks > 1, + expansion=expansion, + ) + ) + + in_channels = channels * expansion + + if num_blocks > 1: + for i in range(1, num_blocks): + final_relu = i != (num_blocks - 1) + layers.append( + block( + in_channels, + channels, + stride=1, + final_relu=final_relu, + expansion=expansion, + ) + ) + + return nn.Sequential(*layers) diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py new file mode 100644 index 00000000..37779a19 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -0,0 +1,294 @@ +from typing import Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ( + BasicResNetBlock, + Bottleneck, + ConvModule, + UpscaleOnline, +) + +from .blocks import DAPPM, BasicDDRBackbone, make_layer +from .variants import get_variant + + +class DDRNet(BaseNode[Tensor, list[Tensor]]): + in_channels: int + + def __init__( + self, + variant: Literal["23-slim", "23"] = "23-slim", + channels: int | None = None, + highres_channels: int | None = None, + use_aux_heads: bool = True, + upscale_module: nn.Module | None = None, + spp_width: int = 128, + ssp_inter_mode: str = "bilinear", + segmentation_inter_mode: str = "bilinear", + # TODO: nn.Module registry + block: type[nn.Module] = BasicResNetBlock, + skip_block: type[nn.Module] = BasicResNetBlock, + layer5_block: type[nn.Module] = Bottleneck, + layer5_bottleneck_expansion: int = 2, + spp_kernel_sizes: list[int] | None = None, + spp_strides: list[int] | None = None, + layer3_repeats: int = 1, + layers: list[int] | None = None, + **kwargs, + ): + """DDRNet backbone. + + @see: U{Adapted from } + @see: U{Original code } + @see: U{Paper } + @license: U{Apache License, Version 2.0 } + @type variant: Literal["23-slim", "23"] + @param variant: DDRNet variant. Defaults to "23-slim". + The variant determines the number of channels and highres_channels. + The following variants are available: + - "23-slim" (default): channels=32, highres_channels=64 + - "23": channels=64, highres_channels=128 + @type channels: int | None + @param channels: Base number of channels. If provided, overrides the variant values. + @type highres_channels: int | None + @param highres_channels: Number of channels in the high resolution net. If provided, overrides the variant values. + @type use_aux_heads: bool + @param use_aux_heads: Whether to use auxiliary heads. Defaults to True. + @type upscale_module: nn.Module + @param upscale_module: Module for upscaling (e.g., bilinear interpolation). + Defaults to UpscaleOnline(). + @type spp_width: int + @param spp_width: Width of the branches in the SPP block. Defaults to 128. + @type ssp_inter_mode: str + @param ssp_inter_mode: Interpolation mode for the SPP block. Defaults to + "bilinear". + @type segmentation_inter_mode: str + @param segmentation_inter_mode: Interpolation mode for the segmentation head. + Defaults to "bilinear". + @type block: type[nn.Module] + @param block: type of block to use in the backbone. Defaults to + BasicResNetBlock. + @type skip_block: type[nn.Module] + @param skip_block: type of block for skip connections. Defaults to + BasicResNetBlock. + @type layer5_block: type[nn.Module] + @param layer5_block: type of block for layer5 and layer5_skip. Defaults to + Bottleneck. 
+ @type layer5_bottleneck_expansion: int + @param layer5_bottleneck_expansion: Expansion factor for Bottleneck block in + layer5. Defaults to 2. + @type spp_kernel_sizes: list[int] + @param spp_kernel_sizes: Kernel sizes for the SPP module pooling. Defaults to + [1, 5, 9, 17, 0]. + @type spp_strides: list[int] + @param spp_strides: Strides for the SPP module pooling. Defaults to [1, 2, 4, 8, + 0]. + @type layer3_repeats: int + @param layer3_repeats: Number of times to repeat the 3rd stage. Defaults to 1. + @type layers: list[int] + @param layers: Number of blocks in each layer of the backbone. Defaults to [2, + 2, 2, 2, 1, 2, 2, 1]. + @type kwargs: Any + @param kwargs: Additional arguments to pass to L{BaseNode}. + """ + super().__init__(**kwargs) + + upscale_module = upscale_module or UpscaleOnline() + spp_kernel_sizes = spp_kernel_sizes or [1, 5, 9, 17, 0] + spp_strides = spp_strides or [1, 2, 4, 8, 0] + layers = layers or [2, 2, 2, 2, 1, 2, 2, 1] + + var = get_variant(variant) + + channels = channels or var.channels + highres_channels = highres_channels or var.highres_channels + + self._use_aux_heads = use_aux_heads + self.upscale = upscale_module + self.ssp_inter_mode = ssp_inter_mode + self.segmentation_inter_mode = segmentation_inter_mode + self.relu = nn.ReLU(inplace=False) + self.layer3_repeats = layer3_repeats + self.channels = channels + self.layers = layers + self.backbone_layers, self.additional_layers = ( + self.layers[:4], + self.layers[4:], + ) + + self._backbone = BasicDDRBackbone( + block=block, + stem_channels=self.channels, + layers=self.backbone_layers, + in_channels=self.in_channels, + layer3_repeats=self.layer3_repeats, + ) + out_chan_backbone = ( + self._backbone.get_backbone_output_number_of_channels() + ) + + # Define layers for layer 3 + self.compression3 = nn.ModuleList() + self.down3 = nn.ModuleList() + self.layer3_skip = nn.ModuleList() + for i in range(layer3_repeats): + self.compression3.append( + ConvModule( + in_channels=out_chan_backbone["layer3"], + out_channels=highres_channels, + kernel_size=1, + bias=False, + activation=nn.Identity(), + ) + ) + self.down3.append( + ConvModule( + in_channels=highres_channels, + out_channels=out_chan_backbone["layer3"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.Identity(), + ) + ) + self.layer3_skip.append( + make_layer( + in_channels=out_chan_backbone["layer2"] + if i == 0 + else highres_channels, + channels=highres_channels, + block=skip_block, + num_blocks=self.additional_layers[1], + ) + ) + + self.compression4 = ConvModule( + in_channels=out_chan_backbone["layer4"], + out_channels=highres_channels, + kernel_size=1, + bias=False, + activation=nn.Identity(), + ) + + self.down4 = nn.Sequential( + ConvModule( + in_channels=highres_channels, + out_channels=highres_channels * 2, + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.ReLU(inplace=True), + ), + ConvModule( + in_channels=highres_channels * 2, + out_channels=out_chan_backbone["layer4"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.Identity(), + ), + ) + + self.layer4_skip = make_layer( + block=skip_block, + in_channels=highres_channels, + channels=highres_channels, + num_blocks=self.additional_layers[2], + ) + self.layer5_skip = make_layer( + block=layer5_block, + in_channels=highres_channels, + channels=highres_channels, + num_blocks=self.additional_layers[3], + expansion=layer5_bottleneck_expansion, + ) + + self.layer5 = make_layer( + block=layer5_block, + 
in_channels=out_chan_backbone["layer4"], + channels=out_chan_backbone["layer4"], + num_blocks=self.additional_layers[0], + stride=2, + expansion=layer5_bottleneck_expansion, + ) + + self.spp = DAPPM( + in_channels=out_chan_backbone["layer4"] + * layer5_bottleneck_expansion, + branch_channels=spp_width, + out_channels=highres_channels * layer5_bottleneck_expansion, + inter_mode=self.ssp_inter_mode, + kernel_sizes=spp_kernel_sizes, + strides=spp_strides, + ) + + self.highres_channels = highres_channels + self.layer5_bottleneck_expansion = layer5_bottleneck_expansion + self.init_params() + + def forward(self, inputs: Tensor) -> list[Tensor]: + width_output = inputs.shape[-1] // 8 + height_output = inputs.shape[-2] // 8 + + x = self._backbone.stem(inputs) + x = self._backbone.layer1(x) + x = self._backbone.layer2(self.relu(x)) + + # Repeat layer 3 + x_skip = x + for i in range(self.layer3_repeats): + out_layer3 = self._backbone.layer3[i](self.relu(x)) + out_layer3_skip = self.layer3_skip[i](self.relu(x_skip)) + + x = out_layer3 + self.down3[i](self.relu(out_layer3_skip)) + x_skip = out_layer3_skip + self.upscale( + self.compression3[i](self.relu(out_layer3)), + height_output, + width_output, + ) + + # Save for auxiliary head + if self._use_aux_heads: + x_extra = x_skip + + out_layer4 = self._backbone.layer4(self.relu(x)) + out_layer4_skip = self.layer4_skip(self.relu(x_skip)) + + x = out_layer4 + self.down4(self.relu(out_layer4_skip)) + x_skip = out_layer4_skip + self.upscale( + self.compression4(self.relu(out_layer4)), + height_output, + width_output, + ) + + out_layer5_skip = self.layer5_skip(self.relu(x_skip)) + + x = self.upscale( + self.spp(self.layer5(self.relu(x))), height_output, width_output + ) + + x = x + out_layer5_skip + + if self._use_aux_heads: + return [x_extra, x] + else: + return [x] + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) diff --git a/luxonis_train/nodes/backbones/ddrnet/variants.py b/luxonis_train/nodes/backbones/ddrnet/variants.py new file mode 100644 index 00000000..0e2d66c7 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/variants.py @@ -0,0 +1,27 @@ +from typing import Literal + +from pydantic import BaseModel + + +class DDRNetVariant(BaseModel): + channels: int = 32 + highres_channels: int = 64 + + +def get_variant(variant: Literal["23-slim", "23"]) -> DDRNetVariant: + variants = { + "23-slim": DDRNetVariant( + channels=32, + highres_channels=64, + ), + "23": DDRNetVariant( + channels=64, + highres_channels=128, + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + "DDRNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/efficientnet.py b/luxonis_train/nodes/backbones/efficientnet.py new file mode 100644 index 00000000..7744236a --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientnet.py @@ -0,0 +1,58 @@ +from typing import Any + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + + +class EfficientNet(BaseNode[Tensor, list[Tensor]]): + attach_index: int = -1 + + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """EfficientNet backbone. 
+ + EfficientNet is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a compound coefficient. Unlike conventional practice that arbitrary scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients. + + Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} + + @license: U{Apache License, Version 2.0 + } + + @see: U{https://paperswithcode.com/method/efficientnet} + @see: U{EfficientNet: Rethinking Model Scaling for + Convolutional Neural Networks + } + @type download_weights: bool + @param download_weights: If C{True} download weights from imagenet. Defaults to + C{False}. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [0, 1, 2, 4, 6]. + """ + super().__init__(**kwargs) + + self.backbone: nn.Module = torch.hub.load( # type: ignore + "rwightman/gen-efficientnet-pytorch", + "efficientnet_lite0", + pretrained=download_weights, + ) + self.out_indices = out_indices or [0, 1, 2, 4, 6] + + def forward(self, inputs: Tensor) -> list[Tensor]: + x = self.backbone.conv_stem(inputs) + x = self.backbone.bn1(x) + x = self.backbone.act1(x) + + outs: list[Tensor] = [] + + for i, layer in enumerate(self.backbone.blocks): + x = layer(x) + if i in self.out_indices: + outs.append(x) + + return outs diff --git a/luxonis_train/nodes/backbones/efficientrep/__init__.py b/luxonis_train/nodes/backbones/efficientrep/__init__.py new file mode 100644 index 00000000..51ff264a --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/__init__.py @@ -0,0 +1,3 @@ +from .efficientrep import EfficientRep + +__all__ = ["EfficientRep"] diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py new file mode 100644 index 00000000..0143855c --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -0,0 +1,125 @@ +import logging +from typing import Any + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ( + BlockRepeater, + RepVGGBlock, + SpatialPyramidPoolingBlock, +) +from luxonis_train.utils import make_divisible + +from .variants import VariantLiteral, get_variant + +logger = logging.getLogger(__name__) + + +class EfficientRep(BaseNode[Tensor, list[Tensor]]): + in_channels: int + + def __init__( + self, + variant: VariantLiteral = "nano", + channels_list: list[int] | None = None, + n_repeats: list[int] | None = None, + depth_mul: float | None = None, + width_mul: float | None = None, + **kwargs: Any, + ): + """Implementation of the EfficientRep backbone. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. + + @type variant: Literal["n", "nano", "s", "small", "m", "medium", "l", "large"] + @param variant: EfficientRep variant. Defaults to "nano". + The variant determines the depth and width multipliers. + The depth multiplier determines the number of blocks in each stage and the width multiplier determines the number of channels. 
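For example, with the default "nano" multipliers the default channels_list [64, 128, 256, 512, 1024] scales down to [16, 32, 64, 128, 256] and the default n_repeats [1, 6, 12, 18, 6] becomes [1, 2, 4, 6, 2].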
+ The following variants are available: + - "n" or "nano" (default): depth_multiplier=0.33, width_multiplier=0.25 + - "s" or "small": depth_multiplier=0.33, width_multiplier=0.50 + - "m" or "medium": depth_multiplier=0.60, width_multiplier=0.75 + - "l" or "large": depth_multiplier=1.0, width_multiplier=1.0 + @type channels_list: list[int] | None + @param channels_list: List of number of channels for each block. If unspecified, + defaults to [64, 128, 256, 512, 1024]. + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. If unspecified, + defaults to [1, 6, 12, 18, 6]. + @type depth_mul: float + @param depth_mul: Depth multiplier. If provided, overrides the variant value. + @type width_mul: float + @param width_mul: Width multiplier. If provided, overrides the variant value. + """ + super().__init__(**kwargs) + + var = get_variant(variant) + depth_mul = depth_mul or var.depth_multiplier + width_mul = width_mul or var.width_multiplier + + channels_list = channels_list or [64, 128, 256, 512, 1024] + n_repeats = n_repeats or [1, 6, 12, 18, 6] + channels_list = [ + make_divisible(i * width_mul, 8) for i in channels_list + ] + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats + ] + + self.repvgg_encoder = RepVGGBlock( + in_channels=self.in_channels, + out_channels=channels_list[0], + kernel_size=3, + stride=2, + ) + + self.blocks = nn.ModuleList() + for i in range(4): + curr_block = nn.Sequential( + RepVGGBlock( + in_channels=channels_list[i], + out_channels=channels_list[i + 1], + kernel_size=3, + stride=2, + ), + BlockRepeater( + block=RepVGGBlock, + in_channels=channels_list[i + 1], + out_channels=channels_list[i + 1], + n_blocks=n_repeats[i + 1], + ), + ) + self.blocks.append(curr_block) + + self.blocks[-1].append( + SpatialPyramidPoolingBlock( + in_channels=channels_list[4], + out_channels=channels_list[4], + kernel_size=5, + ) + ) + + def set_export_mode(self, mode: bool = True) -> None: + """Reparametrizes instances of L{RepVGGBlock} in the network. + + @type mode: bool + @param mode: Whether to set the export mode. Defaults to + C{True}. 
+ """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing 'EfficientRep'.") + for module in self.modules(): + if isinstance(module, RepVGGBlock): + module.reparametrize() + + def forward(self, inputs: Tensor) -> list[Tensor]: + outputs: list[Tensor] = [] + x = self.repvgg_encoder(inputs) + for block in self.blocks: + x = block(x) + outputs.append(x) + return outputs diff --git a/luxonis_train/nodes/backbones/efficientrep/variants.py b/luxonis_train/nodes/backbones/efficientrep/variants.py new file mode 100644 index 00000000..7ced749e --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/variants.py @@ -0,0 +1,44 @@ +from typing import Literal, TypeAlias + +from pydantic import BaseModel + +VariantLiteral: TypeAlias = Literal[ + "n", "nano", "s", "small", "m", "medium", "l", "large" +] + + +class EfficientRepVariant(BaseModel): + depth_multiplier: float + width_multiplier: float + + +def get_variant(variant: VariantLiteral) -> EfficientRepVariant: + variants = { + "n": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.25, + ), + "s": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.50, + ), + "m": EfficientRepVariant( + depth_multiplier=0.60, + width_multiplier=0.75, + ), + "l": EfficientRepVariant( + depth_multiplier=1.0, + width_multiplier=1.0, + ), + } + variants["nano"] = variants["n"] + variants["small"] = variants["s"] + variants["medium"] = variants["m"] + variants["large"] = variants["l"] + + if variant not in variants: # pragma: no cover + raise ValueError( + f"EfficientRep variant should be one of " + f"{list(variants.keys())}, got '{variant}'." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/micronet/__init__.py b/luxonis_train/nodes/backbones/micronet/__init__.py new file mode 100644 index 00000000..5b41ece3 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/__init__.py @@ -0,0 +1,3 @@ +from .micronet import MicroNet + +__all__ = ["MicroNet"] diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py new file mode 100644 index 00000000..3da5e15e --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -0,0 +1,515 @@ +from typing import Literal + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.activations import HSigmoid +from luxonis_train.nodes.blocks import ConvModule + + +class MicroBlock(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + expansion_ratios: tuple[int, int] = (2, 2), + groups_1: tuple[int, int] = (0, 6), + groups_2: tuple[int, int] = (1, 1), + use_dynamic_shift: tuple[int, int, int] = (2, 0, 1), + reduction_factor: int = 1, + init_a: tuple[float, float] = (1.0, 1.0), + init_b: tuple[float, float] = (0.0, 0.0), + ): + """ + MicroBlock: The basic building block of MicroNet. + + This block implements the Micro-Factorized Convolution and Dynamic Shift-Max activation. + It can be configured to use different combinations of these components based on the network design. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type kernel_size: int + @param kernel_size: Size of the convolution kernel. Defaults to 3. + @type stride: int + @param stride: Stride of the convolution. Defaults to 1. + @type expansion_ratios: tuple[int, int] + @param expansion_ratios: Expansion ratios for the intermediate channels. 
Defaults to (2, 2). + @type groups_1: tuple[int, int] + @param groups_1: Groups for the first set of convolutions. Defaults to (0, 6). + @type groups_2: tuple[int, int] + @param groups_2: Groups for the second set of convolutions. Defaults to (1, 1). + @type use_dynamic_shift: tuple[int, int, int] + @param use_dynamic_shift: Flags to use Dynamic Shift-Max in different positions. Defaults to (2, 0, 1). + @type reduction_factor: int + @param reduction_factor: Reduction factor for the squeeze-and-excitation-like operation. Defaults to 1. + @type init_a: tuple[float, float] + @param init_a: Initialization parameters for Dynamic Shift-Max. Defaults to (1.0, 1.0). + @type init_b: tuple[float, float] + @param init_b: Initialization parameters for Dynamic Shift-Max. Defaults to (0.0, 0.0). + """ + super().__init__() + + self.use_residual = stride == 1 and in_channels == out_channels + self.expansion_ratios = expansion_ratios + use_dy1, use_dy2, use_dy3 = use_dynamic_shift + group1, group2 = groups_2 + reduction = 8 * reduction_factor + intermediate_channels = ( + in_channels * expansion_ratios[0] * expansion_ratios[1] + ) + + if groups_1[0] == 0: + self.layers = self._create_lite_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1[1], + group1, + group2, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + elif group2 == 0: + self.layers = self._create_transition_block( + in_channels, + intermediate_channels, + groups_1[0], + groups_1[1], + use_dy3, + reduction, + ) + else: + self.layers = self._create_full_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1, + group1, + group2, + use_dy1, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + + def _create_lite_block( + self, + in_channels: int, + out_channels: int, + intermediate_channels: int, + kernel_size: int, + stride: int, + group1: int, + group2: int, + group3: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + DepthSpatialSepConv( + in_channels, self.expansion_ratios, kernel_size, stride + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + group1, + reduction, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + ChannelShuffle(group1), + ChannelShuffle(intermediate_channels // 2) + if use_dy2 != 0 + else nn.Sequential(), + ConvModule( + in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + groups=group2, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group3, + reduction // 2, + ) + if use_dy3 > 0 + else nn.Sequential(), + ChannelShuffle(group3), + ChannelShuffle(out_channels // 2) + if out_channels % 2 == 0 and use_dy3 != 0 + else nn.Sequential(), + ) + + def _create_transition_block( + self, + in_channels: int, + intermediate_channels: int, + group1: int, + group2: int, + use_dy3: int, + reduction: int, + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ) + + def _create_full_block( + self, + in_channels: int, + out_channels: int, + 
intermediate_channels: int, + kernel_size: int, + stride: int, + groups_1: tuple[int, int], + group1: int, + group2: int, + use_dy1: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=groups_1[0], + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy1 == 2 else False, + groups_1[1], + reduction, + ) + if use_dy1 > 0 + else nn.ReLU6(True), + ChannelShuffle(groups_1[1]), + DepthSpatialSepConv( + intermediate_channels, (1, 1), kernel_size, stride + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + groups_1[1], + reduction, + True, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + ChannelShuffle(intermediate_channels // 4) + if use_dy1 != 0 and use_dy2 != 0 + else nn.Sequential() + if use_dy1 == 0 and use_dy2 == 0 + else ChannelShuffle(intermediate_channels // 2), + ConvModule( + in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction=reduction // 2 + if out_channels < intermediate_channels + else reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ChannelShuffle(group2), + ChannelShuffle(out_channels // 2) + if use_dy3 != 0 + else nn.Sequential(), + ) + + def forward(self, inputs: Tensor) -> Tensor: + out = self.layers(inputs) + if self.use_residual: + out += inputs + return out + + +class ChannelShuffle(nn.Module): + def __init__(self, groups: int): + """Shuffle the channels of the input tensor. + + This operation is used to mix information between groups after + grouped convolutions. + + @type groups: int + @param groups: Number of groups to divide the channels into + before shuffling. + """ + + super().__init__() + self.groups = groups + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, height, width = x.size() + channels_per_group = channels // self.groups + x = x.view(batch_size, self.groups, channels_per_group, height, width) + x = torch.transpose(x, 1, 2).contiguous() + out = x.view(batch_size, -1, height, width) + return out + + +class DYShiftMax(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + init_a: tuple[float, float] = (0.0, 0.0), + init_b: tuple[float, float] = (0.0, 0.0), + use_relu: bool = True, + groups: int = 6, + reduction: int = 4, + expansion: bool = False, + ): + """Dynamic Shift-Max activation function. + + This module implements the Dynamic Shift-Max operation, which + adaptively fuses and selects channel information based on the + input. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type init_a: tuple[float, float] + @param init_a: Initial values for the 'a' parameters. Defaults + to (0.0, 0.0). + @type init_b: tuple[float, float] + @param init_b: Initial values for the 'b' parameters. Defaults + to (0.0, 0.0). + @type use_relu: bool + @param use_relu: Whether to use ReLU activation. Defaults to + True. + @type groups: int + @param groups: Number of groups for channel shuffling. Defaults + to 6. 
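When expansion is True and groups is not 1, the effective group count used to build the channel-shift index becomes in_channels // groups.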
+ @type reduction: int + @param reduction: Reduction factor for the squeeze operation. + Defaults to 4. + @type expansion: bool + @param expansion: Whether to use expansion in grouping. Defaults + to False. + """ + super().__init__() + self.exp: Literal[2, 4] = 4 if use_relu else 2 + self.init_a = init_a + self.init_b = init_b + self.out_channels = out_channels + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + + squeeze_channels = self._make_divisible(in_channels // reduction, 4) + + self.fc = nn.Sequential( + nn.Linear(in_channels, squeeze_channels), + nn.ReLU(True), + nn.Linear(squeeze_channels, out_channels * self.exp), + HSigmoid(), + ) + + if groups != 1 and expansion: + groups = in_channels // groups + + channels_per_group = in_channels // groups + index = torch.arange(in_channels).view(1, in_channels, 1, 1) + index = index.view(1, groups, channels_per_group, 1, 1) + index_groups = torch.split(index, [1, groups - 1], dim=1) + index_groups = torch.cat([index_groups[1], index_groups[0]], dim=1) + index_splits = torch.split( + index_groups, [1, channels_per_group - 1], dim=2 + ) + index_splits = torch.cat([index_splits[1], index_splits[0]], dim=2) + self.index = index_splits.view(in_channels).long() + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, _, _ = x.shape + x_out = x + + y = self.avg_pool(x).view(batch_size, channels) + y = self.fc(y).view(batch_size, -1, 1, 1) + y = (y - 0.5) * 4.0 + + x2 = x_out[:, self.index, :, :] + + if self.exp == 4: + a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) + + a1 = a1 + self.init_a[0] + a2 = a2 + self.init_b[1] + b1 = b1 + self.init_b[0] + b2 = b2 + self.init_b[1] + + z1 = x_out * a1 + x2 * b1 + z2 = x_out * a2 + x2 * b2 + + out = torch.max(z1, z2) + + elif self.exp == 2: + a1, b1 = torch.split(y, self.out_channels, dim=1) + a1 = a1 + self.init_a[0] + b1 = b1 + self.init_b[0] + out = x_out * a1 + x2 * b1 + else: + raise RuntimeError("Expansion should be 2 or 4.") + + return out + + def _make_divisible( + self, value: int, divisor: int, min_value: int | None = None + ) -> int: + if min_value is None: + min_value = divisor + new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
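# For example, _make_divisible(10, 8) first rounds to 8; since 8 is more than
# 10% below 10, 16 is returned instead.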
+ if new_v < 0.9 * value: + new_v += divisor + return new_v + + +class SpatialSepConvSF(nn.Module): + def __init__( + self, + in_channels: int, + outs: tuple[int, int], + kernel_size: int, + stride: int, + ): + super().__init__() + out_channels1, out_channels2 = outs + self.conv = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels1, + kernel_size=(kernel_size, 1), + stride=(stride, 1), + padding=(kernel_size // 2, 0), + bias=False, + ), + nn.BatchNorm2d(out_channels1), + nn.Conv2d( + out_channels1, + out_channels1 * out_channels2, + kernel_size=(1, kernel_size), + stride=(1, stride), + padding=(0, kernel_size // 2), + groups=out_channels1, + bias=False, + ), + nn.BatchNorm2d(out_channels1 * out_channels2), + ChannelShuffle(out_channels1), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) + + +class Stem(nn.Module): + def __init__( + self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4) + ): + super().__init__() + self.stem = nn.Sequential( + SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) + ) + + def forward(self, x: Tensor) -> Tensor: + return self.stem(x) + + +class DepthSpatialSepConv(nn.Module): + def __init__( + self, + in_channels: int, + expand: tuple[int, int], + kernel_size: int, + stride: int, + ): + super().__init__() + exp1, exp2 = expand + intermediate_channels = in_channels * exp1 + out_channels = in_channels * exp1 * exp2 + + self.conv = nn.Sequential( + nn.Conv2d( + in_channels, + intermediate_channels, + (kernel_size, 1), + (stride, 1), + padding=(kernel_size // 2, 0), + groups=in_channels, + bias=False, + ), + nn.BatchNorm2d(intermediate_channels), + nn.Conv2d( + intermediate_channels, + out_channels, + (1, kernel_size), + (1, stride), + padding=(0, kernel_size // 2), + groups=intermediate_channels, + bias=False, + ), + nn.BatchNorm2d(out_channels), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) diff --git a/luxonis_train/nodes/backbones/micronet/micronet.py b/luxonis_train/nodes/backbones/micronet/micronet.py new file mode 100644 index 00000000..82df5cb3 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/micronet.py @@ -0,0 +1,62 @@ +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MicroBlock, Stem +from .variants import get_variant + + +class MicroNet(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + variant: Literal["M1", "M2", "M3"] = "M1", + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MicroNet backbone. + + This class creates the full MicroNet architecture based on the + specified variant. It consists of a stem layer followed by + multiple MicroBlocks. + + @type variant: Literal["M1", "M2", "M3"] + @param variant: Model variant to use. Defaults to "M1". + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. If provided, + overrides the variant value. 
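The M1 variant, for example, uses [1, 2, 4, 7].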
+ """ + super().__init__(**kwargs) + + var = get_variant(variant) + self.out_indices = out_indices or var.out_indices + in_channels = var.stem_channels + + self.layers = nn.ModuleList([Stem(3, 2, var.stem_groups)]) + + for bc in var.block_configs: + self.layers.append( + MicroBlock( + in_channels, + bc.out_channels, + bc.kernel_size, + bc.stride, + bc.expand_ratio, + bc.groups_1, + bc.groups_2, + bc.dy_shifts, + bc.reduction_factor, + var.init_a, + var.init_b, + ) + ) + in_channels = bc.out_channels + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.layers): + inputs = layer(inputs) + if i in self.out_indices: + outs.append(inputs) + return outs diff --git a/luxonis_train/nodes/backbones/micronet/variants.py b/luxonis_train/nodes/backbones/micronet/variants.py new file mode 100644 index 00000000..22a8d552 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/variants.py @@ -0,0 +1,344 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MicroBlockConfig(BaseModel): + stride: int + out_channels: int + kernel_size: int + expand_ratio: tuple[int, int] + groups_1: tuple[int, int] + groups_2: tuple[int, int] + dy_shifts: tuple[int, int, int] + reduction_factor: int + + +class MicroNetVariant(BaseModel): + stem_channels: int + stem_groups: tuple[int, int] + init_a: tuple[float, float] + init_b: tuple[float, float] + out_indices: list[int] + block_configs: list[MicroBlockConfig] + + +M1 = MicroNetVariant( + stem_channels=6, + stem_groups=(3, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 2, 4, 7], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=8, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 6), + groups_2=(2, 2), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=5, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(4, 4), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=96, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=576, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M2 = MicroNetVariant( + stem_channels=8, + stem_groups=(4, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 3, 6, 9], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=12, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + 
dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=96, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=128, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=768, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(16, 16), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M3 = MicroNetVariant( + stem_channels=12, + stem_groups=(4, 3), + init_a=(1.0, 0.5), + init_b=(0.0, 0.5), + out_indices=[1, 3, 8, 12], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 24), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=80, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=80, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=144, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(12, 12), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=864, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + ], +) + + +def get_variant(variant: Literal["M1", "M2", "M3"]) -> MicroNetVariant: + 
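+    """Returns the predefined configuration for the requested MicroNet variant.
+
+    @type variant: Literal["M1", "M2", "M3"]
+    @param variant: Name of the variant.
+    @rtype: MicroNetVariant
+    @return: Configuration of the chosen variant.
+    @raises ValueError: If C{variant} is not one of "M1", "M2" or "M3".
+    """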
variants = {"M1": M1, "M2": M2, "M3": M3} + if variant not in variants: # pragma: no cover + raise ValueError( + "MicroNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/mobilenetv2.py b/luxonis_train/nodes/backbones/mobilenetv2.py new file mode 100644 index 00000000..8de19854 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobilenetv2.py @@ -0,0 +1,51 @@ +from typing import Any + +import torchvision +from torch import Tensor + +from luxonis_train.nodes.base_node import BaseNode + + +class MobileNetV2(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MobileNetV2 backbone. + + This class implements the MobileNetV2 model as described in: + U{MobileNetV2: Inverted Residuals and Linear Bottlenecks } by Sandler I{et al.} + + The network consists of an initial fully convolutional layer, followed by + 19 bottleneck residual blocks, and a final 1x1 convolution. It can be used + as a feature extractor for tasks like image classification, object detection, + and semantic segmentation. + + Key features: + - Inverted residual structure with linear bottlenecks + - Depth-wise separable convolutions for efficiency + - Configurable width multiplier and input resolution + + @type download_weights: bool + @param download_weights: If True download weights from imagenet. Defaults to + False. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [3, 6, 13, 18]. + """ + super().__init__(**kwargs) + + self.backbone = torchvision.models.mobilenet_v2( + weights="DEFAULT" if download_weights else None + ) + self.out_indices = out_indices or [3, 6, 13, 18] + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.backbone.features): + inputs = layer(inputs) + if i in self.out_indices: + outs.append(inputs) + + return outs diff --git a/luxonis_train/nodes/backbones/mobileone/__init__.py b/luxonis_train/nodes/backbones/mobileone/__init__.py new file mode 100644 index 00000000..a6e573aa --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/__init__.py @@ -0,0 +1,3 @@ +from .mobileone import MobileOne + +__all__ = ["MobileOne"] diff --git a/luxonis_train/nodes/backbones/mobileone/blocks.py b/luxonis_train/nodes/backbones/mobileone/blocks.py new file mode 100644 index 00000000..63e19eae --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/blocks.py @@ -0,0 +1,255 @@ +"""MobileOne backbone. + +Source: U{} +@license: U{Apple} +""" + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.blocks import ConvModule, SqueezeExciteBlock + + +class MobileOneBlock(nn.Module): + """MobileOne building block. + + This block has a multi-branched architecture at train-time and + plain-CNN style architecture at inference time For more details, + please refer to our paper: U{An Improved One millisecond Mobile + Backbone} + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + groups: int = 1, + use_se: bool = False, + n_conv_branches: int = 1, + ): + """Construct a MobileOneBlock module. + + @type in_channels: int + @param in_channels: Number of channels in the input. + @type out_channels: int + @param out_channels: Number of channels produced by the block. 
+ @type kernel_size: int + @param kernel_size: Size of the convolution kernel. + @type stride: int + @param stride: Stride size. Defaults to 1. + @type padding: int + @param padding: Zero-padding size. Defaults to 0. + @type dilation: int + @param dilation: Kernel dilation factor. Defaults to 1. + @type groups: int + @param groups: Group number. Defaults to 1. + @type use_se: bool + @param use_se: Whether to use SE-ReLU activations. Defaults to + False. + @type n_conv_branches: int + @param n_conv_branches: Number of linear conv branches. Defaults + to 1. + """ + super().__init__() + + self.groups = groups + self.stride = stride + self.kernel_size = kernel_size + self.in_channels = in_channels + self.out_channels = out_channels + self.n_conv_branches = n_conv_branches + self.inference_mode = False + + self.se: nn.Module + if use_se: + self.se = SqueezeExciteBlock( + in_channels=out_channels, + intermediate_channels=int(out_channels * 0.0625), + ) + else: + self.se = nn.Identity() + self.activation = nn.ReLU() + + # Re-parameterizable skip connection + self.rbr_skip = ( + nn.BatchNorm2d(num_features=in_channels) + if out_channels == in_channels and stride == 1 + else None + ) + + # Re-parameterizable conv branches + rbr_conv: list[nn.Module] = [] + for _ in range(self.n_conv_branches): + rbr_conv.append( + ConvModule( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=kernel_size, + stride=self.stride, + padding=padding, + groups=self.groups, + activation=nn.Identity(), + ) + ) + self.rbr_conv: list[nn.Sequential] = nn.ModuleList(rbr_conv) # type: ignore + + # Re-parameterizable scale branch + self.rbr_scale = None + if kernel_size > 1: + self.rbr_scale = ConvModule( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=self.stride, + padding=0, + groups=self.groups, + activation=nn.Identity(), + ) + + def forward(self, inputs: Tensor) -> Tensor: + """Apply forward pass.""" + + if self.inference_mode: + return self.activation(self.se(self.reparam_conv(inputs))) + + # Multi-branched train-time forward pass. + # Skip branch output + identity_out = 0 + if self.rbr_skip is not None: + identity_out = self.rbr_skip(inputs) + + # Scale branch output + scale_out = 0 + if self.rbr_scale is not None: + scale_out = self.rbr_scale(inputs) + + # Other branches + out = scale_out + identity_out + for ix in range(self.n_conv_branches): + out += self.rbr_conv[ix](inputs) + + return self.activation(self.se(out)) + + def reparameterize(self): + """Following works like U{RepVGG: Making VGG-style ConvNets Great Again + } + architecture used at training time to obtain a plain CNN-like structure + for inference. 
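+
+        Example (illustrative sketch; the constructor arguments shown here are
+        assumptions, not a prescribed configuration)::
+
+            >>> block = MobileOneBlock(32, 32, kernel_size=3, padding=1, n_conv_branches=4).eval()
+            >>> x = torch.rand(1, 32, 56, 56)
+            >>> y = block(x)
+            >>> block.reparameterize()
+            >>> torch.allclose(y, block(x), atol=1e-5)
+            True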
+ """ + if self.inference_mode: + return + kernel, bias = self._get_kernel_bias() + self.reparam_conv = nn.Conv2d( + in_channels=self.rbr_conv[0][0].in_channels, + out_channels=self.rbr_conv[0][0].out_channels, + kernel_size=self.rbr_conv[0][0].kernel_size, + stride=self.rbr_conv[0][0].stride, + padding=self.rbr_conv[0][0].padding, + dilation=self.rbr_conv[0][0].dilation, + groups=self.rbr_conv[0][0].groups, + bias=True, + ) + self.reparam_conv.weight.data = kernel + assert self.reparam_conv.bias is not None + self.reparam_conv.bias.data = bias + + # Delete un-used branches + for para in self.parameters(): + para.detach_() + del self.rbr_conv + del self.rbr_scale + if hasattr(self, "rbr_skip"): + del self.rbr_skip + + self.inference_mode = True + + def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: + """Method to obtain re-parameterized kernel and bias. + Reference: U{https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83} + + @rtype: tuple[Tensor, Tensor] + @return: Tuple of (kernel, bias) after re-parameterization. + """ + # get weights and bias of scale branch + kernel_scale = torch.zeros(()) + bias_scale = torch.zeros(()) + if self.rbr_scale is not None: + kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale) + # Pad scale branch kernel to match conv branch kernel size. + pad = self.kernel_size // 2 + kernel_scale = torch.nn.functional.pad( + kernel_scale, [pad, pad, pad, pad] + ) + + # get weights and bias of skip branch + kernel_identity = torch.zeros(()) + bias_identity = torch.zeros(()) + if self.rbr_skip is not None: + kernel_identity, bias_identity = self._fuse_bn_tensor( + self.rbr_skip + ) + + # get weights and bias of conv branches + kernel_conv = torch.zeros(()) + bias_conv = torch.zeros(()) + for ix in range(self.n_conv_branches): + _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix]) + kernel_conv = kernel_conv + _kernel + bias_conv = bias_conv + _bias + + kernel_final = kernel_conv + kernel_scale + kernel_identity + bias_final = bias_conv + bias_scale + bias_identity + return kernel_final, bias_final + + def _fuse_bn_tensor(self, branch: nn.Module) -> tuple[Tensor, Tensor]: + """Method to fuse batchnorm layer with preceeding conv layer. + Reference: U{https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95} + + @rtype: tuple[Tensor, Tensor] + @return: Tuple of (kernel, bias) after fusing batchnorm. + """ + if isinstance(branch, nn.Sequential): + kernel = branch[0].weight + running_mean = branch[1].running_mean + running_var = branch[1].running_var + gamma = branch[1].weight + beta = branch[1].bias + eps = branch[1].eps + elif isinstance(branch, nn.BatchNorm2d): + if not hasattr(self, "id_tensor"): + input_dim = self.in_channels // self.groups + kernel_value = torch.zeros( + ( + self.in_channels, + input_dim, + self.kernel_size, + self.kernel_size, + ), + dtype=branch.weight.dtype, + device=branch.weight.device, + ) + for i in range(self.in_channels): + kernel_value[ + i, + i % input_dim, + self.kernel_size // 2, + self.kernel_size // 2, + ] = 1 + self.id_tensor = kernel_value + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + else: + raise NotImplementedError( + "Only nn.BatchNorm2d and nn.Sequential " "are supported." 
+ ) + assert running_var is not None + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std diff --git a/luxonis_train/nodes/backbones/mobileone/mobileone.py b/luxonis_train/nodes/backbones/mobileone/mobileone.py new file mode 100644 index 00000000..8180f960 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/mobileone.py @@ -0,0 +1,197 @@ +"""MobileOne backbone. + +Source: U{} +@license: U{Apple} +""" + +import logging +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MobileOneBlock +from .variants import get_variant + +logger = logging.getLogger(__name__) + + +class MobileOne(BaseNode[Tensor, list[Tensor]]): + in_channels: int + + def __init__( + self, + variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", + width_multipliers: tuple[float, float, float, float] | None = None, + n_conv_branches: int | None = None, + use_se: bool | None = None, + **kwargs: Any, + ): + """MobileOne: An efficient CNN backbone for mobile devices. + + The architecture focuses on reducing memory access costs and improving parallelism + while allowing aggressive parameter scaling for better representation capacity. + Different variants (S0-S4) offer various accuracy-latency tradeoffs. + + Key features: + - Designed for low latency on mobile while maintaining high accuracy + - Uses re-parameterizable branches during training that get folded at inference + - Employs trivial over-parameterization branches for improved accuracy + - Simple feed-forward structure at inference with no branches/skip connections + - Variants achieve <1ms inference time on iPhone 12 with up to 75.9% top-1 ImageNet accuracy + - Outperforms other efficient architectures like MobileNets on image classification, + object detection and semantic segmentation tasks + - Uses only basic operators available across platforms (no custom activations) + + + Reference: U{MobileOne: An Improved One millisecond Mobile Backbone + } + + @type variant: Literal["s0", "s1", "s2", "s3", "s4"] + @param variant: Specifies which variant of the MobileOne network to use. Defaults to "s0". + Each variant specifies a predefined set of values for: + - width multipliers - A tuple of 4 float values specifying the width multipliers for each stage of the network. If the use of SE blocks is disabled, the last two values are ignored. + - number of convolution branches - An integer specifying the number of linear convolution branches in MobileOne block. + - use of SE blocks - A boolean specifying whether to use SE blocks in the network. + + The variants are as follows: + - s0 (default): width_multipliers=(0.75, 1.0, 1.0, 2.0), n_conv_branches=4, use_se=False + - s1: width_multipliers=(1.5, 1.5, 2.0, 2.5), n_conv_branches=1, use_se=False + - s2: width_multipliers=(1.5, 2.0, 2.5, 4.0), n_conv_branches=1, use_se=False + - s3: width_multipliers=(2.0, 2.5, 3.0, 4.0), n_conv_branches=1, use_se=False + - s4: width_multipliers=(3.0, 3.5, 3.5, 4.0), n_conv_branches=1, use_se=True + + @type width_multipliers: tuple[float, float, float, float] | None + @param width_multipliers: Width multipliers for each stage. If provided, overrides the variant values. + @type n_conv_branches: int | None + @param n_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values. + @type use_se: bool | None + @param use_se: Whether to use SE blocks in the network. 
If provided, overrides the variant value. + """ + super().__init__(**kwargs) + + var = get_variant(variant) + + width_multipliers = width_multipliers or var.width_multipliers + use_se = use_se or var.use_se + self.n_blocks_per_stage = [2, 8, 10, 1] + self.n_conv_branches = n_conv_branches or var.n_conv_branches + + self.in_planes = min(64, int(64 * width_multipliers[0])) + + self.stage0 = MobileOneBlock( + in_channels=self.in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + ) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage( + int(64 * width_multipliers[0]), + self.n_blocks_per_stage[0], + n_se_blocks=0, + ) + self.stage2 = self._make_stage( + int(128 * width_multipliers[1]), + self.n_blocks_per_stage[1], + n_se_blocks=0, + ) + self.stage3 = self._make_stage( + int(256 * width_multipliers[2]), + self.n_blocks_per_stage[2], + n_se_blocks=self.n_blocks_per_stage[2] // 2 if use_se else 0, + ) + self.stage4 = self._make_stage( + int(512 * width_multipliers[3]), + self.n_blocks_per_stage[3], + n_se_blocks=self.n_blocks_per_stage[3] if use_se else 0, + ) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + x = self.stage0(inputs) + outs.append(x) + x = self.stage1(x) + outs.append(x) + x = self.stage2(x) + outs.append(x) + x = self.stage3(x) + outs.append(x) + x = self.stage4(x) + outs.append(x) + + return outs + + def set_export_mode(self, mode: bool = True) -> None: + """Sets the module to export mode. + + Reparameterizes the model to obtain a plain CNN-like structure for inference. + TODO: add more details + + @warning: The reparametrization is destructive and cannot be reversed! + + @type export: bool + @param export: Whether to set the export mode to True or False. Defaults to True. + """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing 'MobileOne'.") + for module in self.modules(): + if hasattr(module, "reparameterize"): + module.reparameterize() + + def _make_stage(self, planes: int, n_blocks: int, n_se_blocks: int): + """Build a stage of MobileOne model. + + @type planes: int + @param planes: Number of output channels. + @type n_blocks: int + @param n_blocks: Number of blocks in this stage. + @type n_se_blocks: int + @param n_se_blocks: Number of SE blocks in this stage. + @rtype: nn.Sequential + @return: A stage of MobileOne model. + """ + # Get strides for all layers + strides = [2] + [1] * (n_blocks - 1) + blocks: list[nn.Module] = [] + for ix, stride in enumerate(strides): + use_se = False + if n_se_blocks > n_blocks: + raise ValueError( + "Number of SE blocks cannot " "exceed number of layers." 
+ ) + if ix >= (n_blocks - n_se_blocks): + use_se = True + + # Depthwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=self.in_planes, + kernel_size=3, + stride=stride, + padding=1, + groups=self.in_planes, + use_se=use_se, + n_conv_branches=self.n_conv_branches, + ) + ) + # Pointwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=planes, + kernel_size=1, + stride=1, + padding=0, + groups=1, + use_se=use_se, + n_conv_branches=self.n_conv_branches, + ) + ) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.Sequential(*blocks) diff --git a/luxonis_train/nodes/backbones/mobileone/variants.py b/luxonis_train/nodes/backbones/mobileone/variants.py new file mode 100644 index 00000000..fbb0add3 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/variants.py @@ -0,0 +1,39 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MobileOneVariant(BaseModel): + width_multipliers: tuple[float, float, float, float] + n_conv_branches: int = 1 + use_se: bool = False + + +def get_variant( + variant: Literal["s0", "s1", "s2", "s3", "s4"], +) -> MobileOneVariant: + variants = { + "s0": MobileOneVariant( + width_multipliers=(0.75, 1.0, 1.0, 2.0), + n_conv_branches=4, + ), + "s1": MobileOneVariant( + width_multipliers=(1.5, 1.5, 2.0, 2.5), + ), + "s2": MobileOneVariant( + width_multipliers=(1.5, 2.0, 2.5, 4.0), + ), + "s3": MobileOneVariant( + width_multipliers=(2.0, 2.5, 3.0, 4.0), + ), + "s4": MobileOneVariant( + width_multipliers=(3.0, 3.5, 3.5, 4.0), + use_se=True, + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + "MobileOne model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/repvgg/__init__.py b/luxonis_train/nodes/backbones/repvgg/__init__.py new file mode 100644 index 00000000..61a5a4fc --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/__init__.py @@ -0,0 +1,3 @@ +from .repvgg import RepVGG + +__all__ = ["RepVGG"] diff --git a/luxonis_train/nodes/backbones/repvgg/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py new file mode 100644 index 00000000..fd8a5e67 --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/repvgg.py @@ -0,0 +1,135 @@ +import logging +from collections import defaultdict +from typing import Any, Literal + +import torch.utils.checkpoint as checkpoint +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import RepVGGBlock + +from .variants import get_variant + +logger = logging.getLogger(__name__) + + +class RepVGG(BaseNode[Tensor, list[Tensor]]): + in_channels: int + attach_index: int = -1 + + def __init__( + self, + variant: Literal["A0", "A1", "A2"] = "A0", + n_blocks: tuple[int, int, int, int] | None = None, + width_multiplier: tuple[float, float, float, float] | None = None, + override_groups_map: dict[int, int] | None = None, + use_se: bool = False, + use_checkpoint: bool = False, + **kwargs: Any, + ): + """RepVGG backbone. + + RepVGG is a VGG-style convolutional architecture. + + - Simple feed-forward topology without any branching. + - 3x3 convolutions and ReLU activations. + - No automatic search, manual refinement or compound scaling. + + @license: U{MIT + }. 
+ + @see: U{https://github.com/DingXiaoH/RepVGG} + @see: U{https://paperswithcode.com/method/repvgg} + @see: U{RepVGG: Making VGG-style ConvNets Great Again + } + + + @type variant: Literal["A0", "A1", "A2"] + @param variant: RepVGG model variant. Defaults to "A0". + @type override_groups_map: dict[int, int] | None + @param override_groups_map: Dictionary mapping layer index to number of groups. The layers are indexed starting from 0. + @type use_se: bool + @param use_se: Whether to use Squeeze-and-Excitation blocks. + @type use_checkpoint: bool + @param use_checkpoint: Whether to use checkpointing. + @type n_blocks: tuple[int, int, int, int] | None + @param n_blocks: Number of blocks in each stage. + @type width_multiplier: tuple[float, float, float, float] | None + @param width_multiplier: Width multiplier for each stage. + """ + super().__init__(**kwargs) + var = get_variant(variant) + + n_blocks = n_blocks or var.n_blocks + width_multiplier = width_multiplier or var.width_multiplier + override_groups_map = defaultdict(lambda: 1, override_groups_map or {}) + self.use_se = use_se + self.use_checkpoint = use_checkpoint + + self.in_planes = min(64, int(64 * width_multiplier[0])) + self.stage0 = RepVGGBlock( + in_channels=self.in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + use_se=self.use_se, + ) + self.blocks = nn.ModuleList( + [ + block + for i in range(4) + for block in self._make_stage( + int(2**i * 64 * width_multiplier[i]), + n_blocks[i], + stride=2, + groups=override_groups_map[i], + ) + ] + ) + self.gap = nn.AdaptiveAvgPool2d(output_size=1) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outputs: list[Tensor] = [] + out = self.stage0(inputs) + for block in self.blocks: + if self.use_checkpoint: + out = checkpoint.checkpoint(block, out) + else: + out = block(out) + outputs.append(out) # type: ignore + return outputs + + def _make_stage( + self, channels: int, n_blocks: int, stride: int, groups: int + ) -> nn.ModuleList: + strides = [stride] + [1] * (n_blocks - 1) + blocks: list[nn.Module] = [] + for stride in strides: + blocks.append( + RepVGGBlock( + in_channels=self.in_planes, + out_channels=channels, + kernel_size=3, + stride=stride, + padding=1, + groups=groups, + use_se=self.use_se, + ) + ) + self.in_planes = channels + return nn.ModuleList(blocks) + + def set_export_mode(self, mode: bool = True) -> None: + """Reparametrizes instances of L{RepVGGBlock} in the network. + + @type mode: bool + @param mode: Whether to set the export mode. Defaults to + C{True}. 
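+
+        Example (illustrative; assumes an already constructed C{RepVGG} backbone)::
+
+            >>> backbone.set_export_mode()  # folds every RepVGGBlock into a single conv
+            >>> backbone.export
+            True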
+ """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing RepVGG.") + for module in self.modules(): + if isinstance(module, RepVGGBlock): + module.reparametrize() diff --git a/luxonis_train/nodes/backbones/repvgg/variants.py b/luxonis_train/nodes/backbones/repvgg/variants.py new file mode 100644 index 00000000..a5c734b5 --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/variants.py @@ -0,0 +1,31 @@ +from typing import Literal + +from pydantic import BaseModel + + +class RepVGGVariant(BaseModel): + n_blocks: tuple[int, int, int, int] + width_multiplier: tuple[float, float, float, float] + + +def get_variant(variant: Literal["A0", "A1", "A2"]) -> RepVGGVariant: + variants = { + "A0": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(0.75, 0.75, 0.75, 2.5), + ), + "A1": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(1, 1, 1, 2.5), + ), + "A2": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(1.5, 1.5, 1.5, 2.75), + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + f"RepVGG variant should be one of " + f"{list(variants.keys())}, got '{variant}'." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/resnet.py b/luxonis_train/nodes/backbones/resnet.py new file mode 100644 index 00000000..93a13d4a --- /dev/null +++ b/luxonis_train/nodes/backbones/resnet.py @@ -0,0 +1,128 @@ +from typing import Any, Literal + +import torchvision +from torch import Tensor +from torchvision.models import ResNet as TorchResNet + +from luxonis_train.nodes.base_node import BaseNode + + +class ResNet(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + variant: Literal["18", "34", "50", "101", "152"] = "18", + download_weights: bool = False, + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: tuple[bool, bool, bool] = ( + False, + False, + False, + ), + **kwargs: Any, + ): + """ResNet backbone. + + Implements the backbone of a ResNet (Residual Network) architecture. + + ResNet is designed to address the vanishing gradient problem in deep neural networks + by introducing skip connections. These connections allow the network to learn + residual functions with reference to the layer inputs, enabling training of much + deeper networks. + + This backbone can be used as a feature extractor for various computer vision tasks + such as image classification, object detection, and semantic segmentation. It + provides a robust set of features that can be fine-tuned for specific applications. + + The architecture consists of stacked residual blocks, each containing convolutional + layers, batch normalization, and ReLU activations. The skip connections can be + either identity mappings or projections, depending on the block type. + + Source: U{https://pytorch.org/vision/main/models/resnet.html} + + @license: U{PyTorch} + + @param variant: ResNet variant, determining the depth and structure of the network. Options are: + - "18": 18 layers, uses basic blocks, smaller model suitable for simpler tasks. + - "34": 34 layers, uses basic blocks, good balance of depth and computation. + - "50": 50 layers, introduces bottleneck blocks, deeper feature extraction. + - "101": 101 layers, uses bottleneck blocks, high capacity for complex tasks. + - "152": 152 layers, deepest variant, highest capacity but most computationally intensive. + The number in each variant represents the total number of weighted layers. 
+ Deeper networks generally offer higher accuracy but require more computation. + @type variant: Literal["18", "34", "50", "101", "152"] + @default variant: "18" + + @type download_weights: bool + @param download_weights: If True download weights trained on imagenet. + Defaults to False. + @type zero_init_residual: bool + @param zero_init_residual: Zero-initialize the last BN in each residual branch, + so that the residual branch starts with zeros, and each residual block behaves like an identity. + This improves the model by 0.2~0.3% according to U{Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour }. Defaults to C{False}. + + @type groups: int + @param groups: Number of groups for each block. + Defaults to 1. Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. + The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type width_per_group: int + @param width_per_group: Number of channels per group. + Defaults to 64. Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. + The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type replace_stride_with_dilation: tuple[bool, bool, bool] + @param replace_stride_with_dilation: Tuple of booleans where each + indicates if the 2x2 strides should be replaced with a dilated convolution instead. + Defaults to (False, False, False). Can be set to a different value only for ResNet-50, ResNet-101, and ResNet-152. + """ + super().__init__(**kwargs) + self.backbone = self._get_backbone( + variant, + weights="DEFAULT" if download_weights else None, + zero_init_residual=zero_init_residual, + groups=groups, + width_per_group=width_per_group, + replace_stride_with_dilation=replace_stride_with_dilation, + ) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + x = self.backbone.conv1(inputs) + x = self.backbone.bn1(x) + x = self.backbone.relu(x) + x = self.backbone.maxpool(x) + + x = self.backbone.layer1(x) + outs.append(x) + x = self.backbone.layer2(x) + outs.append(x) + x = self.backbone.layer3(x) + outs.append(x) + x = self.backbone.layer4(x) + outs.append(x) + + return outs + + @staticmethod + def _get_backbone( + variant: Literal["18", "34", "50", "101", "152"], **kwargs: Any + ) -> TorchResNet: + variants = { + "18": torchvision.models.resnet18, + "34": torchvision.models.resnet34, + "50": torchvision.models.resnet50, + "101": torchvision.models.resnet101, + "152": torchvision.models.resnet152, + } + if variant not in variants: + raise ValueError( + "ResNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant](**kwargs) diff --git a/luxonis_train/nodes/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py similarity index 59% rename from luxonis_train/nodes/rexnetv1.py rename to luxonis_train/nodes/backbones/rexnetv1.py index fb4de4b1..6567586a 100644 --- a/luxonis_train/nodes/rexnetv1.py +++ b/luxonis_train/nodes/backbones/rexnetv1.py @@ -1,24 +1,14 @@ -"""Implementation of the ReXNetV1 backbone. 
- -Source: U{https://github.com/clovaai/rexnet} -@license: U{MIT} -""" - +from typing import Any import torch from torch import Tensor, nn -from luxonis_train.nodes.blocks import ( - ConvModule, -) -from luxonis_train.utils.general import make_divisible - -from .base_node import BaseNode +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ConvModule +from luxonis_train.utils import make_divisible class ReXNetV1_lite(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - def __init__( self, fix_head_stem: bool = False, @@ -27,10 +17,33 @@ def __init__( final_ch: int = 164, multiplier: float = 1.0, kernel_sizes: int | list[int] = 3, - **kwargs, + out_indices: list[int] | None = None, + **kwargs: Any, ): - """ReXNetV1_lite backbone. + """ReXNetV1 (Rank Expansion Networks) backbone, lite version. + + ReXNet proposes a new approach to designing lightweight CNN architectures by: + + - Studying proper channel dimension expansion at the layer level using rank analysis + - Searching for effective channel configurations across the entire network + - Parameterizing channel dimensions as a linear function of network depth + + Key aspects: + - Uses inverted bottleneck blocks similar to MobileNetV2 + - Employs a linear parameterization of channel dimensions across blocks + - Replaces ReLU6 with SiLU (Swish-1) activation in certain layers + - Incorporates Squeeze-and-Excitation modules + + ReXNet achieves state-of-the-art performance among lightweight models on ImageNet + classification and transfers well to tasks like object detection and fine-grained classification. + + Source: U{https://github.com/clovaai/rexnet} + + @license: U{MIT + } + @copyright: 2021-present NAVER Corp. + @see U{Rethinking Channel Dimensions for Efficient Model Design } @type fix_head_stem: bool @param fix_head_stem: Whether to multiply head stem. Defaults to False. @type divisible_value: int @@ -43,40 +56,44 @@ def __init__( @param multiplier: Channel dimension multiplier. Defaults to 1.0. @type kernel_sizes: int | list[int] @param kernel_sizes: Kernel size for each block. Defaults to 3. + @param out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [1, 4, 10, 17]. 
""" super().__init__(**kwargs) - self.out_indices = [1, 4, 10, 16] - self.channels = [16, 48, 112, 184] layers = [1, 2, 2, 3, 3, 5] strides = [1, 2, 2, 2, 1, 2] + self.n_convblocks = sum(layers) + self.out_indices = out_indices or [1, 4, 10, 17] + kernel_sizes = ( - [kernel_sizes] * 6 if isinstance(kernel_sizes, int) else kernel_sizes + [kernel_sizes] * 6 + if isinstance(kernel_sizes, int) + else kernel_sizes ) - strides = sum( - [ - [element] + [1] * (layers[idx] - 1) - for idx, element in enumerate(strides) - ], - [], - ) + strides = [ + s if i == 0 else 1 + for layer, s in zip(layers, strides) + for i in range(layer) + ] ts = [1] * layers[0] + [6] * sum(layers[1:]) - kernel_sizes = sum( - [[element] * layers[idx] for idx, element in enumerate(kernel_sizes)], [] - ) - self.num_convblocks = sum(layers[:]) + kernel_sizes = [ + ks for ks, layer in zip(kernel_sizes, layers) for _ in range(layer) + ] features: list[nn.Module] = [] inplanes = input_ch / multiplier if multiplier < 1.0 else input_ch - first_channel = 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + first_channel = ( + 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + ) first_channel = make_divisible( int(round(first_channel * multiplier)), divisible_value ) - in_channels_group = [] - channels_group = [] + in_channels_group: list[int] = [] + channels_group: list[int] = [] features.append( ConvModule( @@ -89,7 +106,7 @@ def __init__( ) ) - for i in range(self.num_convblocks): + for i in range(self.n_convblocks): inplanes_divisible = make_divisible( int(round(inplanes * multiplier)), divisible_value ) @@ -98,7 +115,7 @@ def __init__( channels_group.append(inplanes_divisible) else: in_channels_group.append(inplanes_divisible) - inplanes += final_ch / (self.num_convblocks - 1 * 1.0) + inplanes += final_ch / (self.n_convblocks - 1 * 1.0) inplanes_divisible = make_divisible( int(round(inplanes * multiplier)), divisible_value ) @@ -106,7 +123,12 @@ def __init__( assert channels_group for in_c, c, t, k, s in zip( - in_channels_group, channels_group, ts, kernel_sizes, strides, strict=True + in_channels_group, + channels_group, + ts, + kernel_sizes, + strides, + strict=True, ): features.append( LinearBottleneck( @@ -115,7 +137,9 @@ def __init__( ) pen_channels = ( - int(1280 * multiplier) if multiplier > 1 and not fix_head_stem else 1280 + int(1280 * multiplier) + if multiplier > 1 and not fix_head_stem + else 1280 ) features.append( ConvModule( @@ -127,12 +151,12 @@ def __init__( ) self.features = nn.Sequential(*features) - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for i, m in enumerate(self.features): - x = m(x) + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, module in enumerate(self.features): + inputs = module(inputs) if i in self.out_indices: - outs.append(x) + outs.append(inputs) return outs @@ -144,14 +168,12 @@ def __init__( t: int, kernel_size: int = 3, stride: int = 1, - **kwargs, ): - super(LinearBottleneck, self).__init__(**kwargs) - self.conv_shortcut = None + super().__init__() self.use_shortcut = stride == 1 and in_channels <= channels self.in_channels = in_channels self.out_channels = channels - out = [] + out: list[nn.Module] = [] if t != 1: dw_channels = in_channels * t out.append( @@ -186,12 +208,11 @@ def __init__( self.out = nn.Sequential(*out) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: out = self.out(x) if self.use_shortcut: - # this results in a ScatterND node which isn't supported yet in myriad - # out[:, 
0:self.in_channels] += x + # NOTE: this results in a ScatterND node which isn't supported yet in myriad a = out[:, : self.in_channels] b = x a = a + b diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 6ec216fb..aad0b2f2 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -1,23 +1,27 @@ +import inspect +import logging from abc import ABC, abstractmethod +from contextlib import suppress from typing import Generic, TypeVar +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from pydantic import BaseModel, ValidationError from torch import Size, Tensor, nn +from typeguard import TypeCheckError, check_type -from luxonis_train.utils.general import DatasetMetadata, validate_packet -from luxonis_train.utils.registry import NODES -from luxonis_train.utils.types import ( +from luxonis_train.utils import ( AttachIndexType, - FeaturesProtocol, + DatasetMetadata, IncompatibleException, - LabelType, Packet, ) +from luxonis_train.utils.registry import NODES ForwardOutputT = TypeVar("ForwardOutputT") ForwardInputT = TypeVar("ForwardInputT") +logger = logging.getLogger(__name__) + class BaseNode( nn.Module, @@ -38,49 +42,72 @@ class BaseNode( of lists of tensors. Each key in the dictionary represents a different output from the previous node. Input to the node is a list of L{Packet}s, output is a single L{Packet}. - Each node can define a list of L{BaseProtocol}s that the inputs must conform to. - L{BaseProtocol} is a pydantic model that defines the structure of the input. - When the node is called, the inputs are validated against the protocols and - then sent to the L{unwrap} method. The C{unwrap} method should return a valid - input to the L{forward} method. Outputs of the C{forward} method are then - send to L{weap} method, which wraps the output into a C{Packet}, which is the - output of the node. + When the node is called, the inputs are sent to the L{unwrap} method. + The C{unwrap} method should return a valid input to the L{forward} method. + Outputs of the C{forward} method are then send to L{wrap} method, + which wraps the output into a C{Packet}. The wrapped C{Packet} is the final output of the node. The L{run} method combines the C{unwrap}, C{forward} and C{wrap} methods together with input validation. + When subclassing, the following methods should be implemented: + - L{forward}: Forward pass of the module. + - L{unwrap}: Optional. Unwraps the inputs from the input packet. + The default implementation expects a single input with `features` key. + - L{wrap}: Optional. Wraps the output of the forward pass + into a `Packet[Tensor]`. The default implementation expects wraps the output + of the forward pass into a packet with either "features" or the task name as the key. + + Additionally, the following class attributes can be defined: + - L{attach_index}: Index of previous output that this node attaches to. + - L{tasks}: Dictionary of tasks that the node supports. 
+ + Example:: + class MyNode(BaseNode): + # equivalent to `tasks = {LabelType.CLASSIFICATION: "classification"}` + tasks = [LabelType.CLASSIFICATION] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.nn = nn.Sequential( + nn.Linear(10, 10), + nn.ReLU(), + nn.Linear(10, 10), + ) + + # Roughly equivalent to the default implementation + def unwrap(self, inputs: list[Packet[Tensor]]) -> Tensor: + assert len(inputs) == 1 + assert "features" in inputs[0] + return inputs[0]["features"] - @type input_shapes: list[Packet[Size]] | None - @param input_shapes: List of input shapes for the module. + def forward(self, inputs: Tensor) -> Tensor: + return self.nn(inputs) - @type original_in_shape: Size | None - @param original_in_shape: Original input shape of the model. Some - nodes won't function if not provided. + # Roughly equivalent to the default implementation + def wrap(output: Tensor) -> Packet[Tensor]: + # The key of the main node output have to be the same as the + # default task name for it to be automatically recognized + # by the attached modules. + return {"classification": [output]} - @type dataset_metadata: L{DatasetMetadata} | None - @param dataset_metadata: Metadata of the dataset. - Some nodes won't function if not provided. @type attach_index: AttachIndexType - @param attach_index: Index of previous output that this node attaches to. + @ivar attach_index: Index of previous output that this node attaches to. Can be a single integer to specify a single output, a tuple of two or three integers to specify a range of outputs or `"all"` to specify all outputs. Defaults to "all". Python indexing conventions apply. - @type in_protocols: list[type[BaseModel]] - @param in_protocols: List of input protocols used to validate inputs to the node. - Defaults to [FeaturesProtocol]. - - @type n_classes: int | None - @param n_classes: Number of classes in the dataset. Provide only - in case `dataset_metadata` is not provided. Defaults to None. - - @type in_sizes: Size | list[Size] | None - @param in_sizes: List of input sizes for the node. - Provide only in case the `input_shapes` were not provided. + @type tasks: list[LabelType] | dict[LabelType, str] | None + @ivar tasks: Dictionary of tasks that the node supports. Should be defined + by the user as a class attribute. The key is the task type and the value + is the name of the task. For example: + C{{LabelType.CLASSIFICATION: "classification"}}. + Only needs to be defined for head nodes. """ - attach_index: AttachIndexType = "all" + attach_index: AttachIndexType + tasks: list[LabelType] | dict[LabelType, str] | None = None def __init__( self, @@ -88,55 +115,289 @@ def __init__( input_shapes: list[Packet[Size]] | None = None, original_in_shape: Size | None = None, dataset_metadata: DatasetMetadata | None = None, - attach_index: AttachIndexType | None = None, - in_protocols: list[type[BaseModel]] | None = None, n_classes: int | None = None, + n_keypoints: int | None = None, in_sizes: Size | list[Size] | None = None, - task_type: LabelType | None = None, + attach_index: AttachIndexType | None = None, + _tasks: dict[LabelType, str] | None = None, ): + """Constructor for the BaseNode. + + @type input_shapes: list[Packet[Size]] | None + @param input_shapes: List of input shapes for the module. + + @type original_in_shape: Size | None + @param original_in_shape: Original input shape of the model. Some + nodes won't function if not provided. 
+ + @type dataset_metadata: L{DatasetMetadata} | None + @param dataset_metadata: Metadata of the dataset. + Some nodes won't function if not provided. + + @type n_classes: int | None + @param n_classes: Number of classes in the dataset. Provide only + in case `dataset_metadata` is not provided. Defaults to None. + + @type in_sizes: Size | list[Size] | None + @param in_sizes: List of input sizes for the node. + Provide only in case the `input_shapes` were not provided. + + @type attach_index: AttachIndexType + @param attach_index: Index of previous output that this node attaches to. + Can be a single integer to specify a single output, a tuple of + two or three integers to specify a range of outputs or `"all"` to + specify all outputs. Defaults to "all". Python indexing conventions apply. If provided as a constructor argument, overrides the class attribute. + + + @type _tasks: dict[LabelType, str] | None + @param _tasks: Dictionary of tasks that the node supports. Overrides the + class L{tasks} attribute. Shouldn't be provided by the user in most cases. + """ super().__init__() - self.attach_index = attach_index or self.attach_index - self.in_protocols = in_protocols or [FeaturesProtocol] - self.task_type = task_type + if attach_index is not None: + logger.warning( + f"Node {self.name} overrides `attach_index` " + f"by setting it to '{attach_index}'. " + "Make sure this is intended." + ) + self.attach_index = attach_index + self._tasks = None + if _tasks is not None: + self._tasks = _tasks + elif self.tasks is not None: + self._tasks = self._process_tasks(self.tasks) + + if getattr(self, "attach_index", None) is None: + parameters = inspect.signature(self.forward).parameters + inputs_forward_type = parameters.get( + "inputs", parameters.get("input", parameters.get("x", None)) + ) + if ( + inputs_forward_type is not None + and inputs_forward_type.annotation == Tensor + ): + self.attach_index = -1 + else: + self.attach_index = "all" self._input_shapes = input_shapes self._original_in_shape = original_in_shape - if n_classes is not None: - if dataset_metadata is not None: - raise ValueError("Cannot set both `dataset_metadata` and `n_classes`.") - dataset_metadata = DatasetMetadata(n_classes=n_classes) self._dataset_metadata = dataset_metadata + self._n_classes = n_classes + self._n_keypoints = n_keypoints self._export = False self._epoch = 0 self._in_sizes = in_sizes - def _non_set_error(self, name: str) -> ValueError: - return ValueError( - f"{self.__class__.__name__} is trying to access `{name}`, " - "but it was not set during initialization. " + self._check_type_overrides() + + @staticmethod + def _process_tasks( + tasks: dict[LabelType, str] | list[LabelType], + ) -> dict[LabelType, str]: + if isinstance(tasks, dict): + return tasks + else: + return {task: task.value for task in tasks} + + def _check_type_overrides(self) -> None: + properties = [] + for name, value in inspect.getmembers(self.__class__): + if isinstance(value, property): + properties.append(name) + for name, typ in self.__annotations__.items(): + if name in properties: + with suppress(RuntimeError): + value = getattr(self, name) + try: + check_type(value, typ) + except TypeCheckError as e: + raise IncompatibleException( + f"Node '{self.name}' specifies the type of the property `{name}` as `{typ}`, " + f"but received `{type(value)}`. " + f"This may indicate that the '{self.name}' node is " + "not compatible with its predecessor." 
+ ) from e + + def get_task_name(self, task: LabelType) -> str: + """Gets the name of a task for a particular C{LabelType}. + + @type task: LabelType + @param task: Task to get the name for. + @rtype: str + @return: Name of the task. + @raises RuntimeError: If the node does not define any tasks. + @raises ValueError: If the task is not supported by the node. + """ + if not self._tasks: + raise RuntimeError(f"Node '{self.name}' does not define any task.") + + if task not in self._tasks: + raise ValueError( + f"Node '{self.name}' does not support the '{task.value}' task." + ) + return self._tasks[task] + + @property + def name(self) -> str: + return self.__class__.__name__ + + @property + def task(self) -> str: + """Getter for the task. + + @type: str + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the node defines more than one task. In + that case, use the L{get_task_name} method instead. + """ + if not self._tasks: + raise RuntimeError(f"{self.name} does not define any task.") + + if len(self._tasks) > 1: + raise ValueError( + f"Node {self.name} has multiple tasks defined. " + "Use the `get_task_name` method instead." + ) + return next(iter(self._tasks.values())) + + def get_n_classes(self, task: LabelType) -> int: + """Gets the number of classes for a particular task. + + @type task: LabelType + @param task: Task to get the number of classes for. + @rtype: int + @return: Number of classes for the task. + """ + return self.dataset_metadata.n_classes(self.get_task_name(task)) + + def get_class_names(self, task: LabelType) -> list[str]: + """Gets the class names for a particular task. + + @type task: LabelType + @param task: Task to get the class names for. + @rtype: list[str] + @return: Class names for the task. + """ + return self.dataset_metadata.classes(self.get_task_name(task)) + + @property + def n_keypoints(self) -> int: + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. + """ + if self._n_keypoints is not None: + return self._n_keypoints + + if self._tasks: + if LabelType.KEYPOINTS not in self._tasks: + raise ValueError(f"{self.name} does not support keypoints.") + return self.dataset_metadata.n_keypoints( + self.get_task_name(LabelType.KEYPOINTS) + ) + + raise RuntimeError( + f"{self.name} does not have any tasks defined, " + "`BaseNode.n_keypoints` property cannot be used. " + "Either override the `tasks` class attribute, " + "pass the `n_keypoints` attribute to the constructor or call " + "the `BaseNode.dataset_metadata.get_n_keypoints` method manually." ) @property def n_classes(self) -> int: - """Getter for the number of classes.""" - return self.dataset_metadata.n_classes(self.task_type) + """Getter for the number of classes. + + @type: int + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the number of classes is different for + different tasks. In that case, use the L{get_n_classes} + method. + """ + if self._n_classes is not None: + return self._n_classes + + if not self._tasks: + raise RuntimeError( + f"{self.name} does not have any tasks defined, " + "`BaseNode.n_classes` property cannot be used. " + "Either override the `tasks` class attribute, " + "pass the `n_classes` attribute to the constructor or call " + "the `BaseNode.dataset_metadata.n_classes` method manually." 
+ ) + elif len(self._tasks) == 1: + return self.dataset_metadata.n_classes(self.task) + else: + n_classes = [ + self.dataset_metadata.n_classes(self.get_task_name(task)) + for task in self._tasks + ] + if len(set(n_classes)) == 1: + return n_classes[0] + raise ValueError( + "Node defines multiple tasks but they have different number of classes. " + "This is likely an error, as the number of classes should be the same." + "If it is intended, use `BaseNode.get_n_classes` instead." + ) @property def class_names(self) -> list[str]: - """Getter for the class names.""" - return self.dataset_metadata.class_names(self.task_type) + """Getter for the class names. + + @type: list[str] + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. + """ + if not self._tasks: + raise RuntimeError( + f"{self.name} does not have any tasks defined, " + "`BaseNode.class_names` property cannot be used. " + "Either override the `tasks` class attribute, " + "pass the `n_classes` attribute to the constructor or call " + "the `BaseNode.dataset_metadata.class_names` method manually." + ) + elif len(self._tasks) == 1: + return self.dataset_metadata.classes(self.task) + else: + class_names = [ + self.dataset_metadata.classes(self.get_task_name(task)) + for task in self._tasks + ] + if all(set(names) == set(class_names[0]) for names in class_names): + return class_names[0] + raise ValueError( + "Node defines multiple tasks but they have different class names. " + "This is likely an error, as the class names should be the same. " + "If it is intended, use `BaseNode.get_class_names` instead." + ) @property def input_shapes(self) -> list[Packet[Size]]: - """Getter for the input shapes.""" + """Getter for the input shapes. + + @type: list[Packet[Size]] + @raises RuntimeError: If the C{input_shapes} were not set during + initialization. + """ + if self._input_shapes is None: raise self._non_set_error("input_shapes") return self._input_shapes @property def original_in_shape(self) -> Size: - """Getter for the original input shape.""" + """Getter for the original input shape as [N, H, W]. + + @type: Size + @raises RuntimeError: If the C{original_in_shape} were not set + during initialization. + """ if self._original_in_shape is None: raise self._non_set_error("original_in_shape") return self._original_in_shape @@ -146,10 +407,11 @@ def dataset_metadata(self) -> DatasetMetadata: """Getter for the dataset metadata. @type: L{DatasetMetadata} - @raises ValueError: If the C{dataset_metadata} is C{None}. + @raises RuntimeError: If the C{dataset_metadata} were not set + during initialization. """ if self._dataset_metadata is None: - raise ValueError( + raise RuntimeError( f"{self._non_set_error('dataset_metadata')}" "Either provide `dataset_metadata` or `n_classes`." ) @@ -165,18 +427,18 @@ def in_sizes(self) -> Size | list[Size]: In case `in_sizes` were provided during initialization, they are returned directly. 
- Example: + Example:: - >>> input_shapes = [{"features": [Size(1, 64, 128, 128), Size(1, 3, 224, 224)]}] + >>> input_shapes = [{"features": [Size(64, 128, 128), Size(3, 224, 224)]}] >>> attach_index = -1 - >>> in_sizes = Size(1, 3, 224, 224) + >>> in_sizes = Size(3, 224, 224) - >>> input_shapes = [{"features": [Size(1, 64, 128, 128), Size(1, 3, 224, 224)]}] + >>> input_shapes = [{"features": [Size(64, 128, 128), Size(3, 224, 224)]}] >>> attach_index = "all" - >>> in_sizes = [Size(1, 64, 128, 128), Size(1, 3, 224, 224)] + >>> in_sizes = [Size(64, 128, 128), Size(3, 224, 224)] @type: Size | list[Size] - @raises IncompatibleException: If the C{input_shapes} are too complicated for + @raises RuntimeError: If the C{input_shapes} are too complicated for the default implementation. """ if self._in_sizes is not None: @@ -184,29 +446,27 @@ def in_sizes(self) -> Size | list[Size]: features = self.input_shapes[0].get("features") if features is None: - raise IncompatibleException( - f"Feature field is missing in {self.__class__.__name__}. " + raise RuntimeError( + f"Feature field is missing in {self.name}. " "The default implementation of `in_sizes` cannot be used." ) - shapes = self.get_attached(self.input_shapes[0]["features"]) - if isinstance(shapes, list) and len(shapes) == 1: - return shapes[0] - return shapes + return self.get_attached(self.input_shapes[0]["features"]) @property def in_channels(self) -> int | list[int]: """Simplified getter for the number of input channels. - Should work out of the box for most cases where the C{input_shapes} are - sufficiently simple. Otherwise the C{input_shapes} should be used directly. If - C{attach_index} is set to "all" or is a slice, returns a list of input channels, + Should work out of the box for most cases where the + C{input_shapes} are sufficiently simple. Otherwise the + C{input_shapes} should be used directly. If C{attach_index} is + set to "all" or is a slice, returns a list of input channels, otherwise returns a single value. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated + for the default implementation of C{in_sizes}. """ - return self._get_nth_size(1) + return self._get_nth_size(-3) @property def in_height(self) -> int | list[int]: @@ -216,10 +476,10 @@ def in_height(self) -> int | list[int]: sufficiently simple. Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ - return self._get_nth_size(2) + return self._get_nth_size(-2) @property def in_width(self) -> int | list[int]: @@ -229,10 +489,10 @@ def in_width(self) -> int | list[int]: sufficiently simple. Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ - return self._get_nth_size(3) + return self._get_nth_size(-1) @property def export(self) -> bool: @@ -250,29 +510,37 @@ def set_export_mode(self, mode: bool = True) -> None: def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: """Prepares inputs for the forward pass. 
- Unwraps the inputs from the C{list[Packet[Tensor]]} input so they can be passed - to the forward call. The default implementation expects a single input with - C{features} key and returns the tensor or tensors at the C{attach_index} - position. + Unwraps the inputs from the C{list[Packet[Tensor]]} input so + they can be passed to the forward call. The default + implementation expects a single input with C{features} key and + returns the tensor or tensors at the C{attach_index} position. - For most cases the default implementation should be sufficient. Exceptions are - modules with multiple inputs or producing more complex outputs. This is - typically the case for output nodes. + For most cases the default implementation should be sufficient. + Exceptions are modules with multiple inputs or producing more + complex outputs. This is typically the case for output nodes. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the node. @rtype: ForwardInputT - @return: Prepared inputs, ready to be passed to the L{forward} method. + @return: Prepared inputs, ready to be passed to the L{forward} + method. + @raises ValueError: If the number of inputs is not equal to 1. + In such cases the method has to be overridden. """ + if len(inputs) > 1: + raise ValueError( + f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead. " + "If the node expects multiple inputs, the `unwrap` method should be overridden." + ) return self.get_attached(inputs[0]["features"]) # type: ignore @abstractmethod def forward(self, inputs: ForwardInputT) -> ForwardOutputT: """Forward pass of the module. - @type inputs: ForwardInputT + @type inputs: L{ForwardInputT} @param inputs: Inputs to the module. - @rtype: ForwardOutputT + @rtype: L{ForwardOutputT} @return: Result of the forward pass. """ ... @@ -281,30 +549,53 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: """Wraps the output of the forward pass into a `Packet[Tensor]`. The default implementation expects a single tensor or a list of tensors - and wraps them into a Packet with `features` key. + and wraps them into a Packet with either the node task as a key + or "features" key if task is not defined. + + Example:: + + >>> class FooNode(BaseNode): + ... tasks = [LabelType.CLASSIFICATION] + ... + ... class BarNode(BaseNode): + ... pass + ... + >>> node = FooNode() + >>> node.wrap(torch.rand(1, 10)) + {"classification": [Tensor(1, 10)]} + >>> node = BarNode() + >>> node.wrap([torch.rand(1, 10), torch.rand(1, 10)]) + {"features": [Tensor(1, 10), Tensor(1, 10)]} @type output: ForwardOutputT @param output: Output of the forward pass. @rtype: L{Packet}[Tensor] @return: Wrapped output. + + @raises ValueError: If the C{output} argument is not a tensor or a list of tensors. + In such cases the L{wrap} method should be overridden. """ - match output: - case Tensor(data=out): - outputs = [out] - case list(tensors) if all(isinstance(t, Tensor) for t in tensors): - outputs = tensors - case _: - raise IncompatibleException( - "Default `wrap` expects a single tensor or a list of tensors." - ) - return {"features": outputs} + if isinstance(output, Tensor): + outputs = [output] + elif isinstance(output, (list, tuple)) and all( + isinstance(t, Tensor) for t in output + ): + outputs = list(output) + else: + raise ValueError( + "Default `wrap` expects a single tensor or a list of tensors." 
+ ) + try: + task = self.task + except RuntimeError: + task = "features" + return {task: outputs} def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: - """Combines the forward pass with the wrapping and unwrapping of the inputs. - - Additionally validates the inputs against `in_protocols`. + """Combines the forward pass with the wrapping and unwrapping of + the inputs. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the module. @@ -313,46 +604,33 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: @return: Outputs of the module as a dictionary of list of tensors: `{"features": [Tensor, ...], "segmentation": [Tensor]}` - @raises IncompatibleException: If the inputs are not compatible with the node. + @raises RuntimeError: If default L{wrap} or L{unwrap} methods are not sufficient. """ - unwrapped = self.unwrap(self.validate(inputs)) + unwrapped = self.unwrap(inputs) outputs = self(unwrapped) - return self.wrap(outputs) - - def validate(self, data: list[Packet[Tensor]]) -> list[Packet[Tensor]]: - """Validates the inputs against `in_protocols`.""" - if len(data) != len(self.in_protocols): - raise IncompatibleException( - f"Node {self.__class__.__name__} expects {len(self.in_protocols)} inputs, " - f"but got {len(data)} inputs instead." - ) - try: - return [ - validate_packet(d, protocol) - for d, protocol in zip(data, self.in_protocols) - ] - except ValidationError as e: - raise IncompatibleException.from_validation_error( - e, self.__class__.__name__ - ) from e + wrapped = self.wrap(outputs) + str_tasks = [task.value for task in self._tasks] if self._tasks else [] + for key in list(wrapped.keys()): + if key in str_tasks: + value = wrapped.pop(key) + wrapped[self.get_task_name(LabelType(key))] = value + return wrapped T = TypeVar("T", Tensor, Size) def get_attached(self, lst: list[T]) -> list[T] | T: """Gets the attached elements from a list. - This method is used to get the attached elements from a list based on - the `attach_index` attribute. + This method is used to get the attached elements from a list + based on the C{attach_index} attribute. @type lst: list[T] - @param lst: List to get the attached elements from. Can be either - a list of tensors or a list of sizes. - + @param lst: List to get the attached elements from. Can be + either a list of tensors or a list of sizes. @rtype: list[T] | T - @return: Attached elements. If `attach_index` is set to `"all"` or is a slice, - returns a list of attached elements. - - @raises ValueError: If the `attach_index` is invalid. + @return: Attached elements. If C{attach_index} is set to + C{"all"} or is a slice, returns a list of attached elements. + @raises ValueError: If the C{attach_index} is invalid. """ def _normalize_index(index: int) -> int: @@ -386,7 +664,9 @@ def _normalize_slice(i: int, j: int) -> slice: case (int(i), int(j), int(k)): return lst[i:j:k] case _: - raise ValueError(f"Invalid attach index: `{self.attach_index}`") + raise ValueError( + f"Invalid attach index: `{self.attach_index}`" + ) def _get_nth_size(self, idx: int) -> int | list[int]: match self.in_sizes: @@ -394,3 +674,9 @@ def _get_nth_size(self, idx: int) -> int | list[int]: return sizes[idx] case list(sizes): return [size[idx] for size in sizes] + + def _non_set_error(self, name: str) -> RuntimeError: + return RuntimeError( + f"'{self.name}' node is trying to access `{name}`, " + "but it was not set during initialization. 
" + ) diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/bisenet_head.py deleted file mode 100644 index 99845177..00000000 --- a/luxonis_train/nodes/bisenet_head.py +++ /dev/null @@ -1,50 +0,0 @@ -"""BiSeNet segmentation head. - -Adapted from U{https://github.com/taveraantonio/BiseNetv1}. -License: NOT SPECIFIED. -""" - - -from torch import Tensor, nn - -from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType, Packet - -from .base_node import BaseNode - - -class BiSeNetHead(BaseNode[Tensor, Tensor]): - attach_index: int = -1 - in_height: int - in_channels: int - - def __init__( - self, - intermediate_channels: int = 64, - **kwargs, - ): - """BiSeNet segmentation head. - TODO: Add more documentation. - - @type intermediate_channels: int - @param intermediate_channels: How many intermediate channels to use. - Defaults to C{64}. - """ - super().__init__(task_type=LabelType.SEGMENTATION, **kwargs) - - original_height = self.original_in_shape[2] - upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height) - out_channels = self.n_classes * upscale_factor * upscale_factor - - self.conv_3x3 = ConvModule(self.in_channels, intermediate_channels, 3, 1, 1) - self.conv_1x1 = nn.Conv2d(intermediate_channels, out_channels, 1, 1, 0) - self.upscale = nn.PixelShuffle(upscale_factor) - - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"segmentation": [output]} - - def forward(self, inputs: Tensor) -> Tensor: - inputs = self.conv_3x3(inputs) - inputs = self.conv_1x1(inputs) - return self.upscale(inputs) diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py index a87c336e..2e11c52d 100644 --- a/luxonis_train/nodes/blocks/__init__.py +++ b/luxonis_train/nodes/blocks/__init__.py @@ -1,8 +1,12 @@ from .blocks import ( AttentionRefinmentBlock, + BasicResNetBlock, BlockRepeater, + Bottleneck, ConvModule, + DropPath, EfficientDecoupledBlock, + EfficientOBBDecoupledBlock, FeatureFusionBlock, KeypointBlock, LearnableAdd, @@ -14,12 +18,14 @@ SpatialPyramidPoolingBlock, SqueezeExciteBlock, UpBlock, + UpscaleOnline, autopad, ) __all__ = [ "autopad", "EfficientDecoupledBlock", + "EfficientOBBDecoupledBlock", "ConvModule", "UpBlock", "RepDownBlock", @@ -34,4 +40,8 @@ "LearnableMulAddConv", "KeypointBlock", "RepUpBlock", + "BasicResNetBlock", + "Bottleneck", + "UpscaleOnline", + "DropPath", ] diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index f4bd0172..4b96a709 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -1,11 +1,9 @@ -# TODO: cleanup, document -# Check if some blocks could be merged togetner. - import math from typing import TypeVar import numpy as np import torch +import torch.nn.functional as F from torch import Tensor, nn from luxonis_train.nodes.activations import HSigmoid @@ -13,7 +11,8 @@ class EfficientDecoupledBlock(nn.Module): def __init__(self, n_classes: int, in_channels: int): - """Efficient Decoupled block used for class and regression predictions. + """Efficient Decoupled block used for class and regression + predictions. @type n_classes: int @param n_classes: Number of classes. 
@@ -39,7 +38,9 @@ def __init__(self, n_classes: int, in_channels: int): padding=1, activation=nn.SiLU(), ), - nn.Conv2d(in_channels=in_channels, out_channels=n_classes, kernel_size=1), + nn.Conv2d( + in_channels=in_channels, out_channels=n_classes, kernel_size=1 + ), ) self.regression_branch = nn.Sequential( ConvModule( @@ -64,7 +65,7 @@ def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]: return out_feature, out_cls, out_reg - def _initialize_weights_and_biases(self, prior_prob: float): + def _initialize_weights_and_biases(self, prior_prob: float) -> None: data = [ (self.class_branch[-1], -math.log((1 - prior_prob) / prior_prob)), (self.regression_branch[-1], 1.0), @@ -80,6 +81,59 @@ def _initialize_weights_and_biases(self, prior_prob: float): module.weight = nn.Parameter(w, requires_grad=True) +class EfficientOBBDecoupledBlock(EfficientDecoupledBlock): + def __init__(self, n_classes: int, in_channels: int, reg_max: int = 16): + """Efficient Decoupled block used for angle, class and + regression predictions in OBB (oriented bounding box) tasks. + + @type n_classes: int + @param n_classes: Number of classes. + @type in_channels: int + @param in_channels: Number of input channels. + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions + of bounding box coordinates. + """ + super().__init__(n_classes, in_channels) + + self.regression_branch = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=1, + padding=1, + activation=nn.SiLU(), + ), + nn.Conv2d( + in_channels=in_channels, + out_channels=4 * reg_max, + kernel_size=1, + ), + ) + + self.angle_branch = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=1, + padding=1, + activation=nn.SiLU(), + ), + nn.Conv2d(in_channels=in_channels, out_channels=1, kernel_size=1), + ) + + def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor, Tensor]: + out_feature = self.decoder(x) + + out_cls = self.class_branch(out_feature) + out_reg = self.regression_branch(out_feature) + out_angle = self.angle_branch(out_feature) + + return out_feature, out_cls, out_reg, out_angle + + class ConvModule(nn.Sequential): def __init__( self, @@ -152,7 +206,10 @@ def __init__( super().__init__( nn.ConvTranspose2d( - in_channels, out_channels, kernel_size=kernel_size, stride=stride + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, ), ConvModule(out_channels, out_channels, kernel_size=3, padding=1), ) @@ -216,10 +273,7 @@ def __init__( kernel_size: int = 3, stride: int = 1, padding: int = 1, - dilation: int = 1, groups: int = 1, - padding_mode: str = "zeros", - deploy: bool = False, use_se: bool = False, ): """RepVGGBlock is a basic rep-style block, including training and deploy status @@ -249,7 +303,6 @@ def __init__( """ super().__init__() - self.deploy = deploy self.groups = groups self.in_channels = in_channels self.out_channels = out_channels @@ -262,51 +315,39 @@ def __init__( self.nonlinearity = nn.ReLU() if use_se: - # Note that RepVGG-D2se uses SE before nonlinearity. But RepVGGplus models uses SqueezeExciteBlock after nonlinearity. + # NOTE: that RepVGG-D2se uses SE before nonlinearity. + # But RepVGGplus models uses SqueezeExciteBlock after nonlinearity. 
self.se = SqueezeExciteBlock( out_channels, intermediate_channels=int(out_channels // 16) ) else: - self.se = nn.Identity() # type: ignore + self.se = nn.Identity() - if deploy: - self.rbr_reparam = nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=True, - padding_mode=padding_mode, - ) - else: - self.rbr_identity = ( - nn.BatchNorm2d(num_features=in_channels) - if out_channels == in_channels and stride == 1 - else None - ) - self.rbr_dense = ConvModule( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - activation=nn.Identity(), - ) - self.rbr_1x1 = ConvModule( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=stride, - padding=padding_11, - groups=groups, - activation=nn.Identity(), - ) + self.rbr_identity = ( + nn.BatchNorm2d(num_features=in_channels) + if out_channels == in_channels and stride == 1 + else None + ) + self.rbr_dense = ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + activation=nn.Identity(), + ) + self.rbr_1x1 = ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=stride, + padding=padding_11, + groups=groups, + activation=nn.Identity(), + ) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: if hasattr(self, "rbr_reparam"): return self.nonlinearity(self.se(self.rbr_reparam(x))) @@ -315,11 +356,14 @@ def forward(self, x: Tensor): else: id_out = self.rbr_identity(x) - return self.nonlinearity(self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out)) + return self.nonlinearity( + self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out) + ) - def reparametrize(self): + def reparametrize(self) -> None: if hasattr(self, "rbr_reparam"): return + kernel, bias = self._get_equivalent_kernel_bias() self.rbr_reparam = nn.Conv2d( in_channels=self.rbr_dense[0].in_channels, @@ -333,15 +377,16 @@ def reparametrize(self): ) self.rbr_reparam.weight.data = kernel # type: ignore self.rbr_reparam.bias.data = bias # type: ignore - self.__delattr__("rbr_dense") - self.__delattr__("rbr_1x1") + del self.rbr_dense + del self.rbr_1x1 if hasattr(self, "rbr_identity"): - self.__delattr__("rbr_identity") + del self.rbr_identity if hasattr(self, "id_tensor"): - self.__delattr__("id_tensor") + del self.id_tensor - def _get_equivalent_kernel_bias(self): - """Derives the equivalent kernel and bias in a DIFFERENTIABLE way.""" + def _get_equivalent_kernel_bias(self) -> tuple[Tensor, Tensor]: + """Derives the equivalent kernel and bias in a DIFFERENTIABLE + way.""" kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) @@ -358,7 +403,9 @@ def _pad_1x1_to_3x3_tensor(self, kernel1x1: Tensor | None) -> Tensor: else: return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) - def _fuse_bn_tensor(self, branch: nn.Module | None) -> tuple[Tensor, Tensor]: + def _fuse_bn_tensor( + self, branch: nn.Module | None + ) -> tuple[Tensor, Tensor]: if branch is None: return torch.tensor(0), torch.tensor(0) if isinstance(branch, nn.Sequential): @@ -396,11 +443,11 @@ def __init__( block: type[nn.Module], in_channels: int, out_channels: int, - num_blocks: int = 1, + n_blocks: int = 1, ): - """Module which repeats the 
block n times. First block accepts in_channels and - outputs out_channels while subsequent blocks accept out_channels and output - out_channels. + """Module which repeats the block n times. First block accepts + in_channels and outputs out_channels while subsequent blocks + accept out_channels and output out_channels. @type block: L{nn.Module} @param block: Block to repeat. @@ -408,28 +455,31 @@ def __init__( @param in_channels: Number of input channels. @type out_channels: int @param out_channels: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks to repeat. Defaults to C{1}. + @type n_blocks: int + @param n_blocks: Number of blocks to repeat. Defaults to C{1}. """ super().__init__() in_channels = in_channels self.blocks = nn.ModuleList() - for _ in range(num_blocks): + for _ in range(n_blocks): self.blocks.append( block(in_channels=in_channels, out_channels=out_channels) ) in_channels = out_channels - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: for block in self.blocks: x = block(x) return x class SpatialPyramidPoolingBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 5): - """Spatial Pyramid Pooling block with ReLU activation on three different scales. + def __init__( + self, in_channels: int, out_channels: int, kernel_size: int = 5 + ): + """Spatial Pyramid Pooling block with ReLU activation on three + different scales. @type in_channels: int @param in_channels: Number of input channels. @@ -447,7 +497,7 @@ def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 5): kernel_size=kernel_size, stride=1, padding=kernel_size // 2 ) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.conv1(x) # apply max-pooling at three different scales y1 = self.max_pool(x) @@ -483,7 +533,7 @@ def __init__(self, in_channels: int, out_channels: int): nn.Sigmoid(), ) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.conv_3x3(x) attention = self.attention(x) out = x * attention @@ -491,7 +541,9 @@ def forward(self, x): class FeatureFusionBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, reduction: int = 1): + def __init__( + self, in_channels: int, out_channels: int, reduction: int = 1 + ): """Feature Fusion block adapted from: U{https://github.com/taveraantonio/BiseNetv1}. 
@type in_channels: int @@ -521,7 +573,7 @@ def __init__(self, in_channels: int, out_channels: int, reduction: int = 1): nn.Sigmoid(), ) - def forward(self, x1, x2): + def forward(self, x1: Tensor, x2: Tensor) -> Tensor: fusion = torch.cat([x1, x2], dim=1) x = self.conv_1x1(fusion) attention = self.attention(x) @@ -538,7 +590,7 @@ def __init__(self, channel: int): self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) nn.init.normal_(self.implicit, std=0.02) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.implicit.expand_as(x) + x @@ -551,7 +603,7 @@ def __init__(self, channel: int): self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) nn.init.normal_(self.implicit, mean=1.0, std=0.02) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.implicit.expand_as(x) * x @@ -604,7 +656,7 @@ def __init__(self, in_channels: int, out_channels: int): self.block = nn.Sequential(*layers) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: out = self.block(x) return out @@ -615,19 +667,19 @@ def __init__( in_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """UpBlock used in RepPAN neck. @type in_channels: int @param in_channels: Number of input channels. @type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. """ super().__init__() @@ -649,7 +701,7 @@ def __init__( block=RepVGGBlock, in_channels=in_channels_next + out_channels, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> tuple[Tensor, Tensor]: @@ -667,21 +719,22 @@ def __init__( downsample_out_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """DownBlock used in RepPAN neck. @type in_channels: int @param in_channels: Number of input channels. @type downsample_out_channels: int - @param downsample_out_channels: Number of output channels after downsample. + @param downsample_out_channels: Number of output channels after + downsample. @type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. 
""" super().__init__() @@ -696,7 +749,7 @@ def __init__( block=RepVGGBlock, in_channels=downsample_out_channels + in_channels_next, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> Tensor: @@ -726,3 +779,238 @@ def autopad(kernel_size: T, padding: T | None = None) -> T: if isinstance(kernel_size, int): return kernel_size // 2 return tuple(x // 2 for x in kernel_size) + + +class BasicResNetBlock(nn.Module): + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 1, + final_relu: bool = True, + droppath_prob: float = 0.0, + ): + """A basic residual block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of output channels. + @type stride: int + @param stride: Stride for the convolutional layers. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 1. + @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. + """ + super().__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d( + in_planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.final_relu = final_relu + + self.drop_path = DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), + ) + + def forward(self, x: Tensor) -> Tensor: + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out = self.drop_path(out) + out += self.shortcut(x) + if self.final_relu: + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 4, + final_relu: bool = True, + droppath_prob: float = 0.0, + ): + """A bottleneck block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of intermediate channels. + @type stride: int + @param stride: Stride for the second convolutional layer. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 4. + @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. 
+ """ + super().__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, self.expansion * planes, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(self.expansion * planes) + self.final_relu = final_relu + + self.drop_path = DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), + ) + + def forward(self, x: Tensor) -> Tensor: + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + + out = self.drop_path(out) + out += self.shortcut(x) + + if self.final_relu: + out = F.relu(out) + + return out + + +class UpscaleOnline(nn.Module): + """Upscale tensor to a specified size during the forward pass. + + This class supports cases where the required scale/size is only + known when the input is received. Only the interpolation mode is set + in advance. + """ + + def __init__(self, mode: str = "bilinear"): + """Initialize UpscaleOnline with the interpolation mode. + + @type mode: str + @param mode: Interpolation mode for resizing. Defaults to + "bilinear". + """ + super().__init__() + self.mode = mode + + def forward( + self, x: Tensor, output_height: int, output_width: int + ) -> Tensor: + """Upscale the input tensor to the specified height and width. + + @type x: Tensor + @param x: Input tensor to be upscaled. + @type output_height: int + @param output_height: Desired height of the output tensor. + @type output_width: int + @param output_width: Desired width of the output tensor. + @return: Upscaled tensor. + """ + return F.interpolate( + x, size=[output_height, output_width], mode=self.mode + ) + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample, when applied in the + main path of residual blocks. + + Intended usage of this block is as follows: + + >>> class ResNetBlock(nn.Module): + ... def __init__(self, ..., drop_path_rate: float): + ... self.drop_path = DropPath(drop_path_rate) + + ... def forward(self, x): + ... return x + self.drop_path(self.conv_bn_act(x)) + + @see U{Original code (TIMM) } + @license: U{Apache License 2.0 } + """ + + def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): + """Initializes the DropPath module. + + @type drop_prob: float + @param drop_prob: Probability of zeroing out individual vectors + (channel dimension) of each feature map. Defaults to 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep + probability. Enabled by default to maintain output mean & + std in the same range as without DropPath. Defaults to True. + """ + super().__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def drop_path( + self, x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True + ) -> Tensor: + """Drop paths (Stochastic Depth) per sample when applied in the + main path of residual blocks. + + @type x: Tensor + @param x: Input tensor. + @type drop_prob: float + @param drop_prob: Probability of dropping a path. Defaults to + 0.0. 
+ @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep + probability. Defaults to True. + @return: Tensor with dropped paths based on the provided drop + probability. + """ + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + def forward(self, x: Tensor) -> Tensor: + if self.drop_prob == 0.0 or not self.training: + return x + return self.drop_path(x, self.drop_prob, self.scale_by_keep) diff --git a/luxonis_train/nodes/contextspatial.py b/luxonis_train/nodes/contextspatial.py deleted file mode 100644 index adbb84bc..00000000 --- a/luxonis_train/nodes/contextspatial.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Implementation of Context Spatial backbone. - -Source: U{BiseNetV1} -""" - - -from torch import Tensor, nn -from torch.nn import functional as F - -from luxonis_train.nodes.blocks import ( - AttentionRefinmentBlock, - ConvModule, - FeatureFusionBlock, -) -from luxonis_train.utils.registry import NODES - -from .base_node import BaseNode - - -class ContextSpatial(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - - def __init__(self, context_backbone: str = "MobileNetV2", **kwargs): - """Context spatial backbone. - TODO: Add more documentation. - - - @type context_backbone: str - @param context_backbone: Backbone used. Defaults to C{MobileNetV2}. - """ - super().__init__(**kwargs) - - self.context_path = ContextPath(NODES.get(context_backbone)(**kwargs)) - self.spatial_path = SpatialPath(3, 128) - self.ffm = FeatureFusionBlock(256, 256) - - def forward(self, x: Tensor) -> list[Tensor]: - spatial_out = self.spatial_path(x) - context16, _ = self.context_path(x) - fm_fuse = self.ffm(spatial_out, context16) - outs = [fm_fuse] - return outs - - -class SpatialPath(nn.Module): - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - intermediate_channels = 64 - self.conv_7x7 = ConvModule(in_channels, intermediate_channels, 7, 2, 3) - self.conv_3x3_1 = ConvModule( - intermediate_channels, intermediate_channels, 3, 2, 1 - ) - self.conv_3x3_2 = ConvModule( - intermediate_channels, intermediate_channels, 3, 2, 1 - ) - self.conv_1x1 = ConvModule(intermediate_channels, out_channels, 1, 1, 0) - - def forward(self, x: Tensor) -> Tensor: - x = self.conv_7x7(x) - x = self.conv_3x3_1(x) - x = self.conv_3x3_2(x) - return self.conv_1x1(x) - - -class ContextPath(nn.Module): - def __init__(self, backbone: BaseNode): - super().__init__() - self.backbone = backbone - - self.up16 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) - self.up32 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) - - self.refine16 = ConvModule(128, 128, 3, 1, 1) - self.refine32 = ConvModule(128, 128, 3, 1, 1) - - def forward(self, x: Tensor) -> list[Tensor]: - *_, down16, down32 = self.backbone.forward(x) - - if not hasattr(self, "arm16"): - self.arm16 = AttentionRefinmentBlock(down16.shape[1], 128) - self.arm32 = AttentionRefinmentBlock(down32.shape[1], 128) - - self.global_context = nn.Sequential( - nn.AdaptiveAvgPool2d(1), ConvModule(down32.shape[1], 128, 1, 1, 0) - ) - - arm_down16 = self.arm16(down16) - arm_down32 = self.arm32(down32) - - global_down32 = self.global_context(down32) - global_down32 = F.interpolate( - global_down32, size=down32.size()[2:], mode="bilinear", align_corners=True - ) - - arm_down32 = arm_down32 + global_down32 - arm_down32 
= self.up32(arm_down32) - arm_down32 = self.refine32(arm_down32) - - arm_down16 = arm_down16 + arm_down32 - arm_down16 = self.up16(arm_down16) - arm_down16 = self.refine16(arm_down16) - - return [arm_down16, arm_down32] diff --git a/luxonis_train/nodes/efficientnet.py b/luxonis_train/nodes/efficientnet.py deleted file mode 100644 index 0b0aedde..00000000 --- a/luxonis_train/nodes/efficientnet.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Implementation of the EfficientNet backbone. - -Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} -@license: U{Apache 2.0} -""" - -import torch -from torch import Tensor - -from .base_node import BaseNode - - -class EfficientNet(BaseNode[Tensor, list[Tensor]]): - def __init__(self, download_weights: bool = False, **kwargs): - """EfficientNet backbone. - - @type download_weights: bool - @param download_weights: If C{True} download weights from imagenet. Defaults to - C{False}. - """ - super().__init__(**kwargs) - - efficientnet_lite0_model = torch.hub.load( - "rwightman/gen-efficientnet-pytorch", - "efficientnet_lite0", - pretrained=download_weights, - ) - self.out_indices = [1, 2, 4, 6] - self.backbone = efficientnet_lite0_model - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - x = self.backbone.conv_stem(x) - x = self.backbone.bn1(x) - x = self.backbone.act1(x) - for i, m in enumerate(self.backbone.blocks): - x = m(x) - if i in self.out_indices: - outs.append(x) - return outs diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/efficientrep.py deleted file mode 100644 index e6a014af..00000000 --- a/luxonis_train/nodes/efficientrep.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Implementation of the EfficientRep backbone. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -""" - -import logging - -from torch import Tensor, nn - -from luxonis_train.nodes.blocks import ( - BlockRepeater, - RepVGGBlock, - SpatialPyramidPoolingBlock, -) -from luxonis_train.utils.general import make_divisible - -from .base_node import BaseNode - - -class EfficientRep(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - - def __init__( - self, - channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, - depth_mul: float = 0.33, - width_mul: float = 0.25, - **kwargs, - ): - """EfficientRep backbone. - - @type channels_list: list[int] | None - @param channels_list: List of number of channels for each block. Defaults to - C{[64, 128, 256, 512, 1024]}. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[1, - 6, 12, 18, 6]}. - @type depth_mul: float - @param depth_mul: Depth multiplier. Defaults to 0.33. - @type width_mul: float - @param width_mul: Width multiplier. Defaults to 0.25. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. 
- """ - super().__init__(**kwargs) - - channels_list = channels_list or [64, 128, 256, 512, 1024] - num_repeats = num_repeats or [1, 6, 12, 18, 6] - channels_list = [make_divisible(i * width_mul, 8) for i in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats - ] - - in_channels = self.in_channels - if not isinstance(in_channels, int): - raise ValueError("EfficientRep module expects only one input.") - - self.repvgg_encoder = RepVGGBlock( - in_channels=in_channels, - out_channels=channels_list[0], - kernel_size=3, - stride=2, - ) - - self.blocks = nn.ModuleList() - for i in range(4): - curr_block = nn.Sequential( - RepVGGBlock( - in_channels=channels_list[i], - out_channels=channels_list[i + 1], - kernel_size=3, - stride=2, - ), - BlockRepeater( - block=RepVGGBlock, - in_channels=channels_list[i + 1], - out_channels=channels_list[i + 1], - num_blocks=num_repeats[i + 1], - ), - ) - self.blocks.append(curr_block) - - self.blocks[-1].append( - SpatialPyramidPoolingBlock( - in_channels=channels_list[4], - out_channels=channels_list[4], - kernel_size=5, - ) - ) - - def set_export_mode(self, mode: bool = True) -> None: - """Reparametrizes instances of `RepVGGBlock` in the network. - - @type mode: bool - @param mode: Whether to set the export mode. Defaults to C{True}. - """ - super().set_export_mode(mode) - logger = logging.getLogger(__name__) - if mode: - logger.info("Reparametrizing EfficientRep.") - for module in self.modules(): - if isinstance(module, RepVGGBlock): - module.reparametrize() - - def forward(self, x: Tensor) -> list[Tensor]: - outputs = [] - x = self.repvgg_encoder(x) - for block in self.blocks: - x = block(x) - outputs.append(x) - return outputs diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py new file mode 100644 index 00000000..90f75725 --- /dev/null +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -0,0 +1,23 @@ +from enum import Enum + + +class ImplementedHeads(Enum): + """Task categorization for the implemented heads.""" + + ClassificationHead = "ClassificationParser" + EfficientBBoxHead = "YOLO" + ImplicitKeypointBBoxHead = "YoloDetectionNetwork" + EfficientKeypointBBoxHead = "YoloDetectionNetwork" + SegmentationHead = "SegmentationParser" + BiSeNetHead = "SegmentationParser" + + +class ImplementedHeadsIsSoxtmaxed(Enum): + """Softmaxed output categorization for the implemented heads.""" + + ClassificationHead = False + EfficientBBoxHead = None + ImplicitKeypointBBoxHead = None + EfficientKeypointBBoxHead = None + SegmentationHead = False + BiSeNetHead = False diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py new file mode 100644 index 00000000..6044e615 --- /dev/null +++ b/luxonis_train/nodes/heads/__init__.py @@ -0,0 +1,19 @@ +from .bisenet_head import BiSeNetHead +from .classification_head import ClassificationHead +from .ddrnet_segmentation_head import DDRNetSegmentationHead +from .efficient_bbox_head import EfficientBBoxHead +from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead +from .efficient_obbox_head import EfficientOBBoxHead +from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead +from .segmentation_head import SegmentationHead + +__all__ = [ + "BiSeNetHead", + "ClassificationHead", + "EfficientBBoxHead", + "EfficientOBBoxHead", + "EfficientKeypointBBoxHead", + "ImplicitKeypointBBoxHead", + "SegmentationHead", + "DDRNetSegmentationHead", +] diff --git 
a/luxonis_train/nodes/heads/bisenet_head.py b/luxonis_train/nodes/heads/bisenet_head.py new file mode 100644 index 00000000..dd6e6333 --- /dev/null +++ b/luxonis_train/nodes/heads/bisenet_head.py @@ -0,0 +1,58 @@ +from typing import Any + +from luxonis_ml.data import LabelType +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ConvModule +from luxonis_train.utils import infer_upscale_factor + + +class BiSeNetHead(BaseNode[Tensor, Tensor]): + in_height: int + in_width: int + in_channels: int + + tasks: list[LabelType] = [LabelType.SEGMENTATION] + + def __init__(self, intermediate_channels: int = 64, **kwargs: Any): + """BiSeNet segmentation head. + + Source: U{BiseNetV1} + @license: NOT SPECIFIED. + @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } + + @type intermediate_channels: int + @param intermediate_channels: How many intermediate channels to use. + Defaults to C{64}. + """ + super().__init__(**kwargs) + + h, w = self.original_in_shape[1:] + upscale_factor = 2 ** infer_upscale_factor( + (self.in_height, self.in_width), (h, w) + ) + out_channels = self.n_classes * upscale_factor * upscale_factor + + self.conv_3x3 = ConvModule( + self.in_channels, + intermediate_channels, + kernel_size=3, + stride=1, + padding=1, + ) + self.conv_1x1 = nn.Conv2d( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + ) + self.upscale = nn.PixelShuffle(upscale_factor) + + def forward(self, inputs: Tensor) -> Tensor: + x = self.conv_3x3(inputs) + x = self.conv_1x1(x) + return self.upscale(x) diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/heads/classification_head.py similarity index 55% rename from luxonis_train/nodes/classification_head.py rename to luxonis_train/nodes/heads/classification_head.py index 10f9b3c9..5961c853 100644 --- a/luxonis_train/nodes/classification_head.py +++ b/luxonis_train/nodes/heads/classification_head.py @@ -1,26 +1,26 @@ -from torch import Tensor, nn +from typing import Any -from luxonis_train.utils.types import LabelType, Packet +from torch import Tensor, nn -from .base_node import BaseNode +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.utils.types import LabelType class ClassificationHead(BaseNode[Tensor, Tensor]): in_channels: int - attach_index: int = -1 + tasks: list[LabelType] = [LabelType.CLASSIFICATION] - def __init__( - self, - dropout_rate: float = 0.2, - **kwargs, - ): + def __init__(self, dropout_rate: float = 0.2, **kwargs: Any): """Simple classification head. + Consists of a global average pooling layer followed by a dropout + layer and a single linear layer. + @type dropout_rate: float - @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults - to C{0.2}. + @param dropout_rate: Dropout rate before last layer, range C{[0, + 1]}. Defaults to C{0.2}. 
""" - super().__init__(task_type=LabelType.CLASSIFICATION, **kwargs) + super().__init__(**kwargs) self.head = nn.Sequential( nn.AdaptiveAvgPool2d(1), @@ -31,6 +31,3 @@ def __init__( def forward(self, inputs: Tensor) -> Tensor: return self.head(inputs) - - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"classes": [output]} diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py new file mode 100644 index 00000000..5e8468b0 --- /dev/null +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -0,0 +1,109 @@ +import logging + +import torch +import torch.nn as nn +from torch import Tensor + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.utils.general import infer_upscale_factor +from luxonis_train.utils.types import LabelType + +logger = logging.getLogger(__name__) + + +class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): + attach_index: int = -1 + in_height: int + in_width: int + in_channels: int + + tasks: list[LabelType] = [LabelType.SEGMENTATION] + + def __init__( + self, + inter_channels: int = 64, + inter_mode: str = "bilinear", + **kwargs, + ): + """DDRNet segmentation head. + + @see: U{Adapted from } + @see: U{Original code } + @see: U{Paper } + @license: U{Apache License, Version 2.0 } + @type inter_channels: int + @param inter_channels: Width of internal conv. Must be a multiple of + scale_factor^2 when inter_mode is pixel_shuffle. Defaults to 64. + @type inter_mode: str + @param inter_mode: Upsampling method. One of nearest, linear, bilinear, bicubic, + trilinear, area or pixel_shuffle. If pixel_shuffle is set, nn.PixelShuffle + is used for scaling. Defaults to "bilinear". + """ + super().__init__(**kwargs) + model_in_h, model_in_w = self.original_in_shape[1:] + scale_factor = 2 ** infer_upscale_factor( + (self.in_height, self.in_width), (model_in_h, model_in_w) + ) + self.scale_factor = scale_factor + + if ( + inter_mode == "pixel_shuffle" + and inter_channels % (scale_factor**2) != 0 + ): + raise ValueError( + "For pixel_shuffle, inter_channels must be a multiple of scale_factor^2." + ) + + self.bn1 = nn.BatchNorm2d(self.in_channels) + self.conv1 = nn.Conv2d( + self.in_channels, + inter_channels, + kernel_size=3, + padding=1, + bias=False, + ) + self.bn2 = nn.BatchNorm2d(inter_channels) + self.relu = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d( + inter_channels, + inter_channels + if inter_mode == "pixel_shuffle" + else self.n_classes, + kernel_size=1, + padding=0, + bias=True, + ) + self.upscale = ( + nn.PixelShuffle(scale_factor) + if inter_mode == "pixel_shuffle" + else nn.Upsample(scale_factor=scale_factor, mode=inter_mode) + ) + + def forward(self, inputs: Tensor) -> Tensor: + x = self.relu(self.bn1(inputs)) + x = self.conv1(x) + x = self.relu(self.bn2(x)) + x = self.conv2(x) + x = self.upscale(x) + if self.export: + return x.argmax(dim=1) + return x + + def set_export_mode(self, mode: bool = True) -> None: + """Sets the module to export mode. + + Replaces the forward method with a constant empty tensor. + + @warning: The replacement is destructive and cannot be undone. + @type mode: bool + @param mode: Whether to set the export mode to True or False. + Defaults to True. 
+ """ + super().set_export_mode(mode) + if self.export and self.attach_index != -1: + logger.info("Removing the auxiliary head.") + + self.forward = lambda inputs: torch.tensor([]) diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py similarity index 68% rename from luxonis_train/nodes/efficient_bbox_head.py rename to luxonis_train/nodes/heads/efficient_bbox_head.py index 9f500cd4..6f0e01e7 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -1,63 +1,73 @@ -"""Head for object detection. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -""" - -from typing import Literal +import logging +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import EfficientDecoupledBlock -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, anchors_for_fpn_features, dist2bbox, non_max_suppression, ) -from luxonis_train.utils.types import LabelType, Packet -from .base_node import BaseNode +logger = logging.getLogger(__name__) class EfficientBBoxHead( BaseNode[list[Tensor], tuple[list[Tensor], list[Tensor], list[Tensor]]] ): in_channels: list[int] + tasks: list[LabelType] = [LabelType.BOUNDINGBOX] def __init__( self, n_heads: Literal[2, 3, 4] = 3, conf_thres: float = 0.25, iou_thres: float = 0.45, - **kwargs, + max_det: int = 300, + **kwargs: Any, ): """Head for object detection. - TODO: add more documentation - + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. @type n_heads: Literal[2,3,4] - @param n_heads: Number of output heads. Defaults to 3. - ***Note:*** Should be same also on neck in most cases. - + @param n_heads: Number of output heads. Defaults to 3. B{Note:} + Should be same also on neck in most cases. @type conf_thres: float - @param conf_thres: Threshold for confidence. Defaults to C{0.25}. - + @param conf_thres: Threshold for confidence. Defaults to + C{0.25}. @type iou_thres: float @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + @type max_det: int + @param max_det: Maximum number of detections retained after NMS. + Defaults to C{300}. """ - super().__init__(task_type=LabelType.BOUNDINGBOX, **kwargs) + super().__init__(**kwargs) self.n_heads = n_heads self.conf_thres = conf_thres self.iou_thres = iou_thres + self.max_det = max_det - self.stride = self._fit_stride_to_num_heads() + self.stride = self._fit_stride_to_n_heads() self.grid_cell_offset = 0.5 self.grid_cell_size = 5.0 self.heads = nn.ModuleList() + if len(self.in_channels) < self.n_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.n_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." 
+ ) + self.n_heads = len(self.in_channels) for i in range(self.n_heads): curr_head = EfficientDecoupledBlock( n_classes=self.n_classes, @@ -87,18 +97,25 @@ def wrap( features, cls_score_list, reg_distri_list = output if self.export: - outputs = [] - for out_cls, out_reg in zip(cls_score_list, reg_distri_list, strict=True): + outputs: list[Tensor] = [] + for out_cls, out_reg in zip( + cls_score_list, reg_distri_list, strict=True + ): conf, _ = out_cls.max(1, keepdim=True) out = torch.cat([out_reg, conf, out_cls], dim=1) outputs.append(out) - return {"boxes": outputs} + return {self.task: outputs} cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) if self.training: @@ -111,17 +128,18 @@ def wrap( else: boxes = self._process_to_bbox((features, cls_tensor, reg_tensor)) return { - "boxes": boxes, + "boundingbox": boxes, "features": features, "class_scores": [cls_tensor], "distributions": [reg_tensor], } - def _fit_stride_to_num_heads(self): - """Returns correct stride for number of heads and attach index.""" + def _fit_stride_to_n_heads(self): + """Returns correct stride for number of heads and attach + index.""" stride = torch.tensor( [ - self.original_in_shape[2] / x[2] # type: ignore + self.original_in_shape[1] / x[2] # type: ignore for x in self.in_sizes[: self.n_heads] ], dtype=torch.int, @@ -131,7 +149,8 @@ def _fit_stride_to_num_heads(self): def _process_to_bbox( self, output: tuple[list[Tensor], Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_list, reg_dist_list = output _, anchor_points, _, stride_tensor = anchors_for_fpn_features( features, @@ -141,7 +160,9 @@ def _process_to_bbox( multiply_with_stride=False, ) - pred_bboxes = dist2bbox(reg_dist_list, anchor_points, out_format="xyxy") + pred_bboxes = dist2bbox( + reg_dist_list, anchor_points, out_format="xyxy" + ) pred_bboxes *= stride_tensor output_merged = torch.cat( @@ -163,5 +184,6 @@ def _process_to_bbox( conf_thres=self.conf_thres, iou_thres=self.iou_thres, bbox_format="xyxy", + max_det=self.max_det, predicts_objectness=False, ) diff --git a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py new file mode 100644 index 00000000..51b8b704 --- /dev/null +++ b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py @@ -0,0 +1,214 @@ +from typing import Any, Literal + +import torch +from luxonis_ml.data import LabelType +from torch import Tensor, nn + +from luxonis_train.nodes.blocks import ConvModule +from luxonis_train.utils import ( + Packet, + anchors_for_fpn_features, + dist2bbox, + non_max_suppression, +) + +from .efficient_bbox_head import EfficientBBoxHead + + +class EfficientKeypointBBoxHead(EfficientBBoxHead): + tasks: list[LabelType] = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] + + def __init__( + self, + n_heads: Literal[2, 3, 4] = 3, + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 300, + **kwargs: Any, + ): + """Head for object and keypoint detection. 
+ + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial + Applications}. + + @param n_heads: Number of output heads. Defaults to C{3}. + B{Note:} Should be same also on neck in most cases. + @type n_heads: int + + @param conf_thres: Threshold for confidence. Defaults to C{0.25}. + @type conf_thres: float + + @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + @type iou_thres: float + + @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. + @type max_det: int + """ + super().__init__( + n_heads=n_heads, + conf_thres=conf_thres, + iou_thres=iou_thres, + max_det=max_det, + **kwargs, + ) + + self.nk = self.n_keypoints * 3 + + mid_ch = max(self.in_channels[0] // 4, self.nk) + self.kpt_layers = nn.ModuleList( + nn.Sequential( + ConvModule(x, mid_ch, 3, 1, 1, activation=nn.SiLU()), + ConvModule(mid_ch, mid_ch, 3, 1, 1, activation=nn.SiLU()), + nn.Conv2d(mid_ch, self.nk, 1, 1), + ) + for x in self.in_channels + ) + + def forward( + self, inputs: list[Tensor] + ) -> tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]]: + features, cls_score_list, reg_distri_list = super().forward(inputs) + + ( + _, + self.anchor_points, + _, + self.stride_tensor, + ) = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=False, + ) + + kpt_list: list[Tensor] = [] + for i in range(self.n_heads): + kpt_pred = self.kpt_layers[i](inputs[i]) + kpt_list.append(kpt_pred) + + return features, cls_score_list, reg_distri_list, kpt_list + + def wrap( + self, + output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]], + ) -> Packet[Tensor]: + features, cls_score_list, reg_distri_list, kpt_list = output + bs = features[0].shape[0] + if self.export: + outputs: list[Tensor] = [] + for out_cls, out_reg, out_kpts in zip( + cls_score_list, reg_distri_list, kpt_list, strict=True + ): + chunks = torch.split(out_kpts, 3, dim=1) + modified_chunks: list[Tensor] = [] + for chunk in chunks: + x = chunk[:, 0:1, :, :] + y = chunk[:, 1:2, :, :] + v = torch.sigmoid(chunk[:, 2:3, :, :]) + modified_chunk = torch.cat([x, y, v], dim=1) + modified_chunks.append(modified_chunk) + out_kpts_modified = torch.cat(modified_chunks, dim=1) + out = torch.cat([out_reg, out_cls, out_kpts_modified], dim=1) + outputs.append(out) + return {"outputs": outputs} + + cls_tensor = torch.cat( + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, + ).permute(0, 2, 1) + reg_tensor = torch.cat( + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, + ).permute(0, 2, 1) + kpt_tensor = torch.cat( + [ + kpt_list[i].view(bs, self.nk, -1).flatten(2) + for i in range(len(kpt_list)) + ], + dim=2, + ).permute(0, 2, 1) + + if self.training: + return { + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "keypoints_raw": [kpt_tensor], + } + + pred_kpt = self._dist2kpts(kpt_tensor) + detections = self._process_to_bbox_and_kps( + (features, cls_tensor, reg_tensor, pred_kpt) + ) + return { + "boundingbox": [detection[:, :6] for detection in detections], + "keypoints": [ + detection[:, 6:].reshape(-1, self.n_keypoints, 3) + for detection in detections + ], + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "keypoints_raw": [kpt_tensor], + } + + def _dist2kpts(self, kpts: Tensor) -> Tensor: + """Decodes keypoints.""" + y = kpts.clone() + + anchor_points_transposed = 
self.anchor_points.transpose(0, 1) + stride_tensor = self.stride_tensor.squeeze(-1) + + stride_tensor = stride_tensor.view(1, -1, 1) + anchor_points_x = anchor_points_transposed[0].view(1, -1, 1) + anchor_points_y = anchor_points_transposed[1].view(1, -1, 1) + + y[:, :, 0::3] = ( + y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5) + ) * stride_tensor + y[:, :, 1::3] = ( + y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5) + ) * stride_tensor + y[:, :, 2::3] = y[:, :, 2::3].sigmoid() + + return y + + def _process_to_bbox_and_kps( + self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] + ) -> list[Tensor]: + """Performs post-processing of the output and returns bboxs + after NMS.""" + features, cls_score_list, reg_dist_list, keypoints = output + + pred_bboxes = dist2bbox( + reg_dist_list, self.anchor_points, out_format="xyxy" + ) + + pred_bboxes *= self.stride_tensor + output_merged = torch.cat( + [ + pred_bboxes, + torch.ones( + (features[-1].shape[0], pred_bboxes.shape[1], 1), + dtype=pred_bboxes.dtype, + device=pred_bboxes.device, + ), + cls_score_list, + keypoints, + ], + dim=-1, + ) + + return non_max_suppression( + output_merged, + n_classes=self.n_classes, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + bbox_format="xyxy", + max_det=self.max_det, + predicts_objectness=False, + ) diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py new file mode 100644 index 00000000..ff9eb06d --- /dev/null +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -0,0 +1,198 @@ +import math +from typing import Literal + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.blocks import EfficientOBBDecoupledBlock +from luxonis_train.nodes.heads import EfficientBBoxHead +from luxonis_train.utils import ( + anchors_for_fpn_features, + dist2rbbox, + non_max_suppression_obb, +) +from luxonis_train.utils.types import LabelType, Packet + + +class EfficientOBBoxHead(EfficientBBoxHead): + tasks: list[LabelType] = [LabelType.OBOUNDINGBOX] + + def __init__( + self, + n_heads: Literal[2, 3, 4] = 3, + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 300, + reg_max: int = 16, + **kwargs, + ): + """Head for object detection using oriented bounding boxes. + + TODO: add more documentation + + @type n_heads: Literal[2,3,4] + @param n_heads: Number of output heads. Defaults to 3. + ***Note:*** Should be same also on neck in most cases. + + @type conf_thres: float + @param conf_thres: Threshold for confidence. Defaults to C{0.25}. + + @type iou_thres: float + @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + + @type max_det: int + @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. + + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of bounding box coordinates. 
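A self-contained sketch of the grid-free keypoint decoding performed by `_dist2kpts` above: raw x/y offsets are scaled by two, shifted by the (anchor - 0.5) cell centre and multiplied by the stride, while visibility logits get a sigmoid. Shapes and values are illustrative only.

```python
import torch

n_keypoints = 3                                    # x, y, visibility per keypoint
kpts = torch.randn(1, 2, n_keypoints * 3)          # raw predictions, made-up shape
anchor_points = torch.tensor([[0.5, 0.5], [1.5, 0.5]])  # (n_anchors, 2), assumed
stride = torch.tensor([8.0, 8.0]).view(1, -1, 1)

ax = anchor_points[:, 0].view(1, -1, 1)
ay = anchor_points[:, 1].view(1, -1, 1)

decoded = kpts.clone()
decoded[:, :, 0::3] = (decoded[:, :, 0::3] * 2.0 + (ax - 0.5)) * stride
decoded[:, :, 1::3] = (decoded[:, :, 1::3] * 2.0 + (ay - 0.5)) * stride
decoded[:, :, 2::3] = decoded[:, :, 2::3].sigmoid()   # visibility in [0, 1]
```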
+ """ + super().__init__(n_heads, conf_thres, iou_thres, max_det, **kwargs) + + self.reg_max = reg_max + + self.heads = nn.ModuleList() + for i in range(self.n_heads): + curr_head = EfficientOBBDecoupledBlock( + n_classes=self.n_classes, + in_channels=self.in_channels[i], + ) + self.heads.append(curr_head) + + def forward( + self, inputs: list[Tensor] + ) -> tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]]: + features: list[Tensor] = [] + cls_score_list: list[Tensor] = [] + reg_distri_list: list[Tensor] = [] + angles_list: list[Tensor] = [] + + for i, module in enumerate(self.heads): + out_feature, out_cls, out_reg, out_angle = module(inputs[i]) + features.append(out_feature) + + out_cls = torch.sigmoid(out_cls) + cls_score_list.append(out_cls) + + reg_distri_list.append(out_reg) + + # out_angle = (out_angle.sigmoid() - 0.25) * math.pi # [-pi/4, 3pi/4] + out_angle = out_angle.sigmoid() * math.pi / 2 # [0, pi/2] + angles_list.append(out_angle) + + return features, cls_score_list, reg_distri_list, angles_list + + def wrap( + self, + output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]], + ) -> Packet[Tensor]: + features, cls_score_list, reg_distri_list, angles_list = output + + if self.export: + outputs = [] + for out_cls, out_reg, out_angles in zip( + cls_score_list, reg_distri_list, angles_list, strict=True + ): + conf, _ = out_cls.max(1, keepdim=True) + out = torch.cat([out_reg, conf, out_cls, out_angles], dim=1) + outputs.append(out) + return {self.task: outputs} + + angle_tensor = torch.cat( + [angles_list[i].flatten(2) for i in range(len(angles_list))], dim=2 + ).permute(0, 2, 1) + cls_tensor = torch.cat( + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, + ).permute(0, 2, 1) + reg_tensor = torch.cat( + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, + ).permute(0, 2, 1) + + if self.training: + return { + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "angles": [angle_tensor], + } + + else: + boxes = self._process_to_bbox( + (features, cls_tensor, reg_tensor, angle_tensor) + ) + return { + "oboundingbox": boxes, + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "angles": [angle_tensor], + } + + def _process_to_bbox( + self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] + ) -> list[Tensor]: + """Performs post-processing of the output and returns bboxs + after NMS.""" + features, cls_score_tensor, reg_dist_tensor, angles_tensor = output + _, anchor_points, _, stride_tensor = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=False, + ) + + # The following block below is implied for the distributed predictions of the regression + # branch (used in DFL) + # if self.use_dfl: # consider adding this as a parameter + proj = torch.arange( + self.reg_max, dtype=torch.float, device=reg_dist_tensor.device + ) + b, a, c = reg_dist_tensor.shape # batch, anchors, channels + reg_dist_mean_tensor = ( # we get a tensor of the expected values (mean) of the regression predictions + reg_dist_tensor.view(b, a, 4, c // 4) + .softmax(3) + .matmul(proj.type(reg_dist_tensor.dtype)) + ) + pred_bboxes = torch.cat( + ( + dist2rbbox(reg_dist_mean_tensor, angles_tensor, anchor_points), + angles_tensor, + ), + dim=-1, + ) # xywhr + + xy_strided = pred_bboxes[..., :2] * stride_tensor + pred_bboxes = torch.cat( + [xy_strided, pred_bboxes[..., 2:]], dim=-1 + ) # 
xywhr with xy strided + + output_merged = torch.cat( + [ + pred_bboxes, + torch.ones( + (features[-1].shape[0], pred_bboxes.shape[1], 1), + dtype=pred_bboxes.dtype, + device=pred_bboxes.device, + ), + cls_score_tensor, + ], + dim=-1, + ) + + # pred = torch.rand((2, 1344, 15), device=pred_bboxes.device) + # pred[..., 5] = 1 + + return non_max_suppression_obb( + output_merged, + # pred, # for debugging + n_classes=self.n_classes, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + max_det=self.max_det, + predicts_objectness=False, + ) diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py similarity index 70% rename from luxonis_train/nodes/implicit_keypoint_bbox_head.py rename to luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index 0fdca420..5de88650 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -1,38 +1,38 @@ import logging import math -from typing import Literal, cast +from typing import Any, cast import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn -from luxonis_train.nodes.blocks import ( - KeypointBlock, - LearnableMulAddConv, -) -from luxonis_train.utils.boxutils import ( +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv +from luxonis_train.utils import ( + Packet, non_max_suppression, process_bbox_predictions, process_keypoints_predictions, ) -from luxonis_train.utils.types import LabelType, Packet - -from .base_node import BaseNode logger = logging.getLogger(__name__) -class ImplicitKeypointBBoxHead(BaseNode): - attach_index: Literal["all"] = "all" +class ImplicitKeypointBBoxHead( + BaseNode[list[Tensor], tuple[list[Tensor], Tensor]] +): + tasks = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] + in_channels: list[int] def __init__( self, - n_keypoints: int | None = None, - num_heads: int = 3, + n_heads: int = 3, anchors: list[list[float]] | None = None, init_coco_biases: bool = True, conf_thres: float = 0.25, iou_thres: float = 0.45, - **kwargs, + max_det: int = 300, + **kwargs: Any, ): """Head for object and keypoint detection. @@ -41,11 +41,8 @@ def __init__( TODO: more technical documentation - @type n_keypoints: int | None - @param n_keypoints: Number of keypoints. If not defined, inferred - from the dataset metadata (if provided). Defaults to C{None}. - @type num_heads: int - @param num_heads: Number of output heads. Defaults to C{3}. + @type n_heads: int + @param n_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. @type anchors: list[list[float]] | None @param anchors: Anchors used for object detection. @@ -55,26 +52,32 @@ def __init__( @param conf_thres: Threshold for confidence. Defaults to C{0.25}. @type iou_thres: float @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + @type max_det: int + @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. """ - super().__init__(task_type=LabelType.KEYPOINT, **kwargs) - - if anchors is None: - logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors(num_heads) - logger.info(f"Anchors generated. 
Best possible recall: {recall:.2f}") + super().__init__(**kwargs) self.conf_thres = conf_thres self.iou_thres = iou_thres + self.max_det = max_det + + self.n_heads = n_heads + if len(self.in_channels) < self.n_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.n_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." + ) + self.n_heads = len(self.in_channels) - n_keypoints = n_keypoints or self.dataset_metadata._n_keypoints - - if n_keypoints is None: - raise ValueError( - "Number of keypoints must be specified either in the constructor or " - "in the dataset metadata." + if anchors is None: + logger.info("No anchors provided, generating them automatically.") + anchors, recall = self.dataset_metadata.autogenerate_anchors( + self.n_heads + ) + logger.info( + f"Anchors generated. Best possible recall: {recall:.2f}" ) - self.n_keypoints = n_keypoints - self.num_heads = num_heads self.box_offset = 5 self.n_det_out = self.n_classes + self.box_offset @@ -83,13 +86,13 @@ def __init__( self.n_anchors = len(anchors[0]) // 2 self.grid: list[Tensor] = [] - self.anchors = torch.tensor(anchors).float().view(self.num_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view(self.num_heads, 1, -1, 1, 1, 2) - - self.channel_list, self.stride = self._fit_to_num_heads( - cast(list[int], self.in_channels) + self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) + self.anchor_grid = self.anchors.clone().view( + self.n_heads, 1, -1, 1, 1, 2 ) + self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) + self.learnable_mul_add_conv = nn.ModuleList( LearnableMulAddConv( add_channel=in_channels, @@ -120,7 +123,7 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: self.anchor_grid = self.anchor_grid.to(inputs[0].device) - for i in range(self.num_heads): + for i in range(self.n_heads): feat = cast( Tensor, torch.cat( @@ -135,11 +138,17 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: batch_size, _, feature_height, feature_width = feat.shape if i >= len(self.grid): self.grid.append( - self._construct_grid(feature_width, feature_height).to(feat.device) + self._construct_grid(feature_width, feature_height).to( + feat.device + ) ) feat = feat.reshape( - batch_size, self.n_anchors, self.n_out, feature_height, feature_width + batch_size, + self.n_anchors, + self.n_out, + feature_height, + feature_width, ).permute(0, 1, 3, 4, 2) features.append(feat) @@ -151,8 +160,8 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: return features, torch.cat(predictions, dim=1) - def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: - features, predictions = outputs + def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: + features, predictions = output if self.export: return {"boxes_and_keypoints": [predictions]} @@ -166,12 +175,14 @@ def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: conf_thres=self.conf_thres, iou_thres=self.iou_thres, bbox_format="cxcywh", + max_det=self.max_det, ) return { - "boxes": [detection[:, :6] for detection in nms], + "boundingbox": [detection[:, :6] for detection in nms], "keypoints": [ - detection[:, 6:].reshape(-1, self.n_keypoints, 3) for detection in nms + detection[:, 6:].reshape(-1, self.n_keypoints, 3) + for detection in nms ], "features": features, } @@ -180,10 +191,12 @@ def _build_predictions( self, feat: Tensor, anchor_grid: Tensor, grid: Tensor, 
stride: Tensor ) -> Tensor: batch_size = feat.shape[0] - x_bbox = feat[..., : self.box_offset + self.n_classes] - x_keypoints = feat[..., self.box_offset + self.n_classes :] + bbox = feat[..., : self.box_offset + self.n_classes] + keypoints = feat[..., self.box_offset + self.n_classes :] - box_cxcy, box_wh, box_tail = process_bbox_predictions(x_bbox, anchor_grid) + box_cxcy, box_wh, box_tail = process_bbox_predictions( + bbox, anchor_grid + ) grid = grid.to(box_cxcy.device) stride = stride.to(box_cxcy.device) box_cxcy = (box_cxcy + grid) * stride @@ -191,13 +204,12 @@ def _build_predictions( grid_x = grid[..., 0:1] grid_y = grid[..., 1:2] - kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(x_keypoints) + kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(keypoints) kpt_x = (kpt_x + grid_x) * stride kpt_y = (kpt_y + grid_y) * stride - out_kpt = torch.stack([kpt_x, kpt_y, kpt_vis.sigmoid()], dim=-1).reshape( - *kpt_x.shape[:-1], -1 - ) - + kpt_vis_sig = kpt_vis.sigmoid() + out_kpt = torch.cat((kpt_x, kpt_y, kpt_vis_sig), dim=-1) + out_kpt = out_kpt.reshape(*kpt_x.shape[:-1], -1) out = torch.cat((out_bbox, out_kpt), dim=-1) return out.reshape(batch_size, -1, self.n_out) @@ -212,12 +224,14 @@ def _infer_bbox( ) return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - def _fit_to_num_heads(self, channel_list: list): - out_channel_list = channel_list[: self.num_heads] + def _fit_to_n_heads( + self, channel_list: list[int] + ) -> tuple[list[int], Tensor]: + out_channel_list = channel_list[: self.n_heads] stride = torch.tensor( [ - self.original_in_shape[2] / h - for h in cast(list[int], self.in_height)[: self.num_heads] + self.original_in_shape[1] / h + for h in cast(list[int], self.in_height)[: self.n_heads] ], dtype=torch.int, ) @@ -226,11 +240,15 @@ def _fit_to_num_heads(self, channel_list: list): def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): for m in self.modules(): if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) elif isinstance(m, nn.BatchNorm2d): m.eps = 1e-3 m.momentum = 0.03 - elif isinstance(m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6)): + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6) + ): m.inplace = True for mi, s in zip(self.learnable_mul_add_conv, self.stride): @@ -245,7 +263,8 @@ def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): def _construct_grid(self, feature_width: int, feature_height: int): grid_y, grid_x = torch.meshgrid( - [torch.arange(feature_height), torch.arange(feature_width)], indexing="ij" + [torch.arange(feature_height), torch.arange(feature_width)], + indexing="ij", ) return ( torch.stack((grid_x, grid_y), 2) diff --git a/luxonis_train/nodes/heads/segmentation_head.py b/luxonis_train/nodes/heads/segmentation_head.py new file mode 100644 index 00000000..240b956c --- /dev/null +++ b/luxonis_train/nodes/heads/segmentation_head.py @@ -0,0 +1,42 @@ +from typing import Any + +from luxonis_ml.data import LabelType +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import UpBlock +from luxonis_train.utils import infer_upscale_factor + + +class SegmentationHead(BaseNode[Tensor, Tensor]): + in_height: int + in_width: int + in_channels: int + + tasks: list[LabelType] = [LabelType.SEGMENTATION] + + def __init__(self, **kwargs: Any): + """Basic segmentation FCN head. 
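A rough, stand-alone version of the cell-grid decoding used by the implicit keypoint/bbox head above (`_construct_grid` plus the `(offset + grid) * stride` step in `_build_predictions`); the feature-map size, stride and grid view are invented for illustration.

```python
import torch

def construct_grid(width: int, height: int) -> torch.Tensor:
    # (1, 1, H, W, 2) grid of cell coordinates, analogous to _construct_grid.
    gy, gx = torch.meshgrid(
        [torch.arange(height), torch.arange(width)], indexing="ij"
    )
    return torch.stack((gx, gy), 2).view(1, 1, height, width, 2).float()

grid = construct_grid(4, 3)           # tiny 4x3 feature map, made-up size
stride = 8.0
offsets = torch.rand(1, 1, 3, 4, 2)   # predicted (x, y) offsets within each cell

xy = (offsets + grid) * stride        # cell-relative offsets -> image coordinates
print(xy.shape)                       # torch.Size([1, 1, 3, 4, 2])
```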
+ + Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} + @license: U{BSD-3 } + """ + super().__init__(**kwargs) + h, w = self.original_in_shape[1:] + n_up = infer_upscale_factor((self.in_height, self.in_width), (h, w)) + + modules: list[nn.Module] = [] + in_channels = self.in_channels + for _ in range(int(n_up)): + modules.append( + UpBlock(in_channels=in_channels, out_channels=in_channels // 2) + ) + in_channels //= 2 + + self.head = nn.Sequential( + *modules, + nn.Conv2d(in_channels, self.n_classes, kernel_size=1), + ) + + def forward(self, inputs: Tensor) -> Tensor: + return self.head(inputs) diff --git a/luxonis_train/nodes/micronet.py b/luxonis_train/nodes/micronet.py deleted file mode 100644 index 03b43e1f..00000000 --- a/luxonis_train/nodes/micronet.py +++ /dev/null @@ -1,847 +0,0 @@ -from typing import Literal - -import torch -from torch import Tensor, nn - -from luxonis_train.nodes.activations import HSigmoid, HSwish -from luxonis_train.nodes.blocks import ConvModule - -from .base_node import BaseNode - - -class MicroNet(BaseNode[Tensor, list[Tensor]]): - """ - - TODO: DOCS - """ - - attach_index: int = -1 - - def __init__(self, variant: Literal["M1", "M2", "M3"] = "M1", **kwargs): - """MicroNet backbone. - - @type variant: Literal["M1", "M2", "M3"] - @param variant: Model variant to use. Defaults to "M1". - """ - super().__init__(**kwargs) - - if variant not in MICRONET_VARIANTS_SETTINGS: - raise ValueError( - f"MicroNet model variant should be in {list(MICRONET_VARIANTS_SETTINGS.keys())}" - ) - - self.inplanes = 64 - ( - in_channels, - stem_groups, - _, - init_a, - init_b, - out_indices, - channels, - cfgs, - ) = MICRONET_VARIANTS_SETTINGS[variant] - self.out_indices = out_indices - self.channels = channels - - self.features = nn.ModuleList([Stem(3, 2, stem_groups)]) - - for ( - stride, - out_channels, - kernel_size, - c1, - c2, - g1, - g2, - _, - g3, - g4, - y1, - y2, - y3, - r, - ) in cfgs: - self.features.append( - MicroBlock( - in_channels, - out_channels, - kernel_size, - stride, - (c1, c2), - (g1, g2), - (g3, g4), - (y1, y2, y3), - r, - init_a, - init_b, - ) - ) - in_channels = out_channels - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for m in self.features: - x = m(x) - outs.append(x) - return outs - - -class MicroBlock(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: int = 3, - stride: int = 1, - t1: tuple[int, int] = (2, 2), - gs1: tuple[int, int] = (0, 6), - groups_1x1: tuple[int, int] = (1, 1), - dy: tuple[int, int, int] = (2, 0, 1), - r: int = 1, - init_a: tuple[float, float] = (1.0, 1.0), - init_b: tuple[float, float] = (0.0, 0.0), - ): - super().__init__() - - self.identity = stride == 1 and in_channels == out_channels - y1, y2, y3 = dy - g1, g2 = groups_1x1 - reduction = 8 * r - intermediate_channels = in_channels * t1[0] * t1[1] - - if gs1[0] == 0: - self.layers = nn.Sequential( - DepthSpatialSepConv(in_channels, t1, kernel_size, stride), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - ChannelShuffle(intermediate_channels // 2) - if y2 != 0 - else nn.Sequential(), - ConvModule( - in_channels=intermediate_channels, - out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - 
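The new SegmentationHead builds one 2x upsampling block per factor of two between the feature map and the original input resolution, halving the channel count each time. Below is a rough stand-alone equivalent using plain torch layers; `infer_upscale_factor` and `UpBlock` are replaced by assumed stand-ins and all sizes are made up.

```python
import math

import torch
from torch import nn

in_channels, in_h, in_w = 256, 32, 32     # backbone output, assumed 1/8 resolution
target_h, target_w = 256, 256
n_classes = 4

# Stand-in for infer_upscale_factor: number of 2x upsamplings needed.
n_up = int(math.log2(target_h // in_h))

blocks: list[nn.Module] = []
ch = in_channels
for _ in range(n_up):
    # Stand-in for UpBlock: doubles the spatial size, halves the channels.
    blocks += [nn.ConvTranspose2d(ch, ch // 2, kernel_size=2, stride=2), nn.ReLU()]
    ch //= 2

head = nn.Sequential(*blocks, nn.Conv2d(ch, n_classes, kernel_size=1))
print(head(torch.rand(1, in_channels, in_h, in_w)).shape)  # [1, 4, 256, 256]
```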
reduction // 2, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) - if out_channels % 2 == 0 and y3 != 0 - else nn.Sequential(), - ) - elif g2 == 0: - self.layers = nn.Sequential( - ConvModule( - in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - gs1[1], - reduction, - ) - if y3 > 0 - else nn.Sequential(), - ) - else: - self.layers = nn.Sequential( - ConvModule( - in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y1 == 2 else False, - gs1[1], - reduction, - ) - if y1 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - DepthSpatialSepConv(intermediate_channels, (1, 1), kernel_size, stride), - nn.Sequential(), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - True, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(intermediate_channels // 4) - if y1 != 0 and y2 != 0 - else nn.Sequential() - if y1 == 0 and y2 == 0 - else ChannelShuffle(intermediate_channels // 2), - ConvModule( - in_channels=intermediate_channels, - out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - reduction=reduction // 2 - if out_channels < intermediate_channels - else reduction, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) if y3 != 0 else nn.Sequential(), - ) - - def forward(self, x: Tensor): - identity = x - out = self.layers(x) - if self.identity: - out += identity - return out - - -class ChannelShuffle(nn.Module): - def __init__(self, groups: int): - super(ChannelShuffle, self).__init__() - self.groups = groups - - def forward(self, x): - b, c, h, w = x.size() - channels_per_group = c // self.groups - # reshape - x = x.view(b, self.groups, channels_per_group, h, w) - x = torch.transpose(x, 1, 2).contiguous() - out = x.view(b, -1, h, w) - return out - - -class DYShiftMax(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - init_a: tuple[float, float] = (0.0, 0.0), - init_b: tuple[float, float] = (0.0, 0.0), - act_relu: bool = True, - g: int = 6, - reduction: int = 4, - expansion: bool = False, - ): - super().__init__() - self.exp: Literal[2, 4] = 4 if act_relu else 2 - self.init_a = init_a - self.init_b = init_b - self.out_channels = out_channels - - self.avg_pool = nn.Sequential(nn.Sequential(), nn.AdaptiveAvgPool2d(1)) - - squeeze = self._make_divisible(in_channels // reduction, 4) - - self.fc = nn.Sequential( - nn.Linear(in_channels, squeeze), - nn.ReLU(True), - nn.Linear(squeeze, out_channels * self.exp), - HSigmoid(), - ) - - if g != 1 and expansion: - g = in_channels // g - - gc = in_channels // g - index = Tensor(range(in_channels)).view(1, in_channels, 1, 1) - index = index.view(1, g, gc, 1, 1) - indexgs = torch.split(index, [1, g - 1], dim=1) - indexgs = torch.cat([indexgs[1], indexgs[0]], dim=1) - indexs = torch.split(indexgs, [1, gc - 1], dim=2) - indexs = torch.cat([indexs[1], indexs[0]], dim=2) - self.index = indexs.view(in_channels).long() - - def forward(self, x: Tensor): - B, C, _, _ = x.shape - x_out = x - 
- y = self.avg_pool(x).view(B, C) - y = self.fc(y).view(B, -1, 1, 1) - y = (y - 0.5) * 4.0 - - x2 = x_out[:, self.index, :, :] - - if self.exp == 4: - a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) - - a1 = a1 + self.init_a[0] - a2 = a2 + self.init_b[1] - b1 = b1 + self.init_b[0] - b2 = b2 + self.init_b[1] - - z1 = x_out * a1 + x2 * b1 - z2 = x_out * a2 + x2 * b2 - - out = torch.max(z1, z2) - - elif self.exp == 2: - a1, b1 = torch.split(y, self.out_channels, dim=1) - a1 = a1 + self.init_a[0] - b1 = b1 + self.init_b[0] - out = x_out * a1 + x2 * b1 - else: - raise RuntimeError("Expansion should be 2 or 4.") - - return out - - def _make_divisible(self, v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class SwishLinear(nn.Module): - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - self.linear = nn.Sequential( - nn.Linear(in_channels, out_channels), nn.BatchNorm1d(out_channels), HSwish() - ) - - def forward(self, x: Tensor): - return self.linear(x) - - -class SpatialSepConvSF(nn.Module): - def __init__( - self, in_channels: int, outs: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - out_channels1, out_channels2 = outs - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - out_channels1, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - bias=False, - ), - nn.BatchNorm2d(out_channels1), - nn.Conv2d( - out_channels1, - out_channels1 * out_channels2, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - groups=out_channels1, - bias=False, - ), - nn.BatchNorm2d(out_channels1 * out_channels2), - ChannelShuffle(out_channels1), - ) - - def forward(self, x: Tensor): - return self.conv(x) - - -class Stem(nn.Module): - def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4)): - super().__init__() - self.stem = nn.Sequential( - SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) - ) - - def forward(self, x: Tensor): - return self.stem(x) - - -class DepthSpatialSepConv(nn.Module): - def __init__( - self, in_channels: int, expand: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - exp1, exp2 = expand - intermediate_channels = in_channels * exp1 - out_channels = in_channels * exp1 * exp2 - - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - intermediate_channels, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - groups=in_channels, - bias=False, - ), - nn.BatchNorm2d(intermediate_channels), - nn.Conv2d( - intermediate_channels, - out_channels, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - groups=intermediate_channels, - bias=False, - ), - nn.BatchNorm2d(out_channels), - ) - - def forward(self, x: Tensor): - return self.conv(x) - - -MICRONET_VARIANTS_SETTINGS = { - "M1": [ - 6, # stem_ch - [3, 2], # stem_groups - 960, # out_ch - [1.0, 1.0], # init_a - [0.0, 0.0], # init_b - [1, 2, 4, 7], # out indices - [8, 16, 32, 576], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [2, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1], - [2, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1], - [ - 2, - 16, - 5, - 2, - 2, - 0, - 16, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 4, - 4, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 2, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 1, - ], 
- [ - 1, - 96, - 3, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2], # 96->96(4,24)->576 - ], - ], - "M2": [ - 8, - [4, 2], - 1024, - [1.0, 1.0], - [0.0, 0.0], - [1, 3, 6, 9], - [12, 24, 64, 768], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 12, - 3, - 2, - 2, - 0, - 8, - 12, - 4, - 4, - 2, - 0, - 1, - 1, - ], - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 2, - 2, - 1, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 2, - 96, - 5, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 1, - 128, - 3, - 1, - 6, - 12, - 12, - 128, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2], - ], - ], - "M3": [ - 12, - [4, 3], - 1024, - [1.0, 0.5], - [0.0, 0.5], - [1, 3, 8, 12], - [16, 24, 80, 864], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 2, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 24, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 0, - 2, - 0, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 48, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 8, - 8, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 10, - 10, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 2, - 120, - 5, - 1, - 6, - 10, - 10, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 120, - 5, - 1, - 6, - 12, - 12, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 144, - 3, - 1, - 6, - 12, - 12, - 144, - 12, - 12, - 0, - 2, - 0, - 2, - ], - [1, 864, 3, 1, 6, 12, 12, 0, 0, 0, 0, 2, 0, 2], - ], - ], -} diff --git a/luxonis_train/nodes/mobilenetv2.py b/luxonis_train/nodes/mobilenetv2.py deleted file mode 100644 index 27fe87ec..00000000 --- a/luxonis_train/nodes/mobilenetv2.py +++ /dev/null @@ -1,45 +0,0 @@ -"""MobileNetV2 backbone. - -TODO: source? -""" - -import torchvision -from torch import Tensor - -from .base_node import BaseNode - - -class MobileNetV2(BaseNode[Tensor, list[Tensor]]): - """Implementation of the MobileNetV2 backbone. - - TODO: add more info - """ - - attach_index: int = -1 - - def __init__(self, download_weights: bool = False, **kwargs): - """Constructor of the MobileNetV2 backbone. - - @type download_weights: bool - @param download_weights: If True download weights from imagenet. Defaults to - False. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. 
- """ - super().__init__(**kwargs) - - mobilenet_v2 = torchvision.models.mobilenet_v2( - weights="DEFAULT" if download_weights else None - ) - self.out_indices = [3, 6, 13, 17] - self.channels = [24, 32, 96, 320] - self.backbone = mobilenet_v2 - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for i, m in enumerate(self.backbone.features): - x = m(x) - if i in self.out_indices: - outs.append(x) - - return outs diff --git a/luxonis_train/nodes/mobileone.py b/luxonis_train/nodes/mobileone.py deleted file mode 100644 index e92d3225..00000000 --- a/luxonis_train/nodes/mobileone.py +++ /dev/null @@ -1,430 +0,0 @@ -"""MobileOne backbone. - -Soure: U{https://github.com/apple/ml-mobileone} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -""" - - -from typing import Literal - -import torch -from torch import Tensor, nn - -from luxonis_train.nodes.blocks import ConvModule, SqueezeExciteBlock - -from .base_node import BaseNode - - -class MobileOne(BaseNode[Tensor, list[Tensor]]): - """Implementation of MobileOne backbone. - - TODO: add more details - """ - - attach_index: int = -1 - in_channels: int - - VARIANTS_SETTINGS: dict[str, dict] = { - "s0": {"width_multipliers": (0.75, 1.0, 1.0, 2.0), "num_conv_branches": 4}, - "s1": {"width_multipliers": (1.5, 1.5, 2.0, 2.5)}, - "s2": {"width_multipliers": (1.5, 2.0, 2.5, 4.0)}, - "s3": {"width_multipliers": (2.0, 2.5, 3.0, 4.0)}, - "s4": {"width_multipliers": (3.0, 3.5, 3.5, 4.0), "use_se": True}, - } - - def __init__(self, variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", **kwargs): - """Constructor for the MobileOne module. - - @type variant: Literal["s0", "s1", "s2", "s3", "s4"] - @param variant: Specifies which variant of the MobileOne network to use. For - details, see TODO. Defaults to "s0". 
- """ - super().__init__(**kwargs) - - if variant not in MobileOne.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"MobileOne model variant should be in {list(MobileOne.VARIANTS_SETTINGS.keys())}" - ) - - variant_params = MobileOne.VARIANTS_SETTINGS[variant] - # TODO: make configurable - self.width_multipliers = variant_params["width_multipliers"] - self.num_conv_branches = variant_params.get("num_conv_branches", 1) - self.num_blocks_per_stage = [2, 8, 10, 1] - self.use_se = variant_params.get("use_se", False) - - self.in_planes = min(64, int(64 * self.width_multipliers[0])) - - self.stage0 = MobileOneBlock( - in_channels=self.in_channels, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * self.width_multipliers[0]), - self.num_blocks_per_stage[0], - num_se_blocks=0, - ) - self.stage2 = self._make_stage( - int(128 * self.width_multipliers[1]), - self.num_blocks_per_stage[1], - num_se_blocks=0, - ) - self.stage3 = self._make_stage( - int(256 * self.width_multipliers[2]), - self.num_blocks_per_stage[2], - num_se_blocks=int(self.num_blocks_per_stage[2] // 2) if self.use_se else 0, - ) - self.stage4 = self._make_stage( - int(512 * self.width_multipliers[3]), - self.num_blocks_per_stage[3], - num_se_blocks=self.num_blocks_per_stage[3] if self.use_se else 0, - ) - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - x = self.stage0(x) - outs.append(x) - x = self.stage1(x) - outs.append(x) - x = self.stage2(x) - outs.append(x) - x = self.stage3(x) - outs.append(x) - - return outs - - def export_mode(self, export: bool = True) -> None: - """Sets the module to export mode. - - Reparameterizes the model to obtain a plain CNN-like structure for inference. - TODO: add more details - - @warning: The reparametrization is destructive and cannot be reversed! - - @type export: bool - @param export: Whether to set the export mode to True or False. Defaults to True. - """ - if export: - for module in self.modules(): - if hasattr(module, "reparameterize"): - module.reparameterize() - - def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): - """Build a stage of MobileOne model. - - @type planes: int - @param planes: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks in this stage. - @type num_se_blocks: int - @param num_se_blocks: Number of SE blocks in this stage. - @rtype: nn.Sequential - @return: A stage of MobileOne model. - """ - # Get strides for all layers - strides = [2] + [1] * (num_blocks - 1) - blocks = [] - for ix, stride in enumerate(strides): - use_se = False - if num_se_blocks > num_blocks: - raise ValueError( - "Number of SE blocks cannot " "exceed number of layers." - ) - if ix >= (num_blocks - num_se_blocks): - use_se = True - - # Depthwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=self.in_planes, - kernel_size=3, - stride=stride, - padding=1, - groups=self.in_planes, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - # Pointwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=1, - stride=1, - padding=0, - groups=1, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.Sequential(*blocks) - - -class MobileOneBlock(nn.Module): - """MobileOne building block. 
- - This block has a multi-branched architecture at train-time and - plain-CNN style architecture at inference time For more details, - please refer to our paper: U{An Improved One millisecond Mobile - Backbone} - """ - - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: int, - stride: int = 1, - padding: int = 0, - groups: int = 1, - use_se: bool = False, - num_conv_branches: int = 1, - ): - """Construct a MobileOneBlock module. - - @type in_channels: int - @param in_channels: Number of channels in the input. - @type out_channels: int - @param out_channels: Number of channels produced by the block. - @type kernel_size: int - @param kernel_size: Size of the convolution kernel. - @type stride: int - @param stride: Stride size. Defaults to 1. - @type padding: int - @param padding: Zero-padding size. Defaults to 0. - @type dilation: int - @param dilation: Kernel dilation factor. Defaults to 1. - @type groups: int - @param groups: Group number. Defaults to 1. - @type use_se: bool - @param use_se: Whether to use SE-ReLU activations. Defaults to False. - @type num_conv_branches: int - @param num_conv_branches: Number of linear conv branches. Defaults to 1. - """ - super().__init__() - - self.groups = groups - self.stride = stride - self.kernel_size = kernel_size - self.in_channels = in_channels - self.out_channels = out_channels - self.num_conv_branches = num_conv_branches - self.inference_mode = False - - # Check if SE-ReLU is requested - if use_se: - self.se = SqueezeExciteBlock( - in_channels=out_channels, - intermediate_channels=int(out_channels * 0.0625), - ) - else: - self.se = nn.Identity() # type: ignore - self.activation = nn.ReLU() - - # Re-parameterizable skip connection - self.rbr_skip = ( - nn.BatchNorm2d(num_features=in_channels) - if out_channels == in_channels and stride == 1 - else None - ) - - # Re-parameterizable conv branches - rbr_conv = list() - for _ in range(self.num_conv_branches): - rbr_conv.append( - ConvModule( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=kernel_size, - stride=self.stride, - padding=padding, - groups=self.groups, - activation=nn.Identity(), - ) - ) - self.rbr_conv: list[nn.Sequential] = nn.ModuleList(rbr_conv) # type: ignore - - # Re-parameterizable scale branch - self.rbr_scale = None - if kernel_size > 1: - self.rbr_scale = ConvModule( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=1, - stride=self.stride, - padding=0, - groups=self.groups, - activation=nn.Identity(), - ) - - def forward(self, inputs: Tensor): - """Apply forward pass.""" - # Inference mode forward pass. - if self.inference_mode: - return self.activation(self.se(self.reparam_conv(inputs))) - - # Multi-branched train-time forward pass. - # Skip branch output - identity_out = 0 - if self.rbr_skip is not None: - identity_out = self.rbr_skip(inputs) - - # Scale branch output - scale_out = 0 - if self.rbr_scale is not None: - scale_out = self.rbr_scale(inputs) - - # Other branches - out = scale_out + identity_out - for ix in range(self.num_conv_branches): - out += self.rbr_conv[ix](inputs) - - return self.activation(self.se(out)) - - def reparameterize(self): - """Following works like U{RepVGG: Making VGG-style ConvNets Great Again - } - architecture used at training time to obtain a plain CNN-like structure - for inference. 
- """ - if self.inference_mode: - return - kernel, bias = self._get_kernel_bias() - self.reparam_conv = nn.Conv2d( - in_channels=self.rbr_conv[0][0].in_channels, - out_channels=self.rbr_conv[0][0].out_channels, - kernel_size=self.rbr_conv[0][0].kernel_size, - stride=self.rbr_conv[0][0].stride, - padding=self.rbr_conv[0][0].padding, - dilation=self.rbr_conv[0][0].dilation, - groups=self.rbr_conv[0][0].groups, - bias=True, - ) - self.reparam_conv.weight.data = kernel - assert self.reparam_conv.bias is not None - self.reparam_conv.bias.data = bias - - # Delete un-used branches - for para in self.parameters(): - para.detach_() - self.__delattr__("rbr_conv") - self.__delattr__("rbr_scale") - if hasattr(self, "rbr_skip"): - self.__delattr__("rbr_skip") - - self.inference_mode = True - - def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: - """Method to obtain re-parameterized kernel and bias. - Reference: U{https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83} - - @rtype: tuple[Tensor, Tensor] - @return: Tuple of (kernel, bias) after re-parameterization. - """ - # get weights and bias of scale branch - kernel_scale = torch.zeros(()) - bias_scale = torch.zeros(()) - if self.rbr_scale is not None: - kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale) - # Pad scale branch kernel to match conv branch kernel size. - pad = self.kernel_size // 2 - kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad]) - - # get weights and bias of skip branch - kernel_identity = torch.zeros(()) - bias_identity = torch.zeros(()) - if self.rbr_skip is not None: - kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip) - - # get weights and bias of conv branches - kernel_conv = torch.zeros(()) - bias_conv = torch.zeros(()) - for ix in range(self.num_conv_branches): - _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix]) - kernel_conv = kernel_conv + _kernel - bias_conv = bias_conv + _bias - - kernel_final = kernel_conv + kernel_scale + kernel_identity - bias_final = bias_conv + bias_scale + bias_identity - return kernel_final, bias_final - - def _fuse_bn_tensor(self, branch) -> tuple[Tensor, Tensor]: - """Method to fuse batchnorm layer with preceeding conv layer. - Reference: U{https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95} - - @rtype: tuple[Tensor, Tensor] - @return: Tuple of (kernel, bias) after fusing batchnorm. - """ - if isinstance(branch, nn.Sequential): - kernel = branch[0].weight - running_mean = branch[1].running_mean - running_var = branch[1].running_var - gamma = branch[1].weight - beta = branch[1].bias - eps = branch[1].eps - elif isinstance(branch, nn.BatchNorm2d): - if not hasattr(self, "id_tensor"): - input_dim = self.in_channels // self.groups - kernel_value = torch.zeros( - (self.in_channels, input_dim, self.kernel_size, self.kernel_size), - dtype=branch.weight.dtype, - device=branch.weight.device, - ) - for i in range(self.in_channels): - kernel_value[ - i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 - ] = 1 - self.id_tensor = kernel_value - kernel = self.id_tensor - running_mean = branch.running_mean - running_var = branch.running_var - gamma = branch.weight - beta = branch.bias - eps = branch.eps - else: - raise NotImplementedError( - "Only nn.BatchNorm2d and nn.Sequential " "are supported." 
- ) - assert running_var is not None - std = (running_var + eps).sqrt() - t = (gamma / std).reshape(-1, 1, 1, 1) - return kernel * t, beta - running_mean * gamma / std diff --git a/luxonis_train/nodes/necks/__init__.py b/luxonis_train/nodes/necks/__init__.py new file mode 100644 index 00000000..eef2e9a0 --- /dev/null +++ b/luxonis_train/nodes/necks/__init__.py @@ -0,0 +1,3 @@ +from .reppan_neck import RepPANNeck + +__all__ = ["RepPANNeck"] diff --git a/luxonis_train/nodes/necks/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py new file mode 100644 index 00000000..107151a6 --- /dev/null +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -0,0 +1,163 @@ +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import RepDownBlock, RepUpBlock +from luxonis_train.utils import make_divisible + + +class RepPANNeck(BaseNode[list[Tensor], list[Tensor]]): + in_channels: list[int] + + def __init__( + self, + n_heads: Literal[2, 3, 4] = 3, + channels_list: list[int] | None = None, + n_repeats: list[int] | None = None, + depth_mul: float = 0.33, + width_mul: float = 0.25, + **kwargs: Any, + ): + """Implementation of the RepPANNeck module. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications}. + It has the balance of feature fusion ability and hardware efficiency. + + @type n_heads: Literal[2,3,4] + @param n_heads: Number of output heads. Defaults to 3. B{Note: Should be same + also on head in most cases.} + @type channels_list: list[int] | None + @param channels_list: List of number of channels for each block. + Defaults to C{[256, 128, 128, 256, 256, 512]}. + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. + Defaults to C{[12, 12, 12, 12]}. + @type depth_mul: float + @param depth_mul: Depth multiplier. Defaults to C{0.33}. + @type width_mul: float + @param width_mul: Width multiplier. Defaults to C{0.25}. 
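A quick numeric sketch of the width/depth scaling applied in the constructor above: channel counts are multiplied by `width_mul` and snapped to a multiple of 8, repeat counts are multiplied by `depth_mul` and kept at least 1. The `make_divisible` below is an assumed stand-in for the helper imported from `luxonis_train.utils`.

```python
import math

def make_divisible(x: float, divisor: int) -> int:
    # Assumed stand-in: round up to the nearest multiple of `divisor`.
    return int(math.ceil(x / divisor) * divisor)

channels_list = [256, 128, 128, 256, 256, 512]
n_repeats = [12, 12, 12, 12]
width_mul, depth_mul = 0.25, 0.33

scaled_channels = [make_divisible(ch * width_mul, 8) for ch in channels_list]
scaled_repeats = [max(round(r * depth_mul), 1) if r > 1 else r for r in n_repeats]

print(scaled_channels)  # [64, 32, 32, 64, 64, 128]
print(scaled_repeats)   # [4, 4, 4, 4]
```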
+ """ + + super().__init__(**kwargs) + + self.n_heads = n_heads + + n_repeats = n_repeats or [12, 12, 12, 12] + channels_list = channels_list or [256, 128, 128, 256, 256, 512] + + channels_list = [ + make_divisible(ch * width_mul, 8) for ch in channels_list + ] + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats + ] + channels_list, n_repeats = self._fit_to_n_heads( + channels_list, n_repeats + ) + + self.up_blocks = nn.ModuleList() + + in_channels = self.in_channels[-1] + out_channels = channels_list[0] + in_channels_next = self.in_channels[-2] + curr_n_repeats = n_repeats[0] + up_out_channel_list = [in_channels] # used in DownBlocks + + for i in range(1, n_heads): + curr_up_block = RepUpBlock( + in_channels=in_channels, + in_channels_next=in_channels_next, + out_channels=out_channels, + n_repeats=curr_n_repeats, + ) + up_out_channel_list.append(out_channels) + self.up_blocks.append(curr_up_block) + if len(self.up_blocks) == (n_heads - 1): + up_out_channel_list.reverse() + break + + in_channels = out_channels + out_channels = channels_list[i] + in_channels_next = self.in_channels[-1 - (i + 1)] + curr_n_repeats = n_repeats[i] + + self.down_blocks = nn.ModuleList() + channels_list_down_blocks = channels_list[(n_heads - 1) :] + n_repeats_down_blocks = n_repeats[(n_heads - 1) :] + + in_channels = out_channels + downsample_out_channels = channels_list_down_blocks[0] + in_channels_next = up_out_channel_list[0] + out_channels = channels_list_down_blocks[1] + curr_n_repeats = n_repeats_down_blocks[0] + + for i in range(1, n_heads): + curr_down_block = RepDownBlock( + in_channels=in_channels, + downsample_out_channels=downsample_out_channels, + in_channels_next=in_channels_next, + out_channels=out_channels, + n_repeats=curr_n_repeats, + ) + self.down_blocks.append(curr_down_block) + if len(self.down_blocks) == (n_heads - 1): + break + + in_channels = out_channels + downsample_out_channels = channels_list_down_blocks[2 * i] + in_channels_next = up_out_channel_list[i] + out_channels = channels_list_down_blocks[2 * i + 1] + curr_n_repeats = n_repeats_down_blocks[i] + + def forward(self, inputs: list[Tensor]) -> list[Tensor]: + x = inputs[-1] + up_block_outs: list[Tensor] = [] + for up_block, input_ in zip( + self.up_blocks, inputs[-2::-1], strict=False + ): + conv_out, x = up_block(x, input_) + up_block_outs.append(conv_out) + + outs = [x] + for down_block, up_out in zip( + self.down_blocks, reversed(up_block_outs) + ): + x = down_block(x, up_out) + outs.append(x) + return outs + + def _fit_to_n_heads( + self, channels_list: list[int], n_repeats: list[int] + ) -> tuple[list[int], list[int]]: + """Fits channels_list and n_repeats to n_heads by removing or + adding items. + + Also scales the numbers based on offset + """ + if self.n_heads == 2: + channels_list = [channels_list[i] for i in [0, 4, 5]] + n_repeats = [n_repeats[0], n_repeats[3]] + elif self.n_heads == 3: + return channels_list, n_repeats + elif self.n_heads == 4: + channels_list = [ + channels_list[0], + channels_list[1], + channels_list[1] // 2, + channels_list[1] // 2, + channels_list[1], + channels_list[2], + channels_list[3], + channels_list[4], + channels_list[5], + ] + n_repeats = [n_repeats[i] for i in [0, 1, 1, 2, 2, 3]] + else: + raise ValueError( + f"Specified number of heads ({self.n_heads}) not supported." + "The number of heads should be 2, 3 or 4." 
+ ) + + return channels_list, n_repeats diff --git a/luxonis_train/nodes/reppan_neck.py b/luxonis_train/nodes/reppan_neck.py deleted file mode 100644 index 26fed274..00000000 --- a/luxonis_train/nodes/reppan_neck.py +++ /dev/null @@ -1,164 +0,0 @@ -"""Implementation of the RepPANNeck module. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -It has the balance of feature fusion ability and hardware efficiency. -""" - - -from typing import Literal, cast - -from torch import Tensor, nn - -from luxonis_train.nodes.blocks import RepDownBlock, RepUpBlock -from luxonis_train.utils.general import make_divisible - -from .base_node import BaseNode - - -class RepPANNeck(BaseNode[list[Tensor], list[Tensor]]): - def __init__( - self, - num_heads: Literal[2, 3, 4] = 3, - channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, - depth_mul: float = 0.33, - width_mul: float = 0.25, - **kwargs, - ): - """Constructor for the RepPANNeck module. - - @type num_heads: Literal[2,3,4] - @param num_heads: Number of output heads. Defaults to 3. ***Note: Should be same - also on head in most cases.*** - @type channels_list: list[int] | None - @param channels_list: List of number of channels for each block. Defaults to - C{[256, 128, 128, 256, 256, 512]}. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[12, - 12, 12, 12]}. - @type depth_mul: float - @param depth_mul: Depth multiplier. Defaults to 0.33. - @type width_mul: float - @param width_mul: Width multiplier. Defaults to 0.25. - """ - - super().__init__(**kwargs) - - num_repeats = num_repeats or [12, 12, 12, 12] - channels_list = channels_list or [256, 128, 128, 256, 256, 512] - - self.num_heads = num_heads - - channels_list = [make_divisible(ch * width_mul, 8) for ch in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats - ] - channels_list, num_repeats = self._fit_to_num_heads(channels_list, num_repeats) - - self.up_blocks = nn.ModuleList() - - in_channels = cast(list[int], self.in_channels)[-1] - out_channels = channels_list[0] - in_channels_next = cast(list[int], self.in_channels)[-2] - curr_num_repeats = num_repeats[0] - up_out_channel_list = [in_channels] # used in DownBlocks - - for i in range(1, num_heads): - curr_up_block = RepUpBlock( - in_channels=in_channels, - in_channels_next=in_channels_next, - out_channels=out_channels, - num_repeats=curr_num_repeats, - ) - up_out_channel_list.append(out_channels) - self.up_blocks.append(curr_up_block) - if len(self.up_blocks) == (num_heads - 1): - up_out_channel_list.reverse() - break - - in_channels = out_channels - out_channels = channels_list[i] - in_channels_next = cast(list[int], self.in_channels)[-1 - (i + 1)] - curr_num_repeats = num_repeats[i] - - self.down_blocks = nn.ModuleList() - channels_list_down_blocks = channels_list[(num_heads - 1) :] - num_repeats_down_blocks = num_repeats[(num_heads - 1) :] - - in_channels = out_channels - downsample_out_channels = channels_list_down_blocks[0] - in_channels_next = up_out_channel_list[0] - out_channels = channels_list_down_blocks[1] - curr_num_repeats = num_repeats_down_blocks[0] - - for i in range(1, num_heads): - curr_down_block = RepDownBlock( - in_channels=in_channels, - downsample_out_channels=downsample_out_channels, - in_channels_next=in_channels_next, - out_channels=out_channels, - num_repeats=curr_num_repeats, - ) - self.down_blocks.append(curr_down_block) - if 
len(self.down_blocks) == (num_heads - 1): - break - - in_channels = out_channels - downsample_out_channels = channels_list_down_blocks[2 * i] - in_channels_next = up_out_channel_list[i] - out_channels = channels_list_down_blocks[2 * i + 1] - curr_num_repeats = num_repeats_down_blocks[i] - - def forward(self, inputs: list[Tensor]) -> list[Tensor]: - x0 = inputs[-1] - up_block_outs = [] - for i, up_block in enumerate(self.up_blocks): - conv_out, x0 = up_block(x0, inputs[-1 - (i + 1)]) - up_block_outs.append(conv_out) - up_block_outs.reverse() - - outs = [x0] - for i, down_block in enumerate(self.down_blocks): - x0 = down_block(x0, up_block_outs[i]) - outs.append(x0) - return outs - - def _fit_to_num_heads( - self, channels_list: list[int], num_repeats: list[int] - ) -> tuple[list[int], list[int]]: - """Fits channels_list and num_repeats to num_heads by removing or adding items. - - Also scales the numbers based on offset - """ - if self.num_heads == 3: - ... - elif self.num_heads == 2: - channels_list = [channels_list[0], channels_list[4], channels_list[5]] - num_repeats = [num_repeats[0], num_repeats[3]] - elif self.num_heads == 4: - channels_list = [ - channels_list[0], - channels_list[1], - channels_list[1] // 2, - channels_list[1] // 2, - channels_list[1], - channels_list[2], - channels_list[3], - channels_list[4], - channels_list[5], - ] - num_repeats = [ - num_repeats[0], - num_repeats[1], - num_repeats[1], - num_repeats[2], - num_repeats[2], - num_repeats[3], - ] - else: - raise ValueError( - f"Specified number of heads ({self.num_heads}) not supported." - ) - - return channels_list, num_repeats diff --git a/luxonis_train/nodes/repvgg.py b/luxonis_train/nodes/repvgg.py deleted file mode 100644 index 44579fa5..00000000 --- a/luxonis_train/nodes/repvgg.py +++ /dev/null @@ -1,144 +0,0 @@ -from copy import deepcopy - -import torch.utils.checkpoint as checkpoint -from torch import Tensor, nn - -from luxonis_train.nodes.blocks import RepVGGBlock - -from .base_node import BaseNode - - -class RepVGG(BaseNode): - """Implementation of RepVGG backbone. - - Source: U{https://github.com/DingXiaoH/RepVGG} - @license: U{MIT}. - - @todo: technical documentation - """ - - in_channels: int - - VARIANTS_SETTINGS = { - "A0": { - "num_blocks": [2, 4, 14, 1], - "num_classes": 1000, - "width_multiplier": [0.75, 0.75, 0.75, 2.5], - }, - "A1": { - "num_blocks": [2, 4, 14, 1], - "num_classes": 1000, - "width_multiplier": [1, 1, 1, 2.5], - }, - "A2": { - "num_blocks": [2, 4, 14, 1], - "num_classes": 1000, - "width_multiplier": [1.5, 1.5, 1.5, 2.75], - }, - } - - def __new__(cls, **kwargs): - variant = kwargs.pop("variant", "A0") - - if variant not in RepVGG.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"RepVGG model variant should be in {list(RepVGG.VARIANTS_SETTINGS.keys())}" - ) - - overrides = deepcopy(kwargs) - kwargs.clear() - kwargs.update(RepVGG.VARIANTS_SETTINGS[variant]) - kwargs.update(overrides) - return cls.__new__(cls) - - def __init__( - self, - deploy: bool = False, - override_groups_map: dict[int, int] | None = None, - use_se: bool = False, - use_checkpoint: bool = False, - num_blocks: list[int] | None = None, - width_multiplier: list[float] | None = None, - **kwargs, - ): - """Constructor for the RepVGG module. - - @type deploy: bool - @param deploy: Whether to use the model in deploy mode. - @type override_groups_map: dict[int, int] | None - @param override_groups_map: Dictionary mapping layer index to number of groups. 
- @type use_se: bool - @param use_se: Whether to use Squeeze-and-Excitation blocks. - @type use_checkpoint: bool - @param use_checkpoint: Whether to use checkpointing. - @type num_blocks: list[int] | None - @param num_blocks: Number of blocks in each stage. - @type width_multiplier: list[float] | None - @param width_multiplier: Width multiplier for each stage. - """ - super().__init__(**kwargs) - num_blocks = num_blocks or [2, 4, 14, 1] - width_multiplier = width_multiplier or [0.75, 0.75, 0.75, 2.5] - self.deploy = deploy - self.override_groups_map = override_groups_map or {} - assert 0 not in self.override_groups_map - self.use_se = use_se - self.use_checkpoint = use_checkpoint - - self.in_planes = min(64, int(64 * width_multiplier[0])) - self.stage0 = RepVGGBlock( - in_channels=self.in_channels, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - deploy=self.deploy, - use_se=self.use_se, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * width_multiplier[0]), num_blocks[0], stride=2 - ) - self.stage2 = self._make_stage( - int(128 * width_multiplier[1]), num_blocks[1], stride=2 - ) - self.stage3 = self._make_stage( - int(256 * width_multiplier[2]), num_blocks[2], stride=2 - ) - self.stage4 = self._make_stage( - int(512 * width_multiplier[3]), num_blocks[3], stride=2 - ) - self.gap = nn.AdaptiveAvgPool2d(output_size=1) - - def forward(self, inputs: Tensor) -> list[Tensor]: - outputs = [] - out = self.stage0(inputs) - for stage in (self.stage1, self.stage2, self.stage3, self.stage4): - for block in stage: - if self.use_checkpoint: - out = checkpoint.checkpoint(block, out) - else: - out = block(out) - outputs.append(out) - return outputs - - def _make_stage(self, planes: int, num_blocks: int, stride: int): - strides = [stride] + [1] * (num_blocks - 1) - blocks = [] - for stride in strides: - cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) - blocks.append( - RepVGGBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=3, - stride=stride, - padding=1, - groups=cur_groups, - deploy=self.deploy, - use_se=self.use_se, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.ModuleList(blocks) diff --git a/luxonis_train/nodes/resnet18.py b/luxonis_train/nodes/resnet18.py deleted file mode 100644 index 9c38681a..00000000 --- a/luxonis_train/nodes/resnet18.py +++ /dev/null @@ -1,59 +0,0 @@ -"""ResNet18 backbone. - -Source: U{https://pytorch.org/vision/main/models/generated/ -torchvision.models.resnet18.html} -@license: U{PyTorch} -""" - - -import torchvision -from torch import Tensor - -from .base_node import BaseNode - - -class ResNet18(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - - def __init__( - self, - channels_list: list[int] | None = None, - download_weights: bool = False, - **kwargs, - ): - """Implementation of the ResNet18 backbone. - - TODO: add more info - - @type channels_list: list[int] | None - @param channels_list: List of channels to return. - If unset, defaults to [64, 128, 256, 512]. - - @type download_weights: bool - @param download_weights: If True download weights from imagenet. - Defaults to False. 
- """ - super().__init__(**kwargs) - - self.backbone = torchvision.models.resnet18( - weights="DEFAULT" if download_weights else None - ) - self.channels_list = channels_list or [64, 128, 256, 512] - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - x = self.backbone.conv1(x) - x = self.backbone.bn1(x) - x = self.backbone.relu(x) - x = self.backbone.maxpool(x) - - x = self.backbone.layer1(x) - outs.append(x) - x = self.backbone.layer2(x) - outs.append(x) - x = self.backbone.layer3(x) - outs.append(x) - x = self.backbone.layer4(x) - outs.append(x) - - return outs diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/segmentation_head.py deleted file mode 100644 index bdfe814d..00000000 --- a/luxonis_train/nodes/segmentation_head.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Implementation of a basic segmentation head. - -Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} -@license: U{BSD-3 } -""" - - -import torch.nn as nn -from torch import Tensor - -from luxonis_train.nodes.blocks import UpBlock -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType, Packet - -from .base_node import BaseNode - - -class SegmentationHead(BaseNode[Tensor, Tensor]): - attach_index: int = -1 - in_height: int - in_channels: int - - def __init__(self, **kwargs): - """Basic segmentation FCN head. - - Note that it doesn't ensure that ouptut is same size as input. - - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. - """ - super().__init__(task_type=LabelType.SEGMENTATION, **kwargs) - - original_height = self.original_in_shape[2] - num_up = infer_upscale_factor(self.in_height, original_height, strict=False) - - modules = [] - in_channels = self.in_channels - for _ in range(int(num_up)): - modules.append( - UpBlock(in_channels=in_channels, out_channels=in_channels // 2) - ) - in_channels //= 2 - - self.head = nn.Sequential( - *modules, - nn.Conv2d(in_channels, self.n_classes, kernel_size=1), - ) - - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"segmentation": [output]} - - def forward(self, inputs: Tensor) -> Tensor: - return self.head(inputs) diff --git a/luxonis_train/optimizers/__init__.py b/luxonis_train/optimizers/__init__.py new file mode 100644 index 00000000..acd73792 --- /dev/null +++ b/luxonis_train/optimizers/__init__.py @@ -0,0 +1 @@ +from .optimizers import * diff --git a/luxonis_train/utils/optimizers.py b/luxonis_train/optimizers/optimizers.py similarity index 92% rename from luxonis_train/utils/optimizers.py rename to luxonis_train/optimizers/optimizers.py index 7583cef9..c2a4bf12 100644 --- a/luxonis_train/utils/optimizers.py +++ b/luxonis_train/optimizers/optimizers.py @@ -1,4 +1,4 @@ -from torch import optim +import torch.optim as optim from luxonis_train.utils.registry import OPTIMIZERS diff --git a/luxonis_train/schedulers/__init__.py b/luxonis_train/schedulers/__init__.py new file mode 100644 index 00000000..99bcd9d9 --- /dev/null +++ b/luxonis_train/schedulers/__init__.py @@ -0,0 +1 @@ +from .schedulers import * diff --git a/luxonis_train/utils/schedulers.py b/luxonis_train/schedulers/schedulers.py similarity index 100% rename from luxonis_train/utils/schedulers.py rename to luxonis_train/schedulers/schedulers.py diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 609304c3..0a4861a5 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,5 +1,68 @@ -from 
.assigners import * -from .config import * -from .loaders import * -from .optimizers import * -from .schedulers import * +from .boundingbox import ( + anchors_for_fpn_features, + anchors_from_dataset, + batch_probiou, + bbox2dist, + bbox_iou, + compute_iou_loss, + dist2bbox, + dist2rbbox, + match_to_anchor, + non_max_suppression, + non_max_suppression_obb, + probiou, + process_bbox_predictions, + xywh2xyxy, + xywhr2xyxyxyxy, + xyxy2xywh, + xyxyxyxy2xywhr, +) +from .config import Config +from .dataset_metadata import DatasetMetadata +from .exceptions import IncompatibleException +from .general import ( + get_with_default, + infer_upscale_factor, + make_divisible, + to_shape_packet, +) +from .graph import is_acyclic, traverse_graph +from .keypoints import get_sigmas, process_keypoints_predictions +from .tracker import LuxonisTrackerPL +from .types import AttachIndexType, Kwargs, Labels, Packet + +__all__ = [ + "Config", + "AttachIndexType", + "Kwargs", + "Labels", + "Packet", + "IncompatibleException", + "DatasetMetadata", + "make_divisible", + "infer_upscale_factor", + "to_shape_packet", + "get_with_default", + "LuxonisTrackerPL", + "match_to_anchor", + "dist2bbox", + "bbox2dist", + "bbox_iou", + "batch_probiou", + "xywhr2xyxyxyxy", + "xyxyxyxy2xywhr", + "probiou", + "xywh2xyxy", + "xyxy2xywh", + "dist2rbbox", + "non_max_suppression_obb", + "non_max_suppression", + "anchors_from_dataset", + "anchors_for_fpn_features", + "process_bbox_predictions", + "compute_iou_loss", + "process_keypoints_predictions", + "get_sigmas", + "is_acyclic", + "traverse_graph", +] diff --git a/luxonis_train/utils/assigners/__init__.py b/luxonis_train/utils/assigners/__init__.py deleted file mode 100644 index 4d9bec9f..00000000 --- a/luxonis_train/utils/assigners/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .atts_assigner import ATSSAssigner -from .tal_assigner import TaskAlignedAssigner - -__all__ = ["ATSSAssigner", "TaskAlignedAssigner"] diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boundingbox.py similarity index 54% rename from luxonis_train/utils/boxutils.py rename to luxonis_train/utils/boundingbox.py index 0d708f79..4c9dab8c 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boundingbox.py @@ -1,9 +1,10 @@ -"""This module contains various utility functions for working with bounding boxes.""" - import math from typing import Literal, TypeAlias +import cv2 +import numpy as np import torch +from luxonis_ml.data import LabelType from scipy.cluster.vq import kmeans from torch import Tensor from torchvision.ops import ( @@ -14,24 +15,11 @@ generalized_box_iou, ) -from luxonis_train.utils.types import LabelType +from luxonis_train.loaders import BaseLoaderTorch IoUType: TypeAlias = Literal["none", "giou", "diou", "ciou", "siou"] BBoxFormatType: TypeAlias = Literal["xyxy", "xywh", "cxcywh"] -__all__ = [ - "anchors_for_fpn_features", - "anchors_from_dataset", - "bbox2dist", - "bbox_iou", - "compute_iou_loss", - "dist2bbox", - "match_to_anchor", - "non_max_suppression", - "process_bbox_predictions", - "process_keypoints_predictions", -] - def match_to_anchor( targets: Tensor, @@ -76,12 +64,20 @@ def match_to_anchor( # The boxes and keypoints need to be scaled to the size of the features # First two indices are batch index and class label, # last index is anchor index. Those are not scaled. 
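The change below reflects targets that now store each keypoint as an (x, y, visibility) triplet: the scale vector gains three entries per keypoint and only the coordinate columns are scaled. A minimal sketch of the resulting layout, assuming box_offset == 5 and two keypoints (illustrative values only, not taken from this patch):

# --- illustrative sketch, not from this patch ---
import torch

n_keypoints, box_offset = 2, 5          # assumed values for the example
scale_width, scale_height = 0.5, 0.25
scales = torch.ones(3 * n_keypoints + box_offset + 2)
scales[2 : box_offset + 1] = torch.tensor(
    [scale_width, scale_height, scale_width, scale_height]
)
for i in range(n_keypoints):
    scales[box_offset + 1 + 3 * i] = scale_width   # keypoint x
    scales[box_offset + 2 + 3 * i] = scale_height  # keypoint y; visibility keeps scale 1.0
# columns: [batch_idx, class, box (4 values), (x, y, vis) * n_keypoints, anchor_idx]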
- scale_length = 2 * n_keypoints + box_offset + 2 + scale_length = 3 * n_keypoints + box_offset + 2 scales = torch.ones(scale_length, device=targets.device) - scales[2 : scale_length - 1] = torch.tensor( - [scale_width, scale_height] * (n_keypoints + 2) + + # Scale box and keypoint coordinates, but not visibility + for i in range(n_keypoints): + scales[box_offset + 1 + 3 * i] = scale_width + scales[box_offset + 2 + 3 * i] = scale_height + + scales[2 : box_offset + 1] = torch.tensor( + [scale_width, scale_height, scale_width, scale_height] ) + scaled_targets = targets * scales + if targets.size(1) == 0: return targets[0], torch.zeros(1, device=targets.device) @@ -135,6 +131,29 @@ def dist2bbox( return bbox +def dist2rbbox( + distance: Tensor, + pred_angles: Tensor, + anchor_points: Tensor, +) -> Tensor: + """Transform distance (ltrb) to a rotated bounding box in "xcycwh" + format. + + @type distance: Tensor + @param distance: Distance predictions + @type anchor_points: Tensor + @param anchor_points: Head's anchor points + @rtype: Tensor + @return: BBoxes in "xcycwh" format + """ + lt, rb = torch.split(distance, 2, -1) + cos, sin = torch.cos(pred_angles), torch.sin(pred_angles) + xf, yf = ((rb - lt) / 2).split(1, dim=-1) + x, y = xf * cos - yf * sin, xf * sin + yf * cos + xy = torch.cat([x, y], dim=-1) + anchor_points + return torch.cat([xy, lt + rb], dim=-1) + + def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: """Transform bbox(xyxy) to distance(ltrb). @@ -154,6 +173,114 @@ def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: return dist +def xyxyxyxy2xywhr(x: np.ndarray | Tensor) -> np.ndarray | Tensor: + """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, + xy3, xy4] to [xywh, rotation]. Rotation values are returned in + radians from 0 to pi/2. + + @type x: np.ndarray | Tensor + @param x: Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8). + @rtype: np.ndarray | Tensor + @return: Converted data in [cx, cy, w, h, rotation] format of shape + (n, 5). + """ + is_torch = isinstance(x, Tensor) + points = x.cpu().numpy() if is_torch else x + points = points.reshape(len(x), -1, 2) + rboxes = [] + for pts in points: + # NOTE: Use cv2.minAreaRect to get accurate xywhr, + # especially some objects are cut off by augmentations in dataloader. + (cx, cy), (w, h), angle = cv2.minAreaRect(pts) + rboxes.append([cx, cy, w, h, angle / 180 * np.pi]) + return ( + torch.tensor(rboxes, device=x.device, dtype=x.dtype) + if is_torch + else np.asarray(rboxes) + ) + + +def xywhr2xyxyxyxy(x: Tensor) -> np.ndarray | Tensor: + """Convert batched Oriented Bounding Boxes (OBB) from [xywh, + rotation] to [xy1, xy2, xy3, xy4]. Rotation values should be in + radians from 0 to pi/2. + + @type x: Tensor + @param x: Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) + or (b, n, 5). + @rtype: numpy.ndarray | Tensor + @return: Converted corner points of shape (n, 4, 2) or (b, n, 4, 2). 
+ """ + cos, sin, cat, stack = ( + (torch.cos, torch.sin, torch.cat, torch.stack) + if isinstance(x, Tensor) + else (np.cos, np.sin, np.concatenate, np.stack) + ) + + ctr = x[..., :2] + w, h, angle = (x[..., i : i + 1] for i in range(2, 5)) + cos_value, sin_value = cos(angle), sin(angle) + vec1 = [w / 2 * cos_value, w / 2 * sin_value] + vec2 = [-h / 2 * sin_value, h / 2 * cos_value] + vec1 = cat(vec1, -1) + vec2 = cat(vec2, -1) + pt1 = ctr + vec1 + vec2 + pt2 = ctr + vec1 - vec2 + pt3 = ctr - vec1 - vec2 + pt4 = ctr - vec1 + vec2 + return stack([pt1, pt2, pt3, pt4], -2) + + +def xyxy2xywh(x: Tensor) -> Tensor: + """Convert bounding box coordinates from (x1, y1, x2, y2) format to + (x, y, width, height) format where (x1, y1) is the top-left corner + and (x2, y2) is the bottom-right corner. + + @type x: Tensor + @param x: The input bounding box coordinates in (x1, y1, x2, y2) + format. + @rtype: Tensor + @return: The bounding box coordinates in (x, y, width, height) + format. + """ + assert ( + x.shape[-1] == 4 + ), f"input shape last dimension expected 4 but input shape is {x.shape}" + y = ( + torch.empty_like(x) if isinstance(x, Tensor) else np.empty_like(x) + ) # faster than clone/copy + y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center + y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center + y[..., 2] = x[..., 2] - x[..., 0] # width + y[..., 3] = x[..., 3] - x[..., 1] # height + return y + + +def xywh2xyxy(x: Tensor) -> Tensor: + """Convert bounding box coordinates from (x, y, width, height) + format to (x1, y1, x2, y2) format where (x1, y1) is the top-left + corner and (x2, y2) is the bottom-right corner. Note: ops per 2 + channels faster than per channel. + + @type x: Tensor + @param x: The input bounding box coordinates in (x, y, width, + height) format. + @rtype: Tensor + @return: The bounding box coordinates in (x1, y1, x2, y2) format. + """ + assert ( + x.shape[-1] == 4 + ), f"input shape last dimension expected 4 but input shape is {x.shape}" + y = ( + torch.empty_like(x) if isinstance(x, Tensor) else np.empty_like(x) + ) # faster than clone/copy + xy = x[..., :2] # centers + wh = x[..., 2:] / 2 # half width-height + y[..., :2] = xy - wh # top left xy + y[..., 2:] = xy + wh # bottom right xy + return y + + def bbox_iou( bbox1: Tensor, bbox2: Tensor, @@ -169,8 +296,21 @@ def bbox_iou( @param bbox2: Second set of bboxes [M, 4]. @type bbox_format: BBoxFormatType @param bbox_format: Input bbox format. Defaults to "xyxy". - @type iou_type: IoUType + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] @param iou_type: IoU type. Defaults to "none". + Possible values are: + - "none": standard IoU + - "giou": Generalized IoU + - "diou": Distance IoU + - "ciou": Complete IoU. Introduced in U{ + Enhancing Geometric Factors in Model Learning and + Inference for Object Detection and Instance + Segmentation}. + Implementation adapted from torchvision C{complete_box_iou} + with improved stability. + - "siou": Soft IoU. Introduced in U{ + SIoU Loss: More Powerful Learning for Bounding Box + Regression}. @type element_wise: bool @param element_wise: If True returns element wise IoUs. Defaults to False. @rtype: Tensor @@ -188,9 +328,6 @@ def bbox_iou( elif iou_type == "diou": iou = distance_box_iou(bbox1, bbox2) elif iou_type == "ciou": - # CIoU from `Enhancing Geometric Factors in Model Learning and Inference for - # Object Detection and Instance Segmentation`, https://arxiv.org/pdf/2005.03572.pdf. 
- # Implementation adapted from torchvision complete_box_iou with added eps for stability eps = 1e-7 iou = bbox_iou(bbox1, bbox2, iou_type="none") @@ -209,9 +346,6 @@ def bbox_iou( iou = diou - alpha * v elif iou_type == "siou": - # SIoU from `SIoU Loss: More Powerful Learning for Bounding Box Regression`, - # https://arxiv.org/pdf/2205.12740.pdf - eps = 1e-7 bbox1_xywh = box_convert(bbox1, in_fmt="xyxy", out_fmt="xywh") w1, h1 = bbox1_xywh[:, 2], bbox1_xywh[:, 3] @@ -238,7 +372,9 @@ def bbox_iou( sin_alpha_1 = torch.abs(s_cw) / sigma sin_alpha_2 = torch.abs(s_ch) / sigma threshold = pow(2, 0.5) / 2 - sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1) + sin_alpha = torch.where( + sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1 + ) angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2) # distance cost @@ -266,6 +402,132 @@ def bbox_iou( return iou +def probiou( + obb1: Tensor, obb2: Tensor, CIoU: bool = False, eps: float = 1e-7 +) -> Tensor: + """Calculate probabilistic IoU between oriented bounding boxes. + + Implements the algorithm from + https://arxiv.org/pdf/2106.06072v1.pdf. + + @type obb1: Tensor + @param obb1: Ground truth OBBs, shape (N, 5), format xywhr. + @type obb2: Tensor + @param obb2: Predicted OBBs, shape (N, 5), format xywhr. + @type CIoU: bool + @param CIoU: If True, calculate CIoU. Defaults to False. + @type eps: float + @param eps: Small value to avoid division by zero. Defaults to 1e-7. + @rtype: Tensor + @return: OBB similarities, shape (N,). + @note: OBB format: [center_x, center_y, width, height, + rotation_angle]. If CIoU is True, returns CIoU instead of IoU. + """ + x1, y1 = obb1[..., :2].split(1, dim=-1) + x2, y2 = obb2[..., :2].split(1, dim=-1) + a1, b1, c1 = _get_covariance_matrix(obb1) + a2, b2, c2 = _get_covariance_matrix(obb2) + + t1 = ( + ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.25 + t2 = ( + ((c1 + c2) * (x2 - x1) * (y1 - y2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.5 + t3 = ( + ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2)) + / ( + 4 + * ( + (a1 * b1 - c1.pow(2)).clamp_(0) + * (a2 * b2 - c2.pow(2)).clamp_(0) + ).sqrt() + + eps + ) + + eps + ).log() * 0.5 + bd = (t1 + t2 + t3).clamp(eps, 100.0) + hd = (1.0 - (-bd).exp() + eps).sqrt() + iou = 1 - hd + if CIoU: # only include the wh aspect ratio part + w1, h1 = obb1[..., 2:4].split(1, dim=-1) + w2, h2 = obb2[..., 2:4].split(1, dim=-1) + v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - v * alpha # CIoU + return iou + + +def batch_probiou(obb1: Tensor, obb2: Tensor, eps: float = 1e-7) -> Tensor: + """Calculate the probabilistic IoU between oriented bounding boxes, + https://arxiv.org/pdf/2106.06072v1.pdf. + + @type obb1: Tensor + @param obb1: A tensor of shape (N, 5) representing ground truth OBBs, with xywhr format. + @type obb2: Tensor + @param obb2: A tensor of shape (M, 5) representing predicted OBBs, with xywhr format. + @type eps: float + @param eps: A small value to avoid division by zero. Defaults to 1e-7. + + @rtype: Tensor + @return: A tensor of shape (N, M) representing OBB similarities. 
+ """ + obb1 = torch.from_numpy(obb1) if isinstance(obb1, np.ndarray) else obb1 + obb2 = torch.from_numpy(obb2) if isinstance(obb2, np.ndarray) else obb2 + + x1, y1 = obb1[..., :2].split(1, dim=-1) + x2, y2 = (x.squeeze(-1)[None] for x in obb2[..., :2].split(1, dim=-1)) + a1, b1, c1 = _get_covariance_matrix(obb1) + a2, b2, c2 = (x.squeeze(-1)[None] for x in _get_covariance_matrix(obb2)) + + t1 = ( + ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.25 + t2 = ( + ((c1 + c2) * (x2 - x1) * (y1 - y2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.5 + t3 = ( + ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2)) + / ( + 4 + * ( + (a1 * b1 - c1.pow(2)).clamp_(0) + * (a2 * b2 - c2.pow(2)).clamp_(0) + ).sqrt() + + eps + ) + + eps + ).log() * 0.5 + bd = (t1 + t2 + t3).clamp(eps, 100.0) + hd = (1.0 - (-bd).exp() + eps).sqrt() + return 1 - hd + + +def _get_covariance_matrix(boxes: Tensor) -> tuple[Tensor, ...]: + """Generate covariance matrix from OBBs. + + @type boxes: Tensor + @param boxes: A tensor of shape (N, 5) representing rotated bounding + boxes, with xywhr format. + @rtype: tuple(Tensor) + @return: Covariance matrices corresponding to original rotated + bounding boxes. + """ + # Gaussian bounding boxes, ignore the center points (the first two columns) because they are not needed here. + gbbs = torch.cat((boxes[:, 2:4].pow(2) / 12, boxes[:, 4:]), dim=-1) + a, b, c = gbbs.split(1, dim=-1) + cos = c.cos() + sin = c.sin() + cos2 = cos.pow(2) + sin2 = sin.pow(2) + return a * cos2 + b * sin2, a * sin2 + b * cos2, (a - b) * cos * sin + + def non_max_suppression( preds: Tensor, n_classes: int, @@ -278,7 +540,8 @@ def non_max_suppression( max_det: int = 300, predicts_objectness: bool = True, ) -> list[Tensor]: - """Non-maximum suppression on model's predictions to keep only best instances. + """Non-maximum suppression on model's predictions to keep only best + instances. @type preds: Tensor @param preds: Model's prediction tensor of shape [bs, N, M]. 
@@ -331,7 +594,9 @@ def non_max_suppression( torch.max(preds[..., 5 : 5 + n_classes], dim=-1)[0] > conf_thres, ) - output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + output = [ + torch.zeros((0, preds.size(-1)), device=preds.device) + ] * preds.size(0) for i, x in enumerate(preds): curr_out = x[candidate_mask[i]] @@ -354,7 +619,9 @@ def non_max_suppression( if multi_label: box_idx, class_idx = ( - (curr_out[:, 5 : 5 + n_classes] > conf_thres).nonzero(as_tuple=False).T + (curr_out[:, 5 : 5 + n_classes] > conf_thres) + .nonzero(as_tuple=False) + .T ) keep_mask[box_idx] = True curr_out = torch.cat( @@ -366,9 +633,13 @@ def non_max_suppression( 1, ) else: - conf, class_idx = curr_out[:, 5 : 5 + n_classes].max(1, keepdim=True) + conf, class_idx = curr_out[:, 5 : 5 + n_classes].max( + 1, keepdim=True + ) keep_mask[conf.view(-1) > conf_thres] = True - curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[keep_mask] + curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[ + keep_mask + ] if has_additional: curr_out = torch.hstack( @@ -399,39 +670,215 @@ def non_max_suppression( return output +def non_max_suppression_obb( + preds: Tensor, + n_classes: int, + conf_thres: float = 0.25, + iou_thres: float = 0.45, + keep_classes: list[int] | None = None, + agnostic: bool = False, + multi_label: bool = False, + max_det: int = 300, + predicts_objectness: bool = True, +) -> list[Tensor]: + """Non-maximum suppression on model's predictions to keep only best + instances for oriented bounding boxes (obb). + + @type preds: Tensor + @param preds: Model's prediction tensor of shape [bs, N, M]. Bounding boxes are in xywhr format. + @type n_classes: int + @param n_classes: Number of model's classes. + @type conf_thres: float + @param conf_thres: Boxes with confidence higher than this will be kept. Defaults to + 0.25. + @type iou_thres: float + @param iou_thres: Boxes with IoU higher than this will be discarded. Defaults to + 0.45. + @type keep_classes: list[int] | None + @param keep_classes: Subset of classes to keep, if None then keep all of them. + Defaults to None. + @type agnostic: bool + @param agnostic: Whether perform NMS per class or treat all classes the same. + Defaults to False. + @type multi_label: bool + @param multi_label: Whether one prediction can have multiple labels. Defaults to + False. + @type max_det: int + @param max_det: Number of maximum output detections. Defaults to 300. + @type predicts_objectness: bool + @param predicts_objectness: Whether head predicts objectness confidence. Defaults to + True. + @rtype: list[Tensor] + @return: list of kept detections for each image, boxes in "xywhr" format. Tensors + with shape [n_kept, M] + """ + if not (0 <= conf_thres <= 1): + raise ValueError( + f"Confidence threshold must be in range [0,1] but set to {conf_thres}." + ) + if not (0 <= iou_thres <= 1): + raise ValueError( + f"IoU threshold must be in range [0,1] but set to {iou_thres}." 
+ ) + + multi_label &= n_classes > 1 + + candidate_mask = preds[..., 5] > conf_thres # all True + if not predicts_objectness: + candidate_mask = torch.logical_and( + candidate_mask, + torch.max(preds[..., 6 : 6 + n_classes], dim=-1)[0] > conf_thres, + ) + + # output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + output = [torch.zeros((0, 7), device=preds.device)] * preds.size( + 0 + ) # [x, y, w, h, conf, cls_idx] + + for i, x in enumerate(preds): + curr_out = x[candidate_mask[i]] + + if curr_out.size(0) == 0: + continue + + if predicts_objectness: + if n_classes == 1: + curr_out[:, 5 : 5 + n_classes] = curr_out[ + :, 4:5 + ] # not changed (non_max_suppression) + else: + curr_out[:, 5 : 5 + n_classes] *= curr_out[ + :, 4:5 + ] # not changed (non_max_suppression) + + else: + curr_out[:, 6 : 6 + n_classes] *= curr_out[:, 5:6] + + bboxes = curr_out[:, :5] + keep_mask = torch.zeros(bboxes.size(0)).bool() + + if multi_label: + box_idx, class_idx = ( + (curr_out[:, 6 : 6 + n_classes] > conf_thres) + .nonzero(as_tuple=False) + .T + ) + keep_mask[box_idx] = True + curr_out = torch.cat( + ( + bboxes[keep_mask], + curr_out[keep_mask, class_idx + 5, None], # why 5? + class_idx[:, None].float(), + ), + 1, + ) + else: + conf, class_idx = curr_out[:, 6 : 6 + n_classes].max( + 1, keepdim=True + ) + keep_mask[conf.view(-1) > conf_thres] = True + curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[ + keep_mask + ] + + if keep_classes is not None: + curr_out = curr_out[ + ( + curr_out[:, 6:7] + == torch.tensor(keep_classes, device=curr_out.device) + ).any(1) + ] + + if not curr_out.size(0): + continue + + keep_indices = batched_nms_obb( + boxes=curr_out[:, :5], + scores=curr_out[:, 5], + idxs=curr_out[:, 6].int() * (0 if agnostic else 1), + iou_threshold=iou_thres, + ) + + keep_indices = keep_indices[:max_det] + + output[i] = curr_out[keep_indices] + + return output + + +def batched_nms_obb( + boxes: Tensor, + scores: Tensor, + idxs: Tensor, + iou_threshold: float, +) -> Tensor: + # Based on Detectron2 implementation, just manually call nms() on each class independently + keep_mask = torch.zeros_like(scores, dtype=torch.bool) + for class_id in torch.unique(idxs): + curr_indices = torch.where(idxs == class_id)[0] + curr_keep_indices = batched_nms_rotated( + boxes[curr_indices], scores[curr_indices], iou_threshold + ) + keep_mask[curr_indices[curr_keep_indices]] = True + keep_indices = torch.where(keep_mask)[0] + return keep_indices[scores[keep_indices].sort(descending=True)[1]] + + +def batched_nms_rotated( + boxes: Tensor, scores: Tensor, threshold: float = 0.45 +) -> Tensor | np.ndarray: + """NMS for oriented bounding boxes using Probiou and Fast-NMS. + + @type boxes: Tensor + @param boxes: Rotated bounding boxes, shape (N, 5), format xywhr. + @type scores: Tensor + @param scores: Confidence scores, shape (N,). + @type threshold: float + @param threshold: IoU threshold. Defaults to 0.45. + @rtype: Tensor | np.ndarray + @return: Indices of boxes to keep after NMS. 
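batched_nms_rotated below applies a Fast-NMS style suppression: boxes are sorted by score, the upper triangle of the pairwise ProbIoU matrix is taken, and a box survives only if no higher-scored box overlaps it above the threshold. The same idea with plain axis-aligned IoU, as a self-contained sketch (illustrative, not this patch's implementation):

# --- illustrative sketch, not from this patch ---
import torch
from torchvision.ops import box_iou

def fast_nms(boxes_xyxy: torch.Tensor, scores: torch.Tensor, thr: float = 0.45) -> torch.Tensor:
    order = torch.argsort(scores, descending=True)
    ious = box_iou(boxes_xyxy[order], boxes_xyxy[order]).triu_(diagonal=1)
    keep = ious.max(dim=0).values < thr  # no higher-scored box overlaps above thr
    return order[keep]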
+ """ + if len(boxes) == 0: + return np.empty((0,), dtype=np.int8) + sorted_idx = torch.argsort(scores, descending=True) + boxes = boxes[sorted_idx] + ious = batch_probiou(boxes, boxes).triu_(diagonal=1) + pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1) + return sorted_idx[pick] + + def anchors_from_dataset( - loader: torch.utils.data.DataLoader, + loader: BaseLoaderTorch, n_anchors: int = 9, n_generations: int = 1000, ratio_threshold: float = 4.0, ) -> tuple[Tensor, float]: - """Generates anchors based on bounding box annotations present in provided data - loader. It uses K-Means for initial proposals which are then refined with genetic - algorithm. + """Generates anchors based on bounding box annotations present in + provided data loader. It uses K-Means for initial proposals which + are then refined with genetic algorithm. @type loader: L{torch.utils.data.DataLoader} @param loader: Data loader. @type n_anchors: int - @param n_anchors: Number of anchors, this is normally num_heads * 3 which generates - 3 anchors per layer. Defaults to 9. + @param n_anchors: Number of anchors, this is normally n_heads * 3 + which generates 3 anchors per layer. Defaults to 9. @type n_generations: int - @param n_generations: Number of iterations for anchor improvement with genetic - algorithm. Defaults to 1000. + @param n_generations: Number of iterations for anchor improvement + with genetic algorithm. Defaults to 1000. @type ratio_threshold: float - @param ratio_threshold: Minimum threshold for ratio. Defaults to 4.0. + @param ratio_threshold: Minimum threshold for ratio. Defaults to + 4.0. @rtype: tuple[Tensor, float] @return: Proposed anchors and the best possible recall. """ - widths = [] - inputs = None - for inp, labels in loader: - boxes = labels[LabelType.BOUNDINGBOX] - curr_wh = boxes[:, 4:] - widths.append(curr_wh) - inputs = inp - assert inputs is not None, "No inputs found in data loader" - _, _, h, w = inputs.shape # assuming all images are same size + widths: list[Tensor] = [] + for _, labels in loader: + for tensor, label_type in labels.values(): + if label_type == LabelType.BOUNDINGBOX: + curr_wh = tensor[:, 4:] + widths.append(curr_wh) + _, h, w = loader.input_shape img_size = torch.tensor([w, h]) wh = torch.vstack(widths) * img_size @@ -451,7 +898,8 @@ def anchors_from_dataset( except Exception: print("Fallback to random anchor init") proposed_anchors = ( - torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) * img_size + torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) + * img_size ) proposed_anchors = proposed_anchors[ @@ -459,7 +907,8 @@ def anchors_from_dataset( ] # sort small to large def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate how well most suitable anchor box matches each target bbox.""" + """Calculate how well most suitable anchor box matches each + target bbox.""" symmetric_size_ratios = torch.min( wh[:, None] / anchors[None], anchors[None] / wh[:, None] ) @@ -468,17 +917,20 @@ def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: return best_anchor_ratio def calc_best_possible_recall(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate best possible recall if every bbox is matched to an appropriate - anchor.""" + """Calculate best possible recall if every bbox is matched to an + appropriate anchor.""" best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - best_possible_recall = (best_anchor_ratio > 1 / ratio_threshold).float().mean() + best_possible_recall = ( + (best_anchor_ratio > 
1 / ratio_threshold).float().mean() + ) return best_possible_recall def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: """Fitness function used for anchor evolve.""" best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) return ( - best_anchor_ratio * (best_anchor_ratio > 1 / ratio_threshold).float() + best_anchor_ratio + * (best_anchor_ratio > 1 / ratio_threshold).float() ).mean() # Genetic algorithm @@ -496,7 +948,9 @@ def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: + mutation_noise_mean ).clip(0.3, 3.0) - mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip(min=2.0) + mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip( + min=2.0 + ) mutated_fitness = anchor_fitness(mutated_anchors, wh) if mutated_fitness > best_fitness: best_fitness = mutated_fitness @@ -517,20 +971,22 @@ def anchors_for_fpn_features( grid_cell_offset: float = 0.5, multiply_with_stride: bool = False, ) -> tuple[Tensor, Tensor, list[int], Tensor]: - """Generates anchor boxes, points and strides based on FPN feature shapes and - strides. + """Generates anchor boxes, points and strides based on FPN feature + shapes and strides. @type features: list[Tensor] @param features: List of FPN features. @type strides: Tensor @param strides: Strides of FPN features. @type grid_cell_size: float - @param grid_cell_size: Cell size in respect to input image size. Defaults to 5.0. + @param grid_cell_size: Cell size in respect to input image size. + Defaults to 5.0. @type grid_cell_offset: float - @param grid_cell_offset: Percent grid cell center's offset. Defaults to 0.5. + @param grid_cell_offset: Percent grid cell center's offset. Defaults + to 0.5. @type multiply_with_stride: bool - @param multiply_with_stride: Whether to multiply per FPN values with its stride. - Defaults to False. + @param multiply_with_stride: Whether to multiply per FPN values with + its stride. Defaults to False. @rtype: tuple[Tensor, Tensor, list[int], Tensor] @return: BBox anchors, center anchors, number of anchors, strides """ @@ -564,7 +1020,9 @@ def anchors_for_fpn_features( anchors.append(anchor) anchor_point = ( - torch.stack([shift_x, shift_y], dim=-1).reshape(-1, 2).to(feature.dtype) + torch.stack([shift_x, shift_y], dim=-1) + .reshape(-1, 2) + .to(feature.dtype) ) anchor_points.append(anchor_point) @@ -583,26 +1041,6 @@ def anchors_for_fpn_features( ) -def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Tensor]: - """Extracts x, y and visibility from keypoints predictions. - - @type keypoints: Tensor - @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 - and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. - - @rtype: tuple[Tensor, Tensor, Tensor] - @return: x, y and visibility tensors. - """ - x = keypoints[..., ::3] * 2.0 - 0.5 - y = keypoints[..., 1::3] * 2.0 - 0.5 - visibility = keypoints[..., 2::3] - return ( - x, - y, - visibility, - ) - - def process_bbox_predictions( bbox: Tensor, anchor: Tensor ) -> tuple[Tensor, Tensor, Tensor]: @@ -613,7 +1051,8 @@ def process_bbox_predictions( @type anchor: Tensor @param anchor: Anchor boxes @rtype: tuple[Tensor, Tensor, Tensor] - @return: xy and wh predictions and tail. The tail is anything after xywh. + @return: xy and wh predictions and tail. The tail is anything after + xywh. 
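The anchor search in anchors_from_dataset above scores proposals with a symmetric size ratio: for every box it takes the worst dimension of its most compatible anchor, and the best possible recall is the fraction of boxes whose ratio clears 1 / ratio_threshold. A compact numeric sketch of that criterion (illustrative values only):

# --- illustrative sketch, not from this patch ---
import torch

anchors = torch.tensor([[10.0, 20.0], [32.0, 32.0]])
wh = torch.tensor([[12.0, 18.0], [100.0, 90.0]])
ratios = torch.min(wh[:, None] / anchors[None], anchors[None] / wh[:, None])  # (boxes, anchors, 2)
best_per_box = ratios.min(-1).values.max(-1).values  # worst side per anchor, best anchor per box
best_possible_recall = (best_per_box > 1 / 4.0).float().mean()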
""" out_bbox = bbox.sigmoid() out_bbox_xy = out_bbox[..., 0:2] * 2.0 - 0.5 @@ -669,10 +1108,12 @@ def compute_iou_loss( else: bbox_mask = torch.ones_like(pred_bboxes, dtype=torch.bool) - pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape([-1, 4]) - target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).reshape( + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape( [-1, 4] ) + target_bboxes_pos = torch.masked_select( + target_bboxes, bbox_mask + ).reshape([-1, 4]) iou = bbox_iou( pred_bboxes_pos, @@ -698,6 +1139,6 @@ def compute_iou_loss( raise ValueError(f"Unknown reduction type `{reduction}`") else: loss_iou = torch.tensor(0.0).to(pred_bboxes.device) - iou = torch.zeros([len(target_bboxes)]).to(pred_bboxes.device) + iou = torch.zeros([target_bboxes.shape[0]]).to(pred_bboxes.device) return loss_iou, iou.detach().clamp(0) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 48661f7d..09cb8795 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -1,59 +1,81 @@ import logging import sys -from enum import Enum -from typing import Annotated, Any, Literal - -from luxonis_ml.data import BucketStorage, BucketType -from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging -from pydantic import BaseModel, Field, field_serializer, model_validator - -from luxonis_train.utils.general import is_acyclic -from luxonis_train.utils.registry import MODELS +from typing import Annotated, Any, Literal, TypeAlias + +from luxonis_ml.data import LabelType +from luxonis_ml.enums import DatasetType +from luxonis_ml.utils import ( + BaseModelExtraForbid, + Environ, + LuxonisConfig, + LuxonisFileSystem, +) +from pydantic import AliasChoices, Field, field_validator, model_validator +from pydantic.types import ( + FilePath, + NonNegativeFloat, + NonNegativeInt, + PositiveInt, +) +from typing_extensions import Self logger = logging.getLogger(__name__) +Params: TypeAlias = dict[str, Any] -class AttachedModuleConfig(BaseModel): + +class AttachedModuleConfig(BaseModelExtraForbid): name: str attached_to: str alias: str | None = None - params: dict[str, Any] = {} + params: Params = {} class LossModuleConfig(AttachedModuleConfig): - weight: float = 1.0 + weight: NonNegativeFloat = 1.0 + + @model_validator(mode="after") + def validate_weight(self) -> Self: + if self.weight == 0: + logger.warning( + f"Loss '{self.name}' has weight set to 0. " + "This loss will not contribute to the training." 
+ ) + return self class MetricModuleConfig(AttachedModuleConfig): is_main_metric: bool = False -class FreezingConfig(BaseModel): +class FreezingConfig(BaseModelExtraForbid): active: bool = False - unfreeze_after: int | float | None = None + unfreeze_after: NonNegativeInt | NonNegativeFloat | None = None -class ModelNodeConfig(BaseModel): +class ModelNodeConfig(BaseModelExtraForbid): name: str alias: str | None = None - inputs: list[str] = [] - params: dict[str, Any] = {} + inputs: list[str] = [] # From preceding nodes + input_sources: list[str] = [] # From data loader freezing: FreezingConfig = FreezingConfig() + task: str | dict[LabelType, str] | None = None + params: Params = {} -class PredefinedModelConfig(BaseModel): +class PredefinedModelConfig(BaseModelExtraForbid): name: str - params: dict[str, Any] = {} include_nodes: bool = True include_losses: bool = True include_metrics: bool = True include_visualizers: bool = True + params: Params = {} -class ModelConfig(BaseModel): - name: str +class ModelConfig(BaseModelExtraForbid): + name: str = "model" predefined_model: PredefinedModelConfig | None = None - weights: str | None = None + weights: FilePath | None = None nodes: list[ModelNodeConfig] = [] losses: list[LossModuleConfig] = [] metrics: list[MetricModuleConfig] = [] @@ -61,9 +83,13 @@ class ModelConfig(BaseModel): outputs: list[str] = [] @model_validator(mode="after") - def check_predefined_model(self): + def check_predefined_model(self) -> Self: + from luxonis_train.utils.registry import MODELS + if self.predefined_model: - logger.info(f"Using predefined model: `{self.predefined_model.name}`") + logger.info( + f"Using predefined model: `{self.predefined_model.name}`" + ) model = MODELS.get(self.predefined_model.name)( **self.predefined_model.params ) @@ -81,13 +107,39 @@ def check_predefined_model(self): return self @model_validator(mode="after") - def check_graph(self): + def check_main_metric(self) -> Self: + for metric in self.metrics: + if metric.is_main_metric: + logger.info(f"Main metric: `{metric.name}`") + return self + + logger.warning("No main metric specified.") + if self.metrics: + metric = self.metrics[0] + metric.is_main_metric = True + name = metric.alias or metric.name + logger.info(f"Setting '{name}' as main metric.") + else: + logger.warning( + "[Ignore if using predefined model] " + "No metrics specified. " + "This is likely unintended unless " + "the configuration is not used for training." 
+ ) + return self + + @model_validator(mode="after") + def check_graph(self) -> Self: + from luxonis_train.utils import is_acyclic + graph = {node.alias or node.name: node.inputs for node in self.nodes} if not is_acyclic(graph): raise ValueError("Model graph is not acyclic.") if not self.outputs: outputs: list[str] = [] # nodes which are not inputs to any nodes - inputs = set(node_name for node in self.nodes for node_name in node.inputs) + inputs = set( + node_name for node in self.nodes for node_name in node.inputs + ) for node in self.nodes: name = node.alias or node.name if name not in inputs: @@ -98,23 +150,29 @@ def check_graph(self): return self @model_validator(mode="after") - def check_unique_names(self): + def check_unique_names(self) -> Self: for section, objects in [ ("nodes", self.nodes), ("losses", self.losses), ("metrics", self.metrics), ("visualizers", self.visualizers), ]: - names = set() + names: set[str] = set() for obj in objects: + obj: AttachedModuleConfig name = obj.alias or obj.name if name in names: - raise ValueError(f"Duplicate name `{name}` in `{section}` section.") + if obj.alias is None: + obj.alias = f"{name}_{obj.attached_to}" + if obj.alias in names: + raise ValueError( + f"Duplicate name `{name}` in `{section}` section." + ) names.add(name) return self -class TrackerConfig(BaseModel): +class TrackerConfig(BaseModelExtraForbid): project_name: str | None = None project_id: str | None = None run_name: str | None = None @@ -126,24 +184,38 @@ class TrackerConfig(BaseModel): is_mlflow: bool = False -class DatasetConfig(BaseModel): - name: str | None = None - id: str | None = None - team_name: str | None = None - team_id: str | None = None - bucket_type: BucketType = BucketType.INTERNAL - bucket_storage: BucketStorage = BucketStorage.LOCAL - json_mode: bool = False - train_view: str = "train" - val_view: str = "val" - test_view: str = "test" - - @field_serializer("bucket_storage", "bucket_type") - def get_enum_value(self, v: Enum, _) -> str: - return str(v.value) - - -class NormalizeAugmentationConfig(BaseModel): +class LoaderConfig(BaseModelExtraForbid): + name: str = "LuxonisLoaderTorch" + image_source: str = "image" + train_view: list[str] = ["train"] + val_view: list[str] = ["val"] + test_view: list[str] = ["test"] + params: Params = {} + + @field_validator("train_view", "val_view", "test_view", mode="before") + @classmethod + def validate_splits(cls, splits: Any) -> list[Any]: + if isinstance(splits, str): + return [splits] + return splits + + @model_validator(mode="after") + def validate_params(self) -> Self: + dataset_type = self.params.get("dataset_type") + if dataset_type is None: + return self + dataset_type = dataset_type.upper() + + if dataset_type not in DatasetType.__members__: + raise ValueError( + f"Dataset type '{dataset_type}' not supported." + f"Supported types are: {', '.join(DatasetType.__members__)}." 
+ ) + self.params["dataset_type"] = DatasetType(dataset_type.lower()) + return self + + +class NormalizeAugmentationConfig(BaseModelExtraForbid): active: bool = True params: dict[str, Any] = { "mean": [0.485, 0.456, 0.406], @@ -151,12 +223,13 @@ class NormalizeAugmentationConfig(BaseModel): } -class AugmentationConfig(BaseModel): +class AugmentationConfig(BaseModelExtraForbid): name: str - params: dict[str, Any] = {} + active: bool = True + params: Params = {} -class PreprocessingConfig(BaseModel): +class PreprocessingConfig(BaseModelExtraForbid): train_image_size: Annotated[ list[int], Field(default=[256, 256], min_length=2, max_length=2) ] = [256, 256] @@ -166,51 +239,79 @@ class PreprocessingConfig(BaseModel): augmentations: list[AugmentationConfig] = [] @model_validator(mode="after") - def check_normalize(self): + def check_normalize(self) -> Self: if self.normalize.active: self.augmentations.append( - AugmentationConfig(name="Normalize", params=self.normalize.params) + AugmentationConfig( + name="Normalize", params=self.normalize.params + ) ) return self + def get_active_augmentations(self) -> list[AugmentationConfig]: + """Returns list of augmentations that are active. + + @rtype: list[AugmentationConfig] + @return: Filtered list of active augmentation configs + """ + return [aug for aug in self.augmentations if aug.active] + -class CallbackConfig(BaseModel): +class CallbackConfig(BaseModelExtraForbid): name: str active: bool = True - params: dict[str, Any] = {} + params: Params = {} -class OptimizerConfig(BaseModel): +class OptimizerConfig(BaseModelExtraForbid): name: str = "Adam" - params: dict[str, Any] = {} + params: Params = {} -class SchedulerConfig(BaseModel): +class SchedulerConfig(BaseModelExtraForbid): name: str = "ConstantLR" - params: dict[str, Any] = {} + params: Params = {} -class TrainerConfig(BaseModel): +class TrainerConfig(BaseModelExtraForbid): preprocessing: PreprocessingConfig = PreprocessingConfig() + use_rich_progress_bar: bool = True - accelerator: Literal["auto", "cpu", "gpu"] = "auto" + accelerator: Literal["auto", "cpu", "gpu", "tpu"] = "auto" devices: int | list[int] | str = "auto" strategy: Literal["auto", "ddp"] = "auto" - num_sanity_val_steps: int = 2 + n_sanity_val_steps: Annotated[ + int, + Field( + validation_alias=AliasChoices( + "n_sanity_val_steps", "num_sanity_val_steps" + ) + ), + ] = 2 profiler: Literal["simple", "advanced"] | None = None + matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True - batch_size: int = 32 - accumulate_grad_batches: int = 1 + seed: int | None = None + deterministic: bool | Literal["warn"] | None = None + batch_size: PositiveInt = 32 + accumulate_grad_batches: PositiveInt = 1 use_weighted_sampler: bool = False - epochs: int = 100 - num_workers: int = 2 - train_metrics_interval: int = -1 - validation_interval: int = 1 - num_log_images: int = 4 + epochs: PositiveInt = 100 + n_workers: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_workers", "num_workers")), + ] = 4 + train_metrics_interval: Literal[-1] | PositiveInt = -1 + validation_interval: Literal[-1] | PositiveInt = 5 + n_log_images: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_log_images", "num_log_images")), + ] = 4 skip_last_batch: bool = True + pin_memory: bool = True log_sub_losses: bool = True - save_top_k: int = 3 + save_top_k: Literal[-1] | NonNegativeInt = 3 callbacks: list[CallbackConfig] = [] @@ -218,42 +319,69 @@ class TrainerConfig(BaseModel): scheduler: SchedulerConfig 
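The n_sanity_val_steps, n_workers, and n_log_images fields above accept both the new n_* keys and the legacy num_* keys through pydantic's AliasChoices. A self-contained illustration of that mechanism, using a hypothetical model rather than the actual config classes:

# --- illustrative sketch, not from this patch ---
from pydantic import AliasChoices, BaseModel, Field

class Demo(BaseModel):
    n_workers: int = Field(
        4, validation_alias=AliasChoices("n_workers", "num_workers")
    )

Demo.model_validate({"num_workers": 8}).n_workers  # -> 8 (legacy key still accepted)
Demo.model_validate({"n_workers": 2}).n_workers    # -> 2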
= SchedulerConfig() @model_validator(mode="after") - def check_num_workes_platform(self): + def validate_deterministic(self) -> Self: + if self.seed is not None and self.deterministic is None: + logger.warning( + "Setting `trainer.deterministic` to True because `trainer.seed` is set." + "This can cause certain layers to fail. " + "In such cases, set `trainer.deterministic` to `'warn'`." + ) + self.deterministic = True + return self + + @model_validator(mode="after") + def check_n_workes_platform(self) -> Self: if ( sys.platform == "win32" or sys.platform == "darwin" - ) and self.num_workers != 0: - self.num_workers = 0 + ) and self.n_workers != 0: + self.n_workers = 0 + logger.warning( + "Setting `n_workers` to 0 because of platform compatibility." + ) + return self + + @model_validator(mode="after") + def check_validation_interval(self) -> Self: + if self.validation_interval > self.epochs: logger.warning( - "Setting `num_workers` to 0 because of platform compatibility." + "Setting `validation_interval` same as `epochs` otherwise no checkpoint would be generated." ) + self.validation_interval = self.epochs return self -class OnnxExportConfig(BaseModel): - opset_version: int = 12 +class OnnxExportConfig(BaseModelExtraForbid): + opset_version: PositiveInt = 12 dynamic_axes: dict[str, Any] | None = None -class BlobconverterExportConfig(BaseModel): +class BlobconverterExportConfig(BaseModelExtraForbid): active: bool = False shaves: int = 6 + version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = ( + "2022.1" + ) -class ExportConfig(BaseModel): - export_save_directory: str = "output_export" +class ArchiveConfig(BaseModelExtraForbid): + name: str | None = None + upload_to_run: bool = True + upload_url: str | None = None + + +class ExportConfig(ArchiveConfig): + name: str | None = None input_shape: list[int] | None = None - export_model_name: str = "model" - data_type: Literal["INT8", "FP16", "FP32"] = "FP16" + data_type: Literal["int8", "fp16", "fp32"] = "fp16" reverse_input_channels: bool = True scale_values: list[float] | None = None mean_values: list[float] | None = None output_names: list[str] | None = None onnx: OnnxExportConfig = OnnxExportConfig() blobconverter: BlobconverterExportConfig = BlobconverterExportConfig() - upload_url: str | None = None @model_validator(mode="after") - def check_values(self): + def check_values(self) -> Self: def pad_values(values: float | list[float] | None): if values is None: return None @@ -265,29 +393,31 @@ def pad_values(values: float | list[float] | None): return self -class StorageConfig(BaseModel): +class StorageConfig(BaseModelExtraForbid): active: bool = True storage_type: Literal["local", "remote"] = "local" -class TunerConfig(BaseModel): +class TunerConfig(BaseModelExtraForbid): study_name: str = "test-study" + continue_existing_study: bool = True use_pruner: bool = True - n_trials: int | None = 15 - timeout: int | None = None + n_trials: PositiveInt | None = 15 + timeout: PositiveInt | None = None storage: StorageConfig = StorageConfig() params: Annotated[ - dict[str, list[str | int | float | bool]], Field(default={}, min_length=1) + dict[str, list[str | int | float | bool | list]], + Field(default={}, min_length=1), ] class Config(LuxonisConfig): - use_rich_text: bool = True - model: ModelConfig - dataset: DatasetConfig = DatasetConfig() - tracker: TrackerConfig = TrackerConfig() - trainer: TrainerConfig = TrainerConfig() - exporter: ExportConfig = ExportConfig() + model: Annotated[ModelConfig, 
Field(default_factory=ModelConfig)] + loader: Annotated[LoaderConfig, Field(default_factory=LoaderConfig)] + tracker: Annotated[TrackerConfig, Field(default_factory=TrackerConfig)] + trainer: Annotated[TrainerConfig, Field(default_factory=TrainerConfig)] + exporter: Annotated[ExportConfig, Field(default_factory=ExportConfig)] + archiver: Annotated[ArchiveConfig, Field(default_factory=ArchiveConfig)] tuner: TunerConfig | None = None ENVIRON: Environ = Field(Environ(), exclude=True) @@ -301,26 +431,20 @@ def check_environment(cls, data: Any) -> Any: ) return data - @model_validator(mode="before") - @classmethod - def setup_logging(cls, data: Any) -> Any: - if isinstance(data, dict): - if data.get("use_rich_text", True): - setup_logging(use_rich=True) - return data - @classmethod def get_config( cls, cfg: str | dict[str, Any] | None = None, - overrides: dict[str, Any] | None = None, + overrides: dict[str, Any] | list[str] | tuple[str, ...] | None = None, ): instance = super().get_config(cfg, overrides) if not isinstance(cfg, str): return instance fs = LuxonisFileSystem(cfg) if fs.is_mlflow: - logger.info("Setting `project_id` and `run_id` to config's MLFlow run") + logger.info( + "Setting `project_id` and `run_id` to config's MLFlow run" + ) instance.tracker.project_id = fs.experiment_id instance.tracker.run_id = fs.run_id return instance diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py new file mode 100644 index 00000000..35ebbef8 --- /dev/null +++ b/luxonis_train/utils/dataset_metadata.py @@ -0,0 +1,154 @@ +from luxonis_train.loaders import BaseLoaderTorch +from luxonis_train.utils import anchors_from_dataset + + +class DatasetMetadata: + """Metadata about the dataset.""" + + def __init__( + self, + *, + classes: dict[str, list[str]] | None = None, + n_keypoints: dict[str, int] | None = None, + loader: BaseLoaderTorch | None = None, + ): + """An object containing metadata about the dataset. Used to + infer the number of classes, number of keypoints, I{etc.} + instead of passing them as arguments to the model. + + @type classes: dict[str, list[str]] | None + @param classes: Dictionary mapping tasks to lists of class + names. + @type n_keypoints: dict[str, int] | None + @param n_keypoints: Dictionary mapping tasks to the number of + keypoints. + @type loader: DataLoader | None + @param loader: Dataset loader. + """ + self._classes = classes or {} + self._n_keypoints = n_keypoints or {} + self._loader = loader + + def n_classes(self, task: str | None = None) -> int: + """Gets the number of classes for the specified task. + + @type task: str | None + @param task: Task to get the number of classes for. + @rtype: int + @return: Number of classes for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of classes for different + label types. + """ + if task is not None: + if task not in self._classes: + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) + return len(self._classes[task]) + n_classes = len(list(self._classes.values())[0]) + for classes in self._classes.values(): + if len(classes) != n_classes: + raise RuntimeError( + "The dataset contains different number of classes for different tasks." + "Please specify the 'task' argument to get the number of classes." 
+ ) + return n_classes + + def n_keypoints(self, task: str | None = None) -> int: + """Gets the number of keypoints for the specified task. + + @type task: str | None + @param task: Task to get the number of keypoints for. + @rtype: int + @return: Number of keypoints for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of keypoints for different + label types. + """ + if task is not None: + if task not in self._n_keypoints: + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) + return self._n_keypoints[task] + n_keypoints = next(iter(self._n_keypoints.values())) + for n in self._n_keypoints.values(): + if n != n_keypoints: + raise RuntimeError( + "The dataset contains different number of keypoints for different tasks." + "Please specify the 'task' argument to get the number of keypoints." + ) + return n_keypoints + + def classes(self, task: str | None = None) -> list[str]: + """Gets the class names for the specified task. + + @type task: str | None + @param task: Task to get the class names for. + @rtype: list[str] + @return: List of class names for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different class names for different label + types. + """ + if task is not None: + if task not in self._classes: + raise ValueError( + f"Task type {task} is not present in the dataset." + ) + return self._classes[task] + class_names = list(self._classes.values())[0] + for classes in self._classes.values(): + if classes != class_names: + raise RuntimeError( + "The dataset contains different class names for different tasks." + ) + return class_names + + def autogenerate_anchors( + self, n_heads: int + ) -> tuple[list[list[float]], float]: + """Automatically generates anchors for the provided dataset. + + @type n_heads: int + @param n_heads: Number of heads to generate anchors for. + @rtype: tuple[list[list[float]], float] + @return: List of anchors in [-1,6] format and recall of the + anchors. + @raises RuntimeError: If the dataset loader was not provided + during initialization. + """ + if self._loader is None: + raise RuntimeError( + "Cannot generate anchors without a dataset loader. " + "Please provide a dataset loader to the constructor " + "or call `set_loader` method." + ) + + proposed_anchors, recall = anchors_from_dataset( + self._loader, n_anchors=n_heads * 3 + ) + return proposed_anchors.reshape(-1, 6).tolist(), recall + + @classmethod + def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": + """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. + + @type dataset: LuxonisDataset + @param dataset: Dataset to create the metadata from. + @rtype: DatasetMetadata + @return: Instance of L{DatasetMetadata} created from the + provided dataset. 
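DatasetMetadata can also be built directly from per-task dictionaries instead of a loader; a minimal sketch with hypothetical task names (illustrative only):

# --- illustrative sketch, not from this patch ---
from luxonis_train.utils import DatasetMetadata

metadata = DatasetMetadata(
    classes={"detection": ["person", "car"], "keypoints": ["person"]},
    n_keypoints={"keypoints": 17},
)
metadata.n_classes("detection")    # -> 2
metadata.n_keypoints("keypoints")  # -> 17
metadata.classes("keypoints")      # -> ["person"]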
+ """ + classes = loader.get_classes() + n_keypoints = loader.get_n_keypoints() + + instance = cls(classes=classes, n_keypoints=n_keypoints, loader=loader) + return instance diff --git a/luxonis_train/utils/exceptions.py b/luxonis_train/utils/exceptions.py new file mode 100644 index 00000000..bab8c1aa --- /dev/null +++ b/luxonis_train/utils/exceptions.py @@ -0,0 +1,12 @@ +class IncompatibleException(Exception): + """Raised when two parts of the model are incompatible with each + other.""" + + @classmethod + def from_missing_task( + cls, task: str, present_tasks: list[str], class_name: str + ): + return cls( + f"{class_name} requires '{task}' label, but it was not found in " + f"the label dictionary. Available labels: {present_tasks}." + ) diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 9ea5884d..45013807 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -1,299 +1,141 @@ import logging import math -from typing import Generator, TypeVar +from typing import TypeVar -from luxonis_ml.data import LuxonisDataset -from pydantic import BaseModel from torch import Size, Tensor -from torch.utils.data import DataLoader -from luxonis_train.utils.boxutils import anchors_from_dataset -from luxonis_train.utils.types import LabelType, Packet +from luxonis_train.utils.types import Packet +logger = logging.getLogger(__name__) -# TODO: could be moved to luxonis-ml? -# TODO: support multiclass keypoints -class DatasetMetadata: - """Metadata about the dataset.""" - def __init__( - self, - *, - classes: dict[LabelType, list[str]] | None = None, - n_classes: int | None = None, - n_keypoints: int | None = None, - keypoint_names: list[str] | None = None, - connectivity: list[tuple[int, int]] | None = None, - loader: DataLoader | None = None, - ): - """An object containing metadata about the dataset. Used to infer the number of - classes, number of keypoints, I{etc.} instead of passing them as arguments to - the model. - - @type classes: dict[LabelType, list[str]] | None - @param classes: Dictionary mapping label types to lists of class names. If not - provided, will be inferred from the dataset loader. - @type n_classes: int | None - @param n_classes: Number of classes for each label type. - @type n_keypoints: int | None - @param n_keypoints: Number of keypoints in the dataset. - @type keypoint_names: list[str] | None - @param keypoint_names: List of keypoint names. - @type connectivity: list[tuple[int, int]] | None - @param connectivity: List of edges in the skeleton graph. - @type loader: DataLoader | None - @param loader: Dataset loader. - """ - if classes is None and n_classes is not None: - classes = { - LabelType(lbl): [str(i) for i in range(n_classes)] - for lbl in LabelType.__members__ - } - self._classes = classes - self._keypoint_names = keypoint_names - self._connectivity = connectivity - self._n_keypoints = n_keypoints - if self._n_keypoints is None and self._keypoint_names is not None: - self._n_keypoints = len(self._keypoint_names) - self._loader = loader - - @property - def classes(self) -> dict[LabelType, list[str]]: - """Dictionary mapping label types to lists of class names. - - @type: dict[LabelType, list[str]] - @raises ValueError: If classes were not provided during initialization. - """ - if self._classes is None: - raise ValueError( - "Trying to access `classes`, byt they were not" - "provided during initialization." 
- ) - return self._classes - - def n_classes(self, label_type: LabelType | None) -> int: - """Gets the number of classes for the specified label type. +def make_divisible(x: int | float, divisor: int) -> int: + """Upward revision the value x to make it evenly divisible by the + divisor. - @type label_type: L{LabelType} | None - @param label_type: Label type to get the number of classes for. - @rtype: int - @return: Number of classes for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different number of classes for - different label types. - """ - if label_type is not None: - if label_type not in self.classes: - raise ValueError( - f"Task type {label_type.name} is not present in the dataset." - ) - return len(self.classes[label_type]) - n_classes = len(list(self.classes.values())[0]) - for classes in self.classes.values(): - if len(classes) != n_classes: - raise ValueError( - "The dataset contains different number of classes for different tasks." - ) - return n_classes + Equivalent to M{ceil(x / divisor) * divisor}. - def class_names(self, label_type: LabelType | None) -> list[str]: - """Gets the class names for the specified label type. + @type x: int | float + @param x: Value to be revised. + @type divisor: int + @param divisor: Divisor. + @rtype: int + @return: Revised value. + """ + return math.ceil(x / divisor) * divisor - @type label_type: L{LabelType} | None - @param label_type: Label type to get the class names for. - @rtype: list[str] - @return: List of class names for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different class names for different - label types. - """ - if label_type is not None: - if label_type not in self.classes: - raise ValueError( - f"Task type {label_type.name} is not present in the dataset." - ) - return self.classes[label_type] - class_names = list(self.classes.values())[0] - for classes in self.classes.values(): - if classes != class_names: - raise ValueError( - "The dataset contains different class names for different tasks." - ) - return class_names - def autogenerate_anchors(self, n_heads: int) -> tuple[list[list[float]], float]: - """Automatically generates anchors for the provided dataset. +def infer_upscale_factor( + in_size: tuple[int, int] | int, orig_size: tuple[int, int] | int +) -> int: + """Infer the upscale factor from the input shape and the original + shape. + + @type in_size: tuple[int, int] | int + @param in_size: Input shape as a tuple of (height, width) or just + one of them. + @type orig_size: tuple[int, int] | int + @param orig_size: Original shape as a tuple of (height, width) or + just one of them. + @rtype: int + @return: Upscale factor. + @raise ValueError: If the C{in_size} cannot be upscaled to the + C{orig_size}. This can happen if the upscale factors are not + integers or are different. + """ - @type n_heads: int - @param n_heads: Number of heads to generate anchors for. - @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the anchors. - @raises ValueError: If the dataset loader was not provided during - initialization. 
- """ - if self.loader is None: + def _infer_upscale_factor(in_size: int, orig_size: int) -> int | float: + factor = math.log2(orig_size) - math.log2(in_size) + if abs(round(factor) - factor) < 1e-6: + return int(round(factor)) + return factor + + if isinstance(in_size, int): + in_size = (in_size, in_size) + if isinstance(orig_size, int): + orig_size = (orig_size, orig_size) + in_height, in_width = in_size + orig_height, orig_width = orig_size + + width_factor = _infer_upscale_factor(in_width, orig_width) + height_factor = _infer_upscale_factor(in_height, orig_height) + + match (width_factor, height_factor): + case (int(wf), int(hf)) if wf == hf: + return wf + case (int(wf), int(hf)): raise ValueError( - "Cannot generate anchors without a dataset loader. " - "Please provide a dataset loader to the constructor " - "or call `set_loader` method." + f"Width and height upscale factors are different. " + f"Width: {wf}, height: {hf}." ) - - proposed_anchors, recall = anchors_from_dataset( - self.loader, n_anchors=n_heads * 3 - ) - return proposed_anchors.reshape(-1, 6).tolist(), recall - - def set_loader(self, loader: DataLoader) -> None: - """Sets the dataset loader. - - @type loader: DataLoader - @param loader: Dataset loader. - """ - self.loader = loader - - @classmethod - def from_dataset(cls, dataset: LuxonisDataset) -> "DatasetMetadata": - """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. - - @type dataset: LuxonisDataset - @param dataset: Dataset to create the metadata from. - @rtype: DatasetMetadata - @return: Instance of L{DatasetMetadata} created from the provided dataset. - """ - _, classes = dataset.get_classes() - skeletons = dataset.get_skeletons() - - keypoint_names = None - connectivity = None - - if len(skeletons) == 1: - name = list(skeletons.keys())[0] - keypoint_names = skeletons[name]["labels"] - connectivity = skeletons[name]["edges"] - - elif len(skeletons) > 1: - raise NotImplementedError( - "The dataset defines multiclass keypoint detection. " - "This is not yet supported." + case (int(wf), float(hf)): + raise ValueError( + f"Width upscale factor is an integer, but height upscale factor is not. " + f"Width: {wf}, height: {hf}." + ) + case (float(wf), int(hf)): + raise ValueError( + f"Height upscale factor is an integer, but width upscale factor is not. " + f"Width: {wf}, height: {hf}." + ) + case (float(wf), float(hf)): + raise ValueError( + "Width and height upscale factors are not integers. " + f"Width: {wf}, height: {hf}." ) - return cls( - classes=classes, - keypoint_names=keypoint_names, - connectivity=connectivity, - ) - - -def make_divisible(x: int | float, divisor: int) -> int: - """Upward revision the value x to make it evenly divisible by the divisor.""" - return math.ceil(x / divisor) * divisor - + raise NotImplementedError( + f"Unexpected case: {width_factor}, {height_factor}" + ) -def infer_upscale_factor( - in_height: int, orig_height: int, strict: bool = True, warn: bool = True -) -> int: - """Infer the upscale factor from the input height and original height.""" - num_up = math.log2(orig_height) - math.log2(in_height) - if num_up.is_integer(): - return int(num_up) - elif not strict: - if warn: - logging.getLogger(__name__).warning( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." - ) - return round(num_up) - else: - raise ValueError( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." 
- ) +def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: + """Converts a packet of tensors to a packet of shapes. Used for + debugging purposes. -def get_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: + @type packet: Packet[Tensor] + @param packet: Packet of tensors. + @rtype: Packet[Size] + @return: Packet of shapes. + """ shape_packet: Packet[Size] = {} for name, value in packet.items(): shape_packet[name] = [x.shape for x in value] return shape_packet -def is_acyclic(graph: dict[str, list[str]]) -> bool: - """Tests if graph is acyclic. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). - @rtype: bool - @return: True if graph is acyclic, False otherwise. - """ - graph = graph.copy() - - def dfs(node: str, visited: set[str], recursion_stack: set[str]): - visited.add(node) - recursion_stack.add(node) - - for predecessor in graph.get(node, []): - if predecessor in recursion_stack: - return True - if predecessor not in visited: - if dfs(predecessor, visited, recursion_stack): - return True - - recursion_stack.remove(node) - return False - - visited: set[str] = set() - recursion_stack: set[str] = set() - - for node in graph.keys(): - if node not in visited: - if dfs(node, visited, recursion_stack): - return False - - return True - - -def validate_packet(data: Packet[Tensor], protocol: type[BaseModel]) -> Packet[Tensor]: - return protocol(**data).model_dump() - - T = TypeVar("T") -# TEST: -def traverse_graph( - graph: dict[str, list[str]], nodes: dict[str, T] -) -> Generator[tuple[str, T, list[str], set[str]], None, None]: - """Traverses the graph in topological order. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). - @type nodes: dict[str, T] - @param nodes: Dictionary mapping node names to node objects. - @rtype: Generator[tuple[str, T, list[str], set[str]], None, None] - @return: Generator of tuples containing node name, node object, node dependencies - and unprocessed nodes. - @raises RuntimeError: If the graph is malformed. +def get_with_default( + value: T | None, + action_name: str, + caller_name: str | None = None, + *, + default: T, +) -> T: + """Returns value if it is not C{None}, otherwise returns the default + value and log an info. + + @type value: T | None + @param value: Value to return. + @type action_name: str + @param action_name: Name of the action for which the default value + is being used. Used for logging. + @type caller_name: str | None + @param caller_name: Name of the caller function. Used for logging. + @type default: T + @param default: Default value to return if C{value} is C{None}. + @rtype: T + @return: C{value} if it is not C{None}, otherwise C{default}. """ - unprocessed_nodes = set(nodes.keys()) - processed: set[str] = set() + if value is not None: + return value - while unprocessed_nodes: - unprocessed_nodes_copy = unprocessed_nodes.copy() - for node_name in unprocessed_nodes_copy: - node_dependencies = graph[node_name] - if not node_dependencies or all( - dependency in processed for dependency in node_dependencies - ): - yield node_name, nodes[node_name], node_dependencies, unprocessed_nodes - processed.add(node_name) - unprocessed_nodes.remove(node_name) + msg = f"Default value of {value} is being used for {action_name}." 
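Once the body just below is in place, typical usage of get_with_default looks roughly like this; the action and caller names are made up for illustration:

from luxonis_train.utils.general import get_with_default

n_workers = get_with_default(None, "number of workers", "LuxonisLoaderTorch", default=4)
# logs "[LuxonisLoaderTorch] Default value of None is being used for number of workers." and returns 4
n_workers = get_with_default(8, "number of workers", "LuxonisLoaderTorch", default=4)
# returns 8 without logging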
- if unprocessed_nodes_copy == unprocessed_nodes: - raise RuntimeError( - "Malformed graph. " - "Please check that all nodes are connected in a directed acyclic graph." - ) + if caller_name: + msg = f"[{caller_name}] {msg}" + + logger.info(msg, stacklevel=2) + return default diff --git a/luxonis_train/utils/graph.py b/luxonis_train/utils/graph.py new file mode 100644 index 00000000..a2b72832 --- /dev/null +++ b/luxonis_train/utils/graph.py @@ -0,0 +1,92 @@ +from copy import deepcopy +from typing import Iterator, TypeAlias, TypeVar + +Graph: TypeAlias = dict[str, list[str]] +"""Graph in a format of a dictionary of predecessors. + +Keys are node names, values are inputs to the node (list of node names). +""" + + +def is_acyclic(graph: Graph) -> bool: + """Tests if graph is acyclic. + + @type graph: dict[str, list[str]] + @param graph: Graph in a format of a dictionary of predecessors. + Keys are node names, values are inputs to the node (list of node + names). + @rtype: bool + @return: True if graph is acyclic, False otherwise. + """ + graph = graph.copy() + + def dfs(node: str, visited: set[str], recursion_stack: set[str]): + visited.add(node) + recursion_stack.add(node) + + for predecessor in graph.get(node, []): + if predecessor in recursion_stack: + return True + if predecessor not in visited: + if dfs(predecessor, visited, recursion_stack): + return True + + recursion_stack.remove(node) + return False + + visited: set[str] = set() + recursion_stack: set[str] = set() + + for node in graph.keys(): + if node not in visited: + if dfs(node, visited, recursion_stack): + return False + + return True + + +T = TypeVar("T") + + +def traverse_graph( + graph: Graph, nodes: dict[str, T] +) -> Iterator[tuple[str, T, list[str], list[str]]]: + """Traverses the graph in topological order. + + @type graph: dict[str, list[str]] + @param graph: Graph in a format of a dictionary of predecessors. + Keys are node names, values are inputs to the node (list of node + names). + @type nodes: dict[str, T] + @param nodes: Dictionary mapping node names to node objects. + @rtype: Iterator[tuple[str, T, list[str], list[str]]] + @return: Iterator of tuples containing node name, node object, node + dependencies and unprocessed nodes. + @raises RuntimeError: If the graph is malformed. + """ + # sort the set to allow reproducibility + unprocessed_nodes = sorted(set(nodes.keys())) + processed: set[str] = set() + + graph = deepcopy(graph) + while unprocessed_nodes: + unprocessed_nodes_copy = unprocessed_nodes.copy() + for node_name in unprocessed_nodes_copy: + node_dependencies = graph[node_name] + if not node_dependencies or all( + dependency in processed for dependency in node_dependencies + ): + unprocessed_nodes.remove(node_name) + yield ( + node_name, + nodes[node_name], + node_dependencies, + unprocessed_nodes.copy(), + ) + processed.add(node_name) + + if unprocessed_nodes_copy == unprocessed_nodes: + raise RuntimeError( + "Malformed graph. " + "Please check that all nodes are connected in a directed acyclic graph." + ) diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py new file mode 100644 index 00000000..9fbc741d --- /dev/null +++ b/luxonis_train/utils/keypoints.py @@ -0,0 +1,85 @@ +import logging + +import torch +from torch import Tensor + +logger = logging.getLogger(__name__) + + +def process_keypoints_predictions( + keypoints: Tensor, +) -> tuple[Tensor, Tensor, Tensor]: + """Extracts x, y and visibility from keypoints predictions. 
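A small, self-contained sketch of the two graph utilities added above; the node objects here are plain strings, which is all traverse_graph needs for illustration:

from luxonis_train.utils.graph import is_acyclic, traverse_graph

graph = {"backbone": [], "neck": ["backbone"], "head": ["neck"]}
nodes = {name: name.upper() for name in graph}

assert is_acyclic(graph)
order = [name for name, _, _, _ in traverse_graph(graph, nodes)]
assert order == ["backbone", "neck", "head"]   # dependencies always come first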
+ + @type keypoints: Tensor + @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 + and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. + + @rtype: tuple[Tensor, Tensor, Tensor] + @return: x, y and visibility tensors. + """ + x = keypoints[..., ::3] + y = keypoints[..., 1::3] + visibility = keypoints[..., 2::3] + return x, y, visibility + + +def get_sigmas( + sigmas: list[float] | None, + n_keypoints: int, + caller_name: str | None = None, +) -> Tensor: + """Validate or create sigma values for each keypoint. + + @type sigmas: list[float] | None + @param sigmas: List of sigmas for each keypoint. If C{None}, then + default sigmas are used. + @type n_keypoints: int + @param n_keypoints: Number of keypoints. + @type caller_name: str | None + @param caller_name: Name of the caller function. Used for logging. + @rtype: Tensor + @return: Tensor of sigmas. + """ + if sigmas is not None: + if len(sigmas) == n_keypoints: + return torch.tensor(sigmas, dtype=torch.float32) + else: + error_msg = "The length of the sigmas list must be the same as the number of keypoints." + if caller_name: + error_msg = f"[{caller_name}] {error_msg}" + raise ValueError(error_msg) + else: + if n_keypoints == 17: + msg = "Default COCO sigmas are being used." + if caller_name: + msg = f"[{caller_name}] {msg}" + logger.warning(msg) + return torch.tensor( + [ + 0.026, + 0.025, + 0.025, + 0.035, + 0.035, + 0.079, + 0.079, + 0.072, + 0.072, + 0.062, + 0.062, + 0.107, + 0.107, + 0.087, + 0.087, + 0.089, + 0.089, + ], + dtype=torch.float32, + ) + else: + msg = "Default sigma of 0.04 is being used for each keypoint." + if caller_name: + msg = f"[{caller_name}] {msg}" + logger.info(msg) + return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/utils/loaders/__init__.py deleted file mode 100644 index fe5cc4e8..00000000 --- a/luxonis_train/utils/loaders/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base_loader import collate_fn -from .luxonis_loader_torch import LuxonisLoaderTorch - -__all__ = ["LuxonisLoaderTorch", "collate_fn"] diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py deleted file mode 100644 index 93f3fd0c..00000000 --- a/luxonis_train/utils/loaders/base_loader.py +++ /dev/null @@ -1,95 +0,0 @@ -from abc import ABC, abstractmethod, abstractproperty - -import torch -from luxonis_ml.utils.registry import AutoRegisterMeta -from torch import Size, Tensor -from torch.utils.data import Dataset - -from luxonis_train.utils.registry import LOADERS -from luxonis_train.utils.types import Labels, LabelType - -LuxonisLoaderTorchOutput = tuple[Tensor, Labels] -"""LuxonisLoaderTorchOutput is a tuple of images and corresponding labels.""" - - -class BaseLoaderTorch( - Dataset[LuxonisLoaderTorchOutput], - ABC, - metaclass=AutoRegisterMeta, - register=False, - registry=LOADERS, -): - """Base abstract loader class that enforces LuxonisLoaderTorchOutput output label - structure.""" - - @abstractproperty - def input_shape(self) -> Size: - """Input shape in [N,C,H,W] format.""" - ... - - @abstractmethod - def __len__(self) -> int: - """Returns length of the dataset.""" - ... - - @abstractmethod - def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: - """Loads sample from dataset. - - @type idx: int - @param idx: Sample index. - @rtype: L{LuxonisLoaderTorchOutput} - @return: Sample's data in L{LuxonisLoaderTorchOutput} format - """ - ... 
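Stepping back to the new keypoints helpers introduced above, a brief usage sketch with arbitrarily chosen shapes:

import torch
from luxonis_train.utils.keypoints import get_sigmas, process_keypoints_predictions

preds = torch.rand(2, 10, 9)        # 2 images, 10 instances, 3 keypoints as [x, y, v] triplets
x, y, v = process_keypoints_predictions(preds)
assert x.shape == (2, 10, 3)

sigmas = get_sigmas(None, n_keypoints=3)   # no sigmas given -> falls back to 0.04 per keypoint
assert torch.allclose(sigmas, torch.full((3,), 0.04))
# get_sigmas([0.1, 0.2], n_keypoints=3) would raise ValueError (length mismatch)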
- - -def collate_fn( - batch: list[LuxonisLoaderTorchOutput], -) -> tuple[Tensor, dict[LabelType, Tensor]]: - """Default collate function used for training. - - @type batch: list[LuxonisLoaderTorchOutput] - @param batch: List of images and their annotations in the LuxonisLoaderTorchOutput - format. - @rtype: tuple[Tensor, dict[LabelType, Tensor]] - @return: Tuple of images and annotations in the format expected by the model. - """ - zipped = zip(*batch) - imgs, anno_dicts = zipped - imgs = torch.stack(imgs, 0) - - present_annotations = anno_dicts[0].keys() - out_annotations: dict[LabelType, Tensor] = { - anno: torch.empty(0) for anno in present_annotations - } - - if LabelType.CLASSIFICATION in present_annotations: - class_annos = [anno[LabelType.CLASSIFICATION] for anno in anno_dicts] - out_annotations[LabelType.CLASSIFICATION] = torch.stack(class_annos, 0) - - if LabelType.SEGMENTATION in present_annotations: - seg_annos = [anno[LabelType.SEGMENTATION] for anno in anno_dicts] - out_annotations[LabelType.SEGMENTATION] = torch.stack(seg_annos, 0) - - if LabelType.BOUNDINGBOX in present_annotations: - bbox_annos = [anno[LabelType.BOUNDINGBOX] for anno in anno_dicts] - label_box: list[Tensor] = [] - for i, box in enumerate(bbox_annos): - l_box = torch.zeros((box.shape[0], 6)) - l_box[:, 0] = i # add target image index for build_targets() - l_box[:, 1:] = box - label_box.append(l_box) - out_annotations[LabelType.BOUNDINGBOX] = torch.cat(label_box, 0) - - if LabelType.KEYPOINT in present_annotations: - keypoint_annos = [anno[LabelType.KEYPOINT] for anno in anno_dicts] - label_keypoints: list[Tensor] = [] - for i, points in enumerate(keypoint_annos): - l_kps = torch.zeros((points.shape[0], points.shape[1] + 1)) - l_kps[:, 0] = i # add target image index for build_targets() - l_kps[:, 1:] = points - label_keypoints.append(l_kps) - out_annotations[LabelType.KEYPOINT] = torch.cat(label_keypoints, 0) - - return imgs, out_annotations diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py deleted file mode 100644 index a0e1f324..00000000 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ /dev/null @@ -1,39 +0,0 @@ -import numpy as np -from luxonis_ml.data import Augmentations, LuxonisDataset, LuxonisLoader -from torch import Size, Tensor - -from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput - - -class LuxonisLoaderTorch(BaseLoaderTorch): - def __init__( - self, - dataset: LuxonisDataset, - view: str = "train", - stream: bool = False, - augmentations: Augmentations | None = None, - ): - self.base_loader = LuxonisLoader( - dataset=dataset, - view=view, - stream=stream, - augmentations=augmentations, - ) - - def __len__(self) -> int: - return len(self.base_loader) - - @property - def input_shape(self) -> Size: - img, _ = self[0] - return Size([1, *img.shape]) - - def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: - img, annotations = self.base_loader[idx] - - img = np.transpose(img, (2, 0, 1)) # HWC to CHW - tensor_img = Tensor(img) - for key in annotations: - annotations[key] = Tensor(annotations[key]) # type: ignore - - return tensor_img, annotations diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 7f76df7c..02532d32 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -1,31 +1,46 @@ -"""This module implements a metaclass for automatic registration of classes.""" - +"""This module implements a metaclass for automatic registration of 
+classes.""" +import lightning.pytorch as pl from luxonis_ml.utils.registry import Registry +from torch.optim.lr_scheduler import _LRScheduler +from torch.optim.optimizer import Optimizer + +import luxonis_train as lt -CALLBACKS = Registry(name="callbacks") +CALLBACKS: Registry[type[pl.Callback]] = Registry(name="callbacks") """Registry for all callbacks.""" -LOADERS = Registry(name="loaders") +LOADERS: Registry[type["lt.loaders.BaseLoaderTorch"]] = Registry( + name="loaders" +) """Registry for all loaders.""" -LOSSES = Registry(name="losses") +LOSSES: Registry[type["lt.attached_modules.BaseLoss"]] = Registry( + name="losses" +) """Registry for all losses.""" -METRICS = Registry(name="metrics") +METRICS: Registry[type["lt.attached_modules.BaseMetric"]] = Registry( + name="metrics" +) """Registry for all metrics.""" -MODELS = Registry(name="models") +MODELS: Registry[type["lt.models.BasePredefinedModel"]] = Registry( + name="models" +) """Registry for all models.""" -NODES = Registry(name="nodes") +NODES: Registry[type["lt.nodes.BaseNode"]] = Registry(name="nodes") """Registry for all nodes.""" -OPTIMIZERS = Registry(name="optimizers") +OPTIMIZERS: Registry[type[Optimizer]] = Registry(name="optimizers") """Registry for all optimizers.""" -SCHEDULERS = Registry(name="schedulers") +SCHEDULERS: Registry[type[_LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" -VISUALIZERS = Registry(name="visualizers") +VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry( + "visualizers" +) """Registry for all visualizers.""" diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index 13c77cb2..35d7af70 100644 --- a/luxonis_train/utils/tracker.py +++ b/luxonis_train/utils/tracker.py @@ -1,8 +1,43 @@ +from typing import Any + from lightning.pytorch.loggers.logger import Logger +from lightning.pytorch.utilities import rank_zero_only # type: ignore from luxonis_ml.tracker import LuxonisTracker class LuxonisTrackerPL(LuxonisTracker, Logger): - """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" + """Implementation of LuxonisTracker that is compatible with + PytorchLightning.""" + + def __init__(self, *, _auto_finalize: bool = True, **kwargs: Any): + """ + @type _auto_finalize: bool + @param _auto_finalize: If True, the run will be finalized automatically when the training ends. + If set to C{False}, the user will have to call the L{_finalize} method manually. + + @type kwargs: dict + @param kwargs: Additional keyword arguments to be passed to the L{LuxonisTracker}. + """ + LuxonisTracker.__init__(self, **kwargs) + Logger.__init__(self) + if _auto_finalize: + self.finalize = self._finalize - ... 
+ @rank_zero_only + def _finalize(self, status: str = "success") -> None: # pragma: no cover + """Finalizes current run.""" + if self.is_tensorboard: + self.experiment["tensorboard"].flush() + self.experiment["tensorboard"].close() + if self.is_mlflow: + if status in ["success", "finished"]: + mlflow_status = "FINISHED" + else: + mlflow_status = "FAILED" + self.experiment["mlflow"].end_run(mlflow_status) + if self.is_wandb: + if status == "success": + wandb_status = 0 + else: + wandb_status = 1 + self.experiment["wandb"].finish(wandb_status) diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index dbbf471e..3a7ca7f4 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -1,19 +1,21 @@ -from typing import Annotated, Any, Literal, TypeVar +from typing import Any, Literal, TypeVar -from luxonis_ml.enums import LabelType -from pydantic import BaseModel, Field, ValidationError +from luxonis_ml.data import LabelType from torch import Size, Tensor Kwargs = dict[str, Any] -OutputTypes = Literal["boxes", "class", "keypoints", "segmentation", "features"] -Labels = dict[LabelType, Tensor] +"""Kwargs is a dictionary containing keyword arguments.""" + +Labels = dict[str, tuple[Tensor, LabelType]] +"""Labels is a dictionary containing a tuple of tensors and their +corresponding label type.""" AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int] -"""AttachIndexType is used to specify to which output of the prevoius node does the -current node attach to. +"""AttachIndexType is used to specify to which output of the prevoius +node does the current node attach to. -It can be either "all" (all outputs), an index of the output or a tuple of indices of -the output (specifying a range of outputs). +It can be either "all" (all outputs), an index of the output or a tuple +of indices of the output (specifying a range of outputs). """ T = TypeVar("T", Tensor, Size) @@ -22,44 +24,3 @@ It is used to pass data between different nodes of the network graph. """ - - -class IncompatibleException(Exception): - """Raised when two parts of the model are incompatible with each other.""" - - @classmethod - def from_validation_error(cls, val_error: ValidationError, class_name: str): - return cls( - f"{class_name} received an input not conforming to the protocol. " - f"Validation error: {val_error.errors(include_input=False, include_url=False)}." - ) - - @classmethod - def from_missing_label( - cls, label: LabelType, present_labels: list[LabelType], class_name: str - ): - return cls( - f"{class_name} requires {label} label, but it was not found in " - f"the label dictionary. Available labels: {present_labels}." 
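For the tracker change above, a hedged sketch of the manual-finalization path; the LuxonisTracker keyword arguments are taken from the configs elsewhere in this PR and may not be exhaustive:

from luxonis_train.utils.tracker import LuxonisTrackerPL

tracker = LuxonisTrackerPL(
    project_name="Parking_Lot",            # kwargs forwarded to LuxonisTracker
    save_directory="tests/integration/save-directory",
    is_tensorboard=True,
    _auto_finalize=False,                   # opt out of finalizing when training ends
)
# ... run training ...
tracker._finalize(status="success")         # flushes/closes TensorBoard, MLflow and W&B runs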
- ) - - -class BaseProtocol(BaseModel): - class Config: - arbitrary_types_allowed = True - - -class SegmentationProtocol(BaseProtocol): - segmentation: Annotated[list[Tensor], Field(min_length=1)] - - -class KeypointProtocol(BaseProtocol): - keypoints: Annotated[list[Tensor], Field(min_length=1)] - - -class BBoxProtocol(BaseProtocol): - boxes: Annotated[list[Tensor], Field(min_length=1)] - - -class FeaturesProtocol(BaseProtocol): - features: Annotated[list[Tensor], Field(min_length=1)] diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 12876e69..34387324 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -9,13 +9,13 @@ - + coverage coverage - 78% - 78% + 97% + 97% diff --git a/pyproject.toml b/pyproject.toml index 048c005b..d65978d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [project] name = "luxonis-train" -version = "0.0.1" description = "Luxonis training framework for seamless training of various neural networks." readme = "README.md" requires-python = ">=3.10" @@ -8,7 +7,7 @@ license = { file = "LICENSE" } authors = [{ name = "Luxonis", email = "support@luxonis.com" }] maintainers = [{ name = "Luxonis", email = "support@luxonis.com" }] keywords = ["ml", "training", "luxonis", "oak"] -dynamic = ["dependencies", "optional-dependencies"] +dynamic = ["dependencies", "optional-dependencies", "version"] classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 3 - Alpha", @@ -19,7 +18,7 @@ classifiers = [ ] [project.scripts] -luxonis_train = "tools.main:main" +luxonis_train = "luxonis_train.__main__:app" [project.urls] repository = "https://github.com/luxonis/luxonis-train" @@ -35,10 +34,11 @@ where = ["."] [tool.setuptools.dynamic] dependencies = { file = ["requirements.txt"] } optional-dependencies = { dev = { file = ["requirements-dev.txt"] } } +version = {attr = "luxonis_train.__version__"} [tool.ruff] target-version = "py310" -line-length = 88 +line-length = 79 indent-width = 4 [tool.ruff.lint] @@ -47,10 +47,44 @@ select = ["E4", "E7", "E9", "F", "W", "B", "I"] [tool.docformatter] black = true - -[tool.mypy] -python_version = "3.10" -ignore_missing_imports = true +wrap-summaries = 72 +wrap-descriptions = 72 [tool.pyright] typeCheckingMode = "basic" +reportMissingTypeStubs = "none" +reportPrivateImportUsage = "none" +reportPrivateUsage = "none" +reportIncompatibleVariableOverride = "none" +reportIncompatibleMethodOverride = "none" +reportUnnecessaryIsInstance = "none" + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--disable-warnings" +markers = [ + "unit: mark a test as a unit test", + "integration: mark a test as an integration test", +] + +[tool.coverage.run] +omit = [ + "**/__main__.py", + "**/gpu_stats_monitor.py" +] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "def __rich_repr__", + "def __str__", + "assert", + "raise NotImplementedError", + "except ImportError", + "@abstractmethod", + "@overload", + "exit\\(\\)", + "cv2\\.imshow", + "cv2\\.waitKey", + "logger\\.", +] diff --git a/requirements-dev.txt b/requirements-dev.txt index a919d265..e4dbd194 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,3 +3,6 @@ gdown>=4.2.0 pre-commit>=3.2.1 opencv-stubs>=0.0.8 pytest-cov>=4.1.0 +pytest-subtests>=0.12.1 +pytest-md>=0.2.0 +pytest-order>=1.3.0 diff --git a/requirements.txt b/requirements.txt index eecf828e..8bec2286 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,13 @@ blobconverter>=1.4.2 -lightning>=2.0.0 
-luxonis-ml[all]>=0.0.1 +lightning>=2.4.0 +#luxonis-ml[all]>=0.1.0 +luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev onnx>=1.12.0 onnxruntime>=1.13.1 onnxsim>=0.4.10 -optuna>=3.2.0 +optuna>=3.6.0 +optuna-integration>=3.6.0 +parameterized>=0.9.0 psycopg2-binary>=2.9.1 pycocotools>=2.0.7 rich>=13.0.0 @@ -12,3 +15,6 @@ s3fs>=2023.0.0 tensorboard>=2.10.1 torchvision>=0.16.0 typer>=0.9.0 +mlflow>=2.10.0 +psutil>=5.0.0 +tabulate>=0.9.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml new file mode 100644 index 00000000..71589f4d --- /dev/null +++ b/tests/configs/archive_config.yaml @@ -0,0 +1,43 @@ + +model: + name: archive_test + nodes: + - name: EfficientRep + + - name: EfficientBBoxHead + inputs: + - EfficientRep + + - name: EfficientKeypointBBoxHead + inputs: + - EfficientRep + + - name: ImplicitKeypointBBoxHead + inputs: + - EfficientRep + + - name: SegmentationHead + inputs: + - EfficientRep + + - name: BiSeNetHead + inputs: + - EfficientRep + + - name: ClassificationHead + inputs: + - EfficientRep + +exporter: + output_names: + - seg0 + - class0 + - bbox0 + - bbox1 + - bbox2 + - effkpt0 + - effkpt1 + - effkpt2 + - impl + - seg1 + diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml new file mode 100644 index 00000000..bb15ac37 --- /dev/null +++ b/tests/configs/parking_lot_config.yaml @@ -0,0 +1,172 @@ + +model: + name: parking_lot_model + nodes: + + - name: EfficientRep + alias: backbone + + - name: RepPANNeck + alias: neck + inputs: + - backbone + + - name: EfficientBBoxHead + alias: bbox-head + inputs: + - neck + + - name: ImplicitKeypointBBoxHead + alias: car-detection-head + inputs: + - neck + task: + keypoints: car-keypoints + boundingbox: car-boundingbox + + - name: EfficientKeypointBBoxHead + alias: motorbike-detection-head + task: + keypoints: motorbike-keypoints + boundingbox: motorbike-boundingbox + inputs: + - neck + + - name: SegmentationHead + alias: color-segmentation-head + task: color-segmentation + inputs: + - neck + + - name: SegmentationHead + alias: any-vehicle-segmentation-head + task: vehicle-segmentation + inputs: + - neck + + - name: BiSeNetHead + alias: brand-segmentation-head + task: brand-segmentation + inputs: + - neck + + - name: BiSeNetHead + alias: vehicle-type-segmentation-head + task: vehicle_type-segmentation + inputs: + - neck + + losses: + - name: AdaptiveDetectionLoss + attached_to: bbox-head + - name: BCEWithLogitsLoss + attached_to: any-vehicle-segmentation-head + - name: CrossEntropyLoss + attached_to: vehicle-type-segmentation-head + - name: CrossEntropyLoss + attached_to: color-segmentation-head + - name: ImplicitKeypointBBoxLoss + attached_to: car-detection-head + - name: EfficientKeypointBBoxLoss + attached_to: motorbike-detection-head + + metrics: + - name: ObjectKeypointSimilarity + attached_to: car-detection-head + - name: MeanAveragePrecisionKeypoints + attached_to: motorbike-detection-head + - name: MeanAveragePrecision + attached_to: bbox-head + is_main_metric: true + - name: F1Score + attached_to: any-vehicle-segmentation-head + - name: JaccardIndex + attached_to: color-segmentation-head + - name: Accuracy + attached_to: vehicle-type-segmentation-head + - name: Precision + attached_to: brand-segmentation-head + + visualizers: + - name: MultiVisualizer + alias: multi-visualizer-car + attached_to: car-detection-head + params: + visualizers: + - name: 
KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + + - name: MultiVisualizer + alias: multi-visualizer-motorbike + attached_to: motorbike-detection-head + params: + visualizers: + - name: KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + + - name: SegmentationVisualizer + alias: color-segmentation-visualizer + attached_to: color-segmentation-head + - name: SegmentationVisualizer + alias: vehicle-type-segmentation-visualizer + attached_to: vehicle-type-segmentation-head + - name: SegmentationVisualizer + alias: vehicle-segmentation-visualizer + attached_to: any-vehicle-segmentation-head + - name: SegmentationVisualizer + alias: brand-segmentation-visualizer + attached_to: brand-segmentation-head + - name: BBoxVisualizer + alias: bbox-visualizer + attached_to: bbox-head + +tracker: + project_name: Parking_Lot + is_tensorboard: True + +loader: + train_view: val + params: + dataset_name: D1ParkingLot + +trainer: + accelerator: auto + devices: auto + strategy: auto + + n_sanity_val_steps: 1 + profiler: null + verbose: True + batch_size: 2 + accumulate_grad_batches: 1 + epochs: 200 + n_workers: 8 + train_metrics_interval: -1 + validation_interval: 10 + n_log_images: 8 + skip_last_batch: True + log_sub_losses: True + save_top_k: 3 + + preprocessing: + train_image_size: [256, 320] + keep_aspect_ratio: False + train_rgb: True + normalize: + active: True + augmentations: + - name: Defocus + params: + p: 0.1 + - name: Sharpen + params: + p: 0.1 + + callbacks: + - name: ExportOnTrainEnd + - name: ArchiveOnTrainEnd + diff --git a/tests/configs/segmentation_parse_loader.yaml b/tests/configs/segmentation_parse_loader.yaml new file mode 100644 index 00000000..14814571 --- /dev/null +++ b/tests/configs/segmentation_parse_loader.yaml @@ -0,0 +1,27 @@ +# Example configuration for training a predefined segmentation model + +model: + name: parse_loader_test + predefined_model: + name: SegmentationModel + params: + backbone: MicroNet + task: multiclass + +loader: + params: + dataset_dir: gs://luxonis-test-bucket/luxonis-ml-test-data/D2_Tile.png-mask-semantic.zip + dataset_name: _parse_loader_test_dataset + +trainer: + preprocessing: + train_image_size: [&height 128, &width 128] + keep_aspect_ratio: False + normalize: + active: True + + batch_size: 4 + epochs: &epochs 1 + n_workers: 4 + validation_interval: 1 + n_log_images: 8 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..4a8a492c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,18 @@ +import pytest + + +def pytest_collection_modifyitems(items): + for item in items: + if "/unittests/" in str(item.fspath): + item.add_marker(pytest.mark.unit) + # ensure unittests run before integration tests + item.add_marker(pytest.mark.order(0)) + elif "/integration/" in str(item.fspath): + item.add_marker(pytest.mark.integration) + + +def pytest_configure(config): + config.addinivalue_line("markers", "unit: mark test as a unit test") + config.addinivalue_line( + "markers", "integration: mark test as an integration test" + ) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 35c893d4..ef5a2142 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,8 +1,10 @@ -import glob import json +import multiprocessing as mp import os -import zipfile +import shutil +from collections import defaultdict from 
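With the markers and ordering hook above, subsets of the suite can be selected programmatically (or equivalently with pytest's -m flag); a minimal sketch:

import pytest

# Unit tests only (they are ordered to run before the integration tests anyway):
pytest.main(["-m", "unit", "tests"])
# Integration tests only:
pytest.main(["-m", "integration", "tests/integration"])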
pathlib import Path +from typing import Any import cv2 import gdown @@ -10,126 +12,205 @@ import pytest import torchvision from luxonis_ml.data import LuxonisDataset -from luxonis_ml.utils import environ +from luxonis_ml.data.parsers import LuxonisParser +from luxonis_ml.data.utils.data_utils import rgb_to_bool_masks +from luxonis_ml.utils import LuxonisFileSystem, environ -Path(environ.LUXONISML_BASE_PATH).mkdir(exist_ok=True) +WORK_DIR = Path("tests", "data") -def create_dataset(name: str) -> LuxonisDataset: - if LuxonisDataset.exists(name): - dataset = LuxonisDataset(name) - dataset.delete_dataset() - return LuxonisDataset(name) +@pytest.fixture(scope="session") +def test_output_dir() -> Path: + return Path("tests/integration/save-directory") @pytest.fixture(scope="session", autouse=True) -def create_coco_dataset(): - dataset = create_dataset("coco_test") - url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" - output_folder = "../data/" - output_zip = os.path.join(output_folder, "COCO_people_subset.zip") +def setup(test_output_dir: Path): + WORK_DIR.mkdir(parents=True, exist_ok=True) + shutil.rmtree(WORK_DIR / "luxonisml", ignore_errors=True) + shutil.rmtree(test_output_dir, ignore_errors=True) + environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" + test_output_dir.mkdir(exist_ok=True) - if not os.path.exists(output_folder): - os.makedirs(output_folder) - if not os.path.exists(output_zip) and not os.path.exists( - os.path.join(output_folder, "COCO_people_subset") - ): - gdown.download(url, output_zip, quiet=False) - - with zipfile.ZipFile(output_zip, "r") as zip_ref: - zip_ref.extractall(output_folder) - - def COCO_people_subset_generator(): - img_dir = os.path.join(output_folder, "person_val2017_subset") - annot_file = os.path.join(output_folder, "person_keypoints_val2017.json") - im_paths = glob.glob(img_dir + "/*.jpg") - nums = np.array([int(Path(path).stem) for path in im_paths]) - idxs = np.argsort(nums) - im_paths = list(np.array(im_paths)[idxs]) - with open(annot_file) as file: - data = json.load(file) - imgs = data["images"] - anns = data["annotations"] - - for path in im_paths: - gran = Path(path).name - img = [img for img in imgs if img["file_name"] == gran][0] - img_id = img["id"] - img_anns = [ann for ann in anns if ann["image_id"] == img_id] - - im = cv2.imread(path) - height, width, _ = im.shape - - if len(img_anns): - yield { - "file": path, - "class": "person", - "type": "classification", - "value": True, +@pytest.fixture +def train_overfit() -> bool: + return bool(os.getenv("LUXONIS_TRAIN_OVERFIT")) + + +@pytest.fixture(scope="session") +def parking_lot_dataset() -> LuxonisDataset: + url = "gs://luxonis-test-bucket/luxonis-ml-test-data/D1_ParkingSlotTest" + base_path = WORK_DIR / "D1_ParkingSlotTest" + if not base_path.exists(): + base_path = LuxonisFileSystem.download(url, WORK_DIR) + + mask_brand_path = base_path / "mask_brand" + mask_color_path = base_path / "mask_color" + kpt_mask_path = base_path / "keypoints_mask_vehicle" + + def generator(): + filenames: dict[int, Path] = {} + for base_path in [kpt_mask_path, mask_brand_path, mask_color_path]: + for sequence_path in sorted(list(base_path.glob("sequence.*"))): + frame_data = sequence_path / "step0.frame_data.json" + with open(frame_data) as f: + data = json.load(f)["captures"][0] + frame_data = data["annotations"] + sequence_num = int(sequence_path.suffix[1:]) + filename = data["filename"] + if filename is not None: + filename = sequence_path / filename + filenames[sequence_num] = filename + 
else: + filename = filenames[sequence_num] + W, H = data["dimension"] + + annotations = { + anno["@type"].split(".")[-1]: anno for anno in frame_data } - for ann in img_anns: - seg = ann["segmentation"] - if isinstance(seg, list): - poly = [] - for s in seg: - poly_arr = np.array(s).reshape(-1, 2) - poly += [ - (poly_arr[i, 0] / width, poly_arr[i, 1] / height) - for i in range(len(poly_arr)) - ] + bbox_classes = {} + bboxes = {} + + for bbox_annotation in annotations.get( + "BoundingBox2DAnnotation", defaultdict(list) + )["values"]: + class_ = ( + bbox_annotation["labelName"].split("-")[-1].lower() + ) + if class_ == "motorbiek": + class_ = "motorbike" + x, y = bbox_annotation["origin"] + w, h = bbox_annotation["dimension"] + instance_id = bbox_annotation["instanceId"] + bbox_classes[instance_id] = class_ + bboxes[instance_id] = [x / W, y / H, w / W, h / H] yield { - "file": path, - "class": "person", - "type": "polyline", - "value": poly, + "file": filename, + "annotation": { + "type": "boundingbox", + "class": class_, + "x": x / W, + "y": y / H, + "w": w / W, + "h": h / H, + "instance_id": instance_id, + }, } - x, y, w, h = ann["bbox"] - yield { - "file": path, - "class": "person", - "type": "box", - "value": (x / width, y / height, w / width, h / height), - } + for kpt_annotation in annotations.get( + "KeypointAnnotation", defaultdict(list) + )["values"]: + keypoints = kpt_annotation["keypoints"] + instance_id = kpt_annotation["instanceId"] + class_ = bbox_classes[instance_id] + bbox = bboxes[instance_id] + kpts = [] - kps = np.array(ann["keypoints"]).reshape(-1, 3) - keypoint = [] - for kp in kps: - keypoint.append( - (float(kp[0] / width), float(kp[1] / height), int(kp[2])) - ) - yield { - "file": path, - "class": "person", - "type": "keypoints", - "value": keypoint, + if class_ == "motorbike": + keypoints = keypoints[:3] + else: + keypoints = keypoints[3:] + + for kp in keypoints: + x, y = kp["location"] + kpts.append([x / W, y / H, kp["state"]]) + + yield { + "file": filename, + "annotation": { + "type": "detection", + "class": class_, + "task": class_, + "keypoints": kpts, + "instance_id": instance_id, + "boundingbox": { + "x": bbox[0], + "y": bbox[1], + "w": bbox[2], + "h": bbox[3], + }, + }, + } + + vehicle_type_segmentation = annotations[ + "SemanticSegmentationAnnotation" + ] + mask = cv2.cvtColor( + cv2.imread( + str( + sequence_path + / vehicle_type_segmentation["filename"] + ) + ), + cv2.COLOR_BGR2RGB, + ) + classes = { + inst["labelName"]: inst["pixelValue"][:3] + for inst in vehicle_type_segmentation["instances"] } + if base_path == kpt_mask_path: + task = "vehicle_type-segmentation" + elif base_path == mask_brand_path: + task = "brand-segmentation" + else: + task = "color-segmentation" + for class_, mask_ in rgb_to_bool_masks( + mask, classes, add_background_class=True + ): + yield { + "file": filename, + "annotation": { + "type": "mask", + "class": class_, + "task": task, + "mask": mask_, + }, + } + if base_path == mask_color_path: + yield { + "file": filename, + "annotation": { + "type": "mask", + "class": "vehicle", + "task": "vehicle-segmentation", + "mask": mask.astype(bool)[..., 0] + | mask.astype(bool)[..., 1] + | mask.astype(bool)[..., 2], + }, + } - dataset.set_classes(["person"]) + dataset = LuxonisDataset("_ParkingLot", delete_existing=True) + dataset.add(generator()) + np.random.seed(42) + dataset.make_splits() + return dataset - annot_file = os.path.join(output_folder, "person_keypoints_val2017.json") - with open(annot_file) as file: - data = json.load(file) 
- dataset.set_skeletons( - { - "person": { - "labels": data["categories"][0]["keypoints"], - "edges": (np.array(data["categories"][0]["skeleton"]) - 1).tolist(), - } - } + +@pytest.fixture(scope="session") +def coco_dataset() -> LuxonisDataset: + dataset_name = "coco_test" + url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" + output_zip = WORK_DIR / "COCO_people_subset.zip" + + if ( + not output_zip.exists() + and not (WORK_DIR / "COCO_people_subset").exists() + ): + gdown.download(url, str(output_zip), quiet=False) + + parser = LuxonisParser( + str(output_zip), dataset_name=dataset_name, delete_existing=True ) - dataset.add(COCO_people_subset_generator) # type: ignore - dataset.make_splits() + return parser.parse(random_split=True) -@pytest.fixture(scope="session", autouse=True) -def create_cifar10_dataset(): - dataset = create_dataset("cifar10_test") - output_folder = "../data/" - if not os.path.exists(output_folder): - os.makedirs(output_folder) +@pytest.fixture(scope="session") +def cifar10_dataset() -> LuxonisDataset: + dataset = LuxonisDataset("cifar10_test", delete_existing=True) + output_folder = WORK_DIR / "cifar10" + output_folder.mkdir(parents=True, exist_ok=True) cifar10_torch = torchvision.datasets.CIFAR10( root=output_folder, train=False, download=True ) @@ -150,16 +231,52 @@ def CIFAR10_subset_generator(): for i, (image, label) in enumerate(cifar10_torch): # type: ignore if i == 1000: break - path = os.path.join(output_folder, f"cifar_{i}.png") + path = output_folder / f"cifar_{i}.png" image.save(path) yield { "file": path, - "class": classes[label], - "type": "classification", - "value": True, + "annotation": { + "type": "classification", + "class": classes[label], + }, } - dataset.set_classes(classes) - - dataset.add(CIFAR10_subset_generator) # type: ignore + dataset.add(CIFAR10_subset_generator()) dataset.make_splits() + return dataset + + +@pytest.fixture +def config(train_overfit: bool) -> dict[str, Any]: + if train_overfit: + epochs = 100 + else: + epochs = 1 + + return { + "tracker": { + "save_directory": "tests/integration/save-directory", + }, + "loader": { + "train_view": "val", + "params": { + "dataset_name": "_ParkingLot", + }, + }, + "trainer": { + "batch_size": 4, + "epochs": epochs, + "n_workers": mp.cpu_count(), + "validation_interval": epochs, + "save_top_k": 0, + "preprocessing": { + "train_image_size": [256, 320], + "keep_aspect_ratio": False, + "normalize": {"active": True}, + }, + "callbacks": [ + {"name": "ExportOnTrainEnd"}, + ], + "matmul_precision": "medium", + }, + } diff --git a/tests/integration/multi_input_modules.py b/tests/integration/multi_input_modules.py new file mode 100644 index 00000000..e6fd0476 --- /dev/null +++ b/tests/integration/multi_input_modules.py @@ -0,0 +1,107 @@ +import torch +from luxonis_ml.data import LabelType +from torch import Tensor, nn + +from luxonis_train.loaders import BaseLoaderTorch +from luxonis_train.nodes import BaseNode +from luxonis_train.utils import Packet + + +class CustomMultiInputLoader(BaseLoaderTorch): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def input_shapes(self): + return { + "left": torch.Size([3, 224, 224]), + "right": torch.Size([3, 224, 224]), + "disparity": torch.Size([1, 224, 224]), + "pointcloud": torch.Size([1000, 3]), + } + + def __getitem__(self, _): + # Fake data + left = torch.rand(3, 224, 224, dtype=torch.float32) + right = torch.rand(3, 224, 224, dtype=torch.float32) + disparity = torch.rand(1, 224, 224, 
dtype=torch.float32) + pointcloud = torch.rand(1000, 3, dtype=torch.float32) + inputs = { + "left": left, + "right": right, + "disparity": disparity, + "pointcloud": pointcloud, + } + + # Fake labels + segmap = torch.zeros(1, 224, 224, dtype=torch.float32) + segmap[0, 100:150, 100:150] = 1 + labels = { + "segmentation": (segmap, LabelType.SEGMENTATION), + } + + return inputs, labels + + def __len__(self): + return 10 + + def get_classes(self) -> dict[LabelType, list[str]]: + return {LabelType.SEGMENTATION: ["square"]} + + +class MultiInputTestBaseNode(BaseNode): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.scalar = nn.Parameter(torch.tensor(1.0), requires_grad=True) + + def forward(self, inputs: list[Tensor]): + return [self.scalar * inp for inp in inputs] + + def unwrap(self, inputs: list[dict[str, list[Tensor]]]): + return [item for inp in inputs for key in inp for item in inp[key]] + + +class FullBackbone(MultiInputTestBaseNode): ... + + +class RGBDBackbone(MultiInputTestBaseNode): ... + + +class PointcloudBackbone(MultiInputTestBaseNode): ... + + +class FusionNeck(MultiInputTestBaseNode): ... + + +class FusionNeck2(MultiInputTestBaseNode): ... + + +class CustomSegHead1(MultiInputTestBaseNode): + tasks = {LabelType.SEGMENTATION: "segmentation"} + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.conv = nn.Conv2d(1, 1, 3, padding=1) + + def unwrap(self, inputs: list[Packet[Tensor]]) -> Tensor: + assert len(inputs) == 1 + return inputs[0]["features"][-1] + + def forward(self, inputs: Tensor): + return [self.conv(inputs)] + + +class CustomSegHead2(MultiInputTestBaseNode): + tasks = {LabelType.SEGMENTATION: "segmentation"} + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.conv = nn.Conv2d(1, 1, 3, padding=1) + + def unwrap(self, inputs: list[Packet[Tensor]]): + return [packet["features"][-1] for packet in inputs] + + def forward(self, inputs: list[Tensor]): + fn1, _, disp = inputs + x = fn1 + disp + return [self.conv(x)] diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json new file mode 100644 index 00000000..a42b82b2 --- /dev/null +++ b/tests/integration/parking_lot.json @@ -0,0 +1,304 @@ +{ + "config_version": "1.0", + "model": { + "metadata": { + "name": "parking_lot_model", + "path": "parking_lot_model.onnx", + "precision": "float32" + }, + "inputs": [ + { + "name": "image", + "dtype": "float32", + "input_type": "image", + "shape": [ + 1, + 3, + 256, + 320 + ], + "layout": "NCHW", + "preprocessing": { + "mean": [ + 123.675, + 116.28, + 103.53 + ], + "scale": [ + 58.395, + 57.12, + 57.375 + ], + "reverse_channels": true, + "interleaved_to_planar": false + } + } + ], + "outputs": [ + { + "name": "any-vehicle-segmentation-head/vehicle-segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 1, + 256, + 320 + ], + "layout": "NCHW" + }, + { + "name": "bbox-head/boundingbox/0", + "dtype": "float32", + "shape": [ + 1, + 7, + 32, + 40 + ], + "layout": "NCHW" + }, + { + "name": "bbox-head/boundingbox/1", + "dtype": "float32", + "shape": [ + 1, + 7, + 16, + 20 + ], + "layout": "NCHW" + }, + { + "name": "bbox-head/boundingbox/2", + "dtype": "float32", + "shape": [ + 1, + 7, + 8, + 10 + ], + "layout": "NCHW" + }, + { + "name": "brand-segmentation-head/brand-segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 23, + 256, + 320 + ], + "layout": "NCHW" + }, + { + "name": "car-detection-head/boxes_and_keypoints/0", + "dtype": "float32", + "shape": [ + 1, + 5040, + 24 + ], + "layout": "NCD" + }, + { + 
"name": "color-segmentation-head/color-segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 4, + 256, + 320 + ], + "layout": "NCHW" + }, + { + "name": "motorbike-detection-head/outputs/0", + "dtype": "float32", + "shape": [ + 1, + 14, + 32, + 40 + ], + "layout": "NCHW" + }, + { + "name": "motorbike-detection-head/outputs/1", + "dtype": "float32", + "shape": [ + 1, + 14, + 16, + 20 + ], + "layout": "NCHW" + }, + { + "name": "motorbike-detection-head/outputs/2", + "dtype": "float32", + "shape": [ + 1, + 14, + 8, + 10 + ], + "layout": "NCDE" + }, + { + "name": "vehicle-type-segmentation-head/vehicle_type-segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 320 + ], + "layout": "NCHW" + } + ], + "heads": [ + { + "parser": "YOLO", + "metadata": { + "postprocessor_path": null, + "classes": [ + "motorbike", + "car" + ], + "n_classes": 2, + "iou_threshold": 0.45, + "conf_threshold": 0.25, + "max_det": 300, + "anchors": null, + "subtype": "yolov6" + }, + "outputs": [ + "bbox-head/boundingbox/0", + "bbox-head/boundingbox/1", + "bbox-head/boundingbox/2" + ] + }, + { + "parser": "YoloDetectionNetwork", + "metadata": { + "postprocessor_path": null, + "classes": [ + "car" + ], + "n_classes": 1, + "iou_threshold": 0.45, + "conf_threshold": 0.25, + "max_det": 300, + "subtype": "yolov7", + "n_keypoints": 6 + }, + "outputs": [ + "car-detection-head/boxes_and_keypoints/0" + ] + }, + { + "parser": "YoloDetectionNetwork", + "metadata": { + "postprocessor_path": null, + "classes": [ + "motorbike" + ], + "n_classes": 1, + "iou_threshold": 0.45, + "conf_threshold": 0.25, + "max_det": 300, + "anchors": null, + "n_keypoints": 3 + }, + "outputs": [ + "motorbike-detection-head/outputs/0", + "motorbike-detection-head/outputs/1", + "motorbike-detection-head/outputs/2" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "background", + "blue", + "green", + "red" + ], + "n_classes": 4, + "is_softmax": false + }, + "outputs": [ + "color-segmentation-head/color-segmentation/0" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "vehicle" + ], + "n_classes": 1, + "is_softmax": false + }, + "outputs": [ + "any-vehicle-segmentation-head/vehicle-segmentation/0" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "background", + "chrysler", + "bmw", + "ducati", + "dodge", + "ferrari", + "infiniti", + "land-rover", + "roll-royce", + "saab", + "Kawasaki", + "moto", + "truimph", + "alfa-romeo", + "harley", + "honda", + "jeep", + "aprilia", + "piaggio", + "yamaha", + "buick", + "pontiac", + "isuzu" + ], + "n_classes": 23, + "is_softmax": false + }, + "outputs": [ + "brand-segmentation-head/brand-segmentation/0" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "background", + "car", + "motorbike" + ], + "n_classes": 3, + "is_softmax": false + }, + "outputs": [ + "vehicle-type-segmentation-head/vehicle_type-segmentation/0" + ] + } + ] + } +} diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py new file mode 100644 index 00000000..fb184b6f --- /dev/null +++ b/tests/integration/test_detection.py @@ -0,0 +1,95 @@ +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + + +def get_opts(backbone: str) -> 
dict[str, Any]: + return { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "EfficientBBoxHead", + "inputs": [backbone], + }, + { + "name": "EfficientKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + { + "name": "ImplicitKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + ], + "losses": [ + { + "name": "AdaptiveDetectionLoss", + "attached_to": "EfficientBBoxHead", + }, + { + "name": "EfficientKeypointBBoxLoss", + "attached_to": "EfficientKeypointBBoxHead", + "params": {"area_factor": 0.5}, + }, + { + "name": "ImplicitKeypointBBoxLoss", + "attached_to": "ImplicitKeypointBBoxHead", + }, + ], + "metrics": [ + { + "name": "MeanAveragePrecision", + "attached_to": "EfficientBBoxHead", + }, + { + "name": "MeanAveragePrecisionKeypoints", + "alias": "EfficientKeypointBBoxHead-MaP", + "attached_to": "EfficientKeypointBBoxHead", + }, + { + "name": "MeanAveragePrecisionKeypoints", + "alias": "ImplicitKeypointBBoxHead-MaP", + "attached_to": "ImplicitKeypointBBoxHead", + }, + ], + } + } + + +def train_and_test( + config: dict[str, Any], + opts: dict[str, Any], + train_overfit: bool = False, +): + model = LuxonisModel(config, opts) + model.train() + results = model.test(view="val") + if train_overfit: + for name, value in results.items(): + if "/map_50" in name or "/kpt_map_medium" in name: + assert value > 0.8, f"{name} = {value} (expected > 0.8)" + + +@pytest.mark.parametrize("backbone", BACKBONES) +def test_backbones( + backbone: str, + config: dict[str, Any], + parking_lot_dataset: LuxonisDataset, +): + opts = get_opts(backbone) + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + train_and_test(config, opts) diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py deleted file mode 100644 index 8b6f872b..00000000 --- a/tests/integration/test_sanity.py +++ /dev/null @@ -1,85 +0,0 @@ -import os -import shutil -import subprocess -from pathlib import Path - -import pytest - - -@pytest.fixture(scope="function", autouse=True) -def clear_output(): - shutil.rmtree("output", ignore_errors=True) - - -@pytest.mark.parametrize( - "config_file", [path for path in os.listdir("configs") if "model" in path] -) -def test_sanity(config_file): - opts = [ - "trainer.epochs", - "1", - "trainer.validation_interval", - "1", - "trainer.callbacks", - "[]", - ] - result = subprocess.run( - ["luxonis_train", "train", "--config", f"configs/{config_file}", *opts], - ) - assert result.returncode == 0 - - opts += ["model.weights", str(list(Path("output").rglob("*.ckpt"))[0])] - opts += ["exporter.onnx.opset_version", "11"] - - result = subprocess.run( - ["luxonis_train", "export", "--config", f"configs/{config_file}", *opts], - ) - - assert result.returncode == 0 - - result = subprocess.run( - ["luxonis_train", "eval", "--config", f"configs/{config_file}", *opts], - ) - - assert result.returncode == 0 - - save_dir = Path("sanity_infer_save_dir") - shutil.rmtree(save_dir, ignore_errors=True) - - result = subprocess.run( - [ - "luxonis_train", - "infer", - "--save-dir", - str(save_dir), - "--config", - f"configs/{config_file}", - *opts, - ], - ) - - assert result.returncode == 0 - assert save_dir.exists() - assert len(list(save_dir.rglob("*.png"))) > 0 - shutil.rmtree(save_dir, ignore_errors=True) - - -def test_tuner(): - Path("study_local.db").unlink(missing_ok=True) - result = subprocess.run( - [ - 
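The new integration tests above all drive the same high-level API; condensed into a hedged standalone sketch, with an illustrative config path and overrides:

from luxonis_train.core import LuxonisModel

opts = {
    "trainer.epochs": 1,
    "trainer.batch_size": 1,
    "loader.params.dataset_name": "coco_test",
}
model = LuxonisModel("configs/detection_model.yaml", opts)
model.train()
results = model.test(view="val")            # dict of metric name -> value
model.export("model.onnx")
model.infer(view="val", save_dir="infer-output")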
"luxonis_train", - "tune", - "--config", - "configs/example_tuning.yaml", - "trainer.epochs", - "1", - "trainer.validation_interval", - "1", - "trainer.callbacks", - "[]", - "tuner.n_trials", - "4", - ], - ) - assert result.returncode == 0 diff --git a/tests/integration/test_segmentation.py b/tests/integration/test_segmentation.py new file mode 100644 index 00000000..c24e6fb9 --- /dev/null +++ b/tests/integration/test_segmentation.py @@ -0,0 +1,134 @@ +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + + +def get_opts(backbone: str) -> dict[str, Any]: + opts = { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "SegmentationHead", + "alias": "seg-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": "bi-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": "bi-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation-2", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation-3", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + ], + "losses": [ + { + "name": "CrossEntropyLoss", + "attached_to": "seg-color-segmentation", + }, + { + "name": "CrossEntropyLoss", + "attached_to": "bi-color-segmentation", + }, + { + "name": "BCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation", + }, + { + "name": "SigmoidFocalLoss", + "attached_to": "bi-vehicle-segmentation", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SoftmaxFocalLoss", + "attached_to": "seg-vehicle-segmentation-2", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SmoothBCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation-3", + "params": {"label_smoothing": 0.1}, + }, + ], + "metrics": [], + "visualizers": [], + } + } + aliases = [head["alias"] for head in opts["model"]["nodes"][1:]] + for alias in aliases: + opts["model"]["metrics"].extend( + [ + { + "name": "JaccardIndex", + "alias": f"JaccardIndex_{alias}", + "attached_to": alias, + }, + { + "name": "F1Score", + "alias": f"F1Score_{alias}", + "attached_to": alias, + }, + ] + ) + opts["model"]["visualizers"].append( + { + "name": "SegmentationVisualizer", + "attached_to": alias, + } + ) + return opts + + +def train_and_test( + config: dict[str, Any], + opts: dict[str, Any], + train_overfit: bool = False, +): + model = LuxonisModel(config, opts) + model.train() + results = model.test(view="val") + if train_overfit: + for name, value in results.items(): + if "metric" in name: + assert value > 0.8, f"{name} = {value} (expected > 0.8)" + + +@pytest.mark.parametrize("backbone", BACKBONES) +def test_backbones( + backbone: str, + config: dict[str, Any], + parking_lot_dataset: LuxonisDataset, +): + opts = get_opts(backbone) + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + train_and_test(config, opts) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py new file mode 100644 index 00000000..069e53b0 --- /dev/null +++ b/tests/integration/test_simple.py @@ -0,0 +1,216 @@ +import json 
+import shutil +import sys +import tarfile +from copy import deepcopy +from pathlib import Path +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset +from luxonis_ml.utils import environ + +from luxonis_train.core import LuxonisModel + +from .multi_input_modules import * + +INFER_PATH = Path("tests/integration/infer-save-directory") +ONNX_PATH = Path("tests/integration/_model.onnx") +STUDY_PATH = Path("study_local.db") + + +@pytest.fixture +def opts(test_output_dir: Path) -> dict[str, Any]: + return { + "trainer.epochs": 1, + "trainer.batch_size": 1, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(test_output_dir), + "tuner.n_trials": 4, + } + + +@pytest.fixture(scope="function", autouse=True) +def clear_files(): + # todo + yield + STUDY_PATH.unlink(missing_ok=True) + ONNX_PATH.unlink(missing_ok=True) + shutil.rmtree(INFER_PATH, ignore_errors=True) + + +@pytest.mark.parametrize( + "config_file", + [ + "classification_model", + "segmentation_model", + "detection_model", + "keypoint_bbox_model", + "ddrnet_segmentation_model", + ], +) +def test_predefined_models( + opts: dict[str, Any], + config_file: str, + coco_dataset: LuxonisDataset, + cifar10_dataset: LuxonisDataset, +): + config_file = f"configs/{config_file}.yaml" + opts |= { + "loader.params.dataset_name": cifar10_dataset.dataset_name + if "classification_model" in config_file + else coco_dataset.dataset_name, + } + model = LuxonisModel(config_file, opts) + model.train() + model.test() + + +def test_multi_input(opts: dict[str, Any]): + config_file = "configs/example_multi_input.yaml" + model = LuxonisModel(config_file, opts) + model.train() + model.test(view="val") + + assert not ONNX_PATH.exists() + model.export(str(ONNX_PATH)) + assert ONNX_PATH.exists() + + assert not INFER_PATH.exists() + model.infer(view="val", save_dir=INFER_PATH) + assert INFER_PATH.exists() + + +def test_custom_tasks( + opts: dict[str, Any], parking_lot_dataset: LuxonisDataset, subtests +): + config_file = "tests/configs/parking_lot_config.yaml" + opts |= { + "loader.params.dataset_name": parking_lot_dataset.dataset_name, + "trainer.batch_size": 2, + } + del opts["trainer.callbacks"] + model = LuxonisModel(config_file, opts) + model.train() + archive_path = Path( + model.run_save_dir, "archive", model.cfg.model.name + ).with_suffix(".onnx.tar.xz") + correct_archive_config = json.loads( + Path("tests/integration/parking_lot.json").read_text() + ) + + with subtests.test("test_archive"): + assert archive_path.exists() + with tarfile.open(archive_path) as tar: + extracted_cfg = tar.extractfile("config.json") + + assert ( + extracted_cfg is not None + ), "Config JSON not found in the archive." 
+ generated_config = json.loads(extracted_cfg.read().decode()) + + del generated_config["model"]["heads"][1]["metadata"]["anchors"] + assert generated_config == correct_archive_config + + +@pytest.mark.skipif( + environ.GOOGLE_APPLICATION_CREDENTIALS is None, + reason="GCP credentials not set", +) +def test_parsing_loader(): + model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") + model.train() + + +@pytest.mark.skipif( + sys.platform == "win32", + reason="Tuning not supported on Windows", +) +def test_tune(opts: dict[str, Any], coco_dataset: LuxonisDataset): + opts["tuner.params"] = { + "trainer.optimizer.name_categorical": ["Adam", "SGD"], + "trainer.optimizer.params.lr_float": [0.0001, 0.001], + "trainer.batch_size_int": [4, 16, 4], + "trainer.preprocessing.augmentations_subset": [ + ["Defocus", "Sharpen", "Flip", "Normalize", "invalid"], + 2, + ], + "model.losses.0.weight_uniform": [0.1, 0.9], + "model.nodes.0.freezing.unfreeze_after_loguniform": [0.1, 0.9], + } + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel("configs/example_tuning.yaml", opts) + model.tune() + assert STUDY_PATH.exists() + + +def test_archive(test_output_dir: Path, coco_dataset: LuxonisDataset): + opts = { + "tracker.save_directory": str(test_output_dir), + "loader.params.dataset_name": coco_dataset.identifier, + } + model = LuxonisModel("tests/configs/archive_config.yaml", opts) + model.archive() + assert ( + Path( + model.run_save_dir, + "archive", + model.cfg.archiver.name or model.cfg.model.name, + ) + .with_suffix(".onnx.tar.xz") + .exists() + ) + + +def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset): + config_file = "tests/configs/parking_lot_config.yaml" + opts = deepcopy(opts) + del opts["trainer.callbacks"] + opts |= { + "trainer.use_rich_progress_bar": False, + "trainer.seed": 42, + "trainer.deterministic": "warn", + "trainer.callbacks": [ + { + "name": "MetadataLogger", + "params": { + "hyperparams": ["trainer.epochs", "trainer.batch_size"], + }, + }, + {"name": "TestOnTrainEnd"}, + {"name": "UploadCheckpoint"}, + { + "name": "ExportOnTrainEnd", + }, + { + "name": "ArchiveOnTrainEnd", + "params": {"preferred_checkpoint": "loss"}, + }, + ], + "exporter.scale_values": [0.5, 0.5, 0.5], + "exporter.mean_values": [0.5, 0.5, 0.5], + "exporter.blobconverter.active": True, + } + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + model = LuxonisModel(config_file, opts) + model.train() + + +def test_freezing(opts: dict[str, Any], coco_dataset: LuxonisDataset): + config_file = "configs/segmentation_model.yaml" + opts = deepcopy(opts) + opts |= { + "model.predefined_model.params": { + "head_params": { + "freezing": { + "active": True, + "unfreeze_after": 2, + }, + } + } + } + opts["trainer.epochs"] = 3 + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel(config_file, opts) + model.train() diff --git a/tests/unittests/__init__.py b/tests/unittests/__init__.py index f9269fdf..e69de29b 100644 --- a/tests/unittests/__init__.py +++ b/tests/unittests/__init__.py @@ -1,2 +0,0 @@ -# import warnings -# warnings.filterwarnings("module", category=DeprecationWarning) diff --git a/tests/unittests/test_assigners/__init__.py b/tests/unittests/test_assigners/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py b/tests/unittests/test_assigners/test_atts_assigner.py similarity index 85% rename from 
tests/unittests/test_utils/test_assigners/test_atts_assigner.py rename to tests/unittests/test_assigners/test_atts_assigner.py index 4512d9e5..4ab6f939 100644 --- a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py +++ b/tests/unittests/test_assigners/test_atts_assigner.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.atts_assigner import ATSSAssigner +from luxonis_train.assigners import ATSSAssigner def test_init(): @@ -24,14 +24,20 @@ def test_forward(): mask_gt = torch.rand(bs, n_max_boxes, 1) pred_bboxes = torch.rand(bs, n_anchors, 4) - labels, bboxes, scores, mask = assigner.forward( - anchor_bboxes, n_level_bboxes, gt_labels, gt_bboxes, mask_gt, pred_bboxes + labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( + anchor_bboxes, + n_level_bboxes, + gt_labels, + gt_bboxes, + mask_gt, + pred_bboxes, ) assert labels.shape == (bs, n_anchors) assert bboxes.shape == (bs, n_anchors, 4) assert scores.shape == (bs, n_anchors, n_classes) assert mask.shape == (bs, n_anchors) + assert assigned_gt_idx.shape == (bs, n_anchors) def test_get_bbox_center(): @@ -58,7 +64,11 @@ def test_select_topk_candidates(): ) assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) - assert topk_idxs.shape == (batch_size, n_max_boxes, topk * len(n_level_bboxes)) + assert topk_idxs.shape == ( + batch_size, + n_max_boxes, + topk * len(n_level_bboxes), + ) def test_get_positive_samples(): @@ -96,7 +106,11 @@ def test_get_final_assignments(): assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) diff --git a/tests/unittests/test_assigners/test_tal_assigner.py b/tests/unittests/test_assigners/test_tal_assigner.py new file mode 100644 index 00000000..cb94b62d --- /dev/null +++ b/tests/unittests/test_assigners/test_tal_assigner.py @@ -0,0 +1,135 @@ +import torch + +from luxonis_train.assigners import TaskAlignedAssigner + + +def test_init(): + assigner = TaskAlignedAssigner( + n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9 + ) + assert assigner.n_classes == 80 + assert assigner.topk == 13 + assert assigner.alpha == 1.0 + assert assigner.beta == 6.0 + assert assigner.eps == 1e-9 + + +def test_forward(): + batch_size = 10 + n_anchors = 100 + n_max_boxes = 5 + n_classes = 80 + + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) + + # Create mock inputs + pred_scores = torch.rand(batch_size, n_anchors, 1) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + anchor_points = torch.rand(n_anchors, 2) + gt_labels = torch.rand(batch_size, n_max_boxes, 1) + gt_bboxes = torch.zeros(batch_size, n_max_boxes, 4) # no gt bboxes + mask_gt = torch.rand(batch_size, n_max_boxes, 1) + + labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( + pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt + ) + + assert labels.shape == (batch_size, n_anchors) + assert bboxes.shape == (batch_size, n_anchors, 4) + assert scores.shape == ( + batch_size, + n_anchors, + n_classes, + ) + assert mask.shape == (batch_size, n_anchors) + assert assigned_gt_idx.shape == (batch_size, n_anchors) + + # Labels should be `n_classes` as there are no GT boxes + assert labels.unique().tolist() == [n_classes] + + # All results should be zero as there are no 
GT boxes + assert torch.equal(bboxes, torch.zeros_like(bboxes)) + assert torch.equal(scores, torch.zeros_like(scores)) + assert torch.equal(mask, torch.zeros_like(mask)) + assert torch.equal(assigned_gt_idx, torch.zeros_like(assigned_gt_idx)) + + +def test_get_alignment_metric(): + batch_size = 2 + n_anchors = 5 + n_max_boxes = 3 + n_classes = 80 + + pred_scores = torch.rand(batch_size, n_anchors, n_classes) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) + + assigner = TaskAlignedAssigner( + n_classes=n_classes, topk=13, alpha=1.0, beta=6.0, eps=1e-9 + ) + assigner.bs = pred_scores.size(0) + assigner.n_max_boxes = gt_bboxes.size(1) + + align_metric, overlaps = assigner._get_alignment_metric( + pred_scores, pred_bboxes, gt_labels, gt_bboxes + ) + + assert align_metric.shape == (batch_size, n_max_boxes, n_anchors) + assert overlaps.shape == (batch_size, n_max_boxes, n_anchors) + assert align_metric.dtype == torch.float32 + assert overlaps.dtype == torch.float32 + assert align_metric.min() >= 0 and align_metric.max() <= 1 + assert overlaps.min() >= 0 and overlaps.max() <= 1 + + +def test_select_topk_candidates(): + batch_size = 2 + n_max_boxes = 3 + n_anchors = 5 + topk = 2 + + metrics = torch.rand(batch_size, n_max_boxes, n_anchors) + mask_gt = torch.rand(batch_size, n_max_boxes, 1) + + assigner = TaskAlignedAssigner(n_classes=80, topk=topk) + + is_in_topk = assigner._select_topk_candidates(metrics) + topk_mask = mask_gt.repeat([1, 1, topk]).bool() + assert torch.equal( + assigner._select_topk_candidates(metrics), + assigner._select_topk_candidates(metrics, topk_mask=topk_mask), + ) + assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) + assert is_in_topk.dtype == torch.float32 + + assert is_in_topk.sum(dim=-1).max() <= topk + + +def test_get_final_assignments(): + batch_size = 2 + n_max_boxes = 3 + n_anchors = 5 + n_classes = 80 + + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) + assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) + mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) + + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) + assigner.bs = batch_size # Set batch size + assigner.n_max_boxes = gt_bboxes.size(1) + + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( + gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum + ) + + assert assigned_labels.shape == (batch_size, n_anchors) + assert assigned_bboxes.shape == (batch_size, n_anchors, 4) + assert assigned_scores.shape == (batch_size, n_anchors, n_classes) + assert assigned_labels.min() >= 0 and assigned_labels.max() <= n_classes diff --git a/tests/unittests/test_utils/test_assigners/test_utils.py b/tests/unittests/test_assigners/test_utils.py similarity index 96% rename from tests/unittests/test_utils/test_assigners/test_utils.py rename to tests/unittests/test_assigners/test_utils.py index bf849e25..d10e1d47 100644 --- a/tests/unittests/test_utils/test_assigners/test_utils.py +++ b/tests/unittests/test_assigners/test_utils.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.utils import ( +from luxonis_train.assigners.utils import ( batch_iou, candidates_in_gt, fix_collisions, diff --git a/tests/unittests/test_base_attached_module.py b/tests/unittests/test_base_attached_module.py new file mode 100644 index 
00000000..c6ffdd48 --- /dev/null +++ b/tests/unittests/test_base_attached_module.py @@ -0,0 +1,153 @@ +import pytest +from luxonis_ml.data import LabelType + +from luxonis_train import BaseLoss, BaseNode +from luxonis_train.utils.exceptions import IncompatibleException + + +class DummyBackbone(BaseNode): + def forward(self, _): ... + + +class DummySegmentationHead(BaseNode): + tasks = [LabelType.SEGMENTATION] + + def forward(self, _): ... + + +class DummyBBoxHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX] + + def forward(self, _): ... + + +class DummyDetectionHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX, LabelType.KEYPOINTS] + + def forward(self, _): ... + + +class DummyLoss(BaseLoss): + supported_labels = [ + LabelType.SEGMENTATION, + (LabelType.KEYPOINTS, LabelType.BOUNDINGBOX), + ] + + def forward(self, _): ... + + +class NoLabelLoss(BaseLoss): + def forward(self, _): ... + + +@pytest.fixture +def labels(): + return { + "segmentation": ("segmentation", LabelType.SEGMENTATION), + "keypoints": ("keypoints", LabelType.KEYPOINTS), + "boundingbox": ("boundingbox", LabelType.BOUNDINGBOX), + "classification": ("classification", LabelType.CLASSIFICATION), + } + + +@pytest.fixture +def inputs(): + return { + "features": ["features"], + "segmentation": ["segmentation"], + } + + +def test_valid_properties(): + head = DummySegmentationHead() + loss = DummyLoss(node=head) + no_labels_loss = NoLabelLoss(node=head) + assert loss.node == head + assert loss.node_tasks == {LabelType.SEGMENTATION: "segmentation"} + assert loss.required_labels == [LabelType.SEGMENTATION] + assert no_labels_loss.node == head + assert no_labels_loss.node_tasks == { + LabelType.SEGMENTATION: "segmentation" + } + assert no_labels_loss.required_labels == [] + + +def test_invalid_properties(): + backbone = DummyBackbone() + with pytest.raises(IncompatibleException): + DummyLoss(node=backbone) + with pytest.raises(IncompatibleException): + DummyLoss(node=DummyBBoxHead()) + with pytest.raises(RuntimeError): + _ = DummyLoss().node + with pytest.raises(RuntimeError): + _ = NoLabelLoss(node=backbone).node_tasks + + +def test_get_label(labels): + seg_head = DummySegmentationHead() + det_head = DummyDetectionHead() + seg_loss = DummyLoss(node=seg_head) + assert seg_loss.get_label(labels) == "segmentation" + assert seg_loss.get_label(labels, LabelType.SEGMENTATION) == "segmentation" + + del labels["segmentation"] + labels["segmentation-task"] = ("segmentation", LabelType.SEGMENTATION) + + with pytest.raises(IncompatibleException): + seg_loss.get_label(labels) + + det_loss = DummyLoss(node=det_head) + assert det_loss.get_label(labels, LabelType.KEYPOINTS) == "keypoints" + assert det_loss.get_label(labels, LabelType.BOUNDINGBOX) == "boundingbox" + + with pytest.raises(ValueError): + det_loss.get_label(labels) + + with pytest.raises(ValueError): + det_loss.get_label(labels, LabelType.SEGMENTATION) + + +def test_input_tensors(inputs): + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + assert seg_loss.get_input_tensors(inputs) == ["segmentation"] + assert seg_loss.get_input_tensors(inputs, "segmentation") == [ + "segmentation" + ] + assert seg_loss.get_input_tensors(inputs, LabelType.SEGMENTATION) == [ + "segmentation" + ] + + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, LabelType.KEYPOINTS) + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, "keypoints") + + det_head = DummyDetectionHead() + det_loss = DummyLoss(node=det_head) + with 
pytest.raises(ValueError): + det_loss.get_input_tensors(inputs) + + +def test_prepare(inputs, labels): + backbone = DummyBackbone() + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + det_head = DummyDetectionHead() + + assert seg_loss.prepare(inputs, labels) == ("segmentation", "segmentation") + inputs["segmentation"].append("segmentation2") + assert seg_loss.prepare(inputs, labels) == ( + "segmentation2", + "segmentation", + ) + + with pytest.raises(RuntimeError): + NoLabelLoss(node=backbone).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + NoLabelLoss(node=seg_head).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + DummyLoss(node=det_head).prepare(inputs, labels) diff --git a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py new file mode 100644 index 00000000..68386f73 --- /dev/null +++ b/tests/unittests/test_base_node.py @@ -0,0 +1,160 @@ +import pytest +import torch +from luxonis_ml.data import LabelType +from torch import Size, Tensor + +from luxonis_train.nodes import AttachIndexType, BaseNode +from luxonis_train.utils import DatasetMetadata, Packet +from luxonis_train.utils.exceptions import IncompatibleException + + +class DummyNode(BaseNode, register=False): + def forward(self, _): ... + + +@pytest.fixture +def packet() -> Packet[Tensor]: + return { + "features": [torch.rand(3, 224, 224)], + } + + +@pytest.mark.parametrize( + ("attach_index", "expected"), + [ + (-1, 5), + (0, 1), + ("all", [1, 2, 3, 4, 5]), + ((0, 2), [1, 2]), + ((0, 4, 2), [1, 3]), + ((-1, -3, -1), [5, 4]), + ((4, 2), [5, 4]), + ((-1, -3), [5, 4]), + ((-4, 4), [2, 3, 4]), + ((1, -1), [2, 3, 4]), + ], +) +def test_attach_index( + attach_index: AttachIndexType, expected: list[int] | int +): + lst = [1, 2, 3, 4, 5] + + class DummyBaseNode: + attach_index: AttachIndexType + + DummyBaseNode.attach_index = attach_index + + assert BaseNode.get_attached(DummyBaseNode, lst) == expected # type: ignore + + +def test_attach_index_error(): + lst = [1, 2, 3, 4, 5] + + class DummyNode(BaseNode, register=False): + attach_index: AttachIndexType + + with pytest.raises(ValueError): + DummyNode.attach_index = 10 + BaseNode.get_attached(DummyNode, lst) # type: ignore + + with pytest.raises(ValueError): + DummyNode.attach_index = "none" # type: ignore + BaseNode.get_attached(DummyNode, lst) # type: ignore + + +def test_invalid(packet: Packet[Tensor]): + node = DummyNode() + with pytest.raises(RuntimeError): + _ = node.input_shapes + with pytest.raises(RuntimeError): + _ = node.original_in_shape + with pytest.raises(RuntimeError): + _ = node.dataset_metadata + with pytest.raises(ValueError): + node.unwrap([packet, packet]) + with pytest.raises(ValueError): + node.wrap({"inp": torch.rand(3, 224, 224)}) + + +def test_in_sizes(): + node = DummyNode( + input_shapes=[{"features": [Size((3, 224, 224)) for _ in range(3)]}] + ) + assert node.in_sizes == [Size((3, 224, 224)) for _ in range(3)] + node = DummyNode(in_sizes=Size((3, 224, 224))) + assert node.in_sizes == Size((3, 224, 224)) + with pytest.raises(RuntimeError): + node = DummyNode(input_shapes=[{"feats": [Size((3, 224, 224))]}]) + _ = node.in_sizes + + +def test_check_type_override(): + class DummyNode(BaseNode, register=False): + in_channels: int + + def forward(self, _): ... 
+ + with pytest.raises(IncompatibleException): + DummyNode( + input_shapes=[ + {"features": [Size((3, 224, 224)) for _ in range(3)]} + ] + ) + + +def test_tasks(): + class DummyHead(DummyNode): + tasks = [LabelType.CLASSIFICATION] + + class DummyMultiHead(DummyNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + dummy_head = DummyHead() + dummy_node = DummyNode() + dummy_multi_head = DummyMultiHead(n_keypoints=4) + assert ( + dummy_head.get_task_name(LabelType.CLASSIFICATION) == "classification" + ) + assert dummy_head.task == "classification" + with pytest.raises(ValueError): + dummy_head.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(RuntimeError): + dummy_node.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(RuntimeError): + _ = dummy_node.task + + with pytest.raises(ValueError): + _ = dummy_multi_head.task + + metadata = DatasetMetadata( + classes={ + "segmentation": ["car", "person", "dog"], + "classification": ["car-class", "person-class"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + dummy_multi_head._dataset_metadata = metadata + assert dummy_multi_head.get_class_names(LabelType.SEGMENTATION) == [ + "car", + "person", + "dog", + ] + assert dummy_multi_head.get_class_names(LabelType.CLASSIFICATION) == [ + "car-class", + "person-class", + ] + assert dummy_multi_head.get_n_classes(LabelType.SEGMENTATION) == 3 + assert dummy_multi_head.get_n_classes(LabelType.CLASSIFICATION) == 2 + assert dummy_multi_head.n_keypoints == 4 + with pytest.raises(ValueError): + _ = dummy_head.n_keypoints + with pytest.raises(RuntimeError): + _ = dummy_node.n_keypoints + + dummy_head = DummyHead(n_classes=5) + assert dummy_head.n_classes == 5 + with pytest.raises(ValueError): + _ = dummy_multi_head.n_classes diff --git a/tests/unittests/test_blocks.py b/tests/unittests/test_blocks.py new file mode 100644 index 00000000..8b6110d4 --- /dev/null +++ b/tests/unittests/test_blocks.py @@ -0,0 +1,15 @@ +import torch + +from luxonis_train.nodes.blocks import SqueezeExciteBlock, autopad + + +def test_autopad(): + assert autopad(1, 2) == 2 + assert autopad(2) == 1 + assert autopad((2, 4)) == (1, 2) + + +def test_squeeze_excite_block(): + se_block = SqueezeExciteBlock(64, 32) + x = torch.rand(1, 64, 224, 224) + assert se_block(x).shape == (1, 64, 224, 224) diff --git a/tests/unittests/test_callbacks/__init__.py b/tests/unittests/test_callbacks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_callbacks/test_needs_checkpoint.py b/tests/unittests/test_callbacks/test_needs_checkpoint.py new file mode 100644 index 00000000..bd296dea --- /dev/null +++ b/tests/unittests/test_callbacks/test_needs_checkpoint.py @@ -0,0 +1,6 @@ +from luxonis_train.callbacks.needs_checkpoint import NeedsCheckpoint + + +def test_other_type(): + assert NeedsCheckpoint._get_other_type("loss") == "metric" + assert NeedsCheckpoint._get_other_type("metric") == "loss" diff --git a/tests/unittests/test_loaders/__init__.py b/tests/unittests/test_loaders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_loaders/test_base_loader.py b/tests/unittests/test_loaders/test_base_loader.py new file mode 100644 index 00000000..dee1ecef --- /dev/null +++ b/tests/unittests/test_loaders/test_base_loader.py @@ -0,0 +1,94 @@ +import pytest +import torch +from luxonis_ml.data import LabelType +from torch import Size + +from luxonis_train.loaders import collate_fn + + +@pytest.mark.parametrize( + 
"input_names_and_shapes", + [ + [("features", Size([3, 224, 224]))], + [ + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), + ], + [ + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), + ("disparity", Size([1, 224, 224])), + ], + [ + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), + ], + [ + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), + ("foobar", Size([2, 3, 4, 5, 6])), + ], + ], +) +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_collate_fn( + input_names_and_shapes: list[tuple[str, Size]], batch_size: int, subtests +): + def build_batch_element(): + inputs = {} + for name, shape in input_names_and_shapes: + inputs[name] = torch.rand(shape, dtype=torch.float32) + + labels = { + "classification": ( + torch.randint(0, 2, (2,), dtype=torch.int64), + LabelType.CLASSIFICATION, + ), + "segmentation": ( + torch.randint(0, 2, (1, 224, 224), dtype=torch.int64), + LabelType.SEGMENTATION, + ), + "keypoints": ( + torch.rand(1, 52, dtype=torch.float32), + LabelType.KEYPOINTS, + ), + "boundingbox": ( + torch.rand(1, 5, dtype=torch.float32), + LabelType.BOUNDINGBOX, + ), + } + + return inputs, labels + + batch = [build_batch_element() for _ in range(batch_size)] + + inputs, annotations = collate_fn(batch) # type: ignore + + with subtests.test("inputs"): + assert inputs["features"].shape == (batch_size, 3, 224, 224) + assert inputs["features"].dtype == torch.float32 + + with subtests.test("classification"): + assert "classification" in annotations + assert annotations["classification"][0].shape == (batch_size, 2) + assert annotations["classification"][0].dtype == torch.int64 + + with subtests.test("segmentation"): + assert "segmentation" in annotations + assert annotations["segmentation"][0].shape == ( + batch_size, + 1, + 224, + 224, + ) + assert annotations["segmentation"][0].dtype == torch.int64 + + with subtests.test("keypoints"): + assert "keypoints" in annotations + assert annotations["keypoints"][0].shape == (batch_size, 53) + assert annotations["keypoints"][0].dtype == torch.float32 + + with subtests.test("boundingbox"): + assert "boundingbox" in annotations + assert annotations["boundingbox"][0].shape == (batch_size, 6) + assert annotations["boundingbox"][0].dtype == torch.float32 diff --git a/tests/unittests/test_losses/test_bce_with_logits_loss.py b/tests/unittests/test_losses/test_bce_with_logits_loss.py index 27871019..f94b5cb1 100644 --- a/tests/unittests/test_losses/test_bce_with_logits_loss.py +++ b/tests/unittests/test_losses/test_bce_with_logits_loss.py @@ -16,7 +16,9 @@ def test_forward_pass(): predictions = torch.full([bs, n_cl], 1.5) # logit loss_fn = BCEWithLogitsLoss() - loss = loss_fn.forward(predictions, targets) # -log(sigmoid(1.5)) = 0.2014 + loss = loss_fn.forward( + predictions, targets + ) # -log(sigmoid(1.5)) = 0.2014 assert isinstance(loss, torch.Tensor) assert loss.shape == torch.Size([]) @@ -57,5 +59,7 @@ def test_weights(): assert loss_weight != loss_no_weight -if __name__ == "__main__": - pytest.main() +def test_invalid(): + loss_fn = BCEWithLogitsLoss() + with pytest.raises(RuntimeError): + loss_fn.forward(torch.rand(10, 10), torch.rand(15, 15)) diff --git a/tests/unittests/test_metrics/test_torchmetrics.py b/tests/unittests/test_metrics/test_torchmetrics.py new file mode 100644 index 00000000..141a3785 --- /dev/null +++ b/tests/unittests/test_metrics/test_torchmetrics.py @@ -0,0 +1,52 @@ +import pytest +import torchmetrics +from luxonis_ml.data import 
LabelType + +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) +from luxonis_train.nodes import BaseNode + + +def test_torchmetrics(): + class DummyNode(BaseNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + def forward(self, _): ... + + class DummyMetric(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Accuracy + + node_1_class = DummyNode(n_classes=1) + node_2_classes = DummyNode(n_classes=2) + node = DummyNode() + assert DummyMetric(node=node_1_class)._task == "binary" + assert DummyMetric(node=node_2_classes)._task == "multiclass" + assert DummyMetric(node=node_2_classes, task="multilabel") + assert DummyMetric(num_classes=1)._task == "binary" + assert DummyMetric(num_classes=2)._task == "multiclass" + assert DummyMetric(num_labels=2)._task == "multilabel" + + assert DummyMetric(task="binary") + + with pytest.raises(ValueError): + DummyMetric() + + with pytest.raises(ValueError): + DummyMetric(task="multiclass") + + with pytest.raises(ValueError): + DummyMetric(task="invalid") + + with pytest.raises(ValueError): + DummyMetric(task="binary", node=node_2_classes) + + with pytest.raises(ValueError): + DummyMetric(task="multiclass", node=node_1_class) + + with pytest.raises(ValueError): + DummyMetric(task="multiclass", node=node) + + with pytest.raises(ValueError): + DummyMetric(task="multilabel", node=node) diff --git a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py b/tests/unittests/test_utils/test_assigners/test_tal_assigner.py deleted file mode 100644 index bb2dd912..00000000 --- a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py +++ /dev/null @@ -1,161 +0,0 @@ -import torch - -from luxonis_train.utils.assigners.tal_assigner import TaskAlignedAssigner - - -def test_init(): - assigner = TaskAlignedAssigner(n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9) - assert assigner.n_classes == 80 - assert assigner.topk == 13 - assert assigner.alpha == 1.0 - assert assigner.beta == 6.0 - assert assigner.eps == 1e-9 - - -def test_forward(): - # Constants for clarity - batch_size = 10 - num_anchors = 100 - num_max_boxes = 5 - num_classes = 80 - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) - - # Create mock inputs - pred_scores = torch.rand(batch_size, num_anchors, 1) - pred_bboxes = torch.rand(batch_size, num_anchors, 4) - anchor_points = torch.rand(num_anchors, 2) - gt_labels = torch.rand(batch_size, num_max_boxes, 1) - gt_bboxes = torch.zeros(batch_size, num_max_boxes, 4) # no gt bboxes - mask_gt = torch.rand(batch_size, num_max_boxes, 1) - - # Call the forward method - labels, bboxes, scores, mask = assigner.forward( - pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt - ) - - # Assert the expected outcomes - assert labels.shape == (batch_size, num_anchors) - assert labels.unique().tolist() == [ - num_classes - ] # All labels should be num_classes as there are no GT boxes - assert bboxes.shape == (batch_size, num_anchors, 4) - assert torch.equal( - bboxes, torch.zeros_like(bboxes) - ) # All bboxes should be zero as there are no GT boxes - assert ( - scores.shape - == ( - batch_size, - num_anchors, - num_classes, - ) - ) # TODO: We have this in doc string: Returns: ... 
assigned scores of shape [bs, n_anchors, 1], - # it returns tensor of shape [bs, n_anchors, n_classes] instead - assert torch.equal( - scores, torch.zeros_like(scores) - ) # All scores should be zero as there are no GT boxes - assert mask.shape == (batch_size, num_anchors) - assert torch.equal( - mask, torch.zeros_like(mask) - ) # All mask values should be zero as there are no GT boxes - - -def test_get_alignment_metric(): - # Create mock inputs - bs = 2 # batch size - n_anchors = 5 - n_max_boxes = 3 - n_classes = 80 - - pred_scores = torch.rand( - bs, n_anchors, n_classes - ) # TODO: Same issue: works with n_classes instead of 1, change it in the doc string in the method itself!!! - pred_bboxes = torch.rand(bs, n_anchors, 4) - gt_labels = torch.randint(0, n_classes, (bs, n_max_boxes, 1)) - gt_bboxes = torch.rand(bs, n_max_boxes, 4) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner( - n_classes=n_classes, topk=13, alpha=1.0, beta=6.0, eps=1e-9 - ) - assigner.bs = pred_scores.size(0) - assigner.n_max_boxes = gt_bboxes.size(1) - - # Call the method - align_metric, overlaps = assigner._get_alignment_metric( - pred_scores, pred_bboxes, gt_labels, gt_bboxes - ) - - # Assert the expected outcomes - assert align_metric.shape == (bs, n_max_boxes, n_anchors) - assert overlaps.shape == (bs, n_max_boxes, n_anchors) - assert align_metric.dtype == torch.float32 - assert overlaps.dtype == torch.float32 - assert (align_metric >= 0).all() and ( - align_metric <= 1 - ).all() # Alignment metric should be in the range [0, 1] - assert (overlaps >= 0).all() and ( - overlaps <= 1 - ).all() # IoU should be in the range [0, 1] - - -def test_select_topk_candidates(): - # Constants for the test - batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 - topk = 2 - - metrics = torch.rand(batch_size, num_max_boxes, num_anchors) - mask_gt = torch.rand(batch_size, num_max_boxes, 1) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=80, topk=topk) - - # Call the method - is_in_topk = assigner._select_topk_candidates( - metrics, - ) - topk_mask = mask_gt.repeat([1, 1, topk]).bool() - assert torch.equal( - assigner._select_topk_candidates(metrics), - assigner._select_topk_candidates(metrics, topk_mask=topk_mask), - ) - # Assert the expected outcomes - assert is_in_topk.shape == (batch_size, num_max_boxes, num_anchors) - assert is_in_topk.dtype == torch.float32 - - # Check that each ground truth has at most 'topk' anchors selected - assert (is_in_topk.sum(dim=-1) <= topk).all() - - -def test_get_final_assignments(): - # Constants for the test - batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 - num_classes = 80 - - # Mock inputs - gt_labels = torch.randint(0, num_classes, (batch_size, num_max_boxes, 1)) - gt_bboxes = torch.rand(batch_size, num_max_boxes, 4) - assigned_gt_idx = torch.randint(0, num_max_boxes, (batch_size, num_anchors)) - mask_pos_sum = torch.randint(0, 2, (batch_size, num_anchors)) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) - assigner.bs = batch_size # Set batch size - assigner.n_max_boxes = gt_bboxes.size(1) - - # Call the method - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( - gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum - ) - - # Assert the expected outcomes - assert assigned_labels.shape == (batch_size, num_anchors) - assert assigned_bboxes.shape == (batch_size, num_anchors, 4) - assert assigned_scores.shape == (batch_size, num_anchors, 
num_classes) - assert (assigned_labels >= 0).all() and (assigned_labels <= num_classes).all() diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 2cb3df24..2b05a428 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -1,39 +1,42 @@ +import pytest import torch -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils.boundingbox import ( + IoUType, anchors_for_fpn_features, bbox2dist, bbox_iou, compute_iou_loss, dist2bbox, process_bbox_predictions, - process_keypoints_predictions, ) -def generate_random_bboxes(num_bboxes, max_width, max_height, format="xyxy"): - # Generate top-left corners (x1, y1) - x1y1 = torch.rand(num_bboxes, 2) * torch.tensor([max_width - 1, max_height - 1]) +def generate_random_bboxes( + n_bboxes: int, max_width: int, max_height: int, format: str = "xyxy" +): + x1y1 = torch.rand(n_bboxes, 2) * torch.tensor( + [max_width - 1, max_height - 1] + ) - # Generate widths and heights ensuring x2 > x1 and y2 > y1 wh = ( - torch.rand(num_bboxes, 2) * (torch.tensor([max_width, max_height]) - 1 - x1y1) + torch.rand(n_bboxes, 2) + * (torch.tensor([max_width, max_height]) - 1 - x1y1) + 1 ) if format == "xyxy": - # Calculate bottom-right corners (x2, y2) for xyxy format x2y2 = x1y1 + wh bboxes = torch.cat((x1y1, x2y2), dim=1) elif format == "xywh": - # Use x1y1 as top-left corner and wh as width and height for xywh format bboxes = torch.cat((x1y1, wh), dim=1) elif format == "cxcywh": - # Calculate center coordinates and use wh as width and height for cxcywh format cxcy = x1y1 + wh / 2 bboxes = torch.cat((cxcy, wh), dim=1) else: - raise ValueError("Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'.") + raise ValueError( + "Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'." 
+ ) return bboxes @@ -44,6 +47,8 @@ def test_dist2bbox(): bbox = dist2bbox(distance, anchor_points) assert bbox.shape == distance.shape + with pytest.raises(ValueError): + dist2bbox(distance, anchor_points, out_format="invalid") # type: ignore def test_bbox2dist(): @@ -56,22 +61,41 @@ def test_bbox2dist(): assert distance.shape == bbox.shape -def test_bbox_iou(): +@pytest.mark.parametrize("iou_type", ["none", "giou", "diou", "ciou", "siou"]) +def test_bbox_iou(iou_type: IoUType): for format in ["xyxy", "cxcywh", "xywh"]: bbox1 = generate_random_bboxes(5, 640, 640, format) - bbox2 = generate_random_bboxes(8, 640, 640, format) - - iou = bbox_iou(bbox1, bbox2) - - assert iou.shape == (5, 8) - assert iou.min() >= 0 and iou.max() <= 1 + if iou_type == "siou": + bbox2 = generate_random_bboxes(5, 640, 640, format) + else: + bbox2 = generate_random_bboxes(8, 640, 640, format) + + iou = bbox_iou( + bbox1, + bbox2, + bbox_format=format, # type: ignore + iou_type=iou_type, + ) + + assert iou.shape == (bbox1.shape[0], bbox2.shape[0]) + if iou_type == "none": + min = 0 + else: + min = -1.5 + assert iou.min() >= min and iou.max() <= 1 + + if iou_type == "none": + with pytest.raises(ValueError): + bbox_iou(bbox1, bbox2, iou_type="invalid") # type: ignore def test_compute_iou_loss(): pred_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") target_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") - loss_iou, iou = compute_iou_loss(pred_bboxes, target_bboxes, iou_type="giou") + loss_iou, iou = compute_iou_loss( + pred_bboxes, target_bboxes, iou_type="giou" + ) assert isinstance(loss_iou, torch.Tensor) assert isinstance(iou, torch.Tensor) @@ -93,21 +117,16 @@ def test_process_bbox_predictions(): assert out_bbox_tail.shape == (10, 4) -def test_process_keypoints_predictions(): - keypoints = torch.rand(10, 15) # 5 keypoints * 3 (x, y, visibility) - - x, y, visibility = process_keypoints_predictions(keypoints) - - assert x.shape == y.shape == visibility.shape == (10, 5) - - def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) - anchors, anchor_points, n_anchors_list, stride_tensor = anchors_for_fpn_features( - features, strides - ) + ( + anchors, + anchor_points, + n_anchors_list, + stride_tensor, + ) = anchors_for_fpn_features(features, strides) assert isinstance(anchors, torch.Tensor) assert isinstance(anchor_points, torch.Tensor) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py new file mode 100644 index 00000000..8dba11a8 --- /dev/null +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -0,0 +1,53 @@ +import pytest + +from luxonis_train.utils import DatasetMetadata + + +@pytest.fixture +def metadata(): + return DatasetMetadata( + classes={ + "color-segmentation": ["car", "person"], + "detection": ["car", "person"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + +def test_n_classes(metadata): + assert metadata.n_classes("color-segmentation") == 2 + assert metadata.n_classes("detection") == 2 + assert metadata.n_classes() == 2 + with pytest.raises(ValueError): + metadata.n_classes("segmentation") + metadata._classes["segmentation"] = ["car", "person", "tree"] + with pytest.raises(RuntimeError): + metadata.n_classes() + + +def test_n_keypoints(metadata): + assert metadata.n_keypoints("color-segmentation") == 0 + assert metadata.n_keypoints("detection") == 0 + assert metadata.n_keypoints() == 0 + with 
pytest.raises(ValueError): + metadata.n_keypoints("segmentation") + metadata._n_keypoints["segmentation"] = 1 + with pytest.raises(RuntimeError): + metadata.n_keypoints() + + +def test_class_names(metadata): + assert metadata.classes("color-segmentation") == ["car", "person"] + assert metadata.classes("detection") == ["car", "person"] + assert metadata.classes() == ["car", "person"] + with pytest.raises(ValueError): + metadata.classes("segmentation") + metadata._classes["segmentation"] = ["car", "person", "tree"] + with pytest.raises(RuntimeError): + metadata.classes() + + +def test_no_loader(): + metadata = DatasetMetadata() + with pytest.raises(RuntimeError): + metadata.autogenerate_anchors(3) diff --git a/tests/unittests/test_utils/test_general.py b/tests/unittests/test_utils/test_general.py new file mode 100644 index 00000000..7f13f796 --- /dev/null +++ b/tests/unittests/test_utils/test_general.py @@ -0,0 +1,44 @@ +import pytest + +from luxonis_train.utils.general import infer_upscale_factor + + +@pytest.mark.parametrize( + ("in_size", "orig_size", "expected"), + [ + ((1, 1), (1, 1), 0), + ((1, 1), (2, 2), 1), + ((2, 2), (1, 1), -1), + ((2, 2), (4, 4), 1), + ((4, 4), (2, 2), -1), + ((4, 4), (8, 8), 1), + ((8, 8), (4, 4), -1), + ((2, 2), (16, 16), 3), + ((16, 16), (4, 4), -2), + (4, 8, 1), + ], +) +def test_infer_upscale_factor( + in_size: tuple[int, int] | int, + orig_size: tuple[int, int] | int, + expected: int, +): + assert infer_upscale_factor(in_size, orig_size) == expected + + +@pytest.mark.parametrize( + ("in_size", "orig_size"), + [ + ((1, 1), (2, 1)), + ((1, 1), (1, 2)), + ((2, 3), (16, 16)), + ((3, 2), (16, 16)), + ((3, 3), (16, 16)), + ], +) +def test_infer_upscale_factor_fail( + in_size: tuple[int, int] | int, + orig_size: tuple[int, int] | int, +): + with pytest.raises(ValueError): + infer_upscale_factor(in_size, orig_size) diff --git a/tests/unittests/test_utils/test_graph.py b/tests/unittests/test_utils/test_graph.py new file mode 100644 index 00000000..c63e4b72 --- /dev/null +++ b/tests/unittests/test_utils/test_graph.py @@ -0,0 +1,79 @@ +import pytest + +from luxonis_train.utils.graph import Graph, is_acyclic, traverse_graph + + +@pytest.mark.parametrize( + ("graph", "acyclic"), + [ + ({}, True), + ({"a": []}, True), + ({"a": ["b"], "b": ["a"]}, False), + ({"a": ["b"], "b": []}, True), + ({"a": ["b"], "b": ["c"], "c": ["a"]}, False), + ({"a": ["b"], "b": ["c"], "c": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, False), + ], +) +def test_acyclic(graph: Graph, acyclic: bool): + assert is_acyclic(graph) == acyclic + + +@pytest.mark.parametrize( + ("graph", "nodes", "expected"), + [ + ({}, {}, []), + ( + {"a": []}, + {"a": 1}, + [("a", 1, [], [])], + ), + ( + {"a": ["b"], "b": []}, + {"a": 1, "b": 2}, + [("b", 2, [], ["a"]), ("a", 1, ["b"], [])], + ), + ( + {"a": ["b"], "b": ["c"], "c": []}, + {"a": 1, "b": 2, "c": 3}, + [ + ("c", 3, [], ["a", "b"]), + ("b", 2, ["c"], ["a"]), + ("a", 1, ["b"], []), + ], + ), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + [ + ("d", 4, [], ["a", "b", "c"]), + ("b", 2, ["d"], ["a", "c"]), + ("c", 3, ["d"], ["a"]), + ("a", 1, ["b", "c"], []), + ], + ), + ], +) +def test_traverse( + graph: Graph, + nodes: dict[str, int], + expected: list[tuple[str, int, list[str], list[str]]], +): + result = list(traverse_graph(graph, nodes)) + assert result == expected + + +@pytest.mark.parametrize( + ("graph", "nodes"), + [ + 
({"a": ["b"], "b": ["a"]}, {"a": 1, "b": 2}), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + ), + ], +) +def test_traverse_fail(graph: Graph, nodes: dict[str, int]): + with pytest.raises(RuntimeError): + list(traverse_graph(graph, nodes)) diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py new file mode 100644 index 00000000..3d20dae6 --- /dev/null +++ b/tests/unittests/test_utils/test_keypoints.py @@ -0,0 +1,24 @@ +import pytest +import torch + +from luxonis_train.utils.keypoints import ( + get_sigmas, + process_keypoints_predictions, +) + + +def test_get_sigmas(): + sigmas = [0.1, 0.2, 0.3] + assert get_sigmas(sigmas, 3).tolist() == pytest.approx(sigmas) + with pytest.raises(ValueError): + get_sigmas(sigmas, 2) + assert len(get_sigmas(None, 17)) == 17 + assert len(get_sigmas(None, 5)) == 5 + + +def test_process_keypoints_predictions(): + keypoints = torch.tensor([[0.1, 0.2, 1.0, 0.4, 0.5, 0.0]]) + x, y, visibility = process_keypoints_predictions(keypoints) + assert x[0].tolist() == pytest.approx([0.1, 0.4]) + assert y[0].tolist() == pytest.approx([0.2, 0.5]) + assert visibility[0].tolist() == pytest.approx([1.0, 0.0]) diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py deleted file mode 100644 index e48f81ad..00000000 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ /dev/null @@ -1,39 +0,0 @@ -import pytest -import torch - -from luxonis_train.utils.loaders import ( - collate_fn, -) -from luxonis_train.utils.types import LabelType - - -def test_collate_fn(): - # Mock batch data - batch = [ - ( - torch.rand(3, 224, 224, dtype=torch.float32), - {LabelType.CLASSIFICATION: torch.tensor([1, 0])}, - ), - ( - torch.rand(3, 224, 224, dtype=torch.float32), - {LabelType.CLASSIFICATION: torch.tensor([0, 1])}, - ), - ] - - # Call collate_fn - imgs, annotations = collate_fn(batch) - - # Check images tensor - assert imgs.shape == (2, 3, 224, 224) - assert imgs.dtype == torch.float32 - - # Check annotations - assert LabelType.CLASSIFICATION in annotations - assert annotations[LabelType.CLASSIFICATION].shape == (2, 2) - assert annotations[LabelType.CLASSIFICATION].dtype == torch.int64 - - # TODO: test also segmentation, boundingbox and keypoint - - -if __name__ == "__main__": - pytest.main() diff --git a/tools/main.py b/tools/main.py deleted file mode 100644 index 73843593..00000000 --- a/tools/main.py +++ /dev/null @@ -1,226 +0,0 @@ -import os -from enum import Enum -from importlib.metadata import version -from pathlib import Path -from typing import Annotated, Optional - -import cv2 -import torch -import typer - -app = typer.Typer(help="Luxonis Train CLI", add_completion=False) - - -class View(str, Enum): - train = "train" - val = "val" - test = "test" - - def __str__(self): - return self.value - - -ConfigType = Annotated[ - Optional[Path], - typer.Option( - help="Path to the configuration file.", - show_default=False, - ), -] - -OptsType = Annotated[ - Optional[list[str]], - typer.Argument( - help="A list of optional CLI overrides of the config file.", - show_default=False, - ), -] - -ViewType = Annotated[View, typer.Option(help="Which dataset view to use.")] - -SaveDirType = Annotated[ - Optional[Path], - typer.Option(help="Where to save the inference results."), -] - - -@app.command() -def train(config: ConfigType = None, opts: OptsType = None): - """Start training.""" - from luxonis_train.core import Trainer - - 
Trainer(str(config), opts).train() - - -@app.command() -def eval(config: ConfigType = None, view: ViewType = View.val, opts: OptsType = None): - """Evaluate model.""" - from luxonis_train.core import Trainer - - Trainer(str(config), opts).test(view=view.name) - - -@app.command() -def tune(config: ConfigType = None, opts: OptsType = None): - """Start hyperparameter tuning.""" - from luxonis_train.core import Tuner - - Tuner(str(config), opts).tune() - - -@app.command() -def export(config: ConfigType = None, opts: OptsType = None): - """Export model.""" - from luxonis_train.core import Exporter - - Exporter(str(config), opts).export() - - -@app.command() -def infer( - config: ConfigType = None, - view: ViewType = View.val, - save_dir: SaveDirType = None, - opts: OptsType = None, -): - """Run inference.""" - from luxonis_train.core import Inferer - - Inferer(str(config), opts, view=view.name, save_dir=save_dir).infer() - - -@app.command() -def inspect( - config: ConfigType = None, - view: ViewType = View.val, - save_dir: SaveDirType = None, - opts: OptsType = None, -): - """Inspect dataset.""" - from luxonis_ml.data import ( - LuxonisDataset, - TrainAugmentations, - ValAugmentations, - ) - - from luxonis_train.attached_modules.visualizers.utils import ( - draw_bounding_box_labels, - draw_keypoint_labels, - draw_segmentation_labels, - get_unnormalized_images, - ) - from luxonis_train.utils.config import Config - from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn - from luxonis_train.utils.types import LabelType - - overrides = {} - if opts: - if len(opts) % 2 != 0: - raise ValueError("Override options should be a list of key-value pairs") - - for i in range(0, len(opts), 2): - overrides[opts[i]] = opts[i + 1] - - cfg = Config.get_config(str(config), overrides) - - image_size = cfg.trainer.preprocessing.train_image_size - - dataset = LuxonisDataset( - dataset_name=cfg.dataset.name, - team_id=cfg.dataset.team_id, - dataset_id=cfg.dataset.id, - bucket_type=cfg.dataset.bucket_type, - bucket_storage=cfg.dataset.bucket_storage, - ) - augmentations = ( - TrainAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ) - if view == "train" - else ValAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ) - ) - - loader_train = LuxonisLoaderTorch( - dataset, - view=view, - augmentations=augmentations, - ) - - pytorch_loader_train = torch.utils.data.DataLoader( - loader_train, - batch_size=4, - num_workers=1, - collate_fn=collate_fn, - ) - - if save_dir is not None: - os.makedirs(save_dir, exist_ok=True) - - counter = 0 - for data in pytorch_loader_train: - imgs, label_dict = data - images = get_unnormalized_images(cfg, imgs) - for i, img in enumerate(images): - for label_type, labels in label_dict.items(): - if label_type == LabelType.CLASSIFICATION: - continue - elif label_type == LabelType.BOUNDINGBOX: - img = draw_bounding_box_labels( - img, labels[labels[:, 0] == i][:, 2:], colors="yellow", width=1 - ) - elif label_type == LabelType.KEYPOINT: - img = draw_keypoint_labels( - img, labels[labels[:, 0] == i][:, 1:], colors="red" - ) - elif label_type == LabelType.SEGMENTATION: - img = draw_segmentation_labels( - 
img, labels[i], alpha=0.8, colors="#5050FF" - ) - - img_arr = img.permute(1, 2, 0).numpy() - img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR) - if save_dir is not None: - counter += 1 - cv2.imwrite(os.path.join(save_dir, f"{counter}.png"), img_arr) - else: - cv2.imshow("img", img_arr) - if cv2.waitKey() == ord("q"): - exit() - - -def version_callback(value: bool): - if value: - typer.echo(f"LuxonisTrain Version: {version(__package__)}") - raise typer.Exit() - - -@app.callback() -def common( - _: Annotated[ - bool, - typer.Option( - "--version", callback=version_callback, help="Show version and exit." - ), - ] = False, -): - ... - - -def main(): - app() - - -if __name__ == "__main__": - main()