From 3ac70fbe1190a80539e88a2a93250940f748cb0a Mon Sep 17 00:00:00 2001
From: yiyoulin <130589152+yiyoulin@users.noreply.github.com>
Date: Mon, 20 Nov 2023 11:23:12 -0800
Subject: [PATCH] Yiyoulin/mot examples (#2746)

* mot inference notebook
* finetune notebook completed
* clean up finetune notebook
* clean up for online inference notebook
* shorten visualization sleep time
* removed unused inference compute
* update the mot2coco script
* add mmtracking_model_name
* edit the bytetrack link to specific tag
* delete validation_batch_size
* wording updates
* placeholder for mmtracking cli, scripts not changed yet
* add image scale to notebook
* online cli
* finetune cli - pending verification
* finetune data preparation scripts working fine locally
* finetune submission
* finetune submission scripts
* update registry and component name
* update registry and model name in online endpoint notebook
* update for cli model, registry and component names
* comments resolve
* update the model name for scripts
* resolve comments
* reformat code files
* resolve comments
---
 .../cocovid2jsonl.py                           |  190 +++
 .../video-multi-object-tracking/deploy.yaml    |   11 +
 .../mmtracking-mot17tiny-mot-pipeline.yaml     |   86 ++
 .../mmtracking-mot17tiny-mot.sh                |  168 +++
 .../video-multi-object-tracking/mot2coco.py    |  245 ++++
 .../prepare_data.py                            |  205 +++
 .../sample_request_data.json                   |    1 +
 .../video-multi-object-tracking/deploy.yaml    |   11 +
 .../prepare_data.py                            |   32 +
 ...o-multi-object-tracking-online-endpoint.sh  |   81 ++
 .../cocovid2jsonl.py                           |  187 +++
 ...tracking-video-multi-object-tracking.ipynb  | 1181 +++++++++++++++++
 .../video-multi-object-tracking/mot2coco.py    |  245 ++++
 .../sample_request_data.json                   |    1 +
 .../sample_request_data.json                   |    1 +
 ...ulti-object-tracking-online-endpoint.ipynb  |  396 ++++++
 16 files changed, 3041 insertions(+)
 create mode 100644 cli/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py
 create mode 100644 cli/foundation-models/system/finetune/video-multi-object-tracking/deploy.yaml
 create mode 100644 cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot-pipeline.yaml
 create mode 100644 cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot.sh
 create mode 100644 cli/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py
 create mode 100644 cli/foundation-models/system/finetune/video-multi-object-tracking/prepare_data.py
 create mode 100644 cli/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json
 create mode 100644 cli/foundation-models/system/inference/video-multi-object-tracking/deploy.yaml
 create mode 100644 cli/foundation-models/system/inference/video-multi-object-tracking/prepare_data.py
 create mode 100644 cli/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.sh
 create mode 100644 sdk/python/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py
 create mode 100644 sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb
 create mode 100644 sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py
 create mode 100644 sdk/python/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json
 create mode 100644 sdk/python/foundation-models/system/inference/video-multi-object-tracking/sample_request_data.json
 create mode 100644
sdk/python/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.ipynb diff --git a/cli/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py b/cli/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py new file mode 100644 index 00000000000..97a1288d715 --- /dev/null +++ b/cli/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py @@ -0,0 +1,190 @@ +import json +import os +import sys +import argparse + +# Define Converters + + +class CocoVidToJSONLinesConverter: + def convert(self): + raise NotImplementedError + + +class BoundingBoxConverter(CocoVidToJSONLinesConverter): + """example output for object tracking jsonl: + { + "image_url":"azureml://subscriptions//resourcegroups//workspaces//datastores//paths/", + "image_details":{ + "format":"image_format", + "width":"image_width", + "height":"image_height" + }, + "video_details": { + "frame_id": "zero_based_frame_id(int)", + "video_name": "video_name", + }, + "label":[ + { + "label":"class_name_1", + "topX":"xmin/width", + "topY":"ymin/height", + "bottomX":"xmax/width", + "bottomY":"ymax/height", + "isCrowd":"isCrowd" + "instance_id": "instance_id" + }, + { + "label":"class_name_2", + "topX":"xmin/width", + "topY":"ymin/height", + "bottomX":"xmax/width", + "bottomY":"ymax/height", + "instance_id": "instance_id" + }, + "..." + ] + } + """ + + def __init__(self, coco_data): + self.json_lines_data = [] + self.categories = {} + self.coco_data = coco_data + self.image_id_to_data_index = {} + self.video_id_to_name = {} + for i in range(0, len(coco_data["images"])): + self.json_lines_data.append({}) + self.json_lines_data[i]["image_url"] = "" + self.json_lines_data[i]["image_details"] = {} + self.json_lines_data[i]["video_details"] = {} + self.json_lines_data[i]["label"] = [] + for i in range(0, len(coco_data["categories"])): + self.categories[coco_data["categories"][i]["id"]] = coco_data["categories"][ + i + ]["name"] + for i in range(0, len(coco_data["videos"])): + self.video_id_to_name[coco_data["videos"][i]["id"]] = coco_data["videos"][ + i + ]["name"] + + def _populate_image_url(self, index, coco_image): + self.json_lines_data[index]["image_url"] = coco_image["file_name"] + self.image_id_to_data_index[coco_image["id"]] = index + + def _populate_image_details(self, index, coco_image): + file_name = coco_image["file_name"] + self.json_lines_data[index]["image_details"]["format"] = file_name[ + file_name.rfind(".") + 1 : + ] + self.json_lines_data[index]["image_details"]["width"] = coco_image["width"] + self.json_lines_data[index]["image_details"]["height"] = coco_image["height"] + + def _populate_video_details(self, index, coco_image): + self.json_lines_data[index]["video_details"]["frame_id"] = coco_image[ + "frame_id" + ] + self.json_lines_data[index]["video_details"][ + "video_name" + ] = self.video_id_to_name[coco_image["video_id"]] + + def _populate_bbox_in_label(self, label, annotation, image_details): + # if bbox comes as normalized, skip normalization. 
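# Illustrative note (numbers are hypothetical, not from the dataset): COCO boxes are
# [x, y, w, h] in pixels, so a box [100, 50, 40, 80] on a 1920x1080 frame is emitted as
# topX=100/1920, topY=50/1080, bottomX=(100+40)/1920, bottomY=(50+80)/1080.
# If every bbox value is already < 1.5, the box is assumed to be pre-normalized and
# width/height are treated as 1, so no further scaling is applied.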
+ if max(annotation["bbox"]) < 1.5: + width = 1 + height = 1 + else: + width = image_details["width"] + height = image_details["height"] + label["topX"] = annotation["bbox"][0] / width + label["topY"] = annotation["bbox"][1] / height + label["bottomX"] = (annotation["bbox"][0] + annotation["bbox"][2]) / width + label["bottomY"] = (annotation["bbox"][1] + annotation["bbox"][3]) / height + + def _populate_label(self, annotation): + index = self.image_id_to_data_index[annotation["image_id"]] + image_details = self.json_lines_data[index]["image_details"] + label = {"label": self.categories[annotation["category_id"]]} + self._populate_bbox_in_label(label, annotation, image_details) + self._populate_instanceId(label, annotation) + self._populate_isCrowd(label, annotation) + self._populate_visibility(label, annotation) + self.json_lines_data[index]["label"].append(label) + + def _populate_instanceId(self, label, annotation): + label["instance_id"] = annotation["instance_id"] + + def _populate_isCrowd(self, label, annotation): + if "iscrowd" in annotation.keys(): + label["isCrowd"] = int(annotation["iscrowd"]) + + def _populate_visibility(self, label, annotation): + if "visibility" in annotation.keys(): + label["visibility"] = annotation["visibility"] + + def convert(self): + for i in range(0, len(self.coco_data["images"])): + self._populate_image_url(i, self.coco_data["images"][i]) + self._populate_image_details(i, self.coco_data["images"][i]) + self._populate_video_details(i, self.coco_data["images"][i]) + if "annotations" not in self.coco_data: + self.coco_data["annotations"] = [] + for i in range(0, len(self.coco_data["annotations"])): + self._populate_label(self.coco_data["annotations"][i]) + return self.json_lines_data + + +def main(args): + input_coco_file_path = args.input_cocovid_file_path + output_dir = args.output_dir + output_file_path = output_dir + "/" + args.output_file_name + print(output_file_path) + task_type = args.task_type + base_url = args.base_url + + def read_coco_file(coco_file): + with open(coco_file) as f_in: + return json.load(f_in) + + def write_json_lines(converter, filename, base_url=None): + json_lines_data = converter.convert() + with open(filename, "w") as outfile: + for json_line in json_lines_data: + if base_url is not None: + image_url = json_line["image_url"] + json_line["image_url"] = os.path.join(base_url, image_url) + json_line["image_url"] = json_line["image_url"].replace("\\", "/") + json.dump(json_line, outfile, separators=(",", ":")) + outfile.write("\n") + print(f"Conversion completed. 
Converted {len(json_lines_data)} lines.") + + coco_data = read_coco_file(input_coco_file_path) + + print(f"Converting for {task_type}") + + if task_type == "ObjectTracking": + converter = BoundingBoxConverter(coco_data) + write_json_lines(converter, output_file_path, base_url) + + else: + print("ERROR: Invalid Task Type") + pass + + +if __name__ == "__main__": + # Parse arguments that are passed into the script + parser = argparse.ArgumentParser() + parser.add_argument("--input_cocovid_file_path", type=str, required=True) + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--output_file_name", type=str, required=True) + parser.add_argument( + "--task_type", + type=str, + required=True, + choices=["ObjectTracking"], + default="ObjectTracking", + ) + parser.add_argument("--base_url", type=str, default=None) + + args = parser.parse_args() + main(args) diff --git a/cli/foundation-models/system/finetune/video-multi-object-tracking/deploy.yaml b/cli/foundation-models/system/finetune/video-multi-object-tracking/deploy.yaml new file mode 100644 index 00000000000..acdfd9b7cd5 --- /dev/null +++ b/cli/foundation-models/system/finetune/video-multi-object-tracking/deploy.yaml @@ -0,0 +1,11 @@ +$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json +name: demo +instance_type: Standard_NC6s_v3 +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 90000 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot-pipeline.yaml b/cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot-pipeline.yaml new file mode 100644 index 00000000000..02cc3c7a3dc --- /dev/null +++ b/cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot-pipeline.yaml @@ -0,0 +1,86 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline + +experiment_name: AzureML-Cli-Train-Finetune-Vision-MOT-Samples + +inputs: + # dataset files + training_data: + type: mltable + + validation_data: + type: mltable + + # compute + compute_model_import: sample-model-import-cluster + compute_finetune: sample-finetune-cluster-gpu + # model_name: bytetrack_yolox_x_crowdhuman_mot17-private-half + # # model - specify the foundation model available in the azureml system registry + mlflow_model: + path: azureml://registries/azureml/models/bytetrack_yolox_x_crowdhuman_mot17-private-half/versions/3 + type: mlflow_model + +outputs: + # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. 
Registering the model is required to deploy the model to an online or batch endpoint + trained_model: + type: mlflow_model + +settings: + force_rerun: true + default_compute: azureml:sample-finetune-cluster-gpu + +jobs: + mmtracking_model_finetune_job: + type: pipeline + component: azureml://registries/azureml/components/mmtracking_video_multi_object_tracking_pipeline/labels/latest + inputs: + # # Compute + compute_model_import: ${{parent.inputs.compute_model_import}} + compute_finetune: ${{parent.inputs.compute_finetune}} + + # # Model import args + task_name: video-multi-object-tracking + # model_name: ${{parent.inputs.model_name}} + # pytorch_model: ${{parent.inputs.pytorch_model}} + mlflow_model: ${{parent.inputs.mlflow_model}} + model_family: MmTrackingVideo + + # # Data + training_data: ${{parent.inputs.training_data}} + validation_data: ${{parent.inputs.validation_data}} + + # Finetuning parameters + image_width: 1920 + image_height: 1080 + learning_rate: 0.00001 + number_of_epochs: 5 + metric_for_best_model: MOTA + training_batch_size: 1 + + # # Uncomment one or more lines below to provide specific values, if you wish you override the autoselected default values. + # learning_rate_scheduler: warmup_linear + # warmup_steps: 0 + # optimizer: sgd + # weight_decay: 0.0 + # gradient_accumulation_step: 1 + # max_grad_norm: 1.0 + # iou_threshold: 0.5 + # box_score_threshold: 0.3 + # number_of_workers: 8 + # extra_optim_args: "" + # precision: 32 + # random_seed: 42 + # evaluation_strategy: epoch + # evaluation_steps: 500 + # logging_strategy: epoch + # logging_steps: 500 + # save_strategy: epoch + # save_steps: 500 + # save_total_limit: -1 + # early_stopping: False + # early_stopping_patience: 1 + # resume_from_checkpoint: False + # save_as_mlflow_model: True + + outputs: + mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git a/cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot.sh b/cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot.sh new file mode 100644 index 00000000000..3bc1d85db90 --- /dev/null +++ b/cli/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-mot17tiny-mot.sh @@ -0,0 +1,168 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-video-multi-object-tracking-finetune +# the data files are available in the same folder as the above notebook + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +compute_cluster_model_import="sample-model-import-cluster" +compute_cluster_finetune="sample-finetune-cluster-gpu" +# If above compute cluster does not exist, create it with the following vm size +compute_model_import_sku="Standard_D12" +compute_finetune_sku="Standard_NC6s_v3" + +# This is the foundation model for finetuning +mmtracking_model_name="bytetrack_yolox_x_crowdhuman_mot17-private-half" +model_label="latest" + +version=$(date +%s) +finetuned_mmtracking_model_name="$mmtracking_model_name-mot17-tiny" +mmtracking_endpoint_name="mmt-mot-mot17-tiny-$version" +deployment_sku="Standard_NC6s_v3" + +# Scoring file +mmtracking_sample_request_data="./sample_request_data.json" + +# finetuning job parameters +finetuning_pipeline_component="mmtracking_video_multi_object_tracking_pipeline" + +# 1. Install dependencies +pip install azure-ai-ml==1.8.0 +pip install azure-identity==1.13.0 + +# 2. 
Setup pre-requisites +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# Check if $compute_cluster_model_import exists, else create it +if az ml compute show --name $compute_cluster_model_import $workspace_info +then + echo "Compute cluster $compute_cluster_model_import already exists" +else + echo "Creating compute cluster $compute_cluster_model_import" + az ml compute create --name $compute_cluster_model_import --type amlcompute --min-instances 0 --max-instances 2 --size $compute_model_import_sku $workspace_info || { + echo "Failed to create compute cluster $compute_cluster_model_import" + exit 1 + } +fi + +# Check if $compute_cluster_finetune exists, else create it +if az ml compute show --name $compute_cluster_finetune $workspace_info +then + echo "Compute cluster $compute_cluster_finetune already exists" +else + echo "Creating compute cluster $compute_cluster_finetune" + az ml compute create --name $compute_cluster_finetune --type amlcompute --min-instances 0 --max-instances 2 --size $compute_finetune_sku $workspace_info || { + echo "Failed to create compute cluster $compute_cluster_finetune" + exit 1 + } +fi + +# Check if the finetuning pipeline component exists +if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name +then + echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" + exit 1 +fi + +# # 3. Check if the model exists in the registry +# # need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $mmtracking_model_name --label $model_label --registry-name $registry_name +then + echo "Model $mmtracking_model_name:$model_label does not exist in registry $registry_name" + exit 1 +fi +# get the latest model version +model_version=$(az ml model show --name $mmtracking_model_name --label $model_label --registry-name $registry_name --query version --output tsv) +# 4. Prepare data +python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name + +# training data +train_data="./data/training-mltable-folder" +# validation data +validation_data="./data/validation-mltable-folder" + +# Check if training data, validation data +if [ ! -d $train_data ] +then + echo "Training data $train_data does not exist" + exit 1 +fi + +if [ ! -d $validation_data ] +then + echo "Validation data $validation_data does not exist" + exit 1 +fi + +# 5. 
Submit finetuning job using pipeline.yaml for a open-mmlab mmtracking model + +# If you want to use a MMTracking model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below +# currently only support ocsort and bytetrack +# inputs.model_name="ocsort_yolox_x_crowdhuman_mot17-private-half" + +mmtracking_parent_job_name=$( az ml job create \ + --file ./mmtracking-mot17tiny-mot-pipeline.yaml \ + $workspace_info \ + --query name -o tsv \ + --set \ + jobs.mmtracking_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ + inputs.compute_model_import=$compute_cluster_model_import \ + inputs.compute_finetune=$compute_cluster_finetune \ + inputs.mlflow_model.path="azureml://registries/$registry_name/models/$mmtracking_model_name/versions/$model_version" \ + inputs.training_data.path=$train_data \ + inputs.validation_data.path=$validation_data + ) || { + echo "Failed to submit finetuning job" + exit 1 + } + +az ml job stream --name $mmtracking_parent_job_name $workspace_info || { + echo "job stream failed"; exit 1; +} + +# 6. Create model in workspace from train job output for fine-tuned mmtracking model +az ml model create --name $finetuned_mmtracking_model_name --version $version --type mlflow_model \ + --path azureml://jobs/$mmtracking_parent_job_name/outputs/trained_model $workspace_info || { + echo "model create in workspace failed"; exit 1; +} + +# 7. Deploy the fine-tuned mmtracking model to an endpoint +# Create online endpoint +az ml online-endpoint create --name $mmtracking_endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# Deploy registered model to endpoint in workspace +az ml online-deployment create --file ./deploy.yaml $workspace_info --all-traffic --set \ + endpoint_name=$mmtracking_endpoint_name model=azureml:$finetuned_mmtracking_model_name:$version \ + instance_type=$deployment_sku || { + echo "deployment create failed"; exit 1; +} + +# 8. Try a sample scoring request on the deployed MMTracking model + +# Check if scoring data file exists +if [ -f $mmtracking_sample_request_data ] +then + echo "Invoking endpoint $mmtracking_endpoint_name with $mmtracking_sample_request_data\n\n" +else + echo "Scoring file $mmtracking_sample_request_data does not exist" + exit 1 +fi + +az ml online-endpoint invoke --name $mmtracking_endpoint_name --request-file $mmtracking_sample_request_data $workspace_info || { + echo "endpoint invoke failed"; exit 1; +} + +# 9. Delete the endpoint +az ml online-endpoint delete --name $mmtracking_endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +# 10. Delete the request data file + +rm $mmtracking_sample_request_data diff --git a/cli/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py b/cli/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py new file mode 100644 index 00000000000..ae93a6c8e31 --- /dev/null +++ b/cli/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py @@ -0,0 +1,245 @@ +# This script converts MOT labels into COCO style. 
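# Illustrative example of the GT format described below (values are hypothetical):
# a gt.txt line such as "1,1,912,484,97,109,1,1,0.9" is parsed by parse_gts as
# frame_id=1, instance_id=1, bbox=[912, 484, 97, 109] (x, y, w, h in pixels),
# conf=1, class_id=1 (pedestrian) and visibility=0.9; for MOT15-style annotations
# the script defaults conf, class_id and visibility instead of reading them.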
+# Official website of the MOT dataset: https://motchallenge.net/ +# +# Label format of MOT dataset: +# GTs: +# # starts from 1 but COCO style starts from 0, +# , , , , , +# # conf is annotated as 0 if the object is ignored, +# , +# +# DETs and Results: +# , , , , , , , +# , , # for 3D objects +# +# Classes in MOT: +# 1: 'pedestrian' +# 2: 'person on vehicle' +# 3: 'car' +# 4: 'bicycle' +# 5: 'motorbike' +# 6: 'non motorized vehicle' +# 7: 'static person' +# 8: 'distractor' +# 9: 'occluder' +# 10: 'occluder on the ground', +# 11: 'occluder full' +# 12: 'reflection' +# +# USELESS classes are not included into the json file. +# IGNORES classes are included with `ignore=True`. +import argparse +from collections import defaultdict +import json +import os +import os.path as osp +import pickle + +import numpy as np +from tqdm import tqdm + +USELESS = [3, 4, 5, 6, 9, 10, 11] +IGNORES = [2, 7, 8, 12, 13] + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Convert MOT label and detections to COCO-VID format." + ) + parser.add_argument("-i", "--input", help="path of MOT data") + parser.add_argument("-o", "--output", help="path to save coco formatted label file") + parser.add_argument( + "--convert-det", action="store_true", help="convert official detection results." + ) + parser.add_argument( + "--split-train", + action="store_true", + help="split the train set into half-train and half-validate.", + ) + return parser.parse_args() + + +def parse_gts(gts, is_mot15): + outputs = defaultdict(list) + for gt in gts: + gt = gt.strip().split(",") + frame_id, ins_id = map(int, gt[:2]) + bbox = list(map(float, gt[2:6])) + if is_mot15: + conf = 1.0 + class_id = 1 + visibility = 1.0 + else: + conf = float(gt[6]) + class_id = int(gt[7]) + visibility = float(gt[8]) + if class_id in USELESS: + continue + elif class_id in IGNORES: + continue + anns = dict( + category_id=1, + bbox=bbox, + area=bbox[2] * bbox[3], + iscrowd=False, + visibility=visibility, + mot_instance_id=ins_id, + mot_conf=conf, + mot_class_id=class_id, + ) + outputs[frame_id].append(anns) + return outputs + + +def parse_dets(dets): + outputs = defaultdict(list) + for det in dets: + det = det.strip().split(",") + frame_id, ins_id = map(int, det[:2]) + assert ins_id == -1 + bbox = list(map(float, det[2:7])) + # [x1, y1, x2, y2] to be consistent with mmdet + bbox = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3], bbox[4]] + outputs[frame_id].append(bbox) + + return outputs + + +def list_from_file(file_name): + with open(file_name) as f: + lines = [line.rstrip() for line in f] + + return lines + + +def dump_to_json(json_object, json_file_name): + with open(json_file_name, "w") as outfile: + json_str = json.dumps(json_object) + outfile.write(json_str) + + +def dump_to_pickle(pickle_object, pickle_file_name): + with open(pickle_file_name, "wb") as outfile: + pickle.dump(pickle_object, outfile) + + +def main(args): + if not osp.isdir(args.output): + os.makedirs(args.output) + + sets = ["train", "test"] + if args.split_train: + sets += ["half-train", "half-val"] + vid_id, img_id, ann_id = 1, 1, 1 + + for subset in sets: + ins_id = 0 + print(f"Converting {subset} set to COCO format") + if "half" in subset: + in_folder = osp.join(args.input, "train") + else: + in_folder = osp.join(args.input, subset) + out_file = osp.join(args.output, f"{subset}_cocoformat.json") + outputs = defaultdict(list) + outputs["categories"] = [dict(id=1, name="pedestrian")] + if args.convert_det: + det_file = osp.join(args.output, 
f"{subset}_detections.pkl") + detections = dict(det_bboxes=dict()) + video_names = os.listdir(in_folder) + for video_name in tqdm(video_names): + # basic params + parse_gt = "test" not in subset + ins_maps = dict() + # load video infos + video_folder = osp.join(in_folder, video_name) + infos = list_from_file(f"{video_folder}/seqinfo.ini") + # video-level infos + assert video_name == infos[1].strip().split("=")[1] + img_folder = infos[2].strip().split("=")[1] + img_names = [ + fname + for fname in os.listdir(f"{video_folder}/{img_folder}") + if fname.endswith(".jpg") or fname.endswith(".png") + ] + img_names = sorted(img_names) + fps = int(infos[3].strip().split("=")[1]) + num_imgs = int(infos[4].strip().split("=")[1]) + assert num_imgs == len(img_names) + width = int(infos[5].strip().split("=")[1]) + height = int(infos[6].strip().split("=")[1]) + video = dict( + id=vid_id, name=video_name, fps=fps, width=width, height=height + ) + # parse annotations + if parse_gt: + gts = list_from_file(f"{video_folder}/gt/gt.txt") + if "MOT15" in video_folder: + img2gts = parse_gts(gts, True) + else: + img2gts = parse_gts(gts, False) + if args.convert_det: + dets = list_from_file(f"{video_folder}/det/det.txt") + img2dets = parse_dets(dets) + # make half sets + if "half" in subset: + split_frame = num_imgs // 2 + 1 + if "train" in subset: + img_names = img_names[:split_frame] + elif "val" in subset: + img_names = img_names[split_frame:] + else: + raise ValueError("subset must be named with `train` or `val`") + mot_frame_ids = [str(int(_.split(".")[0])) for _ in img_names] + with open(f"{video_folder}/gt/gt_{subset}.txt", "wt") as f: + for gt in gts: + if gt.split(",")[0] in mot_frame_ids: + f.writelines(f"{gt}\n") + # image and box level infos + for frame_id, name in enumerate(img_names): + img_name = osp.join(video_name, img_folder, name) + mot_frame_id = int(name.split(".")[0]) + image = dict( + id=img_id, + video_id=vid_id, + file_name=img_name, + height=height, + width=width, + frame_id=frame_id, + mot_frame_id=mot_frame_id, + ) + if parse_gt: + gts = img2gts[mot_frame_id] + for gt in gts: + gt.update(id=ann_id, image_id=img_id) + mot_ins_id = gt["mot_instance_id"] + if mot_ins_id in ins_maps: + gt["instance_id"] = ins_maps[mot_ins_id] + else: + gt["instance_id"] = ins_id + ins_maps[mot_ins_id] = ins_id + ins_id += 1 + outputs["annotations"].append(gt) + ann_id += 1 + if args.convert_det: + dets = np.array(img2dets[mot_frame_id]) + if dets.ndim == 1: + assert len(dets) == 0 + dets = np.zeros((0, 5)) + detections["det_bboxes"][img_name] = [dets] + outputs["images"].append(image) + img_id += 1 + outputs["videos"].append(video) + vid_id += 1 + outputs["num_instances"] = ins_id + print(f"{subset} has {ins_id} instances.") + dump_to_json(outputs, out_file) + if args.convert_det: + dump_to_pickle(detections, det_file) + print(f"Done! Saved as {out_file} and {det_file}") + else: + print(f"Done! 
Saved as {out_file}") + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/cli/foundation-models/system/finetune/video-multi-object-tracking/prepare_data.py b/cli/foundation-models/system/finetune/video-multi-object-tracking/prepare_data.py new file mode 100644 index 00000000000..f1a712c3ce2 --- /dev/null +++ b/cli/foundation-models/system/finetune/video-multi-object-tracking/prepare_data.py @@ -0,0 +1,205 @@ +import argparse +import base64 +import json +import os +import urllib +import xml.etree.ElementTree as ET + +from zipfile import ZipFile + +from azure.identity import DefaultAzureCredential +from azure.ai.ml import MLClient +from azure.ai.ml.entities import Data +from azure.ai.ml.constants import AssetTypes + +from mot2coco import main as mot2coco_converter +from cocovid2jsonl import main as cocovid2jsonl_converter + + +def create_ml_table_file(filename): + """Create ML Table definition + :param filename: Name of the jsonl file + """ + + return ( + "paths:\n" + " - file: ./{0}\n" + "transformations:\n" + " - read_json_lines:\n" + " encoding: utf8\n" + " invalid_lines: error\n" + " include_path_column: false\n" + " - convert_column_types:\n" + " - columns: image_url\n" + " column_type: stream_info" + ).format(filename) + + +def save_ml_table_file(output_path, mltable_file_contents): + """Save ML Table file + :param output_path: Path to save the MLTable file + :param mltable_file_contents: Contents of the MLTable file + """ + with open(os.path.join(output_path, "MLTable"), "w") as f: + f.write(mltable_file_contents) + + +def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): + """Create jsonl + + :param uri_folder_data_path: Path to the data folder + :param dataset_dir: Path to the dataset folder + """ + # We'll copy each JSONL file within its related MLTable folder + training_mltable_path = os.path.join(dataset_dir, "../training-mltable-folder") + validation_mltable_path = os.path.join(dataset_dir, "../validation-mltable-folder") + testing_mltable_path = os.path.join(dataset_dir, "../testing-mltable-folder") + + # First, let's create the folders if they don't exist + os.makedirs(training_mltable_path, exist_ok=True) + os.makedirs(validation_mltable_path, exist_ok=True) + os.makedirs(testing_mltable_path, exist_ok=True) + + train_annotations_file = os.path.join( + training_mltable_path, "train_annotations.jsonl" + ) + validation_annotations_file = os.path.join( + validation_mltable_path, "validation_annotations.jsonl" + ) + testing_annotations_file = os.path.join( + testing_mltable_path, "testing_annotations.jsonl" + ) + + print("Creating jsonl files") + + # Second, convert the COCO format to jsonl + print("convert MOT format to COCO format") + mot2coco_converter( + argparse.Namespace( + input=dataset_dir, + output=f"{dataset_dir}/annotations", + convert_det=True, + split_train=True, + ) + ) + print("Converting COCO video format to jsonl") + cocovid2jsonl_converter( + argparse.Namespace( + input_cocovid_file_path=f"{dataset_dir}/annotations/half-train_cocoformat.json", + output_dir=training_mltable_path, + output_file_name="train_annotations.jsonl", + task_type="ObjectTracking", + base_url=f"{uri_folder_data_path}train", + ) + ) + cocovid2jsonl_converter( + argparse.Namespace( + input_cocovid_file_path=f"{dataset_dir}/annotations/half-val_cocoformat.json", + output_dir=validation_mltable_path, + output_file_name="validation_annotations.jsonl", + task_type="ObjectTracking", + base_url=f"{uri_folder_data_path}train", + ) + ) + + # Create and 
save train mltable + print("create and save train mltable") + train_mltable_file_contents = create_ml_table_file( + os.path.basename(train_annotations_file) + ) + save_ml_table_file(training_mltable_path, train_mltable_file_contents) + + # Create and save validation mltable + print("create and save validation mltable") + validation_mltable_file_contents = create_ml_table_file( + os.path.basename(validation_annotations_file) + ) + save_ml_table_file(validation_mltable_path, validation_mltable_file_contents) + + # Create and save testing mltable + testing_mltable_file_contents = create_ml_table_file( + os.path.basename(testing_annotations_file) + ) + save_ml_table_file(testing_mltable_path, testing_mltable_file_contents) + + +def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): + """upload data to blob storage and create jsonl and mltable files + + :param ml_client: Azure ML client + :param dataset_parent_dir: Path to the dataset folder + """ + # Change to a different location if you prefer + dataset_parent_dir = "data" + + # create data folder if it doesnt exist. + os.makedirs(dataset_parent_dir, exist_ok=True) + + # download data + download_url = "https://download.openmmlab.com/mmtracking/data/MOT17_tiny.zip" + + # Extract current dataset name from dataset url + dataset_name = os.path.split(download_url)[-1].split(".")[0] + # Get dataset path for later use + dataset_dir = os.path.join(dataset_parent_dir, dataset_name) + + # Get the data zip file path + data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") + + # Download the dataset + urllib.request.urlretrieve(download_url, filename=data_file) + + # extract files + with ZipFile(data_file, "r") as zzip: + print("extracting files...") + zzip.extractall(path=dataset_parent_dir) + print("done") + # delete zip file + os.remove(data_file) + + # Upload data and create a data asset URI folder + print("Uploading data to blob storage") + my_data = Data( + path=dataset_dir, + type=AssetTypes.URI_FOLDER, + description=f"{dataset_name} dataset folder", + name=f"{dataset_name}_sample_folder", + ) + + uri_folder_data_asset = ml_client.data.create_or_update(my_data) + + print(uri_folder_data_asset) + print("") + print("Path to folder in Blob Storage:") + print(uri_folder_data_asset.path) + + create_jsonl_and_mltable_files( + uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Prepare data for video multi-object tracking" + ) + + parser.add_argument("--subscription", type=str, help="Subscription ID") + parser.add_argument("--group", type=str, help="Resource group name") + parser.add_argument("--workspace", type=str, help="Workspace name") + parser.add_argument( + "--data_path", type=str, default="./data", help="Dataset location" + ) + + args, unknown = parser.parse_known_args() + args_dict = vars(args) + + credential = DefaultAzureCredential() + ml_client = None + subscription_id = args.subscription + resource_group = args.group + workspace = args.workspace + ml_client = MLClient(credential, subscription_id, resource_group, workspace) + + upload_data_and_create_jsonl_mltable_files( + ml_client=ml_client, dataset_parent_dir=args.data_path + ) diff --git a/cli/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json b/cli/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json new file mode 100644 index 00000000000..9eaa17ee063 --- /dev/null +++ 
b/cli/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json @@ -0,0 +1 @@ +{"input_data": {"columns": ["video"], "data": ["https://github.com/open-mmlab/mmtracking/raw/master/demo/demo.mp4"]}} \ No newline at end of file diff --git a/cli/foundation-models/system/inference/video-multi-object-tracking/deploy.yaml b/cli/foundation-models/system/inference/video-multi-object-tracking/deploy.yaml new file mode 100644 index 00000000000..1b0000541f9 --- /dev/null +++ b/cli/foundation-models/system/inference/video-multi-object-tracking/deploy.yaml @@ -0,0 +1,11 @@ +$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json +name: demo +instance_type: Standard_NC6s_V3 +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 90000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/video-multi-object-tracking/prepare_data.py b/cli/foundation-models/system/inference/video-multi-object-tracking/prepare_data.py new file mode 100644 index 00000000000..e6b34e9bee5 --- /dev/null +++ b/cli/foundation-models/system/inference/video-multi-object-tracking/prepare_data.py @@ -0,0 +1,32 @@ +import json +import os +import argparse + + +def prepare_data_for_online_inference(sample_video_link) -> None: + """Prepare request json for online inference. + + :param sample_video_links: sample video links + :type sample_video_links: str + """ + request_json = {"input_data": {"columns": ["video"], "data": [sample_video_link]}} + request_file_name = "sample_request_data.json" + with open(request_file_name, "w") as request_file: + json.dump(request_json, request_file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Prepare data for video multi object tracking model" + ) + parser.add_argument( + "--video_link", + type=str, + help="sample demo video link", + default="https://github.com/open-mmlab/mmtracking/raw/master/demo/demo.mp4", + ) + + args, unknown = parser.parse_known_args() + args_dict = vars(args) + + prepare_data_for_online_inference(sample_video_link=args_dict["video_link"]) diff --git a/cli/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.sh b/cli/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.sh new file mode 100644 index 00000000000..1eed2b74c82 --- /dev/null +++ b/cli/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.sh @@ -0,0 +1,81 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-video-mutli-object-tracking-online-inference + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="bytetrack_yolox_x_crowdhuman_mot17-private-half" +model_label="latest" + +version=$(date +%s) +endpoint_name="video-mot-$version" + +# todo: fetch deployment_sku from the min_inference_sku tag of the model +deployment_sku="Standard_NC6s_V3" + +# Prepare data for deployment +python ./prepare_data.py +# sample_request_data + +sample_request_data="./sample_request_data.json" + +# 1. 
Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + ["$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) + +# 3. Deploy the model to an endpoint +# create online endpoint +az ml online-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml online-deployment create --file deploy.yaml $workspace_info --all-traffic --set \ + endpoint_name=$endpoint_name \ + model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ + instance_type=$deployment_sku || { + echo "deployment create failed"; exit 1; +} + +# 4. Try a sample scoring request +sample_request_data="./sample_request_data.json" +# Check if scoring data file exists +if [ -f $sample_request_data ]; then + echo "Invoking endpoint $endpoint_name with $sample_request_data" + +else + echo "Scoring file $sample_request_data does not exist" + exit 1 +fi + +az ml online-endpoint invoke --name $endpoint_name --request-file $sample_request_data $workspace_info || { + echo "endpoint invoke failed"; exit 1; +} + +# 6. Delete the endpoint and sample_request_data.json +az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +rm $sample_request_data \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py new file mode 100644 index 00000000000..a0f2a7fbefa --- /dev/null +++ b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/cocovid2jsonl.py @@ -0,0 +1,187 @@ +import json +import os +import sys +import argparse + +# Define Converters + + +class CocoVidToJSONLinesConverter: + def convert(self): + raise NotImplementedError + + +class BoundingBoxConverter(CocoVidToJSONLinesConverter): + """example output for object tracking jsonl: + { + "image_url":"azureml://subscriptions//resourcegroups//workspaces//datastores//paths/", + "image_details":{ + "format":"image_format", + "width":"image_width", + "height":"image_height" + }, + "video_details": { + "frame_id": "zero_based_frame_id(int)", + "video_name": "video_name", + }, + "label":[ + { + "label":"class_name_1", + "topX":"xmin/width", + "topY":"ymin/height", + "bottomX":"xmax/width", + "bottomY":"ymax/height", + "isCrowd":"isCrowd" + "instance_id": "instance_id" + }, + { + "label":"class_name_2", + "topX":"xmin/width", + "topY":"ymin/height", + "bottomX":"xmax/width", + "bottomY":"ymax/height", + "instance_id": "instance_id" + }, + "..." 
+ ] + } + """ + + def __init__(self, coco_data): + self.json_lines_data = [] + self.categories = {} + self.coco_data = coco_data + self.image_id_to_data_index = {} + self.video_id_to_name = {} + for i in range(0, len(coco_data["images"])): + self.json_lines_data.append({}) + self.json_lines_data[i]["image_url"] = "" + self.json_lines_data[i]["image_details"] = {} + self.json_lines_data[i]["video_details"] = {} + self.json_lines_data[i]["label"] = [] + for i in range(0, len(coco_data["categories"])): + self.categories[coco_data["categories"][i]["id"]] = coco_data["categories"][ + i + ]["name"] + for i in range(0, len(coco_data["videos"])): + self.video_id_to_name[coco_data["videos"][i]["id"]] = coco_data["videos"][ + i + ]["name"] + + def _populate_image_url(self, index, coco_image): + self.json_lines_data[index]["image_url"] = coco_image["file_name"] + self.image_id_to_data_index[coco_image["id"]] = index + + def _populate_image_details(self, index, coco_image): + file_name = coco_image["file_name"] + self.json_lines_data[index]["image_details"]["format"] = file_name[ + file_name.rfind(".") + 1 : + ] + self.json_lines_data[index]["image_details"]["width"] = coco_image["width"] + self.json_lines_data[index]["image_details"]["height"] = coco_image["height"] + + def _populate_video_details(self, index, coco_image): + self.json_lines_data[index]["video_details"]["frame_id"] = coco_image[ + "frame_id" + ] + self.json_lines_data[index]["video_details"][ + "video_name" + ] = self.video_id_to_name[coco_image["video_id"]] + + def _populate_bbox_in_label(self, label, annotation, image_details): + # if bbox comes as normalized, skip normalization. + if max(annotation["bbox"]) < 1.5: + width = 1 + height = 1 + else: + width = image_details["width"] + height = image_details["height"] + label["topX"] = annotation["bbox"][0] / width + label["topY"] = annotation["bbox"][1] / height + label["bottomX"] = (annotation["bbox"][0] + annotation["bbox"][2]) / width + label["bottomY"] = (annotation["bbox"][1] + annotation["bbox"][3]) / height + + def _populate_label(self, annotation): + index = self.image_id_to_data_index[annotation["image_id"]] + image_details = self.json_lines_data[index]["image_details"] + label = {"label": self.categories[annotation["category_id"]]} + self._populate_bbox_in_label(label, annotation, image_details) + self._populate_instanceId(label, annotation) + self._populate_isCrowd(label, annotation) + self._populate_visibility(label, annotation) + self.json_lines_data[index]["label"].append(label) + + def _populate_instanceId(self, label, annotation): + label["instance_id"] = annotation["instance_id"] + + def _populate_isCrowd(self, label, annotation): + if "iscrowd" in annotation.keys(): + label["isCrowd"] = int(annotation["iscrowd"]) + + def _populate_visibility(self, label, annotation): + if "visibility" in annotation.keys(): + label["visibility"] = annotation["visibility"] + + def convert(self): + for i in range(0, len(self.coco_data["images"])): + self._populate_image_url(i, self.coco_data["images"][i]) + self._populate_image_details(i, self.coco_data["images"][i]) + self._populate_video_details(i, self.coco_data["images"][i]) + if "annotations" not in self.coco_data: + self.coco_data["annotations"] = [] + for i in range(0, len(self.coco_data["annotations"])): + self._populate_label(self.coco_data["annotations"][i]) + return self.json_lines_data + + +if __name__ == "__main__": + # Parse arguments that are passed into the script + parser = argparse.ArgumentParser() + 
parser.add_argument("--input_cocovid_file_path", type=str, required=True) + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--output_file_name", type=str, required=True) + parser.add_argument( + "--task_type", + type=str, + required=True, + choices=["ObjectTracking"], + default="ObjectTracking", + ) + parser.add_argument("--base_url", type=str, default=None) + + args = parser.parse_args() + + input_coco_file_path = args.input_cocovid_file_path + output_dir = args.output_dir + output_file_path = output_dir + "/" + args.output_file_name + print(output_file_path) + task_type = args.task_type + base_url = args.base_url + + def read_coco_file(coco_file): + with open(coco_file) as f_in: + return json.load(f_in) + + def write_json_lines(converter, filename, base_url=None): + json_lines_data = converter.convert() + with open(filename, "w") as outfile: + for json_line in json_lines_data: + if base_url is not None: + image_url = json_line["image_url"] + json_line["image_url"] = os.path.join(base_url, image_url) + json_line["image_url"] = json_line["image_url"].replace("\\", "/") + json.dump(json_line, outfile, separators=(",", ":")) + outfile.write("\n") + print(f"Conversion completed. Converted {len(json_lines_data)} lines.") + + coco_data = read_coco_file(input_coco_file_path) + + print(f"Converting for {task_type}") + + if task_type == "ObjectTracking": + converter = BoundingBoxConverter(coco_data) + write_json_lines(converter, output_file_path, base_url) + + else: + print("ERROR: Invalid Task Type") + pass diff --git a/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb new file mode 100644 index 00000000000..cb8b47ebea0 --- /dev/null +++ b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb @@ -0,0 +1,1181 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Video Multi-Object Tracking using MMTracking specific pipeline component\n", + "\n", + "This sample shows how to use `mmtracking_video_multi_object_tracking_pipeline` component from the `azureml` registry to fine tune a model for video multi-object tracking task using MOT17 tiny Dataset. We then deploy the fine tuned model to an online endpoint for real time inference.\n", + "\n", + "### Training data\n", + "We will use the [MOT17 tiny](https://download.openmmlab.com/mmtracking/data/MOT17_tiny.zip) dataset.\n", + "\n", + "### Model\n", + "We will use the `bytetrack-yolox-x-crowdhuman-mot17-private-half` model in this notebook. If you need to fine tune a model that is available on MmTracking model zoo, but not available in `azureml` system registry, you can either register the model and use the registered model or use the `model_name` parameter to instruct the components to pull the model directly from MMTracking model zoo.\n", + "\n", + "### Outline\n", + "1. Install dependencies\n", + "2. Setup pre-requisites such as compute\n", + "3. Pick a model to fine tune\n", + "4. Prepare dataset for finetuning the model\n", + "5. Submit the fine tuning job using MMTracking specific video-multi-object-tracking component\n", + "6. Review training and evaluation metrics\n", + "7. Register the fine tuned model\n", + "8. Deploy the fine tuned model for real time inference\n", + "9. Test deployed end point\n", + "10. 
Clean up resources" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Install dependencies\n", + "Before starting off, if you are running the notebook on Azure Machine Learning Studio or running first time locally, you will need the following packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip install azure-ai-ml==1.8.0\n", + "! pip install azure-identity==1.13.0" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Setup pre-requisites" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.1 Connect to Azure Machine Learning workspace\n", + "\n", + "Before we dive in the code, you'll need to connect to your workspace. The workspace is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning.\n", + "\n", + "We are using `DefaultAzureCredential` to get access to workspace. `DefaultAzureCredential` should be capable of handling most scenarios. If you want to learn more about other available credentials, go to [set up authentication doc](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk), [azure-identity reference doc](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity?view=azure-python).\n", + "\n", + "Replace ``, `` and `` with their respective values in the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import DefaultAzureCredential\n", + "\n", + "\n", + "experiment_name = (\n", + " \"AzureML-Train-Finetune-Vision-MOT-Samples\" # can rename to any valid name\n", + ")\n", + "\n", + "credential = DefaultAzureCredential()\n", + "workspace_ml_client = None\n", + "try:\n", + " workspace_ml_client = MLClient.from_config(credential)\n", + " subscription_id = workspace_ml_client.subscription_id\n", + " resource_group = workspace_ml_client.resource_group_name\n", + " workspace_name = workspace_ml_client.workspace_name\n", + "except Exception as ex:\n", + " print(ex)\n", + " # Enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace_name = \"\"\n", + "\n", + "workspace_ml_client = MLClient(\n", + " credential, subscription_id, resource_group, workspace_name\n", + ")\n", + "registry_ml_client = MLClient(\n", + " credential,\n", + " subscription_id,\n", + " resource_group,\n", + " registry_name=\"azureml\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.2 Create compute\n", + "\n", + "In order to finetune a model on Azure Machine Learning studio, you will need to create a compute resource first. **Creating a compute will take 3-4 minutes.** \n", + "\n", + "For additional references, see [Azure Machine Learning in a Day](https://github.com/Azure/azureml-examples/blob/main/tutorials/azureml-in-a-day/azureml-in-a-day.ipynb). 
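Optionally, before creating the clusters below, you can list the GPU VM sizes visible to the workspace to confirm the SKU you plan to use (for example `Standard_NC6s_v3`) is available. This is a sketch; it assumes the `list_sizes` API of `azure-ai-ml` and a `gpus` attribute on the returned size objects.

```python
# Optional check: print GPU-capable VM sizes available to this workspace
for vm_size in workspace_ml_client.compute.list_sizes():
    if getattr(vm_size, "gpus", 0):
        print(vm_size.name, vm_size.gpus, "GPU(s)")
```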
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Create CPU compute for model selection component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import AmlCompute\n", + "from azure.core.exceptions import ResourceNotFoundError\n", + "\n", + "model_import_cluster_name = \"sample-model-import-cluster\"\n", + "try:\n", + " _ = workspace_ml_client.compute.get(model_import_cluster_name)\n", + " print(\"Found existing compute target.\")\n", + "except ResourceNotFoundError:\n", + " print(\"Creating a new compute target...\")\n", + " compute_config = AmlCompute(\n", + " name=model_import_cluster_name,\n", + " type=\"amlcompute\",\n", + " size=\"Standard_D12_v2\",\n", + " idle_time_before_scale_down=120,\n", + " min_instances=0,\n", + " max_instances=4,\n", + " )\n", + " workspace_ml_client.begin_create_or_update(compute_config).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Create GPU compute for finetune component\n", + "\n", + "The list of GPU machines can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "finetune_cluster_name = \"sample-finetune-cluster-gpu\"\n", + "\n", + "try:\n", + " _ = workspace_ml_client.compute.get(finetune_cluster_name)\n", + " print(\"Found existing compute target.\")\n", + "except ResourceNotFoundError:\n", + " print(\"Creating a new compute target...\")\n", + " compute_config = AmlCompute(\n", + " name=finetune_cluster_name,\n", + " type=\"amlcompute\",\n", + " size=\"STANDARD_NC6s_v3\",\n", + " idle_time_before_scale_down=120,\n", + " min_instances=0,\n", + " max_instances=4,\n", + " )\n", + " workspace_ml_client.begin_create_or_update(compute_config).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Pick a foundation model to fine tune\n", + "\n", + "We will use the `bytetrack-yolox-x-crowdhuman-mot17-private-half` model in this notebook. 
If you need to fine tune a model that is available on MMTracking model zoo, but not available in `azureml` registry, you can either register the model and use the registered model or use the `model_name` parameter to instruct the components to pull the model directly from MMTracking model zoo.\n", + "\n", + "Currently we support tracking-by-detection models, ByteTrack and OCSort, as follows:\n", + "\n", + "| Model Name | Source |\n", + "| :------------: | :-------: |\n", + "| [bytetrack_yolox_x_crowdhuman-mot17_private-half](https://ml.azure.com/registries/azureml/models/bytetrack_yolox_x_crowdhuman_mot17-private-half/version/3) | azureml registry |\n", + "| [ocsort_yolox_x_crowdhuman_mot17-private-half](https://ml.azure.com/registries/azureml/models/ocsort_yolox_x_crowdhuman_mot17-private-half/version/3) | azureml registry |\n", + "| [Variants of bytetrack models from MMTracking](https://github.com/open-mmlab/mmtracking/tree/v0.14.0/configs/mot/bytetrack) | MMTracking |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "aml_registry_model_name = \"bytetrack_yolox_x_crowdhuman_mot17-private-half\"\n", + "foundation_model = registry_ml_client.models.get(\n", + " name=aml_registry_model_name, label=\"latest\"\n", + ")\n", + "\n", + "print(\n", + " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Prepare the dataset for fine-tuning the model\n", + "\n", + "We will use the [MOT17 tiny](https://download.openmmlab.com/mmtracking/data/MOT17_tiny.zip) dataset, a subset of the [MOT17 Challenge](https://motchallenge.net/data/MOT17/). It consists of two video sequences of class {`pedestrian`}.\n", + "\n", + "\n", + "#### 4.1 Download the Data\n", + "We first download and unzip the data locally. By default, the data would be downloaded in `./data` folder in current directory. \n", + "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the following cell." 
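After the download cell below finishes, a quick sanity check can confirm the expected sequences were extracted. This is a sketch; the folder names assume the MOT17_tiny layout described in this notebook.

```python
import os

train_dir = os.path.join("data", "MOT17_tiny", "train")
# Expect MOT17-02-FRCNN and MOT17-04-FRCNN, each with an img1/ folder of frames and a gt/gt.txt file
for video in sorted(os.listdir(train_dir)):
    frames = os.listdir(os.path.join(train_dir, video, "img1"))
    print(video, f"{len(frames)} frames")
```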
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib\n", + "from zipfile import ZipFile\n", + "\n", + "# Change to a different location if you prefer\n", + "dataset_parent_dir = \"data\"\n", + "\n", + "# create data folder if it doesnt exist.\n", + "os.makedirs(dataset_parent_dir, exist_ok=True)\n", + "\n", + "# download data\n", + "download_url = \"https://download.openmmlab.com/mmtracking/data/MOT17_tiny.zip\"\n", + "\n", + "# Extract current dataset name from dataset url\n", + "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "# Get dataset path for later use\n", + "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + "# Get the data zip file path\n", + "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", + "\n", + "# Download the dataset\n", + "urllib.request.urlretrieve(download_url, filename=data_file)\n", + "\n", + "# extract files\n", + "with ZipFile(data_file, \"r\") as zzip:\n", + " print(\"extracting files...\")\n", + " zzip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + "# delete zip file\n", + "os.remove(data_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install Pillow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "\n", + "sample_image = os.path.join(dataset_dir, \"train/MOT17-02-FRCNN/img1/000001.jpg\")\n", + "sample_image = Image.open(sample_image)\n", + "print(sample_image.size)\n", + "sample_image" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.2 Upload the images to Datastore through an AML Data asset (URI Folder)\n", + "\n", + "In order to use the data for training in Azure ML, we upload it to our default Azure Blob Storage of our Azure ML Workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Uploading image files by creating a 'data asset URI FOLDER':\n", + "\n", + "from azure.ai.ml.entities import Data\n", + "from azure.ai.ml import Input\n", + "from azure.ai.ml.constants import AssetTypes\n", + "\n", + "my_data = Data(\n", + " path=dataset_dir,\n", + " type=AssetTypes.URI_FOLDER,\n", + " description=f\"{dataset_name} dataset folder\",\n", + " name=f\"{dataset_name}_sample_folder\",\n", + ")\n", + "\n", + "uri_folder_data_asset = workspace_ml_client.data.create_or_update(my_data)\n", + "# uri_folder_data_asset = workspace_ml_client.data.get(name=f\"{dataset_name}_sample_folder\", version=1)\n", + "\n", + "# or if the uri_folder was uploaded, we could get it with:\n", + "# uri_folder_data_asset = workspace_ml_client.data.get(name = f\"{dataset_name}_sample_folder\", version=1)\n", + "\n", + "print(uri_folder_data_asset)\n", + "print(\"\")\n", + "print(\"Path to folder in Blob Storage:\")\n", + "print(uri_folder_data_asset.path)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.3 Convert the downloaded data to JSONL\n", + "\n", + "In this example, the MOT17_tiny dataset is stored in a directory. There are two different folders inside `train` image folder, each denoting a different video:\n", + "\n", + "- train/MOT17-02-FRCNN\n", + "- train/MOT17-04-FRCNN\n", + "\n", + "This is the most common data format for image object tracking. 
+ "\n",
+ "Note that in the above folders, the videos have already been split into image frames. If you have a raw video at hand, you can install [`ffmpeg`](https://ffmpeg.org/download.html) and run the following command:\n",
+ "\n",
+ "```\n",
+ "mkdir video_name\n",
+ "ffmpeg -i video_name.mp4 -vf fps=30 video_name/%6d.png\n",
+ "```\n",
+ "where `-i` specifies the input video and `-vf fps=30` sets the frame extraction rate (30 frames per second is a common choice).\n",
+ "\n",
+ "For documentation on preparing the datasets beyond this notebook, please refer to the [documentation on how to prepare datasets](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-prepare-datasets-for-automl-images).\n",
+ "\n",
+ "The following code block converts the original dataset to [CocoVid format](https://github.com/open-mmlab/mmtracking/blob/master/tests/data/demo_cocovid_data/ann.json); most public tracking datasets are already distributed in CocoVid format.\n",
+ "\n",
+ "AzureML pipelines accept datasets in MLTable format. We will first convert the MOT17_tiny dataset to CocoVid format and then convert the CocoVid annotations to MLTable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !pip install numpy\n",
+ "!python ./mot2coco.py -i {dataset_dir} -o {dataset_dir}/annotations --split-train --convert-det"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "import os\n",
+ "\n",
+ "\n",
+ "# We'll place each JSONL file inside its corresponding MLTable folder\n",
+ "training_mltable_path = os.path.join(dataset_dir, \"training-mltable-folder\")\n",
+ "validation_mltable_path = os.path.join(dataset_dir, \"validation-mltable-folder\")\n",
+ "testing_mltable_path = os.path.join(dataset_dir, \"testing-mltable-folder\")\n",
+ "\n",
+ "# First, let's create the folders if they don't exist\n",
+ "os.makedirs(training_mltable_path, exist_ok=True)\n",
+ "os.makedirs(validation_mltable_path, exist_ok=True)\n",
+ "os.makedirs(testing_mltable_path, exist_ok=True)\n",
+ "\n",
+ "train_annotations_file = os.path.join(training_mltable_path, \"train_annotations.jsonl\")\n",
+ "validation_annotations_file = os.path.join(\n",
+ "    validation_mltable_path, \"validation_annotations.jsonl\"\n",
+ ")\n",
+ "testing_annotations_file = os.path.join(\n",
+ "    testing_mltable_path, \"testing_annotations.jsonl\"\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 4.4 Convert annotation file from COCO to JSONL\n",
+ "AzureML recommends the MLTable format for datasets. To create the MLTables, we first need to convert the CocoVid annotations to JSONL format. The following script will create two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folders.\n",
+ "\n",
+ "The JSONL schema is similar to the [object detection schema](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema?view=azureml-api-2#object-detection), with the addition of `video_details` and an `instance_id` field in each label entry.\n",
+ "\n",
+ "Note that the `label` field is not required when creating the test JSONL.\n",
+ "\n",
+ "    {\n",
+ "        \"image_url\":\"azureml://subscriptions//resourcegroups//workspaces//datastores//paths/\",\n",
+ "        \"image_details\":{\n",
+ "            \"format\":\"image_format\",\n",
+ "            \"width\":\"image_width\",\n",
+ "            \"height\":\"image_height\"\n",
+ "        },\n",
+ "        \"video_details\": {\n",
+ "            \"frame_id\": \"zero_based_frame_id(int)\",\n",
+ "            \"video_name\": \"video_name\"\n",
+ "        },\n",
+ "        \"label\":[\n",
+ "            {\n",
+ "                \"label\":\"class_name_1\",\n",
+ "                \"topX\":\"xmin/width\",\n",
+ "                \"topY\":\"ymin/height\",\n",
+ "                \"bottomX\":\"xmax/width\",\n",
+ "                \"bottomY\":\"ymax/height\",\n",
+ "                \"isCrowd\":\"isCrowd\",\n",
+ "                \"instance_id\": \"instance_id\"\n",
+ "            },\n",
+ "            {\n",
+ "                \"label\":\"class_name_2\",\n",
+ "                \"topX\":\"xmin/width\",\n",
+ "                \"topY\":\"ymin/height\",\n",
+ "                \"bottomX\":\"xmax/width\",\n",
+ "                \"bottomY\":\"ymax/height\",\n",
+ "                \"instance_id\": \"instance_id\"\n",
+ "            },\n",
+ "            \"...\"\n",
+ "        ]\n",
+ "    }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!python cocovid2jsonl.py \\\n",
+ "    --input_cocovid_file_path {dataset_dir}/annotations/half-train_cocoformat.json \\\n",
+ "    --output_dir {training_mltable_path} \\\n",
+ "    --output_file_name train_annotations.jsonl \\\n",
+ "    --task_type ObjectTracking \\\n",
+ "    --base_url {uri_folder_data_asset.path}train\n",
+ "!python cocovid2jsonl.py \\\n",
+ "    --input_cocovid_file_path {dataset_dir}/annotations/half-val_cocoformat.json \\\n",
+ "    --output_dir {validation_mltable_path} \\\n",
+ "    --output_file_name validation_annotations.jsonl \\\n",
+ "    --task_type ObjectTracking \\\n",
+ "    --base_url {uri_folder_data_asset.path}train"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 4.5 Create MLTable data input\n",
+ "\n",
+ "Create MLTable data input using the JSONL files created above.\n",
+ "\n",
+ "For documentation on creating your own MLTable assets for jobs beyond this notebook, please refer to the resources below:\n",
+ "- [MLTable YAML Schema](https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-mltable) - covers how to write MLTable YAML, which is required for each MLTable asset.\n",
+ "- [Create MLTable data asset](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-data-assets?tabs=Python-SDK#create-a-mltable-data-asset) - covers how to create an MLTable data asset.\n",
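+ "\n",
+ "For reference, the helper function in the next cell writes an `MLTable` file whose contents look roughly like this for the training split; the validation and test splits only differ in the referenced `.jsonl` file name (the exact indentation follows the template string in that cell):\n",
+ "\n",
+ "```\n",
+ "paths:\n",
+ "  - file: ./train_annotations.jsonl\n",
+ "transformations:\n",
+ "  - read_json_lines:\n",
+ "        encoding: utf8\n",
+ "        invalid_lines: error\n",
+ "        include_path_column: false\n",
+ "  - convert_column_types:\n",
+ "      - columns: image_url\n",
+ "        column_type: stream_info\n",
+ "```"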
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_ml_table_file(filename):\n", + " \"\"\"Create ML Table definition\"\"\"\n", + "\n", + " return (\n", + " \"paths:\\n\"\n", + " \" - file: ./{0}\\n\"\n", + " \"transformations:\\n\"\n", + " \" - read_json_lines:\\n\"\n", + " \" encoding: utf8\\n\"\n", + " \" invalid_lines: error\\n\"\n", + " \" include_path_column: false\\n\"\n", + " \" - convert_column_types:\\n\"\n", + " \" - columns: image_url\\n\"\n", + " \" column_type: stream_info\"\n", + " ).format(filename)\n", + "\n", + "\n", + "def save_ml_table_file(output_path, mltable_file_contents):\n", + " with open(os.path.join(output_path, \"MLTable\"), \"w\") as f:\n", + " f.write(mltable_file_contents)\n", + "\n", + "\n", + "# Create and save train mltable\n", + "train_mltable_file_contents = create_ml_table_file(\n", + " os.path.basename(train_annotations_file)\n", + ")\n", + "save_ml_table_file(training_mltable_path, train_mltable_file_contents)\n", + "\n", + "# Create and save validation mltable\n", + "validation_mltable_file_contents = create_ml_table_file(\n", + " os.path.basename(validation_annotations_file)\n", + ")\n", + "save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)\n", + "\n", + "# Create and save testing mltable\n", + "testing_mltable_file_contents = create_ml_table_file(\n", + " os.path.basename(testing_annotations_file)\n", + ")\n", + "save_ml_table_file(testing_mltable_path, testing_mltable_file_contents)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Submit the fine tuning job using `mmtracking_video_multi_object_tracking_pipeline` component\n", + " \n", + "Create the job that uses the `mmtracking_video_multi_object_tracking_pipeline` component for `video-multi-object-tracking` tasks. Learn more in 5.2 about all the parameters supported for fine tuning." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.1 Receive component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FINETUNE_PIPELINE_COMPONENT_NAME = \"mmtracking_video_multi_object_tracking_pipeline\"\n", + "pipeline_component_mmtracking_func = registry_ml_client.components.get(\n", + " name=FINETUNE_PIPELINE_COMPONENT_NAME, label=\"latest\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.2 Create arguments to be passed to `mmtracking_video_multi_object_tracking_pipeline` component\n", + "\n", + "The `mmtracking_video_multi_object_tracking_pipeline` component consists of model selection and finetuning components. 
+ "- [Model Import Component](../../docs/component_docs/image_finetune/mmd_model_import_component.md)\n",
+ "- [Finetune Component](../../docs/component_docs/image_finetune/mmd_finetune_component.md)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mmtracking_model_name = aml_registry_model_name\n",
+ "pipeline_component_args = {\n",
+ "    # # Model import args\n",
+ "    \"model_family\": \"MmTrackingVideo\",\n",
+ "    \"mlflow_model\": foundation_model.id,  # pass foundation_model.id; passing the foundation_model object raises UserErrorException: only path input is supported now but get: ...\n",
+ "    # \"model_name\": mmtracking_model_name,  # specify model_name instead of mlflow_model if you want to use a model from the MMTracking model zoo\n",
+ "    # Finetune args\n",
+ "    \"task_name\": \"video-multi-object-tracking\",\n",
+ "    \"number_of_workers\": 8,\n",
+ "    \"image_width\": sample_image.size[0],\n",
+ "    \"image_height\": sample_image.size[1],\n",
+ "    \"number_of_epochs\": 5,\n",
+ "    # \"learning_rate\": 0.0001,\n",
+ "    # \"metric_for_best_model\": \"MOTA\",\n",
+ "    # \"extra_optim_args\": \"\",\n",
+ "    # \"evaluation_strategy\": \"epoch\",\n",
+ "    # \"evaluation_steps\": 500,\n",
+ "    # \"logging_strategy\": \"epoch\",\n",
+ "    # \"logging_steps\": 500,\n",
+ "    # \"save_strategy\": \"epoch\",\n",
+ "    # \"save_steps\": 500,\n",
+ "    # \"save_total_limit\": -1,\n",
+ "    # \"early_stopping\": False,\n",
+ "    # \"early_stopping_patience\": 1,\n",
+ "    # \"resume_from_checkpoint\": False,\n",
+ "    # \"save_as_mlflow_model\": True,\n",
+ "    # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.\n",
+ "    # \"max_steps\": -1,\n",
+ "    # \"training_batch_size\": 4,  # note: validation_batch_size is not supported; the MOT task only allows training_batch_size = 1 to preserve the frame sequence order\n",
+ "    # \"learning_rate_scheduler\": \"warmup_cosine\",\n",
+ "    # \"warmup_steps\": 0,\n",
+ "    # \"optimizer\": \"sgd\",\n",
+ "    # \"weight_decay\": 0.0,\n",
+ "    # \"gradient_accumulation_step\": 1,\n",
+ "    # \"max_grad_norm\": 1.0,\n",
+ "    # \"iou_threshold\": 0.5,\n",
+ "    # \"box_score_threshold\": 0.3,\n",
+ "    # \"precision\": \"32\",\n",
+ "    # \"random_seed\": 42,\n",
+ "    # The following parameters map to the dataset fields.\n",
+ "    # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.\n",
+ "}\n",
+ "\n",
+ "# Ensure that the user provides only one of mlflow_model or model_name\n",
+ "if (\n",
+ "    pipeline_component_args.get(\"mlflow_model\") is None\n",
+ "    and pipeline_component_args.get(\"model_name\") is None\n",
+ "):\n",
+ "    raise ValueError(\n",
+ "        \"You must specify either mlflow_model or model_name for the model to finetune\"\n",
+ "    )\n",
+ "if (\n",
+ "    pipeline_component_args.get(\"mlflow_model\") is not None\n",
+ "    and pipeline_component_args.get(\"model_name\") is not None\n",
+ "):\n",
+ "    raise ValueError(\n",
+ "        \"You must specify ONLY one of mlflow_model and model_name for the model to finetune\"\n",
+ "    )\n",
+ "elif (\n",
+ "    pipeline_component_args.get(\"mlflow_model\") is None\n",
+ "    and pipeline_component_args.get(\"model_name\") is not None\n",
+ "):\n",
+ "    use_model_name = mmtracking_model_name\n",
+ "elif (\n",
+ "    pipeline_component_args.get(\"mlflow_model\") is not None\n",
+ "    and 
pipeline_component_args.get(\"model_name\") is None\n", + "):\n", + " use_model_name = aml_registry_model_name\n", + "print(f\"Finetuning model {use_model_name}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.3 Utility function to create pipeline using `mmtracking_video_multi_object_tracking_pipeline` component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.dsl import pipeline\n", + "from azure.ai.ml.entities import PipelineComponent\n", + "from azure.ai.ml import Input\n", + "from azure.ai.ml.constants import AssetTypes\n", + "\n", + "\n", + "@pipeline()\n", + "def create_pipeline_mmtracking():\n", + " \"\"\"Create pipeline.\"\"\"\n", + "\n", + " mmtracking_pipeline_component: PipelineComponent = (\n", + " pipeline_component_mmtracking_func(\n", + " compute_model_import=model_import_cluster_name,\n", + " compute_finetune=finetune_cluster_name,\n", + " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", + " validation_data=Input(\n", + " type=AssetTypes.MLTABLE, path=validation_mltable_path\n", + " ),\n", + " **pipeline_component_args,\n", + " )\n", + " )\n", + " return {\n", + " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", + " \"trained_model\": mmtracking_pipeline_component.outputs.mlflow_model_folder,\n", + " }" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.4 Run the fine tuning job using `mmtracking_video_multi_object_tracking_pipeline` component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mmtracking_pipeline_object = create_pipeline_mmtracking()\n", + "\n", + "mmtracking_pipeline_object.display_name = (\n", + " use_model_name + \"_mmtracking_pipeline_component_run_\" + \"mot\"\n", + ")\n", + "# Don't use cached results from previous jobs\n", + "mmtracking_pipeline_object.settings.force_rerun = True\n", + "\n", + "print(\"Submitting pipeline\")\n", + "\n", + "mmtracking_pipeline_run = workspace_ml_client.jobs.create_or_update(\n", + " mmtracking_pipeline_object, experiment_name=experiment_name\n", + ")\n", + "\n", + "print(f\"Pipeline created. URL: {mmtracking_pipeline_run.studio_url}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "workspace_ml_client.jobs.stream(mmtracking_pipeline_run.name)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Get metrics from finetune component\n", + "\n", + "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 6.1 Initialize MLFlow Client\n", + "\n", + "The models and artifacts can be accessed via the MLFlow interface.\n", + "Initialize the MLFlow client here, and set the backend as Azure ML, via. 
the MLFlow Client.\n", + "\n", + "IMPORTANT - You need to have installed the latest MLFlow packages with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install azureml-mlflow\n", + "!pip install mlflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import mlflow\n", + "\n", + "# Obtain the tracking URL from MLClient\n", + "MLFLOW_TRACKING_URI = workspace_ml_client.workspaces.get(\n", + " name=workspace_ml_client.workspace_name\n", + ").mlflow_tracking_uri\n", + "\n", + "print(MLFLOW_TRACKING_URI)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the MLFLOW TRACKING URI\n", + "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n", + "print(f\"\\nCurrent tracking uri: {mlflow.get_tracking_uri()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mlflow.tracking.client import MlflowClient\n", + "\n", + "# Initialize MLFlow client\n", + "mlflow_client = MlflowClient()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 6.2 Get the training run\n", + "\n", + "Fetch the training run ids from the above pipeline run. We will later use these run ids to fetch the metrics. We will use the training run id to register the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", + "filter = \"tags.mlflow.rootRunId='\" + mmtracking_pipeline_run.name + \"'\"\n", + "runs = mlflow.search_runs(\n", + " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", + ")\n", + "\n", + "# Get the training runs.\n", + "# Using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", + "for run in runs:\n", + " # Check if run.data.metrics.epoch exists\n", + " if \"epoch\" in run.data.metrics:\n", + " training_run = run\n", + " # Else, check if run.data.metrics.MOTA exists\n", + " elif \"MOTA\" in run.data.metrics:\n", + " evaluation_run = run" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 6.3 Get training metrics\n", + "\n", + "Access the results (such as Models, Artifacts, Metrics) of a previously completed run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "pd.DataFrame(training_run.data.metrics, index=[0]).T" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Register the fine tuned model with the workspace\n", + "\n", + "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", + "timestamp = str(int(time.time()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Model\n", + "from azure.ai.ml.constants import AssetTypes\n", + "\n", + "# Check if the `trained_model` output is available\n", + "print(\n", + " f\"Pipeline job outputs: {workspace_ml_client.jobs.get(mmtracking_pipeline_run.name).outputs}\"\n", + ")\n", + "\n", + "# Fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n", + "model_path_from_job = (\n", + " f\"azureml://jobs/{mmtracking_pipeline_run.name}/outputs/trained_model\"\n", + ")\n", + "print(f\"Path to register model: {model_path_from_job}\")\n", + "\n", + "finetuned_model_name = f\"{use_model_name.replace('/', '-')}-mot17-tiny\"\n", + "finetuned_model_description = f\"{use_model_name.replace('/', '-')} fine tuned model for mot17 tiny video-multi-object-tracking\"\n", + "prepare_to_register_model = Model(\n", + " path=model_path_from_job,\n", + " type=AssetTypes.MLFLOW_MODEL,\n", + " name=finetuned_model_name,\n", + " version=timestamp, # Use timestamp as version to avoid version conflict\n", + " description=finetuned_model_description,\n", + ")\n", + "print(f\"Prepare to register model: \\n{prepare_to_register_model}\")\n", + "\n", + "# Register the model from pipeline job output\n", + "registered_model = workspace_ml_client.models.create_or_update(\n", + " prepare_to_register_model\n", + ")\n", + "print(f\"Registered model: {registered_model}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 8. Deploy the fine tuned model to an online endpoint\n", + "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import datetime\n",
+ "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
+ "\n",
+ "# Endpoint names need to be unique in a region, hence using a timestamp to create a unique endpoint name\n",
+ "online_endpoint_name = \"mmt-mot17tiny-\" + datetime.datetime.now().strftime(\"%m%d%H%M\")\n",
+ "online_endpoint_description = f\"Online endpoint for {registered_model.name}, fine tuned model for mot17 tiny video-multi-object-tracking\"\n",
+ "# Create an online endpoint\n",
+ "endpoint = ManagedOnlineEndpoint(\n",
+ "    name=online_endpoint_name,\n",
+ "    description=online_endpoint_description,\n",
+ "    auth_mode=\"key\",\n",
+ "    tags={\"foo\": \"bar\"},\n",
+ ")\n",
+ "workspace_ml_client.begin_create_or_update(endpoint).result()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from azure.ai.ml.entities import OnlineRequestSettings, ProbeSettings\n",
+ "\n",
+ "deployment_name = \"mmt-mot17tiny-mlflow-deploy\"\n",
+ "print(registered_model.id)\n",
+ "print(online_endpoint_name)\n",
+ "print(deployment_name)\n",
+ "\n",
+ "# Create a deployment\n",
+ "demo_deployment = ManagedOnlineDeployment(\n",
+ "    name=deployment_name,\n",
+ "    endpoint_name=online_endpoint_name,\n",
+ "    model=registered_model.id,\n",
+ "    instance_type=\"Standard_NC6s_v3\",\n",
+ "    instance_count=1,\n",
+ "    request_settings=OnlineRequestSettings(\n",
+ "        max_concurrent_requests_per_instance=1,\n",
+ "        request_timeout_ms=90000,\n",
+ "        max_queue_wait_ms=500,\n",
+ "    ),\n",
+ "    liveness_probe=ProbeSettings(\n",
+ "        failure_threshold=49,\n",
+ "        success_threshold=1,\n",
+ "        timeout=299,\n",
+ "        period=200,\n",
+ "        initial_delay=180,\n",
+ "    ),\n",
+ "    readiness_probe=ProbeSettings(\n",
+ "        failure_threshold=10,\n",
+ "        success_threshold=1,\n",
+ "        timeout=10,\n",
+ "        period=10,\n",
+ "        initial_delay=10,\n",
+ "    ),\n",
+ ")\n",
+ "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+ "endpoint.traffic = {deployment_name: 100}\n",
+ "workspace_ml_client.begin_create_or_update(endpoint).result()"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 9. Test the endpoint with sample data\n",
+ "\n",
+ "We will submit a sample video to the online endpoint for inference and then visualize the predicted tracking boxes on the video frames.\n",
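+ "\n",
+ "Based on how the visualization code further below parses the response, the endpoint is expected to return one entry per video frame, each carrying a list of tracked boxes in pixel coordinates together with an instance id; additional fields such as a label and a confidence score may also be present. The values shown here are purely illustrative:\n",
+ "\n",
+ "```\n",
+ "[\n",
+ "  {\n",
+ "    \"track_bboxes\": [\n",
+ "      {\"box\": {\"topX\": 12, \"topY\": 34, \"bottomX\": 56, \"bottomY\": 90, \"instance_id\": 1}},\n",
+ "      {\"box\": {\"topX\": 80, \"topY\": 20, \"bottomX\": 130, \"bottomY\": 95, \"instance_id\": 2}}\n",
+ "    ]\n",
+ "  }\n",
+ "]\n",
+ "```"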
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "demo_deployment = workspace_ml_client.online_deployments.get(\n", + " name=deployment_name,\n", + " endpoint_name=online_endpoint_name,\n", + ")\n", + "\n", + "# Get the details for online endpoint\n", + "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n", + "\n", + "# existing traffic details\n", + "print(endpoint.traffic)\n", + "# Get the scoring URI\n", + "print(endpoint.scoring_uri)\n", + "print(demo_deployment)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create request json\n", + "import base64\n", + "import json\n", + "\n", + "sample_video_link = \"https://github.com/open-mmlab/mmtracking/raw/master/demo/demo.mp4\"\n", + "request_json = {\"input_data\": {\"columns\": [\"video\"], \"data\": [sample_video_link]}}\n", + "request_file_name = \"sample_request_data.json\"\n", + "with open(request_file_name, \"w\") as request_file:\n", + " json.dump(request_json, request_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "resp = workspace_ml_client.online_endpoints.invoke(\n", + " endpoint_name=online_endpoint_name,\n", + " deployment_name=demo_deployment.name,\n", + " request_file=request_file_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "resp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Visualize tracking\n", + "Now we can visualize the tracking in the video:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install opencv-python-headless\n", + "!pip install mmcv-full==1.7.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import mmcv\n", + "from time import sleep\n", + "from PIL import Image\n", + "from IPython.display import display, clear_output\n", + "\n", + "img_frames = mmcv.VideoReader(sample_video_link)\n", + "predictions = json.loads(resp)\n", + "assert len(img_frames) == len(predictions)\n", + "\n", + "\n", + "def draw_bbox_on_image(img, track_bbox):\n", + " x0, y0, x1, y1 = (\n", + " track_bbox[\"topX\"],\n", + " track_bbox[\"topY\"],\n", + " track_bbox[\"bottomX\"],\n", + " track_bbox[\"bottomY\"],\n", + " )\n", + " x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)\n", + " instance_id = track_bbox[\"instance_id\"]\n", + " text = f\"ID: {instance_id}\"\n", + " cv2.putText(img, text, (x0, y0), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1)\n", + " cv2.rectangle(img, (x0, y0), (x1, y1), color=(0, 0, 0), thickness=2)\n", + "\n", + "\n", + "visualized_results = []\n", + "for img, prediction in zip(img_frames, predictions):\n", + " track_bboxes = prediction[\"track_bboxes\"]\n", + " for track_bbox in track_bboxes:\n", + " draw_bbox_on_image(img, track_bbox[\"box\"])\n", + " visualized_results.append(img)\n", + "\n", + "fps = 10 # frames per second, for most videos fps=30, pls change it according to your video\n", + "for img_array in visualized_results:\n", + " display(Image.fromarray(img_array))\n", + " sleep(1.0 / fps)\n", + " clear_output(wait=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 10. 
Clean up resources - delete the online endpoint\n", + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py38", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py new file mode 100644 index 00000000000..72897c1e490 --- /dev/null +++ b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mot2coco.py @@ -0,0 +1,245 @@ +# This script converts MOT labels into COCO style. +# Official website of the MOT dataset: https://motchallenge.net/ +# +# Label format of MOT dataset: +# GTs: +# # starts from 1 but COCO style starts from 0, +# , , , , , +# # conf is annotated as 0 if the object is ignored, +# , +# +# DETs and Results: +# , , , , , , , +# , , # for 3D objects +# +# Classes in MOT: +# 1: 'pedestrian' +# 2: 'person on vehicle' +# 3: 'car' +# 4: 'bicycle' +# 5: 'motorbike' +# 6: 'non motorized vehicle' +# 7: 'static person' +# 8: 'distractor' +# 9: 'occluder' +# 10: 'occluder on the ground', +# 11: 'occluder full' +# 12: 'reflection' +# +# USELESS classes are not included into the json file. +# IGNORES classes are included with `ignore=True`. +import argparse +from collections import defaultdict +import json +import os +import os.path as osp +import pickle + +import numpy as np +from tqdm import tqdm + +USELESS = [3, 4, 5, 6, 9, 10, 11] +IGNORES = [2, 7, 8, 12, 13] + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Convert MOT label and detections to COCO-VID format." + ) + parser.add_argument("-i", "--input", help="path of MOT data") + parser.add_argument("-o", "--output", help="path to save coco formatted label file") + parser.add_argument( + "--convert-det", action="store_true", help="convert official detection results." 
+ ) + parser.add_argument( + "--split-train", + action="store_true", + help="split the train set into half-train and half-validate.", + ) + return parser.parse_args() + + +def parse_gts(gts, is_mot15): + outputs = defaultdict(list) + for gt in gts: + gt = gt.strip().split(",") + frame_id, ins_id = map(int, gt[:2]) + bbox = list(map(float, gt[2:6])) + if is_mot15: + conf = 1.0 + class_id = 1 + visibility = 1.0 + else: + conf = float(gt[6]) + class_id = int(gt[7]) + visibility = float(gt[8]) + if class_id in USELESS: + continue + elif class_id in IGNORES: + continue + anns = dict( + category_id=1, + bbox=bbox, + area=bbox[2] * bbox[3], + iscrowd=False, + visibility=visibility, + mot_instance_id=ins_id, + mot_conf=conf, + mot_class_id=class_id, + ) + outputs[frame_id].append(anns) + return outputs + + +def parse_dets(dets): + outputs = defaultdict(list) + for det in dets: + det = det.strip().split(",") + frame_id, ins_id = map(int, det[:2]) + assert ins_id == -1 + bbox = list(map(float, det[2:7])) + # [x1, y1, x2, y2] to be consistent with mmdet + bbox = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3], bbox[4]] + outputs[frame_id].append(bbox) + + return outputs + + +def list_from_file(file_name): + with open(file_name) as f: + lines = [line.rstrip() for line in f] + + return lines + + +def dump_to_json(json_object, json_file_name): + with open(json_file_name, "w") as outfile: + json_str = json.dumps(json_object) + outfile.write(json_str) + + +def dump_to_pickle(pickle_object, pickle_file_name): + with open(pickle_file_name, "wb") as outfile: + pickle.dump(pickle_object, outfile) + + +def main(): + args = parse_args() + if not osp.isdir(args.output): + os.makedirs(args.output) + + sets = ["train", "test"] + if args.split_train: + sets += ["half-train", "half-val"] + vid_id, img_id, ann_id = 1, 1, 1 + + for subset in sets: + ins_id = 0 + print(f"Converting {subset} set to COCO format") + if "half" in subset: + in_folder = osp.join(args.input, "train") + else: + in_folder = osp.join(args.input, subset) + out_file = osp.join(args.output, f"{subset}_cocoformat.json") + outputs = defaultdict(list) + outputs["categories"] = [dict(id=1, name="pedestrian")] + if args.convert_det: + det_file = osp.join(args.output, f"{subset}_detections.pkl") + detections = dict(det_bboxes=dict()) + video_names = os.listdir(in_folder) + for video_name in tqdm(video_names): + # basic params + parse_gt = "test" not in subset + ins_maps = dict() + # load video infos + video_folder = osp.join(in_folder, video_name) + infos = list_from_file(f"{video_folder}/seqinfo.ini") + # video-level infos + assert video_name == infos[1].strip().split("=")[1] + img_folder = infos[2].strip().split("=")[1] + img_names = [ + fname + for fname in os.listdir(f"{video_folder}/{img_folder}") + if fname.endswith(".jpg") or fname.endswith(".png") + ] + img_names = sorted(img_names) + fps = int(infos[3].strip().split("=")[1]) + num_imgs = int(infos[4].strip().split("=")[1]) + assert num_imgs == len(img_names) + width = int(infos[5].strip().split("=")[1]) + height = int(infos[6].strip().split("=")[1]) + video = dict( + id=vid_id, name=video_name, fps=fps, width=width, height=height + ) + # parse annotations + if parse_gt: + gts = list_from_file(f"{video_folder}/gt/gt.txt") + if "MOT15" in video_folder: + img2gts = parse_gts(gts, True) + else: + img2gts = parse_gts(gts, False) + if args.convert_det: + dets = list_from_file(f"{video_folder}/det/det.txt") + img2dets = parse_dets(dets) + # make half sets + if "half" in subset: + split_frame = 
num_imgs // 2 + 1 + if "train" in subset: + img_names = img_names[:split_frame] + elif "val" in subset: + img_names = img_names[split_frame:] + else: + raise ValueError("subset must be named with `train` or `val`") + mot_frame_ids = [str(int(_.split(".")[0])) for _ in img_names] + with open(f"{video_folder}/gt/gt_{subset}.txt", "wt") as f: + for gt in gts: + if gt.split(",")[0] in mot_frame_ids: + f.writelines(f"{gt}\n") + # image and box level infos + for frame_id, name in enumerate(img_names): + img_name = osp.join(video_name, img_folder, name) + mot_frame_id = int(name.split(".")[0]) + image = dict( + id=img_id, + video_id=vid_id, + file_name=img_name, + height=height, + width=width, + frame_id=frame_id, + mot_frame_id=mot_frame_id, + ) + if parse_gt: + gts = img2gts[mot_frame_id] + for gt in gts: + gt.update(id=ann_id, image_id=img_id) + mot_ins_id = gt["mot_instance_id"] + if mot_ins_id in ins_maps: + gt["instance_id"] = ins_maps[mot_ins_id] + else: + gt["instance_id"] = ins_id + ins_maps[mot_ins_id] = ins_id + ins_id += 1 + outputs["annotations"].append(gt) + ann_id += 1 + if args.convert_det: + dets = np.array(img2dets[mot_frame_id]) + if dets.ndim == 1: + assert len(dets) == 0 + dets = np.zeros((0, 5)) + detections["det_bboxes"][img_name] = [dets] + outputs["images"].append(image) + img_id += 1 + outputs["videos"].append(video) + vid_id += 1 + outputs["num_instances"] = ins_id + print(f"{subset} has {ins_id} instances.") + dump_to_json(outputs, out_file) + if args.convert_det: + dump_to_pickle(detections, det_file) + print(f"Done! Saved as {out_file} and {det_file}") + else: + print(f"Done! Saved as {out_file}") + + +if __name__ == "__main__": + main() diff --git a/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json new file mode 100644 index 00000000000..9eaa17ee063 --- /dev/null +++ b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/sample_request_data.json @@ -0,0 +1 @@ +{"input_data": {"columns": ["video"], "data": ["https://github.com/open-mmlab/mmtracking/raw/master/demo/demo.mp4"]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/video-multi-object-tracking/sample_request_data.json b/sdk/python/foundation-models/system/inference/video-multi-object-tracking/sample_request_data.json new file mode 100644 index 00000000000..9eaa17ee063 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/video-multi-object-tracking/sample_request_data.json @@ -0,0 +1 @@ +{"input_data": {"columns": ["video"], "data": ["https://github.com/open-mmlab/mmtracking/raw/master/demo/demo.mp4"]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.ipynb new file mode 100644 index 00000000000..961e8efa7c9 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/video-multi-object-tracking/video-multi-object-tracking-online-endpoint.ipynb @@ -0,0 +1,396 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Video Multi-Object Tracking Inference using Online Endpoints\n", + "\n", + "This sample shows how deploy `video-multi-object-tracking` type models to an online endpoint for inference.\n", + "\n", 
+ "### Task\n", + "`video-multi-object-tracking` task monitors multiple objects as they move. The goal is to identify and locate objects of interest in each frame and then associate them across frames to keep track of their movements over time. The output will be assigned boxes with their top-left and bottom-right coordinates along with instance id, box label and confidence score to video frames.\n", + " \n", + "### Model\n", + "Models that can perform the `video-multi-object-tracking` task are tagged with `video-multi-object-tracking`. We will use the `bytetrack_yolox_x_crowdhuman_mot17-private-half` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name.\n", + "\n", + "### Inference data\n", + "We will use an [online video link](https://github.com/open-mmlab/mmtracking/blob/master/demo/demo.mp4) for multi-object tracking.\n", + "\n", + "\n", + "### Outline\n", + "1. Setup pre-requisites\n", + "2. Pick a model to deploy\n", + "3. Prepare data for inference\n", + "4. Deploy the model to an online endpoint for real time inference\n", + "5. Test the endpoint\n", + "6. Visualize output\n", + "7. Clean up resources - delete the online endpoint" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Setup pre-requisites\n", + "* Install dependencies\n", + "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", + "* Connect to `azureml` system registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip install azure-ai-ml==1.8.0\n", + "! pip install azure-identity==1.13.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import (\n", + " DefaultAzureCredential,\n", + " InteractiveBrowserCredential,\n", + ")\n", + "import time\n", + "\n", + "try:\n", + " credential = DefaultAzureCredential()\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " credential = InteractiveBrowserCredential()\n", + "\n", + "try:\n", + " workspace_ml_client = MLClient.from_config(credential)\n", + " subscription_id = workspace_ml_client.subscription_id\n", + " resource_group = workspace_ml_client.resource_group_name\n", + " workspace_name = workspace_ml_client.workspace_name\n", + "except Exception as ex:\n", + " print(ex)\n", + " # Enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace_name = \"\"\n", + "workspace_ml_client = MLClient(\n", + " credential, subscription_id, resource_group, workspace_name\n", + ")\n", + "\n", + "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml\"\n", + "registry_ml_client = MLClient(\n", + " credential,\n", + " subscription_id,\n", + " resource_group,\n", + " registry_name=\"azureml\",\n", + ")\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", + "timestamp = str(int(time.time()))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Pick a model to deploy\n", + "\n", + "Browse models in the Model Catalog in the AzureML Studio, filtering by the `video-multi-object-tracking` task. In this example, we use the `bytetrack_yolox_x_crowdhuman_mot17-private-half` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to finetune this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [video multi-object tracking finetuning notebook](../../finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_name = \"bytetrack_yolox_x_crowdhuman_mot17-private-half\"\n", + "foundation_model = registry_ml_client.models.get(name=model_name, label=\"latest\")\n", + "print(\n", + " f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Deploy the model to an online endpoint for real time inference\n", + "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", + "\n", + "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", + "timestamp = int(time.time())\n", + "online_endpoint_name = \"video-mot-\" + str(timestamp)\n", + "# Create an online endpoint\n", + "endpoint = ManagedOnlineEndpoint(\n", + " name=online_endpoint_name,\n", + " description=\"Online endpoint for \"\n", + " + foundation_model.name\n", + " + \", for video-multi-object-tracking task\",\n", + " auth_mode=\"key\",\n", + ")\n", + "workspace_ml_client.begin_create_or_update(endpoint).wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import OnlineRequestSettings, ProbeSettings\n", + "\n", + "deployment_name = \"video-mot-mlflow-deploy\"\n", + "\n", + "print(foundation_model.id)\n", + "print(online_endpoint_name)\n", + "print(deployment_name)\n", + "\n", + "# Create a deployment\n", + "demo_deployment = ManagedOnlineDeployment(\n", + " name=deployment_name,\n", + " endpoint_name=online_endpoint_name,\n", + " model=foundation_model.id,\n", + " instance_type=\"Standard_NC6s_v3\", # Use GPU instance type only for MOT\n", + " instance_count=1,\n", + " request_settings=OnlineRequestSettings(\n", + " max_concurrent_requests_per_instance=1,\n", + " request_timeout_ms=90000,\n", + " max_queue_wait_ms=500,\n", + " ),\n", + " liveness_probe=ProbeSettings(\n", + " failure_threshold=49,\n", + " success_threshold=1,\n", + " timeout=299,\n", + " period=180,\n", + " initial_delay=180,\n", + " ),\n", + " readiness_probe=ProbeSettings(\n", + " failure_threshold=10,\n", + " success_threshold=1,\n", + " timeout=10,\n", + " period=10,\n", + " initial_delay=10,\n", + " ),\n", + ")\n", + "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", + "endpoint.traffic = {deployment_name: 100}\n", + 
"workspace_ml_client.begin_create_or_update(endpoint).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Test the endpoint\n", + "\n", + "We will fetch some sample data from the test dataset and submit to online endpoint for inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "demo_deployment = workspace_ml_client.online_deployments.get(\n", + " name=deployment_name,\n", + " endpoint_name=online_endpoint_name,\n", + ")\n", + "\n", + "# Get the details for online endpoint\n", + "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n", + "\n", + "# Existing traffic details\n", + "print(endpoint.traffic)\n", + "\n", + "# Get the scoring URI\n", + "print(endpoint.scoring_uri)\n", + "print(demo_deployment)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "We will use an [online video link](https://github.com/open-mmlab/mmtracking/blob/master/demo/demo.mp4) for multi-object tracking." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create request json\n", + "import json\n", + "\n", + "sample_video_link = \"https://github.com/open-mmlab/mmtracking/raw/master/demo/demo.mp4\"\n", + "\n", + "request_json = {\"input_data\": {\"columns\": [\"video\"], \"data\": [sample_video_link]}}\n", + "request_file_name = \"sample_request_data.json\"\n", + "\n", + "with open(request_file_name, \"w\") as request_file:\n", + " json.dump(request_json, request_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", + "response = workspace_ml_client.online_endpoints.invoke(\n", + " endpoint_name=online_endpoint_name,\n", + " deployment_name=demo_deployment.name,\n", + " request_file=request_file_name,\n", + ")\n", + "print(f\"raw response: {response}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. 
Visualize the Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install opencv-python-headless\n", + "!pip install mmcv-full==1.7.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import mmcv\n", + "import json\n", + "\n", + "img_frames = mmcv.VideoReader(sample_video_link)\n", + "predictions = json.loads(response)\n", + "assert len(img_frames) == len(predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def draw_bbox_on_image(img, track_bbox):\n", + " x0, y0, x1, y1 = (\n", + " track_bbox[\"topX\"],\n", + " track_bbox[\"topY\"],\n", + " track_bbox[\"bottomX\"],\n", + " track_bbox[\"bottomY\"],\n", + " )\n", + " x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)\n", + " instance_id = track_bbox[\"instance_id\"]\n", + " text = f\"ID: {instance_id}\"\n", + " cv2.putText(img, text, (x0, y0), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1)\n", + " cv2.rectangle(img, (x0, y0), (x1, y1), color=(0, 0, 0), thickness=2)\n", + "\n", + "\n", + "visualized_results = []\n", + "for img, prediction in zip(img_frames, predictions):\n", + " track_bboxes = prediction[\"track_bboxes\"]\n", + " for track_bbox in track_bboxes:\n", + " draw_bbox_on_image(img, track_bbox[\"box\"])\n", + " visualized_results.append(img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "from IPython.display import display, clear_output\n", + "from time import sleep\n", + "\n", + "fps = 10 # frames per second, for most videos fps=30, pls change it according to your video\n", + "for img_array in visualized_results:\n", + " display(Image.fromarray(img_array[:, :, ::-1]))\n", + " sleep(1.0 / fps)\n", + " clear_output(wait=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Clean up resources - delete the online endpoint\n", + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py38", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}