Yiyoulin/mot examples (#2746)
* mot inference notebook

* finetune notebook completed

* clean up finetune notebook

* clean up for online inference notebook

* shorten visualization sleep time

* removed unused inference compute

* update the mot2coco script

* add mmtracking_model_name

* edit the bytetrack link to specific tag

* delete validation_batch_size

* wording updates

* placeholder for mmtracking cli, scripts not changed yet

* add image scale to notebook

* online cli

* finetune cli - pending verification

* finetune data preparation scripts working fine locally

* finetune submission

* finetune submission scripts

* update registry and component name

* update registry and model name in online endpoint notebook

* update for cli model, registry and component names

* comments resolve

* update the model name for scripts

* resolve comments

* reformat code files

* resolve comments
yiyoulin authored Nov 20, 2023
1 parent 64cbae7 commit 3ac70fb
Showing 16 changed files with 3,041 additions and 0 deletions.
@@ -0,0 +1,190 @@
import json
import os
import argparse

# Define Converters


class CocoVidToJSONLinesConverter:
    def convert(self):
        raise NotImplementedError

class BoundingBoxConverter(CocoVidToJSONLinesConverter):
    """example output for object tracking jsonl:
    {
        "image_url":"azureml://subscriptions/<my-subscription-id>/resourcegroups/<my-resource-group>/workspaces/<my-workspace>/datastores/<my-datastore>/paths/<path_to_image>",
        "image_details":{
            "format":"image_format",
            "width":"image_width",
            "height":"image_height"
        },
        "video_details": {
            "frame_id": "zero_based_frame_id(int)",
            "video_name": "video_name"
        },
        "label":[
            {
                "label":"class_name_1",
                "topX":"xmin/width",
                "topY":"ymin/height",
                "bottomX":"xmax/width",
                "bottomY":"ymax/height",
                "isCrowd":"isCrowd",
                "instance_id": "instance_id"
            },
            {
                "label":"class_name_2",
                "topX":"xmin/width",
                "topY":"ymin/height",
                "bottomX":"xmax/width",
                "bottomY":"ymax/height",
                "instance_id": "instance_id"
            },
            "..."
        ]
    }
    """

    def __init__(self, coco_data):
        self.json_lines_data = []
        self.categories = {}
        self.coco_data = coco_data
        self.image_id_to_data_index = {}
        self.video_id_to_name = {}
        for i in range(0, len(coco_data["images"])):
            self.json_lines_data.append({})
            self.json_lines_data[i]["image_url"] = ""
            self.json_lines_data[i]["image_details"] = {}
            self.json_lines_data[i]["video_details"] = {}
            self.json_lines_data[i]["label"] = []
        for i in range(0, len(coco_data["categories"])):
            self.categories[coco_data["categories"][i]["id"]] = coco_data["categories"][
                i
            ]["name"]
        for i in range(0, len(coco_data["videos"])):
            self.video_id_to_name[coco_data["videos"][i]["id"]] = coco_data["videos"][
                i
            ]["name"]

    def _populate_image_url(self, index, coco_image):
        self.json_lines_data[index]["image_url"] = coco_image["file_name"]
        self.image_id_to_data_index[coco_image["id"]] = index

    def _populate_image_details(self, index, coco_image):
        file_name = coco_image["file_name"]
        self.json_lines_data[index]["image_details"]["format"] = file_name[
            file_name.rfind(".") + 1 :
        ]
        self.json_lines_data[index]["image_details"]["width"] = coco_image["width"]
        self.json_lines_data[index]["image_details"]["height"] = coco_image["height"]

    def _populate_video_details(self, index, coco_image):
        self.json_lines_data[index]["video_details"]["frame_id"] = coco_image[
            "frame_id"
        ]
        self.json_lines_data[index]["video_details"][
            "video_name"
        ] = self.video_id_to_name[coco_image["video_id"]]

    def _populate_bbox_in_label(self, label, annotation, image_details):
        # If the bbox already comes normalized (all coordinates < 1.5), skip
        # normalization; otherwise divide by the image dimensions.
        if max(annotation["bbox"]) < 1.5:
            width = 1
            height = 1
        else:
            width = image_details["width"]
            height = image_details["height"]
        label["topX"] = annotation["bbox"][0] / width
        label["topY"] = annotation["bbox"][1] / height
        label["bottomX"] = (annotation["bbox"][0] + annotation["bbox"][2]) / width
        label["bottomY"] = (annotation["bbox"][1] + annotation["bbox"][3]) / height

    def _populate_label(self, annotation):
        index = self.image_id_to_data_index[annotation["image_id"]]
        image_details = self.json_lines_data[index]["image_details"]
        label = {"label": self.categories[annotation["category_id"]]}
        self._populate_bbox_in_label(label, annotation, image_details)
        self._populate_instanceId(label, annotation)
        self._populate_isCrowd(label, annotation)
        self._populate_visibility(label, annotation)
        self.json_lines_data[index]["label"].append(label)

    def _populate_instanceId(self, label, annotation):
        label["instance_id"] = annotation["instance_id"]

    def _populate_isCrowd(self, label, annotation):
        if "iscrowd" in annotation.keys():
            label["isCrowd"] = int(annotation["iscrowd"])

    def _populate_visibility(self, label, annotation):
        if "visibility" in annotation.keys():
            label["visibility"] = annotation["visibility"]

    def convert(self):
        for i in range(0, len(self.coco_data["images"])):
            self._populate_image_url(i, self.coco_data["images"][i])
            self._populate_image_details(i, self.coco_data["images"][i])
            self._populate_video_details(i, self.coco_data["images"][i])
        if "annotations" not in self.coco_data:
            self.coco_data["annotations"] = []
        for i in range(0, len(self.coco_data["annotations"])):
            self._populate_label(self.coco_data["annotations"][i])
        return self.json_lines_data


def main(args):
    input_coco_file_path = args.input_cocovid_file_path
    output_dir = args.output_dir
    output_file_path = output_dir + "/" + args.output_file_name
    print(output_file_path)
    task_type = args.task_type
    base_url = args.base_url

    def read_coco_file(coco_file):
        with open(coco_file) as f_in:
            return json.load(f_in)

    def write_json_lines(converter, filename, base_url=None):
        json_lines_data = converter.convert()
        with open(filename, "w") as outfile:
            for json_line in json_lines_data:
                if base_url is not None:
                    image_url = json_line["image_url"]
                    json_line["image_url"] = os.path.join(base_url, image_url)
                    json_line["image_url"] = json_line["image_url"].replace("\\", "/")
                json.dump(json_line, outfile, separators=(",", ":"))
                outfile.write("\n")
        print(f"Conversion completed. Converted {len(json_lines_data)} lines.")

    coco_data = read_coco_file(input_coco_file_path)

    print(f"Converting for {task_type}")

    if task_type == "ObjectTracking":
        converter = BoundingBoxConverter(coco_data)
        write_json_lines(converter, output_file_path, base_url)
    else:
        print("ERROR: Invalid Task Type")


if __name__ == "__main__":
    # Parse arguments that are passed into the script
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_cocovid_file_path", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument("--output_file_name", type=str, required=True)
    parser.add_argument(
        "--task_type",
        type=str,
        choices=["ObjectTracking"],
        default="ObjectTracking",
    )
    parser.add_argument("--base_url", type=str, default=None)

    args = parser.parse_args()
    main(args)
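
For reference, a minimal sketch of driving the converter programmatically instead of via the CLI above; the input file name is hypothetical, and it assumes a CocoVid-style dict with "images", "videos", "categories", and (optionally) "annotations" keys:

import json

# Hypothetical CocoVid annotation file, e.g. produced by the mot2coco script.
with open("train_cocovid.json") as f:
    coco_data = json.load(f)

converter = BoundingBoxConverter(coco_data)
for json_line in converter.convert():
    print(json.dumps(json_line, separators=(",", ":")))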
@@ -0,0 +1,11 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
instance_type: Standard_NC6s_v3
instance_count: 1
liveness_probe:
  initial_delay: 180
  period: 180
  failure_threshold: 49
  timeout: 299
request_settings:
  request_timeout_ms: 90000
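
If you prefer the Python SDK (azure-ai-ml) over the CLI, a sketch roughly equivalent to this deployment YAML might look like the following; the endpoint name, model version, and workspace config.json are assumptions:

from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineDeployment,
    OnlineRequestSettings,
    ProbeSettings,
)
from azure.identity import DefaultAzureCredential

# Assumes a config.json for the target workspace is present.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

deployment = ManagedOnlineDeployment(
    name="demo",
    endpoint_name="my-mot-endpoint",  # hypothetical endpoint name
    model="azureml://registries/azureml/models/bytetrack_yolox_x_crowdhuman_mot17-private-half/versions/3",
    instance_type="Standard_NC6s_v3",
    instance_count=1,
    liveness_probe=ProbeSettings(
        initial_delay=180, period=180, failure_threshold=49, timeout=299
    ),
    request_settings=OnlineRequestSettings(request_timeout_ms=90000),
)
ml_client.online_deployments.begin_create_or_update(deployment).result()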
@@ -0,0 +1,86 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline

experiment_name: AzureML-Cli-Train-Finetune-Vision-MOT-Samples

inputs:
  # dataset files
  training_data:
    type: mltable

  validation_data:
    type: mltable

  # compute
  compute_model_import: sample-model-import-cluster
  compute_finetune: sample-finetune-cluster-gpu
  # model_name: bytetrack_yolox_x_crowdhuman_mot17-private-half
  # # model - specify the foundation model available in the azureml system registry
  mlflow_model:
    path: azureml://registries/azureml/models/bytetrack_yolox_x_crowdhuman_mot17-private-half/versions/3
    type: mlflow_model

outputs:
  # Map the output of the fine-tuning job to the output of the pipeline job so that we can easily register the fine-tuned model. Registering the model is required to deploy it to an online or batch endpoint.
  trained_model:
    type: mlflow_model

settings:
  force_rerun: true
  default_compute: azureml:sample-finetune-cluster-gpu

jobs:
  mmtracking_model_finetune_job:
    type: pipeline
    component: azureml://registries/azureml/components/mmtracking_video_multi_object_tracking_pipeline/labels/latest
    inputs:
      # # Compute
      compute_model_import: ${{parent.inputs.compute_model_import}}
      compute_finetune: ${{parent.inputs.compute_finetune}}

      # # Model import args
      task_name: video-multi-object-tracking
      # model_name: ${{parent.inputs.model_name}}
      # pytorch_model: ${{parent.inputs.pytorch_model}}
      mlflow_model: ${{parent.inputs.mlflow_model}}
      model_family: MmTrackingVideo

      # # Data
      training_data: ${{parent.inputs.training_data}}
      validation_data: ${{parent.inputs.validation_data}}

      # Finetuning parameters
      image_width: 1920
      image_height: 1080
      learning_rate: 0.00001
      number_of_epochs: 5
      metric_for_best_model: MOTA
      training_batch_size: 1
      # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
      # learning_rate_scheduler: warmup_linear
      # warmup_steps: 0
      # optimizer: sgd
      # weight_decay: 0.0
      # gradient_accumulation_step: 1
      # max_grad_norm: 1.0
      # iou_threshold: 0.5
      # box_score_threshold: 0.3
      # number_of_workers: 8
      # extra_optim_args: ""
      # precision: 32
      # random_seed: 42
      # evaluation_strategy: epoch
      # evaluation_steps: 500
      # logging_strategy: epoch
      # logging_steps: 500
      # save_strategy: epoch
      # save_steps: 500
      # save_total_limit: -1
      # early_stopping: False
      # early_stopping_patience: 1
      # resume_from_checkpoint: False
      # save_as_mlflow_model: True

    outputs:
      mlflow_model_folder: ${{parent.outputs.trained_model}}
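
As a sketch, this pipeline spec could also be submitted with the azure-ai-ml SDK instead of `az ml job create`; the YAML file name and workspace config.json are assumptions:

from azure.ai.ml import MLClient, load_job
from azure.identity import DefaultAzureCredential

# Assumes a config.json for the target workspace is present.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

# Hypothetical file name for the pipeline spec above.
pipeline_job = load_job("mmtracking-finetune-pipeline.yml")
submitted = ml_client.jobs.create_or_update(pipeline_job)
print(submitted.studio_url)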
