Skip to content

Commit

Permalink
Add QNN examples of the laion/CLIP-ViT-B-32-laion2B-s34B-b79K model. (#1629)
Browse files Browse the repository at this point in the history
## Describe your changes

- Add QNN examples of the laion/CLIP-ViT-B-32-laion2B-s34B-b79K model.
- Clean up unused config.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [x] Make sure all tests can pass.
- [x] Update documents if necessary.
- [x] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [x] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## Test metrics
```json
{
    "accuracy-accuracy": {
        "value": 1.0,
        "priority": 1,
        "higher_is_better": true
    },
    "latency_qnn-avg": {
        "value": 340.28136,
        "priority": 2,
        "higher_is_better": false
    },
    "latency_qnn-max": {
        "value": 439.1883,
        "priority": -1,
        "higher_is_better": false
    },
    "latency_qnn-min": {
        "value": 250.1979,
        "priority": -1,
        "higher_is_better": false
    },
    "latency_cpu-avg": {
        "value": 431.19076,
        "priority": 3,
        "higher_is_better": false
    },
    "latency_cpu-max": {
        "value": 573.6913,
        "priority": -1,
        "higher_is_better": false
    },
    "latency_cpu-min": {
        "value": 303.0901,
        "priority": -1,
        "higher_is_better": false
    },
    "throughput-avg": {
        "value": 2.23641,
        "priority": -1,
        "higher_is_better": true
    },
    "throughput-max": {
        "value": 2.74631,
        "priority": -1,
        "higher_is_better": true
    },
    "throughput-min": {
        "value": 1.66013,
        "priority": -1,
        "higher_is_better": true
    }
}
```
## (Optional) Issue link
  • Loading branch information
chinazhangchao authored Feb 20, 2025
1 parent 9475998 commit 043f7e1
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 57 deletions.
8 changes: 5 additions & 3 deletions examples/clip/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ This workflow performs CLIP VIT optimization on Qualcomm NPU with ONNX Runtime P

It requires x86 python environment on a Windows ARM machine with `onnxruntime-qnn` installed.

Config file: [openai_clip-vit-base-patch16_ptq_qnn.json](openai_clip-vit-base-patch16_ptq_qnn.json)
Config file: [openai_clip-vit-base-patch32_ptq_qnn.json](openai_clip-vit-base-patch32_ptq_qnn.json)
Config file: [openai_clip-vit-large-patch14_ptq_qnn.json](openai_clip-vit-large-patch14_ptq_qnn.json)
OpenAI CLIP model config files: [openai_clip-vit-base-patch16_ptq_qnn.json](openai_clip-vit-base-patch16_ptq_qnn.json)
[openai_clip-vit-base-patch32_ptq_qnn.json](openai_clip-vit-base-patch32_ptq_qnn.json)
[openai_clip-vit-large-patch14_ptq_qnn.json](openai_clip-vit-large-patch14_ptq_qnn.json)

OpenCLIP model config file: [laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json](laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json)

**NOTE:** The model optimization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step.

Expand Down
112 changes: 112 additions & 0 deletions examples/clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{
"input_model": {
"type": "HfModel",
"model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
"task": "zero-shot-image-classification",
"load_kwargs": { "attn_implementation": "eager" },
"io_config": {
"input_names": [ "input_ids", "pixel_values", "attention_mask" ],
"input_shapes": [ [ 10, 77 ], [ 1, 3, 224, 224 ], [ 10, 77 ] ],
"input_types": [ "int64", "float32", "int64" ],
"output_names": [ "logits_per_image" ],
"output_shapes": [ [ 1, 2 ] ]
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "npu", "execution_providers": [ "NPUExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "quant_data_config",
"user_script": "user_script.py",
"load_dataset_config": {
"type": "clip_dataset",
"model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
"dataset_name": "nlphuji/flickr30k",
"start": 0,
"end": 10
},
"dataloader_config": { "type": "no_auto_batch_dataloader" }
},
{
"name": "metric_data_config",
"user_script": "user_script.py",
"load_dataset_config": {
"type": "clip_dataset",
"model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
"dataset_name": "nlphuji/flickr30k",
"start": 10,
"end": 20
},
"dataloader_config": { "type": "no_auto_batch_dataloader" },
"post_process_data_config": { "type": "clip_post_process" }
}
],
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "accuracy",
"type": "accuracy",
"backend": "huggingface_metrics",
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
]
},
{
"name": "latency_qnn",
"type": "latency",
"data_config": "metric_data_config",
"sub_types": [
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
]
},
{
"name": "latency_cpu",
"type": "latency",
"data_config": "metric_data_config",
"sub_types": [
{ "name": "avg", "priority": 3, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
}
},
{
"name": "throughput",
"type": "throughput",
"data_config": "metric_data_config",
"sub_types": [ { "name": "avg" }, { "name": "max" }, { "name": "min" } ]
}
]
}
},
"passes": {
"conversion": { "type": "OnnxConversion", "target_opset": 17 },
"qnn_preprocess": { "type": "QNNPreprocess" },
"quantization": {
"type": "OnnxStaticQuantization",
"quant_preprocess": true,
"data_config": "quant_data_config",
"op_types_to_quantize": [ "MatMul", "LayerNormalization", "Gemm", "Sigmoid", "Gelu" ],
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax"
}
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"clean_cache": true,
"output_dir": "models/CLIP-ViT-B-32-laion2B-s34B-b79K"
}
20 changes: 2 additions & 18 deletions examples/clip/openai_clip-vit-base-patch16_ptq_qnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,7 @@
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_qnn",
Expand All @@ -73,15 +65,7 @@
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_cpu",
Expand Down
20 changes: 2 additions & 18 deletions examples/clip/openai_clip-vit-base-patch32_ptq_qnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,7 @@
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_qnn",
Expand All @@ -73,15 +65,7 @@
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_cpu",
Expand Down
20 changes: 2 additions & 18 deletions examples/clip/openai_clip-vit-large-patch14_ptq_qnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,7 @@
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_qnn",
Expand All @@ -73,15 +65,7 @@
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_cpu",
Expand Down

0 comments on commit 043f7e1

Please sign in to comment.