Skip to content

Commit

Permalink
Add QNN examples of the laion/CLIP-ViT-B-32-laion2B-s34B-b79K model. (#1629)
Browse files Browse the repository at this point in the history
## Describe your changes

- Add QNN examples of the laion/CLIP-ViT-B-32-laion2B-s34B-b79K model.
- Clean up unused config.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [x] Make sure all tests can pass.
- [x] Update documents if necessary.
- [x] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [x] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## Test metrics
```json
{
    "accuracy-accuracy": {
        "value": 1.0,
        "priority": 1,
        "higher_is_better": true
    },
    "latency_qnn-avg": {
        "value": 340.28136,
        "priority": 2,
        "higher_is_better": false
    },
    "latency_qnn-max": {
        "value": 439.1883,
        "priority": -1,
        "higher_is_better": false
    },
    "latency_qnn-min": {
        "value": 250.1979,
        "priority": -1,
        "higher_is_better": false
    },
    "latency_cpu-avg": {
        "value": 431.19076,
        "priority": 3,
        "higher_is_better": false
    },
    "latency_cpu-max": {
        "value": 573.6913,
        "priority": -1,
        "higher_is_better": false
    },
    "latency_cpu-min": {
        "value": 303.0901,
        "priority": -1,
        "higher_is_better": false
    },
    "throughput-avg": {
        "value": 2.23641,
        "priority": -1,
        "higher_is_better": true
    },
    "throughput-max": {
        "value": 2.74631,
        "priority": -1,
        "higher_is_better": true
    },
    "throughput-min": {
        "value": 1.66013,
        "priority": -1,
        "higher_is_better": true
    }
}
```
## (Optional) Issue link
  • Loading branch information
chinazhangchao authored Feb 20, 2025
1 parent 9475998 commit 043f7e1
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 57 deletions.
8 changes: 5 additions & 3 deletions examples/clip/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ This workflow performs CLIP VIT optimization on Qualcomm NPU with ONNX Runtime P

It requires x86 python environment on a Windows ARM machine with `onnxruntime-qnn` installed.

Config file: [openai_clip-vit-base-patch16_ptq_qnn.json](openai_clip-vit-base-patch16_ptq_qnn.json)
Config file: [openai_clip-vit-base-patch32_ptq_qnn.json](openai_clip-vit-base-patch32_ptq_qnn.json)
Config file: [openai_clip-vit-large-patch14_ptq_qnn.json](openai_clip-vit-large-patch14_ptq_qnn.json)
OpenAI CLIP model config files: [openai_clip-vit-base-patch16_ptq_qnn.json](openai_clip-vit-base-patch16_ptq_qnn.json)
[openai_clip-vit-base-patch32_ptq_qnn.json](openai_clip-vit-base-patch32_ptq_qnn.json)
[openai_clip-vit-large-patch14_ptq_qnn.json](openai_clip-vit-large-patch14_ptq_qnn.json)

OpenCLIP model config file: [laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json](laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json)

**NOTE:** The model optimization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step.

Expand Down
112 changes: 112 additions & 0 deletions examples/clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{
"input_model": {
"type": "HfModel",
"model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
"task": "zero-shot-image-classification",
"load_kwargs": { "attn_implementation": "eager" },
"io_config": {
"input_names": [ "input_ids", "pixel_values", "attention_mask" ],
"input_shapes": [ [ 10, 77 ], [ 1, 3, 224, 224 ], [ 10, 77 ] ],
"input_types": [ "int64", "float32", "int64" ],
"output_names": [ "logits_per_image" ],
"output_shapes": [ [ 1, 2 ] ]
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "npu", "execution_providers": [ "NPUExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "quant_data_config",
"user_script": "user_script.py",
"load_dataset_config": {
"type": "clip_dataset",
"model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
"dataset_name": "nlphuji/flickr30k",
"start": 0,
"end": 10
},
"dataloader_config": { "type": "no_auto_batch_dataloader" }
},
{
"name": "metric_data_config",
"user_script": "user_script.py",
"load_dataset_config": {
"type": "clip_dataset",
"model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
"dataset_name": "nlphuji/flickr30k",
"start": 10,
"end": 20
},
"dataloader_config": { "type": "no_auto_batch_dataloader" },
"post_process_data_config": { "type": "clip_post_process" }
}
],
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "accuracy",
"type": "accuracy",
"backend": "huggingface_metrics",
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
]
},
{
"name": "latency_qnn",
"type": "latency",
"data_config": "metric_data_config",
"sub_types": [
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
]
},
{
"name": "latency_cpu",
"type": "latency",
"data_config": "metric_data_config",
"sub_types": [
{ "name": "avg", "priority": 3, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
}
},
{
"name": "throughput",
"type": "throughput",
"data_config": "metric_data_config",
"sub_types": [ { "name": "avg" }, { "name": "max" }, { "name": "min" } ]
}
]
}
},
"passes": {
"conversion": { "type": "OnnxConversion", "target_opset": 17 },
"qnn_preprocess": { "type": "QNNPreprocess" },
"quantization": {
"type": "OnnxStaticQuantization",
"quant_preprocess": true,
"data_config": "quant_data_config",
"op_types_to_quantize": [ "MatMul", "LayerNormalization", "Gemm", "Sigmoid", "Gelu" ],
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax"
}
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"clean_cache": true,
"output_dir": "models/CLIP-ViT-B-32-laion2B-s34B-b79K"
}
20 changes: 2 additions & 18 deletions examples/clip/openai_clip-vit-base-patch16_ptq_qnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,7 @@
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_qnn",
Expand All @@ -73,15 +65,7 @@
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_cpu",
Expand Down
20 changes: 2 additions & 18 deletions examples/clip/openai_clip-vit-base-patch32_ptq_qnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,7 @@
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_qnn",
Expand All @@ -73,15 +65,7 @@
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_cpu",
Expand Down
20 changes: 2 additions & 18 deletions examples/clip/openai_clip-vit-large-patch14_ptq_qnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,7 @@
"data_config": "metric_data_config",
"sub_types": [
{ "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_qnn",
Expand All @@ -73,15 +65,7 @@
{ "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
{ "name": "max" },
{ "name": "min" }
],
"user_config": {
"inference_settings": {
"onnx": {
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
]
},
{
"name": "latency_cpu",
Expand Down

0 comments on commit 043f7e1

Please sign in to comment.