From 82905db2cd73099cb30fc0d388afc16c67607628 Mon Sep 17 00:00:00 2001 From: winskuo-quic Date: Wed, 4 Jun 2025 17:42:53 +0800 Subject: [PATCH 1/4] Qualcomm AI Engine Direct - CI for Non-LLM GA model Part1 --- .ci/scripts/test_model.sh | 25 +++++++++++++-- .github/workflows/trunk.yml | 6 ++-- backends/qualcomm/builders/op_slice_copy.py | 2 +- backends/qualcomm/tests/test_qnn_delegate.py | 32 +++++++++---------- examples/qualcomm/oss_scripts/deit.py | 29 +++++++++++------ .../{mobilevit1.py => mobilevit_v1.py} | 6 ++-- examples/qualcomm/oss_scripts/roberta.py | 28 ++++++++++++---- .../qualcomm/oss_scripts/swin_transformer.py | 7 ++-- 8 files changed, 90 insertions(+), 45 deletions(-) rename examples/qualcomm/oss_scripts/{mobilevit1.py => mobilevit_v1.py} (97%) diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index bc9bbb8bae0..c16c9915849 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -188,6 +188,22 @@ test_model_with_qnn() { EXPORT_SCRIPT=edsr # Additional deps for edsr pip install piq + elif [[ "${MODEL_NAME}" == "cvt" ]]; then + EXPORT_SCRIPT=cvt + elif [[ "${MODEL_NAME}" == "dit" ]]; then + EXPORT_SCRIPT=dit + elif [[ "${MODEL_NAME}" == "efficientnet" ]]; then + EXPORT_SCRIPT=efficientnet + elif [[ "${MODEL_NAME}" == "focalnet" ]]; then + EXPORT_SCRIPT=focalnet + elif [[ "${MODEL_NAME}" == "mobilevit_v1" ]]; then + EXPORT_SCRIPT=mobilevit_v1 + elif [[ "${MODEL_NAME}" == "mobilevit_v2" ]]; then + EXPORT_SCRIPT=mobilevit_v2 + elif [[ "${MODEL_NAME}" == "pvt" ]]; then + EXPORT_SCRIPT=pvt + elif [[ "${MODEL_NAME}" == "swin" ]]; then + EXPORT_SCRIPT=swin_transformer elif [[ "${MODEL_NAME}" == "albert" ]]; then EXPORT_SCRIPT=albert elif [[ "${MODEL_NAME}" == "bert" ]]; then @@ -196,6 +212,8 @@ test_model_with_qnn() { EXPORT_SCRIPT=distilbert elif [[ "${MODEL_NAME}" == "eurobert" ]]; then EXPORT_SCRIPT=eurobert + elif [[ "${MODEL_NAME}" == "roberta" ]]; then + EXPORT_SCRIPT=roberta else echo "Unsupported model $MODEL_NAME" exit 1 @@ -210,10 +228,13 @@ test_model_with_qnn() { "dl3"|"mv3"|"mv2"|"ic4"|"ic3"|"vit"|"mb"|"w2l") SCRIPT_FOLDER=scripts ;; - "albert"|"bert"|"distilbert") + "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin") + SCRIPT_FOLDER=oss_scripts + ;; + "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1") pip install evaluate SCRIPT_FOLDER=oss_scripts - # Bert models running in 16bit will encounter op validation fail on some operations, + # 16bit models will encounter op validation fail on some operations, # which requires CHIPSET >= SM8550. QNN_CHIPSET=SM8550 ;; diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 0eceddca36f..a05f74a9245 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -470,7 +470,7 @@ jobs: docker-image: executorch-ubuntu-22.04-qnn-sdk submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 + timeout: 90 script: | # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") @@ -489,14 +489,14 @@ jobs: strategy: matrix: dtype: [fp32] - model: [albert, bert, distilbert] # eurobert requires transfomer >= 4.48.0, skip for now + model: [cvt, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transfomer >= 4.48.0, skip for now fail-fast: false with: runner: linux.2xlarge docker-image: executorch-ubuntu-22.04-qnn-sdk submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 + timeout: 90 script: | # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") diff --git a/backends/qualcomm/builders/op_slice_copy.py b/backends/qualcomm/builders/op_slice_copy.py index b2a4cc15bea..62688a10036 100644 --- a/backends/qualcomm/builders/op_slice_copy.py +++ b/backends/qualcomm/builders/op_slice_copy.py @@ -56,7 +56,7 @@ def define_node( if start < 0: start = start % input_tensor.shape[dim] - if len(node.args) > 3: + if len(node.args) > 3 and node.args[3] is not None: end = min(cast(int, node.args[3]), input_tensor.shape[dim]) if end < 0: end = end % input_tensor.shape[dim] diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 4a0edaf471d..d1dd266646f 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -4585,14 +4585,13 @@ def test_gMLP(self): self.assertGreaterEqual(msg["top_1"], 60) self.assertGreaterEqual(msg["top_5"], 85) - @unittest.skip("Only outputs good accuracy in QNN 2.29") - def test_mobilevit_v2(self): + def test_mobilevit_v1(self): if not self.required_envs([self.image_dataset]): self.skipTest("missing required envs") cmds = [ "python", - f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit_v2.py", + f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit_v1.py" "--dataset", self.image_dataset, "--artifact", @@ -4610,8 +4609,6 @@ def test_mobilevit_v2(self): ] if self.host: cmds.extend(["--host", self.host]) - if self.shared_buffer: - cmds.extend(["--shared_buffer"]) p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) with Listener((self.ip, self.port)) as listener: @@ -4621,17 +4618,22 @@ def test_mobilevit_v2(self): if "Error" in msg: self.fail(msg["Error"]) else: - self.assertGreaterEqual(msg["top_1"], 50) + self.assertGreaterEqual(msg["top_1"], 70) self.assertGreaterEqual(msg["top_5"], 85) - def test_pvt(self): + @unittest.skip("Only outputs good accuracy in QNN 2.29") + def test_mobilevit_v2(self): if not self.required_envs([self.image_dataset]): self.skipTest("missing required envs") cmds = [ "python", - f"{self.executorch_root}/examples/qualcomm/oss_scripts/pvt.py", + f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit_v2.py", + "--dataset", self.image_dataset, + "--artifact", + self.artifact_dir, + "--build_folder", self.build_folder, "--device", self.device, @@ -4644,6 +4646,8 @@ def test_pvt(self): ] if self.host: cmds.extend(["--host", self.host]) + if self.shared_buffer: + cmds.extend(["--shared_buffer"]) p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) with Listener((self.ip, self.port)) as listener: @@ -4653,21 +4657,17 @@ def test_pvt(self): if "Error" in msg: self.fail(msg["Error"]) else: - self.assertGreaterEqual(msg["top_1"], 65) + self.assertGreaterEqual(msg["top_1"], 50) self.assertGreaterEqual(msg["top_5"], 85) - def test_mobilevit1(self): + def test_pvt(self): if not self.required_envs([self.image_dataset]): self.skipTest("missing required envs") cmds = [ "python", - f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit1.py" - "--dataset", + f"{self.executorch_root}/examples/qualcomm/oss_scripts/pvt.py", self.image_dataset, - "--artifact", - self.artifact_dir, - "--build_folder", self.build_folder, "--device", self.device, @@ -4689,7 +4689,7 @@ def test_mobilevit1(self): if "Error" in msg: self.fail(msg["Error"]) else: - self.assertGreaterEqual(msg["top_1"], 70) + self.assertGreaterEqual(msg["top_1"], 65) self.assertGreaterEqual(msg["top_5"], 85) def test_regnet(self): diff --git a/examples/qualcomm/oss_scripts/deit.py b/examples/qualcomm/oss_scripts/deit.py index 5482a77a166..e0719dfffb9 100644 --- a/examples/qualcomm/oss_scripts/deit.py +++ b/examples/qualcomm/oss_scripts/deit.py @@ -6,10 +6,12 @@ import getpass import json +import logging import os from multiprocessing.connection import Client import numpy as np +import torch from executorch.backends.qualcomm._passes.qnn_pass_manager import ( get_capture_program_passes, ) @@ -46,16 +48,23 @@ def main(args): data_num = 100 height = config.image_size width = config.image_size - inputs, targets, input_list = get_imagenet_dataset( - dataset_path=f"{args.dataset}", - data_size=data_num, - image_shape=(height, width), - crop_size=(height, width), - ) + + if args.ci: + inputs = [(torch.rand(1, 3, height, width),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) + else: + inputs, targets, input_list = get_imagenet_dataset( + dataset_path=f"{args.dataset}", + data_size=data_num, + image_shape=(height, width), + crop_size=(height, width), + ) # Get the Deit model. model = get_instance() - pte_filename = "deit_qnn" + pte_filename = "deit_qnn_q8" # lower to QNN passes_job = get_capture_program_passes() @@ -120,8 +129,8 @@ def main(args): parser.add_argument( "-a", "--artifact", - help="path for storing generated artifacts and output by this example. Default ./deit_qnn", - default="./deit_qnn", + help="path for storing generated artifacts and output by this example. Default ./deit", + default="./deit", type=str, ) @@ -134,7 +143,7 @@ def main(args): "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)" ), type=str, - required=True, + required=False, ) args = parser.parse_args() diff --git a/examples/qualcomm/oss_scripts/mobilevit1.py b/examples/qualcomm/oss_scripts/mobilevit_v1.py similarity index 97% rename from examples/qualcomm/oss_scripts/mobilevit1.py rename to examples/qualcomm/oss_scripts/mobilevit_v1.py index 44de082ab27..99b7160f669 100644 --- a/examples/qualcomm/oss_scripts/mobilevit1.py +++ b/examples/qualcomm/oss_scripts/mobilevit_v1.py @@ -84,7 +84,7 @@ def main(args): .to("cpu") ) - pte_filename = "mobilevit1_qnn_q16" + pte_filename = "mobilevit_v1_qnn_q16" build_executorch_binary( module.eval(), inputs[0], @@ -157,8 +157,8 @@ def main(args): "-a", "--artifact", help="path for storing generated artifacts by this example. " - "Default ./mobilevit1", - default="./mobilevit1", + "Default ./mobilevit_v1", + default="./mobilevit_v1", type=str, ) diff --git a/examples/qualcomm/oss_scripts/roberta.py b/examples/qualcomm/oss_scripts/roberta.py index b91888c7efb..cd70edc5dec 100644 --- a/examples/qualcomm/oss_scripts/roberta.py +++ b/examples/qualcomm/oss_scripts/roberta.py @@ -6,6 +6,7 @@ import getpass import json +import logging import os from multiprocessing.connection import Client @@ -38,16 +39,29 @@ def main(args): skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args) os.makedirs(args.artifact, exist_ok=True) - data_size = 100 tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base") - inputs, targets, input_list = get_masked_language_model_dataset( - args.dataset, tokenizer, data_size - ) + data_size = 100 + if args.ci: + random_ids = torch.randint(low=0, high=100, size=(1, 100), dtype=torch.int32) + attention_mask = torch.ones((1, 100), dtype=torch.float32) + inputs = [ + ( + random_ids, + attention_mask, + ) + ] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) + else: + inputs, targets, input_list = get_masked_language_model_dataset( + args.dataset, tokenizer, data_size + ) # Get the Roberta model. model = get_instance(args) - pte_filename = "roberta_qnn" + pte_filename = "roberta_qnn_q16" # lower to QNN passes_job = get_capture_program_passes() @@ -137,7 +151,7 @@ def main(args): "-a", "--artifact", help="path for storing generated artifacts and output by this example. Default ./Roberta_qnn", - default="./Roberta_qnn", + default="./roberta", type=str, ) parser.add_argument( @@ -149,7 +163,7 @@ def main(args): "for https://www.kaggle.com/datasets/mikeortman/wikipedia-sentences" ), type=str, - required=True, + required=False, ) args = parser.parse_args() diff --git a/examples/qualcomm/oss_scripts/swin_transformer.py b/examples/qualcomm/oss_scripts/swin_transformer.py index 11afff0d70d..3c62eba45cd 100644 --- a/examples/qualcomm/oss_scripts/swin_transformer.py +++ b/examples/qualcomm/oss_scripts/swin_transformer.py @@ -89,7 +89,7 @@ def main(args): data_num = 100 if args.ci: - inputs = [torch.rand(1, 3, 224, 224)] + inputs = [(torch.rand(1, 3, 224, 224),)] logging.warning( "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." ) @@ -181,8 +181,9 @@ def main(args): parser.add_argument( "-a", "--artifact", - help="path for storing generated artifacts by this example. " "Default ./swin", - default="./swin", + help="path for storing generated artifacts by this example. " + "Default ./swin_transformer", + default="./swin_transformer", type=str, ) From df493fc2025d73a757be85bd80aa9322f6111b6a Mon Sep 17 00:00:00 2001 From: winskuo-quic Date: Thu, 3 Jul 2025 16:32:43 +0800 Subject: [PATCH 2/4] debug --- .ci/scripts/test_model.sh | 4 +++- .github/workflows/trunk.yml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index c16c9915849..2ef6cff6f67 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -190,6 +190,8 @@ test_model_with_qnn() { pip install piq elif [[ "${MODEL_NAME}" == "cvt" ]]; then EXPORT_SCRIPT=cvt + elif [[ "${MODEL_NAME}" == "deit" ]]; then + EXPORT_SCRIPT=deit elif [[ "${MODEL_NAME}" == "dit" ]]; then EXPORT_SCRIPT=dit elif [[ "${MODEL_NAME}" == "efficientnet" ]]; then @@ -228,7 +230,7 @@ test_model_with_qnn() { "dl3"|"mv3"|"mv2"|"ic4"|"ic3"|"vit"|"mb"|"w2l") SCRIPT_FOLDER=scripts ;; - "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin") + "cvt"|"deit"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin") SCRIPT_FOLDER=oss_scripts ;; "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1") diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index a05f74a9245..4936cb5d8a4 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -489,10 +489,10 @@ jobs: strategy: matrix: dtype: [fp32] - model: [cvt, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transfomer >= 4.48.0, skip for now + model: [cvt, deit, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transfomer >= 4.48.0, skip for now fail-fast: false with: - runner: linux.2xlarge + runner: linux.2xlarge.memory docker-image: executorch-ubuntu-22.04-qnn-sdk submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} From bc5ffd19d80fab43b92d12250ddf80e969e7caae Mon Sep 17 00:00:00 2001 From: winskuo-quic Date: Fri, 4 Jul 2025 17:57:04 +0800 Subject: [PATCH 3/4] test --- .github/workflows/trunk.yml | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 4936cb5d8a4..fb4747c4469 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -463,7 +463,7 @@ jobs: strategy: matrix: dtype: [fp32] - model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] + model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, cvt, deit, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] fail-fast: false with: runner: linux.2xlarge @@ -480,32 +480,6 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - test-qnn-optimum-model: - name: test-qnn-optimum-model - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - model: [cvt, deit, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transfomer >= 4.48.0, skip for now - fail-fast: false - with: - runner: linux.2xlarge.memory - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - test-apple-model: name: test-apple-model uses: pytorch/test-infra/.github/workflows/macos_job.yml@main From d36369b57843d7a5270b33346d5cf23ef7a17d06 Mon Sep 17 00:00:00 2001 From: winskuo-quic Date: Fri, 4 Jul 2025 18:31:51 +0800 Subject: [PATCH 4/4] test2 --- .github/workflows/trunk.yml | 54 ++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index fb4747c4469..eca49d482a0 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -463,7 +463,59 @@ jobs: strategy: matrix: dtype: [fp32] - model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, cvt, deit, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] + model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] + fail-fast: false + with: + runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-qnn-sdk + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" + + test-qnn-optimum-model1: + name: test-qnn-optimum-model1 + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + strategy: + matrix: + dtype: [fp32] + model: [cvt, deit, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2] # eurobert requires transfomer >= 4.48.0, skip for now + fail-fast: false + with: + runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-qnn-sdk + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" + + test-qnn-optimum-model2: + name: test-qnn-optimum-model2 + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + strategy: + matrix: + dtype: [fp32] + model: [pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transfomer >= 4.48.0, skip for now fail-fast: false with: runner: linux.2xlarge