pytorch · zingo · Apr 29, 2025 · Feb 20, 2025 · Mar 21, 2025 · Apr 16, 2025
@@ -7,6 +7,8 @@ python_library(
         "ethosu_partitioner.py",
         "tosa_backend.py",
         "tosa_partitioner.py",
+        "vgf_backend.py",
+        "vgf_partitioner.py",
     ],
     deps = [
         ":arm_backend",

@@ -25,21 +25,37 @@ def __init__(self):
         self.output_format = None
         self.path_for_intermediates = None
         self.tosa_spec = None
-        self.input_order = None
+
+    def vgf_compile_spec(
+        self,
+        compiler_flags: Optional[str] = "",
+    ) -> "ArmCompileSpecBuilder":
+        """
+        Generate compile spec for VGF compatible targets
+
+        Args:
+            compiler_flags: Extra compiler flags for converter_backend;
+                None is accepted and means no extra flags.
+        """
+        self.output_format = "vgf"
+        # Optional[str] admits None; joining a None in build() would raise.
+        self.compiler_flags = [] if compiler_flags is None else [compiler_flags]
+        self.tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+MI")
+        return self
 
     def ethosu_compile_spec(
         self,
-        config: str,
-        system_config: str,
-        memory_mode: str,
+        target: str,
+        system_config: Optional[str] = None,
+        memory_mode: Optional[str] = None,
         extra_flags: Optional[str] = None,
         config_ini: Optional[str] = "Arm/vela.ini",
     ) -> "ArmCompileSpecBuilder":
         """
         Generate compile spec for Ethos-U NPU
 
         Args:
-            config: Ethos-U accelerator configuration, e.g. ethos-u55-128
+            target: Ethos-U accelerator configuration, e.g. ethos-u55-128
             system_config: System configuration to select from the Vel
                 configuration file
             memory_mode: Memory mode to select from the Vela configuration file
@@ -52,18 +68,38 @@ def ethosu_compile_spec(
         ), f"Output format already set to f{self.output_format}"
         self.output_format = "vela"
         self.compiler_flags = [
-            f"--accelerator-config={config}",
+            f"--accelerator-config={target}",
             f"--config={config_ini}",
         ]
+
+        # default system config and memory mode
+        if "ethos-u55" in target:
+            if system_config is None:
+                system_config = "Ethos_U55_High_End_Embedded"
+            if memory_mode is None:
+                memory_mode = "Shared_Sram"
+        elif "ethos-u85" in target:
+            if system_config is None:
+                system_config = "Ethos_U85_SYS_DRAM_Mid"
+            if memory_mode is None:
+                memory_mode = "Sram_Only"
+        else:
+            raise RuntimeError(f"Unknown ethos target: {target}")
+
         if system_config is not None:
             self.compiler_flags.append(f"--system-config={system_config}")
         if memory_mode is not None:
             self.compiler_flags.append(f"--memory-mode={memory_mode}")
         if extra_flags is not None:
             self.compiler_flags.append(extra_flags)
 
+        # We require raw output and regor, so always append these flags last.
+        # Coming after extra_flags, this overrides any other output setting.
+        self.compiler_flags.append("--output-format=raw")
+        self.compiler_flags.append("--debug-force-regor")
+
         base_tosa_version = "TOSA-0.80+BI"
-        if "u55" in config:
+        if "u55" in target:
             # Add the Ethos-U55 extension marker
             base_tosa_version += "+u55"
         self.tosa_spec = TosaSpecification.create_from_string(base_tosa_version)
@@ -106,26 +142,22 @@ def build(self) -> List[CompileSpec]:
         # Always supply a TOSA version
         self.compile_spec = [CompileSpec("tosa_spec", str(self.tosa_spec).encode())]
 
-        if self.output_format == "vela":
-            self.compile_spec += [
-                CompileSpec("output_format", "vela".encode()),
-                CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()),
-            ]
-        elif self.output_format == "tosa":
-            self.compile_spec.append(CompileSpec("output_format", "tosa".encode()))
+        # Add compile flags, these are backend specific, refer to the backend
+        # documentation.
+        self.compile_spec += [
+            CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()),
+        ]
+
+        # encode output format
+        self.compile_spec.append(
+            CompileSpec("output_format", self.output_format.encode())
+        )
 
         if self.path_for_intermediates is not None:
             self.compile_spec.append(
                 CompileSpec("debug_artifact_path", self.path_for_intermediates.encode())
             )
 
-        if self.input_order:
-            self.compile_spec.append(
-                CompileSpec(
-                    "input_order", " ".join(map(str, self.input_order)).encode()
-                )
-            )
-
         return self.compile_spec
 
 
@@ -148,6 +180,13 @@ def is_ethosu(compile_spec: List[CompileSpec]) -> bool:
     return False
 
 
+def is_vgf(compile_spec: List[CompileSpec]) -> bool:
+    """Return True when the first "output_format" entry decodes to "vgf"."""
+    # Lazy generator: only the first matching entry is ever decoded.
+    formats = (s.value.decode() for s in compile_spec if s.key == "output_format")
+    return next(formats, None) == "vgf"
+
+
 def get_tosa_spec(compile_spec: List[CompileSpec]) -> TosaSpecification:
     for spec in compile_spec:
         if spec.key == "tosa_spec":

@@ -23,12 +23,11 @@
 
 # Pack either input or output tensor block, compose the related arrays into
 # per-io structs to simplify runtime use.
-def vela_bin_pack_io(prefix, data, shape_order=None):
+def vela_bin_pack_io(prefix, data):
     vela_input_shapes = data[prefix + "_shape"]
 
-    order = shape_order if shape_order else range(len(vela_input_shapes))
     ios = struct.pack("<i", len(vela_input_shapes))
-    for i in order:
+    for i in range(len(vela_input_shapes)):
         io_shape = vela_input_shapes[i]
         io_elem_size = data[prefix + "_elem_size"][i]
         io_offset = data[prefix + "_offset"][i]
@@ -45,9 +44,7 @@ def vela_bin_pack_io(prefix, data, shape_order=None):
 # Output via Vela to binary stream for ArmBackendEthosU
 # WARNING: Do not change this without changing VelaBinStream.cpp as that
 #          function consumes this format and the two need to align.
-def vela_compile(
-    tosa_flatbuffer: bytes, args: List[str], shape_order=None, verbose: bool = False
-):
+def vela_compile(tosa_flatbuffer: bytes, args: List[str], verbose: bool = False):
     """
     Compile a TOSA graph to a binary stream for ArmBackendEthosU using Vela.
     """
@@ -98,7 +95,7 @@ def vela_compile(
             bin_blocks["scratch_data"] = b"\x00" * block_length
 
             # Capture inputs and outputs
-            bin_blocks["inputs"] = vela_bin_pack_io("input", data, shape_order)
+            bin_blocks["inputs"] = vela_bin_pack_io("input", data)
             bin_blocks["outputs"] = vela_bin_pack_io("output", data)
 
             bin_blocks["vela_end_stream"] = b""

@@ -35,19 +35,16 @@ class EthosUBackend(BackendDetails):
 
     @staticmethod
     def _compile_tosa_flatbuffer(
-        tosa_flatbuffer: bytes, compile_spec: list[CompileSpec]
+        tosa_flatbuffer: bytes, compile_spec: List[CompileSpec]
     ) -> bytes:
         """
         Static helper method to do the compilation of the TOSA flatbuffer
         representation to a target specific binary stream.
         """
         compile_flags = []
-        input_order = []
         for spec in compile_spec:
             if spec.key == "compile_flags":
                 compile_flags.append(spec.value.decode())
-            if spec.key == "input_order":
-                input_order = list(map(int, spec.value.decode().split(",")))
 
         if len(compile_flags) == 0:
             # Not testing for compile_flags correctness here, just that they are
@@ -60,7 +57,6 @@ def _compile_tosa_flatbuffer(
         binary = vela_compile(
             tosa_flatbuffer,
             compile_flags,
-            input_order,
             verbose=logger.getEffectiveLevel() == logging.INFO,
         )
         return binary

@@ -9,6 +9,7 @@
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
+    VgfQuantizer,
 )
 
 # Used in tests

@@ -27,6 +27,7 @@
 from executorch.backends.arm.arm_backend import (
     get_tosa_spec,
     is_ethosu,
+    is_vgf,
 )  # usort: skip
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.ao.quantization.fake_quantize import (
@@ -52,6 +53,7 @@
 __all__ = [
     "TOSAQuantizer",
     "EthosUQuantizer",
+    "VgfQuantizer",
     "get_symmetric_quantization_config",
 ]
 
@@ -358,3 +360,12 @@ def __init__(self, compile_spec: list[CompileSpec]) -> None:
 
         tosa_spec = get_tosa_spec(compile_spec)
         super().__init__(tosa_spec)
+
+
+class VgfQuantizer(TOSAQuantizer):
+    """TOSAQuantizer that only accepts compile specs targeting VGF."""
+
+    def __init__(self, compile_spec: list[CompileSpec]) -> None:
+        if not is_vgf(compile_spec):
+            raise RuntimeError("compile spec is not targeting VGF")
+        super().__init__(get_tosa_spec(compile_spec))
@@ -35,15 +35,15 @@
 logger = logging.getLogger(__name__)
 
 
-def _get_first_delegation_tag(graph_module) -> str | None:
-    """Get the first delegation tag from the graph_module or return None."""
+def arm_get_first_delegation_tag(graph_module) -> str:
+    """Return the first truthy "delegation_tag" in any node's meta, or "" if none."""
     for node in graph_module.graph.nodes:
         tag = node.meta.get("delegation_tag")
         if tag:
             return tag
 
     logger.debug("No delegation tag found in partition.")
-    return None
+    return ""
 
 
 @final
@@ -63,16 +63,13 @@ def preprocess(  # noqa: C901
         artifact_path = None
         output_format = ""
         compile_flags = []
-        input_order = []
         for spec in compile_spec:
             if spec.key == "debug_artifact_path":
                 artifact_path = spec.value.decode()
             if spec.key == "output_format":
                 output_format = spec.value.decode()
             if spec.key == "compile_flags":
                 compile_flags.append(spec.value.decode())
-            if spec.key == "input_order":
-                input_order = list(map(int, spec.value.decode().split(",")))
 
         # Check that the output format is set correctly in the compile spec
         if output_format != "tosa":
@@ -129,14 +126,8 @@ def preprocess(  # noqa: C901
                 dbg_fail(node, graph_module, tosa_graph, artifact_path)
                 raise
 
-        if len(input_order) > 0:
-            if input_count != len(input_order):
-                raise RuntimeError(
-                    "The rank of the input order is not equal to amount of input tensors"
-                )
-
         if artifact_path:
-            tag = _get_first_delegation_tag(graph_module)
+            tag = arm_get_first_delegation_tag(graph_module)
             dbg_tosa_dump(
                 tosa_graph,
                 artifact_path,