OPEN: Snitch Cluster Tiling Support (#22)

* extend snitch support * CI flow and linting * fix typo * change snitch patch * apply victor's comment
pulp-platform · Jan 9, 2025 · c3f11d6 · c3f11d6
1 parent f69868b
commit c3f11d6
Show file tree

Hide file tree

Showing 74 changed files with 3,438 additions and 123 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -94,7 +94,43 @@ jobs:
     uses: ./.github/workflows/TestRunnerSnitch.yml
     with:
       test-names: |
+        Adder
+        iSoftmax
+        TestiNoNorm
+        TestAdderLarge
+        TestiSoftmaxLarge
         testMatMul
+        testRQGEMM
+        TestRQAdd
+        testRQGEMMTransB
+      num-cores: 9
+
+  # Tiled Snitch kernel tests. Each entry of `tests-config` pairs a test name
+  # with a list of "L1" values; the called workflow runs the test once per
+  # listed value and forwards it to the test runner's `--l1` option.
+  # `num-cores`, `default-memory-level` and `simulator` are not set here, so
+  # the called workflow's defaults apply.
+  snitch-kernels-tiled-singlebuffer-L2:
+    uses: ./.github/workflows/TestRunnerTiledSnitchSequential.yml
+    with:
+      tests-config: |
+        [
+          {
+            "name": "TestiNoNorm",
+            "L1": [5000, 10000]
+          },
+          {
+            "name": "TestAdderLarge",
+            "L1": [5000, 10000]
+          },
+          {
+            "name": "TestiSoftmaxLarge",
+            "L1": [5000, 10000]
+          },
+          {
+            "name": "testRQGEMM",
+            "L1": [2000, 5000]
+          },
+          {
+            "name": "TestRQAdd",
+            "L1": [5000, 10000]
+          }
+        ]
 
   ### Mempool Tests ###
   mempool-kernels:

diff --git a/.github/workflows/TestRunnerSnitch.yml b/.github/workflows/TestRunnerSnitch.yml
@@ -6,6 +6,13 @@ on:
       test-names:
         required: true
         type: string
+      num-cores:
+        required: true
+        type: number
+      simulator:
+        required: false
+        default: "banshee"
+        type: string      
 
 jobs:
   test-runner-snitch:
@@ -26,7 +33,7 @@ jobs:
           echo "$testNames" | while IFS= read -r testName; do
             if [[ -n "$testName" ]]; then
               echo "Running test: $testName"
-              python testRunner_snitch.py -t Tests/$testName --toolchain_install_dir /app/install/riscv-llvm/
+              python testRunner_snitch.py -t Tests/$testName --simulator=${{ inputs.simulator }} --cores=${{ inputs.num-cores }} --toolchain_install_dir /app/install/riscv-llvm/
             fi
           done
         shell: bash
diff --git a/.github/workflows/TestRunnerTiledSnitchSequential.yml b/.github/workflows/TestRunnerTiledSnitchSequential.yml
@@ -0,0 +1,60 @@
+name: TestRunnerTiledSnitchSequential
+
+# Reusable workflow: runs testRunner_tiled_snitch.py sequentially for every
+# (test name, L1 value) pair described by the `tests-config` JSON input.
+on:
+  workflow_call:
+    inputs:
+      # JSON array of objects, each with a "name" string and an "L1" array.
+      tests-config:
+        required: true
+        type: string
+      # Forwarded to the test runner as --cores.
+      num-cores:
+        required: false
+        default: 9
+        type: number
+      # Forwarded to the test runner as --defaultMemLevel.
+      default-memory-level:
+        required: false
+        default: "L2"
+        type: string
+      # Forwarded to the test runner as --simulator.
+      simulator:
+        required: false
+        default: "banshee"
+        type: string
+
+
+jobs:
+
+  test-runner-snitch-tiled:
+    runs-on: ubuntu-22.04
+    container:
+      image: ghcr.io/pulp-platform/deeploy:main
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Build Deeploy
+        run: pip install -e .
+      - name: Install jq
+        # Refresh the package index first: installing against stale or
+        # missing package lists fails to locate or download the package.
+        run: apt-get update && apt-get install -y jq
+      - name: Cache ccache
+        id: ccache-cache
+        uses: actions/cache@v4
+        with:
+          path: /app/.ccache
+          key: ${{ runner.os }}-ccache
+      - name: Run Tests
+        run: |
+          cd DeeployTest
+          echo '${{ inputs.tests-config }}' > tests.json
+          mkdir -p /app/.ccache
+          export CCACHE_DIR=/app/.ccache
+
+          # One compact JSON object per line; IFS= and -r keep each record
+          # verbatim (no backslash processing, no whitespace trimming).
+          jq -c '.[]' tests.json | while IFS= read -r test; do
+            testName=$(echo "$test" | jq -r '.name')
+            L1_values=$(echo "$test" | jq -r '.L1[]')
+            # Unquoted on purpose: split the newline-separated L1 values.
+            for L1_value in $L1_values; do
+              echo "Running test: $testName with L1: $L1_value"
+              python testRunner_tiled_snitch.py -t Tests/$testName --cores=${{ inputs.num-cores }} --simulator=${{ inputs.simulator }} --l1 $L1_value --defaultMemLevel=${{ inputs.default-memory-level }} --toolchain_install_dir /app/install/riscv-llvm/
+            done
+          done
+        shell: bash
+
diff --git a/.gitmodules b/.gitmodules
@@ -6,4 +6,4 @@
 	url = https://github.com/pulp-platform/pulp-nnx.git
 [submodule "CMSIS-NN"]
 	path = TargetLibraries/CMSIS/third_party/CMSIS-NN
-	url = https://github.com/ARM-software/CMSIS-NN.git
+	url = https://github.com/ARM-software/CMSIS-NN.git
diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py
@@ -29,7 +29,7 @@
 
 import numpy as np
 
-from Deeploy.DeeployTypes import NodeMapper, ONNXLayer, Shape
+from Deeploy.DeeployTypes import NodeMapper, ONNXLayer, OperatorRepresentation, Shape
 
 
 class ConcatLayer(ONNXLayer):
@@ -85,6 +85,23 @@ def __init__(self, maps: List[NodeMapper]):
         super().__init__(maps)
 
 
+class iNoNormLayer(ONNXLayer):
+    """ONNX layer wrapper for the iNoNorm operator."""
+
+    def __init__(self, maps: List[NodeMapper]):
+        super().__init__(maps)
+
+    def computeOps(self):
+        """Return the operation count: four operations per element of 'size'."""
+        # Per element: 2 mul, 1 add, 1 right shift
+        opsPerElement = 4
+        numElements = self.mapper.parser.operatorRepresentation['size']
+        return numElements * opsPerElement
+
+    def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation: OperatorRepresentation,
+                      channels_first: bool) -> Tuple[Shape]:
+        """Left-pad the weights shape with ones up to the rank of the first input.
+
+        ``inputShapes`` is modified in place: entry 1 (weights) becomes the
+        padded shape and entry 2 (bias) is set to that very same list object.
+        ``outputShapes`` is returned untouched.
+        """
+        dataShape, weightShape = inputShapes[0], inputShapes[1]
+
+        # Number of leading singleton dimensions needed to reach the data rank
+        missingDims = len(dataShape) - len(weightShape)
+        paddedShape = [1] * missingDims + list(weightShape)
+
+        inputShapes[1] = paddedShape
+        inputShapes[2] = paddedShape
+        return (inputShapes, outputShapes)
+
+
 class RQSiGELULayer(iGELULayer):
 
     def __init__(self, maps: List[NodeMapper]):

diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py
@@ -752,6 +752,41 @@ def parseNodeCtxt(self,
         return ctxt, True
 
 
+class iNoNormParser(NodeParser):
+    """Parser for iNoNorm nodes carrying the 'D', 'mul' and 'n_levels' attributes."""
+
+    def __init__(self):
+        super().__init__()
+
+    def parseNode(self, node: gs.Node) -> bool:
+        """Read the node attributes into the operator representation.
+
+        Returns False, leaving the operator representation untouched, when any
+        of 'D', 'mul' or 'n_levels' is missing; returns True otherwise.
+        """
+        attrs = node.attrs
+        if not all(key in attrs for key in ('D', 'mul', 'n_levels')):
+            return False
+
+        opRep = self.operatorRepresentation
+        opRep['D'] = attrs['D']
+        # Only the first element of the attribute values is used for log2D and mul
+        opRep['log2D'] = int(np.log2(attrs['D'].values).tolist()[0])
+        opRep['mul'] = int(attrs['mul'].values.tolist()[0])
+        opRep['n_levels'] = attrs['n_levels']
+        return True
+
+    def parseNodeCtxt(self,
+                      ctxt: NetworkContext,
+                      node: gs.Node,
+                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
+        """Bind the buffer names of the three inputs and the output, plus 'size'.
+
+        'size' is the product of the first input's shape. The context is
+        returned as received and the second tuple element is always True.
+        """
+        # Resolve every buffer before writing anything to the representation
+        dataIn, weightBuf, biasBuf = (ctxt.lookup(node.inputs[idx].name) for idx in range(3))
+        dataOut = ctxt.lookup(node.outputs[0].name)
+
+        boundBuffers = (
+            ('data_in', dataIn),
+            ('weights', weightBuf),
+            ('bias', biasBuf),
+            ('data_out', dataOut),
+        )
+        for key, buffer in boundBuffers:
+            self.operatorRepresentation[key] = buffer.name
+        self.operatorRepresentation['size'] = np.prod(dataIn.shape)
+
+        return ctxt, True
+
+
 class RQSiHardswishParser(iHardswishParser):
 
     def __init__(self):
@@ -2080,3 +2115,59 @@ def parseNodeCtxt(self,
             return newCtxt, True
 
         return ctxt, False
+
+
+class RQAddParser(AddParser):
+    """Add parser extended with three requantization stages: rqs1, rqs2, rqsOut."""
+
+    def parseNode(self, node: gs.Node) -> bool:
+        """Parse the requantization attributes of all three stages.
+
+        Returns False when the parent parser rejects the node or when any stage
+        lacks one of ``<stage>_mul``, ``<stage>_add``, ``<stage>_div``,
+        ``<stage>_signed``, or both ``<stage>_n_levels`` and
+        ``<stage>_n_levels_out``. Otherwise fills, per stage, the keys
+        ``_n_levels``, ``_mul``, ``_add``, ``_signed`` and ``_log2D`` of the
+        operator representation and returns True.
+        """
+        if not super().parseNode(node):
+            return False
+
+        attrs = node.attrs
+        stages = ('rqs1', 'rqs2', 'rqsOut')
+
+        # Reject the node before writing anything if a single attribute is missing
+        for stage in stages:
+            hasMandatory = all((stage + suffix) in attrs for suffix in ('_mul', '_add', '_div', '_signed'))
+            hasLevels = (stage + '_n_levels') in attrs or (stage + '_n_levels_out') in attrs
+            if not (hasMandatory and hasLevels):
+                return False
+
+        opRep = self.operatorRepresentation
+        for stage in stages:
+            levelsKey = stage + '_n_levels'
+            # '<stage>_n_levels' takes precedence over '<stage>_n_levels_out'
+            levelsAttr = levelsKey if levelsKey in attrs else levelsKey + '_out'
+            opRep[levelsKey] = int(attrs[levelsAttr].values)
+
+            # NOTE(review): mul/add are converted directly while the remaining
+            # attributes go through `.values` - confirm this is intentional.
+            opRep[stage + '_mul'] = int(attrs[stage + '_mul'])
+            opRep[stage + '_add'] = int(attrs[stage + '_add'])
+            opRep[stage + '_signed'] = int(attrs[stage + '_signed'].values)
+            opRep[stage + '_log2D'] = int(math.log2(attrs[stage + '_div'].values))
+
+        return True
diff --git a/Deeploy/Targets/Generic/Templates/RQAddTemplate.py b/Deeploy/Targets/Generic/Templates/RQAddTemplate.py
@@ -0,0 +1,48 @@
+# ----------------------------------------------------------------------
+#
+# File: RQAddTemplate.py
+#
+# Last edited: 11.11.2023
+#
+# Copyright (C) 2023, ETH Zurich and University of Bologna.
+#
+# Author:
+# - Moritz Scherer, ETH Zurich
+# - Victor Jung, ETH Zurich
+#
+# ----------------------------------------------------------------------
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, List, Tuple
+
+from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation
+
+
+class RQAddTemplate(NodeTemplate):
+    """Node template that annotates an RQAdd node with operand signedness."""
+
+    def __init__(self, templateStr):
+        super().__init__(templateStr)
+
+    def alignToContext(self, ctxt: NetworkContext,
+                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
+        """Store the signedness of both inputs and the output.
+
+        A buffer counts as signed when the ``typeMin`` of its referenced type
+        is negative. The flags are written to 'input_signed' (data_in_1),
+        'input_2_signed' (data_in_2) and 'output_signed' (data_out). Returns
+        the unchanged context, the updated representation and an empty list.
+        """
+
+        def _isSigned(bufferKey):
+            buffer = ctxt.lookup(operatorRepresentation[bufferKey])
+            return buffer._type.referencedType.typeMin < 0
+
+        # Resolve all three buffers before touching the representation
+        secondInputSigned = _isSigned('data_in_2')
+        firstInputSigned = _isSigned('data_in_1')
+        outputSigned = _isSigned('data_out')
+
+        operatorRepresentation['input_2_signed'] = secondInputSigned
+        operatorRepresentation['input_signed'] = firstInputSigned
+        operatorRepresentation['output_signed'] = outputSigned
+
+        return ctxt, operatorRepresentation, []
diff --git a/Deeploy/Targets/Generic/Templates/iNoNormTemplate.py b/Deeploy/Targets/Generic/Templates/iNoNormTemplate.py
@@ -0,0 +1,38 @@
+# ----------------------------------------------------------------------
+#
+# File: iNoNormTemplate.py
+#
+# Last edited: 22.02.2024
+#
+# Copyright (C) 2024, ETH Zurich and University of Bologna.
+#
+# Author:
+# - Victor Jung, ETH Zurich
+#
+# ----------------------------------------------------------------------
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from Deeploy.DeeployTypes import NodeTemplate
+
+
+class _iNoNormTemplate(NodeTemplate):
+    """Node template for the iNoNorm operator; adds no behaviour to NodeTemplate."""
+
+    def __init__(self, templateStr):
+        super().__init__(templateStr)
+
+
+# Template rendering one kernel call, SnitchiNoNorm_s<W_in>_s<W_out>, where the
+# two widths are the typeWidth of the types referenced by data_in and data_out.
+# Arguments, in order: data_in, data_out, weights, bias, size, mul, log2D.
+# NOTE(review): the emitted kernel name is Snitch-prefixed although this file
+# sits under Targets/Generic - confirm this is the intended location.
+referenceTemplate = _iNoNormTemplate("""
+// iNoNorm (Name: ${nodeName}, Op: ${nodeOp})
+SnitchiNoNorm_s${data_in_type.referencedType.typeWidth}_s${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${weights}, ${bias}, ${size}, ${mul}, ${log2D});
+""")