onnx · AlexandreEichenberger · Jan 8, 2025 · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
diff --git a/docker/Dockerfile.onnx-mlir b/docker/Dockerfile.onnx-mlir
@@ -26,7 +26,7 @@ RUN ONNX_ROOT=${WORK_DIR}/onnx-mlir/third_party/onnx \
 ARG NPROC=4
 ARG ACCEL=NNPA
 ARG TEST_NOFLOAT16
-ARG TEST_MCPU
+ARG TEST_MARCH
 ARG KEEPSRC
 
 RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \
@@ -53,21 +53,21 @@ RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \
                                          ([ "$(uname -m)" = "x86_64" ] &&  echo true || \
                                          ([ "$(uname -m)" = "ppc64le" ] && echo || echo)))} \
 # User image is built with SIMD (currently on s390x only)
-    && TEST_MCPU=${TEST_MCPU:-$([ "$(uname -m)" = "s390x" ] && echo z16 || \
+    && TEST_MARCH=${TEST_MARCH:-$([ "$(uname -m)" = "s390x" ] && echo z16 || \
                                ([ "$(uname -m)" = "x86_64" ] &&  echo || \
                                ([ "$(uname -m)" = "ppc64le" ] && echo || echo)))} \
-    && TEST_ARGS="-mcpu=${TEST_MCPU}" \
+    && TEST_ARGS="-march=${TEST_MARCH}" \
     && make check-docs \
     && make check-unittest \
     && make check-multiple-models \
     && make NPROC=${NPROC} \
             CTEST_PARALLEL_LEVEL=${NPROC} \
             TEST_NOFLOAT16=${TEST_NOFLOAT16} \
-            TEST_MCPU=${TEST_MCPU} \
+            TEST_MARCH=${TEST_MARCH} \
             TEST_ARGS="${TEST_ARGS}" \
             -j${NPROC} \
             check-onnx-backend-numerical \
-    && if [ "${TEST_MCPU}" = "z16" ]; then \
+    && if [ "${TEST_MARCH}" = "z16" ]; then \
           make NPROC=${NPROC} \
                CTEST_PARALLEL_LEVEL=${NPROC} \
                -j${NPROC} \

diff --git a/docker/Dockerfile.onnx-mlir-dev b/docker/Dockerfile.onnx-mlir-dev
@@ -20,7 +20,7 @@ RUN ONNX_ROOT=${WORK_DIR}/onnx-mlir/third_party/onnx \
 ARG NPROC=4
 ARG ACCEL=NNPA
 ARG TEST_NOFLOAT16
-ARG TEST_MCPU
+ARG TEST_MARCH
 
 RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \
     && ONNX_MLIR_ROOT=${WORK_DIR}/onnx-mlir \
@@ -51,18 +51,18 @@ RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \
                                          ([ "$(uname -m)" = "x86_64" ] &&  echo true || \
                                          ([ "$(uname -m)" = "ppc64le" ] && echo || echo)))} \
 # Dev image is built without SIMD, placeholder for easy SIMD enablement
-    && TEST_MCPU=$([ "$(uname -m)" = "s390x" ] && echo || \
+    && TEST_MARCH=$([ "$(uname -m)" = "s390x" ] && echo || \
                   ([ "$(uname -m)" = "x86_64" ] &&  echo || \
                   ([ "$(uname -m)" = "ppc64le" ] && echo || echo))) \
-    && TEST_ARGS="-mcpu=${TEST_MCPU}" \
+    && TEST_ARGS="-march=${TEST_MARCH}" \
     && TEST_OPTLEVEL=0 \
     && make check-docs \
     && make check-unittest \
     && make check-multiple-models \
     && make NPROC=${NPROC} \
             CTEST_PARALLEL_LEVEL=${NPROC} \
             TEST_NOFLOAT16=${TEST_NOFLOAT16} \
-            TEST_MCPU=${TEST_MCPU} \
+            TEST_MARCH=${TEST_MARCH} \
             TEST_ARGS="${TEST_ARGS}" \
             TEST_OPTLEVEL=${TEST_OPTLEVEL} \
             -j${NPROC} \

diff --git a/docs/DebuggingNumericalError.md b/docs/DebuggingNumericalError.md
@@ -65,7 +65,7 @@ optional arguments:
 ## Helper script to compare a model under two distinct compile option.
 
 Based on the above `utils/runONNXModel.py`, the `utils/checkONNXModel.py` allows a user to run a given model twice, under two distinct compile options, and compare its results.
-This let a user simply test a new option, comparing the safe version of the compiler (e.g. `-O0` or `-O3`) with a more advanced version (e.g. `-O3` or `-O3 -march=x86-64`). Simply specify the compile options using the `--ref-compile-args` and `--test-compile-args` flags, a model using the `--model` flag, and possibly a `--shape-info` in presence of dynamic shape inputs.
+This lets a user simply test a new option, comparing the safe version of the compiler (e.g. `-O0` or `-O3`) with a more advanced version (e.g. `-O3` or `-O3 --march=x86-64`). Simply specify the compile options using the `--ref-compile-args` and `--test-compile-args` flags, a model using the `--model` flag, and possibly a `--shape-info` in presence of dynamic shape inputs.
 Full options are listed under the `--help` flag.
 
 ## Debugging the Code Generated for an Operator.

diff --git a/docs/Dialects/zhigh.md b/docs/Dialects/zhigh.md
diff --git a/docs/Dialects/zlow.md b/docs/Dialects/zlow.md
@@ -342,6 +342,52 @@ Interfaces: `MemoryEffectOpInterface`
 | `shape` | memref of 64-bit signless integer values
 | `hn_output` | memref of dlfloat16 type values
 
+### `zlow.gelu` (::onnx_mlir::zlow::ZLowGeluOp)
+
+_ZLow gelu operation_
+
+ZLow operation to perform a gelu.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
+### `zlow.invsqrt` (::onnx_mlir::zlow::ZLowInvSqrtOp)
+
+_ZLow invsqrt operation_
+
+ZLow operation to perform an invsqrt.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.lstm` (::onnx_mlir::zlow::ZLowLSTMOp)
 
 _ZLow lstm operation_
@@ -387,6 +433,30 @@ Interfaces: `MemoryEffectOpInterface`
 | `hn_output` | memref of dlfloat16 type values
 | `cf_output` | memref of dlfloat16 type values
 
+### `zlow.leakyrelu` (::onnx_mlir::zlow::ZLowLeakyReluOp)
+
+_ZLow leakyrelu operation_
+
+ZLow operation to perform a leakyrelu.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>alpha</code></td><td>::mlir::FloatAttr</td><td>32-bit float attribute</td></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.log` (::onnx_mlir::zlow::ZLowLogOp)
 
 _ZLow log operation_
@@ -423,14 +493,18 @@ shape is a 1D MemRef (memref<3xi64>) whose items are:
   * 2nd item: n
   * 3rd item: p
 * In case of stacked: X(s, m, n) * Y(s, n, p) + Bias(s, p)
-     or broadcasting: X(s, m, n) * Y(n, p) + Bias(p)
+     or broadcasting1: X(m, n) * Y(s, n, p) + Bias(s, p)
+     or broadcasting23: X(s, m, n) * Y(n, p) + Bias(p)
 shape is a 1D MemRef (memref<4xi64>) whose items are:
   * 1st item: s
   * 2nd item: m
   * 3rd item: n
   * 4th item: p
-* is_bcast: -1 broadcasting, 0: no broadcasting.
+* is_bcast1:  -1 broadcasting1, 0: no broadcasting1.
+* is_bcast23: -1 broadcasting23, 0: no broadcasting23.
 * is_stacked: -1 stacked, 0: unstacked.
+* transposeA: !0 transpose A, 0: do not transpose A.
+* transposeB: !0 transpose B, 0: do not transpose B.
 
 Traits: `MemRefsNormalizable`
 
@@ -440,8 +514,11 @@ Interfaces: `MemoryEffectOpInterface`
 
 <table>
 <tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
-<tr><td><code>is_bcast</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>is_bcast1</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>is_bcast23</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
 <tr><td><code>is_stacked</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>transposeA</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>transposeB</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
 </table>
 
 #### Operands:
@@ -592,6 +669,144 @@ Interfaces: `MemoryEffectOpInterface`
 | `shape` | memref of 64-bit signless integer values
 | `Out` | memref of dlfloat16 type values
 
+### `zlow.quantizedMatmul` (::onnx_mlir::zlow::ZLowQuantizedMatMulOp)
+
+_ZLow quantized matmul operation_
+
+ZLow operation to perform a quantized matmul.
+work_area: a 4K-aligned buffer having the same layout as bias but dlfloat16 type.
+* In case of unstacked: X(m, n) * Y(n, p) + Bias(p)
+shape is a 1D MemRef (memref<3xi64>) whose items are:
+  * 1st item: m
+  * 2nd item: n
+  * 3rd item: p
+* In case of stacked: X(s, m, n) * Y(s, n, p) + Bias(s, p)
+     or broadcasting: X(s, m, n) * Y(n, p) + Bias(p)
+shape is a 1D MemRef (memref<4xi64>) whose items are:
+  * 1st item: s
+  * 2nd item: m
+  * 3rd item: n
+  * 4th item: p
+* is_bcast: -1 broadcasting, 0: no broadcasting.
+* is_stacked: -1 stacked, 0: unstacked.
+* DequantizeOutput: -1 output is dequantized, 0: output is not dequantized.
+* PreComputedBias: -1 bias is pre-computed, 0: bias is not pre-computed.
+
+Values for `q_type` are "DLFLOAT16", "INT8", "WEIGHTS", "UNDEFINED".
+
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>x_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>y_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>bias_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>out_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>is_bcast</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>is_stacked</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>pre_computed_bias</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>disable_clipping</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>dequantize_output</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type or 8-bit signless integer values
+| `x_rec_scale` | 0D memref of 32-bit float values
+| `x_offset` | 0D memref of 32-bit float values
+| `Y` | memref of dlfloat16 type or 8-bit signless integer values
+| `y_rec_scale` | 0D memref of 32-bit float values
+| `y_offset` | 0D memref of 32-bit float values
+| `Bias` | memref of dlfloat16 type or 8-bit signless integer values
+| `bias_rec_scale` | 0D memref of 32-bit float values
+| `bias_offset` | 0D memref of 32-bit float values
+| `work_area` | memref of dlfloat16 type or 8-bit signless integer values or none type
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type or 8-bit signless integer values
+| `out_rec_scale` | 0D memref of 32-bit float values
+| `out_offset` | 0D memref of 32-bit float values
+
+### `zlow.quantizedStick` (::onnx_mlir::zlow::ZLowQuantizedStickOp)
+
+_ZLow stick operation for quantization_
+
+ZLow operation to perform a quantization stick.
+Type is one of the values: dlfloat16, int8, and weights.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of 8-bit signless integer or 32-bit float values
+| `rec_scale` | 0D memref of 32-bit float values
+| `offset` | 0D memref of 32-bit float values
+| `out` | memref of dlfloat16 type or 8-bit signless integer values
+
+### `zlow.reducemax` (::onnx_mlir::zlow::ZLowReduceMaxOp)
+
+_ZLow reducemax operation_
+
+ZLow operation to perform a reducemax.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>op_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `work_area` | memref of 8-bit signless integer values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
+### `zlow.reducemin` (::onnx_mlir::zlow::ZLowReduceMinOp)
+
+_ZLow reducemin operation_
+
+ZLow operation to perform a reducemin.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>op_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `work_area` | memref of 8-bit signless integer values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.relu` (::onnx_mlir::zlow::ZLowReluOp)
 
 _ZLow relu operation_
@@ -670,6 +885,29 @@ Interfaces: `MemoryEffectOpInterface`
 | `shape` | memref of 64-bit signless integer values
 | `Out` | memref of dlfloat16 type values
 
+### `zlow.sqrt` (::onnx_mlir::zlow::ZLowSqrtOp)
+
+_ZLow sqrt operation_
+
+ZLow operation to perform a sqrt.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.stickForGRU` (::onnx_mlir::zlow::ZLowStickForGRUOp)
 
 _ZLow stick operation for GRU_

diff --git a/docs/Instrumentation.md b/docs/Instrumentation.md
@@ -61,11 +61,11 @@ The output for the memory measurement is explained here.
 
 Other example for NNPA
 - Performance profiling for onnx ops before lowering to zhigh ops:
-  `onnx-mlir --mcpu=z16 --maccel=NNPA --instrument-stage=Onnx --instrument-ops=onnx.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime mymodel.onnx`
+  `onnx-mlir --march=z16 --maccel=NNPA --instrument-stage=Onnx --instrument-ops=onnx.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime mymodel.onnx`
 - Performance profiling for onnx and zhigh ops:
-  `onnx-mlir --mcpu=z16 --maccel=NNPA --instrument-stage=ZHigh --instrument-ops=onnx.*,zhigh.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime mymodel.onnx`
+  `onnx-mlir --march=z16 --maccel=NNPA --instrument-stage=ZHigh --instrument-ops=onnx.*,zhigh.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime mymodel.onnx`
 - Performance profiling for zlow ops:
-  `onnx-mlir --mcpu=z16 --maccel=NNPA --instrument-stage=ZLow --instrument-ops=zlow.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime mymodel.onnx`
+  `onnx-mlir --march=z16 --maccel=NNPA --instrument-stage=ZLow --instrument-ops=zlow.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime mymodel.onnx`
 
 ## Control instrument at runtime
 By providing certain env variable at runtime, you can disable reports from  instrument library.