
Comparing changes

base repository: buddy-compiler/buddy-benchmark
base: main
head repository: xlinsist/buddy-benchmark
compare: main
Can’t automatically merge.
  • 2 commits
  • 44 files changed
  • 1 contributor

Commits on May 16, 2024

  1. edfb8fe
  2. 40e2ca2
Showing with 1,459 additions and 792 deletions.
  1. +9 −1 CMakeLists.txt
  2. +16 −0 README.md
  3. +17 −0 benchmarks/OpOptimization/AffineOps/Add2D.mlir
  4. +108 −0 benchmarks/OpOptimization/AffineOps/AffineOpsBenchmark.cpp
  5. +87 −0 benchmarks/OpOptimization/AffineOps/CMakeLists.txt
  6. +28 −0 benchmarks/OpOptimization/AffineOps/Main.cpp
  7. +21 −0 benchmarks/OpOptimization/AffineOps/Matmul2D.mlir
  8. +16 −0 benchmarks/OpOptimization/AffineOps/Reduction.mlir
  9. +3 −2 benchmarks/OpOptimization/CMakeLists.txt
  10. 0 benchmarks/OpOptimization/{Conv2dNchwFchw/CMakeLists.txt → Conv2DNchwFchw/CMakeLists copy.txt}
  11. +95 −0 benchmarks/OpOptimization/Conv2DNchwFchw/CMakeLists.txt
  12. +0 −1 benchmarks/OpOptimization/{Conv2dNchwFchw → Conv2DNchwFchw}/Conv2DNchwFchw.mlir
  13. +14 −14 ...ptimization/{Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir → Conv2DNchwFchw/Conv2DNchwFchw128.mlir}
  14. +63 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchw16.mlir
  15. +63 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchw32.mlir
  16. +63 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchw64.mlir
  17. +63 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchw8.mlir
  18. +99 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchwBenchmark.cpp
  19. +16 −16 ...mization/{Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir → Conv2DNchwFchw/Conv2DNchwFchwIm2col128.mlir}
  20. +78 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchwIm2col16.mlir
  21. +78 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchwIm2col32.mlir
  22. +78 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchwIm2col64.mlir
  23. +78 −0 benchmarks/OpOptimization/Conv2DNchwFchw/Conv2DNchwFchwIm2col8.mlir
  24. +0 −2 benchmarks/OpOptimization/{Conv2dNchwFchw → Conv2DNchwFchw}/Main.cpp
  25. +0 −225 benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp
  26. +0 −137 benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir
  27. +0 −102 benchmarks/OpOptimization/MatMul/CMakeLists.txt
  28. +0 −211 benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp
  29. +0 −55 benchmarks/OpOptimization/MatMul/MatMulTransform.mlir
  30. +72 −0 benchmarks/OpOptimization/Matmul/CMakeLists.txt
  31. +0 −2 benchmarks/OpOptimization/{MatMul → Matmul}/Main.cpp
  32. +1 −1 benchmarks/OpOptimization/{MatMul/MatMul.mlir → Matmul/Matmul.mlir}
  33. +14 −14 benchmarks/OpOptimization/{MatMul/MatMulBroadcast.mlir → Matmul/Matmul128.mlir}
  34. +40 −0 benchmarks/OpOptimization/Matmul/Matmul16.mlir
  35. +40 −0 benchmarks/OpOptimization/Matmul/Matmul32.mlir
  36. +40 −0 benchmarks/OpOptimization/Matmul/Matmul64.mlir
  37. +40 −0 benchmarks/OpOptimization/Matmul/Matmul8.mlir
  38. +61 −0 benchmarks/OpOptimization/Matmul/MatmulBenchmark.cpp
  39. 0 benchmarks/OpOptimization/{MatMul → Matmul}/TVM/.gitignore
  40. 0 benchmarks/OpOptimization/{MatMul → Matmul}/TVM/main.py
  41. 0 benchmarks/OpOptimization/{MatMul → Matmul}/TVM/matmul_autotvm.py
  42. 0 benchmarks/OpOptimization/{MatMul → Matmul}/TVM/matmul_manual.py
  43. +33 −9 cmake/buddy-benchmark.cmake
  44. +25 −0 cmake/riscv-toolchain.cmake
10 changes: 9 additions & 1 deletion CMakeLists.txt
@@ -44,7 +44,14 @@ include_directories(${BUDDY_THIRDPARTY_INCLUDE_DIR})

# MLIR binary directory.
set(LLVM_MLIR_BINARY_DIR ${BUDDY_MLIR_BUILD_DIR}/../llvm/build/bin)
set(LLVM_MLIR_LIBRARY_DIR ${BUDDY_MLIR_BUILD_DIR}/../llvm/build/lib)
# set(LLVM_MLIR_LIBRARY_DIR ${BUDDY_MLIR_BUILD_DIR}/../llvm/build/lib)

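# Pick the MLIR library directory based on whether we are cross-compiling for RISC-V.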
if(CMAKE_TOOLCHAIN_FILE STREQUAL ${BUDDY_SOURCE_DIR}/cmake/riscv-toolchain.cmake)
# Use cross-compiled RISC-V MLIR library.
set(LLVM_MLIR_LIBRARY_DIR ${BUDDY_MLIR_BUILD_DIR}/../llvm/build-cross-mlir/lib)
else()
set(LLVM_MLIR_LIBRARY_DIR ${BUDDY_MLIR_BUILD_DIR}/../llvm/build/lib)
endif()

# Helper functions.
include(${BUDDY_SOURCE_DIR}/cmake/buddy-benchmark.cmake)
@@ -68,6 +75,7 @@ ExternalProject_Add(project_googlebenchmark
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/vendor/benchmark
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DBENCHMARK_ENABLE_TESTING=OFF
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
UPDATE_COMMAND ""
TEST_COMMAND "")

16 changes: 16 additions & 0 deletions README.md
@@ -233,8 +233,24 @@ $ ninja <your target operation benchmark>
// Operation benchmarks supported include:
// - conv2d-nchw-fchw-benchmark
// - matmul-benchmark
// - affine-ops-benchmark
```

The OpOptimization benchmarks also support RISC-V. To set up the environment for RISC-V cross-compilation and run the benchmarks:
1. Build riscv-gnu-toolchain, QEMU, and the cross-compiled MLIR libraries according to [this link](https://gist.github.com/zhanghb97/ad44407e169de298911b8a4235e68497).
2. Set `RISCV_TOOLCHAIN_ROOT` in buddy-benchmark/cmake/riscv-toolchain.cmake to the root directory of your built riscv-gnu-toolchain.
3. Cross-compile the benchmarks with `CMAKE_TOOLCHAIN_FILE` set:
```
$ mkdir build && cd build
$ cmake -G Ninja .. \
-DCMAKE_BUILD_TYPE=RELEASE \
-DOP_OPTIMIZATION_BENCHMARKS=ON \
-DBUDDY_MLIR_BUILD_DIR=/PATH/TO/BUDDY-MLIR/BUILD/ \
-DCMAKE_TOOLCHAIN_FILE=/PATH/TO/BUDDY-BENCHMARK/cmake/riscv-toolchain.cmake
$ ninja <your target operation benchmark>
```
4. Run the resulting executables on a RISC-V machine, or under user-mode QEMU (for example `qemu-riscv64` with the toolchain sysroot).

Run TVM operation optimization benchmark cases.
- Install TVM ([steps](./thirdparty/README.md#tvm)).
- Enter your TVM (virtual) environment.
17 changes: 17 additions & 0 deletions benchmarks/OpOptimization/AffineOps/Add2D.mlir
@@ -0,0 +1,17 @@
func.func @add2d(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%M = memref.dim %A, %c0 : memref<?x?xf32>
%N = memref.dim %A, %c1 : memref<?x?xf32>
%f1 = arith.constant 1.0 : f32
%f2 = arith.constant 2.0 : f32
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = affine.load %A[%i4, %i5] : memref<?x?xf32>
%b5 = affine.load %B[%i4, %i5] : memref<?x?xf32>
%s5 = arith.addf %a5, %b5 : f32
affine.store %s5, %C[%i4, %i5] : memref<?x?xf32>
}
}
return
}
108 changes: 108 additions & 0 deletions benchmarks/OpOptimization/AffineOps/AffineOpsBenchmark.cpp
@@ -0,0 +1,108 @@
//===- AffineOpsBenchmark.cpp ---------------------------------------------===//

#include <benchmark/benchmark.h>
#include <buddy/Core/Container.h>
#include <iostream>
#include <random>

namespace {
extern "C" {

float _mlir_ciface_add2d(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
float _mlir_ciface_add2d_vector_8(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
float _mlir_ciface_add2d_vector_16(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
float _mlir_ciface_add2d_vector_32(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
float _mlir_ciface_add2d_vector_64(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
float _mlir_ciface_add2d_vector_128(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);

void _mlir_ciface_reduction(MemRef<float, 2> *A, MemRef<float, 1> *B);
void _mlir_ciface_reduction_vector_8(MemRef<float, 2> *A, MemRef<float, 1> *B);
void _mlir_ciface_reduction_vector_16(MemRef<float, 2> *A, MemRef<float, 1> *B);
void _mlir_ciface_reduction_vector_32(MemRef<float, 2> *A, MemRef<float, 1> *B);
void _mlir_ciface_reduction_vector_64(MemRef<float, 2> *A, MemRef<float, 1> *B);
void _mlir_ciface_reduction_vector_128(MemRef<float, 2> *A,
MemRef<float, 1> *B);

void _mlir_ciface_matmul2d(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
void _mlir_ciface_matmul2d_vector_8(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
void _mlir_ciface_matmul2d_vector_16(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
void _mlir_ciface_matmul2d_vector_32(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
void _mlir_ciface_matmul2d_vector_64(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
void _mlir_ciface_matmul2d_vector_128(MemRef<float, 2> *A, MemRef<float, 2> *B,
MemRef<float, 2> *C);
}

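// Each Define*Benchmark macro below stamps out one Google Benchmark function
// over fixed-size MemRef inputs; RUN_BENCHMARK then instantiates it for the
// scalar kernel and for each vectorized variant (8/16/32/64/128 lanes).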
#define DefineAdd2DBenchmark(name, func) \
void name(benchmark::State &state) { \
int M = 100000, N = 128; \
intptr_t sizes[2] = {M, N}; \
MemRef<float, 2> A(sizes, 1.0); \
MemRef<float, 2> B(sizes, 1.0); \
MemRef<float, 2> C(sizes, 0.0); \
for (auto _ : state) { \
func(&A, &B, &C); \
} \
}

#define DefineReductionBenchmark(name, func) \
void name(benchmark::State &state) { \
int M = 100000, N = 128; \
intptr_t sizesA[2] = {M, N}; \
intptr_t sizesB[1] = {M}; \
MemRef<float, 2> A(sizesA, 1.0); \
MemRef<float, 1> B(sizesB, 0.0); \
for (auto _ : state) { \
func(&A, &B); \
} \
}

#define DefineMatmul2DBenchmark(name, func) \
void name(benchmark::State &state) { \
int M = 128, N = 128, K = 128; \
intptr_t sizesA[2] = {M, K}; \
intptr_t sizesB[2] = {K, N}; \
intptr_t sizesC[2] = {M, N}; \
MemRef<float, 2> A(sizesA, 1.0); \
MemRef<float, 2> B(sizesB, 1.0); \
MemRef<float, 2> C(sizesC, 0.0); \
for (auto _ : state) { \
func(&A, &B, &C); \
} \
}

#define RUN_BENCHMARK(name, func) \
Define##name##Benchmark(name, func) BENCHMARK(name)->Unit( \
benchmark::kMillisecond); \
Define##name##Benchmark(name##Vector8, func##_vector_8) \
BENCHMARK(name##Vector8) \
->Unit(benchmark::kMillisecond); \
Define##name##Benchmark(name##Vector16, func##_vector_16) \
BENCHMARK(name##Vector16) \
->Unit(benchmark::kMillisecond); \
Define##name##Benchmark(name##Vector32, func##_vector_32) \
BENCHMARK(name##Vector32) \
->Unit(benchmark::kMillisecond); \
Define##name##Benchmark(name##Vector64, func##_vector_64) \
BENCHMARK(name##Vector64) \
->Unit(benchmark::kMillisecond); \
Define##name##Benchmark(name##Vector128, func##_vector_128) \
BENCHMARK(name##Vector128) \
->Unit(benchmark::kMillisecond);

RUN_BENCHMARK(Add2D, _mlir_ciface_add2d)
RUN_BENCHMARK(Reduction, _mlir_ciface_reduction)
RUN_BENCHMARK(Matmul2D, _mlir_ciface_matmul2d)

} // namespace
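For reference, expanding the first invocation `RUN_BENCHMARK(Add2D, _mlir_ciface_add2d)` by hand gives roughly the following preprocessor output (a sketch trimmed to the scalar and 8-lane variants; the 16/32/64/128-lane cases follow the same pattern):

```cpp
// Scalar variant: times the unvectorized kernel over a 100000x128 buffer.
void Add2D(benchmark::State &state) {
  int M = 100000, N = 128;
  intptr_t sizes[2] = {M, N};
  MemRef<float, 2> A(sizes, 1.0);
  MemRef<float, 2> B(sizes, 1.0);
  MemRef<float, 2> C(sizes, 0.0);
  for (auto _ : state) {
    _mlir_ciface_add2d(&A, &B, &C);
  }
}
BENCHMARK(Add2D)->Unit(benchmark::kMillisecond);

// 8-lane variant: identical harness, but it calls the super-vectorized kernel.
void Add2DVector8(benchmark::State &state) {
  int M = 100000, N = 128;
  intptr_t sizes[2] = {M, N};
  MemRef<float, 2> A(sizes, 1.0);
  MemRef<float, 2> B(sizes, 1.0);
  MemRef<float, 2> C(sizes, 0.0);
  for (auto _ : state) {
    _mlir_ciface_add2d_vector_8(&A, &B, &C);
  }
}
BENCHMARK(Add2DVector8)->Unit(benchmark::kMillisecond);
```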
87 changes: 87 additions & 0 deletions benchmarks/OpOptimization/AffineOps/CMakeLists.txt
@@ -0,0 +1,87 @@
function(add_e2e_custom_command output_file mlir_file sed_command mlir_opt_flags mlir_translate_flags)
if(CMAKE_TOOLCHAIN_FILE STREQUAL ${BUDDY_SOURCE_DIR}/cmake/riscv-toolchain.cmake)
# Generate RISC-V object file.
set(target_triple "riscv64")
set(target_attr "+m,+d,+v")
set(target_attr_additional "--target-abi=lp64d" "--relocation-model=pic" "-riscv-v-vector-bits-min=128")
else()
# Generate x86 object file.
set(target_triple "${BUDDY_OPT_TRIPLE}")
set(target_attr "${BUDDY_OPT_ATTR}")
endif()

set(mlir_opt_commands "${LLVM_MLIR_BINARY_DIR}/mlir-opt")
foreach(flag IN LISTS mlir_opt_flags)
list(APPEND mlir_opt_commands "${flag}")
endforeach()

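# Pipeline: pipe the kernel through sed (optional symbol rename), mlir-opt
# (lowering passes), mlir-translate (LLVM IR), and llc (target object file).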
add_custom_command(OUTPUT ${output_file}
COMMAND cat ${BUDDY_SOURCE_DIR}/benchmarks/OpOptimization/AffineOps/${mlir_file} |
sed '${sed_command}' | ${mlir_opt_commands} |
${LLVM_MLIR_BINARY_DIR}/mlir-translate ${mlir_translate_flags} |
${LLVM_MLIR_BINARY_DIR}/llc -O0 -mtriple=${target_triple} -mattr=${target_attr}
${target_attr_additional} --filetype=obj
-o ${BUDDY_BINARY_DIR}/../benchmarks/OpOptimization/AffineOps/${output_file}
)
endfunction()

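# Build one static library per vector width. The sed substitution renames the
# kernel symbol (e.g. add2d -> add2d_vector_8) so that every width links into
# the benchmark as a distinct function.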
function(build_function_vector func_name step)
string(TOLOWER ${func_name} func_name_lower_case)
add_e2e_custom_command(
"${func_name}${step}.o"
"${func_name}.mlir"
"s/${func_name_lower_case}/${func_name_lower_case}_vector_${step}/g"
"-affine-super-vectorize=\"virtual-vector-size=${step};vectorize-reductions=true\";-convert-vector-to-scf;-lower-affine;-convert-scf-to-cf;-convert-vector-to-llvm;-finalize-memref-to-llvm;-llvm-request-c-wrappers;-convert-func-to-llvm;-reconcile-unrealized-casts"
"--mlir-to-llvmir"
)
add_library(${func_name}Vector${step} STATIC "${func_name}${step}.o")
target_link_libraries(${func_name}Vector${step} -static)
set_target_properties(${func_name}Vector${step} PROPERTIES LINKER_LANGUAGE CXX)
endfunction()

function(build_function_all func_name)
add_e2e_custom_command(
"${func_name}.o"
"${func_name}.mlir"
""
"-lower-affine;-convert-scf-to-cf;-convert-vector-to-llvm;-finalize-memref-to-llvm;-llvm-request-c-wrappers;-convert-func-to-llvm;-reconcile-unrealized-casts"
"--mlir-to-llvmir"
)
add_library(${func_name} STATIC "${func_name}.o")
target_link_libraries(${func_name} -static)
set_target_properties(${func_name} PROPERTIES LINKER_LANGUAGE CXX)

build_function_vector(${func_name} 8)
build_function_vector(${func_name} 16)
build_function_vector(${func_name} 32)
build_function_vector(${func_name} 64)
build_function_vector(${func_name} 128)
endfunction()

build_function_all("Add2D")
build_function_all("Reduction")
build_function_all("Matmul2D")

add_executable(affine-ops-benchmark Main.cpp AffineOpsBenchmark.cpp)
target_link_libraries(affine-ops-benchmark -static
GoogleBenchmark
Add2D
Add2DVector8
Add2DVector16
Add2DVector32
Add2DVector64
Add2DVector128
Reduction
ReductionVector8
ReductionVector16
ReductionVector32
ReductionVector64
ReductionVector128
Matmul2D
Matmul2DVector8
Matmul2DVector16
Matmul2DVector32
Matmul2DVector64
Matmul2DVector128
)

28 changes: 28 additions & 0 deletions benchmarks/OpOptimization/AffineOps/Main.cpp
@@ -0,0 +1,28 @@
//===- Main.cpp -----------------------------------------------------------===//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
//
// This is the main file of the affine ops benchmark.
//
//===----------------------------------------------------------------------===//

#include <benchmark/benchmark.h>

int main(int argc, char **argv) {
// Run benchmark.
::benchmark::Initialize(&argc, argv);
::benchmark::RunSpecifiedBenchmarks();
return 0;
}
21 changes: 21 additions & 0 deletions benchmarks/OpOptimization/AffineOps/Matmul2D.mlir
@@ -0,0 +1,21 @@
func.func @matmul2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%M = memref.dim %arg0, %c0 : memref<?x?xf32>
%K = memref.dim %arg0, %c1 : memref<?x?xf32>
%N = memref.dim %arg2, %c1 : memref<?x?xf32>

affine.for %i2 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) {
affine.for %i3 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) {
affine.for %i4 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%K) {
%6 = affine.load %arg1[%i4, %i3] : memref<?x?xf32>
%7 = affine.load %arg0[%i2, %i4] : memref<?x?xf32>
%8 = arith.mulf %7, %6 : f32
%9 = affine.load %arg2[%i2, %i3] : memref<?x?xf32>
%10 = arith.addf %9, %8 : f32
affine.store %10, %arg2[%i2, %i3] : memref<?x?xf32>
}
}
}
return
}
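The `affine_map<(d0) -> (d0)>` bounds above are identity maps, so the kernel is a plain i-j-k loop nest. A rough semantic sketch in C++ (not part of the benchmark; row-major buffers assumed):

```cpp
// Semantic equivalent of @matmul2d: C[i][j] += A[i][k] * B[k][j].
void matmul2d_ref(const float *A, const float *B, float *C, int M, int N, int K) {
  for (int i = 0; i < M; ++i)
    for (int j = 0; j < N; ++j)
      for (int k = 0; k < K; ++k)
        C[i * N + j] += A[i * K + k] * B[k * N + j];
}
```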
16 changes: 16 additions & 0 deletions benchmarks/OpOptimization/AffineOps/Reduction.mlir
@@ -0,0 +1,16 @@
func.func @reduction(%in: memref<?x?xf32>, %out: memref<?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%M = memref.dim %in, %c0 : memref<?x?xf32>
%N = memref.dim %in, %c1 : memref<?x?xf32>
%cst = arith.constant 0.000000e+00 : f32
affine.for %i = 0 to %M {
%final_red = affine.for %j = 0 to %N iter_args(%red_iter = %cst) -> (f32) {
%ld = affine.load %in[%i, %j] : memref<?x?xf32>
%add = arith.addf %red_iter, %ld : f32
affine.yield %add : f32
}
affine.store %final_red, %out[%i] : memref<?xf32>
}
return
}
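The `iter_args` clause threads a per-row accumulator through the inner loop. A rough C++ equivalent of the kernel's semantics (a sketch for readers unfamiliar with `affine.for` reductions; row-major buffer assumed):

```cpp
// Semantic equivalent of @reduction: out[i] is the sum of row i of in.
void reduction_ref(const float *in, float *out, int M, int N) {
  for (int i = 0; i < M; ++i) {
    float acc = 0.0f;          // %red_iter starts at %cst (0.0)
    for (int j = 0; j < N; ++j)
      acc += in[i * N + j];    // affine.yield feeds the next iteration
    out[i] = acc;              // the final iter_args value is stored to %out
  }
}
```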
5 changes: 3 additions & 2 deletions benchmarks/OpOptimization/CMakeLists.txt
@@ -1,2 +1,3 @@
add_subdirectory(Conv2dNchwFchw)
add_subdirectory(MatMul)
add_subdirectory(Conv2DNchwFchw)
add_subdirectory(Matmul)
add_subdirectory(AffineOps)