Skip to content

Commit

Permalink
Add a framework for NNPA op placement heuristics (#2541)
Browse files Browse the repository at this point in the history
Signed-off-by: Alexandre Eichenberger <[email protected]>
  • Loading branch information
AlexandreEichenberger authored Oct 12, 2023
1 parent 1ccfbfc commit 4c213b7
Show file tree
Hide file tree
Showing 15 changed files with 1,085 additions and 320 deletions.
17 changes: 12 additions & 5 deletions src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,17 @@ llvm::cl::opt<std::string> nnpaSaveDevicePlacementFile{
llvm::cl::desc("Save device placement configuration to a JSON file."),
llvm::cl::init(""), llvm::cl::cat(OnnxMlirOptions)};

llvm::cl::opt<bool> nnpaEnableZHighPerfModel("enable-zhigh-perf-model",
llvm::cl::desc("Enabling performance cost model to estimate if ONNX "
"operations will be faster on the NNPA or the CPU. Works "
"best with static shapes. Default is false."),
llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));
// Command-line option selecting which NNPAPlacementHeuristic the device
// placement pass uses when deciding whether an ONNX op is assigned to the
// NNPA device. Defaults to QualifyingOps (place every qualifying op on NNPA).
llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic{
    "nnpa-placement-heuristic",
    llvm::cl::desc(
        "[Optional] Choose NNPA-related heuristic to place operations "
        "on NNPA device:"),
    llvm::cl::values(
        clEnumVal(QualifyingOps, "Place all qualifying ops on NNPA (default)"),
        clEnumVal(FasterOps, "Place qualifying ops that are faster on NNPA"),
        clEnumVal(FasterOpsWSU, "FasterOps with stick/unstick cost"),
        clEnumVal(MuchFasterOpsWSU,
            "Much/Significantly FasterOps with stick/unstick cost")),
    llvm::cl::init(QualifyingOps), llvm::cl::cat(OnnxMlirOptions)};

} // namespace onnx_mlir
9 changes: 8 additions & 1 deletion src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,18 @@ typedef enum {
EmitZHighIR,
} NNPAEmissionTargetType;

// Heuristics available to the device placement pass for choosing whether an
// ONNX op runs on the NNPA accelerator or stays on the CPU.
typedef enum {
  QualifyingOps,    /* Any ops that qualify for NNPA will go on NNPA. */
  FasterOps,        /* Only qualifying ops that are faster on NNPA. */
  FasterOpsWSU,     /* FasterOps with Stick and Unstick (WSU) cost. */
  MuchFasterOpsWSU, /* FasterOpsWSU only if significantly faster. */
} NNPAPlacementHeuristic;

extern llvm::cl::OptionCategory OnnxMlirOptions;
extern llvm::cl::opt<onnx_mlir::NNPAEmissionTargetType> nnpaEmissionTarget;
extern llvm::cl::opt<bool> nnpaClipToDLFloatRange;
extern llvm::cl::opt<bool> nnpaEnableZHighToOnnx;
extern llvm::cl::opt<bool> nnpaEnableZHighPerfModel;
extern llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic;
extern llvm::cl::opt<bool> profileZHighIR;
extern llvm::cl::opt<std::string> nnpaLoadDevicePlacementFile;
extern llvm::cl::opt<std::string> nnpaSaveDevicePlacementFile;
Expand Down
2 changes: 1 addition & 1 deletion src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
if (emissionTarget >= EmitONNXIR) {
addONNXToMLIRPasses(pm, /*target CPU*/ maccel.empty());
pm.addPass(onnx_mlir::createDevicePlacementPass(nnpaLoadDevicePlacementFile,
nnpaSaveDevicePlacementFile, nnpaEnableZHighPerfModel));
nnpaSaveDevicePlacementFile, nnpaPlacementHeuristic));
}

if (emissionTarget >= EmitMLIR) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ add_onnx_mlir_library(OMZHighToONNX

add_onnx_mlir_library(OMDevicePlacement
DevicePlacement.cpp
DevicePlacementHeuristic.cpp
PerfModel.cpp

DEPENDS
Expand Down
70 changes: 42 additions & 28 deletions src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"

#include "src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp"
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacementHeuristic.hpp"
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.hpp"
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp"
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/PerfModel.hpp"
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/RewriteONNXForZHigh.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
#include "src/Pass/Passes.hpp"
Expand All @@ -60,12 +61,14 @@ struct DevicePlacementPass

DevicePlacementPass() = default;
DevicePlacementPass(const DevicePlacementPass &pass)
: PassWrapper<DevicePlacementPass, OperationPass<ModuleOp>>() {}
: PassWrapper<DevicePlacementPass, OperationPass<ModuleOp>>() {
this->placementHeuristic = QualifyingOps;
}
DevicePlacementPass(std::string loadConfigFile, std::string saveConfigFile,
bool useZHighPerfModel) {
NNPAPlacementHeuristic placementHeuristic) {
this->loadConfigFile = loadConfigFile;
this->saveConfigFile = saveConfigFile;
this->useZHighPerfModel = useZHighPerfModel;
this->placementHeuristic = placementHeuristic;
}

StringRef getArgument() const override { return "device-placement"; }
Expand All @@ -82,9 +85,28 @@ struct DevicePlacementPass
llvm::cl::desc("Path to load a device configuration file in JSON format"),
llvm::cl::init("")};

Option<bool> useZHighPerfModel{*this, "use-zhigh-perf-model",
llvm::cl::desc("Enable ZHigh cost model for ops on NNPA vs CPU"),
// Heuristic driving this pass's placement policy; set by the constructor (or
// left at its default) and possibly overridden by the use-XXX pass options
// below via initPlacementHeuristic().
NNPAPlacementHeuristic placementHeuristic;
// The use-XXX options are listed in decreasing order of priority; if multiple
// are selected, the highest-priority one wins.
Option<bool> useMuchFasterWithStickOps{*this, "use-much-faster-wsu",
    // Fixed copy-paste: this flag selects MuchFasterOpsWSU, so its help text
    // must not repeat the use-faster-wsu description.
    llvm::cl::desc(
        "Enable MuchFasterOpsWithStickUnstick NNPAPlacementHeuristic"),
    llvm::cl::init(false)};
Option<bool> useFasterWithStickOps{*this, "use-faster-wsu",
    llvm::cl::desc("Enable FasterOpsWithStickUnstick NNPAPlacementHeuristic"),
    llvm::cl::init(false)};
Option<bool> useFasterOps{*this, "use-faster",
    llvm::cl::desc("Enable FasterOps NNPAPlacementHeuristic"),
    llvm::cl::init(false)};
// Override the configured placement heuristic from the pass's use-XXX flags.
// Flags are consulted in decreasing order of priority, so when several are
// set only the strongest takes effect; with no flag set, placementHeuristic
// is left untouched.
void initPlacementHeuristic() {
  if (useMuchFasterWithStickOps) {
    placementHeuristic = MuchFasterOpsWSU;
    return;
  }
  if (useFasterWithStickOps) {
    placementHeuristic = FasterOpsWSU;
    return;
  }
  if (useFasterOps)
    placementHeuristic = FasterOps;
}

void runOnOperation() final;

Expand Down Expand Up @@ -189,26 +211,18 @@ void DevicePlacementPass::runOnOperation() {
OpSetType cpuOps = llvm::set_intersection(
legalizedOps1, llvm::set_intersection(legalizedOps2, legalizedOps3));

// Now annotate accelerator operations in the IR with `device` attribute,
// according to the compiler decision.
for (Operation *op : ops) {
// Set device if it is empty or unavailable.
StringAttr device = op->getAttrOfType<mlir::StringAttr>(DEVICE_ATTRIBUTE);
if (device && !device.getValue().empty())
continue;
// Op that is legal (should remain on the CPU) as determined by compiler
// analysis.
if (cpuOps.contains(op))
continue;
// Now we have an operation that can work on the NNPA, check if its
// beneficial
if (useZHighPerfModel && !isOpFasterOnNNPA(op, &dimAnalysis)) {
op->setAttr(DEVICE_ATTRIBUTE, StringAttr::get(context, CPU_DEVICE));
continue;
}
// Compiler determined that we want this op on the NNPA, mark as such.
op->setAttr(DEVICE_ATTRIBUTE, StringAttr::get(context, NNPA_DEVICE));
}
initPlacementHeuristic();
if (placementHeuristic == QualifyingOps)
PlaceAllLegalOpsOnNNPA(context, ops, cpuOps);
else if (placementHeuristic == FasterOps)
PlaceBeneficialOpsOnNNPA(context, ops, &dimAnalysis, cpuOps);
else if (placementHeuristic == FasterOpsWSU)
PlaceBeneficialOpsOnNNPAWithStickUnstick(
context, module, ops, &dimAnalysis, cpuOps);
else if (placementHeuristic == MuchFasterOpsWSU)
PlaceBeneficialOpsOnNNPAWithStickUnstick(context, module, ops, &dimAnalysis,
cpuOps, /*min factor*/ 3.0, /*significant CPU Factor*/ 2.0,
/*significant NNPA Factor*/ 8.0);

// Create a JSON configuration file if required.
if (!saveConfigFile.empty())
Expand Down Expand Up @@ -306,9 +320,9 @@ std::unique_ptr<mlir::Pass> createDevicePlacementPass() {

std::unique_ptr<mlir::Pass> createDevicePlacementPass(
std::string loadConfigFile, std::string saveConfigFile,
bool useZHighPerfModel) {
NNPAPlacementHeuristic placementHeuristic) {
return std::make_unique<DevicePlacementPass>(
loadConfigFile, saveConfigFile, useZHighPerfModel);
loadConfigFile, saveConfigFile, placementHeuristic);
}

} // namespace onnx_mlir
Loading

0 comments on commit 4c213b7

Please sign in to comment.