diff --git a/include/onnx-mlir/Compiler/OMCompilerRuntimeTypes.h b/include/onnx-mlir/Compiler/OMCompilerRuntimeTypes.h index 476d0e6927..131064f51e 100644 --- a/include/onnx-mlir/Compiler/OMCompilerRuntimeTypes.h +++ b/include/onnx-mlir/Compiler/OMCompilerRuntimeTypes.h @@ -30,7 +30,8 @@ typedef enum { InstrumentBeforeOp = 0x0, InstrumentAfterOp = 0x1, InstrumentReportTime = 0x2, - InstrumentReportMemory = 0x4, + InstrumentReportMemory = 0x3, + InstrumentInit = 0x4, } InstrumentActions; /* Definition of setter/getter from a 64 bit unsigned int. Use 64 bit only to @@ -67,6 +68,11 @@ typedef enum { #define IS_INSTRUMENT_REPORT_MEMORY(x) \ ((x) & (0x1ull << (unsigned int)InstrumentReportMemory)) +#define SET_INSTRUMENT_INIT(x) \ + (x) = (x) | (0x1ull << (unsigned int)InstrumentInit) +#define IS_INSTRUMENT_INIT(x) \ + ((x) & (0x1ull << (unsigned int)InstrumentInit)) + /* Second - third byte. */ #define INSTRUMENT_OP_NAME_MASK 0x3Full /* Max 64 chars. */ #define SET_INSTRUMENT_OP_NAME_LEN(x, len) \ diff --git a/src/Accelerators/Accelerator.hpp b/src/Accelerators/Accelerator.hpp index aef158af8e..9e71118416 100644 --- a/src/Accelerators/Accelerator.hpp +++ b/src/Accelerators/Accelerator.hpp @@ -108,8 +108,8 @@ class Accelerator { //===--------------------------------------------------------------------===// /// Convert TensorType to MemRefType. - /// Acccelators may have special versions of TensorType. If not, override this - /// method and return nullptr. + /// Accelerators may have special versions of TensorType. If not, override + /// this method and return nullptr. virtual mlir::MemRefType convertTensorTypeToMemRefType( const mlir::TensorType tensorType) const = 0; diff --git a/src/Runtime/OMInstrument.inc b/src/Runtime/OMInstrument.inc index 92cae0c656..93eeb6fef6 100644 --- a/src/Runtime/OMInstrument.inc +++ b/src/Runtime/OMInstrument.inc @@ -165,7 +165,7 @@ FILE *getInstrumentFile() { fout = stdout; if (getenv("ONNX_MLIR_INSTRUMENT_FILE")) { char *fileName = getenv("ONNX_MLIR_INSTRUMENT_FILE"); - FILE *newFileHandle = fopen(fileName, "w"); + FILE *newFileHandle = fopen(fileName, "w+"); if (newFileHandle) { fout = newFileHandle; } @@ -193,28 +193,26 @@ void OMInstrumentInit() { if (!instrumentReportDisabled) { TimeInit(); } + + fprintf(fout, "==START-REPORT==\n"); } void OMInstrumentPoint(const char *opName, int64_t iTag, const char *nodeName) { if (instrumentReportDisabled) return; - // Initialize on first call. Would prefer to call explicitly, but there is - // currently no support for that. - static bool firstTime = true; - if (firstTime) { - OMInstrumentInit(); - firstTime = false; - } - // Detect which reporting we have to do here. uint64_t tag = iTag; + bool initInstrument = IS_INSTRUMENT_INIT(tag); bool isBefore = IS_INSTRUMENT_BEFORE_OP(tag); bool reportTime = !instrumentReportTimeDisabled && IS_INSTRUMENT_REPORT_TIME(tag); bool reportMem = !instrumentReportMemoryDisabled && IS_INSTRUMENT_REPORT_MEMORY(tag); + if (initInstrument) + OMInstrumentInit(); + if (!reportTime && !reportMem) { fprintf(fout, "==TICK-REPORT==, %i\n", instrumentCounter++); return; diff --git a/src/Transform/ONNX/InstrumentPass.cpp b/src/Transform/ONNX/InstrumentPass.cpp index 15960e8d2b..07c72c1bde 100644 --- a/src/Transform/ONNX/InstrumentPass.cpp +++ b/src/Transform/ONNX/InstrumentPass.cpp @@ -132,6 +132,7 @@ class InstrumentPass if (instrumentOps == "" || instrumentOps == "NONE") return; init(instrumentOps); + bool hasInitializedRuntime = false; // Iterate on the operations nested in this function getOperation().walk([&](mlir::Operation *op) -> WalkResult { @@ -154,13 +155,24 @@ class InstrumentPass if (std::regex_match(opName, re)) { Location loc = op->getLoc(); OpBuilder opBuilder(op); - if (instrumentBefore) - opBuilder.create(loc, op, beforeTag()); + if (instrumentBefore) { + uint64_t tag = beforeTag(); + if (!hasInitializedRuntime) { + SET_INSTRUMENT_INIT(tag); + hasInitializedRuntime = true; + } + opBuilder.create(loc, op, tag); + } // Can not insert after Op (e.g. ONNXYieldOP) with IsTerminator Trait if (instrumentAfter && !op->hasTrait()) { opBuilder.setInsertionPointAfter(op); - opBuilder.create(loc, op, afterTag()); + uint64_t tag = afterTag(); + if (!hasInitializedRuntime) { + SET_INSTRUMENT_INIT(tag); + hasInitializedRuntime = true; + } + opBuilder.create(loc, op, tag); } } } diff --git a/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-level.mlir b/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-level.mlir index 1ea2c55a79..90f9f85341 100644 --- a/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-level.mlir +++ b/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-level.mlir @@ -1,4 +1,6 @@ -// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZHighIR --instrument-stage=Onnx --instrument-ops=onnx.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime -tag="test" %s | FileCheck %s +// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZHighIR -profile-ir=Onnx %s | FileCheck %s + +// ----- func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> { %0 = "onnx.Add"(%arg0, %arg1) {onnx_node_name = "onnx.Add"} : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32> @@ -6,7 +8,7 @@ func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10 } // CHECK-LABEL: func.func @test_instrument_add_onnx -// CHECK: "krnl.runtime_instrument"() {nodeName = "onnx.Add", opName = "onnx.Add", tag = 5 : i64} : () -> () +// CHECK: "krnl.runtime_instrument"() {nodeName = "onnx.Add", opName = "onnx.Add", tag = 21 : i64} : () -> () // CHECK: "zhigh.Stick" // CHECK: "zhigh.Stick" // CHECK: "zhigh.Add" diff --git a/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-zhigh-level.mlir b/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-zhigh-level.mlir index 7a3804322c..f85e8021e7 100644 --- a/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-zhigh-level.mlir +++ b/test/mlir/accelerators/nnpa/conversion/instrument/add-onnx-zhigh-level.mlir @@ -1,4 +1,6 @@ -// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZHighIR --instrument-stage=ZHigh --instrument-ops="onnx.*,zhigh.*" --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime -tag="test" %s | FileCheck %s +// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZHighIR --profile-ir=ZHigh %s | FileCheck %s + +// ----- func.func @test_instrument_add_onnx_zhigh(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x1xf32>) -> tensor<*xf32> { %0 = "onnx.Add"(%arg0, %arg1) {onnx_node_name = "onnx.Add1"} : (tensor<10x10xf32>, tensor<10x1xf32>) -> tensor<*xf32> @@ -7,7 +9,7 @@ func.func @test_instrument_add_onnx_zhigh(%arg0 : tensor<10x10xf32>, %arg1 : ten } // CHECK-LABEL: func.func @test_instrument_add_onnx_zhigh -// CHECK: "krnl.runtime_instrument"() {nodeName = "onnx.Add1", opName = "onnx.Add", tag = 5 : i64} : () -> () +// CHECK: "krnl.runtime_instrument"() {nodeName = "onnx.Add1", opName = "onnx.Add", tag = 21 : i64} : () -> () // CHECK: "onnx.Add" // CHECK: "krnl.runtime_instrument"() {nodeName = "onnx.Add1", opName = "onnx.Add", tag = 6 : i64} : () -> () // CHECK: "krnl.runtime_instrument"() {opName = "zhigh.Stick", tag = 5 : i64} : () -> () diff --git a/test/mlir/accelerators/nnpa/conversion/instrument/add-zhigh-level.mlir b/test/mlir/accelerators/nnpa/conversion/instrument/add-zhigh-level.mlir index 236e0b9940..89f226d57f 100644 --- a/test/mlir/accelerators/nnpa/conversion/instrument/add-zhigh-level.mlir +++ b/test/mlir/accelerators/nnpa/conversion/instrument/add-zhigh-level.mlir @@ -1,4 +1,6 @@ -// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZLowIR --instrument-stage=ZHigh --instrument-ops="zhigh.*" --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime -tag="test" %s | FileCheck %s +// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZLowIR --instrument-stage=ZHigh --instrument-ops=zhigh.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime %s | FileCheck %s + +// ----- func.func @test_instrument_add_zhigh(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> { %0 = "onnx.Add"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32> @@ -6,7 +8,7 @@ func.func @test_instrument_add_zhigh(%arg0 : tensor<10x10xf32>, %arg1 : tensor<1 } // CHECK-LABEL: func.func @test_instrument_add_zhigh -// CHECK: "krnl.runtime_instrument"() {opName = "zhigh.Stick", tag = 5 : i64} : () -> () +// CHECK: "krnl.runtime_instrument"() {opName = "zhigh.Stick", tag = 21 : i64} : () -> () // CHECK: memref.alloc() // CHECK: "zlow.stick" // CHECK: "krnl.runtime_instrument"() {opName = "zhigh.Stick", tag = 6 : i64} : () -> () diff --git a/test/mlir/accelerators/nnpa/conversion/instrument/add-zlow-level.mlir b/test/mlir/accelerators/nnpa/conversion/instrument/add-zlow-level.mlir index b81f59e652..e71c98e640 100644 --- a/test/mlir/accelerators/nnpa/conversion/instrument/add-zlow-level.mlir +++ b/test/mlir/accelerators/nnpa/conversion/instrument/add-zlow-level.mlir @@ -1,4 +1,6 @@ -// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZLowIR --instrument-stage=ZLow --instrument-ops="zlow.*" --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime -tag="test" %s | FileCheck %s +// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --printIR --EmitZLowIR --instrument-stage=ZLow --instrument-ops=zlow.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime %s | FileCheck %s + +// ----- func.func @test_instrument_add_zlow(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> { %0 = "onnx.Add"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32> @@ -6,7 +8,7 @@ func.func @test_instrument_add_zlow(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10 } // CHECK-LABEL: func.func @test_instrument_add_zlow -// CHECK: "krnl.runtime_instrument"() {opName = "zlow.stick", tag = 5 : i64} : () -> () +// CHECK: "krnl.runtime_instrument"() {opName = "zlow.stick", tag = 21 : i64} : () -> () // CHECK: "zlow.stick" // CHECK: "krnl.runtime_instrument"() {opName = "zlow.stick", tag = 6 : i64} : () -> () // CHECK: memref.alloc() @@ -26,5 +28,3 @@ func.func @test_instrument_add_zlow(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10 // CHECK: "krnl.runtime_instrument"() {opName = "zlow.unstick", tag = 6 : i64} : () -> () // CHECK: return // CHECK: } - -// ----- diff --git a/test/mlir/conversion/instrument/add.mlir b/test/mlir/conversion/instrument/add.mlir index 27f080acb5..6984a27546 100644 --- a/test/mlir/conversion/instrument/add.mlir +++ b/test/mlir/conversion/instrument/add.mlir @@ -5,8 +5,10 @@ func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10 "onnx.Return"(%0) : (tensor<*xf32>) -> () } +// ----- + // CHECK-LABEL: func.func @test_instrument_add_onnx -// CHECK: "krnl.runtime_instrument"() {nodeName = "model/add1", opName = "onnx.Add", tag = 5 : i64} : () -> () +// CHECK: "krnl.runtime_instrument"() {nodeName = "model/add1", opName = "onnx.Add", tag = 21 : i64} : () -> () // CHECK: [[RES_:%.+]] = memref.alloc() // CHECK: affine.for [[I_0_:%.+]] = 0 to 10 { // CHECK: affine.for [[I_1_:%.+]] = 0 to 10 { @@ -20,4 +22,3 @@ func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10 // CHECK: return // CHECK: } -// ----- diff --git a/test/mlir/conversion/instrument/onnx_add.mlir b/test/mlir/conversion/instrument/onnx_add.mlir index 77a95d82f7..369a8140b5 100644 --- a/test/mlir/conversion/instrument/onnx_add.mlir +++ b/test/mlir/conversion/instrument/onnx_add.mlir @@ -1,12 +1,14 @@ // RUN: onnx-mlir --printIR --EmitMLIR --instrument-ops=onnx.Add --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime %s | FileCheck %s +// ----- + func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> { %0 = "onnx.Add"(%arg0, %arg1) {onnx_node_name = "model/add1"} : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32> "onnx.Return"(%0) : (tensor<*xf32>) -> () } // CHECK-LABEL: func.func @test_instrument_add_onnx -// CHECK: "krnl.runtime_instrument"() {nodeName = "model/add1", opName = "onnx.Add", tag = 5 : i64} : () -> () +// CHECK: "krnl.runtime_instrument"() {nodeName = "model/add1", opName = "onnx.Add", tag = 21 : i64} : () -> () // CHECK: [[RES_:%.+]] = memref.alloc() // CHECK: affine.for [[I_0_:%.+]] = 0 to 10 { // CHECK: affine.for [[I_1_:%.+]] = 0 to 10 { @@ -20,4 +22,3 @@ func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10 // CHECK: return // CHECK: } -// ----- diff --git a/test/mlir/conversion/instrument/onnx_all.mlir b/test/mlir/conversion/instrument/onnx_all.mlir index 5de913eaab..6d10244f7f 100644 --- a/test/mlir/conversion/instrument/onnx_all.mlir +++ b/test/mlir/conversion/instrument/onnx_all.mlir @@ -1,12 +1,14 @@ // RUN: onnx-mlir --printIR --EmitMLIR --instrument-ops=onnx.* --InstrumentBeforeOp --InstrumentAfterOp --InstrumentReportTime %s | FileCheck %s +// ----- + func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> { %0 = "onnx.Add"(%arg0, %arg1) {onnx_node_name = "model/add1"} : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32> "onnx.Return"(%0) : (tensor<*xf32>) -> () } // CHECK-LABEL: func.func @test_instrument_add_onnx -// CHECK: "krnl.runtime_instrument"() {nodeName = "model/add1", opName = "onnx.Add", tag = 5 : i64} : () -> () +// CHECK: "krnl.runtime_instrument"() {nodeName = "model/add1", opName = "onnx.Add", tag = 21 : i64} : () -> () // CHECK: [[RES_:%.+]] = memref.alloc() // CHECK: affine.for [[I_0_:%.+]] = 0 to 10 { // CHECK: affine.for [[I_1_:%.+]] = 0 to 10 { @@ -19,5 +21,3 @@ func.func @test_instrument_add_onnx(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10 // CHECK: "krnl.runtime_instrument"() {nodeName = "model/add1", opName = "onnx.Add", tag = 6 : i64} : () -> () // CHECK: return // CHECK: } - -// ----- diff --git a/utils/RunONNXModel.py b/utils/RunONNXModel.py index 3c10b30d43..5d55bc7cb2 100755 --- a/utils/RunONNXModel.py +++ b/utils/RunONNXModel.py @@ -550,6 +550,9 @@ def warning(msg): def data_without_top_bottom_quartile(data, percent): data = np.array(sorted(data)) trim = int(percent * data.size / 100.0) + if trim == 0 or data.size - 2 * trim < 1: + # Want at least one element, return as is. + return data return data[trim:-trim] diff --git a/utils/make-report.py b/utils/make-report.py index 67933571ba..dc5ad9325a 100755 --- a/utils/make-report.py +++ b/utils/make-report.py @@ -36,75 +36,59 @@ def print_usage(msg=""): if msg: print("Error:", msg, "\n") - print("make-report.py -[vh] [-c ] [-r ] [-l ]") - print(" [-s ] [--sort ] [--supported] [-u ] [-p ]") - print("") - print( - "Usage: Report statistics on compiler and runtime characteristics of onnx ops." - ) - print("") - print("Compile-time statistics are collected from a `onnx-mlir` compiler output") - print( - "with the `--opt-report` option equal to `Simd` or other supported sub-options." - ) - print("") - print( - "Runtime statistics are collected from the runtime output of a model compiled." - ) - print( - "with the `--profile-ir` option equal to `Onnx` or other supported sub-options." - ) - print("") - print( - "When both compile time and runtime statistics are provided at the same time," - ) - print( - "it will correlate the performance metrics with data gathered at compile time." - ) - print("") - print("Additional help.") - print(" If you need more specific info on individual success/failure, run ") - print( - " `onnx-mlir --debug-only=lowering-to-krnl` and look at the compiler output." - ) - print(" Use `-l 3` to correlate the node name printed here with compiler output.") - print("") - print("Parameters:") - print(" -c/--compile : File name containing the compile-time statistics") - print(" or runtime signature statistics.") - print( - " -r/--runtime : File name containing the runtime time statistics." - ) - print(" -a/--stats : Print specific statistics:") - print(" simd: Print simd optimization stats.") - print(" Default if a compile time file is given.") - print(" par: Print parallel optimization stats.") - print(" sig: Print signatures of op.") - print(" perf: Print runtime execution time of ops.") - print(" Default if no compile time file is given.") - print(" -l/--level : Print detailed level of statistics:") - print(" 0: Just count successful/unsuccessful ops (default).") - print(" 1: Also count reasons for success/failure.") - print(" 2: Also list metrics.") - print(" 3: Also list node name.") - print(" -f/--focus : Focus only on ops that match the regexp pattern.") print( - " -supported: Focus only on ops that are supported. Namely, the report" + """ +Usage: Report statistics on compiler and runtime characteristics of ONNX ops. +make-report.py -[vh] [-c ] [-r ] [-l ] + [-s ] [--sort ] [--supported] [-u ] [-p ] + [-w ] + +Compile-time statistics are collected from a `onnx-mlir` compiler output +with the `--opt-report` option equal to `Simd` or other supported sub-options. + +Runtime statistics are collected from the runtime output of a model compiled. +with the `--profile-ir` option equal to `Onnx` or other supported sub-options. + +When both compile time and runtime statistics are provided at the same time, +it will correlate the performance metrics with data gathered at compile time. + +Additional help. + If you need more specific info on individual success/failure, run + `onnx-mlir --debug-only=lowering-to-krnl` and look at the compiler output. + Use `-l 3` to correlate the node name printed here with compiler output. + +Parameters: + -c/--compile : File name containing the compile-time statistics + or runtime signature statistics. + -r/--runtime : File name containing the runtime time statistics. + + -a/--stats : Print specific statistics: + simd: Print simd optimization stats. + Default if a compile time file is given. + par: Print parallel optimization stats. + sig: Print signatures of op. + perf: Print runtime execution time of ops. + Default if no compile time file is given. + -l/--level : Print detailed level of statistics: + 0: Just count successful/unsuccessful ops (default). + 1: Also count reasons for success/failure. + 2: Also list metrics. + 3: Also list node name. + -f/--focus : Focus only on ops that match the regexp pattern. + -supported: Focus only on ops that are supported. Namely, the report + will skip ops for which compile-time statistics list + the 'unsupported' keyword in its printout. + For SIMD/parallel statistics, this include all ops that + have currently no support for it. + -u/--unit : Time in second ('s', default), millisecond ('ms') or + microsecond ('us). + --sort : Sort output by op 'name', occurrence 'num' or `time`. + -v/--verbose: Run in verbose mode (see error and warnings). + -w/--warmup : If multiple runtime statistics are given, ignore the first + stats. Default is zero. + -h/--help: Print usage. + """ ) - print(" will skip ops for which compile-time statistics list") - print(" the 'unsupported' keyword in its printout.") - print( - " For SIMD/parallel statistics, this include all ops that" - ) - print(" have currently no support for it.") - print(" -u/--unit : Time in second ('s', default), millisecond ('ms') or") - print(" microsecond ('us).") - print( - " --sort : Sort output by op 'name', occurrence 'num' or `time`." - ) - print(" -v/--verbose: Run in verbose mode (see error and warnings).") - print(" -h/--help: Print usage.") - print("") exit(1) @@ -123,7 +107,6 @@ def print_usage(msg=""): tot_time = 0 # total time. focus_on_op_with_pattern = r".*" -spurious_node_name_count = 0 error_missing_time = 0 supported_only = False has_timing = False @@ -138,6 +121,10 @@ def common_report_str(stat_name): return r"^==" + stat_name + r"-REPORT==,\s*([0-9a-zA-Z\.\-]+)\s*,\s*([^,]*),\s*(.*)" +def match_start_report(line): + return re.match(r"==START-REPORT==", line) + + # ==SIMD-REPORT==, ..., , , simd_legend = ( "message, SIMD vector length (in elements), SIMD loop trip count (-1 is runtime)" @@ -248,7 +235,7 @@ def record_pattern(op, node_name, detail_key): def parse_line(line, report_str, is_perf_stat): global focus_on_op_with_pattern, supported_only - global verbose, spurious_node_name_count + global verbose p = re.match(report_str, line) if p is None: @@ -272,8 +259,6 @@ def parse_line(line, report_str, is_perf_stat): return (True, op, node_name, details) -parse_line.last_node_name = "" - ################################################################################ # Parse file for statistics. @@ -305,7 +290,16 @@ def parse_file_for_stat(file_name, stat_name): report_str = common_report_str(stat_name) is_perf_stat = re.match(r"PERF", stat_name) + start_count = 0 for line in file: + line = line.rstrip() + + # Only gather stats for the first set of measurement. + if match_start_report(line): + start_count += 1 + if start_count >= 2: + break + # Parse line. (has_stat, op, node_name, details) = parse_line( line.rstrip(), report_str, is_perf_stat @@ -332,9 +326,9 @@ def parse_file_for_stat(file_name, stat_name): # Parse file for performance -def parse_file_for_perf(file_name, stat_name): +def parse_file_for_perf(file_name, stat_name, warmup_num=0): global node_time_dict, tot_time - global spurious_node_name_count, verbose, has_timing + global verbose, has_timing try: file = open(file_name, "r") @@ -342,31 +336,88 @@ def parse_file_for_perf(file_name, stat_name): print_usage("Could not open file `" + file_name + "`") report_str = common_report_str(stat_name) - time_stat_dict = {} # op+op_name -> numpy array of times - last_node_name = "" + + # Skip lines until the first start report + has_start = False + start_count = 0 for line in file: - # Parse line. - (has_stat, op, node_name, details) = parse_line(line.rstrip(), report_str, True) - if not has_stat: - continue - # Keep only after times. - detail_array = details.split(",") - key = get_timing_key(op, node_name) - time = float(detail_array[1]) - tot_time += time - time_stat_dict[key] = append_to_dict_entry(time_stat_dict, key, time) - - # Normally, an - pair should be seen only once in a run, - # except for loops and some other circumstances (e.g. multiple dim op for - # a given original onnx op). Because in any case, the report will be done - # on visiting N time a op-nodename combination that has N instances, and - # thus adding N times the value from "node_time_dict[node]", we must take - # here the average. We loose distinguishing the variability of the timing - # of the same op with same op name, but this is ok. Taking the sum is just - # wrong, as we would add N times the sum of the N time measurements. - for node in time_stat_dict: - node_time_dict[node] = np.average(time_stat_dict[node]) + line = line.rstrip() + if match_start_report(line): + has_start = True + break # On to the first measurement. + if not has_start: + # Failed to have any timing info, stop. + return + + time_stat_dict_all_meas = {} # op+op_name -> numpy array of times + while has_start: + time_stat_dict = {} # op+op_name -> numpy array of times + has_start = False + start_count += 1 + meas_tot_time = 0 + for line in file: + line = line.rstrip() + # New measurement set? + if match_start_report(line): + has_start = True + break # On to the next measurement. + + # Parse line. + (has_stat, op, node_name, details) = parse_line(line, report_str, True) + if not has_stat: + continue + # Keep only after times. + detail_array = details.split(",") + key = get_timing_key(op, node_name) + time = float(detail_array[1]) + meas_tot_time += time + time_stat_dict[key] = append_to_dict_entry(time_stat_dict, key, time) + + # Has reach the end or a new START_REPORT, summarize this. + + # Normally, an - pair should be seen only once in a run, + # except for loops and some other circumstances (e.g. multiple dim op for + # a given original onnx op). Because in any case, the report will be done + # on visiting N time a op-nodename combination that has N instances, and + # thus adding N times the value from "node_time_dict[node]", we must take + # here the average. We loose distinguishing the variability of the timing + # of the same op with same op name, but this is ok. Taking the sum is just + # wrong, as we would add N times the sum of the N time measurements. + for node in time_stat_dict: + time = np.average(time_stat_dict[node]) + time_stat_dict_all_meas[node] = append_to_dict_entry( + time_stat_dict_all_meas, node, time + ) + # Add a "tot time" entry. + time_stat_dict_all_meas["tot_time"] = append_to_dict_entry( + time_stat_dict_all_meas, "tot_time", meas_tot_time + ) + + # Encountered now all the measurements, has no more. + meas_num = start_count - warmup_num + assert meas_num > 0, "expected at least one set of measurement after warmups" + discard_num = int(meas_num / 4) + print( + "Gather stats from", + start_count, + "measurement sets with", + warmup_num, + "warmup; keep inner", + meas_num - 2 * discard_num, + "experiment(s)", + ) + for node in time_stat_dict_all_meas: + # Verify that we have the right number of measurements. + time_array = time_stat_dict_all_meas[node][warmup_num:] + assert time_array.size == meas_num + if discard_num > 0 and meas_num - 2 * discard_num > 0: + time_array = np.sort(time_array) + time_array = time_array[discard_num:-discard_num] + node_time_dict[node] = np.average(time_array) + # Success. has_timing = True + tot_time = node_time_dict["tot_time"] + del node_time_dict["tot_time"] ################################################################################ @@ -392,7 +443,7 @@ def make_report(stat_message): global op_count_dict, op_detail_count_dict global op_time_dict, op_detail_time_dict, tot_time global has_timing, time_unit, error_missing_time - global report_level, supported_only, verbose, spurious_node_name_count + global report_level, supported_only, verbose global sorting_preference # Gather statistics in a dictionary so that we may sort the entries. @@ -478,14 +529,7 @@ def make_report(stat_message): print("Statistics end" + stat_details) # Report spurious node name if any. - if spurious_node_name_count: - if error_missing_time: - print("> Spurious node name were detected.") - print("> Timing information was missing for some of the nodes.") - else: - print("> Spurious node name were detected and fixed.") - print("> Run with `-v` for detailed list of fixes and errors.") - elif error_missing_time: + if error_missing_time: print("> Timing information was missing for some of the nodes.") print("> Run with `-v` for detailed list of errors.") @@ -502,10 +546,11 @@ def main(argv): runtime_file_name = "" make_stats = "" make_legend = "" + warmup_num = 0 try: opts, args = getopt.getopt( argv, - "c:f:hl:r:s:u:v", + "c:f:hl:r:s:u:vw:", [ "compile=", "focus=", @@ -516,6 +561,7 @@ def main(argv): "supported", "unit=", "verbose", + "warmup=", ], ) except getopt.GetoptError: @@ -572,6 +618,8 @@ def main(argv): print_usage("time units are 's', 'ms', or 'us'") elif opt in ("-v", "--verbose"): verbose = True + elif opt in ("-w", "--warmup"): + warmup_num = int(arg) # Default stats. if not make_stats: @@ -591,14 +639,14 @@ def main(argv): else: sorting_preference = "name" if compile_file_name and runtime_file_name: - parse_file_for_perf(runtime_file_name, "PERF") + parse_file_for_perf(runtime_file_name, "PERF", warmup_num) parse_file_for_stat(compile_file_name, make_stats) make_report(make_legend) elif compile_file_name: parse_file_for_stat(compile_file_name, make_stats) make_report(make_legend) elif runtime_file_name: - parse_file_for_perf(runtime_file_name, "PERF") + parse_file_for_perf(runtime_file_name, "PERF", warmup_num) parse_file_for_stat(runtime_file_name, "PERF") make_report(make_legend) else: