From d4d492bffaf8b33abb7836e0e15b5459d17efcd4 Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Fri, 29 Oct 2021 16:03:47 -0700 Subject: [PATCH 01/23] add API for split with flag --- include/taco/index_notation/index_notation.h | 2 ++ src/index_notation/index_notation.cpp | 9 +++++++++ src/index_notation/provenance_graph.cpp | 7 ++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index 1ce260859..4d0bc6648 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -564,6 +564,8 @@ class IndexStmt : public util::IntrusivePtr { /// variable, a \textit{tail strategy} is employed such as emitting a variable /// sized loop that handles remaining iterations. /// Preconditions: splitFactor is a positive nonzero integer + IndexStmt splitUpDown(IndexVar i, IndexVar i1, IndexVar i2, bool split_up, size_t splitFactor) const; + IndexStmt split(IndexVar i, IndexVar i1, IndexVar i2, size_t splitFactor) const; // TODO: TailStrategy /// The divide transformation splits one index variable into diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 1e462e47a..ddc65ba4c 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1494,6 +1494,15 @@ IndexStmt IndexStmt::concretize() const { return stmt; } +IndexStmt IndexStmt::splitUpDown(IndexVar i, IndexVar i1, IndexVar i2, bool split_up, size_t splitFactor) const { + + if (split_up){ + return split(i, i1, i2, splitFactor); + } + return divide(i, i1, i2, splitFactor); + +} + IndexStmt IndexStmt::split(IndexVar i, IndexVar i1, IndexVar i2, size_t splitFactor) const { IndexVarRel rel = IndexVarRel(new SplitRelNode(i, i1, i2, splitFactor)); string reason; diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index c4fce6864..7448d2bab 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -918,7 +918,8 @@ ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { // No relations defined return; } - + // q: does a such node impose some restrictions on + // the value of the variable SuchThat suchThat = to(concreteStmt); vector relations = suchThat.getPredicate(); @@ -927,6 +928,9 @@ ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { std::vector children = rel.getNode()->getChildren(); for (IndexVar parent : parents) { nodes.insert(parent); + // q: childrelmap maps the + // parent to a constrained iteration + // space? childRelMap[parent] = rel; childrenMap[parent] = children; } @@ -1151,6 +1155,7 @@ bool ProvenanceGraph::isRecoverablePrecompute(taco::IndexVar indexVar, std::set< return isRecoverablePrecompute(precomputeChild, defined, producers, consumers); } for (const IndexVar& child : getChildren(indexVar)) { + // q: why is it !isRecoverablePrecompute? if (!defined.count(child) && (isFullyDerived(child) || !isRecoverablePrecompute(child, defined, producers, consumers))) { return false; From b7657218e4eb772b2d4a7659ce40980f45c51b6c Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Mon, 29 Nov 2021 19:24:54 -0800 Subject: [PATCH 02/23] add test for splitUpDown and bound attributes for IndexVars --- include/taco/index_notation/index_notation.h | 10 ++++ src/index_notation/index_notation.cpp | 20 +++++++ test/tests-scheduling.cpp | 57 ++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index 4d0bc6648..8ec16a688 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -993,6 +993,14 @@ class IndexVar : public util::Comparable, public IndexVarInterface { /// Returns the name of the index variable. std::string getName() const; + size_t& getBound() const; + + const BoundType& getBoundType() const; + + void setBoundType(BoundType boundType); + + void setBound(size_t bound); + friend bool operator==(const IndexVar&, const IndexVar&); friend bool operator<(const IndexVar&, const IndexVar&); @@ -1010,6 +1018,8 @@ class IndexVar : public util::Comparable, public IndexVarInterface { struct IndexVar::Content { std::string name; + size_t bound; + taco::BoundType boundType; }; struct WindowedIndexVar::Content { diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index ddc65ba4c..87977b3f7 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1677,6 +1677,10 @@ IndexStmt IndexStmt::fuse(IndexVar i, IndexVar j, IndexVar f) const { } IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { + + i.setBound(bound); + i.setBoundType(bound_type); + IndexVarRel rel = IndexVarRel(new BoundRelNode(i, i1, bound, bound_type)); string reason; @@ -2031,6 +2035,22 @@ std::string IndexVar::getName() const { return content->name; } +size_t& IndexVar::getBound() const { + return content->bound; +} + +const BoundType& IndexVar::getBoundType() const { + return content->boundType; +} + +void IndexVar::setBound(size_t bound){ + content->bound = bound; +} + +void IndexVar::setBoundType(BoundType boundType){ + content->boundType = boundType; +} + WindowedIndexVar IndexVar::operator()(int lo, int hi, int stride) { return WindowedIndexVar(*this, lo, hi, stride); } diff --git a/test/tests-scheduling.cpp b/test/tests-scheduling.cpp index 6019febf7..6a914d54f 100644 --- a/test/tests-scheduling.cpp +++ b/test/tests-scheduling.cpp @@ -990,3 +990,60 @@ TEST(scheduling, divide) { return stmt.fuse(i, j, f).pos(f, fpos, A(i, j)).divide(fpos, f0, f1, 4).split(f1, i1, i2, 16).split(i2, i3, i4, 8); }); } + + +// TEST(scheduling, splitUpDown) { +// auto dim = 256; +// float sparsity = 0.1; +// Tensor A("A", {dim, dim}, {Dense, Sparse}); +// Tensor x("x", {dim}, Dense); +// IndexVar i("i"), i1("i1"), i2("i2"), j("j"), f("f"), fpos("fpos"), f0("f0"), f1("f1"); + +// srand(59393); +// for (int i = 0; i < dim; i++) { +// for (int j = 0; j < dim; j++) { +// auto rand_float = (float)rand()/(float)(RAND_MAX); +// if (rand_float < sparsity) { +// A.insert({i, j},((int)(rand_float * 10 / sparsity))); +// } +// } +// } + +// for (int j = 0; j < dim; j++) { +// float rand_float = (float)rand()/(float)(RAND_MAX); +// x.insert({j}, ((int)(rand_float*10))); +// } + +// x.pack(); A.pack(); + +// auto test = [&](std::function f) { +// Tensor y("y", {dim}, Dense); +// y(i) = A(i, j) * x(j); +// auto stmt = f(y.getAssignment().concretize()); +// y.compile(stmt); +// y.evaluate(); +// Tensor expected("expected", {dim}, Dense); +// expected(i) = A(i, j) * x(j); +// expected.evaluate(); +// ASSERT_TRUE(equals(expected, y)) << expected << endl << y << endl; +// }; + +// // Test that a simple divide works. +// test([&](IndexStmt stmt) { +// return stmt.splitUpDown(i, i1, i2, 2, false); +// }); + +// // Test when the divide factor doesn't divide the dimension evenly. +// test([&](IndexStmt stmt) { +// return stmt.splitUpDown(i, i1, i2, 3, false); +// }); + +// // Test a more complicated case where we fuse loops and then divide them. +// test([&](IndexStmt stmt) { +// return stmt.fuse(i, j, f).pos(f, fpos, A(i, j)).splitUpDown(fpos, f0, f1, 2, false).splitUpDown(f1, i1, i2, 4, true); +// }); +// test([&](IndexStmt stmt) { +// IndexVar i3, i4; +// return stmt.fuse(i, j, f).pos(f, fpos, A(i, j)).splitUpDown(fpos, f0, f1, 4, false).splitUpDown(f1, i1, i2, 16, true).splitUpDown(i2, i3, i4, 8, true); +// }); +// } \ No newline at end of file From cbc872c3b106212539cfea2f6857dc198a95613d Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sun, 9 Jan 2022 11:36:42 -0800 Subject: [PATCH 03/23] start substituting bound rel function calls --- src/index_notation/provenance_graph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 7448d2bab..3dcfeb6ad 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -1320,7 +1320,7 @@ bool ProvenanceGraph::hasExactBound(IndexVar indexVar) const { IndexVarRel rel = parentRelMap.at(indexVar); if(rel.getRelType() == BOUND) { - return rel.getNode()->getBoundType() == BoundType::MaxExact; + return indexVar.getBoundType() == BoundType::MaxExact; } // TODO: include non-irregular variables return false; From 897bb0401781538e108a06a16ab7ccaceeae6c19 Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sat, 22 Jan 2022 13:29:55 -0800 Subject: [PATCH 04/23] add index notation falg --- include/taco/index_notation/index_notation.h | 5 +++++ src/index_notation/index_notation.cpp | 11 ++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index 8ec16a688..bea86ae46 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -1001,6 +1001,10 @@ class IndexVar : public util::Comparable, public IndexVarInterface { void setBound(size_t bound); + void bound(size_t bound, BoundType boundType); + + // void setBoundVar(IndexVar boundVar); + friend bool operator==(const IndexVar&, const IndexVar&); friend bool operator<(const IndexVar&, const IndexVar&); @@ -1020,6 +1024,7 @@ struct IndexVar::Content { std::string name; size_t bound; taco::BoundType boundType; + bool isbound = false; }; struct WindowedIndexVar::Content { diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 87977b3f7..d9d87706b 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1678,9 +1678,8 @@ IndexStmt IndexStmt::fuse(IndexVar i, IndexVar j, IndexVar f) const { IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { - i.setBound(bound); - i.setBoundType(bound_type); - + i.bound(bound, bound_type); + IndexVarRel rel = IndexVarRel(new BoundRelNode(i, i1, bound, bound_type)); string reason; @@ -2051,6 +2050,12 @@ void IndexVar::setBoundType(BoundType boundType){ content->boundType = boundType; } +void IndexVar::bound(size_t bound, BoundType boundType){ + content->isbound = true; + setBound(bound); + setBoundType(boundType); +} + WindowedIndexVar IndexVar::operator()(int lo, int hi, int stride) { return WindowedIndexVar(*this, lo, hi, stride); } From d6c5bb7085e7187ffa094520b0a3ec4483fed74e Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sat, 22 Jan 2022 14:21:56 -0800 Subject: [PATCH 05/23] don't add such that node for bound relation --- include/taco/index_notation/index_notation.h | 2 ++ src/index_notation/index_notation.cpp | 38 ++++++++++++-------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index bea86ae46..dcbae1193 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -701,6 +701,8 @@ class IndexStmt : public util::IntrusivePtr { /// Preconditions: /// The precondition for bound is that the computation bounds supplied are /// correct given the inputs that this code will be run on. + // IndexStmt bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const; + IndexStmt bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const; /// The unroll primitive unrolls the corresponding loop by a statically-known diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index d9d87706b..3d300fb0b 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1676,26 +1676,34 @@ IndexStmt IndexStmt::fuse(IndexVar i, IndexVar j, IndexVar f) const { return transformed; } -IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { +// IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { - i.bound(bound, bound_type); +// i.bound(bound, bound_type); - IndexVarRel rel = IndexVarRel(new BoundRelNode(i, i1, bound, bound_type)); - string reason; +// IndexVarRel rel = IndexVarRel(new BoundRelNode(i, i1, bound, bound_type)); +// string reason; - // Add predicate to concrete index notation - IndexStmt transformed = Transformation(AddSuchThatPredicates({rel})).apply(*this, &reason); - if (!transformed.defined()) { - taco_uerror << reason; - } +// // Add predicate to concrete index notation +// IndexStmt transformed = Transformation(AddSuchThatPredicates({rel})).apply(*this, &reason); +// if (!transformed.defined()) { +// taco_uerror << reason; +// } - // Replace all occurrences of i with i1 - transformed = Transformation(ForAllReplace({i}, {i1})).apply(transformed, &reason); - if (!transformed.defined()) { - taco_uerror << reason; - } +// // Replace all occurrences of i with i1 +// transformed = Transformation(ForAllReplace({i}, {i1})).apply(transformed, &reason); +// if (!transformed.defined()) { +// taco_uerror << reason; +// } + +// return transformed; +// } + +IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { + + i.bound(bound, bound_type); + + return *this; - return transformed; } IndexStmt IndexStmt::unroll(IndexVar i, size_t unrollFactor) const { From f2e5cfb92c31ae2543b2bf091362202d9a10be59 Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Wed, 26 Jan 2022 11:56:35 -0800 Subject: [PATCH 06/23] change derive iter --- include/taco/index_notation/index_notation.h | 4 +- src/index_notation/index_notation.cpp | 5 ++ src/index_notation/provenance_graph.cpp | 81 ++++++++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index dcbae1193..21530f361 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -1005,6 +1005,8 @@ class IndexVar : public util::Comparable, public IndexVarInterface { void bound(size_t bound, BoundType boundType); + bool isBound(); + // void setBoundVar(IndexVar boundVar); friend bool operator==(const IndexVar&, const IndexVar&); @@ -1026,7 +1028,7 @@ struct IndexVar::Content { std::string name; size_t bound; taco::BoundType boundType; - bool isbound = false; + bool isbound; }; struct WindowedIndexVar::Content { diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 3d300fb0b..2056fe584 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -2036,6 +2036,7 @@ IndexVar::IndexVar() : IndexVar(util::uniqueName('i')) {} IndexVar::IndexVar(const std::string& name) : content(new Content) { content->name = name; + content->isbound = false; } std::string IndexVar::getName() const { @@ -2064,6 +2065,10 @@ void IndexVar::bound(size_t bound, BoundType boundType){ setBoundType(boundType); } +bool IndexVar::isBound(){ + return content->isbound; +} + WindowedIndexVar IndexVar::operator()(int lo, int hi, int stride) { return WindowedIndexVar(*this, lo, hi, stride); } diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 3dcfeb6ad..c6e16afbc 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -205,6 +205,24 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map> parentCoordBounds, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { + + + // taco::IndexVar parent = getParentVar(); + if (indexVar.isBound()){ + + taco_iassert(parentCoordBounds.count(getParentVar()) == 1); + std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); + + if (indexVar.getBoundType() == BoundType::MaxExact) { + return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; + } + else { + taco_not_supported_yet; + } + return {}; + + } + taco_iassert(indexVar == getOuterVar() || indexVar == getInnerVar()); taco_iassert(parentIterBounds.size() == 1); taco_iassert(parentIterBounds.count(getParentVar()) == 1); @@ -351,6 +369,22 @@ std::vector DivideRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map> parentCoordBounds, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { + + if (indexVar.isBound()){ + + taco_iassert(parentCoordBounds.count(getParentVar()) == 1); + std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); + + if (indexVar.getBoundType() == BoundType::MaxExact) { + return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; + } + else { + taco_not_supported_yet; + } + return {}; + + } + taco_iassert(indexVar == getOuterVar() || indexVar == getInnerVar()); taco_iassert(parentIterBounds.size() == 1); taco_iassert(parentIterBounds.count(getParentVar()) == 1); @@ -461,6 +495,23 @@ std::vector PosRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { + + + if (indexVar.isBound()){ + + taco_iassert(parentCoordBounds.count(getParentVar()) == 1); + std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); + + if (indexVar.getBoundType() == BoundType::MaxExact) { + return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; + } + else { + taco_not_supported_yet; + } + return {}; + + } + taco_iassert(indexVar == getPosVar()); taco_iassert(parentCoordBounds.count(getParentVar()) == 1); std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); @@ -665,6 +716,21 @@ std::vector FuseRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map> parentCoordBounds, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { + if (indexVar.isBound()){ + + taco_iassert(parentCoordBounds.count(getParentVar()) == 1); + std::vector parentCoordBound = combineParentBounds(parentIterBounds[getOuterParentVar()], parentIterBounds[getInnerParentVar()]); + + if (indexVar.getBoundType() == BoundType::MaxExact) { + return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; + } + else { + taco_not_supported_yet; + } + return {}; + + } + taco_iassert(indexVar == getFusedVar()); taco_iassert(parentIterBounds.count(getOuterParentVar()) && parentIterBounds.count(getInnerParentVar())); return combineParentBounds(parentIterBounds[getOuterParentVar()], parentIterBounds[getInnerParentVar()]); @@ -875,6 +941,21 @@ std::vector PrecomputeRelNode::deriveIterBounds(taco::IndexVar indexVa std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { + if (indexVar.isBound()){ + + taco_iassert(parentCoordBounds.count(getParentVar()) == 1); + std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); + + if (indexVar.getBoundType() == BoundType::MaxExact) { + return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; + } + else { + taco_not_supported_yet; + } + return {}; + + } + taco_iassert(indexVar == getPrecomputeVar()); taco_iassert(parentIterBounds.count(getParentVar()) == 1); std::vector parentIterBound = parentIterBounds.at(getParentVar()); From aa1a11432c2c5bb166d521eedffdc21ef3c4935a Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Wed, 26 Jan 2022 12:16:44 -0800 Subject: [PATCH 07/23] bound rel node deleted, change workspace tests to reflect changes --- .../taco/index_notation/provenance_graph.h | 33 - out | 2736 +++++++++++++++++ src/index_notation/provenance_graph.cpp | 105 - 3 files changed, 2736 insertions(+), 138 deletions(-) create mode 100644 out diff --git a/include/taco/index_notation/provenance_graph.h b/include/taco/index_notation/provenance_graph.h index b291d51d9..f4fbad292 100644 --- a/include/taco/index_notation/provenance_graph.h +++ b/include/taco/index_notation/provenance_graph.h @@ -244,39 +244,6 @@ struct FuseRelNode : public IndexVarRelNode { bool operator==(const FuseRelNode&, const FuseRelNode&); -/// The bound relation allows expressing a constraint or value known at compile-time that allows for compile-time optimizations -struct BoundRelNode : public IndexVarRelNode { - BoundRelNode(IndexVar parentVar, IndexVar boundVar, size_t bound, BoundType boundType); - - const IndexVar& getParentVar() const; - const IndexVar& getBoundVar() const; - const size_t& getBound() const; - const BoundType& getBoundType() const; - - void print(std::ostream& stream) const; - bool equals(const BoundRelNode &rel) const; - std::vector getParents() const; // parentVar - std::vector getChildren() const; // boundVar - std::vector getIrregulars() const; // boundVar - - /// Coordinate bounds remain unchanged, only iteration bounds change - std::vector computeRelativeBound(std::set definedVars, std::map> computedBounds, std::map variableExprs, Iterators iterators, ProvenanceGraph provGraph) const; - - /// Constrained depending on bound_type - std::vector deriveIterBounds(IndexVar indexVar, std::map> parentIterBounds, std::map> parentCoordBounds, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const; - - /// parentVar = boundVar - ir::Expr recoverVariable(IndexVar indexVar, std::map variableNames, Iterators iterators, std::map> parentIterBounds, std::map> parentCoordBounds, ProvenanceGraph provGraph) const; - - /// boundVar = parentVar - ir::Stmt recoverChild(IndexVar indexVar, std::map relVariables, bool emitVarDecl, Iterators iterators, ProvenanceGraph provGraph) const; -private: - struct Content; - std::shared_ptr content; -}; - -bool operator==(const BoundRelNode&, const BoundRelNode&); - /// The precompute relation allows creating a new precomputeVar that is iterated over for the precompute loop and shares same sizes as parentVar /// This allows precomputeVar to be scheduled separately from the parentVar struct PrecomputeRelNode : public IndexVarRelNode { diff --git a/out b/out new file mode 100644 index 000000000..5b47862e4 --- /dev/null +++ b/out @@ -0,0 +1,2736 @@ +[==========] Running 914 tests from 288 test cases. +[----------] Global test environment set-up. +[----------] 21 tests from scheduling_eval +[ RUN ] scheduling_eval.test_spmvCPU_temp +[ OK ] scheduling_eval.test_spmvCPU_temp (641 ms) +[ RUN ] scheduling_eval.test_sptvCPU_temp +[ OK ] scheduling_eval.test_sptvCPU_temp (571 ms) +[ RUN ] scheduling_eval.example_spmvCPU_splitpos +[ OK ] scheduling_eval.example_spmvCPU_splitpos (130 ms) +[ RUN ] scheduling_eval.spmmCPU +[ OK ] scheduling_eval.spmmCPU (731 ms) +[ RUN ] scheduling_eval.spmataddCPU +[ OK ] scheduling_eval.spmataddCPU (673 ms) +[ RUN ] scheduling_eval.sptenaddCPU +[ OK ] scheduling_eval.sptenaddCPU (885 ms) +[ RUN ] scheduling_eval.sddmmCPU +[ OK ] scheduling_eval.sddmmCPU (943 ms) +[ RUN ] scheduling_eval.spmvCPU +[ OK ] scheduling_eval.spmvCPU (127 ms) +[ RUN ] scheduling_eval.precompute2D +[ OK ] scheduling_eval.precompute2D (632 ms) +[ RUN ] scheduling_eval.precompute1D +[ OK ] scheduling_eval.precompute1D (129 ms) +[ RUN ] scheduling_eval.ttvCPU +[ OK ] scheduling_eval.ttvCPU (760 ms) +[ RUN ] scheduling_eval.ttvCPU_CSR +[ OK ] scheduling_eval.ttvCPU_CSR (360 ms) +[ RUN ] scheduling_eval.ttmCPU +[ OK ] scheduling_eval.ttmCPU (983 ms) +[ RUN ] scheduling_eval.mttkrpCPU +[ OK ] scheduling_eval.mttkrpCPU (949 ms) +[ RUN ] scheduling_eval.spmvGPU +[ OK ] scheduling_eval.spmvGPU (0 ms) +[ RUN ] scheduling_eval.spmmGPU +[ OK ] scheduling_eval.spmmGPU (0 ms) +[ RUN ] scheduling_eval.spmmDCSRGPU +[ OK ] scheduling_eval.spmmDCSRGPU (0 ms) +[ RUN ] scheduling_eval.sddmmGPU +[ OK ] scheduling_eval.sddmmGPU (0 ms) +[ RUN ] scheduling_eval.ttmGPU +[ OK ] scheduling_eval.ttmGPU (0 ms) +[ RUN ] scheduling_eval.ttvGPU +[ OK ] scheduling_eval.ttvGPU (0 ms) +[ RUN ] scheduling_eval.mttkrpGPU +[ OK ] scheduling_eval.mttkrpGPU (0 ms) +[----------] 21 tests from scheduling_eval (8516 ms total) + +[----------] 6 tests from io +[ RUN ] io.tns +[ OK ] io.tns (179 ms) +[ RUN ] io.mtx +[ OK ] io.mtx (166 ms) +[ RUN ] io.tensor +[ OK ] io.tensor (3 ms) +[ RUN ] io.ttxdense +[ OK ] io.ttxdense (151 ms) +[ RUN ] io.ttxsparse +[ OK ] io.ttxsparse (180 ms) +[ RUN ] io.mtxsymmetric +[ OK ] io.mtxsymmetric (159 ms) +[----------] 6 tests from io (838 ms total) + +[----------] 6 tests from merge_lattice +[ RUN ] merge_lattice.split +[ OK ] merge_lattice.split (0 ms) +[ RUN ] merge_lattice.split_sparse +[ OK ] merge_lattice.split_sparse (0 ms) +[ RUN ] merge_lattice.dense_tile +[ OK ] merge_lattice.dense_tile (1 ms) +[ RUN ] merge_lattice.pos +[ OK ] merge_lattice.pos (0 ms) +[ RUN ] merge_lattice.pos_mul_sparse +[ OK ] merge_lattice.pos_mul_sparse (0 ms) +[ RUN ] merge_lattice.split_pos_sparse +[ OK ] merge_lattice.split_pos_sparse (0 ms) +[----------] 6 tests from merge_lattice (3 ms total) + +[----------] 2 tests from format +[ RUN ] format.sparse +[ OK ] format.sparse (1 ms) +[ RUN ] format.dense +[ OK ] format.dense (141 ms) +[----------] 2 tests from format (142 ms total) + +[----------] 2 tests from schedule_parser +[ RUN ] schedule_parser.normal_operation +string "i,j,k" parsed as: [ [ 'i', ], [ 'j', ], [ 'k', ], ] +string "i(j,k)" parsed as: [ [ 'i', 'j', 'k', ], ] +string "i(j,k),l(m,n)" parsed as: [ [ 'i', 'j', 'k', ], [ 'l', 'm', 'n', ], ] +string "i(j,k),l(m(n,o),p)" parsed as: [ [ 'i', 'j', 'k', ], [ 'l', 'm(n,o)', 'p', ], ] +string "i(j,k),l(m(n(o(p))),q)" parsed as: [ [ 'i', 'j', 'k', ], [ 'l', 'm(n(o(p)))', 'q', ], ] +string "i,j, k" parsed as: [ [ 'i', ], [ 'j', ], [ 'k', ], ] +string "i(j, k)" parsed as: [ [ 'i', 'j', 'k', ], ] +string "i(j,k), l(m,n)" parsed as: [ [ 'i', 'j', 'k', ], [ 'l', 'm', 'n', ], ] +string "i(j,k),l(m(n, o),p)" parsed as: [ [ 'i', 'j', 'k', ], [ 'l', 'm(n,o)', 'p', ], ] +string "i(j,k),l(m(n(o(p))), q)" parsed as: [ [ 'i', 'j', 'k', ], [ 'l', 'm(n(o(p)))', 'q', ], ] +string "" parsed as: [ ] +string ",j,k" parsed as: [ [ '', ], [ 'j', ], [ 'k', ], ] +string "i(,k)" parsed as: [ [ 'i', '', 'k', ], ] +string "(j,k)" parsed as: [ [ '', 'j', 'k', ], ] +string "i(j,),,l(m,n)" parsed as: [ [ 'i', 'j', '', ], [ '', ], [ 'l', 'm', 'n', ], ] +string "split(i,i0,i1,16)" parsed as: [ [ 'split', 'i', 'i0', 'i1', '16', ], ] +string "precompute(A(i,j)*x(j),i,i)" parsed as: [ [ 'precompute', 'A(i,j)*x(j)', 'i', 'i', ], ] +string "split(i,i0,i1,16),precompute(A(i,j)*x(j),i,i)" parsed as: [ [ 'split', 'i', 'i0', 'i1', '16', ], [ 'precompute', 'A(i,j)*x(j)', 'i', 'i', ], ] +[ OK ] schedule_parser.normal_operation (0 ms) +[ RUN ] schedule_parser.error_reporting +[ OK ] schedule_parser.error_reporting (1 ms) +[----------] 2 tests from schedule_parser (2 ms total) + +[----------] 7 tests from indexexpr +[ RUN ] indexexpr.access +[ OK ] indexexpr.access (0 ms) +[ RUN ] indexexpr.literal +[ OK ] indexexpr.literal (0 ms) +[ RUN ] indexexpr.neg +[ OK ] indexexpr.neg (0 ms) +[ RUN ] indexexpr.add +[ OK ] indexexpr.add (0 ms) +[ RUN ] indexexpr.sub +[ OK ] indexexpr.sub (0 ms) +[ RUN ] indexexpr.mul +[ OK ] indexexpr.mul (0 ms) +[ RUN ] indexexpr.div +[ OK ] indexexpr.div (0 ms) +[----------] 7 tests from indexexpr (0 ms total) + +[----------] 6 tests from notation +[ RUN ] notation.isEinsumNotation +[ OK ] notation.isEinsumNotation (0 ms) +[ RUN ] notation.isReductionNotation +[ OK ] notation.isReductionNotation (0 ms) +[ RUN ] notation.isConcreteNotation +[ OK ] notation.isConcreteNotation (1 ms) +[ RUN ] notation.makeReductionNotation +[ OK ] notation.makeReductionNotation (0 ms) +[ RUN ] notation.isomorphic +[ OK ] notation.isomorphic (0 ms) +[ RUN ] notation.generatePackCOOStmt +[ OK ] notation.generatePackCOOStmt (1 ms) +[----------] 6 tests from notation (2 ms total) + +[----------] 11 tests from workspaces +[ RUN ] workspaces.tile_vecElemMul_NoTail +Error at /home/manya227/taco/src/index_notation/index_notation.cpp:1519 in split: + The pattern of ForAlls: i_bounded was not found while attempting to replace with: i0, i1 +[ FAILED ] workspaces.tile_vecElemMul_NoTail (132 ms) +[ RUN ] workspaces.tile_vecElemMul_Tail1 +Error at /home/manya227/taco/src/index_notation/index_notation.cpp:1519 in split: + The pattern of ForAlls: i_bounded was not found while attempting to replace with: i0, i1 +[ FAILED ] workspaces.tile_vecElemMul_Tail1 (0 ms) +[ RUN ] workspaces.tile_vecElemMul_Tail2 +Error at /home/manya227/taco/src/index_notation/index_notation.cpp:1519 in split: + The pattern of ForAlls: i_bounded was not found while attempting to replace with: i0, i1 +[ FAILED ] workspaces.tile_vecElemMul_Tail2 (133 ms) +[ RUN ] workspaces.tile_denseMatMul +Error at /home/manya227/taco/src/index_notation/index_notation.cpp:1519 in split: + The pattern of ForAlls: i_bounded was not found while attempting to replace with: i0, i1 +[ FAILED ] workspaces.tile_denseMatMul (1 ms) +[ RUN ] workspaces.precompute2D_add +[ OK ] workspaces.precompute2D_add (289 ms) +[ RUN ] workspaces.precompute4D_add +[ OK ] workspaces.precompute4D_add (1350 ms) +[ RUN ] workspaces.precompute4D_multireduce +[ OK ] workspaces.precompute4D_multireduce (417 ms) +[ RUN ] workspaces.precompute3D_TspV +[ OK ] workspaces.precompute3D_TspV (438 ms) +[ RUN ] workspaces.precompute3D_multipleWS +[ OK ] workspaces.precompute3D_multipleWS (24 ms) +[ RUN ] workspaces.precompute3D_renamedIVars_TspV +[ OK ] workspaces.precompute3D_renamedIVars_TspV (323 ms) +[ RUN ] workspaces.tile_dotProduct_3 +Error at /home/manya227/taco/src/index_notation/index_notation.cpp:1519 in split: + The pattern of ForAlls: i_bounded was not found while attempting to replace with: i0, i1 +[ FAILED ] workspaces.tile_dotProduct_3 (140 ms) +[----------] 11 tests from workspaces (3247 ms total) + +[----------] 13 tests from expr +[ RUN ] expr.simplify_copy +[ OK ] expr.simplify_copy (0 ms) +[ RUN ] expr.simplify_copy_before_update +[ OK ] expr.simplify_copy_before_update (0 ms) +[ RUN ] expr.simplify_dont_copy_loop_dependent_var +[ OK ] expr.simplify_dont_copy_loop_dependent_var (0 ms) +[ RUN ] expr.reduction0 +[ OK ] expr.reduction0 (384 ms) +[ RUN ] expr.repeated_operand +[ OK ] expr.repeated_operand (291 ms) +[ RUN ] expr.sub +[ OK ] expr.sub (413 ms) +[ RUN ] expr.simplify_neg +[ OK ] expr.simplify_neg (0 ms) +[ RUN ] expr.simplify_elmul +[ OK ] expr.simplify_elmul (0 ms) +[ RUN ] expr.simplify_add +[ OK ] expr.simplify_add (0 ms) +[ RUN ] expr.simplify_addmul +[ OK ] expr.simplify_addmul (0 ms) +[ RUN ] expr.simplify_muladd +[ OK ] expr.simplify_muladd (0 ms) +[ RUN ] expr.scalarops +[ OK ] expr.scalarops (0 ms) +[ RUN ] expr.redefine +[ OK ] expr.redefine (262 ms) +[----------] 13 tests from expr (1350 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/0, where TypeParam = signed char +[ RUN ] tensor_types/ScalarTensorTest/0.types +[ OK ] tensor_types/ScalarTensorTest/0.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/0 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/1, where TypeParam = short +[ RUN ] tensor_types/ScalarTensorTest/1.types +[ OK ] tensor_types/ScalarTensorTest/1.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/1 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/2, where TypeParam = int +[ RUN ] tensor_types/ScalarTensorTest/2.types +[ OK ] tensor_types/ScalarTensorTest/2.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/2 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/3, where TypeParam = long +[ RUN ] tensor_types/ScalarTensorTest/3.types +[ OK ] tensor_types/ScalarTensorTest/3.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/3 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/4, where TypeParam = long long +[ RUN ] tensor_types/ScalarTensorTest/4.types +[ OK ] tensor_types/ScalarTensorTest/4.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/4 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/5, where TypeParam = unsigned char +[ RUN ] tensor_types/ScalarTensorTest/5.types +[ OK ] tensor_types/ScalarTensorTest/5.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/5 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/6, where TypeParam = unsigned short +[ RUN ] tensor_types/ScalarTensorTest/6.types +[ OK ] tensor_types/ScalarTensorTest/6.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/6 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/7, where TypeParam = unsigned int +[ RUN ] tensor_types/ScalarTensorTest/7.types +[ OK ] tensor_types/ScalarTensorTest/7.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/7 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/8, where TypeParam = unsigned long +[ RUN ] tensor_types/ScalarTensorTest/8.types +[ OK ] tensor_types/ScalarTensorTest/8.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/8 (1 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/9, where TypeParam = unsigned long long +[ RUN ] tensor_types/ScalarTensorTest/9.types +[ OK ] tensor_types/ScalarTensorTest/9.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/9 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/10, where TypeParam = float +[ RUN ] tensor_types/ScalarTensorTest/10.types +[ OK ] tensor_types/ScalarTensorTest/10.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/10 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/11, where TypeParam = double +[ RUN ] tensor_types/ScalarTensorTest/11.types +[ OK ] tensor_types/ScalarTensorTest/11.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/11 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/12, where TypeParam = std::complex +[ RUN ] tensor_types/ScalarTensorTest/12.types +[ OK ] tensor_types/ScalarTensorTest/12.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/12 (0 ms total) + +[----------] 1 test from tensor_types/ScalarTensorTest/13, where TypeParam = std::complex +[ RUN ] tensor_types/ScalarTensorTest/13.types +[ OK ] tensor_types/ScalarTensorTest/13.types (0 ms) +[----------] 1 test from tensor_types/ScalarTensorTest/13 (0 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/0, where TypeParam = signed char +[ RUN ] tensor_types/ScalarValueTensorTest/0.types +[ OK ] tensor_types/ScalarValueTensorTest/0.types (161 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/0 (161 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/1, where TypeParam = short +[ RUN ] tensor_types/ScalarValueTensorTest/1.types +[ OK ] tensor_types/ScalarValueTensorTest/1.types (154 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/1 (154 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/2, where TypeParam = int +[ RUN ] tensor_types/ScalarValueTensorTest/2.types +[ OK ] tensor_types/ScalarValueTensorTest/2.types (151 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/2 (151 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/3, where TypeParam = long +[ RUN ] tensor_types/ScalarValueTensorTest/3.types +[ OK ] tensor_types/ScalarValueTensorTest/3.types (127 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/3 (127 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/4, where TypeParam = long long +[ RUN ] tensor_types/ScalarValueTensorTest/4.types +[ OK ] tensor_types/ScalarValueTensorTest/4.types (0 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/4 (0 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/5, where TypeParam = unsigned char +[ RUN ] tensor_types/ScalarValueTensorTest/5.types +[ OK ] tensor_types/ScalarValueTensorTest/5.types (127 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/5 (127 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/6, where TypeParam = unsigned short +[ RUN ] tensor_types/ScalarValueTensorTest/6.types +[ OK ] tensor_types/ScalarValueTensorTest/6.types (128 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/6 (128 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/7, where TypeParam = unsigned int +[ RUN ] tensor_types/ScalarValueTensorTest/7.types +[ OK ] tensor_types/ScalarValueTensorTest/7.types (126 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/7 (126 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/8, where TypeParam = unsigned long +[ RUN ] tensor_types/ScalarValueTensorTest/8.types +[ OK ] tensor_types/ScalarValueTensorTest/8.types (129 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/8 (129 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/9, where TypeParam = unsigned long long +[ RUN ] tensor_types/ScalarValueTensorTest/9.types +[ OK ] tensor_types/ScalarValueTensorTest/9.types (0 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/9 (0 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/10, where TypeParam = float +[ RUN ] tensor_types/ScalarValueTensorTest/10.types +[ OK ] tensor_types/ScalarValueTensorTest/10.types (163 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/10 (163 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/11, where TypeParam = double +[ RUN ] tensor_types/ScalarValueTensorTest/11.types +[ OK ] tensor_types/ScalarValueTensorTest/11.types (0 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/11 (0 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/12, where TypeParam = std::complex +[ RUN ] tensor_types/ScalarValueTensorTest/12.types +[ OK ] tensor_types/ScalarValueTensorTest/12.types (135 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/12 (135 ms total) + +[----------] 1 test from tensor_types/ScalarValueTensorTest/13, where TypeParam = std::complex +[ RUN ] tensor_types/ScalarValueTensorTest/13.types +[ OK ] tensor_types/ScalarValueTensorTest/13.types (139 ms) +[----------] 1 test from tensor_types/ScalarValueTensorTest/13 (139 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/0, where TypeParam = signed char +[ RUN ] tensor_types/VectorTensorTest/0.types +[ OK ] tensor_types/VectorTensorTest/0.types (143 ms) +[----------] 1 test from tensor_types/VectorTensorTest/0 (143 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/1, where TypeParam = short +[ RUN ] tensor_types/VectorTensorTest/1.types +[ OK ] tensor_types/VectorTensorTest/1.types (142 ms) +[----------] 1 test from tensor_types/VectorTensorTest/1 (142 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/2, where TypeParam = int +[ RUN ] tensor_types/VectorTensorTest/2.types +[ OK ] tensor_types/VectorTensorTest/2.types (140 ms) +[----------] 1 test from tensor_types/VectorTensorTest/2 (140 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/3, where TypeParam = long +[ RUN ] tensor_types/VectorTensorTest/3.types +[ OK ] tensor_types/VectorTensorTest/3.types (142 ms) +[----------] 1 test from tensor_types/VectorTensorTest/3 (142 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/4, where TypeParam = long long +[ RUN ] tensor_types/VectorTensorTest/4.types +[ OK ] tensor_types/VectorTensorTest/4.types (0 ms) +[----------] 1 test from tensor_types/VectorTensorTest/4 (0 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/5, where TypeParam = unsigned char +[ RUN ] tensor_types/VectorTensorTest/5.types +[ OK ] tensor_types/VectorTensorTest/5.types (143 ms) +[----------] 1 test from tensor_types/VectorTensorTest/5 (143 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/6, where TypeParam = unsigned short +[ RUN ] tensor_types/VectorTensorTest/6.types +[ OK ] tensor_types/VectorTensorTest/6.types (138 ms) +[----------] 1 test from tensor_types/VectorTensorTest/6 (138 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/7, where TypeParam = unsigned int +[ RUN ] tensor_types/VectorTensorTest/7.types +[ OK ] tensor_types/VectorTensorTest/7.types (140 ms) +[----------] 1 test from tensor_types/VectorTensorTest/7 (140 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/8, where TypeParam = unsigned long +[ RUN ] tensor_types/VectorTensorTest/8.types +[ OK ] tensor_types/VectorTensorTest/8.types (140 ms) +[----------] 1 test from tensor_types/VectorTensorTest/8 (140 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/9, where TypeParam = unsigned long long +[ RUN ] tensor_types/VectorTensorTest/9.types +[ OK ] tensor_types/VectorTensorTest/9.types (0 ms) +[----------] 1 test from tensor_types/VectorTensorTest/9 (0 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/10, where TypeParam = float +[ RUN ] tensor_types/VectorTensorTest/10.types +[ OK ] tensor_types/VectorTensorTest/10.types (139 ms) +[----------] 1 test from tensor_types/VectorTensorTest/10 (139 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/11, where TypeParam = double +[ RUN ] tensor_types/VectorTensorTest/11.types +[ OK ] tensor_types/VectorTensorTest/11.types (138 ms) +[----------] 1 test from tensor_types/VectorTensorTest/11 (138 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/12, where TypeParam = std::complex +[ RUN ] tensor_types/VectorTensorTest/12.types +[ OK ] tensor_types/VectorTensorTest/12.types (139 ms) +[----------] 1 test from tensor_types/VectorTensorTest/12 (139 ms total) + +[----------] 1 test from tensor_types/VectorTensorTest/13, where TypeParam = std::complex +[ RUN ] tensor_types/VectorTensorTest/13.types +[ OK ] tensor_types/VectorTensorTest/13.types (157 ms) +[----------] 1 test from tensor_types/VectorTensorTest/13 (157 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/0, where TypeParam = signed char +[ RUN ] tensor_types/IterateTensorTest/0.types +[ OK ] tensor_types/IterateTensorTest/0.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/0 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/1, where TypeParam = short +[ RUN ] tensor_types/IterateTensorTest/1.types +[ OK ] tensor_types/IterateTensorTest/1.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/1 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/2, where TypeParam = int +[ RUN ] tensor_types/IterateTensorTest/2.types +[ OK ] tensor_types/IterateTensorTest/2.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/2 (2 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/3, where TypeParam = long +[ RUN ] tensor_types/IterateTensorTest/3.types +[ OK ] tensor_types/IterateTensorTest/3.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/3 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/4, where TypeParam = long long +[ RUN ] tensor_types/IterateTensorTest/4.types +[ OK ] tensor_types/IterateTensorTest/4.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/4 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/5, where TypeParam = unsigned char +[ RUN ] tensor_types/IterateTensorTest/5.types +[ OK ] tensor_types/IterateTensorTest/5.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/5 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/6, where TypeParam = unsigned short +[ RUN ] tensor_types/IterateTensorTest/6.types +[ OK ] tensor_types/IterateTensorTest/6.types (1 ms) +[----------] 1 test from tensor_types/IterateTensorTest/6 (1 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/7, where TypeParam = unsigned int +[ RUN ] tensor_types/IterateTensorTest/7.types +[ OK ] tensor_types/IterateTensorTest/7.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/7 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/8, where TypeParam = unsigned long +[ RUN ] tensor_types/IterateTensorTest/8.types +[ OK ] tensor_types/IterateTensorTest/8.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/8 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/9, where TypeParam = unsigned long long +[ RUN ] tensor_types/IterateTensorTest/9.types +[ OK ] tensor_types/IterateTensorTest/9.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/9 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/10, where TypeParam = float +[ RUN ] tensor_types/IterateTensorTest/10.types +[ OK ] tensor_types/IterateTensorTest/10.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/10 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/11, where TypeParam = double +[ RUN ] tensor_types/IterateTensorTest/11.types +[ OK ] tensor_types/IterateTensorTest/11.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/11 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/12, where TypeParam = std::complex +[ RUN ] tensor_types/IterateTensorTest/12.types +[ OK ] tensor_types/IterateTensorTest/12.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/12 (0 ms total) + +[----------] 1 test from tensor_types/IterateTensorTest/13, where TypeParam = std::complex +[ RUN ] tensor_types/IterateTensorTest/13.types +[ OK ] tensor_types/IterateTensorTest/13.types (0 ms) +[----------] 1 test from tensor_types/IterateTensorTest/13 (0 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/0, where TypeParam = signed char +[ RUN ] tensor_types/AddTensorTest/0.types +[ OK ] tensor_types/AddTensorTest/0.types (313 ms) +[----------] 1 test from tensor_types/AddTensorTest/0 (313 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/1, where TypeParam = short +[ RUN ] tensor_types/AddTensorTest/1.types +[ OK ] tensor_types/AddTensorTest/1.types (294 ms) +[----------] 1 test from tensor_types/AddTensorTest/1 (294 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/2, where TypeParam = int +[ RUN ] tensor_types/AddTensorTest/2.types +[ OK ] tensor_types/AddTensorTest/2.types (287 ms) +[----------] 1 test from tensor_types/AddTensorTest/2 (287 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/3, where TypeParam = long +[ RUN ] tensor_types/AddTensorTest/3.types +[ OK ] tensor_types/AddTensorTest/3.types (281 ms) +[----------] 1 test from tensor_types/AddTensorTest/3 (281 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/4, where TypeParam = long long +[ RUN ] tensor_types/AddTensorTest/4.types +[ OK ] tensor_types/AddTensorTest/4.types (1 ms) +[----------] 1 test from tensor_types/AddTensorTest/4 (2 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/5, where TypeParam = unsigned char +[ RUN ] tensor_types/AddTensorTest/5.types +[ OK ] tensor_types/AddTensorTest/5.types (306 ms) +[----------] 1 test from tensor_types/AddTensorTest/5 (306 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/6, where TypeParam = unsigned short +[ RUN ] tensor_types/AddTensorTest/6.types +[ OK ] tensor_types/AddTensorTest/6.types (292 ms) +[----------] 1 test from tensor_types/AddTensorTest/6 (292 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/7, where TypeParam = unsigned int +[ RUN ] tensor_types/AddTensorTest/7.types +[ OK ] tensor_types/AddTensorTest/7.types (321 ms) +[----------] 1 test from tensor_types/AddTensorTest/7 (321 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/8, where TypeParam = unsigned long +[ RUN ] tensor_types/AddTensorTest/8.types +[ OK ] tensor_types/AddTensorTest/8.types (316 ms) +[----------] 1 test from tensor_types/AddTensorTest/8 (316 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/9, where TypeParam = unsigned long long +[ RUN ] tensor_types/AddTensorTest/9.types +[ OK ] tensor_types/AddTensorTest/9.types (1 ms) +[----------] 1 test from tensor_types/AddTensorTest/9 (2 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/10, where TypeParam = float +[ RUN ] tensor_types/AddTensorTest/10.types +[ OK ] tensor_types/AddTensorTest/10.types (316 ms) +[----------] 1 test from tensor_types/AddTensorTest/10 (316 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/11, where TypeParam = double +[ RUN ] tensor_types/AddTensorTest/11.types +[ OK ] tensor_types/AddTensorTest/11.types (0 ms) +[----------] 1 test from tensor_types/AddTensorTest/11 (1 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/12, where TypeParam = std::complex +[ RUN ] tensor_types/AddTensorTest/12.types +[ OK ] tensor_types/AddTensorTest/12.types (322 ms) +[----------] 1 test from tensor_types/AddTensorTest/12 (322 ms total) + +[----------] 1 test from tensor_types/AddTensorTest/13, where TypeParam = std::complex +[ RUN ] tensor_types/AddTensorTest/13.types +[ OK ] tensor_types/AddTensorTest/13.types (295 ms) +[----------] 1 test from tensor_types/AddTensorTest/13 (295 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/0, where TypeParam = signed char +[ RUN ] tensor_types/MulTensorTest/0.types +[ OK ] tensor_types/MulTensorTest/0.types (162 ms) +[----------] 1 test from tensor_types/MulTensorTest/0 (162 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/1, where TypeParam = short +[ RUN ] tensor_types/MulTensorTest/1.types +[ OK ] tensor_types/MulTensorTest/1.types (154 ms) +[----------] 1 test from tensor_types/MulTensorTest/1 (154 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/2, where TypeParam = int +[ RUN ] tensor_types/MulTensorTest/2.types +[ OK ] tensor_types/MulTensorTest/2.types (147 ms) +[----------] 1 test from tensor_types/MulTensorTest/2 (147 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/3, where TypeParam = long +[ RUN ] tensor_types/MulTensorTest/3.types +[ OK ] tensor_types/MulTensorTest/3.types (148 ms) +[----------] 1 test from tensor_types/MulTensorTest/3 (148 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/4, where TypeParam = long long +[ RUN ] tensor_types/MulTensorTest/4.types +[ OK ] tensor_types/MulTensorTest/4.types (1 ms) +[----------] 1 test from tensor_types/MulTensorTest/4 (2 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/5, where TypeParam = unsigned char +[ RUN ] tensor_types/MulTensorTest/5.types +[ OK ] tensor_types/MulTensorTest/5.types (172 ms) +[----------] 1 test from tensor_types/MulTensorTest/5 (172 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/6, where TypeParam = unsigned short +[ RUN ] tensor_types/MulTensorTest/6.types +[ OK ] tensor_types/MulTensorTest/6.types (184 ms) +[----------] 1 test from tensor_types/MulTensorTest/6 (184 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/7, where TypeParam = unsigned int +[ RUN ] tensor_types/MulTensorTest/7.types +[ OK ] tensor_types/MulTensorTest/7.types (143 ms) +[----------] 1 test from tensor_types/MulTensorTest/7 (143 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/8, where TypeParam = unsigned long +[ RUN ] tensor_types/MulTensorTest/8.types +[ OK ] tensor_types/MulTensorTest/8.types (162 ms) +[----------] 1 test from tensor_types/MulTensorTest/8 (162 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/9, where TypeParam = unsigned long long +[ RUN ] tensor_types/MulTensorTest/9.types +[ OK ] tensor_types/MulTensorTest/9.types (1 ms) +[----------] 1 test from tensor_types/MulTensorTest/9 (2 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/10, where TypeParam = float +[ RUN ] tensor_types/MulTensorTest/10.types +[ OK ] tensor_types/MulTensorTest/10.types (140 ms) +[----------] 1 test from tensor_types/MulTensorTest/10 (140 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/11, where TypeParam = double +[ RUN ] tensor_types/MulTensorTest/11.types +[ OK ] tensor_types/MulTensorTest/11.types (139 ms) +[----------] 1 test from tensor_types/MulTensorTest/11 (139 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/12, where TypeParam = std::complex +[ RUN ] tensor_types/MulTensorTest/12.types +[ OK ] tensor_types/MulTensorTest/12.types (147 ms) +[----------] 1 test from tensor_types/MulTensorTest/12 (147 ms total) + +[----------] 1 test from tensor_types/MulTensorTest/13, where TypeParam = std::complex +[ RUN ] tensor_types/MulTensorTest/13.types +[ OK ] tensor_types/MulTensorTest/13.types (142 ms) +[----------] 1 test from tensor_types/MulTensorTest/13 (142 ms total) + +[----------] 6 tests from tensor_types +[ RUN ] tensor_types.complex_add +[ OK ] tensor_types.complex_add (1 ms) +[ RUN ] tensor_types.complex_mul_complex +[ OK ] tensor_types.complex_mul_complex (0 ms) +[ RUN ] tensor_types.complex_available_expr +[ OK ] tensor_types.complex_available_expr (457 ms) +[ RUN ] tensor_types.complex_accumulate +[ OK ] tensor_types.complex_accumulate (136 ms) +[ RUN ] tensor_types.float_double_promotion +[ OK ] tensor_types.float_double_promotion (139 ms) +[ RUN ] tensor_types.int_float_promotion +[ OK ] tensor_types.int_float_promotion (187 ms) +[----------] 6 tests from tensor_types (921 ms total) + +[----------] 25 tests from scheduling +[ RUN ] scheduling.splitEquality +[ OK ] scheduling.splitEquality (0 ms) +[ RUN ] scheduling.forallReplace +[ OK ] scheduling.forallReplace (0 ms) +[ RUN ] scheduling.splitIndexStmt +[ OK ] scheduling.splitIndexStmt (0 ms) +[ RUN ] scheduling.fuseDenseLoops +[ OK ] scheduling.fuseDenseLoops (580 ms) +[ RUN ] scheduling.lowerDenseMatrixMul +[ OK ] scheduling.lowerDenseMatrixMul (454 ms) +[ RUN ] scheduling.lowerSparseCopy +[ OK ] scheduling.lowerSparseCopy (423 ms) +[ RUN ] scheduling.lowerSparseMulDense +[ OK ] scheduling.lowerSparseMulDense (258 ms) +[ RUN ] scheduling.lowerSparseMulSparse +[ OK ] scheduling.lowerSparseMulSparse (266 ms) +[ RUN ] scheduling.precomputeIndependentIndexVars +[ OK ] scheduling.precomputeIndependentIndexVars (263 ms) +[ RUN ] scheduling.precomputeIndependentIndexVarsSplit +[ OK ] scheduling.precomputeIndependentIndexVarsSplit (137 ms) +[ RUN ] scheduling.lowerSparseAddSparse +[ OK ] scheduling.lowerSparseAddSparse (284 ms) +[ RUN ] scheduling.lowerSparseMatrixMul +[ OK ] scheduling.lowerSparseMatrixMul (742 ms) +[ RUN ] scheduling.parallelizeAtomicReduction +[ OK ] scheduling.parallelizeAtomicReduction (248 ms) +[ RUN ] scheduling.parallelizeTemporaryReduction +[ OK ] scheduling.parallelizeTemporaryReduction (149 ms) +[ RUN ] scheduling.multilevel_tiling +[ OK ] scheduling.multilevel_tiling (142 ms) +[ RUN ] scheduling.pos_noop +[ OK ] scheduling.pos_noop (249 ms) +[ RUN ] scheduling.pos_mul_dense +[ OK ] scheduling.pos_mul_dense (124 ms) +[ RUN ] scheduling.pos_mul_sparse +[ OK ] scheduling.pos_mul_sparse (126 ms) +[ RUN ] scheduling.pos_mul_dense_split +[ OK ] scheduling.pos_mul_dense_split (128 ms) +[ RUN ] scheduling.pos_tile_coord_and_pos +[ OK ] scheduling.pos_tile_coord_and_pos (135 ms) +[ RUN ] scheduling.spmv_warp_per_row +[ OK ] scheduling.spmv_warp_per_row (0 ms) +[ RUN ] scheduling.dense_pos_error +[ OK ] scheduling.dense_pos_error (0 ms) +[ RUN ] scheduling.pos_var_not_in_access +[ OK ] scheduling.pos_var_not_in_access (1 ms) +[ RUN ] scheduling.pos_wrong_access +[ OK ] scheduling.pos_wrong_access (0 ms) +[ RUN ] scheduling.divide +[ OK ] scheduling.divide (978 ms) +[----------] 25 tests from scheduling (5689 ms total) + +[----------] 1 test from scheduling_eval_test +[ RUN ] scheduling_eval_test.spmv_fuse +[ OK ] scheduling_eval_test.spmv_fuse (0 ms) +[----------] 1 test from scheduling_eval_test (0 ms total) + +[----------] 6 tests from indexstmt +[ RUN ] indexstmt.assignment +[ OK ] indexstmt.assignment (0 ms) +[ RUN ] indexstmt.forall +[ OK ] indexstmt.forall (0 ms) +[ RUN ] indexstmt.where +[ OK ] indexstmt.where (0 ms) +[ RUN ] indexstmt.multi +[ OK ] indexstmt.multi (0 ms) +[ RUN ] indexstmt.sequence +[ OK ] indexstmt.sequence (0 ms) +[ RUN ] indexstmt.spmm +[ OK ] indexstmt.spmm (0 ms) +[----------] 6 tests from indexstmt (2 ms total) + +[----------] 2 tests from index +[ RUN ] index.size +[ OK ] index.size (0 ms) +[ RUN ] index.makeCSR +[ OK ] index.makeCSR (0 ms) +[----------] 2 tests from index (0 ms total) + +[----------] 20 tests from tensor +[ RUN ] tensor.double_scalar +[ OK ] tensor.double_scalar (0 ms) +[ RUN ] tensor.double_vector +[ OK ] tensor.double_vector (0 ms) +[ RUN ] tensor.iterate +[ OK ] tensor.iterate (0 ms) +[ RUN ] tensor.iterate_empty +[ OK ] tensor.iterate_empty (0 ms) +[ RUN ] tensor.duplicates +[ OK ] tensor.duplicates (160 ms) +[ RUN ] tensor.duplicates_scalar +[ OK ] tensor.duplicates_scalar (0 ms) +[ RUN ] tensor.transpose +[ OK ] tensor.transpose (536 ms) +[ RUN ] tensor.operator_parens_insertion +[ OK ] tensor.operator_parens_insertion (0 ms) +[ RUN ] tensor.get_value +[ OK ] tensor.get_value (0 ms) +[ RUN ] tensor.set_from_components +[ OK ] tensor.set_from_components (0 ms) +[ RUN ] tensor.hidden_pack +[ OK ] tensor.hidden_pack (0 ms) +[ RUN ] tensor.automatic_pack_before_iteration +[ OK ] tensor.automatic_pack_before_iteration (1 ms) +[ RUN ] tensor.automatic_pack_before_const_iteration +[ OK ] tensor.automatic_pack_before_const_iteration (0 ms) +[ RUN ] tensor.hidden_compiler_methods +[ OK ] tensor.hidden_compiler_methods (625 ms) +[ RUN ] tensor.explicit_compiler_methods +[ OK ] tensor.explicit_compiler_methods (2 ms) +[ RUN ] tensor.computation_dependency_modification +[ OK ] tensor.computation_dependency_modification (1 ms) +[ RUN ] tensor.old_dependency_modification +[ OK ] tensor.old_dependency_modification (154 ms) +[ RUN ] tensor.skip_recompile +[ OK ] tensor.skip_recompile (383 ms) +[ RUN ] tensor.recompile +[ OK ] tensor.recompile (255 ms) +[ RUN ] tensor.cache +[ OK ] tensor.cache (417 ms) +[----------] 20 tests from tensor (2535 ms total) + +[----------] 1 test from Generic/IntTest/0, where TypeParam = char +[ RUN ] Generic/IntTest/0.types +[ OK ] Generic/IntTest/0.types (0 ms) +[----------] 1 test from Generic/IntTest/0 (0 ms total) + +[----------] 1 test from Generic/IntTest/1, where TypeParam = short +[ RUN ] Generic/IntTest/1.types +[ OK ] Generic/IntTest/1.types (0 ms) +[----------] 1 test from Generic/IntTest/1 (0 ms total) + +[----------] 1 test from Generic/IntTest/2, where TypeParam = int +[ RUN ] Generic/IntTest/2.types +[ OK ] Generic/IntTest/2.types (0 ms) +[----------] 1 test from Generic/IntTest/2 (0 ms total) + +[----------] 1 test from Generic/IntTest/3, where TypeParam = long +[ RUN ] Generic/IntTest/3.types +[ OK ] Generic/IntTest/3.types (0 ms) +[----------] 1 test from Generic/IntTest/3 (0 ms total) + +[----------] 1 test from Generic/IntTest/4, where TypeParam = long long +[ RUN ] Generic/IntTest/4.types +[ OK ] Generic/IntTest/4.types (0 ms) +[----------] 1 test from Generic/IntTest/4 (0 ms total) + +[----------] 1 test from specific/IntTest/0, where TypeParam = signed char +[ RUN ] specific/IntTest/0.types +[ OK ] specific/IntTest/0.types (0 ms) +[----------] 1 test from specific/IntTest/0 (0 ms total) + +[----------] 1 test from specific/IntTest/1, where TypeParam = short +[ RUN ] specific/IntTest/1.types +[ OK ] specific/IntTest/1.types (0 ms) +[----------] 1 test from specific/IntTest/1 (0 ms total) + +[----------] 1 test from specific/IntTest/2, where TypeParam = int +[ RUN ] specific/IntTest/2.types +[ OK ] specific/IntTest/2.types (0 ms) +[----------] 1 test from specific/IntTest/2 (0 ms total) + +[----------] 1 test from specific/IntTest/3, where TypeParam = long +[ RUN ] specific/IntTest/3.types +[ OK ] specific/IntTest/3.types (0 ms) +[----------] 1 test from specific/IntTest/3 (0 ms total) + +[----------] 1 test from Generic/UIntTest/0, where TypeParam = unsigned char +[ RUN ] Generic/UIntTest/0.types +[ OK ] Generic/UIntTest/0.types (0 ms) +[----------] 1 test from Generic/UIntTest/0 (0 ms total) + +[----------] 1 test from Generic/UIntTest/1, where TypeParam = unsigned short +[ RUN ] Generic/UIntTest/1.types +[ OK ] Generic/UIntTest/1.types (0 ms) +[----------] 1 test from Generic/UIntTest/1 (0 ms total) + +[----------] 1 test from Generic/UIntTest/2, where TypeParam = unsigned int +[ RUN ] Generic/UIntTest/2.types +[ OK ] Generic/UIntTest/2.types (0 ms) +[----------] 1 test from Generic/UIntTest/2 (1 ms total) + +[----------] 1 test from Generic/UIntTest/3, where TypeParam = unsigned long +[ RUN ] Generic/UIntTest/3.types +[ OK ] Generic/UIntTest/3.types (0 ms) +[----------] 1 test from Generic/UIntTest/3 (0 ms total) + +[----------] 1 test from Generic/UIntTest/4, where TypeParam = unsigned long long +[ RUN ] Generic/UIntTest/4.types +[ OK ] Generic/UIntTest/4.types (0 ms) +[----------] 1 test from Generic/UIntTest/4 (1 ms total) + +[----------] 1 test from specific/UIntTest/0, where TypeParam = unsigned char +[ RUN ] specific/UIntTest/0.types +[ OK ] specific/UIntTest/0.types (0 ms) +[----------] 1 test from specific/UIntTest/0 (0 ms total) + +[----------] 1 test from specific/UIntTest/1, where TypeParam = unsigned short +[ RUN ] specific/UIntTest/1.types +[ OK ] specific/UIntTest/1.types (0 ms) +[----------] 1 test from specific/UIntTest/1 (0 ms total) + +[----------] 1 test from specific/UIntTest/2, where TypeParam = unsigned int +[ RUN ] specific/UIntTest/2.types +[ OK ] specific/UIntTest/2.types (0 ms) +[----------] 1 test from specific/UIntTest/2 (0 ms total) + +[----------] 1 test from specific/UIntTest/3, where TypeParam = unsigned long +[ RUN ] specific/UIntTest/3.types +[ OK ] specific/UIntTest/3.types (0 ms) +[----------] 1 test from specific/UIntTest/3 (0 ms total) + +[----------] 1 test from Generic/FloatTest/0, where TypeParam = float +[ RUN ] Generic/FloatTest/0.types +[ OK ] Generic/FloatTest/0.types (0 ms) +[----------] 1 test from Generic/FloatTest/0 (0 ms total) + +[----------] 1 test from Generic/FloatTest/1, where TypeParam = double +[ RUN ] Generic/FloatTest/1.types +[ OK ] Generic/FloatTest/1.types (0 ms) +[----------] 1 test from Generic/FloatTest/1 (0 ms total) + +[----------] 4 tests from type +[ RUN ] type.equality +[ OK ] type.equality (0 ms) +[ RUN ] type.Dimension +[ OK ] type.Dimension (0 ms) +[ RUN ] type.Shape +[ OK ] type.Shape (0 ms) +[ RUN ] type.TensorType +[ OK ] type.TensorType (0 ms) +[----------] 4 tests from type (0 ms total) + +[----------] 6 tests from error +[ RUN ] error.expr_dimension_mismatch_freevar +[ OK ] error.expr_dimension_mismatch_freevar (0 ms) +[ RUN ] error.expr_dimension_mismatch_sumvar +[ OK ] error.expr_dimension_mismatch_sumvar (0 ms) +[ RUN ] error.compile_without_expr +[ OK ] error.compile_without_expr (0 ms) +[ RUN ] error.compile_tensor_name_collision +[ OK ] error.compile_tensor_name_collision (0 ms) +[ RUN ] error.assemble_without_compile +[ OK ] error.assemble_without_compile (0 ms) +[ RUN ] error.compute_without_compile +[ OK ] error.compute_without_compile (0 ms) +[----------] 6 tests from error (0 ms total) + +[----------] 6 tests from windowing +[ RUN ] windowing.mixIndexing +[ OK ] windowing.mixIndexing (1 ms) +[ RUN ] windowing.boundsChecks +[ OK ] windowing.boundsChecks (0 ms) +[ RUN ] windowing.sliceMultipleWays +[ OK ] windowing.sliceMultipleWays (408 ms) +[ RUN ] windowing.transformations +[ OK ] windowing.transformations (1686 ms) +[ RUN ] windowing.lhsIndexSet +[ OK ] windowing.lhsIndexSet (402 ms) +[ RUN ] windowing.compoundAssign +[ OK ] windowing.compoundAssign (254 ms) +[----------] 6 tests from windowing (2751 ms total) + +[----------] 1 test from schedule +[ RUN ] schedule.workspace_spmspm +[ OK ] schedule.workspace_spmspm (327 ms) +[----------] 1 test from schedule (327 ms total) + +[----------] 1 test from lower +[ RUN ] lower.denseIterationTranspose +[ OK ] lower.denseIterationTranspose (132 ms) +[----------] 1 test from lower (132 ms total) + +[----------] 1 test from makecsr +[ RUN ] makecsr.access_past_pos +Success! +[ OK ] makecsr.access_past_pos (0 ms) +[----------] 1 test from makecsr (0 ms total) + +[----------] 3 tests from qcd +[ RUN ] qcd.mul0 +[ OK ] qcd.mul0 (419 ms) +[ RUN ] qcd.mul1 +[ OK ] qcd.mul1 (143 ms) +[ RUN ] qcd.mul2 +[ OK ] qcd.mul2 (145 ms) +[----------] 3 tests from qcd (707 ms total) + +[----------] 5 tests from spgemm/spgemm +[ RUN ] spgemm/spgemm.scheduling_eval/0 +[ OK ] spgemm/spgemm.scheduling_eval/0 (693 ms) +[ RUN ] spgemm/spgemm.scheduling_eval/1 +[ OK ] spgemm/spgemm.scheduling_eval/1 (559 ms) +[ RUN ] spgemm/spgemm.scheduling_eval/2 +[ OK ] spgemm/spgemm.scheduling_eval/2 (413 ms) +[ RUN ] spgemm/spgemm.scheduling_eval/3 +[ OK ] spgemm/spgemm.scheduling_eval/3 (528 ms) +[ RUN ] spgemm/spgemm.scheduling_eval/4 +[ OK ] spgemm/spgemm.scheduling_eval/4 (540 ms) +[----------] 5 tests from spgemm/spgemm (2733 ms total) + +[----------] 2 tests from copy/merge_lattice +[ RUN ] copy/merge_lattice.test/0 +[ OK ] copy/merge_lattice.test/0 (0 ms) +[ RUN ] copy/merge_lattice.test/1 +[ OK ] copy/merge_lattice.test/1 (0 ms) +[----------] 2 tests from copy/merge_lattice (0 ms total) + +[----------] 2 tests from neg/merge_lattice +[ RUN ] neg/merge_lattice.test/0 +[ OK ] neg/merge_lattice.test/0 (0 ms) +[ RUN ] neg/merge_lattice.test/1 +[ OK ] neg/merge_lattice.test/1 (0 ms) +[----------] 2 tests from neg/merge_lattice (0 ms total) + +[----------] 11 tests from mul/merge_lattice +[ RUN ] mul/merge_lattice.test/0 +[ OK ] mul/merge_lattice.test/0 (0 ms) +[ RUN ] mul/merge_lattice.test/1 +[ OK ] mul/merge_lattice.test/1 (0 ms) +[ RUN ] mul/merge_lattice.test/2 +[ OK ] mul/merge_lattice.test/2 (0 ms) +[ RUN ] mul/merge_lattice.test/3 +[ OK ] mul/merge_lattice.test/3 (0 ms) +[ RUN ] mul/merge_lattice.test/4 +[ OK ] mul/merge_lattice.test/4 (0 ms) +[ RUN ] mul/merge_lattice.test/5 +[ OK ] mul/merge_lattice.test/5 (0 ms) +[ RUN ] mul/merge_lattice.test/6 +[ OK ] mul/merge_lattice.test/6 (0 ms) +[ RUN ] mul/merge_lattice.test/7 +[ OK ] mul/merge_lattice.test/7 (0 ms) +[ RUN ] mul/merge_lattice.test/8 +[ OK ] mul/merge_lattice.test/8 (0 ms) +[ RUN ] mul/merge_lattice.test/9 +[ OK ] mul/merge_lattice.test/9 (0 ms) +[ RUN ] mul/merge_lattice.test/10 +[ OK ] mul/merge_lattice.test/10 (0 ms) +[----------] 11 tests from mul/merge_lattice (2 ms total) + +[----------] 6 tests from add/merge_lattice +[ RUN ] add/merge_lattice.test/0 +[ OK ] add/merge_lattice.test/0 (0 ms) +[ RUN ] add/merge_lattice.test/1 +[ OK ] add/merge_lattice.test/1 (0 ms) +[ RUN ] add/merge_lattice.test/2 +[ OK ] add/merge_lattice.test/2 (0 ms) +[ RUN ] add/merge_lattice.test/3 +[ OK ] add/merge_lattice.test/3 (0 ms) +[ RUN ] add/merge_lattice.test/4 +[ OK ] add/merge_lattice.test/4 (0 ms) +[ RUN ] add/merge_lattice.test/5 +[ OK ] add/merge_lattice.test/5 (0 ms) +[----------] 6 tests from add/merge_lattice (0 ms total) + +[----------] 5 tests from add_multiply/merge_lattice +[ RUN ] add_multiply/merge_lattice.test/0 +[ OK ] add_multiply/merge_lattice.test/0 (0 ms) +[ RUN ] add_multiply/merge_lattice.test/1 +[ OK ] add_multiply/merge_lattice.test/1 (0 ms) +[ RUN ] add_multiply/merge_lattice.test/2 +[ OK ] add_multiply/merge_lattice.test/2 (0 ms) +[ RUN ] add_multiply/merge_lattice.test/3 +[ OK ] add_multiply/merge_lattice.test/3 (0 ms) +[ RUN ] add_multiply/merge_lattice.test/4 +[ OK ] add_multiply/merge_lattice.test/4 (1 ms) +[----------] 5 tests from add_multiply/merge_lattice (1 ms total) + +[----------] 5 tests from multiply_add/merge_lattice +[ RUN ] multiply_add/merge_lattice.test/0 +[ OK ] multiply_add/merge_lattice.test/0 (0 ms) +[ RUN ] multiply_add/merge_lattice.test/1 +[ OK ] multiply_add/merge_lattice.test/1 (0 ms) +[ RUN ] multiply_add/merge_lattice.test/2 +[ OK ] multiply_add/merge_lattice.test/2 (0 ms) +[ RUN ] multiply_add/merge_lattice.test/3 +[ OK ] multiply_add/merge_lattice.test/3 (0 ms) +[ RUN ] multiply_add/merge_lattice.test/4 +[ OK ] multiply_add/merge_lattice.test/4 (0 ms) +[----------] 5 tests from multiply_add/merge_lattice (0 ms total) + +[----------] 5 tests from add_multiply_multiply/merge_lattice +[ RUN ] add_multiply_multiply/merge_lattice.test/0 +[ OK ] add_multiply_multiply/merge_lattice.test/0 (0 ms) +[ RUN ] add_multiply_multiply/merge_lattice.test/1 +[ OK ] add_multiply_multiply/merge_lattice.test/1 (0 ms) +[ RUN ] add_multiply_multiply/merge_lattice.test/2 +[ OK ] add_multiply_multiply/merge_lattice.test/2 (0 ms) +[ RUN ] add_multiply_multiply/merge_lattice.test/3 +[ OK ] add_multiply_multiply/merge_lattice.test/3 (0 ms) +[ RUN ] add_multiply_multiply/merge_lattice.test/4 +[ OK ] add_multiply_multiply/merge_lattice.test/4 (0 ms) +[----------] 5 tests from add_multiply_multiply/merge_lattice (3 ms total) + +[----------] 4 tests from add_multiply_add/merge_lattice +[ RUN ] add_multiply_add/merge_lattice.test/0 +[ OK ] add_multiply_add/merge_lattice.test/0 (0 ms) +[ RUN ] add_multiply_add/merge_lattice.test/1 +[ OK ] add_multiply_add/merge_lattice.test/1 (0 ms) +[ RUN ] add_multiply_add/merge_lattice.test/2 +[ OK ] add_multiply_add/merge_lattice.test/2 (0 ms) +[ RUN ] add_multiply_add/merge_lattice.test/3 +[ OK ] add_multiply_add/merge_lattice.test/3 (1 ms) +[----------] 4 tests from add_multiply_add/merge_lattice (1 ms total) + +[----------] 12 tests from hashmap/merge_lattice +[ RUN ] hashmap/merge_lattice.test/0 +[ OK ] hashmap/merge_lattice.test/0 (0 ms) +[ RUN ] hashmap/merge_lattice.test/1 +[ OK ] hashmap/merge_lattice.test/1 (0 ms) +[ RUN ] hashmap/merge_lattice.test/2 +[ OK ] hashmap/merge_lattice.test/2 (0 ms) +[ RUN ] hashmap/merge_lattice.test/3 +[ OK ] hashmap/merge_lattice.test/3 (0 ms) +[ RUN ] hashmap/merge_lattice.test/4 +[ OK ] hashmap/merge_lattice.test/4 (0 ms) +[ RUN ] hashmap/merge_lattice.test/5 +[ OK ] hashmap/merge_lattice.test/5 (0 ms) +[ RUN ] hashmap/merge_lattice.test/6 +[ OK ] hashmap/merge_lattice.test/6 (0 ms) +[ RUN ] hashmap/merge_lattice.test/7 +[ OK ] hashmap/merge_lattice.test/7 (0 ms) +[ RUN ] hashmap/merge_lattice.test/8 +[ OK ] hashmap/merge_lattice.test/8 (0 ms) +[ RUN ] hashmap/merge_lattice.test/9 +[ OK ] hashmap/merge_lattice.test/9 (0 ms) +[ RUN ] hashmap/merge_lattice.test/10 +[ OK ] hashmap/merge_lattice.test/10 (0 ms) +[ RUN ] hashmap/merge_lattice.test/11 +[ OK ] hashmap/merge_lattice.test/11 (0 ms) +[----------] 12 tests from hashmap/merge_lattice (2 ms total) + +[----------] 10 tests from vector/format +[ RUN ] vector/format.pack/0 +[ OK ] vector/format.pack/0 (133 ms) +[ RUN ] vector/format.pack/1 +[ OK ] vector/format.pack/1 (138 ms) +[ RUN ] vector/format.pack/2 +[ OK ] vector/format.pack/2 (0 ms) +[ RUN ] vector/format.pack/3 +[ OK ] vector/format.pack/3 (0 ms) +[ RUN ] vector/format.pack/4 +[ OK ] vector/format.pack/4 (0 ms) +[ RUN ] vector/format.pack/5 +[ OK ] vector/format.pack/5 (1 ms) +[ RUN ] vector/format.pack/6 +[ OK ] vector/format.pack/6 (0 ms) +[ RUN ] vector/format.pack/7 +[ OK ] vector/format.pack/7 (0 ms) +[ RUN ] vector/format.pack/8 +[ OK ] vector/format.pack/8 (0 ms) +[ RUN ] vector/format.pack/9 +[ OK ] vector/format.pack/9 (1 ms) +[----------] 10 tests from vector/format (275 ms total) + +[----------] 16 tests from matrix/format +[ RUN ] matrix/format.pack/0 +[ OK ] matrix/format.pack/0 (0 ms) +[ RUN ] matrix/format.pack/1 +[ OK ] matrix/format.pack/1 (146 ms) +[ RUN ] matrix/format.pack/2 +[ OK ] matrix/format.pack/2 (156 ms) +[ RUN ] matrix/format.pack/3 +[ OK ] matrix/format.pack/3 (152 ms) +[ RUN ] matrix/format.pack/4 +[ OK ] matrix/format.pack/4 (1 ms) +[ RUN ] matrix/format.pack/5 +[ OK ] matrix/format.pack/5 (0 ms) +[ RUN ] matrix/format.pack/6 +[ OK ] matrix/format.pack/6 (0 ms) +[ RUN ] matrix/format.pack/7 +[ OK ] matrix/format.pack/7 (160 ms) +[ RUN ] matrix/format.pack/8 +[ OK ] matrix/format.pack/8 (1 ms) +[ RUN ] matrix/format.pack/9 +[ OK ] matrix/format.pack/9 (0 ms) +[ RUN ] matrix/format.pack/10 +[ OK ] matrix/format.pack/10 (0 ms) +[ RUN ] matrix/format.pack/11 +[ OK ] matrix/format.pack/11 (0 ms) +[ RUN ] matrix/format.pack/12 +[ OK ] matrix/format.pack/12 (1 ms) +[ RUN ] matrix/format.pack/13 +[ OK ] matrix/format.pack/13 (0 ms) +[ RUN ] matrix/format.pack/14 +[ OK ] matrix/format.pack/14 (0 ms) +[ RUN ] matrix/format.pack/15 +[ OK ] matrix/format.pack/15 (0 ms) +[----------] 16 tests from matrix/format (633 ms total) + +[----------] 96 tests from tensor3/format +[ RUN ] tensor3/format.pack/0 +[ OK ] tensor3/format.pack/0 (153 ms) +[ RUN ] tensor3/format.pack/1 +[ OK ] tensor3/format.pack/1 (152 ms) +[ RUN ] tensor3/format.pack/2 +[ OK ] tensor3/format.pack/2 (152 ms) +[ RUN ] tensor3/format.pack/3 +[ OK ] tensor3/format.pack/3 (154 ms) +[ RUN ] tensor3/format.pack/4 +[ OK ] tensor3/format.pack/4 (152 ms) +[ RUN ] tensor3/format.pack/5 +[ OK ] tensor3/format.pack/5 (154 ms) +[ RUN ] tensor3/format.pack/6 +[ OK ] tensor3/format.pack/6 (165 ms) +[ RUN ] tensor3/format.pack/7 +[ OK ] tensor3/format.pack/7 (168 ms) +[ RUN ] tensor3/format.pack/8 +[ OK ] tensor3/format.pack/8 (167 ms) +[ RUN ] tensor3/format.pack/9 +[ OK ] tensor3/format.pack/9 (168 ms) +[ RUN ] tensor3/format.pack/10 +[ OK ] tensor3/format.pack/10 (165 ms) +[ RUN ] tensor3/format.pack/11 +[ OK ] tensor3/format.pack/11 (169 ms) +[ RUN ] tensor3/format.pack/12 +[ OK ] tensor3/format.pack/12 (169 ms) +[ RUN ] tensor3/format.pack/13 +[ OK ] tensor3/format.pack/13 (167 ms) +[ RUN ] tensor3/format.pack/14 +[ OK ] tensor3/format.pack/14 (171 ms) +[ RUN ] tensor3/format.pack/15 +[ OK ] tensor3/format.pack/15 (180 ms) +[ RUN ] tensor3/format.pack/16 +[ OK ] tensor3/format.pack/16 (172 ms) +[ RUN ] tensor3/format.pack/17 +[ OK ] tensor3/format.pack/17 (171 ms) +[ RUN ] tensor3/format.pack/18 +[ OK ] tensor3/format.pack/18 (175 ms) +[ RUN ] tensor3/format.pack/19 +[ OK ] tensor3/format.pack/19 (174 ms) +[ RUN ] tensor3/format.pack/20 +[ OK ] tensor3/format.pack/20 (180 ms) +[ RUN ] tensor3/format.pack/21 +[ OK ] tensor3/format.pack/21 (176 ms) +[ RUN ] tensor3/format.pack/22 +[ OK ] tensor3/format.pack/22 (176 ms) +[ RUN ] tensor3/format.pack/23 +[ OK ] tensor3/format.pack/23 (177 ms) +[ RUN ] tensor3/format.pack/24 +[ OK ] tensor3/format.pack/24 (169 ms) +[ RUN ] tensor3/format.pack/25 +[ OK ] tensor3/format.pack/25 (170 ms) +[ RUN ] tensor3/format.pack/26 +[ OK ] tensor3/format.pack/26 (167 ms) +[ RUN ] tensor3/format.pack/27 +[ OK ] tensor3/format.pack/27 (171 ms) +[ RUN ] tensor3/format.pack/28 +[ OK ] tensor3/format.pack/28 (168 ms) +[ RUN ] tensor3/format.pack/29 +[ OK ] tensor3/format.pack/29 (169 ms) +[ RUN ] tensor3/format.pack/30 +[ OK ] tensor3/format.pack/30 (180 ms) +[ RUN ] tensor3/format.pack/31 +[ OK ] tensor3/format.pack/31 (182 ms) +[ RUN ] tensor3/format.pack/32 +[ OK ] tensor3/format.pack/32 (181 ms) +[ RUN ] tensor3/format.pack/33 +[ OK ] tensor3/format.pack/33 (179 ms) +[ RUN ] tensor3/format.pack/34 +[ OK ] tensor3/format.pack/34 (179 ms) +[ RUN ] tensor3/format.pack/35 +[ OK ] tensor3/format.pack/35 (180 ms) +[ RUN ] tensor3/format.pack/36 +[ OK ] tensor3/format.pack/36 (177 ms) +[ RUN ] tensor3/format.pack/37 +[ OK ] tensor3/format.pack/37 (175 ms) +[ RUN ] tensor3/format.pack/38 +[ OK ] tensor3/format.pack/38 (178 ms) +[ RUN ] tensor3/format.pack/39 +[ OK ] tensor3/format.pack/39 (177 ms) +[ RUN ] tensor3/format.pack/40 +[ OK ] tensor3/format.pack/40 (177 ms) +[ RUN ] tensor3/format.pack/41 +[ OK ] tensor3/format.pack/41 (175 ms) +[ RUN ] tensor3/format.pack/42 +[ OK ] tensor3/format.pack/42 (180 ms) +[ RUN ] tensor3/format.pack/43 +[ OK ] tensor3/format.pack/43 (183 ms) +[ RUN ] tensor3/format.pack/44 +[ OK ] tensor3/format.pack/44 (181 ms) +[ RUN ] tensor3/format.pack/45 +[ OK ] tensor3/format.pack/45 (181 ms) +[ RUN ] tensor3/format.pack/46 +[ OK ] tensor3/format.pack/46 (182 ms) +[ RUN ] tensor3/format.pack/47 +[ OK ] tensor3/format.pack/47 (184 ms) +[ RUN ] tensor3/format.pack/48 +[ OK ] tensor3/format.pack/48 (0 ms) +[ RUN ] tensor3/format.pack/49 +[ OK ] tensor3/format.pack/49 (0 ms) +[ RUN ] tensor3/format.pack/50 +[ OK ] tensor3/format.pack/50 (0 ms) +[ RUN ] tensor3/format.pack/51 +[ OK ] tensor3/format.pack/51 (1 ms) +[ RUN ] tensor3/format.pack/52 +[ OK ] tensor3/format.pack/52 (0 ms) +[ RUN ] tensor3/format.pack/53 +[ OK ] tensor3/format.pack/53 (0 ms) +[ RUN ] tensor3/format.pack/54 +[ OK ] tensor3/format.pack/54 (0 ms) +[ RUN ] tensor3/format.pack/55 +[ OK ] tensor3/format.pack/55 (1 ms) +[ RUN ] tensor3/format.pack/56 +[ OK ] tensor3/format.pack/56 (0 ms) +[ RUN ] tensor3/format.pack/57 +[ OK ] tensor3/format.pack/57 (0 ms) +[ RUN ] tensor3/format.pack/58 +[ OK ] tensor3/format.pack/58 (0 ms) +[ RUN ] tensor3/format.pack/59 +[ OK ] tensor3/format.pack/59 (0 ms) +[ RUN ] tensor3/format.pack/60 +[ OK ] tensor3/format.pack/60 (1 ms) +[ RUN ] tensor3/format.pack/61 +[ OK ] tensor3/format.pack/61 (0 ms) +[ RUN ] tensor3/format.pack/62 +[ OK ] tensor3/format.pack/62 (0 ms) +[ RUN ] tensor3/format.pack/63 +[ OK ] tensor3/format.pack/63 (0 ms) +[ RUN ] tensor3/format.pack/64 +[ OK ] tensor3/format.pack/64 (0 ms) +[ RUN ] tensor3/format.pack/65 +[ OK ] tensor3/format.pack/65 (0 ms) +[ RUN ] tensor3/format.pack/66 +[ OK ] tensor3/format.pack/66 (0 ms) +[ RUN ] tensor3/format.pack/67 +[ OK ] tensor3/format.pack/67 (1 ms) +[ RUN ] tensor3/format.pack/68 +[ OK ] tensor3/format.pack/68 (0 ms) +[ RUN ] tensor3/format.pack/69 +[ OK ] tensor3/format.pack/69 (0 ms) +[ RUN ] tensor3/format.pack/70 +[ OK ] tensor3/format.pack/70 (0 ms) +[ RUN ] tensor3/format.pack/71 +[ OK ] tensor3/format.pack/71 (0 ms) +[ RUN ] tensor3/format.pack/72 +[ OK ] tensor3/format.pack/72 (1 ms) +[ RUN ] tensor3/format.pack/73 +[ OK ] tensor3/format.pack/73 (0 ms) +[ RUN ] tensor3/format.pack/74 +[ OK ] tensor3/format.pack/74 (0 ms) +[ RUN ] tensor3/format.pack/75 +[ OK ] tensor3/format.pack/75 (0 ms) +[ RUN ] tensor3/format.pack/76 +[ OK ] tensor3/format.pack/76 (1 ms) +[ RUN ] tensor3/format.pack/77 +[ OK ] tensor3/format.pack/77 (0 ms) +[ RUN ] tensor3/format.pack/78 +[ OK ] tensor3/format.pack/78 (0 ms) +[ RUN ] tensor3/format.pack/79 +[ OK ] tensor3/format.pack/79 (0 ms) +[ RUN ] tensor3/format.pack/80 +[ OK ] tensor3/format.pack/80 (0 ms) +[ RUN ] tensor3/format.pack/81 +[ OK ] tensor3/format.pack/81 (1 ms) +[ RUN ] tensor3/format.pack/82 +[ OK ] tensor3/format.pack/82 (0 ms) +[ RUN ] tensor3/format.pack/83 +[ OK ] tensor3/format.pack/83 (0 ms) +[ RUN ] tensor3/format.pack/84 +[ OK ] tensor3/format.pack/84 (0 ms) +[ RUN ] tensor3/format.pack/85 +[ OK ] tensor3/format.pack/85 (0 ms) +[ RUN ] tensor3/format.pack/86 +[ OK ] tensor3/format.pack/86 (0 ms) +[ RUN ] tensor3/format.pack/87 +[ OK ] tensor3/format.pack/87 (1 ms) +[ RUN ] tensor3/format.pack/88 +[ OK ] tensor3/format.pack/88 (0 ms) +[ RUN ] tensor3/format.pack/89 +[ OK ] tensor3/format.pack/89 (0 ms) +[ RUN ] tensor3/format.pack/90 +[ OK ] tensor3/format.pack/90 (0 ms) +[ RUN ] tensor3/format.pack/91 +[ OK ] tensor3/format.pack/91 (0 ms) +[ RUN ] tensor3/format.pack/92 +[ OK ] tensor3/format.pack/92 (0 ms) +[ RUN ] tensor3/format.pack/93 +[ OK ] tensor3/format.pack/93 (0 ms) +[ RUN ] tensor3/format.pack/94 +[ OK ] tensor3/format.pack/94 (0 ms) +[ RUN ] tensor3/format.pack/95 +[ OK ] tensor3/format.pack/95 (0 ms) +[----------] 96 tests from tensor3/format (8273 ms total) + +[----------] 6 tests from load/apiset +[ RUN ] load/apiset.api/0 +[ OK ] load/apiset.api/0 (0 ms) +[ RUN ] load/apiset.api/1 +[ OK ] load/apiset.api/1 (0 ms) +[ RUN ] load/apiset.api/2 +[ OK ] load/apiset.api/2 (0 ms) +[ RUN ] load/apiset.api/3 +[ OK ] load/apiset.api/3 (0 ms) +[ RUN ] load/apiset.api/4 +[ OK ] load/apiset.api/4 (1 ms) +[ RUN ] load/apiset.api/5 +[ OK ] load/apiset.api/5 (0 ms) +[----------] 6 tests from load/apiset (1 ms total) + +[----------] 3 tests from write/apiget +[ RUN ] write/apiget.api/0 +[ OK ] write/apiget.api/0 (0 ms) +[ RUN ] write/apiget.api/1 +[ OK ] write/apiget.api/1 (0 ms) +[ RUN ] write/apiget.api/2 +[ OK ] write/apiget.api/2 (1 ms) +[----------] 3 tests from write/apiget (2 ms total) + +[----------] 1 test from write/apiwrb +[ RUN ] write/apiwrb.api/0 +[ OK ] write/apiwrb.api/0 (64 ms) +[----------] 1 test from write/apiwrb (64 ms total) + +[----------] 3 tests from write/apiwmtx +[ RUN ] write/apiwmtx.api/0 +[ OK ] write/apiwmtx.api/0 (35 ms) +[ RUN ] write/apiwmtx.api/1 +[ OK ] write/apiwmtx.api/1 (32 ms) +[ RUN ] write/apiwmtx.api/2 +[ OK ] write/apiwmtx.api/2 (31 ms) +[----------] 3 tests from write/apiwmtx (98 ms total) + +[----------] 2 tests from readwrite/apitns +[ RUN ] readwrite/apitns.api/0 +[ OK ] readwrite/apitns.api/0 (1 ms) +[ RUN ] readwrite/apitns.api/1 +[ OK ] readwrite/apitns.api/1 (0 ms) +[----------] 2 tests from readwrite/apitns (2 ms total) + +[----------] 4 tests from scalar/concrete +[ RUN ] scalar/concrete.notation/0 +[ OK ] scalar/concrete.notation/0 (0 ms) +[ RUN ] scalar/concrete.notation/1 +[ OK ] scalar/concrete.notation/1 (0 ms) +[ RUN ] scalar/concrete.notation/2 +[ OK ] scalar/concrete.notation/2 (1 ms) +[ RUN ] scalar/concrete.notation/3 +[ OK ] scalar/concrete.notation/3 (0 ms) +[----------] 4 tests from scalar/concrete (1 ms total) + +[----------] 2 tests from elwise/concrete +[ RUN ] elwise/concrete.notation/0 +[ OK ] elwise/concrete.notation/0 (0 ms) +[ RUN ] elwise/concrete.notation/1 +[ OK ] elwise/concrete.notation/1 (0 ms) +[----------] 2 tests from elwise/concrete (0 ms total) + +[----------] 3 tests from reduce_into_temporary/concrete +[ RUN ] reduce_into_temporary/concrete.notation/0 +[ OK ] reduce_into_temporary/concrete.notation/0 (0 ms) +[ RUN ] reduce_into_temporary/concrete.notation/1 +[ OK ] reduce_into_temporary/concrete.notation/1 (0 ms) +[ RUN ] reduce_into_temporary/concrete.notation/2 +[ OK ] reduce_into_temporary/concrete.notation/2 (0 ms) +[----------] 3 tests from reduce_into_temporary/concrete (0 ms total) + +[----------] 2 tests from separate_reductions/concrete +[ RUN ] separate_reductions/concrete.notation/0 +[ OK ] separate_reductions/concrete.notation/0 (0 ms) +[ RUN ] separate_reductions/concrete.notation/1 +[ OK ] separate_reductions/concrete.notation/1 (0 ms) +[----------] 2 tests from separate_reductions/concrete (0 ms total) + +[----------] 1 test from scalar/storage +[ RUN ] scalar/storage.pack/0 +[ OK ] scalar/storage.pack/0 (0 ms) +[----------] 1 test from scalar/storage (0 ms total) + +[----------] 4 tests from vector/storage +[ RUN ] vector/storage.pack/0 +[ OK ] vector/storage.pack/0 (0 ms) +[ RUN ] vector/storage.pack/1 +[ OK ] vector/storage.pack/1 (0 ms) +[ RUN ] vector/storage.pack/2 +[ OK ] vector/storage.pack/2 (0 ms) +[ RUN ] vector/storage.pack/3 +[ OK ] vector/storage.pack/3 (1 ms) +[----------] 4 tests from vector/storage (1 ms total) + +[----------] 4 tests from matrix/storage +[ RUN ] matrix/storage.pack/0 +[ OK ] matrix/storage.pack/0 (0 ms) +[ RUN ] matrix/storage.pack/1 +[ OK ] matrix/storage.pack/1 (0 ms) +[ RUN ] matrix/storage.pack/2 +[ OK ] matrix/storage.pack/2 (0 ms) +[ RUN ] matrix/storage.pack/3 +[ OK ] matrix/storage.pack/3 (0 ms) +[----------] 4 tests from matrix/storage (0 ms total) + +[----------] 3 tests from fixed/storage +[ RUN ] fixed/storage.pack/0 +[ OK ] fixed/storage.pack/0 (0 ms) +[ RUN ] fixed/storage.pack/1 +[ OK ] fixed/storage.pack/1 (0 ms) +[ RUN ] fixed/storage.pack/2 +[ OK ] fixed/storage.pack/2 (1 ms) +[----------] 3 tests from fixed/storage (1 ms total) + +[----------] 1 test from matrix_blocked/storage +[ RUN ] matrix_blocked/storage.pack/0 +[ OK ] matrix_blocked/storage.pack/0 (190 ms) +[----------] 1 test from matrix_blocked/storage (190 ms total) + +[----------] 4 tests from matrix_col/storage +[ RUN ] matrix_col/storage.pack/0 +[ OK ] matrix_col/storage.pack/0 (0 ms) +[ RUN ] matrix_col/storage.pack/1 +[ OK ] matrix_col/storage.pack/1 (0 ms) +[ RUN ] matrix_col/storage.pack/2 +[ OK ] matrix_col/storage.pack/2 (0 ms) +[ RUN ] matrix_col/storage.pack/3 +[ OK ] matrix_col/storage.pack/3 (1 ms) +[----------] 4 tests from matrix_col/storage (2 ms total) + +[----------] 8 tests from tensor3/storage +[ RUN ] tensor3/storage.pack/0 +[ OK ] tensor3/storage.pack/0 (0 ms) +[ RUN ] tensor3/storage.pack/1 +[ OK ] tensor3/storage.pack/1 (0 ms) +[ RUN ] tensor3/storage.pack/2 +[ OK ] tensor3/storage.pack/2 (0 ms) +[ RUN ] tensor3/storage.pack/3 +[ OK ] tensor3/storage.pack/3 (0 ms) +[ RUN ] tensor3/storage.pack/4 +[ OK ] tensor3/storage.pack/4 (0 ms) +[ RUN ] tensor3/storage.pack/5 +[ OK ] tensor3/storage.pack/5 (0 ms) +[ RUN ] tensor3/storage.pack/6 +[ OK ] tensor3/storage.pack/6 (1 ms) +[ RUN ] tensor3/storage.pack/7 +[ OK ] tensor3/storage.pack/7 (0 ms) +[----------] 8 tests from tensor3/storage (1 ms total) + +[----------] 1 test from vector_elwise_sqrt/parafac +[ RUN ] vector_elwise_sqrt/parafac.eval/0 +[ OK ] vector_elwise_sqrt/parafac.eval/0 (133 ms) +[----------] 1 test from vector_elwise_sqrt/parafac (134 ms total) + +[----------] 1 test from matrix_mul/parafac +[ RUN ] matrix_mul/parafac.eval/0 +[ OK ] matrix_mul/parafac.eval/0 (147 ms) +[----------] 1 test from matrix_mul/parafac (147 ms total) + +[----------] 1 test from matrix_transpose_mul/parafac +[ RUN ] matrix_transpose_mul/parafac.eval/0 +[ OK ] matrix_transpose_mul/parafac.eval/0 (144 ms) +[----------] 1 test from matrix_transpose_mul/parafac (144 ms total) + +[----------] 2 tests from matrix_column_squared_norm/parafac +[ RUN ] matrix_column_squared_norm/parafac.eval/0 +[ OK ] matrix_column_squared_norm/parafac.eval/0 (136 ms) +[ RUN ] matrix_column_squared_norm/parafac.eval/1 +[ OK ] matrix_column_squared_norm/parafac.eval/1 (1 ms) +[----------] 2 tests from matrix_column_squared_norm/parafac (138 ms total) + +[----------] 1 test from matrix_column_normalize/parafac +[ RUN ] matrix_column_normalize/parafac.eval/0 +[ OK ] matrix_column_normalize/parafac.eval/0 (140 ms) +[----------] 1 test from matrix_column_normalize/parafac (140 ms total) + +[----------] 1 test from mttkrp1/parafac +[ RUN ] mttkrp1/parafac.eval/0 +[ OK ] mttkrp1/parafac.eval/0 (289 ms) +[----------] 1 test from mttkrp1/parafac (289 ms total) + +[----------] 1 test from mttkrp2/parafac +[ RUN ] mttkrp2/parafac.eval/0 +[ OK ] mttkrp2/parafac.eval/0 (327 ms) +[----------] 1 test from mttkrp2/parafac (327 ms total) + +[----------] 1 test from mttkrp3/parafac +[ RUN ] mttkrp3/parafac.eval/0 +[ OK ] mttkrp3/parafac.eval/0 (333 ms) +[----------] 1 test from mttkrp3/parafac (333 ms total) + +[----------] 1 test from tensor_squared_norm/parafac +[ RUN ] tensor_squared_norm/parafac.eval/0 +[ OK ] tensor_squared_norm/parafac.eval/0 (143 ms) +[----------] 1 test from tensor_squared_norm/parafac (143 ms total) + +[----------] 1 test from factorized_tensor_squared_norm/parafac +[ RUN ] factorized_tensor_squared_norm/parafac.eval/0 +[ OK ] factorized_tensor_squared_norm/parafac.eval/0 (148 ms) +[----------] 1 test from factorized_tensor_squared_norm/parafac (148 ms total) + +[----------] 1 test from factorized_tensor_inner_product/parafac +[ RUN ] factorized_tensor_inner_product/parafac.eval/0 +[ OK ] factorized_tensor_inner_product/parafac.eval/0 (347 ms) +[----------] 1 test from factorized_tensor_inner_product/parafac (347 ms total) + +[----------] 1 test from kroneckerDense/parafac +[ RUN ] kroneckerDense/parafac.eval/0 +[ OK ] kroneckerDense/parafac.eval/0 (309 ms) +[----------] 1 test from kroneckerDense/parafac (310 ms total) + +[----------] 1 test from scalar_copy/lower +[ RUN ] scalar_copy/lower.compile/0 +[ OK ] scalar_copy/lower.compile/0 (126 ms) +[----------] 1 test from scalar_copy/lower (126 ms total) + +[----------] 1 test from scalar_neg/lower +[ RUN ] scalar_neg/lower.compile/0 +[ OK ] scalar_neg/lower.compile/0 (124 ms) +[----------] 1 test from scalar_neg/lower (124 ms total) + +[----------] 1 test from scalar_add/lower +[ RUN ] scalar_add/lower.compile/0 +[ OK ] scalar_add/lower.compile/0 (127 ms) +[----------] 1 test from scalar_add/lower (127 ms total) + +[----------] 1 test from scalar_sub/lower +[ RUN ] scalar_sub/lower.compile/0 +[ OK ] scalar_sub/lower.compile/0 (126 ms) +[----------] 1 test from scalar_sub/lower (126 ms total) + +[----------] 1 test from scalar_mul/lower +[ RUN ] scalar_mul/lower.compile/0 +[ OK ] scalar_mul/lower.compile/0 (126 ms) +[----------] 1 test from scalar_mul/lower (126 ms total) + +[----------] 1 test from scalar_div/lower +[ RUN ] scalar_div/lower.compile/0 +[ OK ] scalar_div/lower.compile/0 (126 ms) +[----------] 1 test from scalar_div/lower (126 ms total) + +[----------] 1 test from scalar_sqr/lower +[ RUN ] scalar_sqr/lower.compile/0 +[ OK ] scalar_sqr/lower.compile/0 (125 ms) +[----------] 1 test from scalar_sqr/lower (125 ms total) + +[----------] 4 tests from vector_neg/lower +[ RUN ] vector_neg/lower.compile/0 +[ OK ] vector_neg/lower.compile/0 (144 ms) +[ RUN ] vector_neg/lower.compile/1 +[ OK ] vector_neg/lower.compile/1 (140 ms) +[ RUN ] vector_neg/lower.compile/2 +[ OK ] vector_neg/lower.compile/2 (152 ms) +[ RUN ] vector_neg/lower.compile/3 +[ OK ] vector_neg/lower.compile/3 (154 ms) +[----------] 4 tests from vector_neg/lower (590 ms total) + +[----------] 5 tests from vector_mul/lower +[ RUN ] vector_mul/lower.compile/0 +[ OK ] vector_mul/lower.compile/0 (150 ms) +[ RUN ] vector_mul/lower.compile/1 +[ OK ] vector_mul/lower.compile/1 (139 ms) +[ RUN ] vector_mul/lower.compile/2 +[ OK ] vector_mul/lower.compile/2 (141 ms) +[ RUN ] vector_mul/lower.compile/3 +[ OK ] vector_mul/lower.compile/3 (148 ms) +[ RUN ] vector_mul/lower.compile/4 +[ OK ] vector_mul/lower.compile/4 (161 ms) +[----------] 5 tests from vector_mul/lower (739 ms total) + +[----------] 5 tests from vector_div/lower +[ RUN ] vector_div/lower.compile/0 +[ OK ] vector_div/lower.compile/0 (148 ms) +[ RUN ] vector_div/lower.compile/1 +[ OK ] vector_div/lower.compile/1 (139 ms) +[ RUN ] vector_div/lower.compile/2 +[ OK ] vector_div/lower.compile/2 (139 ms) +[ RUN ] vector_div/lower.compile/3 +[ OK ] vector_div/lower.compile/3 (150 ms) +[ RUN ] vector_div/lower.compile/4 +[ OK ] vector_div/lower.compile/4 (160 ms) +[----------] 5 tests from vector_div/lower (738 ms total) + +[----------] 5 tests from vector_intdiv/lower +[ RUN ] vector_intdiv/lower.compile/0 +[ OK ] vector_intdiv/lower.compile/0 (134 ms) +[ RUN ] vector_intdiv/lower.compile/1 +[ OK ] vector_intdiv/lower.compile/1 (140 ms) +[ RUN ] vector_intdiv/lower.compile/2 +[ OK ] vector_intdiv/lower.compile/2 (140 ms) +[ RUN ] vector_intdiv/lower.compile/3 +[ OK ] vector_intdiv/lower.compile/3 (147 ms) +[ RUN ] vector_intdiv/lower.compile/4 +[ OK ] vector_intdiv/lower.compile/4 (161 ms) +[----------] 5 tests from vector_intdiv/lower (723 ms total) + +[----------] 5 tests from vector_add/lower +[ RUN ] vector_add/lower.compile/0 +[ OK ] vector_add/lower.compile/0 (156 ms) +[ RUN ] vector_add/lower.compile/1 +[ OK ] vector_add/lower.compile/1 (175 ms) +[ RUN ] vector_add/lower.compile/2 +[ OK ] vector_add/lower.compile/2 (166 ms) +[ RUN ] vector_add/lower.compile/3 +[ OK ] vector_add/lower.compile/3 (163 ms) +[ RUN ] vector_add/lower.compile/4 +[ OK ] vector_add/lower.compile/4 (227 ms) +[----------] 5 tests from vector_add/lower (887 ms total) + +[----------] 4 tests from vector_sub/lower +[ RUN ] vector_sub/lower.compile/0 +[ OK ] vector_sub/lower.compile/0 (147 ms) +[ RUN ] vector_sub/lower.compile/1 +[ OK ] vector_sub/lower.compile/1 (165 ms) +[ RUN ] vector_sub/lower.compile/2 +[ OK ] vector_sub/lower.compile/2 (152 ms) +[ RUN ] vector_sub/lower.compile/3 +[ OK ] vector_sub/lower.compile/3 (186 ms) +[----------] 4 tests from vector_sub/lower (650 ms total) + +[----------] 4 tests from vector_inner_product/lower +[ RUN ] vector_inner_product/lower.compile/0 +[ OK ] vector_inner_product/lower.compile/0 (147 ms) +[ RUN ] vector_inner_product/lower.compile/1 +[ OK ] vector_inner_product/lower.compile/1 (133 ms) +[ RUN ] vector_inner_product/lower.compile/2 +[ OK ] vector_inner_product/lower.compile/2 (133 ms) +[ RUN ] vector_inner_product/lower.compile/3 +[ OK ] vector_inner_product/lower.compile/3 (140 ms) +[----------] 4 tests from vector_inner_product/lower (553 ms total) + +[----------] 2 tests from vector_or/lower +[ RUN ] vector_or/lower.compile/0 +[ OK ] vector_or/lower.compile/0 (134 ms) +[ RUN ] vector_or/lower.compile/1 +[ OK ] vector_or/lower.compile/1 (171 ms) +[----------] 2 tests from vector_or/lower (305 ms total) + +[----------] 2 tests from vector_and/lower +[ RUN ] vector_and/lower.compile/0 +[ OK ] vector_and/lower.compile/0 (136 ms) +[ RUN ] vector_and/lower.compile/1 +[ OK ] vector_and/lower.compile/1 (149 ms) +[----------] 2 tests from vector_and/lower (285 ms total) + +[----------] 16 tests from matrix_neg/lower +[ RUN ] matrix_neg/lower.compile/0 +[ OK ] matrix_neg/lower.compile/0 (303 ms) +[ RUN ] matrix_neg/lower.compile/1 +[ OK ] matrix_neg/lower.compile/1 (148 ms) +[ RUN ] matrix_neg/lower.compile/2 +[ OK ] matrix_neg/lower.compile/2 (165 ms) +[ RUN ] matrix_neg/lower.compile/3 +[ OK ] matrix_neg/lower.compile/3 (151 ms) +[ RUN ] matrix_neg/lower.compile/4 +[ OK ] matrix_neg/lower.compile/4 (340 ms) +[ RUN ] matrix_neg/lower.compile/5 +[ OK ] matrix_neg/lower.compile/5 (187 ms) +[ RUN ] matrix_neg/lower.compile/6 +[ OK ] matrix_neg/lower.compile/6 (187 ms) +[ RUN ] matrix_neg/lower.compile/7 +[ OK ] matrix_neg/lower.compile/7 (188 ms) +[ RUN ] matrix_neg/lower.compile/8 +[ OK ] matrix_neg/lower.compile/8 (328 ms) +[ RUN ] matrix_neg/lower.compile/9 +[ OK ] matrix_neg/lower.compile/9 (166 ms) +[ RUN ] matrix_neg/lower.compile/10 +[ OK ] matrix_neg/lower.compile/10 (174 ms) +[ RUN ] matrix_neg/lower.compile/11 +[ OK ] matrix_neg/lower.compile/11 (165 ms) +[ RUN ] matrix_neg/lower.compile/12 +[ OK ] matrix_neg/lower.compile/12 (186 ms) +[ RUN ] matrix_neg/lower.compile/13 +[ OK ] matrix_neg/lower.compile/13 (192 ms) +[ RUN ] matrix_neg/lower.compile/14 +[ OK ] matrix_neg/lower.compile/14 (187 ms) +[ RUN ] matrix_neg/lower.compile/15 +[ OK ] matrix_neg/lower.compile/15 (195 ms) +[----------] 16 tests from matrix_neg/lower (3264 ms total) + +[----------] 4 tests from matrix_sum/lower +[ RUN ] matrix_sum/lower.compile/0 +[ OK ] matrix_sum/lower.compile/0 (152 ms) +[ RUN ] matrix_sum/lower.compile/1 +[ OK ] matrix_sum/lower.compile/1 (154 ms) +[ RUN ] matrix_sum/lower.compile/2 +[ OK ] matrix_sum/lower.compile/2 (152 ms) +[ RUN ] matrix_sum/lower.compile/3 +[ OK ] matrix_sum/lower.compile/3 (157 ms) +[----------] 4 tests from matrix_sum/lower (615 ms total) + +[----------] 8 tests from matrix_rowsum/lower +[ RUN ] matrix_rowsum/lower.compile/0 +[ OK ] matrix_rowsum/lower.compile/0 (140 ms) +[ RUN ] matrix_rowsum/lower.compile/1 +[ OK ] matrix_rowsum/lower.compile/1 (144 ms) +[ RUN ] matrix_rowsum/lower.compile/2 +[ OK ] matrix_rowsum/lower.compile/2 (146 ms) +[ RUN ] matrix_rowsum/lower.compile/3 +[ OK ] matrix_rowsum/lower.compile/3 (147 ms) +[ RUN ] matrix_rowsum/lower.compile/4 +[ OK ] matrix_rowsum/lower.compile/4 (152 ms) +[ RUN ] matrix_rowsum/lower.compile/5 +[ OK ] matrix_rowsum/lower.compile/5 (156 ms) +[ RUN ] matrix_rowsum/lower.compile/6 +[ OK ] matrix_rowsum/lower.compile/6 (156 ms) +[ RUN ] matrix_rowsum/lower.compile/7 +[ OK ] matrix_rowsum/lower.compile/7 (161 ms) +[----------] 8 tests from matrix_rowsum/lower (1203 ms total) + +[----------] 4 tests from matrix_vector_mul/lower +[ RUN ] matrix_vector_mul/lower.compile/0 +[ OK ] matrix_vector_mul/lower.compile/0 (145 ms) +[ RUN ] matrix_vector_mul/lower.compile/1 +[ OK ] matrix_vector_mul/lower.compile/1 (146 ms) +[ RUN ] matrix_vector_mul/lower.compile/2 +[ OK ] matrix_vector_mul/lower.compile/2 (148 ms) +[ RUN ] matrix_vector_mul/lower.compile/3 +[ OK ] matrix_vector_mul/lower.compile/3 (150 ms) +[----------] 4 tests from matrix_vector_mul/lower (589 ms total) + +[----------] 16 tests from tensor_slicesum/lower +[ RUN ] tensor_slicesum/lower.compile/0 +[ OK ] tensor_slicesum/lower.compile/0 (150 ms) +[ RUN ] tensor_slicesum/lower.compile/1 +[ OK ] tensor_slicesum/lower.compile/1 (155 ms) +[ RUN ] tensor_slicesum/lower.compile/2 +[ OK ] tensor_slicesum/lower.compile/2 (152 ms) +[ RUN ] tensor_slicesum/lower.compile/3 +[ OK ] tensor_slicesum/lower.compile/3 (156 ms) +[ RUN ] tensor_slicesum/lower.compile/4 +[ OK ] tensor_slicesum/lower.compile/4 (181 ms) +[ RUN ] tensor_slicesum/lower.compile/5 +[ OK ] tensor_slicesum/lower.compile/5 (186 ms) +[ RUN ] tensor_slicesum/lower.compile/6 +[ OK ] tensor_slicesum/lower.compile/6 (181 ms) +[ RUN ] tensor_slicesum/lower.compile/7 +[ OK ] tensor_slicesum/lower.compile/7 (189 ms) +[ RUN ] tensor_slicesum/lower.compile/8 +[ OK ] tensor_slicesum/lower.compile/8 (164 ms) +[ RUN ] tensor_slicesum/lower.compile/9 +[ OK ] tensor_slicesum/lower.compile/9 (170 ms) +[ RUN ] tensor_slicesum/lower.compile/10 +[ OK ] tensor_slicesum/lower.compile/10 (172 ms) +[ RUN ] tensor_slicesum/lower.compile/11 +[ OK ] tensor_slicesum/lower.compile/11 (176 ms) +[ RUN ] tensor_slicesum/lower.compile/12 +[ OK ] tensor_slicesum/lower.compile/12 (184 ms) +[ RUN ] tensor_slicesum/lower.compile/13 +[ OK ] tensor_slicesum/lower.compile/13 (188 ms) +[ RUN ] tensor_slicesum/lower.compile/14 +[ OK ] tensor_slicesum/lower.compile/14 (189 ms) +[ RUN ] tensor_slicesum/lower.compile/15 +[ OK ] tensor_slicesum/lower.compile/15 (191 ms) +[----------] 16 tests from tensor_slicesum/lower (2784 ms total) + +[----------] 1 test from where_scalar/lower +[ RUN ] where_scalar/lower.compile/0 +[ OK ] where_scalar/lower.compile/0 (129 ms) +[----------] 1 test from where_scalar/lower (129 ms total) + +[----------] 2 tests from where_vector_sum/lower +[ RUN ] where_vector_sum/lower.compile/0 +[ OK ] where_vector_sum/lower.compile/0 (143 ms) +[ RUN ] where_vector_sum/lower.compile/1 +[ OK ] where_vector_sum/lower.compile/1 (148 ms) +[----------] 2 tests from where_vector_sum/lower (291 ms total) + +[----------] 4 tests from where_matrix_sum/lower +[ RUN ] where_matrix_sum/lower.compile/0 +[ OK ] where_matrix_sum/lower.compile/0 (153 ms) +[ RUN ] where_matrix_sum/lower.compile/1 +[ OK ] where_matrix_sum/lower.compile/1 (155 ms) +[ RUN ] where_matrix_sum/lower.compile/2 +[ OK ] where_matrix_sum/lower.compile/2 (158 ms) +[ RUN ] where_matrix_sum/lower.compile/3 +[ OK ] where_matrix_sum/lower.compile/3 (156 ms) +[----------] 4 tests from where_matrix_sum/lower (622 ms total) + +[----------] 4 tests from where_matrix_vector_mul/lower +[ RUN ] where_matrix_vector_mul/lower.compile/0 +[ OK ] where_matrix_vector_mul/lower.compile/0 (161 ms) +[ RUN ] where_matrix_vector_mul/lower.compile/1 +[ OK ] where_matrix_vector_mul/lower.compile/1 (144 ms) +[ RUN ] where_matrix_vector_mul/lower.compile/2 +[ OK ] where_matrix_vector_mul/lower.compile/2 (167 ms) +[ RUN ] where_matrix_vector_mul/lower.compile/3 +[ OK ] where_matrix_vector_mul/lower.compile/3 (150 ms) +[----------] 4 tests from where_matrix_vector_mul/lower (622 ms total) + +[----------] 2 tests from where_spmm/lower +[ RUN ] where_spmm/lower.compile/0 +[ OK ] where_spmm/lower.compile/0 (182 ms) +[ RUN ] where_spmm/lower.compile/1 +[ OK ] where_spmm/lower.compile/1 (223 ms) +[----------] 2 tests from where_spmm/lower (405 ms total) + +[----------] 1 test from sequence_scalar/lower +[ RUN ] sequence_scalar/lower.compile/0 +[ OK ] sequence_scalar/lower.compile/0 (127 ms) +[----------] 1 test from sequence_scalar/lower (127 ms total) + +[----------] 1 test from multi_scalar/lower +[ RUN ] multi_scalar/lower.compile/0 +[ OK ] multi_scalar/lower.compile/0 (128 ms) +[----------] 1 test from multi_scalar/lower (128 ms total) + +[----------] 4 tests from matrix_transposed_output/lower +[ RUN ] matrix_transposed_output/lower.compile/0 +[ OK ] matrix_transposed_output/lower.compile/0 (140 ms) +[ RUN ] matrix_transposed_output/lower.compile/1 +[ OK ] matrix_transposed_output/lower.compile/1 (149 ms) +[ RUN ] matrix_transposed_output/lower.compile/2 +[ OK ] matrix_transposed_output/lower.compile/2 (146 ms) +[ RUN ] matrix_transposed_output/lower.compile/3 +[ OK ] matrix_transposed_output/lower.compile/3 (149 ms) +[----------] 4 tests from matrix_transposed_output/lower (584 ms total) + +[----------] 4 tests from matrix_transposed_input/lower +[ RUN ] matrix_transposed_input/lower.compile/0 +[ OK ] matrix_transposed_input/lower.compile/0 (144 ms) +[ RUN ] matrix_transposed_input/lower.compile/1 +[ OK ] matrix_transposed_input/lower.compile/1 (212 ms) +[ RUN ] matrix_transposed_input/lower.compile/2 +[ OK ] matrix_transposed_input/lower.compile/2 (205 ms) +[ RUN ] matrix_transposed_input/lower.compile/3 +[ OK ] matrix_transposed_input/lower.compile/3 (280 ms) +[----------] 4 tests from matrix_transposed_input/lower (841 ms total) + +[----------] 4 tests from broadcast_vector_mul_scalar/lower +[ RUN ] broadcast_vector_mul_scalar/lower.compile/0 +[ OK ] broadcast_vector_mul_scalar/lower.compile/0 (144 ms) +[ RUN ] broadcast_vector_mul_scalar/lower.compile/1 +[ OK ] broadcast_vector_mul_scalar/lower.compile/1 (139 ms) +[ RUN ] broadcast_vector_mul_scalar/lower.compile/2 +[ OK ] broadcast_vector_mul_scalar/lower.compile/2 (152 ms) +[ RUN ] broadcast_vector_mul_scalar/lower.compile/3 +[ OK ] broadcast_vector_mul_scalar/lower.compile/3 (160 ms) +[----------] 4 tests from broadcast_vector_mul_scalar/lower (595 ms total) + +[----------] 4 tests from broadcast_vector_add_scalar/lower +[ RUN ] broadcast_vector_add_scalar/lower.compile/0 +[ OK ] broadcast_vector_add_scalar/lower.compile/0 (144 ms) +[ RUN ] broadcast_vector_add_scalar/lower.compile/1 +[ OK ] broadcast_vector_add_scalar/lower.compile/1 (160 ms) +[ RUN ] broadcast_vector_add_scalar/lower.compile/2 +[ OK ] broadcast_vector_add_scalar/lower.compile/2 (155 ms) +[ RUN ] broadcast_vector_add_scalar/lower.compile/3 +[ OK ] broadcast_vector_add_scalar/lower.compile/3 (180 ms) +[----------] 4 tests from broadcast_vector_add_scalar/lower (639 ms total) + +[----------] 4 tests from broadcast_vector_mul_constant/lower +[ RUN ] broadcast_vector_mul_constant/lower.compile/0 +[ OK ] broadcast_vector_mul_constant/lower.compile/0 (148 ms) +[ RUN ] broadcast_vector_mul_constant/lower.compile/1 +[ OK ] broadcast_vector_mul_constant/lower.compile/1 (141 ms) +[ RUN ] broadcast_vector_mul_constant/lower.compile/2 +[ OK ] broadcast_vector_mul_constant/lower.compile/2 (154 ms) +[ RUN ] broadcast_vector_mul_constant/lower.compile/3 +[ OK ] broadcast_vector_mul_constant/lower.compile/3 (161 ms) +[----------] 4 tests from broadcast_vector_mul_constant/lower (604 ms total) + +[----------] 4 tests from broadcast_vector_add_constant/lower +[ RUN ] broadcast_vector_add_constant/lower.compile/0 +[ OK ] broadcast_vector_add_constant/lower.compile/0 (147 ms) +[ RUN ] broadcast_vector_add_constant/lower.compile/1 +[ OK ] broadcast_vector_add_constant/lower.compile/1 (161 ms) +[ RUN ] broadcast_vector_add_constant/lower.compile/2 +[ OK ] broadcast_vector_add_constant/lower.compile/2 (156 ms) +[ RUN ] broadcast_vector_add_constant/lower.compile/3 +[ OK ] broadcast_vector_add_constant/lower.compile/3 (183 ms) +[----------] 4 tests from broadcast_vector_add_constant/lower (647 ms total) + +[----------] 4 tests from vector_mod/lower +[ RUN ] vector_mod/lower.compile/0 +[ OK ] vector_mod/lower.compile/0 (134 ms) +[ RUN ] vector_mod/lower.compile/1 +[ OK ] vector_mod/lower.compile/1 (143 ms) +[ RUN ] vector_mod/lower.compile/2 +[ OK ] vector_mod/lower.compile/2 (141 ms) +[ RUN ] vector_mod/lower.compile/3 +[ OK ] vector_mod/lower.compile/3 (150 ms) +[----------] 4 tests from vector_mod/lower (568 ms total) + +[----------] 2 tests from vector_abs/lower +[ RUN ] vector_abs/lower.compile/0 +[ OK ] vector_abs/lower.compile/0 (148 ms) +[ RUN ] vector_abs/lower.compile/1 +[ OK ] vector_abs/lower.compile/1 (141 ms) +[----------] 2 tests from vector_abs/lower (289 ms total) + +[----------] 2 tests from vector_pow_constant/lower +[ RUN ] vector_pow_constant/lower.compile/0 +[ OK ] vector_pow_constant/lower.compile/0 (151 ms) +[ RUN ] vector_pow_constant/lower.compile/1 +[ OK ] vector_pow_constant/lower.compile/1 (139 ms) +[----------] 2 tests from vector_pow_constant/lower (290 ms total) + +[----------] 4 tests from vector_pow_vector/lower +[ RUN ] vector_pow_vector/lower.compile/0 +[ OK ] vector_pow_vector/lower.compile/0 (137 ms) +[ RUN ] vector_pow_vector/lower.compile/1 +[ OK ] vector_pow_vector/lower.compile/1 (166 ms) +[ RUN ] vector_pow_vector/lower.compile/2 +[ OK ] vector_pow_vector/lower.compile/2 (145 ms) +[ RUN ] vector_pow_vector/lower.compile/3 +[ OK ] vector_pow_vector/lower.compile/3 (173 ms) +[----------] 4 tests from vector_pow_vector/lower (622 ms total) + +[----------] 2 tests from vector_square/lower +[ RUN ] vector_square/lower.compile/0 +[ OK ] vector_square/lower.compile/0 (147 ms) +[ RUN ] vector_square/lower.compile/1 +[ OK ] vector_square/lower.compile/1 (145 ms) +[----------] 2 tests from vector_square/lower (293 ms total) + +[----------] 2 tests from vector_cube/lower +[ RUN ] vector_cube/lower.compile/0 +[ OK ] vector_cube/lower.compile/0 (150 ms) +[ RUN ] vector_cube/lower.compile/1 +[ OK ] vector_cube/lower.compile/1 (144 ms) +[----------] 2 tests from vector_cube/lower (294 ms total) + +[----------] 2 tests from vector_sqrt/lower +[ RUN ] vector_sqrt/lower.compile/0 +[ OK ] vector_sqrt/lower.compile/0 (152 ms) +[ RUN ] vector_sqrt/lower.compile/1 +[ OK ] vector_sqrt/lower.compile/1 (147 ms) +[----------] 2 tests from vector_sqrt/lower (299 ms total) + +[----------] 4 tests from vector_product_sqrt/lower +[ RUN ] vector_product_sqrt/lower.compile/0 +[ OK ] vector_product_sqrt/lower.compile/0 (155 ms) +[ RUN ] vector_product_sqrt/lower.compile/1 +[ OK ] vector_product_sqrt/lower.compile/1 (149 ms) +[ RUN ] vector_product_sqrt/lower.compile/2 +[ OK ] vector_product_sqrt/lower.compile/2 (150 ms) +[ RUN ] vector_product_sqrt/lower.compile/3 +[ OK ] vector_product_sqrt/lower.compile/3 (154 ms) +[----------] 4 tests from vector_product_sqrt/lower (609 ms total) + +[----------] 4 tests from vector_sum_sqrt/lower +[ RUN ] vector_sum_sqrt/lower.compile/0 +[ OK ] vector_sum_sqrt/lower.compile/0 (154 ms) +[ RUN ] vector_sum_sqrt/lower.compile/1 +[ OK ] vector_sum_sqrt/lower.compile/1 (173 ms) +[ RUN ] vector_sum_sqrt/lower.compile/2 +[ OK ] vector_sum_sqrt/lower.compile/2 (172 ms) +[ RUN ] vector_sum_sqrt/lower.compile/3 +[ OK ] vector_sum_sqrt/lower.compile/3 (173 ms) +[----------] 4 tests from vector_sum_sqrt/lower (672 ms total) + +[----------] 2 tests from vector_cbrt/lower +[ RUN ] vector_cbrt/lower.compile/0 +[ OK ] vector_cbrt/lower.compile/0 (141 ms) +[ RUN ] vector_cbrt/lower.compile/1 +[ OK ] vector_cbrt/lower.compile/1 (146 ms) +[----------] 2 tests from vector_cbrt/lower (287 ms total) + +[----------] 2 tests from vector_exp/lower +[ RUN ] vector_exp/lower.compile/0 +[ OK ] vector_exp/lower.compile/0 (138 ms) +[ RUN ] vector_exp/lower.compile/1 +[ OK ] vector_exp/lower.compile/1 (170 ms) +[----------] 2 tests from vector_exp/lower (308 ms total) + +[----------] 4 tests from vector_product_exp/lower +[ RUN ] vector_product_exp/lower.compile/0 +[ OK ] vector_product_exp/lower.compile/0 (161 ms) +[ RUN ] vector_product_exp/lower.compile/1 +[ OK ] vector_product_exp/lower.compile/1 (168 ms) +[ RUN ] vector_product_exp/lower.compile/2 +[ OK ] vector_product_exp/lower.compile/2 (182 ms) +[ RUN ] vector_product_exp/lower.compile/3 +[ OK ] vector_product_exp/lower.compile/3 (189 ms) +[----------] 4 tests from vector_product_exp/lower (701 ms total) + +[----------] 2 tests from vector_log/lower +[ RUN ] vector_log/lower.compile/0 +[ OK ] vector_log/lower.compile/0 (145 ms) +[ RUN ] vector_log/lower.compile/1 +[ OK ] vector_log/lower.compile/1 (169 ms) +[----------] 2 tests from vector_log/lower (314 ms total) + +[----------] 2 tests from vector_log10/lower +[ RUN ] vector_log10/lower.compile/0 +[ OK ] vector_log10/lower.compile/0 (141 ms) +[ RUN ] vector_log10/lower.compile/1 +[ OK ] vector_log10/lower.compile/1 (221 ms) +[----------] 2 tests from vector_log10/lower (362 ms total) + +[----------] 2 tests from vector_sin/lower +[ RUN ] vector_sin/lower.compile/0 +[ OK ] vector_sin/lower.compile/0 (141 ms) +[ RUN ] vector_sin/lower.compile/1 +[ OK ] vector_sin/lower.compile/1 (146 ms) +[----------] 2 tests from vector_sin/lower (287 ms total) + +[----------] 2 tests from vector_cos/lower +[ RUN ] vector_cos/lower.compile/0 +[ OK ] vector_cos/lower.compile/0 (144 ms) +[ RUN ] vector_cos/lower.compile/1 +[ OK ] vector_cos/lower.compile/1 (189 ms) +[----------] 2 tests from vector_cos/lower (333 ms total) + +[----------] 2 tests from vector_tan/lower +[ RUN ] vector_tan/lower.compile/0 +[ OK ] vector_tan/lower.compile/0 (144 ms) +[ RUN ] vector_tan/lower.compile/1 +[ OK ] vector_tan/lower.compile/1 (146 ms) +[----------] 2 tests from vector_tan/lower (290 ms total) + +[----------] 2 tests from vector_asin/lower +[ RUN ] vector_asin/lower.compile/0 +[ OK ] vector_asin/lower.compile/0 (155 ms) +[ RUN ] vector_asin/lower.compile/1 +[ OK ] vector_asin/lower.compile/1 (146 ms) +[----------] 2 tests from vector_asin/lower (301 ms total) + +[----------] 2 tests from vector_acos/lower +[ RUN ] vector_acos/lower.compile/0 +[ OK ] vector_acos/lower.compile/0 (139 ms) +[ RUN ] vector_acos/lower.compile/1 +[ OK ] vector_acos/lower.compile/1 (174 ms) +[----------] 2 tests from vector_acos/lower (313 ms total) + +[----------] 2 tests from vector_atan/lower +[ RUN ] vector_atan/lower.compile/0 +[ OK ] vector_atan/lower.compile/0 (141 ms) +[ RUN ] vector_atan/lower.compile/1 +[ OK ] vector_atan/lower.compile/1 (146 ms) +[----------] 2 tests from vector_atan/lower (287 ms total) + +[----------] 2 tests from vector_atan2_constant/lower +[ RUN ] vector_atan2_constant/lower.compile/0 +[ OK ] vector_atan2_constant/lower.compile/0 (145 ms) +[ RUN ] vector_atan2_constant/lower.compile/1 +[ OK ] vector_atan2_constant/lower.compile/1 (143 ms) +[----------] 2 tests from vector_atan2_constant/lower (288 ms total) + +[----------] 4 tests from vector_atan2_vector/lower +[ RUN ] vector_atan2_vector/lower.compile/0 +[ OK ] vector_atan2_vector/lower.compile/0 (137 ms) +[ RUN ] vector_atan2_vector/lower.compile/1 +[ OK ] vector_atan2_vector/lower.compile/1 (159 ms) +[ RUN ] vector_atan2_vector/lower.compile/2 +[ OK ] vector_atan2_vector/lower.compile/2 (189 ms) +[ RUN ] vector_atan2_vector/lower.compile/3 +[ OK ] vector_atan2_vector/lower.compile/3 (178 ms) +[----------] 4 tests from vector_atan2_vector/lower (663 ms total) + +[----------] 2 tests from vector_sinh/lower +[ RUN ] vector_sinh/lower.compile/0 +[ OK ] vector_sinh/lower.compile/0 (141 ms) +[ RUN ] vector_sinh/lower.compile/1 +[ OK ] vector_sinh/lower.compile/1 (144 ms) +[----------] 2 tests from vector_sinh/lower (285 ms total) + +[----------] 2 tests from vector_cosh/lower +[ RUN ] vector_cosh/lower.compile/0 +[ OK ] vector_cosh/lower.compile/0 (139 ms) +[ RUN ] vector_cosh/lower.compile/1 +[ OK ] vector_cosh/lower.compile/1 (165 ms) +[----------] 2 tests from vector_cosh/lower (304 ms total) + +[----------] 2 tests from vector_tanh/lower +[ RUN ] vector_tanh/lower.compile/0 +[ OK ] vector_tanh/lower.compile/0 (137 ms) +[ RUN ] vector_tanh/lower.compile/1 +[ OK ] vector_tanh/lower.compile/1 (144 ms) +[----------] 2 tests from vector_tanh/lower (281 ms total) + +[----------] 2 tests from vector_asinh/lower +[ RUN ] vector_asinh/lower.compile/0 +[ OK ] vector_asinh/lower.compile/0 (171 ms) +[ RUN ] vector_asinh/lower.compile/1 +[ OK ] vector_asinh/lower.compile/1 (151 ms) +[----------] 2 tests from vector_asinh/lower (322 ms total) + +[----------] 2 tests from vector_acosh/lower +[ RUN ] vector_acosh/lower.compile/0 +[ OK ] vector_acosh/lower.compile/0 (138 ms) +[ RUN ] vector_acosh/lower.compile/1 +[ OK ] vector_acosh/lower.compile/1 (173 ms) +[----------] 2 tests from vector_acosh/lower (311 ms total) + +[----------] 2 tests from vector_atanh/lower +[ RUN ] vector_atanh/lower.compile/0 +[ OK ] vector_atanh/lower.compile/0 (140 ms) +[ RUN ] vector_atanh/lower.compile/1 +[ OK ] vector_atanh/lower.compile/1 (160 ms) +[----------] 2 tests from vector_atanh/lower (300 ms total) + +[----------] 2 tests from vector_gt_positive_constant/lower +[ RUN ] vector_gt_positive_constant/lower.compile/0 +[ OK ] vector_gt_positive_constant/lower.compile/0 (167 ms) +[ RUN ] vector_gt_positive_constant/lower.compile/1 +[ OK ] vector_gt_positive_constant/lower.compile/1 (146 ms) +[----------] 2 tests from vector_gt_positive_constant/lower (313 ms total) + +[----------] 2 tests from vector_gt_negative_constant/lower +[ RUN ] vector_gt_negative_constant/lower.compile/0 +[ OK ] vector_gt_negative_constant/lower.compile/0 (153 ms) +[ RUN ] vector_gt_negative_constant/lower.compile/1 +[ OK ] vector_gt_negative_constant/lower.compile/1 (184 ms) +[----------] 2 tests from vector_gt_negative_constant/lower (337 ms total) + +[----------] 4 tests from vector_gt_vector/lower +[ RUN ] vector_gt_vector/lower.compile/0 +[ OK ] vector_gt_vector/lower.compile/0 (152 ms) +[ RUN ] vector_gt_vector/lower.compile/1 +[ OK ] vector_gt_vector/lower.compile/1 (176 ms) +[ RUN ] vector_gt_vector/lower.compile/2 +[ OK ] vector_gt_vector/lower.compile/2 (201 ms) +[ RUN ] vector_gt_vector/lower.compile/3 +[ OK ] vector_gt_vector/lower.compile/3 (176 ms) +[----------] 4 tests from vector_gt_vector/lower (705 ms total) + +[----------] 2 tests from vector_lt_positive_constant/lower +[ RUN ] vector_lt_positive_constant/lower.compile/0 +[ OK ] vector_lt_positive_constant/lower.compile/0 (191 ms) +[ RUN ] vector_lt_positive_constant/lower.compile/1 +[ OK ] vector_lt_positive_constant/lower.compile/1 (166 ms) +[----------] 2 tests from vector_lt_positive_constant/lower (357 ms total) + +[----------] 2 tests from vector_lt_negative_constant/lower +[ RUN ] vector_lt_negative_constant/lower.compile/0 +[ OK ] vector_lt_negative_constant/lower.compile/0 (152 ms) +[ RUN ] vector_lt_negative_constant/lower.compile/1 +[ OK ] vector_lt_negative_constant/lower.compile/1 (148 ms) +[----------] 2 tests from vector_lt_negative_constant/lower (300 ms total) + +[----------] 4 tests from vector_lt_vector/lower +[ RUN ] vector_lt_vector/lower.compile/0 +[ OK ] vector_lt_vector/lower.compile/0 (176 ms) +[ RUN ] vector_lt_vector/lower.compile/1 +[ OK ] vector_lt_vector/lower.compile/1 (184 ms) +[ RUN ] vector_lt_vector/lower.compile/2 +[ OK ] vector_lt_vector/lower.compile/2 (176 ms) +[ RUN ] vector_lt_vector/lower.compile/3 +[ OK ] vector_lt_vector/lower.compile/3 (178 ms) +[----------] 4 tests from vector_lt_vector/lower (714 ms total) + +[----------] 2 tests from vector_gte_zero/lower +[ RUN ] vector_gte_zero/lower.compile/0 +[ OK ] vector_gte_zero/lower.compile/0 (149 ms) +[ RUN ] vector_gte_zero/lower.compile/1 +[ OK ] vector_gte_zero/lower.compile/1 (168 ms) +[----------] 2 tests from vector_gte_zero/lower (317 ms total) + +[----------] 4 tests from vector_gte_vector/lower +[ RUN ] vector_gte_vector/lower.compile/0 +[ OK ] vector_gte_vector/lower.compile/0 (148 ms) +[ RUN ] vector_gte_vector/lower.compile/1 +[ OK ] vector_gte_vector/lower.compile/1 (169 ms) +[ RUN ] vector_gte_vector/lower.compile/2 +[ OK ] vector_gte_vector/lower.compile/2 (174 ms) +[ RUN ] vector_gte_vector/lower.compile/3 +[ OK ] vector_gte_vector/lower.compile/3 (198 ms) +[----------] 4 tests from vector_gte_vector/lower (690 ms total) + +[----------] 2 tests from vector_lte_zero/lower +[ RUN ] vector_lte_zero/lower.compile/0 +[ OK ] vector_lte_zero/lower.compile/0 (148 ms) +[ RUN ] vector_lte_zero/lower.compile/1 +[ OK ] vector_lte_zero/lower.compile/1 (163 ms) +[----------] 2 tests from vector_lte_zero/lower (311 ms total) + +[----------] 4 tests from vector_lte_vector/lower +[ RUN ] vector_lte_vector/lower.compile/0 +[ OK ] vector_lte_vector/lower.compile/0 (155 ms) +[ RUN ] vector_lte_vector/lower.compile/1 +[ OK ] vector_lte_vector/lower.compile/1 (171 ms) +[ RUN ] vector_lte_vector/lower.compile/2 +[ OK ] vector_lte_vector/lower.compile/2 (170 ms) +[ RUN ] vector_lte_vector/lower.compile/3 +[ OK ] vector_lte_vector/lower.compile/3 (199 ms) +[----------] 4 tests from vector_lte_vector/lower (695 ms total) + +[----------] 2 tests from vector_eq/lower +[ RUN ] vector_eq/lower.compile/0 +[ OK ] vector_eq/lower.compile/0 (154 ms) +[ RUN ] vector_eq/lower.compile/1 +[ OK ] vector_eq/lower.compile/1 (200 ms) +[----------] 2 tests from vector_eq/lower (354 ms total) + +[----------] 2 tests from vector_neq/lower +[ RUN ] vector_neq/lower.compile/0 +[ OK ] vector_neq/lower.compile/0 (151 ms) +[ RUN ] vector_neq/lower.compile/1 +[ OK ] vector_neq/lower.compile/1 (173 ms) +[----------] 2 tests from vector_neq/lower (324 ms total) + +[----------] 4 tests from vector_max/lower +[ RUN ] vector_max/lower.compile/0 +[ OK ] vector_max/lower.compile/0 (147 ms) +[ RUN ] vector_max/lower.compile/1 +[ OK ] vector_max/lower.compile/1 (168 ms) +[ RUN ] vector_max/lower.compile/2 +[ OK ] vector_max/lower.compile/2 (168 ms) +[ RUN ] vector_max/lower.compile/3 +[ OK ] vector_max/lower.compile/3 (167 ms) +[----------] 4 tests from vector_max/lower (650 ms total) + +[----------] 4 tests from vector_min/lower +[ RUN ] vector_min/lower.compile/0 +[ OK ] vector_min/lower.compile/0 (148 ms) +[ RUN ] vector_min/lower.compile/1 +[ OK ] vector_min/lower.compile/1 (166 ms) +[ RUN ] vector_min/lower.compile/2 +[ OK ] vector_min/lower.compile/2 (168 ms) +[ RUN ] vector_min/lower.compile/3 +[ OK ] vector_min/lower.compile/3 (167 ms) +[----------] 4 tests from vector_min/lower (649 ms total) + +[----------] 2 tests from vector_heaviside/lower +[ RUN ] vector_heaviside/lower.compile/0 +[ OK ] vector_heaviside/lower.compile/0 (151 ms) +[ RUN ] vector_heaviside/lower.compile/1 +[ OK ] vector_heaviside/lower.compile/1 (142 ms) +[----------] 2 tests from vector_heaviside/lower (293 ms total) + +[----------] 2 tests from vector_heaviside_half_maximum/lower +[ RUN ] vector_heaviside_half_maximum/lower.compile/0 +[ OK ] vector_heaviside_half_maximum/lower.compile/0 (139 ms) +[ RUN ] vector_heaviside_half_maximum/lower.compile/1 +[ OK ] vector_heaviside_half_maximum/lower.compile/1 (166 ms) +[----------] 2 tests from vector_heaviside_half_maximum/lower (305 ms total) + +[----------] 2 tests from vector_not/lower +[ RUN ] vector_not/lower.compile/0 +[ OK ] vector_not/lower.compile/0 (151 ms) +[ RUN ] vector_not/lower.compile/1 +[ OK ] vector_not/lower.compile/1 (165 ms) +[----------] 2 tests from vector_not/lower (316 ms total) + +[----------] 16 tests from windowing/basic +[ RUN ] windowing/basic.windowing/0 +[ OK ] windowing/basic.windowing/0 (424 ms) +[ RUN ] windowing/basic.windowing/1 +[ OK ] windowing/basic.windowing/1 (457 ms) +[ RUN ] windowing/basic.windowing/2 +[ OK ] windowing/basic.windowing/2 (289 ms) +[ RUN ] windowing/basic.windowing/3 +[ OK ] windowing/basic.windowing/3 (317 ms) +[ RUN ] windowing/basic.windowing/4 +[ OK ] windowing/basic.windowing/4 (289 ms) +[ RUN ] windowing/basic.windowing/5 +[ OK ] windowing/basic.windowing/5 (324 ms) +[ RUN ] windowing/basic.windowing/6 +[ OK ] windowing/basic.windowing/6 (322 ms) +[ RUN ] windowing/basic.windowing/7 +[ OK ] windowing/basic.windowing/7 (372 ms) +[ RUN ] windowing/basic.windowing/8 +[ OK ] windowing/basic.windowing/8 (420 ms) +[ RUN ] windowing/basic.windowing/9 +[ OK ] windowing/basic.windowing/9 (462 ms) +[ RUN ] windowing/basic.windowing/10 +[ OK ] windowing/basic.windowing/10 (298 ms) +[ RUN ] windowing/basic.windowing/11 +[ OK ] windowing/basic.windowing/11 (323 ms) +[ RUN ] windowing/basic.windowing/12 +[ OK ] windowing/basic.windowing/12 (293 ms) +[ RUN ] windowing/basic.windowing/13 +[ OK ] windowing/basic.windowing/13 (322 ms) +[ RUN ] windowing/basic.windowing/14 +[ OK ] windowing/basic.windowing/14 (323 ms) +[ RUN ] windowing/basic.windowing/15 +[ OK ] windowing/basic.windowing/15 (378 ms) +[----------] 16 tests from windowing/basic (5613 ms total) + +[----------] 4 tests from windowing/slicedOutput +[ RUN ] windowing/slicedOutput.windowing/0 +[ OK ] windowing/slicedOutput.windowing/0 (132 ms) +[ RUN ] windowing/slicedOutput.windowing/1 +[ OK ] windowing/slicedOutput.windowing/1 (149 ms) +[ RUN ] windowing/slicedOutput.windowing/2 +[ OK ] windowing/slicedOutput.windowing/2 (147 ms) +[ RUN ] windowing/slicedOutput.windowing/3 +[ OK ] windowing/slicedOutput.windowing/3 (160 ms) +[----------] 4 tests from windowing/slicedOutput (588 ms total) + +[----------] 4 tests from windowing/matrixMultiply +[ RUN ] windowing/matrixMultiply.windowing/0 +[ OK ] windowing/matrixMultiply.windowing/0 (718 ms) +[ RUN ] windowing/matrixMultiply.windowing/1 +[ OK ] windowing/matrixMultiply.windowing/1 (279 ms) +[ RUN ] windowing/matrixMultiply.windowing/2 +[ OK ] windowing/matrixMultiply.windowing/2 (277 ms) +[ RUN ] windowing/matrixMultiply.windowing/3 +[ OK ] windowing/matrixMultiply.windowing/3 (148 ms) +[----------] 4 tests from windowing/matrixMultiply (1422 ms total) + +[----------] 4 tests from windowing/workspace +[ RUN ] windowing/workspace.windowing/0 +[ OK ] windowing/workspace.windowing/0 (272 ms) +[ RUN ] windowing/workspace.windowing/1 +[ OK ] windowing/workspace.windowing/1 (143 ms) +[ RUN ] windowing/workspace.windowing/2 +[ OK ] windowing/workspace.windowing/2 (145 ms) +[ RUN ] windowing/workspace.windowing/3 +[ OK ] windowing/workspace.windowing/3 (159 ms) +[----------] 4 tests from windowing/workspace (719 ms total) + +[----------] 2 tests from windowing/assignment +[ RUN ] windowing/assignment.windowing/0 +[ OK ] windowing/assignment.windowing/0 (399 ms) +[ RUN ] windowing/assignment.windowing/1 +[ OK ] windowing/assignment.windowing/1 (581 ms) +[----------] 2 tests from windowing/assignment (981 ms total) + +[----------] 4 tests from windowing/cuda +[ RUN ] windowing/cuda.windowing/0 +[ OK ] windowing/cuda.windowing/0 (0 ms) +[ RUN ] windowing/cuda.windowing/1 +[ OK ] windowing/cuda.windowing/1 (0 ms) +[ RUN ] windowing/cuda.windowing/2 +[ OK ] windowing/cuda.windowing/2 (0 ms) +[ RUN ] windowing/cuda.windowing/3 +[ OK ] windowing/cuda.windowing/3 (0 ms) +[----------] 4 tests from windowing/cuda (0 ms total) + +[----------] 4 tests from windowing/stride +[ RUN ] windowing/stride.windowing/0 +[ OK ] windowing/stride.windowing/0 (519 ms) +[ RUN ] windowing/stride.windowing/1 +[ OK ] windowing/stride.windowing/1 (292 ms) +[ RUN ] windowing/stride.windowing/2 +[ OK ] windowing/stride.windowing/2 (559 ms) +[ RUN ] windowing/stride.windowing/3 +[ OK ] windowing/stride.windowing/3 (311 ms) +[----------] 4 tests from windowing/stride (1681 ms total) + +[----------] 8 tests from windowing/indexSetVectors +[ RUN ] windowing/indexSetVectors.windowing/0 +[ OK ] windowing/indexSetVectors.windowing/0 (264 ms) +[ RUN ] windowing/indexSetVectors.windowing/1 +[ OK ] windowing/indexSetVectors.windowing/1 (276 ms) +[ RUN ] windowing/indexSetVectors.windowing/2 +[ OK ] windowing/indexSetVectors.windowing/2 (276 ms) +[ RUN ] windowing/indexSetVectors.windowing/3 +[ OK ] windowing/indexSetVectors.windowing/3 (301 ms) +[ RUN ] windowing/indexSetVectors.windowing/4 +[ OK ] windowing/indexSetVectors.windowing/4 (276 ms) +[ RUN ] windowing/indexSetVectors.windowing/5 +[ OK ] windowing/indexSetVectors.windowing/5 (307 ms) +[ RUN ] windowing/indexSetVectors.windowing/6 +[ OK ] windowing/indexSetVectors.windowing/6 (309 ms) +[ RUN ] windowing/indexSetVectors.windowing/7 +[ OK ] windowing/indexSetVectors.windowing/7 (353 ms) +[----------] 8 tests from windowing/indexSetVectors (2362 ms total) + +[----------] 8 tests from windowing/indexSetMatrices +[ RUN ] windowing/indexSetMatrices.windowing/0 +[ OK ] windowing/indexSetMatrices.windowing/0 (301 ms) +[ RUN ] windowing/indexSetMatrices.windowing/1 +[ OK ] windowing/indexSetMatrices.windowing/1 (156 ms) +[ RUN ] windowing/indexSetMatrices.windowing/2 +[ OK ] windowing/indexSetMatrices.windowing/2 (156 ms) +[ RUN ] windowing/indexSetMatrices.windowing/3 +[ OK ] windowing/indexSetMatrices.windowing/3 (167 ms) +[ RUN ] windowing/indexSetMatrices.windowing/4 +[ OK ] windowing/indexSetMatrices.windowing/4 (314 ms) +[ RUN ] windowing/indexSetMatrices.windowing/5 +[ OK ] windowing/indexSetMatrices.windowing/5 (183 ms) +[ RUN ] windowing/indexSetMatrices.windowing/6 +[ OK ] windowing/indexSetMatrices.windowing/6 (186 ms) +[ RUN ] windowing/indexSetMatrices.windowing/7 +[ OK ] windowing/indexSetMatrices.windowing/7 (223 ms) +[----------] 8 tests from windowing/indexSetMatrices (1686 ms total) + +[----------] 5 tests from reorder/precondition +[ RUN ] reorder/precondition.transformations/0 +[ OK ] reorder/precondition.transformations/0 (0 ms) +[ RUN ] reorder/precondition.transformations/1 +[ OK ] reorder/precondition.transformations/1 (0 ms) +[ RUN ] reorder/precondition.transformations/2 +[ OK ] reorder/precondition.transformations/2 (0 ms) +[ RUN ] reorder/precondition.transformations/3 +[ OK ] reorder/precondition.transformations/3 (0 ms) +[ RUN ] reorder/precondition.transformations/4 +[ OK ] reorder/precondition.transformations/4 (0 ms) +[----------] 5 tests from reorder/precondition (0 ms total) + +[----------] 6 tests from parallelize/precondition +[ RUN ] parallelize/precondition.transformations/0 +[ OK ] parallelize/precondition.transformations/0 (0 ms) +[ RUN ] parallelize/precondition.transformations/1 +[ OK ] parallelize/precondition.transformations/1 (0 ms) +[ RUN ] parallelize/precondition.transformations/2 +[ OK ] parallelize/precondition.transformations/2 (0 ms) +[ RUN ] parallelize/precondition.transformations/3 +[ OK ] parallelize/precondition.transformations/3 (0 ms) +[ RUN ] parallelize/precondition.transformations/4 +[ OK ] parallelize/precondition.transformations/4 (0 ms) +[ RUN ] parallelize/precondition.transformations/5 +[ OK ] parallelize/precondition.transformations/5 (0 ms) +[----------] 6 tests from parallelize/precondition (2 ms total) + +[----------] 7 tests from reorder/apply +[ RUN ] reorder/apply.transformations/0 +[ OK ] reorder/apply.transformations/0 (0 ms) +[ RUN ] reorder/apply.transformations/1 +[ OK ] reorder/apply.transformations/1 (0 ms) +[ RUN ] reorder/apply.transformations/2 +[ OK ] reorder/apply.transformations/2 (0 ms) +[ RUN ] reorder/apply.transformations/3 +[ OK ] reorder/apply.transformations/3 (0 ms) +[ RUN ] reorder/apply.transformations/4 +[ OK ] reorder/apply.transformations/4 (0 ms) +[ RUN ] reorder/apply.transformations/5 +[ OK ] reorder/apply.transformations/5 (1 ms) +[ RUN ] reorder/apply.transformations/6 +[ OK ] reorder/apply.transformations/6 (0 ms) +[----------] 7 tests from reorder/apply (1 ms total) + +[----------] 1 test from precompute/apply +[ RUN ] precompute/apply.transformations/0 +[ OK ] precompute/apply.transformations/0 (0 ms) +[----------] 1 test from precompute/apply (0 ms total) + +[----------] 3 tests from parallelize/apply +[ RUN ] parallelize/apply.transformations/0 +[ OK ] parallelize/apply.transformations/0 (0 ms) +[ RUN ] parallelize/apply.transformations/1 +[ OK ] parallelize/apply.transformations/1 (0 ms) +[ RUN ] parallelize/apply.transformations/2 +[ OK ] parallelize/apply.transformations/2 (0 ms) +[----------] 3 tests from parallelize/apply (0 ms total) + +[----------] 13 tests from misc/reorderLoopsTopologically +[ RUN ] misc/reorderLoopsTopologically.test/0 +[ OK ] misc/reorderLoopsTopologically.test/0 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/1 +[ OK ] misc/reorderLoopsTopologically.test/1 (1 ms) +[ RUN ] misc/reorderLoopsTopologically.test/2 +[ OK ] misc/reorderLoopsTopologically.test/2 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/3 +[ OK ] misc/reorderLoopsTopologically.test/3 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/4 +[ OK ] misc/reorderLoopsTopologically.test/4 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/5 +[ OK ] misc/reorderLoopsTopologically.test/5 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/6 +[ OK ] misc/reorderLoopsTopologically.test/6 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/7 +[ OK ] misc/reorderLoopsTopologically.test/7 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/8 +[ OK ] misc/reorderLoopsTopologically.test/8 (1 ms) +[ RUN ] misc/reorderLoopsTopologically.test/9 +[ OK ] misc/reorderLoopsTopologically.test/9 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/10 +[ OK ] misc/reorderLoopsTopologically.test/10 (0 ms) +[ RUN ] misc/reorderLoopsTopologically.test/11 +[ OK ] misc/reorderLoopsTopologically.test/11 (1 ms) +[ RUN ] misc/reorderLoopsTopologically.test/12 +[ OK ] misc/reorderLoopsTopologically.test/12 (0 ms) +[----------] 13 tests from misc/reorderLoopsTopologically (7 ms total) + +[----------] 1 test from spmm/insertTemporaries +[ RUN ] spmm/insertTemporaries.test/0 +[ OK ] spmm/insertTemporaries.test/0 (0 ms) +[----------] 1 test from spmm/insertTemporaries (0 ms total) + +[----------] 1 test from scalar_constant/expr +[ RUN ] scalar_constant/expr.storage/0 +[ OK ] scalar_constant/expr.storage/0 (126 ms) +[----------] 1 test from scalar_constant/expr (126 ms total) + +[----------] 3 tests from scalar/expr +[ RUN ] scalar/expr.storage/0 +[ OK ] scalar/expr.storage/0 (126 ms) +[ RUN ] scalar/expr.storage/1 +[ OK ] scalar/expr.storage/1 (124 ms) +[ RUN ] scalar/expr.storage/2 +[ OK ] scalar/expr.storage/2 (128 ms) +[----------] 3 tests from scalar/expr (378 ms total) + +[----------] 2 tests from vector_neg/expr +[ RUN ] vector_neg/expr.storage/0 +[ OK ] vector_neg/expr.storage/0 (135 ms) +[ RUN ] vector_neg/expr.storage/1 +[ OK ] vector_neg/expr.storage/1 (149 ms) +[----------] 2 tests from vector_neg/expr (284 ms total) + +[----------] 3 tests from vector_scalar/expr +[ RUN ] vector_scalar/expr.storage/0 +[ OK ] vector_scalar/expr.storage/0 (135 ms) +[ RUN ] vector_scalar/expr.storage/1 +[ OK ] vector_scalar/expr.storage/1 (149 ms) +[ RUN ] vector_scalar/expr.storage/2 +[ OK ] vector_scalar/expr.storage/2 (136 ms) +[----------] 3 tests from vector_scalar/expr (420 ms total) + +[----------] 4 tests from vector_elmul/expr +[ RUN ] vector_elmul/expr.storage/0 +[ OK ] vector_elmul/expr.storage/0 (139 ms) +[ RUN ] vector_elmul/expr.storage/1 +[ OK ] vector_elmul/expr.storage/1 (135 ms) +[ RUN ] vector_elmul/expr.storage/2 +[ OK ] vector_elmul/expr.storage/2 (132 ms) +[ RUN ] vector_elmul/expr.storage/3 +[ OK ] vector_elmul/expr.storage/3 (146 ms) +[----------] 4 tests from vector_elmul/expr (552 ms total) + +[----------] 4 tests from vector_add/expr +[ RUN ] vector_add/expr.storage/0 +[ OK ] vector_add/expr.storage/0 (140 ms) +[ RUN ] vector_add/expr.storage/1 +[ OK ] vector_add/expr.storage/1 (147 ms) +[ RUN ] vector_add/expr.storage/2 +[ OK ] vector_add/expr.storage/2 (159 ms) +[ RUN ] vector_add/expr.storage/3 +[ OK ] vector_add/expr.storage/3 (186 ms) +[----------] 4 tests from vector_add/expr (632 ms total) + +[----------] 13 tests from vector_composites/expr +[ RUN ] vector_composites/expr.storage/0 +[ OK ] vector_composites/expr.storage/0 (147 ms) +[ RUN ] vector_composites/expr.storage/1 +[ OK ] vector_composites/expr.storage/1 (157 ms) +[ RUN ] vector_composites/expr.storage/2 +[ OK ] vector_composites/expr.storage/2 (136 ms) +[ RUN ] vector_composites/expr.storage/3 +[ OK ] vector_composites/expr.storage/3 (143 ms) +[ RUN ] vector_composites/expr.storage/4 +[ OK ] vector_composites/expr.storage/4 (146 ms) +[ RUN ] vector_composites/expr.storage/5 +[ OK ] vector_composites/expr.storage/5 (193 ms) +[ RUN ] vector_composites/expr.storage/6 +[ OK ] vector_composites/expr.storage/6 (169 ms) +[ RUN ] vector_composites/expr.storage/7 +[ OK ] vector_composites/expr.storage/7 (207 ms) +[ RUN ] vector_composites/expr.storage/8 +[ OK ] vector_composites/expr.storage/8 (147 ms) +[ RUN ] vector_composites/expr.storage/9 +[ OK ] vector_composites/expr.storage/9 (198 ms) +[ RUN ] vector_composites/expr.storage/10 +[ OK ] vector_composites/expr.storage/10 (168 ms) +[ RUN ] vector_composites/expr.storage/11 +[ OK ] vector_composites/expr.storage/11 (216 ms) +[ RUN ] vector_composites/expr.storage/12 +[ OK ] vector_composites/expr.storage/12 (313 ms) +[----------] 13 tests from vector_composites/expr (2340 ms total) + +[----------] 2 tests from matrix_neg/expr +[ RUN ] matrix_neg/expr.storage/0 +[ OK ] matrix_neg/expr.storage/0 (140 ms) +[ RUN ] matrix_neg/expr.storage/1 +[ OK ] matrix_neg/expr.storage/1 (168 ms) +[----------] 2 tests from matrix_neg/expr (309 ms total) + +[----------] 1 test from matrix_elmul/expr +[ RUN ] matrix_elmul/expr.storage/0 +[ OK ] matrix_elmul/expr.storage/0 (173 ms) +[----------] 1 test from matrix_elmul/expr (173 ms total) + +[----------] 7 tests from matrix_add/expr +[ RUN ] matrix_add/expr.storage/0 +[ OK ] matrix_add/expr.storage/0 (142 ms) +[ RUN ] matrix_add/expr.storage/1 +[ OK ] matrix_add/expr.storage/1 (152 ms) +[ RUN ] matrix_add/expr.storage/2 +[ OK ] matrix_add/expr.storage/2 (205 ms) +[ RUN ] matrix_add/expr.storage/3 +[ OK ] matrix_add/expr.storage/3 (205 ms) +[ RUN ] matrix_add/expr.storage/4 +[ OK ] matrix_add/expr.storage/4 (330 ms) +[ RUN ] matrix_add/expr.storage/5 +[ OK ] matrix_add/expr.storage/5 (368 ms) +[ RUN ] matrix_add/expr.storage/6 +[ OK ] matrix_add/expr.storage/6 (378 ms) +[----------] 7 tests from matrix_add/expr (1781 ms total) + +[----------] 1 test from tensor_elmul/expr +[ RUN ] tensor_elmul/expr.storage/0 +[ OK ] tensor_elmul/expr.storage/0 (204 ms) +[----------] 1 test from tensor_elmul/expr (204 ms total) + +[----------] 3 tests from composite/expr +[ RUN ] composite/expr.storage/0 +[ OK ] composite/expr.storage/0 (195 ms) +[ RUN ] composite/expr.storage/1 +[ OK ] composite/expr.storage/1 (198 ms) +[ RUN ] composite/expr.storage/2 +[ OK ] composite/expr.storage/2 (157 ms) +[----------] 3 tests from composite/expr (551 ms total) + +[----------] 2 tests from residual/expr +[ RUN ] residual/expr.storage/0 +[ OK ] residual/expr.storage/0 (147 ms) +[ RUN ] residual/expr.storage/1 +[ OK ] residual/expr.storage/1 (154 ms) +[----------] 2 tests from residual/expr (303 ms total) + +[----------] 1 test from matrix_add_vec_mul_composite/expr +[ RUN ] matrix_add_vec_mul_composite/expr.storage/0 +[ OK ] matrix_add_vec_mul_composite/expr.storage/0 (155 ms) +[----------] 1 test from matrix_add_vec_mul_composite/expr (155 ms total) + +[----------] 1 test from scaled_matrix_vector/expr +[ RUN ] scaled_matrix_vector/expr.storage/0 +[ OK ] scaled_matrix_vector/expr.storage/0 (149 ms) +[----------] 1 test from scaled_matrix_vector/expr (149 ms total) + +[----------] 4 tests from axpy_3x3/expr +[ RUN ] axpy_3x3/expr.storage/0 +[ OK ] axpy_3x3/expr.storage/0 (147 ms) +[ RUN ] axpy_3x3/expr.storage/1 +[ OK ] axpy_3x3/expr.storage/1 (136 ms) +[ RUN ] axpy_3x3/expr.storage/2 +[ OK ] axpy_3x3/expr.storage/2 (319 ms) +[ RUN ] axpy_3x3/expr.storage/3 +[ OK ] axpy_3x3/expr.storage/3 (140 ms) +[----------] 4 tests from axpy_3x3/expr (743 ms total) + +[----------] 2 tests from vector_inner/expr +[ RUN ] vector_inner/expr.storage/0 +[ OK ] vector_inner/expr.storage/0 (2 ms) +[ RUN ] vector_inner/expr.storage/1 +[ OK ] vector_inner/expr.storage/1 (134 ms) +[----------] 2 tests from vector_inner/expr (136 ms total) + +[----------] 5 tests from spmv/expr +[ RUN ] spmv/expr.storage/0 +[ OK ] spmv/expr.storage/0 (148 ms) +[ RUN ] spmv/expr.storage/1 +[ OK ] spmv/expr.storage/1 (150 ms) +[ RUN ] spmv/expr.storage/2 +[ OK ] spmv/expr.storage/2 (142 ms) +[ RUN ] spmv/expr.storage/3 +[ OK ] spmv/expr.storage/3 (140 ms) +[ RUN ] spmv/expr.storage/4 +[ OK ] spmv/expr.storage/4 (155 ms) +[----------] 5 tests from spmv/expr (735 ms total) + +[----------] 1 test from bspmv/expr +[ RUN ] bspmv/expr.storage/0 +[ OK ] bspmv/expr.storage/0 (316 ms) +[----------] 1 test from bspmv/expr (316 ms total) + +[----------] 2 tests from matrix_sum/expr +[ RUN ] matrix_sum/expr.storage/0 +[ OK ] matrix_sum/expr.storage/0 (146 ms) +[ RUN ] matrix_sum/expr.storage/1 +[ OK ] matrix_sum/expr.storage/1 (145 ms) +[----------] 2 tests from matrix_sum/expr (291 ms total) + +[----------] 2 tests from matrix_mul/expr +[ RUN ] matrix_mul/expr.storage/0 +[ OK ] matrix_mul/expr.storage/0 (2 ms) +[ RUN ] matrix_mul/expr.storage/1 +[ OK ] matrix_mul/expr.storage/1 (156 ms) +[----------] 2 tests from matrix_mul/expr (158 ms total) + +[----------] 3 tests from tensor_vector_mul/expr +[ RUN ] tensor_vector_mul/expr.storage/0 +[ OK ] tensor_vector_mul/expr.storage/0 (313 ms) +[ RUN ] tensor_vector_mul/expr.storage/1 +[ OK ] tensor_vector_mul/expr.storage/1 (145 ms) +[ RUN ] tensor_vector_mul/expr.storage/2 +[ OK ] tensor_vector_mul/expr.storage/2 (153 ms) +[----------] 3 tests from tensor_vector_mul/expr (611 ms total) + +[----------] 2 tests from tensor_matrix_mul/expr +[ RUN ] tensor_matrix_mul/expr.storage/0 +[ OK ] tensor_matrix_mul/expr.storage/0 (147 ms) +[ RUN ] tensor_matrix_mul/expr.storage/1 +[ OK ] tensor_matrix_mul/expr.storage/1 (150 ms) +[----------] 2 tests from tensor_matrix_mul/expr (297 ms total) + +[----------] 1 test from mttkrp/expr +[ RUN ] mttkrp/expr.storage/0 +[ OK ] mttkrp/expr.storage/0 (158 ms) +[----------] 1 test from mttkrp/expr (158 ms total) + +[----------] 5 tests from emit_avail_exprs/expr +[ RUN ] emit_avail_exprs/expr.storage/0 +[ OK ] emit_avail_exprs/expr.storage/0 (144 ms) +[ RUN ] emit_avail_exprs/expr.storage/1 +[ OK ] emit_avail_exprs/expr.storage/1 (147 ms) +[ RUN ] emit_avail_exprs/expr.storage/2 +[ OK ] emit_avail_exprs/expr.storage/2 (150 ms) +[ RUN ] emit_avail_exprs/expr.storage/3 +[ OK ] emit_avail_exprs/expr.storage/3 (148 ms) +[ RUN ] emit_avail_exprs/expr.storage/4 +[ OK ] emit_avail_exprs/expr.storage/4 (155 ms) +[----------] 5 tests from emit_avail_exprs/expr (744 ms total) + +[----------] 2 tests from vector_add/alloc +[ RUN ] vector_add/alloc.storage/0 +[ OK ] vector_add/alloc.storage/0 (349 ms) +[ RUN ] vector_add/alloc.storage/1 +[ OK ] vector_add/alloc.storage/1 (213 ms) +[----------] 2 tests from vector_add/alloc (562 ms total) + +[----------] Global test environment tear-down +[==========] 914 tests from 288 test cases ran. (117808 ms total) +[ PASSED ] 909 tests. +[ FAILED ] 5 tests, listed below: +[ FAILED ] workspaces.tile_vecElemMul_NoTail +[ FAILED ] workspaces.tile_vecElemMul_Tail1 +[ FAILED ] workspaces.tile_vecElemMul_Tail2 +[ FAILED ] workspaces.tile_denseMatMul +[ FAILED ] workspaces.tile_dotProduct_3 + + 5 FAILED TESTS + YOU HAVE 13 DISABLED TESTS + diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index c6e16afbc..2a72d12d0 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -40,9 +40,6 @@ void IndexVarRel::print(std::ostream& stream) const { case FUSE: getNode()->print(stream); break; - case BOUND: - getNode()->print(stream); - break; case PRECOMPUTE: getNode()->print(stream); break; @@ -68,8 +65,6 @@ bool IndexVarRel::equals(const IndexVarRel &rel) const { return getNode()->equals(*rel.getNode()); case UNDEFINED: return true; - case BOUND: - return getNode()->equals(*rel.getNode()); case PRECOMPUTE: return getNode()->equals(*rel.getNode()); default: @@ -788,106 +783,6 @@ bool operator==(const FuseRelNode& a, const FuseRelNode& b) { return a.equals(b); } -// BoundRelNode -struct BoundRelNode::Content { - IndexVar parentVar; - IndexVar boundVar; - size_t bound; - BoundType boundType; -}; - -BoundRelNode::BoundRelNode(taco::IndexVar parentVar, taco::IndexVar boundVar, size_t bound, - taco::BoundType boundType) : IndexVarRelNode(BOUND), content(new Content) { - content->parentVar = parentVar; - content->boundVar = boundVar; - content->bound = bound; - content->boundType = boundType; -} - -const IndexVar& BoundRelNode::getParentVar() const { - return content->parentVar; -} -const IndexVar& BoundRelNode::getBoundVar() const { - return content->boundVar; -} -const size_t& BoundRelNode::getBound() const { - return content->bound; -} -const BoundType& BoundRelNode::getBoundType() const { - return content->boundType; -} - -void BoundRelNode::print(std::ostream &stream) const { - stream << "bound(" << getParentVar() << ", " << getBoundVar() << ", " << getBound() << ", " << BoundType_NAMES[(int) getBoundType()] << ")"; -} - -bool BoundRelNode::equals(const BoundRelNode &rel) const { - return getParentVar() == rel.getParentVar() && - getBoundVar() == rel.getBoundVar() && getBound() == rel.getBound() && - getBoundType() == rel.getBoundType(); -} - -std::vector BoundRelNode::getParents() const { - return {getParentVar()}; -} - -std::vector BoundRelNode::getChildren() const { - return {getBoundVar()}; -} - -std::vector BoundRelNode::getIrregulars() const { - return {getBoundVar()}; -} - -std::vector BoundRelNode::computeRelativeBound(std::set definedVars, std::map> computedBounds, std::map variableExprs, Iterators iterators, ProvenanceGraph provGraph) const { - // coordinate bounds stay unchanged, only iteration bounds change - taco_iassert(computedBounds.count(getParentVar()) == 1); - std::vector parentCoordBound = computedBounds.at(getParentVar()); - return parentCoordBound; -} - -std::vector BoundRelNode::deriveIterBounds(taco::IndexVar indexVar, - std::map> parentIterBounds, - std::map> parentCoordBounds, - std::map variableNames, - Iterators iterators, - ProvenanceGraph provGraph) const { - taco_iassert(indexVar == getBoundVar()); - taco_iassert(parentCoordBounds.count(getParentVar()) == 1); - std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); - - if (getBoundType() == BoundType::MaxExact) { - return {parentCoordBound[0], ir::Literal::make(getBound(), parentCoordBound[1].type())}; - } - else { - taco_not_supported_yet; - } - return {}; -} - -ir::Expr BoundRelNode::recoverVariable(taco::IndexVar indexVar, - std::map variableNames, - Iterators iterators, - std::map> parentIterBounds, - std::map> parentCoordBounds, - ProvenanceGraph provGraph) const { - taco_iassert(indexVar == getParentVar()); - taco_iassert(variableNames.count(getBoundVar()) == 1); - return variableNames[getBoundVar()]; -} - -ir::Stmt BoundRelNode::recoverChild(taco::IndexVar indexVar, - std::map variableNames, bool emitVarDecl, Iterators iterators, ProvenanceGraph provGraph) const { - taco_iassert(indexVar == getBoundVar()); - taco_iassert(variableNames.count(getParentVar()) && variableNames.count(getBoundVar())); - ir::Expr boundVarExpr = variableNames[getBoundVar()]; - return ir::VarDecl::make(boundVarExpr, variableNames[getParentVar()]); -} - -bool operator==(const BoundRelNode& a, const BoundRelNode& b) { - return a.equals(b); -} - // PrecomputeRelNode struct PrecomputeRelNode::Content { IndexVar parentVar; From fc86897418a85ce2c4e65e4fe698786ebe8f6410 Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sat, 5 Feb 2022 15:40:59 -0800 Subject: [PATCH 08/23] api change + new tests --- include/taco/index_notation/index_notation.h | 3 ++ src/index_notation/index_notation.cpp | 9 +++++ test/tests-scheduling-eval.cpp | 19 ++++++++++- test/tests-workspaces.cpp | 35 ++++++++------------ 4 files changed, 44 insertions(+), 22 deletions(-) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index 21530f361..9e1fa7db8 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -705,6 +705,9 @@ class IndexStmt : public util::IntrusivePtr { IndexStmt bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const; + + IndexStmt bound(IndexVar i, size_t bound, BoundType bound_type) const; + /// The unroll primitive unrolls the corresponding loop by a statically-known /// integer number of iterations /// Preconditions: unrollFactor is a positive nonzero integer diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 2056fe584..79dc00b62 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1706,6 +1706,15 @@ IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType boun } + +IndexStmt IndexStmt::bound(IndexVar i, size_t bound, BoundType bound_type) const { + + i.bound(bound, bound_type); + + return *this; + +} + IndexStmt IndexStmt::unroll(IndexVar i, size_t unrollFactor) const { struct UnrollLoop : IndexNotationRewriter { using IndexNotationRewriter::visit; diff --git a/test/tests-scheduling-eval.cpp b/test/tests-scheduling-eval.cpp index 0de471f68..ca7da789d 100644 --- a/test/tests-scheduling-eval.cpp +++ b/test/tests-scheduling-eval.cpp @@ -271,7 +271,8 @@ IndexStmt scheduleSDDMMGPU(IndexStmt stmt, Tensor B, int NNZ_PER_WARP=8* IndexVar f("f"), fpos("fpos"), block("block"), fpos1("fpos1"), warp("warp"), nnz("nnz"); IndexVar dense_val_unbounded("dense_val_unbounded"), dense_val("dense_val"), thread("thread"); IndexVar thread_nz("thread_nz"); - return stmt.reorder({i, k, j}) + + stmt = stmt.reorder({i, k, j}) .fuse(i, k, f) .pos(f, fpos, B(i,k)) .split(fpos, block, fpos1, NNZ_PER_TB) @@ -283,6 +284,22 @@ IndexStmt scheduleSDDMMGPU(IndexStmt stmt, Tensor B, int NNZ_PER_WARP=8* .parallelize(block, ParallelUnit::GPUBlock, OutputRaceStrategy::IgnoreRaces) .parallelize(warp, ParallelUnit::GPUWarp, OutputRaceStrategy::Atomics) .parallelize(thread, ParallelUnit::GPUThread, OutputRaceStrategy::ParallelReduction); + + ir::IRPrinter irp = ir::IRPrinter(cout); + + cout << stmt << endl; + + std::shared_ptr codegen = ir::CodeGen::init_default(cout, ir::CodeGen::ImplementationGen); + ir::Stmt compute = lower(stmt, "compute", false, true); + + irp.print(compute); + cout << endl; + + codegen->compile(compute, false); + + return stmt; + + } IndexStmt scheduleTTMGPU(IndexStmt stmt, Tensor B, int NNZ_PER_WARP=8*32, int BLOCK_SIZE=256, int CO_FACTOR=4) { diff --git a/test/tests-workspaces.cpp b/test/tests-workspaces.cpp index 80fd5f3f5..1529267bc 100644 --- a/test/tests-workspaces.cpp +++ b/test/tests-workspaces.cpp @@ -25,15 +25,14 @@ TEST(workspaces, tile_vecElemMul_NoTail) { B.pack(); IndexVar i("i"); - IndexVar i_bounded("i_bounded"); IndexVar i0("i0"), i1("i1"); IndexExpr precomputedExpr = B(i) * C(i); A(i) = precomputedExpr; IndexStmt stmt = A.getAssignment().concretize(); TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, i_bounded, 16, BoundType::MaxExact) - .split(i_bounded, i0, i1, 4) + stmt = stmt.bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 4) .precompute(precomputedExpr, i1, i1, precomputed); A.compile(stmt); @@ -63,15 +62,14 @@ TEST(workspaces, tile_vecElemMul_Tail1) { B.pack(); IndexVar i("i"); - IndexVar i_bounded("i_bounded"); IndexVar i0("i0"), i1("i1"); IndexExpr precomputedExpr = B(i) * C(i); A(i) = precomputedExpr; IndexStmt stmt = A.getAssignment().concretize(); TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, i_bounded, 16, BoundType::MaxExact) - .split(i_bounded, i0, i1, 5) + stmt = stmt.bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 5) .precompute(precomputedExpr, i1, i1, precomputed); A.compile(stmt.concretize()); @@ -101,15 +99,14 @@ TEST(workspaces, tile_vecElemMul_Tail2) { B.pack(); IndexVar i("i"); - IndexVar i_bounded("i_bounded"); IndexVar i0("i0"), i1("i1"); IndexExpr precomputedExpr = B(i) * C(i); A(i) = precomputedExpr; IndexStmt stmt = A.getAssignment().concretize(); TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, i_bounded, 17, BoundType::MaxExact) - .split(i_bounded, i0, i1, 4) + stmt = stmt.bound(i, 17, BoundType::MaxExact) + .split(i, i0, i1, 4) .precompute(precomputedExpr, i1, i1, precomputed); A.compile(stmt.concretize()); @@ -150,15 +147,14 @@ TEST(workspaces, tile_denseMatMul) { B.pack(); IndexVar i("i"); - IndexVar i_bounded("i_bounded"); IndexVar i0("i0"), i1("i1"); IndexExpr precomputedExpr = B(i) * C(i); A(i) = precomputedExpr; IndexStmt stmt = A.getAssignment().concretize(); TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, i_bounded, 16, BoundType::MaxExact) - .split(i_bounded, i0, i1, 4); + stmt = stmt.bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 4); stmt = stmt.precompute(precomputedExpr, i1, i1, precomputed); @@ -461,7 +457,6 @@ TEST(workspaces, DISABLED_tile_dotProduct_1) { C.pack(); IndexVar i("i"); - IndexVar i_bounded("i_bounded"); IndexVar i0("i0"), i1("i1"); IndexExpr BExpr = B(i); IndexExpr CExpr = C(i); @@ -473,8 +468,8 @@ TEST(workspaces, DISABLED_tile_dotProduct_1) { TensorVar C_new("C_new", Type(Float64, {(size_t)N}), taco::dense); TensorVar precomputed("precomputed", Type(Float64, {(size_t)N}), taco::dense); - stmt = stmt.bound(i, i_bounded, (size_t)N, BoundType::MaxExact) - .split(i_bounded, i0, i1, 32); + stmt = stmt.bound(i, (size_t)N, BoundType::MaxExact) + .split(i, i0, i1, 32); stmt = stmt.precompute(precomputedExpr, i1, i1, precomputed); stmt = stmt.precompute(BExpr, i1, i1, B_new) .precompute(CExpr, i1, i1, C_new); @@ -524,7 +519,6 @@ TEST(workspaces, DISABLED_tile_dotProduct_2) { C.pack(); IndexVar i("i"); - IndexVar i_bounded("i_bounded"); IndexVar i0("i0"), i1("i1"); IndexExpr BExpr = B(i); IndexExpr CExpr = C(i); @@ -541,8 +535,8 @@ TEST(workspaces, DISABLED_tile_dotProduct_2) { stmt = stmt.precompute(BExpr, i, i, B_new) .precompute(CExpr, i, i, C_new); - stmt = stmt.bound(i, i_bounded, (size_t)N, BoundType::MaxExact) - .split(i_bounded, i0, i1, 32); + stmt = stmt.bound(i, (size_t)N, BoundType::MaxExact) + .split(i, i0, i1, 32); stmt = stmt.concretize(); @@ -573,7 +567,6 @@ TEST(workspaces, tile_dotProduct_3) { C.pack(); IndexVar i("i"); - IndexVar i_bounded("i_bounded"); IndexVar i0("i0"), i1("i1"); IndexExpr BExpr = B(i); IndexExpr CExpr = C(i); @@ -585,8 +578,8 @@ TEST(workspaces, tile_dotProduct_3) { TensorVar C_new("C_new", Type(Float64, {(size_t)N}), taco::dense); TensorVar precomputed("precomputed", Type(Float64, {(size_t)N}), taco::dense); - stmt = stmt.bound(i, i_bounded, (size_t)N, BoundType::MaxExact) - .split(i_bounded, i0, i1, 32); + stmt = stmt.bound(i, (size_t)N, BoundType::MaxExact) + .split(i, i0, i1, 32); stmt = stmt.precompute(precomputedExpr, i0, i0, precomputed); stmt = stmt.precompute(BExpr, i1, i1, B_new) From e74bf06d2e3b91de01c2e610c4f1008dfa1b9462 Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sat, 12 Feb 2022 20:56:14 -0800 Subject: [PATCH 09/23] bounds test file --- test/tests-bound.cpp | 134 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 test/tests-bound.cpp diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp new file mode 100644 index 000000000..35c652e1f --- /dev/null +++ b/test/tests-bound.cpp @@ -0,0 +1,134 @@ +#include +#include +#include +#include "test.h" +#include "test_tensors.h" +#include "taco/tensor.h" +#include "taco/index_notation/index_notation.h" +#include "codegen/codegen.h" +#include "taco/lower/lower.h" + +using namespace taco; + +TEST(bound, test_1) { + + Tensor A("A", {16}, Format{Dense}); + Tensor B("B", {16}, Format{Dense}); + Tensor C("C", {16}, Format{Dense}); + + for (int i = 0; i < 16; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 4); + + A.compile(stmt); + A.assemble(); + A.compute(); + + Tensor expected("expected", {16}, Format{Dense}); + expected(i) = B(i) * C(i); + expected.compile(); + expected.assemble(); + expected.compute(); + ASSERT_TENSOR_EQ(expected, A); +} + +TEST(bound, test_2) { + + Tensor A("A", {16}, Format{Dense}); + Tensor B("B", {16}, Format{Dense}); + Tensor C("C", {16}, Format{Dense}); + + for (int i = 0; i < 16; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 5) + .precompute(precomputedExpr, i1, i1, precomputed); + + A.compile(stmt.concretize()); + A.assemble(); + A.compute(); + + Tensor expected("expected", {16}, Format{Dense}); + expected(i) = B(i) * C(i); + expected.compile(); + expected.assemble(); + expected.compute(); + ASSERT_TENSOR_EQ(expected, A); +} + +TEST(bound, bound_and_split) { + + Tensor A("A", {17}, Format{Dense}); + Tensor B("B", {17}, Format{Dense}); + Tensor C("C", {17}, Format{Dense}); + + for (int i = 0; i < 17; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 17, BoundType::MaxExact) + .split(i, i0, i1, 4) + .precompute(precomputedExpr, i1, i1, precomputed); + + A.compile(stmt.concretize()); + A.assemble(); + A.compute(); + + Tensor expected("expected", {17}, Format{Dense}); + expected(i) = B(i) * C(i); + expected.compile(); + expected.assemble(); + expected.compute(); + + ASSERT_TENSOR_EQ(expected, A); + +// ir::IRPrinter irp = ir::IRPrinter(cout); +// +// cout << stmt << endl; +// +// std::shared_ptr codegen = ir::CodeGen::init_default(cout, ir::CodeGen::ImplementationGen); +// ir::Stmt compute = lower(stmt, "compute", false, true); +// +// irp.print(compute); +// cout << endl; +// codegen->compile(compute, false); +} + From 9ae9908959151ccb130fb8e15c8f56748180d18b Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sun, 13 Feb 2022 12:45:25 -0800 Subject: [PATCH 10/23] bound and rebound test --- test/tests-bound.cpp | 78 ++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp index 35c652e1f..a6982a746 100644 --- a/test/tests-bound.cpp +++ b/test/tests-bound.cpp @@ -10,43 +10,9 @@ using namespace taco; -TEST(bound, test_1) { - - Tensor A("A", {16}, Format{Dense}); - Tensor B("B", {16}, Format{Dense}); - Tensor C("C", {16}, Format{Dense}); - - for (int i = 0; i < 16; i++) { - A.insert({i}, (double) i); - B.insert({i}, (double) i); - } - - A.pack(); - B.pack(); - - IndexVar i("i"); - IndexVar i0("i0"), i1("i1"); - IndexExpr precomputedExpr = B(i) * C(i); - A(i) = precomputedExpr; - IndexStmt stmt = A.getAssignment().concretize(); - TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, 16, BoundType::MaxExact) - .split(i, i0, i1, 4); - - A.compile(stmt); - A.assemble(); - A.compute(); - Tensor expected("expected", {16}, Format{Dense}); - expected(i) = B(i) * C(i); - expected.compile(); - expected.assemble(); - expected.compute(); - ASSERT_TENSOR_EQ(expected, A); -} - -TEST(bound, test_2) { +TEST(bound, bound_and_rebound) { Tensor A("A", {16}, Format{Dense}); Tensor B("B", {16}, Format{Dense}); @@ -67,7 +33,8 @@ TEST(bound, test_2) { IndexStmt stmt = A.getAssignment().concretize(); TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, 16, BoundType::MaxExact) + stmt = stmt.bound(i, 18, BoundType::MaxExact) + .bound(i, 16, BoundType::MaxExact) .split(i, i0, i1, 5) .precompute(precomputedExpr, i1, i1, precomputed); @@ -83,6 +50,45 @@ TEST(bound, test_2) { ASSERT_TENSOR_EQ(expected, A); } + +// TEST(bound, bound_and_fuse) { + +// Tensor A("A", {16}, Format{Dense}); +// Tensor B("B", {16}, Format{Dense}); +// Tensor C("C", {16}, Format{Dense}); + +// for (int i = 0; i < 16; i++) { +// A.insert({i}, (double) i); +// B.insert({i}, (double) i); +// } + +// A.pack(); +// B.pack(); + +// IndexVar i("i"); +// IndexVar i0("i0"), i1("i1"); +// IndexExpr precomputedExpr = B(i) * C(i); +// A(i) = precomputedExpr; + +// IndexStmt stmt = A.getAssignment().concretize(); +// TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); +// stmt = stmt.bound(i, 16, BoundType::MaxExact) +// .split(i, i0, i1, 5) +// .fuse() +// .precompute(precomputedExpr, i1, i1, precomputed); + +// A.compile(stmt.concretize()); +// A.assemble(); +// A.compute(); + +// Tensor expected("expected", {16}, Format{Dense}); +// expected(i) = B(i) * C(i); +// expected.compile(); +// expected.assemble(); +// expected.compute(); +// ASSERT_TENSOR_EQ(expected, A); +// } + TEST(bound, bound_and_split) { Tensor A("A", {17}, Format{Dense}); From ba95326c316c3e15a389f26c3bf7f2805052a41e Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sun, 13 Feb 2022 15:12:23 -0800 Subject: [PATCH 11/23] adding tests for bound --- src/index_notation/index_notation.cpp | 2 + test/tests-bound.cpp | 114 ++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 79dc00b62..9a45a5b97 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1699,6 +1699,8 @@ IndexStmt IndexStmt::fuse(IndexVar i, IndexVar j, IndexVar f) const { // } IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { + + taco_uerror << "Depericated Bound: bounding " << i.getName() << "ignoring " << i1.getName() << endl; i.bound(bound, bound_type); diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp index a6982a746..c244d0032 100644 --- a/test/tests-bound.cpp +++ b/test/tests-bound.cpp @@ -138,3 +138,117 @@ TEST(bound, bound_and_split) { // codegen->compile(compute, false); } +TEST(bound, bound_normal_1) { + + Tensor A("A", {254}, Format{Dense}); + Tensor B("B", {254}, Format{Dense}); + Tensor C("C", {254}, Format{Dense}); + + for (int i = 0; i < 254; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 254, BoundType::MaxExact) + .split(i, i0, i1, 4) + .precompute(precomputedExpr, i1, i1, precomputed); + + A.compile(stmt.concretize()); + A.assemble(); + A.compute(); + + Tensor expected("expected", {254}, Format{Dense}); + expected(i) = B(i) * C(i); + expected.compile(); + expected.assemble(); + expected.compute(); + + ASSERT_TENSOR_EQ(expected, A); + +// ir::IRPrinter irp = ir::IRPrinter(cout); +// +// cout << stmt << endl; +// +// std::shared_ptr codegen = ir::CodeGen::init_default(cout, ir::CodeGen::ImplementationGen); +// ir::Stmt compute = lower(stmt, "compute", false, true); +// +// irp.print(compute); +// cout << endl; +// codegen->compile(compute, false); +} + + +TEST(bound, bound_normal_2) { + + Tensor A("A", {176}, Format{Dense}); + Tensor B("B", {176}, Format{Dense}); + Tensor C("C", {176}, Format{Dense}); + + for (int i = 0; i < 176; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 176, BoundType::MaxExact) + .split(i, i0, i1, 4) + .precompute(precomputedExpr, i1, i1, precomputed); + + A.compile(stmt.concretize()); + A.assemble(); + A.compute(); + + Tensor expected("expected", {176}, Format{Dense}); + expected(i) = B(i) * C(i); + expected.compile(); + expected.assemble(); + expected.compute(); + + ASSERT_TENSOR_EQ(expected, A); + +// ir::IRPrinter irp = ir::IRPrinter(cout); +// +// cout << stmt << endl; +// +// std::shared_ptr codegen = ir::CodeGen::init_default(cout, ir::CodeGen::ImplementationGen); +// ir::Stmt compute = lower(stmt, "compute", false, true); +// +// irp.print(compute); +// cout << endl; +// codegen->compile(compute, false); +} + + +TEST(bound, bound_throw_assert) { + Tensor A("A", {176}, Format{Dense}); + + for (int i = 0; i < 3; i++) { + A.insert({i}, (double) i); + } + + IndexVar i0("i0"), i1("i1"); + + IndexStmt stmt = A.getAssignment().concretize(); + + ASSERT_THROW(stmt.bound(i0, i1, 4, BoundType::MaxExact), taco::TacoException); + +} \ No newline at end of file From 2cf16af3f66dad7951a3203c67ba2a86d957a879 Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sun, 13 Feb 2022 16:31:52 -0800 Subject: [PATCH 12/23] add more tests --- src/index_notation/provenance_graph.cpp | 6 +++++ test/tests-bound.cpp | 29 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 2a72d12d0..a4da9400f 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -208,6 +208,12 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, taco_iassert(parentCoordBounds.count(getParentVar()) == 1); std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); + if (indexVar == getInnerVar()){ + if (indexVar.getBound() != getSplitFactor()){ + taco_uerror << "Bounded a split inner varibale with illegal bound, real bound: "<< getSplitFactor() << endl; + } + } + if (indexVar.getBoundType() == BoundType::MaxExact) { return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; } diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp index c244d0032..1b5c9c06e 100644 --- a/test/tests-bound.cpp +++ b/test/tests-bound.cpp @@ -244,6 +244,7 @@ TEST(bound, bound_throw_assert) { for (int i = 0; i < 3; i++) { A.insert({i}, (double) i); } + A.pack(); IndexVar i0("i0"), i1("i1"); @@ -251,4 +252,32 @@ TEST(bound, bound_throw_assert) { ASSERT_THROW(stmt.bound(i0, i1, 4, BoundType::MaxExact), taco::TacoException); +} + +TEST(bound, split_bound_illegal) { + Tensor A("A", {176}, Format{Dense}); + Tensor B("B", {176}, Format{Dense}); + Tensor C("C", {176}, Format{Dense}); + + for (int i = 0; i < 176; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + + ASSERT_THROW(stmt.bound(i, 176, BoundType::MaxExact) + .split(i, i0, i1, 4) + .bound(i1, 2, BoundType::MaxExact) + .precompute(precomputedExpr, i1, i1, precomputed), taco::TacoException); + } \ No newline at end of file From 6dc9ad47bd5b307f27a017aaa572d304e018e10b Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sat, 26 Feb 2022 14:57:45 -0800 Subject: [PATCH 13/23] print prov graph --- .../taco/index_notation/provenance_graph.h | 4 ++ src/index_notation/index_notation.cpp | 5 +-- src/index_notation/provenance_graph.cpp | 45 ++++++++++++++++--- test/tests-bound.cpp | 7 ++- 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/include/taco/index_notation/provenance_graph.h b/include/taco/index_notation/provenance_graph.h index f4fbad292..98a80c908 100644 --- a/include/taco/index_notation/provenance_graph.h +++ b/include/taco/index_notation/provenance_graph.h @@ -357,6 +357,8 @@ class ProvenanceGraph { /// does the index variable have an exact bound known at compile-time bool hasExactBound(IndexVar indexVar) const; + void printGraph(void) const; + /// Once indexVar is defined what new variables become recoverable /// returned in order of recovery (ie if parent being recovered allows its parent to also be recovered then parent comes first) std::vector newlyRecoverableParents(IndexVar indexVar, std::set previouslyDefined) const; @@ -378,6 +380,8 @@ class ProvenanceGraph { /// a `.divide` scheduling operation. bool isDivided(IndexVar indexVar) const; + + private: std::map childRelMap; std::map parentRelMap; diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 9a45a5b97..d47fb86f5 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1709,10 +1709,9 @@ IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType boun } -IndexStmt IndexStmt::bound(IndexVar i, size_t bound, BoundType bound_type) const { - - i.bound(bound, bound_type); +IndexStmt IndexStmt::bound(IndexVar i, size_t bound, BoundType bound_type) const { + i.bound(bound, bound_type); return *this; } diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index a4da9400f..99bea6c32 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -201,6 +201,14 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { + cout << "derive iter bounds for split" << endl; + + + taco_iassert(indexVar == getOuterVar() || indexVar == getInnerVar()); + taco_iassert(parentIterBounds.size() == 1); + taco_iassert(parentIterBounds.count(getParentVar()) == 1); + + // taco::IndexVar parent = getParentVar(); if (indexVar.isBound()){ @@ -210,7 +218,7 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, if (indexVar == getInnerVar()){ if (indexVar.getBound() != getSplitFactor()){ - taco_uerror << "Bounded a split inner varibale with illegal bound, real bound: "<< getSplitFactor() << endl; + taco_uerror << "Bounded a split inner varibale with bound: " << indexVar.getBound() << " real bound: " << getSplitFactor() << endl; } } @@ -224,15 +232,14 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, } - taco_iassert(indexVar == getOuterVar() || indexVar == getInnerVar()); - taco_iassert(parentIterBounds.size() == 1); - taco_iassert(parentIterBounds.count(getParentVar()) == 1); - std::vector parentBound = parentIterBounds.at(getParentVar()); Datatype splitFactorType = parentBound[0].type(); if (indexVar == getOuterVar()) { + // cout << "OUTER VAR" << endl; ir::Expr minBound = ir::Div::make(parentBound[0], ir::Literal::make(getSplitFactor(), splitFactorType)); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getSplitFactor()-1, splitFactorType)), ir::Literal::make(getSplitFactor(), splitFactorType)); + // cout << "Provenance graph : "<< provGraph << endl; + // cout << "PARENT BOUND[1]: "<< parentBound[1] << endl; return {minBound, maxBound}; } else if (indexVar == getInnerVar()) { @@ -1216,8 +1223,15 @@ std::vector ProvenanceGraph::deriveIterBounds(IndexVar indexVar, std:: // for split: outer: Div(expr, splitfactor), Div(expr, splitfactor), inner: 0, splitfactor // what about for reordered split: same loop bounds just reordered loops (this might change for different tail strategies) + // cout << "in derive iter bounds prov graph" << endl; + // cout << "INDEX VAR:" << indexVar << endl; if (isUnderived(indexVar)) { + // cout << "underived" << endl; taco_iassert(underivedBounds.count(indexVar) == 1); + + // for (size_t i = 0; i< underivedBounds[indexVar].size(); i++){ + // cout << underivedBounds[indexVar][i] << endl; + // } return underivedBounds[indexVar]; } @@ -1230,6 +1244,7 @@ std::vector ProvenanceGraph::deriveIterBounds(IndexVar indexVar, std:: std::map> parentIterBounds; std::map> parentCoordBounds; for (const IndexVar& parent : getParents(indexVar)) { + // cout << "in the for loop" << endl; parentIterBounds[parent] = deriveIterBounds(parent, derivedVarOrder, underivedBounds, variableNames, iterators); vector underivedParentAncestors = getUnderivedAncestors(parent); // TODO: this is okay for now because we don't need parentCoordBounds for fused taco_iassert(underivedParentAncestors.size() == 1); @@ -1238,6 +1253,7 @@ std::vector ProvenanceGraph::deriveIterBounds(IndexVar indexVar, std:: } IndexVarRel rel = parentRelMap.at(indexVar); + return rel.getNode()->deriveIterBounds(indexVar, parentIterBounds, parentCoordBounds, variableNames, iterators, *this); } @@ -1406,4 +1422,23 @@ bool ProvenanceGraph::isDivided(IndexVar indexVar) const { return false; } + +void ProvenanceGraph::printGraph() const { + + for (const auto &item : parentsMap){ + cout << "PARENT: " << item.first; + if (item.second.size() > 0){ + cout << " type of parent: " << parentRelMap.at(item.second[0]); + } + cout << endl; + for (auto child : item.second){ + cout << " "; + cout << "CHILD : " << child << " of type : " << childRelMap.at(item.first) << endl; + } + } } + +} + + + diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp index 1b5c9c06e..0dc94ba70 100644 --- a/test/tests-bound.cpp +++ b/test/tests-bound.cpp @@ -275,9 +275,12 @@ TEST(bound, split_bound_illegal) { IndexStmt stmt = A.getAssignment().concretize(); TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - ASSERT_THROW(stmt.bound(i, 176, BoundType::MaxExact) + stmt = stmt.bound(i, 17, BoundType::MaxExact) .split(i, i0, i1, 4) .bound(i1, 2, BoundType::MaxExact) - .precompute(precomputedExpr, i1, i1, precomputed), taco::TacoException); + .precompute(precomputedExpr, i1, i1, precomputed); + + + ASSERT_THROW(A.compile(stmt.concretize()), taco::TacoException); } \ No newline at end of file From 9ec3e6e91aa6653925b005233401dad16280fdd5 Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sat, 26 Feb 2022 16:52:06 -0800 Subject: [PATCH 14/23] print prov graph --- .../taco/index_notation/provenance_graph.h | 8 +- src/index_notation/provenance_graph.cpp | 33 ++-- test/tests-bound.cpp | 5 + test/tests-provGraph.cpp | 143 ++++++++++++++++++ 4 files changed, 178 insertions(+), 11 deletions(-) create mode 100644 test/tests-provGraph.cpp diff --git a/include/taco/index_notation/provenance_graph.h b/include/taco/index_notation/provenance_graph.h index 98a80c908..f38f60015 100644 --- a/include/taco/index_notation/provenance_graph.h +++ b/include/taco/index_notation/provenance_graph.h @@ -277,6 +277,7 @@ class ProvenanceGraph { public: ProvenanceGraph() {} ProvenanceGraph(IndexStmt concreteStmt); + /// Returns the children of a given index variable, {} if no children or if indexVar is not in graph std::vector getChildren(IndexVar indexVar) const; @@ -357,7 +358,9 @@ class ProvenanceGraph { /// does the index variable have an exact bound known at compile-time bool hasExactBound(IndexVar indexVar) const; - void printGraph(void) const; + void printGraphParent(void) const; + + void printGraphChild(void) const; /// Once indexVar is defined what new variables become recoverable /// returned in order of recovery (ie if parent being recovered allows its parent to also be recovered then parent comes first) @@ -389,6 +392,9 @@ class ProvenanceGraph { std::map> parentsMap; std::map> childrenMap; + std::map>> childrenRelMap; + std::map>> parentsRelMap; + std::set nodes; }; diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 99bea6c32..bc76798d7 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -201,9 +201,6 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { - cout << "derive iter bounds for split" << endl; - - taco_iassert(indexVar == getOuterVar() || indexVar == getInnerVar()); taco_iassert(parentIterBounds.size() == 1); taco_iassert(parentIterBounds.count(getParentVar()) == 1); @@ -922,12 +919,21 @@ ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { // space? childRelMap[parent] = rel; childrenMap[parent] = children; + + for (IndexVar child : children){ + childrenRelMap[parent].push_back(make_pair(child, rel)); + } } for (IndexVar child : children) { nodes.insert(child); parentRelMap[child] = rel; parentsMap[child] = parents; + + for (IndexVar parent : parents){ + parentsRelMap[child].push_back(make_pair(parent, rel)); + } + } } } @@ -1423,17 +1429,24 @@ bool ProvenanceGraph::isDivided(IndexVar indexVar) const { } -void ProvenanceGraph::printGraph() const { +void ProvenanceGraph::printGraphParent() const { - for (const auto &item : parentsMap){ - cout << "PARENT: " << item.first; - if (item.second.size() > 0){ - cout << " type of parent: " << parentRelMap.at(item.second[0]); + for (const auto &item : childrenRelMap){ + cout << "PARENT: " << item.first << endl; + for (auto child : item.second){ + cout << " "; + cout << "CHILD : " << child.first << " type of : " << child.second << endl; } - cout << endl; + } +} + +void ProvenanceGraph::printGraphChild() const { + + for (const auto &item : parentsRelMap){ + cout << "CHILD: " << item.first << endl; for (auto child : item.second){ cout << " "; - cout << "CHILD : " << child << " of type : " << childRelMap.at(item.first) << endl; + cout << "PARENT : " << child.first << " type of : " << child.second << endl; } } } diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp index 0dc94ba70..b8dc03701 100644 --- a/test/tests-bound.cpp +++ b/test/tests-bound.cpp @@ -113,11 +113,16 @@ TEST(bound, bound_and_split) { stmt = stmt.bound(i, 17, BoundType::MaxExact) .split(i, i0, i1, 4) .precompute(precomputedExpr, i1, i1, precomputed); + + + A.compile(stmt.concretize()); A.assemble(); A.compute(); + + Tensor expected("expected", {17}, Format{Dense}); expected(i) = B(i) * C(i); expected.compile(); diff --git a/test/tests-provGraph.cpp b/test/tests-provGraph.cpp new file mode 100644 index 000000000..8099bd8f9 --- /dev/null +++ b/test/tests-provGraph.cpp @@ -0,0 +1,143 @@ +#include +#include +#include +#include "test.h" +#include "test_tensors.h" +#include "taco/tensor.h" +#include "taco/index_notation/index_notation.h" +#include "codegen/codegen.h" +#include "taco/lower/lower.h" + +using namespace taco; + + +TEST(provGraph, print_provGraph1) { + + Tensor A("A", {16}, Format{Dense}); + Tensor B("B", {16}, Format{Dense}); + Tensor C("C", {16}, Format{Dense}); + + for (int i = 0; i < 16; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"), i2("i2"), i3("i3"), iw("iw"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 18, BoundType::MaxExact) + .bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 5) + .split(i1, i2, i3, 2) + .precompute(precomputedExpr, i1, iw, precomputed); + + ir::IRPrinter irp = ir::IRPrinter(cout); + + cout << stmt << endl; + + ProvenanceGraph provGraph = ProvenanceGraph(stmt.concretize()); + + cout << "PRINT WRT PARENTS" << endl; + provGraph.printGraphParent(); + cout << "***********************" << endl; + cout << "PRINT WRT CHILD" << endl; + provGraph.printGraphChild(); + cout << "***********************" << endl; + +} + + +TEST(provGraph, print_provGraph2) { + + Tensor A("A", {16}, Format{Dense}); + Tensor B("B", {16}, Format{Dense}); + Tensor C("C", {16}, Format{Dense}); + + for (int i = 0; i < 16; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"), i2("i2"), i3("i3"), iw("iw"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 18, BoundType::MaxExact) + .bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 5) + .precompute(precomputedExpr, i1, iw, precomputed) + .split(i1, i2, i3, 2); + + + ir::IRPrinter irp = ir::IRPrinter(cout); + + cout << stmt << endl; + + ProvenanceGraph provGraph = ProvenanceGraph(stmt.concretize()); + + cout << "PRINT WRT PARENTS" << endl; + provGraph.printGraphParent(); + cout << "***********************" << endl; + cout << "PRINT WRT CHILD" << endl; + provGraph.printGraphChild(); + cout << "***********************" << endl; + +} + + + +TEST(provGraph, print_normal_provGraph) { + + Tensor A("A", {16}, Format{Dense}); + Tensor B("B", {16}, Format{Dense}); + Tensor C("C", {16}, Format{Dense}); + + for (int i = 0; i < 16; i++) { + A.insert({i}, (double) i); + B.insert({i}, (double) i); + } + + A.pack(); + B.pack(); + + IndexVar i("i"); + IndexVar i0("i0"), i1("i1"), i2("i2"), i3("i3"), iw("iw"); + IndexExpr precomputedExpr = B(i) * C(i); + A(i) = precomputedExpr; + + IndexStmt stmt = A.getAssignment().concretize(); + TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); + stmt = stmt.bound(i, 18, BoundType::MaxExact) + .bound(i, 16, BoundType::MaxExact) + .split(i, i0, i1, 5) + .precompute(precomputedExpr, i1, iw, precomputed); + + + ir::IRPrinter irp = ir::IRPrinter(cout); + + cout << stmt << endl; + + ProvenanceGraph provGraph = ProvenanceGraph(stmt.concretize()); + + cout << "PRINT WRT PARENTS" << endl; + provGraph.printGraphParent(); + cout << "***********************" << endl; + cout << "PRINT WRT CHILD" << endl; + provGraph.printGraphChild(); + cout << "***********************" << endl; + +} + From 79f2d47ba8ee9ccf5454895089867ef1c5d2142c Mon Sep 17 00:00:00 2001 From: Manya Bansal Date: Sat, 26 Feb 2022 16:57:14 -0800 Subject: [PATCH 15/23] added an additional test --- src/index_notation/provenance_graph.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index bc76798d7..96b470366 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -920,6 +920,10 @@ ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { childRelMap[parent] = rel; childrenMap[parent] = children; + if (rel.getRelType() != PRECOMPUTE && childrenRelMap[parent].size() > 0){ + taco_uerror << " Cannot attach two relation types to one node " << endl; + } + for (IndexVar child : children){ childrenRelMap[parent].push_back(make_pair(child, rel)); } From 12af135addcfe195093a53c1f8d39369b4a06f58 Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Sun, 10 Apr 2022 16:59:40 -0700 Subject: [PATCH 16/23] change taco-cli-tests.bats to refkect new bound api --- include/taco/index_notation/index_notation.h | 5 - .../taco/index_notation/provenance_graph.h | 3 - src/index_notation/index_notation.cpp | 28 +--- src/index_notation/provenance_graph.cpp | 54 +------ test/taco-cli-test.bats | 18 +-- test/tests-provGraph.cpp | 143 ------------------ tools/taco.cpp | 10 +- 7 files changed, 22 insertions(+), 239 deletions(-) delete mode 100644 test/tests-provGraph.cpp diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index d6bcb89f8..aac4a22dc 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -1038,7 +1038,6 @@ class IndexVar : public IndexExpr, public IndexVarInterface { /// Returns the name of the index variable. std::string getName() const; -<<<<<<< HEAD size_t& getBound() const; const BoundType& getBoundType() const; @@ -1051,11 +1050,7 @@ class IndexVar : public IndexExpr, public IndexVarInterface { bool isBound(); - // void setBoundVar(IndexVar boundVar); - -======= // Need these to overshadow the comparisons in for the IndexExpr instrusive pointer ->>>>>>> d0654a84137169883973c40a951dfdb89883fd9c friend bool operator==(const IndexVar&, const IndexVar&); friend bool operator<(const IndexVar&, const IndexVar&); friend bool operator!=(const IndexVar&, const IndexVar&); diff --git a/include/taco/index_notation/provenance_graph.h b/include/taco/index_notation/provenance_graph.h index f38f60015..6cfbc6e63 100644 --- a/include/taco/index_notation/provenance_graph.h +++ b/include/taco/index_notation/provenance_graph.h @@ -392,9 +392,6 @@ class ProvenanceGraph { std::map> parentsMap; std::map> childrenMap; - std::map>> childrenRelMap; - std::map>> parentsRelMap; - std::set nodes; }; diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index f017b31e9..5395a3eca 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1998,31 +1998,9 @@ IndexStmt IndexStmt::fuse(IndexVar i, IndexVar j, IndexVar f) const { return transformed; } -// IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { - -// i.bound(bound, bound_type); - -// IndexVarRel rel = IndexVarRel(new BoundRelNode(i, i1, bound, bound_type)); -// string reason; - -// // Add predicate to concrete index notation -// IndexStmt transformed = Transformation(AddSuchThatPredicates({rel})).apply(*this, &reason); -// if (!transformed.defined()) { -// taco_uerror << reason; -// } - -// // Replace all occurrences of i with i1 -// transformed = Transformation(ForAllReplace({i}, {i1})).apply(transformed, &reason); -// if (!transformed.defined()) { -// taco_uerror << reason; -// } - -// return transformed; -// } - IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { - taco_uerror << "Depericated Bound: bounding " << i.getName() << "ignoring " << i1.getName() << endl; + cout << "Depericated Bound: bounding " << i.getName() << " ignoring " << i1.getName() << endl; i.bound(bound, bound_type); @@ -2370,7 +2348,9 @@ IndexVar::IndexVar(const std::string& name) : IndexVar(name, Datatype::Int32) {} IndexVar::IndexVar(const std::string& name, const Datatype& type) : IndexVar(new IndexVarNode(name, type)) {} -IndexVar::IndexVar(const IndexVarNode* n) : IndexExpr(n) {} +IndexVar::IndexVar(const IndexVarNode* n) : IndexExpr(n), content(new Content) { + content->isbound = false; +} template <> bool isa(IndexExpr e) { return isa(e.ptr); diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 96b470366..39453762c 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -205,16 +205,14 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, taco_iassert(parentIterBounds.size() == 1); taco_iassert(parentIterBounds.count(getParentVar()) == 1); - - - // taco::IndexVar parent = getParentVar(); if (indexVar.isBound()){ - + // if the variable has been bounded, derive iteration bounds using existing bound taco_iassert(parentCoordBounds.count(getParentVar()) == 1); std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); if (indexVar == getInnerVar()){ if (indexVar.getBound() != getSplitFactor()){ + // check that bound and split factor do not conflict taco_uerror << "Bounded a split inner varibale with bound: " << indexVar.getBound() << " real bound: " << getSplitFactor() << endl; } } @@ -232,11 +230,8 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, std::vector parentBound = parentIterBounds.at(getParentVar()); Datatype splitFactorType = parentBound[0].type(); if (indexVar == getOuterVar()) { - // cout << "OUTER VAR" << endl; ir::Expr minBound = ir::Div::make(parentBound[0], ir::Literal::make(getSplitFactor(), splitFactorType)); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getSplitFactor()-1, splitFactorType)), ir::Literal::make(getSplitFactor(), splitFactorType)); - // cout << "Provenance graph : "<< provGraph << endl; - // cout << "PARENT BOUND[1]: "<< parentBound[1] << endl; return {minBound, maxBound}; } else if (indexVar == getInnerVar()) { @@ -890,6 +885,7 @@ bool operator==(const PrecomputeRelNode& a, const PrecomputeRelNode& b) { return a.equals(b); } +// class ProvenanceGraph // class ProvenanceGraph ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { // Add all nodes (not all nodes may be scheduled) @@ -904,8 +900,7 @@ ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { // No relations defined return; } - // q: does a such node impose some restrictions on - // the value of the variable + SuchThat suchThat = to(concreteStmt); vector relations = suchThat.getPredicate(); @@ -914,30 +909,14 @@ ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { std::vector children = rel.getNode()->getChildren(); for (IndexVar parent : parents) { nodes.insert(parent); - // q: childrelmap maps the - // parent to a constrained iteration - // space? childRelMap[parent] = rel; childrenMap[parent] = children; - - if (rel.getRelType() != PRECOMPUTE && childrenRelMap[parent].size() > 0){ - taco_uerror << " Cannot attach two relation types to one node " << endl; - } - - for (IndexVar child : children){ - childrenRelMap[parent].push_back(make_pair(child, rel)); - } } for (IndexVar child : children) { nodes.insert(child); parentRelMap[child] = rel; parentsMap[child] = parents; - - for (IndexVar parent : parents){ - parentsRelMap[child].push_back(make_pair(parent, rel)); - } - } } } @@ -1154,7 +1133,6 @@ bool ProvenanceGraph::isRecoverablePrecompute(taco::IndexVar indexVar, std::set< return isRecoverablePrecompute(precomputeChild, defined, producers, consumers); } for (const IndexVar& child : getChildren(indexVar)) { - // q: why is it !isRecoverablePrecompute? if (!defined.count(child) && (isFullyDerived(child) || !isRecoverablePrecompute(child, defined, producers, consumers))) { return false; @@ -1431,30 +1409,6 @@ bool ProvenanceGraph::isDivided(IndexVar indexVar) const { } return false; } - - -void ProvenanceGraph::printGraphParent() const { - - for (const auto &item : childrenRelMap){ - cout << "PARENT: " << item.first << endl; - for (auto child : item.second){ - cout << " "; - cout << "CHILD : " << child.first << " type of : " << child.second << endl; - } - } -} - -void ProvenanceGraph::printGraphChild() const { - - for (const auto &item : parentsRelMap){ - cout << "CHILD: " << item.first << endl; - for (auto child : item.second){ - cout << " "; - cout << "PARENT : " << child.first << " type of : " << child.second << endl; - } - } -} - } diff --git a/test/taco-cli-test.bats b/test/taco-cli-test.bats index 63d03b1af..e4193ad98 100755 --- a/test/taco-cli-test.bats +++ b/test/taco-cli-test.bats @@ -234,9 +234,9 @@ EOF @test 'test -s=bound' { expression="a(i,j) = b(i,k) * c(k,j)" scheduling_directives=( - "-s=bound(i,ibound,100,MaxExact)" - "-s=bound(j,jbound,100,MaxExact)" - "-s=bound(k,kbound,100,MaxExact)" + "-s=bound(i,100,MaxExact)" + "-s=bound(j,100,MaxExact)" + "-s=bound(k,100,MaxExact)" ) for directive in "${scheduling_directives[@]}"; do echo "trying: taco '$expression' '$directive'" @@ -245,9 +245,9 @@ EOF # These should all die with "Not supported yet" scheduling_directives=( - "-s=bound(k,kbound,100,MinExact)" - "-s=bound(k,kbound,100,MaxConstraint)" - "-s=bound(k,kbound,100,MinConstraint)" + "-s=bound(k,100,MinExact)" + "-s=bound(k,100,MaxConstraint)" + "-s=bound(k,100,MinConstraint)" ) for directive in "${TODO_scheduling_directives[@]}"; do @@ -259,7 +259,7 @@ EOF done # This should die with "Bound type not defined" - directive="-s=bound(k,kbound,100,Unknown)" + directive="-s=bound(k,100,Unknown)" echo "this should fail: taco '$expression' '$directive'" run $TACO "$expression" "$directive" @@ -273,7 +273,7 @@ EOF expression="a(i,j) = b(i,k) * c(k,j)" scheduling_directives=( "-s=reorder(i,j,k),split(k,k0,k1,32),unroll(k0,4)" - "-s=reorder(i,j,k),bound(k,k0,32,MaxExact),unroll(k0,4)" + "-s=reorder(i,j,k),bound(k,32,MaxExact),unroll(k,4)" ) for directive in "${scheduling_directives[@]}"; do echo "trying: taco '$expression' '$directive'" @@ -346,7 +346,7 @@ EOF "-s=parallelize(i,NotParallel,Temporary)" "-s=parallelize(i,NotParallel,ParallelReduction)" "-s=reorder(i,j,k),split(k,k0,k1,32),parallelize(k0,CPUVector,IgnoreRaces)" - "-s=reorder(i,j,k),bound(k,k0,32,MaxExact),parallelize(k0,CPUVector,IgnoreRaces)" + "-s=reorder(i,j,k),bound(k,32,MaxExact),parallelize(k,CPUVector,IgnoreRaces)" "-s=parallelize(i,CPUThread,IgnoreRaces)" "-s=parallelize(i,GPUBlock,IgnoreRaces),parallelize(j,GPUThread,IgnoreRaces)" ) diff --git a/test/tests-provGraph.cpp b/test/tests-provGraph.cpp deleted file mode 100644 index 8099bd8f9..000000000 --- a/test/tests-provGraph.cpp +++ /dev/null @@ -1,143 +0,0 @@ -#include -#include -#include -#include "test.h" -#include "test_tensors.h" -#include "taco/tensor.h" -#include "taco/index_notation/index_notation.h" -#include "codegen/codegen.h" -#include "taco/lower/lower.h" - -using namespace taco; - - -TEST(provGraph, print_provGraph1) { - - Tensor A("A", {16}, Format{Dense}); - Tensor B("B", {16}, Format{Dense}); - Tensor C("C", {16}, Format{Dense}); - - for (int i = 0; i < 16; i++) { - A.insert({i}, (double) i); - B.insert({i}, (double) i); - } - - A.pack(); - B.pack(); - - IndexVar i("i"); - IndexVar i0("i0"), i1("i1"), i2("i2"), i3("i3"), iw("iw"); - IndexExpr precomputedExpr = B(i) * C(i); - A(i) = precomputedExpr; - - IndexStmt stmt = A.getAssignment().concretize(); - TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, 18, BoundType::MaxExact) - .bound(i, 16, BoundType::MaxExact) - .split(i, i0, i1, 5) - .split(i1, i2, i3, 2) - .precompute(precomputedExpr, i1, iw, precomputed); - - ir::IRPrinter irp = ir::IRPrinter(cout); - - cout << stmt << endl; - - ProvenanceGraph provGraph = ProvenanceGraph(stmt.concretize()); - - cout << "PRINT WRT PARENTS" << endl; - provGraph.printGraphParent(); - cout << "***********************" << endl; - cout << "PRINT WRT CHILD" << endl; - provGraph.printGraphChild(); - cout << "***********************" << endl; - -} - - -TEST(provGraph, print_provGraph2) { - - Tensor A("A", {16}, Format{Dense}); - Tensor B("B", {16}, Format{Dense}); - Tensor C("C", {16}, Format{Dense}); - - for (int i = 0; i < 16; i++) { - A.insert({i}, (double) i); - B.insert({i}, (double) i); - } - - A.pack(); - B.pack(); - - IndexVar i("i"); - IndexVar i0("i0"), i1("i1"), i2("i2"), i3("i3"), iw("iw"); - IndexExpr precomputedExpr = B(i) * C(i); - A(i) = precomputedExpr; - - IndexStmt stmt = A.getAssignment().concretize(); - TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, 18, BoundType::MaxExact) - .bound(i, 16, BoundType::MaxExact) - .split(i, i0, i1, 5) - .precompute(precomputedExpr, i1, iw, precomputed) - .split(i1, i2, i3, 2); - - - ir::IRPrinter irp = ir::IRPrinter(cout); - - cout << stmt << endl; - - ProvenanceGraph provGraph = ProvenanceGraph(stmt.concretize()); - - cout << "PRINT WRT PARENTS" << endl; - provGraph.printGraphParent(); - cout << "***********************" << endl; - cout << "PRINT WRT CHILD" << endl; - provGraph.printGraphChild(); - cout << "***********************" << endl; - -} - - - -TEST(provGraph, print_normal_provGraph) { - - Tensor A("A", {16}, Format{Dense}); - Tensor B("B", {16}, Format{Dense}); - Tensor C("C", {16}, Format{Dense}); - - for (int i = 0; i < 16; i++) { - A.insert({i}, (double) i); - B.insert({i}, (double) i); - } - - A.pack(); - B.pack(); - - IndexVar i("i"); - IndexVar i0("i0"), i1("i1"), i2("i2"), i3("i3"), iw("iw"); - IndexExpr precomputedExpr = B(i) * C(i); - A(i) = precomputedExpr; - - IndexStmt stmt = A.getAssignment().concretize(); - TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); - stmt = stmt.bound(i, 18, BoundType::MaxExact) - .bound(i, 16, BoundType::MaxExact) - .split(i, i0, i1, 5) - .precompute(precomputedExpr, i1, iw, precomputed); - - - ir::IRPrinter irp = ir::IRPrinter(cout); - - cout << stmt << endl; - - ProvenanceGraph provGraph = ProvenanceGraph(stmt.concretize()); - - cout << "PRINT WRT PARENTS" << endl; - provGraph.printGraphParent(); - cout << "***********************" << endl; - cout << "PRINT WRT CHILD" << endl; - provGraph.printGraphChild(); - cout << "***********************" << endl; - -} - diff --git a/tools/taco.cpp b/tools/taco.cpp index 449b09918..4c2d11641 100644 --- a/tools/taco.cpp +++ b/tools/taco.cpp @@ -498,13 +498,13 @@ static bool setSchedulingCommands(vector> scheduleCommands, parse stmt = stmt.reorder(reorderedVars); } else if (command == "bound") { - taco_uassert(scheduleCommand.size() == 4) << "'bound' scheduling directive takes 4 parameters: bound(i, i1, bound, type)"; + taco_uassert(scheduleCommand.size() == 3) << "'bound' scheduling directive takes 3 parameters: bound(i, bound, type)"; string i, i1, type; size_t bound; i = scheduleCommand[0]; - i1 = scheduleCommand[1]; - taco_uassert(sscanf(scheduleCommand[2].c_str(), "%zu", &bound) == 1) << "failed to parse third parameter to `bound` directive as a size_t"; - type = scheduleCommand[3]; + i1 = scheduleCommand[0]; + taco_uassert(sscanf(scheduleCommand[1].c_str(), "%zu", &bound) == 1) << "failed to parse second parameter to `bound` directive as a size_t"; + type = scheduleCommand[2]; BoundType bound_type; if (type == "MinExact") { @@ -521,7 +521,7 @@ static bool setSchedulingCommands(vector> scheduleCommands, parse } IndexVar bound1(i1); - stmt = stmt.bound(findVar(i), bound1, bound, bound_type); + stmt = stmt.bound(findVar(i), bound, bound_type); } else if (command == "unroll") { taco_uassert(scheduleCommand.size() == 2) << "'unroll' scheduling directive takes 2 parameters: unroll(i, unrollFactor)"; From 27288ff7c1759f40357c32fe5cdd2af39073094c Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Mon, 11 Apr 2022 18:09:06 -0700 Subject: [PATCH 17/23] ataco-cli-test passing --- include/taco/index_notation/provenance_graph.h | 2 +- src/index_notation/provenance_graph.cpp | 6 +----- src/lower/lowerer_impl_imperative.cpp | 5 +++++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/taco/index_notation/provenance_graph.h b/include/taco/index_notation/provenance_graph.h index 6cfbc6e63..24ba626a5 100644 --- a/include/taco/index_notation/provenance_graph.h +++ b/include/taco/index_notation/provenance_graph.h @@ -5,7 +5,7 @@ namespace taco { struct IndexVarRelNode; -enum IndexVarRelType {UNDEFINED, SPLIT, DIVIDE, POS, FUSE, BOUND, PRECOMPUTE}; +enum IndexVarRelType {UNDEFINED, SPLIT, DIVIDE, POS, FUSE, PRECOMPUTE}; /// A pointer class for IndexVarRelNodes provides some operations for all IndexVarRelTypes class IndexVarRel : public util::IntrusivePtr { diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 39453762c..c285d357e 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -1299,12 +1299,8 @@ bool ProvenanceGraph::isCoordVariable(taco::IndexVar indexVar) const { } bool ProvenanceGraph::hasExactBound(IndexVar indexVar) const { - if (isUnderived(indexVar)) { - return false; - } - IndexVarRel rel = parentRelMap.at(indexVar); - if(rel.getRelType() == BOUND) + if(indexVar.isBound()) { return indexVar.getBoundType() == BoundType::MaxExact; } diff --git a/src/lower/lowerer_impl_imperative.cpp b/src/lower/lowerer_impl_imperative.cpp index f370242db..3f1a8d162 100644 --- a/src/lower/lowerer_impl_imperative.cpp +++ b/src/lower/lowerer_impl_imperative.cpp @@ -963,8 +963,13 @@ Stmt LowererImplImperative::lowerForallCloned(Forall forall) { } // Build guards + cout << "before loop" << endl; + Expr guardCondition; for (auto var : varsWithGuard) { + + cout << "Var = " << var << endl; + std::vector iterBounds = provGraph.deriveIterBounds(var, definedIndexVarsOrdered, underivedBounds, indexVarToExprMap, iterators); Expr minGuard = Lt::make(minVarValues[var], iterBounds[0]); From a52bb4eb741c456e5edf6ef580c7741d14c8e338 Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Sun, 17 Apr 2022 14:36:29 -0700 Subject: [PATCH 18/23] remove assert --- src/index_notation/provenance_graph.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index c285d357e..97918eadb 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -718,7 +718,6 @@ std::vector FuseRelNode::deriveIterBounds(taco::IndexVar indexVar, Iterators iterators, ProvenanceGraph provGraph) const { if (indexVar.isBound()){ - taco_iassert(parentCoordBounds.count(getParentVar()) == 1); std::vector parentCoordBound = combineParentBounds(parentIterBounds[getOuterParentVar()], parentIterBounds[getInnerParentVar()]); if (indexVar.getBoundType() == BoundType::MaxExact) { From 57dd389cf1cbde71fb33b0d5941f39aa20fc1696 Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Sun, 17 Apr 2022 15:10:06 -0700 Subject: [PATCH 19/23] check literal split --- src/index_notation/index_notation.cpp | 2 +- src/index_notation/provenance_graph.cpp | 31 ++++-------- test/tests-bound.cpp | 66 +------------------------ 3 files changed, 12 insertions(+), 87 deletions(-) diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 5395a3eca..ef76c9682 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -2000,7 +2000,7 @@ IndexStmt IndexStmt::fuse(IndexVar i, IndexVar j, IndexVar f) const { IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { - cout << "Depericated Bound: bounding " << i.getName() << " ignoring " << i1.getName() << endl; + taco_uerror << "Depericated Bound: bounding " << i.getName() << " ignoring " << i1.getName() << endl; i.bound(bound, bound_type); diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 97918eadb..db0e77bde 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -205,36 +205,23 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, taco_iassert(parentIterBounds.size() == 1); taco_iassert(parentIterBounds.count(getParentVar()) == 1); - if (indexVar.isBound()){ - // if the variable has been bounded, derive iteration bounds using existing bound - taco_iassert(parentCoordBounds.count(getParentVar()) == 1); - std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); - - if (indexVar == getInnerVar()){ - if (indexVar.getBound() != getSplitFactor()){ - // check that bound and split factor do not conflict - taco_uerror << "Bounded a split inner varibale with bound: " << indexVar.getBound() << " real bound: " << getSplitFactor() << endl; - } - } - - if (indexVar.getBoundType() == BoundType::MaxExact) { - return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; - } - else { - taco_not_supported_yet; - } - return {}; - - } - std::vector parentBound = parentIterBounds.at(getParentVar()); + std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); Datatype splitFactorType = parentBound[0].type(); + + if (indexVar == getOuterVar()) { ir::Expr minBound = ir::Div::make(parentBound[0], ir::Literal::make(getSplitFactor(), splitFactorType)); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getSplitFactor()-1, splitFactorType)), ir::Literal::make(getSplitFactor(), splitFactorType)); + if (isa(maxBound) && indexVar.isBound() && !isValue(maxBound, indexVar.getBound())){ + taco_uerror << "Bounded a split outer varibale with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; + } return {minBound, maxBound}; } else if (indexVar == getInnerVar()) { + if (indexVar.isBound() && (indexVar.getBound() != getSplitFactor())){ + taco_uerror << "Bounded a split inner varibale with bound: " << indexVar.getBound() << " real bound: " << getSplitFactor() << endl; + } ir::Expr minBound = 0; ir::Expr maxBound = ir::Literal::make(getSplitFactor(), splitFactorType); return {minBound, maxBound}; diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp index b8dc03701..718d4b8f4 100644 --- a/test/tests-bound.cpp +++ b/test/tests-bound.cpp @@ -51,44 +51,6 @@ TEST(bound, bound_and_rebound) { } -// TEST(bound, bound_and_fuse) { - -// Tensor A("A", {16}, Format{Dense}); -// Tensor B("B", {16}, Format{Dense}); -// Tensor C("C", {16}, Format{Dense}); - -// for (int i = 0; i < 16; i++) { -// A.insert({i}, (double) i); -// B.insert({i}, (double) i); -// } - -// A.pack(); -// B.pack(); - -// IndexVar i("i"); -// IndexVar i0("i0"), i1("i1"); -// IndexExpr precomputedExpr = B(i) * C(i); -// A(i) = precomputedExpr; - -// IndexStmt stmt = A.getAssignment().concretize(); -// TensorVar precomputed("precomputed", Type(Float64, {Dimension(i1)}), taco::dense); -// stmt = stmt.bound(i, 16, BoundType::MaxExact) -// .split(i, i0, i1, 5) -// .fuse() -// .precompute(precomputedExpr, i1, i1, precomputed); - -// A.compile(stmt.concretize()); -// A.assemble(); -// A.compute(); - -// Tensor expected("expected", {16}, Format{Dense}); -// expected(i) = B(i) * C(i); -// expected.compile(); -// expected.assemble(); -// expected.compute(); -// ASSERT_TENSOR_EQ(expected, A); -// } - TEST(bound, bound_and_split) { Tensor A("A", {17}, Format{Dense}); @@ -179,17 +141,6 @@ TEST(bound, bound_normal_1) { expected.compute(); ASSERT_TENSOR_EQ(expected, A); - -// ir::IRPrinter irp = ir::IRPrinter(cout); -// -// cout << stmt << endl; -// -// std::shared_ptr codegen = ir::CodeGen::init_default(cout, ir::CodeGen::ImplementationGen); -// ir::Stmt compute = lower(stmt, "compute", false, true); -// -// irp.print(compute); -// cout << endl; -// codegen->compile(compute, false); } @@ -229,17 +180,6 @@ TEST(bound, bound_normal_2) { expected.compute(); ASSERT_TENSOR_EQ(expected, A); - -// ir::IRPrinter irp = ir::IRPrinter(cout); -// -// cout << stmt << endl; -// -// std::shared_ptr codegen = ir::CodeGen::init_default(cout, ir::CodeGen::ImplementationGen); -// ir::Stmt compute = lower(stmt, "compute", false, true); -// -// irp.print(compute); -// cout << endl; -// codegen->compile(compute, false); } @@ -282,10 +222,8 @@ TEST(bound, split_bound_illegal) { stmt = stmt.bound(i, 17, BoundType::MaxExact) .split(i, i0, i1, 4) - .bound(i1, 2, BoundType::MaxExact) - .precompute(precomputedExpr, i1, i1, precomputed); - - + .bound(i1, 2, BoundType::MaxExact); + ASSERT_THROW(A.compile(stmt.concretize()), taco::TacoException); } \ No newline at end of file From c27ef8a87e11f0b2357af480a1129a2f042bbf39 Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Sun, 17 Apr 2022 15:25:44 -0700 Subject: [PATCH 20/23] check biunds against literal values --- src/index_notation/provenance_graph.cpp | 56 ++++++++++--------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index db0e77bde..01dafff23 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -213,14 +213,14 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, if (indexVar == getOuterVar()) { ir::Expr minBound = ir::Div::make(parentBound[0], ir::Literal::make(getSplitFactor(), splitFactorType)); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getSplitFactor()-1, splitFactorType)), ir::Literal::make(getSplitFactor(), splitFactorType)); - if (isa(maxBound) && indexVar.isBound() && !isValue(maxBound, indexVar.getBound())){ - taco_uerror << "Bounded a split outer varibale with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; + if (isa(ir::simplify(maxBound)) && indexVar.isBound() && !isValue(ir::simplify(maxBound), indexVar.getBound())){ + taco_uerror << "Bounded a split outer variable with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; } return {minBound, maxBound}; } else if (indexVar == getInnerVar()) { if (indexVar.isBound() && (indexVar.getBound() != getSplitFactor())){ - taco_uerror << "Bounded a split inner varibale with bound: " << indexVar.getBound() << " real bound: " << getSplitFactor() << endl; + taco_uerror << "Bounded a split inner variable with bound: " << indexVar.getBound() << " real bound: " << getSplitFactor() << endl; } ir::Expr minBound = 0; ir::Expr maxBound = ir::Literal::make(getSplitFactor(), splitFactorType); @@ -357,21 +357,6 @@ std::vector DivideRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { - if (indexVar.isBound()){ - - taco_iassert(parentCoordBounds.count(getParentVar()) == 1); - std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); - - if (indexVar.getBoundType() == BoundType::MaxExact) { - return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; - } - else { - taco_not_supported_yet; - } - return {}; - - } - taco_iassert(indexVar == getOuterVar() || indexVar == getInnerVar()); taco_iassert(parentIterBounds.size() == 1); taco_iassert(parentIterBounds.count(getParentVar()) == 1); @@ -384,10 +369,16 @@ std::vector DivideRelNode::deriveIterBounds(taco::IndexVar indexVar, // ranges from 0 to divFactor. ir::Expr minBound = 0; ir::Expr maxBound = divFactor; + if (isa(ir::simplify(maxBound)) && indexVar.isBound() && !isValue(ir::simplify(maxBound), indexVar.getBound())){ + taco_uerror << "Bounded a DIVIDE outer variable with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; + } return {minBound, maxBound}; } + else if (indexVar == getInnerVar()) { - // The inner loop ranges over a chunk of size parentBound / divFactor. + if (indexVar.isBound() && (indexVar.getBound() != getDivFactor())){ + taco_uerror << "Bounded a divide inner variable with bound: " << indexVar.getBound() << " real bound: " << getDivFactor() << endl; + } ir::Expr minBound = ir::Div::make(parentBound[0], divFactor); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getDivFactor()-1, divFactorType)), divFactor); return {minBound, maxBound}; @@ -703,23 +694,20 @@ std::vector FuseRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map> parentCoordBounds, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { - if (indexVar.isBound()){ - - std::vector parentCoordBound = combineParentBounds(parentIterBounds[getOuterParentVar()], parentIterBounds[getInnerParentVar()]); + + taco_iassert(indexVar == getFusedVar()); + taco_iassert(parentIterBounds.count(getOuterParentVar()) && parentIterBounds.count(getInnerParentVar())); + std::vector parentCoordBound = combineParentBounds(parentIterBounds[getOuterParentVar()], parentIterBounds[getInnerParentVar()]); - if (indexVar.getBoundType() == BoundType::MaxExact) { - return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; - } - else { - taco_not_supported_yet; + if (indexVar.isBound()){ + //check if max bound matches + if (isa(ir::simplify(parentCoordBound[1])) && indexVar.isBound() && !isValue(ir::simplify(parentCoordBound[1]), indexVar.getBound())){ + taco_uerror << "Bounded a fuse index variable with bound: " << indexVar.getBound() << " real bound: " << parentCoordBound[1] << endl; } - return {}; } + return parentCoordBound; - taco_iassert(indexVar == getFusedVar()); - taco_iassert(parentIterBounds.count(getOuterParentVar()) && parentIterBounds.count(getInnerParentVar())); - return combineParentBounds(parentIterBounds[getOuterParentVar()], parentIterBounds[getInnerParentVar()]); } ir::Expr FuseRelNode::recoverVariable(taco::IndexVar indexVar, @@ -827,6 +815,10 @@ std::vector PrecomputeRelNode::deriveIterBounds(taco::IndexVar indexVa std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { + + taco_iassert(indexVar == getPrecomputeVar()); + taco_iassert(parentIterBounds.count(getParentVar()) == 1); + if (indexVar.isBound()){ taco_iassert(parentCoordBounds.count(getParentVar()) == 1); @@ -842,8 +834,6 @@ std::vector PrecomputeRelNode::deriveIterBounds(taco::IndexVar indexVa } - taco_iassert(indexVar == getPrecomputeVar()); - taco_iassert(parentIterBounds.count(getParentVar()) == 1); std::vector parentIterBound = parentIterBounds.at(getParentVar()); return parentIterBound; } From e21812bcdbd1914d0e8e29fad274950059196318 Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Sun, 24 Apr 2022 16:13:41 -0700 Subject: [PATCH 21/23] change ir simplify logic + add additional ir tests --- src/index_notation/provenance_graph.cpp | 6 +++--- test/tests-indexexpr.cpp | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index 01dafff23..ec5f8f7a9 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -213,7 +213,7 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, if (indexVar == getOuterVar()) { ir::Expr minBound = ir::Div::make(parentBound[0], ir::Literal::make(getSplitFactor(), splitFactorType)); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getSplitFactor()-1, splitFactorType)), ir::Literal::make(getSplitFactor(), splitFactorType)); - if (isa(ir::simplify(maxBound)) && indexVar.isBound() && !isValue(ir::simplify(maxBound), indexVar.getBound())){ + if ( ir::isa(ir::simplify(maxBound)) && indexVar.isBound() && !ir::to(ir::simplify(maxBound))->equalsScalar(indexVar.getBound()) ){ taco_uerror << "Bounded a split outer variable with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; } return {minBound, maxBound}; @@ -369,7 +369,7 @@ std::vector DivideRelNode::deriveIterBounds(taco::IndexVar indexVar, // ranges from 0 to divFactor. ir::Expr minBound = 0; ir::Expr maxBound = divFactor; - if (isa(ir::simplify(maxBound)) && indexVar.isBound() && !isValue(ir::simplify(maxBound), indexVar.getBound())){ + if (ir::isa(ir::simplify(maxBound)) && indexVar.isBound() && !ir::to(ir::simplify(maxBound))->equalsScalar(indexVar.getBound()) ){ taco_uerror << "Bounded a DIVIDE outer variable with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; } return {minBound, maxBound}; @@ -701,7 +701,7 @@ std::vector FuseRelNode::deriveIterBounds(taco::IndexVar indexVar, if (indexVar.isBound()){ //check if max bound matches - if (isa(ir::simplify(parentCoordBound[1])) && indexVar.isBound() && !isValue(ir::simplify(parentCoordBound[1]), indexVar.getBound())){ + if (ir::isa(ir::simplify(parentCoordBound[1])) && !ir::to(ir::simplify(parentCoordBound[1]))->equalsScalar(indexVar.getBound())){ taco_uerror << "Bounded a fuse index variable with bound: " << indexVar.getBound() << " real bound: " << parentCoordBound[1] << endl; } diff --git a/test/tests-indexexpr.cpp b/test/tests-indexexpr.cpp index cdca2b2eb..3f9c50ff6 100644 --- a/test/tests-indexexpr.cpp +++ b/test/tests-indexexpr.cpp @@ -1,5 +1,6 @@ #include "test.h" #include "test_tensors.h" +#include #include "taco/tensor.h" #include "taco/index_notation/index_notation.h" #include "taco/index_notation/index_notation_nodes.h" @@ -82,3 +83,21 @@ TEST(indexexpr, indexvar) { ASSERT_EQ(type(), var.getDataType()); ASSERT_EQ("i", var.getName()); } + +TEST(indexexpr, simplifyAdd) { + + ir::Expr add = ir::Add::make(20, 40); + add = ir::simplify(add); + + ASSERT_TRUE(ir::isa(add)); + ASSERT_TRUE(ir::to(add)->equalsScalar(60)); +} + +TEST(indexexpr, simplifyMul) { + + ir::Expr mul = ir::Mul::make(20, 40); + mul = ir::simplify(mul); + + ASSERT_TRUE(ir::isa(mul)); + ASSERT_TRUE(ir::to(mul)->equalsScalar(800)); +} \ No newline at end of file From e258dca24294e16935faa747ba270869ae96e503 Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Sun, 29 May 2022 20:22:11 -0700 Subject: [PATCH 22/23] remove split up and down --- include/taco/index_notation/index_notation.h | 2 -- src/index_notation/index_notation.cpp | 9 --------- 2 files changed, 11 deletions(-) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index aac4a22dc..08d53bc37 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -601,8 +601,6 @@ class IndexStmt : public util::IntrusivePtr { /// variable, a \textit{tail strategy} is employed such as emitting a variable /// sized loop that handles remaining iterations. /// Preconditions: splitFactor is a positive nonzero integer - IndexStmt splitUpDown(IndexVar i, IndexVar i1, IndexVar i2, bool split_up, size_t splitFactor) const; - IndexStmt split(IndexVar i, IndexVar i1, IndexVar i2, size_t splitFactor) const; // TODO: TailStrategy /// The divide transformation splits one index variable into diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index ef76c9682..975246801 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1816,15 +1816,6 @@ IndexStmt IndexStmt::concretize() const { return stmt; } -IndexStmt IndexStmt::splitUpDown(IndexVar i, IndexVar i1, IndexVar i2, bool split_up, size_t splitFactor) const { - - if (split_up){ - return split(i, i1, i2, splitFactor); - } - return divide(i, i1, i2, splitFactor); - -} - IndexStmt IndexStmt::split(IndexVar i, IndexVar i1, IndexVar i2, size_t splitFactor) const { IndexVarRel rel = IndexVarRel(new SplitRelNode(i, i1, i2, splitFactor)); string reason; From af4ff8fc91c85e40d2edabf1835195b68fd4af4f Mon Sep 17 00:00:00 2001 From: Manya-bansal Date: Mon, 6 Jun 2022 14:37:49 -0700 Subject: [PATCH 23/23] change bound abstraction from indexVar to indexStmt --- include/taco/index_notation/index_notation.h | 26 +---- .../index_notation/index_notation_nodes.h | 4 +- .../taco/index_notation/provenance_graph.h | 15 ++- include/taco/index_notation/transformations.h | 24 +++++ src/index_notation/index_notation.cpp | 63 +++-------- src/index_notation/index_notation_printer.cpp | 6 ++ .../index_notation_rewriter.cpp | 2 +- src/index_notation/provenance_graph.cpp | 101 +++++++----------- src/index_notation/transformations.cpp | 65 ++++++++++- test/tests-bound.cpp | 20 +--- test/tests-index_notation.cpp | 6 +- test/tests-scheduling-eval.cpp | 18 ++-- 12 files changed, 177 insertions(+), 173 deletions(-) diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h index 08d53bc37..e2fc01a95 100644 --- a/include/taco/index_notation/index_notation.h +++ b/include/taco/index_notation/index_notation.h @@ -737,10 +737,6 @@ class IndexStmt : public util::IntrusivePtr { /// The precondition for bound is that the computation bounds supplied are /// correct given the inputs that this code will be run on. // IndexStmt bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const; - - IndexStmt bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const; - - IndexStmt bound(IndexVar i, size_t bound, BoundType bound_type) const; /// The unroll primitive unrolls the corresponding loop by a statically-known @@ -1036,19 +1032,7 @@ class IndexVar : public IndexExpr, public IndexVarInterface { /// Returns the name of the index variable. std::string getName() const; - size_t& getBound() const; - - const BoundType& getBoundType() const; - - void setBoundType(BoundType boundType); - - void setBound(size_t bound); - - void bound(size_t bound, BoundType boundType); - - bool isBound(); - - // Need these to overshadow the comparisons in for the IndexExpr instrusive pointer + /// Need these to overshadow the comparisons in for the IndexExpr instrusive pointer friend bool operator==(const IndexVar&, const IndexVar&); friend bool operator<(const IndexVar&, const IndexVar&); friend bool operator!=(const IndexVar&, const IndexVar&); @@ -1072,9 +1056,6 @@ class IndexVar : public IndexExpr, public IndexVarInterface { struct IndexVar::Content { std::string name; - size_t bound; - taco::BoundType boundType; - bool isbound; }; struct WindowedIndexVar::Content { @@ -1100,16 +1081,17 @@ class SuchThat : public IndexStmt { public: SuchThat() = default; SuchThat(const SuchThatNode*); - SuchThat(IndexStmt stmt, std::vector predicate); + SuchThat(IndexStmt stmt, std::vector predicate, std::map> boundsMap); IndexStmt getStmt() const; std::vector getPredicate() const; + std::map> getBounds() const; typedef SuchThatNode Node; }; /// Create a suchthat index statement. -SuchThat suchthat(IndexStmt stmt, std::vector predicate); +SuchThat suchthat(IndexStmt stmt, std::vector predicate, std::map> boundsMap); /// A tensor variable in an index expression, which can either be an operand /// or the result of the expression. diff --git a/include/taco/index_notation/index_notation_nodes.h b/include/taco/index_notation/index_notation_nodes.h index 5289ff069..b41be4ce8 100644 --- a/include/taco/index_notation/index_notation_nodes.h +++ b/include/taco/index_notation/index_notation_nodes.h @@ -436,7 +436,8 @@ struct MultiNode : public IndexStmtNode { }; struct SuchThatNode : public IndexStmtNode { - SuchThatNode(IndexStmt stmt, std::vector predicate) : stmt(stmt), predicate(predicate) {} + SuchThatNode(IndexStmt stmt, std::vector predicate, std::map> boundsMap) : \ + stmt(stmt), predicate(predicate), boundsMap(boundsMap) {} void accept(IndexStmtVisitorStrict* v) const { v->visit(this); @@ -444,6 +445,7 @@ struct SuchThatNode : public IndexStmtNode { IndexStmt stmt; std::vector predicate; + std::map> boundsMap; }; struct SequenceNode : public IndexStmtNode { diff --git a/include/taco/index_notation/provenance_graph.h b/include/taco/index_notation/provenance_graph.h index 24ba626a5..92aa7f6d8 100644 --- a/include/taco/index_notation/provenance_graph.h +++ b/include/taco/index_notation/provenance_graph.h @@ -355,12 +355,17 @@ class ProvenanceGraph { /// does the index variable have a descendant in position space bool hasPosDescendant(IndexVar indexVar) const; - /// does the index variable have an exact bound known at compile-time - bool hasExactBound(IndexVar indexVar) const; + // /does the index variable have a bound + bool hasBound(IndexVar indexVar) const; + + /// get the indexVar's bound + size_t getBound(IndexVar indexVar) const; - void printGraphParent(void) const; + /// get the indexVar's boundType + taco::BoundType getBoundType(IndexVar indexVar) const; - void printGraphChild(void) const; + /// does the index variable have an exact bound known at compile-time + bool hasExactBound(IndexVar indexVar) const; /// Once indexVar is defined what new variables become recoverable /// returned in order of recovery (ie if parent being recovered allows its parent to also be recovered then parent comes first) @@ -392,6 +397,8 @@ class ProvenanceGraph { std::map> parentsMap; std::map> childrenMap; + std::map> boundsMap; + std::set nodes; }; diff --git a/include/taco/index_notation/transformations.h b/include/taco/index_notation/transformations.h index f898c92b9..60e5e0341 100644 --- a/include/taco/index_notation/transformations.h +++ b/include/taco/index_notation/transformations.h @@ -19,6 +19,7 @@ class Reorder; class Precompute; class ForAllReplace; class AddSuchThatPredicates; +class AddSuchThatBoundMap; class Parallelize; class TopoReorder; class SetAssembleStrategy; @@ -35,6 +36,7 @@ class Transformation { Transformation(Parallelize); Transformation(TopoReorder); Transformation(AddSuchThatPredicates); + Transformation(AddSuchThatBoundMap); Transformation(SetAssembleStrategy); IndexStmt apply(IndexStmt stmt, std::string *reason = nullptr) const; @@ -159,6 +161,28 @@ class AddSuchThatPredicates : public TransformationInterface { std::ostream& operator<<(std::ostream&, const AddSuchThatPredicates&); +/// Adds a SuchThat node if it does not exist and adds the given BoundsList +class AddSuchThatBoundMap : public TransformationInterface { +public: + AddSuchThatBoundMap(); + + AddSuchThatBoundMap(std::map> boundsMap); + + std::map> getBoundsMap() const; + + IndexStmt apply(IndexStmt stmt, std::string *reason = nullptr) const; + + void print(std::ostream &os) const; + +private: + struct Content; + std::shared_ptr content; +}; + +std::ostream& operator<<(std::ostream&, const AddSuchThatBoundMap&); + + + /// The parallelize optimization tags a Forall as parallelized /// after checking for preconditions class Parallelize : public TransformationInterface { diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp index 975246801..852b5e3ee 100644 --- a/src/index_notation/index_notation.cpp +++ b/src/index_notation/index_notation.cpp @@ -1989,22 +1989,15 @@ IndexStmt IndexStmt::fuse(IndexVar i, IndexVar j, IndexVar f) const { return transformed; } -IndexStmt IndexStmt::bound(IndexVar i, IndexVar i1, size_t bound, BoundType bound_type) const { - - taco_uerror << "Depericated Bound: bounding " << i.getName() << " ignoring " << i1.getName() << endl; - - i.bound(bound, bound_type); - - return *this; - -} - - IndexStmt IndexStmt::bound(IndexVar i, size_t bound, BoundType bound_type) const { - i.bound(bound, bound_type); - return *this; + string reason; + IndexStmt transformed = Transformation(AddSuchThatBoundMap({ { i, std::make_pair(bound, bound_type) } })).apply(*this, &reason); + if (!transformed.defined()) { + taco_uerror << reason; + } + return transformed; } IndexStmt IndexStmt::unroll(IndexVar i, size_t unrollFactor) const { @@ -2307,8 +2300,8 @@ template <> Multi to(IndexStmt s) { SuchThat::SuchThat(const SuchThatNode* n) : IndexStmt(n) { } -SuchThat::SuchThat(IndexStmt stmt, std::vector predicate) - : SuchThat(new SuchThatNode(stmt, predicate)) { +SuchThat::SuchThat(IndexStmt stmt, std::vector predicate, std::map> boundsMap) + : SuchThat(new SuchThatNode(stmt, predicate, boundsMap)) { } IndexStmt SuchThat::getStmt() const { @@ -2319,8 +2312,12 @@ std::vector SuchThat::getPredicate() const { return getNode(*this)->predicate; } -SuchThat suchthat(IndexStmt stmt, std::vector predicate) { - return SuchThat(stmt, predicate); +std::map> SuchThat::getBounds() const { + return getNode(*this)->boundsMap; +} + +SuchThat suchthat(IndexStmt stmt, std::vector predicate, std::map> boundsMap) { + return SuchThat(stmt, predicate, boundsMap); } template <> bool isa(IndexStmt s) { @@ -2339,9 +2336,7 @@ IndexVar::IndexVar(const std::string& name) : IndexVar(name, Datatype::Int32) {} IndexVar::IndexVar(const std::string& name, const Datatype& type) : IndexVar(new IndexVarNode(name, type)) {} -IndexVar::IndexVar(const IndexVarNode* n) : IndexExpr(n), content(new Content) { - content->isbound = false; -} +IndexVar::IndexVar(const IndexVarNode* n) : IndexExpr(n) {} template <> bool isa(IndexExpr e) { return isa(e.ptr); @@ -2356,32 +2351,6 @@ std::string IndexVar::getName() const { return getNode(*this)->getName(); } -size_t& IndexVar::getBound() const { - return content->bound; -} - -const BoundType& IndexVar::getBoundType() const { - return content->boundType; -} - -void IndexVar::setBound(size_t bound){ - content->bound = bound; -} - -void IndexVar::setBoundType(BoundType boundType){ - content->boundType = boundType; -} - -void IndexVar::bound(size_t bound, BoundType boundType){ - content->isbound = true; - setBound(bound); - setBoundType(boundType); -} - -bool IndexVar::isBound(){ - return content->isbound; -} - WindowedIndexVar IndexVar::operator()(int lo, int hi, int stride) { return WindowedIndexVar(*this, lo, hi, stride); } @@ -4007,7 +3976,7 @@ struct Zero : public IndexNotationRewriterStrict { stmt = op; } else { - stmt = new SuchThatNode(body, op->predicate); + stmt = new SuchThatNode(body, op->predicate, op->boundsMap); } } }; diff --git a/src/index_notation/index_notation_printer.cpp b/src/index_notation/index_notation_printer.cpp index eb4e32417..32010fc4f 100644 --- a/src/index_notation/index_notation_printer.cpp +++ b/src/index_notation/index_notation_printer.cpp @@ -269,12 +269,18 @@ void IndexNotationPrinter::visit(const SuchThatNode* op) { os << "suchthat("; op->stmt.accept(this); os << ", "; + + for (auto iter = op->boundsMap.begin(); iter != op->boundsMap.end(); ++iter) { + os << "bound(" << iter->first << ", " << iter->second.first << ", " << BoundType_NAMES[(int) iter->second.second] << ") "; + } + for (auto iter = op->predicate.begin(); iter != op->predicate.end(); ++iter) { os << *iter; if (iter + 1 != op->predicate.end()) { os << " and "; } } + os << ")"; } diff --git a/src/index_notation/index_notation_rewriter.cpp b/src/index_notation/index_notation_rewriter.cpp index 5caa2da4b..2743bc04f 100644 --- a/src/index_notation/index_notation_rewriter.cpp +++ b/src/index_notation/index_notation_rewriter.cpp @@ -239,7 +239,7 @@ void IndexNotationRewriter::visit(const SuchThatNode* op) { stmt = op; } else { - stmt = new SuchThatNode(s, op->predicate); + stmt = new SuchThatNode(s, op->predicate, op->boundsMap); } } diff --git a/src/index_notation/provenance_graph.cpp b/src/index_notation/provenance_graph.cpp index ec5f8f7a9..d7e0367b4 100644 --- a/src/index_notation/provenance_graph.cpp +++ b/src/index_notation/provenance_graph.cpp @@ -200,7 +200,7 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, std::map> parentCoordBounds, std::map variableNames, Iterators iterators, ProvenanceGraph provGraph) const { - + taco_iassert(indexVar == getOuterVar() || indexVar == getInnerVar()); taco_iassert(parentIterBounds.size() == 1); taco_iassert(parentIterBounds.count(getParentVar()) == 1); @@ -213,15 +213,9 @@ std::vector SplitRelNode::deriveIterBounds(taco::IndexVar indexVar, if (indexVar == getOuterVar()) { ir::Expr minBound = ir::Div::make(parentBound[0], ir::Literal::make(getSplitFactor(), splitFactorType)); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getSplitFactor()-1, splitFactorType)), ir::Literal::make(getSplitFactor(), splitFactorType)); - if ( ir::isa(ir::simplify(maxBound)) && indexVar.isBound() && !ir::to(ir::simplify(maxBound))->equalsScalar(indexVar.getBound()) ){ - taco_uerror << "Bounded a split outer variable with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; - } return {minBound, maxBound}; } else if (indexVar == getInnerVar()) { - if (indexVar.isBound() && (indexVar.getBound() != getSplitFactor())){ - taco_uerror << "Bounded a split inner variable with bound: " << indexVar.getBound() << " real bound: " << getSplitFactor() << endl; - } ir::Expr minBound = 0; ir::Expr maxBound = ir::Literal::make(getSplitFactor(), splitFactorType); return {minBound, maxBound}; @@ -369,16 +363,10 @@ std::vector DivideRelNode::deriveIterBounds(taco::IndexVar indexVar, // ranges from 0 to divFactor. ir::Expr minBound = 0; ir::Expr maxBound = divFactor; - if (ir::isa(ir::simplify(maxBound)) && indexVar.isBound() && !ir::to(ir::simplify(maxBound))->equalsScalar(indexVar.getBound()) ){ - taco_uerror << "Bounded a DIVIDE outer variable with bound: " << indexVar.getBound() << " real bound: " << maxBound << endl; - } return {minBound, maxBound}; } else if (indexVar == getInnerVar()) { - if (indexVar.isBound() && (indexVar.getBound() != getDivFactor())){ - taco_uerror << "Bounded a divide inner variable with bound: " << indexVar.getBound() << " real bound: " << getDivFactor() << endl; - } ir::Expr minBound = ir::Div::make(parentBound[0], divFactor); ir::Expr maxBound = ir::Div::make(ir::Add::make(parentBound[1], ir::Literal::make(getDivFactor()-1, divFactorType)), divFactor); return {minBound, maxBound}; @@ -475,21 +463,6 @@ std::vector PosRelNode::deriveIterBounds(taco::IndexVar indexVar, ProvenanceGraph provGraph) const { - if (indexVar.isBound()){ - - taco_iassert(parentCoordBounds.count(getParentVar()) == 1); - std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); - - if (indexVar.getBoundType() == BoundType::MaxExact) { - return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; - } - else { - taco_not_supported_yet; - } - return {}; - - } - taco_iassert(indexVar == getPosVar()); taco_iassert(parentCoordBounds.count(getParentVar()) == 1); std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); @@ -699,13 +672,6 @@ std::vector FuseRelNode::deriveIterBounds(taco::IndexVar indexVar, taco_iassert(parentIterBounds.count(getOuterParentVar()) && parentIterBounds.count(getInnerParentVar())); std::vector parentCoordBound = combineParentBounds(parentIterBounds[getOuterParentVar()], parentIterBounds[getInnerParentVar()]); - if (indexVar.isBound()){ - //check if max bound matches - if (ir::isa(ir::simplify(parentCoordBound[1])) && !ir::to(ir::simplify(parentCoordBound[1]))->equalsScalar(indexVar.getBound())){ - taco_uerror << "Bounded a fuse index variable with bound: " << indexVar.getBound() << " real bound: " << parentCoordBound[1] << endl; - } - - } return parentCoordBound; } @@ -819,21 +785,6 @@ std::vector PrecomputeRelNode::deriveIterBounds(taco::IndexVar indexVa taco_iassert(indexVar == getPrecomputeVar()); taco_iassert(parentIterBounds.count(getParentVar()) == 1); - if (indexVar.isBound()){ - - taco_iassert(parentCoordBounds.count(getParentVar()) == 1); - std::vector parentCoordBound = parentCoordBounds.at(getParentVar()); - - if (indexVar.getBoundType() == BoundType::MaxExact) { - return {parentCoordBound[0], ir::Literal::make(indexVar.getBound(), parentCoordBound[1].type())}; - } - else { - taco_not_supported_yet; - } - return {}; - - } - std::vector parentIterBound = parentIterBounds.at(getParentVar()); return parentIterBound; } @@ -879,6 +830,7 @@ ProvenanceGraph::ProvenanceGraph(IndexStmt concreteStmt) { SuchThat suchThat = to(concreteStmt); vector relations = suchThat.getPredicate(); + boundsMap = suchThat.getBounds(); for (IndexVarRel rel : relations) { std::vector parents = rel.getNode()->getParents(); @@ -1187,28 +1139,22 @@ std::vector ProvenanceGraph::deriveIterBounds(IndexVar indexVar, std:: // for split: outer: Div(expr, splitfactor), Div(expr, splitfactor), inner: 0, splitfactor // what about for reordered split: same loop bounds just reordered loops (this might change for different tail strategies) - // cout << "in derive iter bounds prov graph" << endl; - // cout << "INDEX VAR:" << indexVar << endl; if (isUnderived(indexVar)) { - // cout << "underived" << endl; taco_iassert(underivedBounds.count(indexVar) == 1); - - // for (size_t i = 0; i< underivedBounds[indexVar].size(); i++){ - // cout << underivedBounds[indexVar][i] << endl; - // } return underivedBounds[indexVar]; } + std::map> parentIterBounds; + std::map> parentCoordBounds; + std::vector derivedVarOrderExceptLast = derivedVarOrder; if (!derivedVarOrderExceptLast.empty()) { derivedVarOrderExceptLast.pop_back(); } taco_iassert(std::find(derivedVarOrderExceptLast.begin(), derivedVarOrderExceptLast.end(), indexVar) == derivedVarOrderExceptLast.end()); - std::map> parentIterBounds; - std::map> parentCoordBounds; + for (const IndexVar& parent : getParents(indexVar)) { - // cout << "in the for loop" << endl; parentIterBounds[parent] = deriveIterBounds(parent, derivedVarOrder, underivedBounds, variableNames, iterators); vector underivedParentAncestors = getUnderivedAncestors(parent); // TODO: this is okay for now because we don't need parentCoordBounds for fused taco_iassert(underivedParentAncestors.size() == 1); @@ -1217,8 +1163,23 @@ std::vector ProvenanceGraph::deriveIterBounds(IndexVar indexVar, std:: } IndexVarRel rel = parentRelMap.at(indexVar); - - return rel.getNode()->deriveIterBounds(indexVar, parentIterBounds, parentCoordBounds, variableNames, iterators, *this); + std::vector derivedBounds = rel.getNode()->deriveIterBounds(indexVar, parentIterBounds, parentCoordBounds, variableNames, iterators, *this); + + if (hasBound(indexVar)) { + taco_iassert(variableNames.count(indexVar) == 1); + if (getBoundType(indexVar) == BoundType::MaxExact) { + ir::Expr maxBound = derivedBounds[1]; + if ( ir::isa(ir::simplify(maxBound)) && hasBound(indexVar) && !ir::to(ir::simplify(maxBound))->equalsScalar(getBound(indexVar)) ){ + taco_uerror << "Bounded a variable with bound " << getBound(indexVar) << ", while derived bound is " << maxBound << endl; + } + } + else { + taco_not_supported_yet; + } + } + + return derivedBounds; + } bool ProvenanceGraph::hasCoordBounds(IndexVar indexVar) const { @@ -1274,11 +1235,23 @@ bool ProvenanceGraph::isCoordVariable(taco::IndexVar indexVar) const { return !isPosVariable(indexVar); } +bool ProvenanceGraph::hasBound(IndexVar indexVar) const { + return boundsMap.count(indexVar); +} + +size_t ProvenanceGraph::getBound(IndexVar indexVar) const { + return boundsMap.at(indexVar).first; +} + +taco::BoundType ProvenanceGraph::getBoundType(IndexVar indexVar) const { + return boundsMap.at(indexVar).second; +} + bool ProvenanceGraph::hasExactBound(IndexVar indexVar) const { - if(indexVar.isBound()) + if(hasBound(indexVar)) { - return indexVar.getBoundType() == BoundType::MaxExact; + return getBoundType(indexVar) == BoundType::MaxExact; } // TODO: include non-irregular variables return false; diff --git a/src/index_notation/transformations.cpp b/src/index_notation/transformations.cpp index a2c4dcef4..9643789e6 100644 --- a/src/index_notation/transformations.cpp +++ b/src/index_notation/transformations.cpp @@ -38,8 +38,12 @@ Transformation::Transformation(Parallelize parallelize) : transformation(new Parallelize(parallelize)) { } -Transformation::Transformation(AddSuchThatPredicates addsuchthatpredicates) - : transformation(new AddSuchThatPredicates(addsuchthatpredicates)) { +Transformation::Transformation(AddSuchThatPredicates addSuchThatBoundMap) + : transformation(new AddSuchThatPredicates(addSuchThatBoundMap)) { +} + +Transformation::Transformation(AddSuchThatBoundMap addsuchthatpredicates) + : transformation(new AddSuchThatBoundMap(addsuchthatpredicates)) { } IndexStmt Transformation::apply(IndexStmt stmt, string* reason) const { @@ -620,10 +624,10 @@ IndexStmt AddSuchThatPredicates::apply(IndexStmt stmt, string* reason) const { vector predicate = suchThat.getPredicate(); vector predicates = getPredicates(); predicate.insert(predicate.end(), predicates.begin(), predicates.end()); - return SuchThat(suchThat.getStmt(), predicate); + return SuchThat(suchThat.getStmt(), predicate, suchThat.getBounds()); } else{ - return SuchThat(stmt, content->predicates); + return SuchThat(stmt, content->predicates, {}); } } @@ -636,6 +640,59 @@ std::ostream& operator<<(std::ostream& os, const AddSuchThatPredicates& addSuchT return os; } + +// class AddSuchThatBoundMap +struct AddSuchThatBoundMap::Content { + std::map> addSuchThatBoundMap; +}; + +AddSuchThatBoundMap::AddSuchThatBoundMap() : content(nullptr) { +} + +AddSuchThatBoundMap::AddSuchThatBoundMap(std::map> addSuchThatBoundMap) : content(new Content) { + // taco_iassert(!predicates.empty()); + content->addSuchThatBoundMap = addSuchThatBoundMap; +} + +std::map> AddSuchThatBoundMap::getBoundsMap() const { + return content->addSuchThatBoundMap; +} + +IndexStmt AddSuchThatBoundMap::apply(IndexStmt stmt, string* reason) const { + INIT_REASON(reason); + + string r; + if (!isConcreteNotation(stmt, &r)) { + *reason = "The index statement is not valid concrete index notation: " + r; + return IndexStmt(); + } + + if (isa(stmt)) { + SuchThat suchThat = to(stmt); + std::map> boundMap = suchThat.getBounds(); + std::map> bounds = getBoundsMap(); + bounds.insert(boundMap.begin(), boundMap.end()); + return SuchThat(suchThat.getStmt(), suchThat.getPredicate(), bounds); + } + else{ + return SuchThat(stmt, {}, getBoundsMap()); + } +} + +void AddSuchThatBoundMap::print(std::ostream& os) const { + os << "boundsMap(" ; + for (auto const& bound: getBoundsMap()){ + os << bound.first << endl; + } + os << endl; +} + +std::ostream& operator<<(std::ostream& os, const AddSuchThatBoundMap& addSuchThatBoundMap) { + addSuchThatBoundMap.print(os); + return os; +} + + struct ReplaceReductionExpr : public IndexNotationRewriter { const std::map& substitutions; ReplaceReductionExpr(const std::map& substitutions) diff --git a/test/tests-bound.cpp b/test/tests-bound.cpp index 718d4b8f4..86309b011 100644 --- a/test/tests-bound.cpp +++ b/test/tests-bound.cpp @@ -168,7 +168,7 @@ TEST(bound, bound_normal_2) { stmt = stmt.bound(i, 176, BoundType::MaxExact) .split(i, i0, i1, 4) .precompute(precomputedExpr, i1, i1, precomputed); - + A.compile(stmt.concretize()); A.assemble(); A.compute(); @@ -183,22 +183,6 @@ TEST(bound, bound_normal_2) { } -TEST(bound, bound_throw_assert) { - Tensor A("A", {176}, Format{Dense}); - - for (int i = 0; i < 3; i++) { - A.insert({i}, (double) i); - } - A.pack(); - - IndexVar i0("i0"), i1("i1"); - - IndexStmt stmt = A.getAssignment().concretize(); - - ASSERT_THROW(stmt.bound(i0, i1, 4, BoundType::MaxExact), taco::TacoException); - -} - TEST(bound, split_bound_illegal) { Tensor A("A", {176}, Format{Dense}); Tensor B("B", {176}, Format{Dense}); @@ -223,7 +207,7 @@ TEST(bound, split_bound_illegal) { stmt = stmt.bound(i, 17, BoundType::MaxExact) .split(i, i0, i1, 4) .bound(i1, 2, BoundType::MaxExact); - + ASSERT_THROW(A.compile(stmt.concretize()), taco::TacoException); } \ No newline at end of file diff --git a/test/tests-index_notation.cpp b/test/tests-index_notation.cpp index 8090e71de..316bbf67c 100644 --- a/test/tests-index_notation.cpp +++ b/test/tests-index_notation.cpp @@ -99,9 +99,9 @@ TEST(notation, isConcreteNotation) { forall(j, a(i) += sum(j, B(i,j) * c(j)))))); - ASSERT_TRUE(isConcreteNotation(suchthat(as = bs + cs, {}))); - ASSERT_FALSE(isConcreteNotation(suchthat(suchthat(as = bs + cs, {}), {}))); - ASSERT_FALSE(isConcreteNotation(forall(i, suchthat(a(i) = b(i) + c(i), {})))); + ASSERT_TRUE(isConcreteNotation(suchthat(as = bs + cs, {}, {}))); + ASSERT_FALSE(isConcreteNotation(suchthat(suchthat(as = bs + cs, {}, {}), {}, {}))); + ASSERT_FALSE(isConcreteNotation(forall(i, suchthat(a(i) = b(i) + c(i), {}, {})))); } TEST(notation, makeReductionNotation) { diff --git a/test/tests-scheduling-eval.cpp b/test/tests-scheduling-eval.cpp index 64da1e9ab..62b0f5db7 100644 --- a/test/tests-scheduling-eval.cpp +++ b/test/tests-scheduling-eval.cpp @@ -257,10 +257,10 @@ IndexStmt scheduleSpMMGPU(IndexStmt stmt, Tensor A, IndexExpr precompute .pos(f, fpos, A(i, j)) .split(fpos, block, fpos1, NNZ_PER_TB) .split(fpos1, warp, nnz, NNZ_PER_WARP) - .split(k, dense_val_unbounded, thread, WARP_SIZE) - .reorder({block, warp, thread, dense_val_unbounded, nnz}) + .split(k, dense_val, thread, WARP_SIZE) + .reorder({block, warp, thread, dense_val, nnz}) //.precompute(precomputedExpr, nnz, nnz, precomputed) - .bound(dense_val_unbounded, dense_val, 4, BoundType::MaxExact) + .bound(dense_val, 4, BoundType::MaxExact) //.unroll(dense_val, 4) .parallelize(block, ParallelUnit::GPUBlock, OutputRaceStrategy::IgnoreRaces) .parallelize(warp, ParallelUnit::GPUWarp, OutputRaceStrategy::IgnoreRaces) @@ -278,8 +278,8 @@ IndexStmt scheduleSDDMMGPU(IndexStmt stmt, Tensor B, int NNZ_PER_WARP=8* .pos(f, fpos, B(i,k)) .split(fpos, block, fpos1, NNZ_PER_TB) .split(fpos1, warp, nnz, NNZ_PER_WARP) - .split(j, dense_val_unbounded, thread, WARP_SIZE) - .bound(dense_val_unbounded, dense_val, CO_FACTOR, BoundType::MaxExact) + .split(j, dense_val, thread, WARP_SIZE) + .bound(dense_val, CO_FACTOR, BoundType::MaxExact) .reorder({block, warp, nnz, thread, dense_val}) .unroll(dense_val, CO_FACTOR) .parallelize(block, ParallelUnit::GPUBlock, OutputRaceStrategy::IgnoreRaces) @@ -313,8 +313,8 @@ IndexStmt scheduleTTMGPU(IndexStmt stmt, Tensor B, int NNZ_PER_WARP=8*32 .pos(f, fpos, B(i, j, k)) .split(fpos, block, fpos1, NNZ_PER_TB) .split(fpos1, warp, nnz, NNZ_PER_WARP) - .split(l, dense_val_unbounded, thread, WARP_SIZE) - .bound(dense_val_unbounded, dense_val, CO_FACTOR, BoundType::MaxExact) + .split(l, dense_val, thread, WARP_SIZE) + .bound(dense_val, CO_FACTOR, BoundType::MaxExact) .reorder({block, warp, nnz, thread, dense_val}) .unroll(dense_val, CO_FACTOR) .parallelize(block, ParallelUnit::GPUBlock, OutputRaceStrategy::IgnoreRaces) @@ -350,8 +350,8 @@ IndexStmt scheduleMTTKRPGPU(IndexStmt stmt, Tensor B, int NNZ_PER_WARP=1 .pos(f, fpos, B(i, k, l)) .split(fpos, block, fpos1, NNZ_PER_TB) .split(fpos1, warp, nnz, NNZ_PER_WARP) - .split(j, dense_val_unbounded, thread, WARP_SIZE) - .bound(dense_val_unbounded, dense_val, 1, BoundType::MaxExact) + .split(j, dense_val, thread, WARP_SIZE) + .bound(dense_val, 1, BoundType::MaxExact) .reorder({block, warp, dense_val, thread, nnz}) .parallelize(block, ParallelUnit::GPUBlock, OutputRaceStrategy::IgnoreRaces) .parallelize(warp, ParallelUnit::GPUWarp, OutputRaceStrategy::IgnoreRaces)