From 735e7a6f28cec763f446fee83f4f14671a38ec9e Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 17 Jul 2024 22:23:46 +0000 Subject: [PATCH 01/56] adjust stride ordering rules for standard shape: stride can be anything in a dimension of size 1 --- src/shape.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/shape.cpp b/src/shape.cpp index f9a42361465..9b48a631b73 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -63,8 +63,14 @@ struct shape_impl { assert(t != shape::tuple_type); assert(m_lens.size() == m_strides.size()); + + std::vector filtered_strides; + for(size_t ind = 0; ind < m_strides.size(); ind++) + if(m_lens[ind] != 1) + filtered_strides.push_back(m_strides[ind]); + m_standard = this->elements() == this->element_space() and not skips() and - std::is_sorted(m_strides.rbegin(), m_strides.rend()); + std::is_sorted(filtered_strides.rbegin(), filtered_strides.rend()); } shape_impl(shape::type_t t, std::vector dims) From 92bac55267b171ba35d12505ccf0944ecfaee5e4 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 17 Jul 2024 23:42:34 +0000 Subject: [PATCH 02/56] add a shape test --- src/shape.cpp | 5 ++++- test/shape_test.cpp | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/shape.cpp b/src/shape.cpp index 9b48a631b73..59355d36874 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -65,10 +65,13 @@ struct shape_impl assert(m_lens.size() == m_strides.size()); std::vector filtered_strides; + std::vector ffss; for(size_t ind = 0; ind < m_strides.size(); ind++) if(m_lens[ind] != 1) filtered_strides.push_back(m_strides[ind]); - +auto asdf = std::is_sorted(filtered_strides.begin(), filtered_strides.end()); +auto asdf2 = std::is_sorted(filtered_strides.rbegin(), filtered_strides.rend()); +auto asdf3 = skips(); m_standard = this->elements() == this->element_space() and not skips() and std::is_sorted(filtered_strides.rbegin(), filtered_strides.rend()); } diff --git a/test/shape_test.cpp 
b/test/shape_test.cpp index 22ac7f54c0d..e1b683def27 100644 --- a/test/shape_test.cpp +++ b/test/shape_test.cpp @@ -86,6 +86,17 @@ TEST_CASE(test_shape_standard_singleton_dim) EXPECT(not s.broadcasted()); } +TEST_CASE(test_shape_standard_stray_singleton_dim) +{ + // A shape can be transposed (nonzero strides out of order) but still be considered + // standard if the only out-of-order strides are on axes with a length of 1. + migraphx::shape s{migraphx::shape::float_type, {5, 1, 1, 8}, {8, 3, 4, 1}}; + EXPECT(s.standard()); + EXPECT(s.packed()); + EXPECT(s.transposed()); + EXPECT(not s.broadcasted()); +} + TEST_CASE(test_shape_min_max_opt) { migraphx::shape s{migraphx::shape::float_type, {2, 2, 3}, {6, 3, 1}}; From 090c767a04d7e8c42a24da96995711a25f201de9 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 17 Jul 2024 23:54:16 +0000 Subject: [PATCH 03/56] debug code removed --- src/shape.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/shape.cpp b/src/shape.cpp index 59355d36874..463d12d7d3c 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -69,9 +69,6 @@ struct shape_impl for(size_t ind = 0; ind < m_strides.size(); ind++) if(m_lens[ind] != 1) filtered_strides.push_back(m_strides[ind]); -auto asdf = std::is_sorted(filtered_strides.begin(), filtered_strides.end()); -auto asdf2 = std::is_sorted(filtered_strides.rbegin(), filtered_strides.rend()); -auto asdf3 = skips(); m_standard = this->elements() == this->element_space() and not skips() and std::is_sorted(filtered_strides.rbegin(), filtered_strides.rend()); } From 9e8528ff87f1c9664d16e0b811d607fb485600a6 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 18 Jul 2024 15:01:54 +0000 Subject: [PATCH 04/56] fix a test --- src/shape.cpp | 2 +- test/shape_test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shape.cpp b/src/shape.cpp index 463d12d7d3c..7e4e1ef4924 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -64,8 +64,8 @@ struct shape_impl assert(t != 
shape::tuple_type); assert(m_lens.size() == m_strides.size()); + // Calculate standard shape flag for these lens/strides std::vector filtered_strides; - std::vector ffss; for(size_t ind = 0; ind < m_strides.size(); ind++) if(m_lens[ind] != 1) filtered_strides.push_back(m_strides[ind]); diff --git a/test/shape_test.cpp b/test/shape_test.cpp index e1b683def27..56425c52c61 100644 --- a/test/shape_test.cpp +++ b/test/shape_test.cpp @@ -537,7 +537,7 @@ TEST_CASE(test_shape_broadcasted) TEST_CASE(test_shape_broadcasted2) { migraphx::shape s{migraphx::shape::float_type, {1, 2}, {0, 1}}; - EXPECT(not s.standard()); + EXPECT(s.standard()); EXPECT(s.packed()); EXPECT(not s.transposed()); EXPECT(s.broadcasted()); From 22cc5ffcd6922374bb58f048fb33df0090414e3f Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 30 Jul 2024 21:48:31 +0000 Subject: [PATCH 05/56] added shape::compatible_lens() method --- src/include/migraphx/shape.hpp | 5 +++++ src/program.cpp | 2 +- src/shape.cpp | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/include/migraphx/shape.hpp b/src/include/migraphx/shape.hpp index 0c1e7b269d4..822d0f33d94 100644 --- a/src/include/migraphx/shape.hpp +++ b/src/include/migraphx/shape.hpp @@ -296,6 +296,11 @@ struct MIGRAPHX_EXPORT shape /// not transposed. bool standard() const; + /// Returns true if the shapes are compatible. TODO: better description + // Paul, How would you describe the purpose of that equality check for the shapes? I'm trying to come up with a + // function description that explains why it's ok for the strides not to match sometimes. 
+ bool compatible_lens(const shape& s2) const; + /// Returns true if all strides are equal to 0 (scalar tensor) bool scalar() const; diff --git a/src/program.cpp b/src/program.cpp index 2e34cd51505..a00f600d5dc 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -508,7 +508,7 @@ std::vector generic_eval(const module* mod, } assert(results.find(ins) != results.end()); assert(ins->get_shape().any_of_dynamic() or - results.at(ins).get_shape() == ins->get_shape()); + results.at(ins).get_shape().compatible_lens(ins->get_shape())); } return {results.at(std::prev(mod->end()))}; } diff --git a/src/shape.cpp b/src/shape.cpp index 7e4e1ef4924..5cf19831d10 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -490,6 +490,26 @@ bool shape::scalar() const bool shape::standard() const { return impl->m_standard; } + +bool shape::compatible_lens(const shape& s2) const +{ + if(dynamic() or s2.dynamic()) return true; + if(lens() != s2.lens() or type() != s2.type()) return false; + + // Lens must be the same; strides must be same except that + // axes with len=1 don't matter + for(size_t ind = 0; ind < lens().size(); ind++) + { + size_t l_ind(lens()[ind]); + if(l_ind != s2.lens()[ind] or + (l_ind != 1 and strides()[ind] != s2.strides()[ind])) + return false; + } + return true; + // TODO: Do these checks matter here? 
+ // m_standard = this->elements() == this->element_space() and not skips() and +} + shape shape::normalize_standard() const { if(this->standard()) From c5508e6abbb2769a6657877c7829714c2e9af088 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 30 Jul 2024 22:03:46 +0000 Subject: [PATCH 06/56] format --- src/include/migraphx/shape.hpp | 5 +++-- src/shape.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/include/migraphx/shape.hpp b/src/include/migraphx/shape.hpp index 822d0f33d94..4d0bc406d43 100644 --- a/src/include/migraphx/shape.hpp +++ b/src/include/migraphx/shape.hpp @@ -297,8 +297,9 @@ struct MIGRAPHX_EXPORT shape bool standard() const; /// Returns true if the shapes are compatible. TODO: better description - // Paul, How would you describe the purpose of that equality check for the shapes? I'm trying to come up with a - // function description that explains why it's ok for the strides not to match sometimes. + // Paul, How would you describe the purpose of that equality check for the shapes? I'm trying + // to come up with a function description that explains why it's ok for the strides not to match + // sometimes. 
bool compatible_lens(const shape& s2) const; /// Returns true if all strides are equal to 0 (scalar tensor) diff --git a/src/shape.cpp b/src/shape.cpp index 5cf19831d10..7f0f4aa9385 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -490,19 +490,19 @@ bool shape::scalar() const bool shape::standard() const { return impl->m_standard; } - bool shape::compatible_lens(const shape& s2) const { - if(dynamic() or s2.dynamic()) return true; - if(lens() != s2.lens() or type() != s2.type()) return false; - + if(dynamic() or s2.dynamic()) + return true; + if(lens() != s2.lens() or type() != s2.type()) + return false; + // Lens must be the same; strides must be same except that // axes with len=1 don't matter for(size_t ind = 0; ind < lens().size(); ind++) { size_t l_ind(lens()[ind]); - if(l_ind != s2.lens()[ind] or - (l_ind != 1 and strides()[ind] != s2.strides()[ind])) + if(l_ind != s2.lens()[ind] or (l_ind != 1 and strides()[ind] != s2.strides()[ind])) return false; } return true; From 21dddd0e47363eccb2807eee78ca5729d263630d Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 31 Jul 2024 18:24:54 +0000 Subject: [PATCH 07/56] refactor the function for testing compatible shapes to a non-member static. --- src/include/migraphx/shape.hpp | 6 ------ src/program.cpp | 23 ++++++++++++++++++++++- src/shape.cpp | 20 -------------------- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/src/include/migraphx/shape.hpp b/src/include/migraphx/shape.hpp index 4d0bc406d43..0c1e7b269d4 100644 --- a/src/include/migraphx/shape.hpp +++ b/src/include/migraphx/shape.hpp @@ -296,12 +296,6 @@ struct MIGRAPHX_EXPORT shape /// not transposed. bool standard() const; - /// Returns true if the shapes are compatible. TODO: better description - // Paul, How would you describe the purpose of that equality check for the shapes? I'm trying - // to come up with a function description that explains why it's ok for the strides not to match - // sometimes. 
- bool compatible_lens(const shape& s2) const; - /// Returns true if all strides are equal to 0 (scalar tensor) bool scalar() const; diff --git a/src/program.cpp b/src/program.cpp index a00f600d5dc..ced6e43df68 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -426,6 +426,25 @@ void preview_argument(std::ostream& os, const argument& a) }); } +static bool is_compatible_shape(const shape& actual, const shape& expected) +{ + // Check subshapes + if(expected.type() == shape::tuple_type) + return equal(actual.sub_shapes(), expected.sub_shapes(), &is_compatible_shape); + // Only the expected can be dynamic + if(expected.dynamic()) + return true; + if(actual == expected) + return true; + if(actual.type() != expected.type()) + return false; + // If both shapes are standard and lens match, they are considered compatible + // even if strides are different. + if(actual.standard() and expected.standard()) + return actual.lens() == expected.lens(); + return false; +} + template std::vector generic_eval(const module* mod, std::vector& ctx, @@ -507,8 +526,10 @@ std::vector generic_eval(const module* mod, })); } assert(results.find(ins) != results.end()); + // TODO: what order do the arguments to is_compatible_shape() come in? One + // can be dynamic. 
assert(ins->get_shape().any_of_dynamic() or - results.at(ins).get_shape().compatible_lens(ins->get_shape())); + is_compatible_shape(ins->get_shape(), results.at(ins).get_shape())); } return {results.at(std::prev(mod->end()))}; } diff --git a/src/shape.cpp b/src/shape.cpp index 7f0f4aa9385..7e4e1ef4924 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -490,26 +490,6 @@ bool shape::scalar() const bool shape::standard() const { return impl->m_standard; } -bool shape::compatible_lens(const shape& s2) const -{ - if(dynamic() or s2.dynamic()) - return true; - if(lens() != s2.lens() or type() != s2.type()) - return false; - - // Lens must be the same; strides must be same except that - // axes with len=1 don't matter - for(size_t ind = 0; ind < lens().size(); ind++) - { - size_t l_ind(lens()[ind]); - if(l_ind != s2.lens()[ind] or (l_ind != 1 and strides()[ind] != s2.strides()[ind])) - return false; - } - return true; - // TODO: Do these checks matter here? - // m_standard = this->elements() == this->element_space() and not skips() and -} - shape shape::normalize_standard() const { if(this->standard()) From 31addfc49b141d0569f4772506449961e6be6f2a Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 31 Jul 2024 18:44:18 +0000 Subject: [PATCH 08/56] changing recursive equal call --- src/program.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/program.cpp b/src/program.cpp index ced6e43df68..6392f179280 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -430,7 +430,7 @@ static bool is_compatible_shape(const shape& actual, const shape& expected) { // Check subshapes if(expected.type() == shape::tuple_type) - return equal(actual.sub_shapes(), expected.sub_shapes(), &is_compatible_shape); + return equal(actual.sub_shapes().begin(), actual.sub_shapes().end(), expected.sub_shapes().begin(), &is_compatible_shape); // Only the expected can be dynamic if(expected.dynamic()) return true; From 02633f230d848d7bdbdb2d29c2501897a6c42c45 Mon Sep 17 00:00:00 
2001 From: Brian Pickrell Date: Wed, 31 Jul 2024 20:43:14 +0000 Subject: [PATCH 09/56] conditional compilation for is_compatible_shape() --- src/program.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/program.cpp b/src/program.cpp index 6392f179280..7ea5f884e9c 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -426,11 +426,16 @@ void preview_argument(std::ostream& os, const argument& a) }); } +// This function currently used only in an Assertion. +#ifndef NDEBUG static bool is_compatible_shape(const shape& actual, const shape& expected) { // Check subshapes if(expected.type() == shape::tuple_type) - return equal(actual.sub_shapes().begin(), actual.sub_shapes().end(), expected.sub_shapes().begin(), &is_compatible_shape); + return equal(actual.sub_shapes().begin(), + actual.sub_shapes().end(), + expected.sub_shapes().begin(), + &is_compatible_shape); // Only the expected can be dynamic if(expected.dynamic()) return true; @@ -444,6 +449,7 @@ static bool is_compatible_shape(const shape& actual, const shape& expected) return actual.lens() == expected.lens(); return false; } +#endif template std::vector generic_eval(const module* mod, From 68467b633d36adc98a98cd31486f2d9ddcc5a4b8 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 31 Jul 2024 22:18:59 +0000 Subject: [PATCH 10/56] different workaround for compile problem --- src/program.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/program.cpp b/src/program.cpp index 7ea5f884e9c..0c00c8526e5 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -427,7 +427,6 @@ void preview_argument(std::ostream& os, const argument& a) } // This function currently used only in an Assertion. 
-#ifndef NDEBUG static bool is_compatible_shape(const shape& actual, const shape& expected) { // Check subshapes @@ -449,7 +448,6 @@ static bool is_compatible_shape(const shape& actual, const shape& expected) return actual.lens() == expected.lens(); return false; } -#endif template std::vector generic_eval(const module* mod, @@ -534,6 +532,7 @@ std::vector generic_eval(const module* mod, assert(results.find(ins) != results.end()); // TODO: what order do the arguments to is_compatible_shape() come in? One // can be dynamic. + (void)(is_compatible_shape(shape{}, shape{})); assert(ins->get_shape().any_of_dynamic() or is_compatible_shape(ins->get_shape(), results.at(ins).get_shape())); } From be7a72e67e12d338963ff3cb797ae5e3a8f0e8e7 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 1 Aug 2024 14:54:08 +0000 Subject: [PATCH 11/56] misc small fixes --- src/program.cpp | 10 +++++----- src/shape.cpp | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/program.cpp b/src/program.cpp index 0c00c8526e5..25cb16cc950 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -427,6 +427,9 @@ void preview_argument(std::ostream& os, const argument& a) } // This function currently used only in an Assertion. +// "Almost identical" shapes. To support an MLIR feature, there is a limited +// case where shapes may both be standard but have non-identical strides. +#ifndef NDEBUG static bool is_compatible_shape(const shape& actual, const shape& expected) { // Check subshapes @@ -448,6 +451,7 @@ static bool is_compatible_shape(const shape& actual, const shape& expected) return actual.lens() == expected.lens(); return false; } +#endif template std::vector generic_eval(const module* mod, @@ -530,11 +534,7 @@ std::vector generic_eval(const module* mod, })); } assert(results.find(ins) != results.end()); - // TODO: what order do the arguments to is_compatible_shape() come in? One - // can be dynamic. 
- (void)(is_compatible_shape(shape{}, shape{})); - assert(ins->get_shape().any_of_dynamic() or - is_compatible_shape(ins->get_shape(), results.at(ins).get_shape())); + assert(is_compatible_shape(results.at(ins).get_shape(), ins->get_shape())); } return {results.at(std::prev(mod->end()))}; } diff --git a/src/shape.cpp b/src/shape.cpp index 7e4e1ef4924..cfa3a1c2b43 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -64,7 +64,8 @@ struct shape_impl assert(t != shape::tuple_type); assert(m_lens.size() == m_strides.size()); - // Calculate standard shape flag for these lens/strides + // Calculate standard shape flag for these lens/strides. Strides on size-1 + // axes are ignored to support an MLIR rule. std::vector filtered_strides; for(size_t ind = 0; ind < m_strides.size(); ind++) if(m_lens[ind] != 1) From e0f169546db636c379baed121c1191baae8f37f8 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 7 Aug 2024 21:06:24 +0000 Subject: [PATCH 12/56] changes to compatible check, want to see if this passes jenkins --- src/include/migraphx/check_shapes.hpp | 27 +++++++++++++++++++++++++- src/include/migraphx/program.hpp | 1 - src/include/migraphx/shape.hpp | 27 ++++++++++++++++++++++++++ src/program.cpp | 28 +-------------------------- 4 files changed, 54 insertions(+), 29 deletions(-) diff --git a/src/include/migraphx/check_shapes.hpp b/src/include/migraphx/check_shapes.hpp index 05118082ee8..a19dbe2e000 100644 --- a/src/include/migraphx/check_shapes.hpp +++ b/src/include/migraphx/check_shapes.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -224,6 +225,16 @@ struct check_shapes return *this; } + /*! + * Check all shapes are compatible. + */ + // const check_shapes& same_compatible() const + // { + // if(not this->same([](const shape& s) { return is_compatible(*this, s); })) + // MIGRAPHX_THROW(prefix() + "Shapes don't match"); + // return *this; + // } + /*! * Check all shapes have the same number of dimensions. 
*/ @@ -239,8 +250,13 @@ struct check_shapes */ const check_shapes& same_layout() const { - if(not this->same([](const shape& s) { return find_permutation(s); })) + + if(not same_compatible()) MIGRAPHX_THROW(prefix() + "Layouts do not match"); + + + // if(not this->same_compatible([](const shape& s) { return find_permutation(s); })) + // MIGRAPHX_THROW(prefix() + "Layouts do not match"); return *this; } @@ -368,6 +384,15 @@ struct check_shapes return this->all_of([&](const shape& s) { return f(s) == key; }); } + + bool same_compatible() const + { + if(begin == end) + return true; + return this->all_of([&](const shape& s) { return migraphx::is_compatible_shape(s, *begin) + or find_permutation(s) == find_permutation(*begin) ; }); + } + template bool all_of(Predicate p) const { diff --git a/src/include/migraphx/program.hpp b/src/include/migraphx/program.hpp index e86ba628656..6e9bb41f4af 100644 --- a/src/include/migraphx/program.hpp +++ b/src/include/migraphx/program.hpp @@ -161,7 +161,6 @@ struct MIGRAPHX_EXPORT program void assign(const program& p); std::unique_ptr impl; }; - } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx diff --git a/src/include/migraphx/shape.hpp b/src/include/migraphx/shape.hpp index 0c1e7b269d4..890d7721eb4 100644 --- a/src/include/migraphx/shape.hpp +++ b/src/include/migraphx/shape.hpp @@ -431,6 +431,33 @@ struct MIGRAPHX_EXPORT shape std::shared_ptr impl; }; + +// "Almost identical" shapes. To support an MLIR feature, there is a limited +// case where shapes may both be standard but have non-identical strides. 
+// #ifndef NDEBUG +static bool inline is_compatible_shape(const shape& actual, const shape& expected) +{ + // Check subshapes + if(expected.type() == shape::tuple_type) + return equal(actual.sub_shapes().begin(), + actual.sub_shapes().end(), + expected.sub_shapes().begin(), + &is_compatible_shape); + // Only the expected can be dynamic + if(expected.dynamic()) + return true; + if(actual == expected) + return true; + if(actual.type() != expected.type()) + return false; + // If both shapes are standard and lens match, they are considered compatible + // even if strides are different. + if(actual.standard() and expected.standard()) + return actual.lens() == expected.lens(); + return false; +} +// #endif + /// Flatten subshapes to a single vector of non-tuple type of shapes MIGRAPHX_EXPORT std::vector flatten(const std::vector& shapes); diff --git a/src/program.cpp b/src/program.cpp index 25cb16cc950..7de4fa65054 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -426,33 +427,6 @@ void preview_argument(std::ostream& os, const argument& a) }); } -// This function currently used only in an Assertion. -// "Almost identical" shapes. To support an MLIR feature, there is a limited -// case where shapes may both be standard but have non-identical strides. -#ifndef NDEBUG -static bool is_compatible_shape(const shape& actual, const shape& expected) -{ - // Check subshapes - if(expected.type() == shape::tuple_type) - return equal(actual.sub_shapes().begin(), - actual.sub_shapes().end(), - expected.sub_shapes().begin(), - &is_compatible_shape); - // Only the expected can be dynamic - if(expected.dynamic()) - return true; - if(actual == expected) - return true; - if(actual.type() != expected.type()) - return false; - // If both shapes are standard and lens match, they are considered compatible - // even if strides are different. 
- if(actual.standard() and expected.standard()) - return actual.lens() == expected.lens(); - return false; -} -#endif - template std::vector generic_eval(const module* mod, std::vector& ctx, From 95d7a2fe29a62ca09103c764c5438aaecf81855b Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 8 Aug 2024 16:02:23 +0000 Subject: [PATCH 13/56] style --- src/include/migraphx/check_shapes.hpp | 9 +++++---- src/onnx/onnx_parser.cpp | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/include/migraphx/check_shapes.hpp b/src/include/migraphx/check_shapes.hpp index a19dbe2e000..dbee0d2242c 100644 --- a/src/include/migraphx/check_shapes.hpp +++ b/src/include/migraphx/check_shapes.hpp @@ -251,10 +251,9 @@ struct check_shapes const check_shapes& same_layout() const { - if(not same_compatible()) + if(not same_compatible()) MIGRAPHX_THROW(prefix() + "Layouts do not match"); - // if(not this->same_compatible([](const shape& s) { return find_permutation(s); })) // MIGRAPHX_THROW(prefix() + "Layouts do not match"); return *this; @@ -389,8 +388,10 @@ struct check_shapes { if(begin == end) return true; - return this->all_of([&](const shape& s) { return migraphx::is_compatible_shape(s, *begin) - or find_permutation(s) == find_permutation(*begin) ; }); + return this->all_of([&](const shape& s) { + return migraphx::is_compatible_shape(s, *begin) or + find_permutation(s) == find_permutation(*begin); + }); } template diff --git a/src/onnx/onnx_parser.cpp b/src/onnx/onnx_parser.cpp index 07d7f6a52d7..0e58aabf976 100644 --- a/src/onnx/onnx_parser.cpp +++ b/src/onnx/onnx_parser.cpp @@ -300,7 +300,7 @@ int64_t onnx_parser::get_opset_version(const onnx::ModelProto& model) return version; } -void print_added_instructions(module* mod, +void print_added_instructions(const module* mod, const std::vector& args, const std::vector& result) { From a3f40ddba1a62d4f30f4c149c66e2141b7aa7319 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 8 Aug 2024 17:26:21 +0000 Subject: 
[PATCH 14/56] cleanup method names --- src/include/migraphx/check_shapes.hpp | 34 ++++--------------- src/include/migraphx/shape.hpp | 1 - .../gpu/include/migraphx/gpu/convolution.hpp | 2 +- test/check_shapes_test.cpp | 4 +-- 4 files changed, 9 insertions(+), 32 deletions(-) diff --git a/src/include/migraphx/check_shapes.hpp b/src/include/migraphx/check_shapes.hpp index dbee0d2242c..afc77c0c55e 100644 --- a/src/include/migraphx/check_shapes.hpp +++ b/src/include/migraphx/check_shapes.hpp @@ -225,16 +225,6 @@ struct check_shapes return *this; } - /*! - * Check all shapes are compatible. - */ - // const check_shapes& same_compatible() const - // { - // if(not this->same([](const shape& s) { return is_compatible(*this, s); })) - // MIGRAPHX_THROW(prefix() + "Shapes don't match"); - // return *this; - // } - /*! * Check all shapes have the same number of dimensions. */ @@ -246,16 +236,15 @@ struct check_shapes } /*! - * Check all shapes have the same layout. + * Check all shapes have the same layout, with minor differences allowed. 
*/ - const check_shapes& same_layout() const + const check_shapes& compatible_layout() const { - - if(not same_compatible()) + if(begin != end and this->any_of([&](const shape& s) { + return not migraphx::is_compatible_shape(s, *begin) and + find_permutation(s) != find_permutation(*begin); + })) MIGRAPHX_THROW(prefix() + "Layouts do not match"); - - // if(not this->same_compatible([](const shape& s) { return find_permutation(s); })) - // MIGRAPHX_THROW(prefix() + "Layouts do not match"); return *this; } @@ -383,17 +372,6 @@ struct check_shapes return this->all_of([&](const shape& s) { return f(s) == key; }); } - - bool same_compatible() const - { - if(begin == end) - return true; - return this->all_of([&](const shape& s) { - return migraphx::is_compatible_shape(s, *begin) or - find_permutation(s) == find_permutation(*begin); - }); - } - template bool all_of(Predicate p) const { diff --git a/src/include/migraphx/shape.hpp b/src/include/migraphx/shape.hpp index 890d7721eb4..6a2492792f6 100644 --- a/src/include/migraphx/shape.hpp +++ b/src/include/migraphx/shape.hpp @@ -431,7 +431,6 @@ struct MIGRAPHX_EXPORT shape std::shared_ptr impl; }; - // "Almost identical" shapes. To support an MLIR feature, there is a limited // case where shapes may both be standard but have non-identical strides. 
// #ifndef NDEBUG diff --git a/src/targets/gpu/include/migraphx/gpu/convolution.hpp b/src/targets/gpu/include/migraphx/gpu/convolution.hpp index 1b1c3169830..0738324af4a 100644 --- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp +++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp @@ -85,7 +85,7 @@ struct miopen_convolution check_shapes{conv_inputs, *this} .max_ndims(5) .packed_layouts({{0, 1, 2}, {0, 1, 2, 3}, {0, 2, 3, 1}, {0, 1, 2, 3, 4}}) - .same_layout(); + .compatible_layout(); return migraphx::compute_shape(op, conv_inputs); } diff --git a/test/check_shapes_test.cpp b/test/check_shapes_test.cpp index 42b514d02f8..58241576648 100644 --- a/test/check_shapes_test.cpp +++ b/test/check_shapes_test.cpp @@ -53,7 +53,7 @@ TEST_CASE(same_layout_fail) EXPECT(test::throws([] { shape a{shape::float_type, {2, 3}}; shape b{shape::float_type, {2, 3}, {1, 2}}; - migraphx::check_shapes{{a, b}, ""}.same_layout(); + migraphx::check_shapes{{a, b}, ""}.compatible_layout(); })); } @@ -62,7 +62,7 @@ TEST_CASE(same_layout_pass) EXPECT(not test::throws([] { shape a{shape::float_type, {2, 3}, {1, 2}}; shape b{shape::float_type, {2, 3}, {1, 2}}; - migraphx::check_shapes{{a, b}, ""}.same_layout(); + migraphx::check_shapes{{a, b}, ""}.compatible_layout(); })); } From 4fffb177e1e58fae3e18782374d587ec652fa752 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 8 Aug 2024 17:26:59 +0000 Subject: [PATCH 15/56] format --- src/include/migraphx/check_shapes.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/migraphx/check_shapes.hpp b/src/include/migraphx/check_shapes.hpp index afc77c0c55e..0d59588509f 100644 --- a/src/include/migraphx/check_shapes.hpp +++ b/src/include/migraphx/check_shapes.hpp @@ -241,9 +241,9 @@ struct check_shapes const check_shapes& compatible_layout() const { if(begin != end and this->any_of([&](const shape& s) { - return not migraphx::is_compatible_shape(s, *begin) and - find_permutation(s) != 
find_permutation(*begin); - })) + return not migraphx::is_compatible_shape(s, *begin) and + find_permutation(s) != find_permutation(*begin); + })) MIGRAPHX_THROW(prefix() + "Layouts do not match"); return *this; } From 20fb5bc849e931fb9655cc90edbe1cfa3e64ca98 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 8 Aug 2024 18:22:19 +0000 Subject: [PATCH 16/56] style --- src/include/migraphx/check_shapes.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/include/migraphx/check_shapes.hpp b/src/include/migraphx/check_shapes.hpp index 0d59588509f..073b2fe31f0 100644 --- a/src/include/migraphx/check_shapes.hpp +++ b/src/include/migraphx/check_shapes.hpp @@ -28,7 +28,6 @@ #include #include #include -#include #include #include From 23712315e4731e67d859827953988100b2a53567 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 8 Aug 2024 18:29:03 +0000 Subject: [PATCH 17/56] comment --- src/include/migraphx/shape.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/include/migraphx/shape.hpp b/src/include/migraphx/shape.hpp index 6a2492792f6..e7ff55dfcf1 100644 --- a/src/include/migraphx/shape.hpp +++ b/src/include/migraphx/shape.hpp @@ -433,7 +433,6 @@ struct MIGRAPHX_EXPORT shape // "Almost identical" shapes. To support an MLIR feature, there is a limited // case where shapes may both be standard but have non-identical strides. 
-// #ifndef NDEBUG static bool inline is_compatible_shape(const shape& actual, const shape& expected) { // Check subshapes @@ -455,7 +454,6 @@ static bool inline is_compatible_shape(const shape& actual, const shape& expecte return actual.lens() == expected.lens(); return false; } -// #endif /// Flatten subshapes to a single vector of non-tuple type of shapes MIGRAPHX_EXPORT std::vector flatten(const std::vector& shapes); From 0c6bef7d29abd0c767e9c1b4b399ac9896ccbcc9 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Fri, 9 Aug 2024 16:31:06 +0000 Subject: [PATCH 18/56] add test subcases for new function --- src/onnx/onnx_parser.cpp | 2 +- test/shape_test.cpp | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/onnx/onnx_parser.cpp b/src/onnx/onnx_parser.cpp index 0e58aabf976..07d7f6a52d7 100644 --- a/src/onnx/onnx_parser.cpp +++ b/src/onnx/onnx_parser.cpp @@ -300,7 +300,7 @@ int64_t onnx_parser::get_opset_version(const onnx::ModelProto& model) return version; } -void print_added_instructions(const module* mod, +void print_added_instructions(module* mod, const std::vector& args, const std::vector& result) { diff --git a/test/shape_test.cpp b/test/shape_test.cpp index 56425c52c61..f8f9f34074e 100644 --- a/test/shape_test.cpp +++ b/test/shape_test.cpp @@ -90,11 +90,13 @@ TEST_CASE(test_shape_standard_stray_singleton_dim) { // A shape can be transposed (nonzero strides out of order) but still be considered // standard if the only out-of-order strides are on axes with a length of 1. 
- migraphx::shape s{migraphx::shape::float_type, {5, 1, 1, 8}, {8, 3, 4, 1}}; - EXPECT(s.standard()); - EXPECT(s.packed()); - EXPECT(s.transposed()); - EXPECT(not s.broadcasted()); + migraphx::shape s1{migraphx::shape::float_type, {5, 1, 1, 8}, {8, 3, 4, 1}}; + migraphx::shape s2{migraphx::shape::float_type, {5, 1, 1, 8}, {8, 3, 5, 1}}; + EXPECT(s1.standard()); + EXPECT(s1.packed()); + EXPECT(s1.transposed()); + EXPECT(not s1.broadcasted()); + EXPECT(is_compatible_shape(s1, s2)); } TEST_CASE(test_shape_min_max_opt) @@ -826,6 +828,7 @@ TEST_CASE(tuple_copy) EXPECT(s3 == s2); migraphx::shape s4{{migraphx::shape{migraphx::shape::int8_type}, migraphx::shape{migraphx::shape::float_type}}}; + EXPECT(!is_compatible_shape(s1, s4)); EXPECT(s4 != s1); EXPECT(s4 != s2); EXPECT(s4 != s3); From ef1d2f6ff2726af0bc2cc833362b5e86c6a2dff6 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Fri, 9 Aug 2024 20:48:30 +0000 Subject: [PATCH 19/56] style --- test/shape_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/shape_test.cpp b/test/shape_test.cpp index f8f9f34074e..005968c4996 100644 --- a/test/shape_test.cpp +++ b/test/shape_test.cpp @@ -828,7 +828,7 @@ TEST_CASE(tuple_copy) EXPECT(s3 == s2); migraphx::shape s4{{migraphx::shape{migraphx::shape::int8_type}, migraphx::shape{migraphx::shape::float_type}}}; - EXPECT(!is_compatible_shape(s1, s4)); + EXPECT(not is_compatible_shape(s1, s4)); EXPECT(s4 != s1); EXPECT(s4 != s2); EXPECT(s4 != s3); From 94392aac0218e95dbb7bee685c2409e0fb4f5c64 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 17 Sep 2024 22:44:38 +0000 Subject: [PATCH 20/56] bug fix work in progress. Contains fixed source code. Contains debug code. Tests need to be completed, including updating generated onnx test files. 
--- src/include/migraphx/op/roialign.hpp | 91 ++++++++++++++++++++++---- test/onnx/gen_onnx.py | 15 +++-- test/onnx/roialign_default_test.onnx | 5 +- test/onnx/roialign_test.onnx | Bin 345 -> 338 bytes test/onnx/verify/celu_verify_test.cpp | 20 +++++- 5 files changed, 108 insertions(+), 23 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index d66e8f0feeb..63a398fe5b0 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -114,46 +114,94 @@ struct roialign { std::vector results(bin_grid_size[0] * bin_grid_size[1] * output_height * output_width); + shape_for_each(comp_s, [&](const auto& idx_v, size_t index) { - std::array p = {idx_v[0], idx_v[1]}; - std::array i = {idx_v[2], idx_v[3]}; +for(auto aa : comp_s.multi(index)) printf(", %lu ", aa); +printf("index\n"); + + // The p and i indexes are looping parameters in ORT and go in y, x order. The i[x] value is least significant + // and iterates the fastest. + std::array p = {idx_v[1], idx_v[0]}; + std::array i = {idx_v[3], idx_v[2]}; +printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); + // xy is scaled coordinates of start point of ROI std::array xy{}; + // low, high are floor and ceiling of the xy value (i.e. 
the bounds of the pixel it lies inside) std::array low{}; std::array high{}; + // std::cout << " GGGGG inputs to xy calculation: roi_start=" << roi_start[0] << ", " << roi_start[1] << ", p=[0,1]: " << p[0] << ", " << p[1] << ", bin_size=" + // << bin_size[0] << ", " << bin_size[1] << " rounding factor=" << (i[0] + .5f) << ", " << (i[1] + .5f) << " bin_grid_size=" << bin_grid_size[0] <<", " << bin_grid_size[1] <<"\n"; for(auto ii : range(p.size())) { + // if(ii == 0) + // printf("QQQQQ x: " ); + // else + // printf("QQQQQ y: " ); + // for width & height dimensions, + // transform the roi start point to scaled coordinates +// printf(" roi_start[ii] %f p[ii] %lu bin_size[ii] %f (i[ii] + .5f) %f bin_size[ii] %f bin_grid_size[ii] %lu ", +// roi_start[ii], p[ii], bin_size[ii], (i[ii] + .5f), bin_size[ii], bin_grid_size[ii] ); + + xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; - xy[ii] = (coord_trans_mode == "half_pixel") ? (xy[ii] - 0.5f) : xy[ii]; +// printf("L137 %f ", xy[ii]); + xy[ii] = (coord_trans_mode != "half_pixel") ? 
(xy[ii] - 0.5f) : xy[ii]; +// printf("L139 %f ", xy[ii]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { +// printf("L142 results = pos_weight \n "); results[index] = pos_weight{}; return; } xy[ii] = std::max(xy[ii], 0.0f); +// printf("L148 %f ", xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; +// printf("L154 %f ", xy[ii]); } +// printf("\n"); } - + // printf(" FFFFF xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld \n", + // xy[0], xy[1], dims[1], low[0], low[1], high[0], high[1]); results[index].pos = {low[0] * dims[1] + low[1], low[0] * dims[1] + high[1], high[0] * dims[1] + low[1], high[0] * dims[1] + high[1]}; - float ly = xy[0] - low[0]; - float lx = xy[1] - low[1]; + float lx = xy[0] - low[0]; + float ly = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; - + printf(" HHHHH partial pixel values, index=%lu ly=%f, lx=%f, hy=%f, hx=%f\n", index, ly, lx, hy, hx); // save weights and indeces results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; + + // printf(" DDDDD calc_pos_weight precalc "); + // for(int aa = 0; aa < 4; aa++) + // { + // std::cout << results[index].pos[aa] << ", " << results[index].w[aa] << " "; + // } + + printf(" DDDDD index %zu %f %f %f %f \n\n", index, + // results[index].pos[0], + // results[index].pos[1], + // results[index].pos[2], + // results[index].pos[3], + float(results[index].w[0]), + float(results[index].w[1]), + float(results[index].w[2]), + float(results[index].w[3]) + ); + + }); + printf("size of calc_pos_weight vector is %lu\n", results.size()); return results; } @@ -219,14 +267,26 @@ struct roialign const auto* batch_indices = args.at(2).cast(); par_for(n_rois, [&](auto n) { const auto bottom_data = x.begin(); + std::cout << "MIGraphX AAAAA x begins " << "\n"; const auto roi_batch_ind = batch_indices[n]; // Do not using rounding; this implementation detail is critical + float offset = (coord_trans_mode == "half_pixel") ? 
0.5 : 0.0; std::array roi_starts = { - static_cast(roi[roi_s.index({n, 1})] * spatial_scale), - static_cast(roi[roi_s.index({n, 0})] * spatial_scale)}; + static_cast(roi[roi_s.index({n, 0})] * spatial_scale - offset), + static_cast(roi[roi_s.index({n, 1})] * spatial_scale - offset)}; std::array roi_ends = { - static_cast(roi[roi_s.index({n, 3})] * spatial_scale), - static_cast(roi[roi_s.index({n, 2})] * spatial_scale)}; + static_cast(roi[roi_s.index({n, 2})] * spatial_scale - offset), + static_cast(roi[roi_s.index({n, 3})] * spatial_scale - offset)}; + + // std::cout << " CCCCC roialign compute(): scale , starts (x, x) ends (x, x)" << ", " << spatial_scale << ", " << roi_starts[0] << ", " << + // roi_starts[1] << ", " << + // roi_ends[0] << ", " << roi_ends[1] << "\n"; + // std::cout << " CCCCC roi is x, x, x, x x" << ", " << roi[roi_s.index({n, 0})] << ", " << + // roi[roi_s.index({n, 1})] << ", " << roi[roi_s.index({n, 2})] << ", " << roi[roi_s.index({n, 3})] << "\n\n"; + + printf("CCCCC roialign compute(): roi_start_w = %f, roi_start_h =%f, roi_end_w=%f, roi_end_h=%f \n", + float(roi_starts[0]), float(roi_starts[1]), float(roi_ends[0]), float(roi_ends[1])); + // Force malformed ROIs to be 1x1 std::array roi_size{}; @@ -236,7 +296,8 @@ struct roialign for(auto ii : range(roi_size.size())) { roi_size[ii] = roi_ends[ii] - roi_starts[ii]; - roi_size[ii] = std::max(roi_size[ii], 1.0f); + if(coord_trans_mode != "half_pixel") + roi_size[ii] = std::max(roi_size[ii], 1.0f); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (sampling_ratio > 0) @@ -277,10 +338,16 @@ struct roialign vec_index[c], max_pool{}); output(n, c, ph, pw) = output_val; + // int64_t index = index_n_c + ph * pooled_width + pw; + + // printf(" GGGGG a single output is %f f n %lu c %lu ph %lu pw %lu\n" , + // float(output_val), n, c , ph , pw); }); }); }); + printf(" end compute\n\n\n"); + return result; } }; diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 
3efc787c559..c31805e8294 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -751,7 +751,7 @@ def celu_default_test(): return ([node], [x], [y]) - +# see also def roialign_test(): @onnx_test() def celu_verify_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 3]) @@ -10110,14 +10110,15 @@ def roialign_default_test(): return ([node], [x, roi, bi], [y]) - +# see also celu_verify_test @onnx_test() def roialign_test(): - x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 5, 4, 7]) - roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [8, 4]) - bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [8]) - y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [8, 4, 5, 5]) + x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 2, 3]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [1, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [1]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4, 2, 2]) + # half_pixel is the new mode we're developing for node = onnx.helper.make_node( 'RoiAlign', inputs=['x', 'rois', 'batch_ind'], @@ -10127,7 +10128,7 @@ def roialign_test(): output_width=5, sampling_ratio=3, mode="avg", - coordinate_transformation_mode="output_half_pixel") + coordinate_transformation_mode="half_pixel") return ([node], [x, roi, bi], [y]) diff --git a/test/onnx/roialign_default_test.onnx b/test/onnx/roialign_default_test.onnx index 4421e17be60..3f54104fdd6 100644 --- a/test/onnx/roialign_default_test.onnx +++ b/test/onnx/roialign_default_test.onnx @@ -1,4 +1,5 @@ -roialign_default_test:¥ + +roialign_default_test:¥ ! 
x rois @@ -23,4 +24,4 @@    -B \ No newline at end of file +B \ No newline at end of file diff --git a/test/onnx/roialign_test.onnx b/test/onnx/roialign_test.onnx index f39485530c4758b3fadd6c7d5fe5ad180cc75a73..d5b9d5bbad1d95d7b076fb3c3e0d258aedb88a37 100644 GIT binary patch delta 113 zcmcb~bcsofgG-3FC_ghXCo?@Sz9hA{#A+84*WQU@%8dFGtrQu#CMHh@l1v;R%ml>D xQDR&yKn=x0d|W&nj6xhBxyjOuGD?g|(p-#{LSkGZKw$x>78a;BCMPBVQ2?7o64L+x delta 143 zcmcb_bdyPpgI$QXC_ghXCo?@Sz9hA{#Ofdu*O7^0%8X_ctrR%~^Giz#N=xD=#!Qdr z;$q@p1!5K;W{(o%Vgc$a7UJXL;b0Wv0E&SyOO!YlXHsHGaz=b+UWyPG7YEP+kOqz< aX)eY}Au%oyplSh_Y9I|Vk=2PwfCm771{x6n diff --git a/test/onnx/verify/celu_verify_test.cpp b/test/onnx/verify/celu_verify_test.cpp index dc715255037..e71e300e665 100644 --- a/test/onnx/verify/celu_verify_test.cpp +++ b/test/onnx/verify/celu_verify_test.cpp @@ -28,18 +28,34 @@ TEST_CASE(celu_verify_test) { - migraphx::program p = read_onnx("celu_verify_test.onnx"); + // ../../build/bin/test_verify_onnx celu_verify_test + migraphx::program p = read_onnx("roialign_test.onnx"); p.compile(migraphx::make_target("ref")); - migraphx::shape s{migraphx::shape::float_type, {2, 3}}; + migraphx::shape s{migraphx::shape::float_type, {1, 1, 2, 3}}; std::vector data = {-5.5, 2.0, 100., 7.0, 0., -1.}; migraphx::parameter_map pp; pp["x"] = migraphx::argument(s, data.data()); + pp["y"] = migraphx::argument(s, data.data()); // ? 
+ + // migraphx::shape sx{migraphx::shape::float_type, {10, 5, 4, 7}}; + migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; + std::vector rois_data = {0.1, 0.15, 0.6, 0.35}; + migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index + std::vector bi_data = {0}; + + pp["rois"] = migraphx::argument(srois, rois_data.data()); + pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + auto result = p.eval(pp).back(); std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); +printf(" result: "); +for(auto aa : result_vector) printf(" %f ", aa); +printf("\n"); + std::vector gold(6); float alpha = 0.5; std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { From a43303c4612e623daee46a60a82a8e2d886f8019 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 23 Sep 2024 22:21:10 +0000 Subject: [PATCH 21/56] reordered lens for iteration shape; added some tests. Passes roialign_half_pixel_verify_test for first roi but fails for second --- ort_roialign.py | 59 ++++++++++++++ src/include/migraphx/op/roialign.hpp | 93 ++++++---------------- test/onnx/gen_onnx.py | 61 ++++++++++++-- test/onnx/parse/roialign_test.cpp | 4 +- test/onnx/roialign_half_pixel_test.onnx | Bin 0 -> 360 bytes test/onnx/roialign_test.onnx | Bin 338 -> 345 bytes test/onnx/verify/celu_verify_test.cpp | 20 +---- test/onnx/verify/roialign_verify_test.cpp | 71 +++++++++++++++++ tools/build_and_test_onnxrt.sh | 10 +-- 9 files changed, 218 insertions(+), 100 deletions(-) create mode 100644 ort_roialign.py create mode 100644 test/onnx/roialign_half_pixel_test.onnx create mode 100644 test/onnx/verify/roialign_verify_test.cpp diff --git a/ort_roialign.py b/ort_roialign.py new file mode 100644 index 00000000000..b7a1a770bd8 --- /dev/null +++ b/ort_roialign.py @@ -0,0 +1,59 @@ + +# Not for release. 
This test script is for develop/test only + +import onnx +import onnxruntime as rt +# from https://onnxruntime.ai/docs/get-started/with-python.html +import numpy as np +print(" version: ", onnx.__version__, rt.__version__) + + +x = np.array(np.arange(2*2*4*3), dtype='f') +x = np.reshape(x, [2, 2, 4, 3]) + +y=np.ones([2, 2, 4, 7], dtype='f') + +# x = np.array([[[[2,3,4], [5,6, 7]]]], dtype='f') +rois=np.array([[0.1, 0.15, 0.6, 0.35], + [0.1, 0.15, 2.6, 1.35]], dtype='f') +sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_half_pixel_test.onnx') +res = sess.run(['y'], {'x': x, + 'rois': rois, + 'batch_ind': [0, 1]}) +print(res) + + +# model_file = "test/onnx/roialign_test.onnx" +# onnx_model = onnx.load(model_file) +# onnx.checker.check_model(onnx_model) + + +# #define the priority order for the execution providers +# EP_list = ['CPUExecutionProvider'] + +# aa = np.asarray(np.arange(3*2*4*5), dtype='f') +# # bi = np.reshape(aa, [3, 2, 4, 5]) + +# # initialize the model.onnx +# sess = rt.InferenceSession(model_file, providers=EP_list) +# x, rois, batch_ind = (np.reshape(aa, [3, 2, 4, 5]), +# np.array([[0.1, 0.15, 0.6, 0.35], +# [2.1, 1.73, 3.8, 2.13]], dtype='f'), +# np.array([0, 1], dtype='int64')) + +# # Use the parameter names defined in the onnx file +# output = sess.run(None, {'x': x, +# 'rois': rois, +# 'batch_ind': batch_ind, +# }) + +# print(' output is ', output) + + +# # get the outputs metadata as a list of :class:`onnxruntime.NodeArg` +# output_name = sess.get_outputs()[0].name + +# # get the inputs metadata as a list of :class:`onnxruntime.NodeArg` +# input_name = sess.get_inputs()[0].name +# print("Names are ",input_name, output_name) + diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 63a398fe5b0..c36b7f9b501 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro 
Devices, Inc. All rights reserved. + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -92,8 +92,8 @@ struct roialign std::vector out_lens = x_lens; out_lens[0] = roi_lens[0]; - out_lens[2] = output_height; - out_lens[3] = output_width; + out_lens[2] = output_width; + out_lens[3] = output_height; return {type, out_lens}; } @@ -116,59 +116,54 @@ struct roialign output_width); shape_for_each(comp_s, [&](const auto& idx_v, size_t index) { -for(auto aa : comp_s.multi(index)) printf(", %lu ", aa); -printf("index\n"); - // The p and i indexes are looping parameters in ORT and go in y, x order. The i[x] value is least significant + // The p and i indexes correspond to nested looping parameters in ORT that go in y, x order. The i[x] value is least significant // and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; - std::array i = {idx_v[3], idx_v[2]}; -printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); - + std::array i = {idx_v[3], idx_v[2]};// <== these are always the same +printf(" IIIII other index %lu , %lu , %lu , %lu i=%lu \n", p[0], p[1], i[0], i[1], index); // xy is scaled coordinates of start point of ROI std::array xy{}; // low, high are floor and ceiling of the xy value (i.e. 
the bounds of the pixel it lies inside) std::array low{}; std::array high{}; - // std::cout << " GGGGG inputs to xy calculation: roi_start=" << roi_start[0] << ", " << roi_start[1] << ", p=[0,1]: " << p[0] << ", " << p[1] << ", bin_size=" - // << bin_size[0] << ", " << bin_size[1] << " rounding factor=" << (i[0] + .5f) << ", " << (i[1] + .5f) << " bin_grid_size=" << bin_grid_size[0] <<", " << bin_grid_size[1] <<"\n"; + for(auto ii : range(p.size())) { // if(ii == 0) - // printf("QQQQQ x: " ); + // printf("x: " ); // else - // printf("QQQQQ y: " ); + // printf("y: " ); // for width & height dimensions, // transform the roi start point to scaled coordinates -// printf(" roi_start[ii] %f p[ii] %lu bin_size[ii] %f (i[ii] + .5f) %f bin_size[ii] %f bin_grid_size[ii] %lu ", -// roi_start[ii], p[ii], bin_size[ii], (i[ii] + .5f), bin_size[ii], bin_grid_size[ii] ); - +printf(" roi_start[ii] %f p[ii] %lu bin_size[ii] %f (i[ii] + .5f) %f bin_grid_size[ii] %lu \n", +roi_start[ii], p[ii], bin_size[ii], (i[ii] + .5f), bin_grid_size[ii] ); xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; -// printf("L137 %f ", xy[ii]); +printf(" QQQQQQ L137 x=%f y=%f\n", xy[0], xy[1]); xy[ii] = (coord_trans_mode != "half_pixel") ? 
(xy[ii] - 0.5f) : xy[ii]; // printf("L139 %f ", xy[ii]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { -// printf("L142 results = pos_weight \n "); +// printf("L142 results = pos_weight \n "); results[index] = pos_weight{}; return; } xy[ii] = std::max(xy[ii], 0.0f); -// printf("L148 %f ", xy[ii]); +// printf("L148 %f ", xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) { +// printf("L154 %f ", xy[ii]); xy[ii] = high[ii] = low[ii] = dims[ii] - 1; -// printf("L154 %f ", xy[ii]); } -// printf("\n"); +// printf("\n"); } - // printf(" FFFFF xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld \n", - // xy[0], xy[1], dims[1], low[0], low[1], high[0], high[1]); + // printf(" JJJJJ xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld i=%zu\n\n", + // xy[0], xy[1], dims[1], low[0], low[1], high[0], high[1], index); results[index].pos = {low[0] * dims[1] + low[1], low[0] * dims[1] + high[1], high[0] * dims[1] + low[1], @@ -178,30 +173,11 @@ printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); float ly = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; - printf(" HHHHH partial pixel values, index=%lu ly=%f, lx=%f, hy=%f, hx=%f\n", index, ly, lx, hy, hx); - // save weights and indeces + // printf(" HHHHH partial pixel values, index=%lu ly=%f, lx=%f, hy=%f, hx=%f\n\n", index, ly, lx, hy, hx); + // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; - // printf(" DDDDD calc_pos_weight precalc "); - // for(int aa = 0; aa < 4; aa++) - // { - // std::cout << results[index].pos[aa] << ", " << results[index].w[aa] << " "; - // } - - printf(" DDDDD index %zu %f %f %f %f \n\n", index, - // results[index].pos[0], - // results[index].pos[1], - // results[index].pos[2], - // results[index].pos[3], - float(results[index].w[0]), - float(results[index].w[1]), - float(results[index].w[2]), - float(results[index].w[3]) - ); - - }); - printf("size of calc_pos_weight vector is %lu\n", results.size()); 
return results; } @@ -256,7 +232,7 @@ printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); int64_t n_rois = out_lens[0]; std::size_t channels = out_lens[1]; // output dims of height and width, in all 2-dim arrays, the first dim - // is for height and second dim is for width + // is for height and second dim is for width i.e. (y, x) order std::array out_dims = {out_lens[2], out_lens[3]}; const auto& x_lens = args.at(0).get_shape().lens(); // input dims of height and width @@ -267,10 +243,9 @@ printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); const auto* batch_indices = args.at(2).cast(); par_for(n_rois, [&](auto n) { const auto bottom_data = x.begin(); - std::cout << "MIGraphX AAAAA x begins " << "\n"; const auto roi_batch_ind = batch_indices[n]; - // Do not using rounding; this implementation detail is critical - float offset = (coord_trans_mode == "half_pixel") ? 0.5 : 0.0; + // Do not use rounding; this implementation detail is critical + float offset = (coord_trans_mode == "half_pixel") ? 
0.5 : 0.0; std::array roi_starts = { static_cast(roi[roi_s.index({n, 0})] * spatial_scale - offset), static_cast(roi[roi_s.index({n, 1})] * spatial_scale - offset)}; @@ -278,17 +253,7 @@ printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); static_cast(roi[roi_s.index({n, 2})] * spatial_scale - offset), static_cast(roi[roi_s.index({n, 3})] * spatial_scale - offset)}; - // std::cout << " CCCCC roialign compute(): scale , starts (x, x) ends (x, x)" << ", " << spatial_scale << ", " << roi_starts[0] << ", " << - // roi_starts[1] << ", " << - // roi_ends[0] << ", " << roi_ends[1] << "\n"; - // std::cout << " CCCCC roi is x, x, x, x x" << ", " << roi[roi_s.index({n, 0})] << ", " << - // roi[roi_s.index({n, 1})] << ", " << roi[roi_s.index({n, 2})] << ", " << roi[roi_s.index({n, 3})] << "\n\n"; - - printf("CCCCC roialign compute(): roi_start_w = %f, roi_start_h =%f, roi_end_w=%f, roi_end_h=%f \n", - float(roi_starts[0]), float(roi_starts[1]), float(roi_ends[0]), float(roi_ends[1])); - - - // Force malformed ROIs to be 1x1 + // Force malformed ROIs to be 1x1, output_half_pixel transform mode std::array roi_size{}; std::array bin_size{}; std::array bin_grid_size{}; @@ -298,7 +263,7 @@ printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); roi_size[ii] = roi_ends[ii] - roi_starts[ii]; if(coord_trans_mode != "half_pixel") roi_size[ii] = std::max(roi_size[ii], 1.0f); - +printf("\n KKKKK roi_size %f out_dims %lu \n", roi_size[ii] , out_dims[ii]); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (sampling_ratio > 0) ? 
sampling_ratio @@ -308,7 +273,7 @@ printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); // we want to precalculate indices and weights shared by all channels, // this is the key point of optimization std::vector comp_lens = { - out_dims[0], out_dims[1], bin_grid_size[0], bin_grid_size[1]}; + out_dims[1], out_dims[0], bin_grid_size[1], bin_grid_size[0]}; shape comp_s{shape::float_type, comp_lens}; auto pre_calc = this->calc_pos_weight(in_dims, comp_s, roi_starts, bin_size, bin_grid_size); @@ -338,16 +303,10 @@ printf(" IIIII other index %lu , %lu , %lu , %lu\n", p[0], p[1], i[0], i[1]); vec_index[c], max_pool{}); output(n, c, ph, pw) = output_val; - // int64_t index = index_n_c + ph * pooled_width + pw; - - // printf(" GGGGG a single output is %f f n %lu c %lu ph %lu pw %lu\n" , - // float(output_val), n, c , ph , pw); }); }); }); - printf(" end compute\n\n\n"); - return result; } }; diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index c31805e8294..8f0e9656c51 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -751,7 +751,6 @@ def celu_default_test(): return ([node], [x], [y]) -# see also def roialign_test(): @onnx_test() def celu_verify_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 3]) @@ -10110,15 +10109,13 @@ def roialign_default_test(): return ([node], [x, roi, bi], [y]) -# see also celu_verify_test + @onnx_test() def roialign_test(): - x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 2, 3]) - roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [1, 4]) - bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [1]) - y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4, 2, 2]) - - # half_pixel is the new mode we're developing for + x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 2, 4, 5]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) + bi = helper.make_tensor_value_info('batch_ind', 
TensorProto.INT64, [2]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 2, 4, 5]) node = onnx.helper.make_node( 'RoiAlign', inputs=['x', 'rois', 'batch_ind'], @@ -10128,11 +10125,59 @@ def roialign_test(): output_width=5, sampling_ratio=3, mode="avg", + coordinate_transformation_mode="output_half_pixel") + + return ([node], [x, roi, bi], [y]) + + +@onnx_test() +def roialign_half_pixel_test(): + x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 4, 3]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 4, 3]) + + # half_pixel is the new mode we're developing for + node = onnx.helper.make_node( + 'RoiAlign', + inputs=['x', 'rois', 'batch_ind'], + outputs=['y'], + spatial_scale=2.0, + output_height=7, + output_width=9, + sampling_ratio=3, + mode="avg", coordinate_transformation_mode="half_pixel") return ([node], [x, roi, bi], [y]) + + + + +# @onnx_test() +# def roialign_half_pixel_test(): +# x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 2, 3]) +# roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [1, 4]) +# bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [1]) +# y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4, 2, 2]) + +# # half_pixel is the new mode we're developing for +# node = onnx.helper.make_node( +# 'RoiAlign', +# inputs=['x', 'rois', 'batch_ind'], +# outputs=['y'], +# spatial_scale=2.0, +# output_height=5, +# output_width=5, +# sampling_ratio=3, +# mode="avg", +# coordinate_transformation_mode="half_pixel") + +# return ([node], [x, roi, bi], [y]) + + @onnx_test() def round_half_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [4, 4]) diff --git a/test/onnx/parse/roialign_test.cpp b/test/onnx/parse/roialign_test.cpp index 05f27b6473c..52bb8681d4d 100644 --- 
a/test/onnx/parse/roialign_test.cpp +++ b/test/onnx/parse/roialign_test.cpp @@ -26,7 +26,7 @@ TEST_CASE(roialign_test) { - migraphx::shape sx{migraphx::shape::float_type, {10, 5, 4, 7}}; + migraphx::shape sx{migraphx::shape::float_type, {3, 2, 4, 5}}; migraphx::shape srois{migraphx::shape::float_type, {8, 4}}; migraphx::shape sbi{migraphx::shape::int64_type, {8}}; @@ -41,7 +41,7 @@ TEST_CASE(roialign_test) {{"coordinate_transformation_mode", "output_half_pixel"}, {"spatial_scale", 2.0f}, {"output_height", 5}, - {"output_width", 5}, + {"output_width", 3}, {"sampling_ratio", 3}}), x, rois, diff --git a/test/onnx/roialign_half_pixel_test.onnx b/test/onnx/roialign_half_pixel_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b6ca215a9fd362ea9737b42897c1a291c8502e03 GIT binary patch literal 360 zcmZ`#u};G<5RIKkbx&cB>M5{h}@{L>ePPXy=$Sy*u}-*eyeOzM`R^Nze3`4VN- zBRn+5*j8%p1@o3`cQAI~UK-5?(~2GZIZr69a>#z@l)rZouVHD9emr{C3)%I)oJ|N< zcd)pyAF}m*Igexkx#PpAr0$rF7s@o!TUfd=K(3f;xDrK@B+nBD xQDR&yKn=x0d|W&nj6xhBxyjOuGD?g|(p-#{LSkGZKw$x>78a;BCMPBVQ2?7o64L+x diff --git a/test/onnx/verify/celu_verify_test.cpp b/test/onnx/verify/celu_verify_test.cpp index e71e300e665..dc715255037 100644 --- a/test/onnx/verify/celu_verify_test.cpp +++ b/test/onnx/verify/celu_verify_test.cpp @@ -28,34 +28,18 @@ TEST_CASE(celu_verify_test) { - // ../../build/bin/test_verify_onnx celu_verify_test - migraphx::program p = read_onnx("roialign_test.onnx"); + migraphx::program p = read_onnx("celu_verify_test.onnx"); p.compile(migraphx::make_target("ref")); - migraphx::shape s{migraphx::shape::float_type, {1, 1, 2, 3}}; + migraphx::shape s{migraphx::shape::float_type, {2, 3}}; std::vector data = {-5.5, 2.0, 100., 7.0, 0., -1.}; migraphx::parameter_map pp; pp["x"] = migraphx::argument(s, data.data()); - pp["y"] = migraphx::argument(s, data.data()); // ? 
- - // migraphx::shape sx{migraphx::shape::float_type, {10, 5, 4, 7}}; - migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; - std::vector rois_data = {0.1, 0.15, 0.6, 0.35}; - migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index - std::vector bi_data = {0}; - - pp["rois"] = migraphx::argument(srois, rois_data.data()); - pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); - auto result = p.eval(pp).back(); std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); -printf(" result: "); -for(auto aa : result_vector) printf(" %f ", aa); -printf("\n"); - std::vector gold(6); float alpha = 0.5; std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { diff --git a/test/onnx/verify/roialign_verify_test.cpp b/test/onnx/verify/roialign_verify_test.cpp new file mode 100644 index 00000000000..b1c9b715af4 --- /dev/null +++ b/test/onnx/verify/roialign_verify_test.cpp @@ -0,0 +1,71 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +TEST_CASE(roialign_verify_test) +{ + migraphx::program p = read_onnx("roialign_test.onnx"); + p.compile(migraphx::make_target("ref")); + + migraphx::shape s{migraphx::shape::float_type, {3, 2, 4, 5}}; + std::vector data(3*5*4*2); + std::iota(data.begin(), data.end(), 0); + + migraphx::parameter_map pp; + pp["x"] = migraphx::argument(s, data.data()); + pp["y"] = migraphx::argument(s, data.data()); + + migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; + std::vector rois_data = {0.1, 0.15, 0.6, 0.35, + 2.1, 1.73, 3.8, 2.13}; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + std::vector bi_data = {0, 1}; + + pp["rois"] = migraphx::argument(srois, rois_data.data()); + pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + + auto result = p.eval(pp).back(); + std::vector result_vector; + result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); + +printf(" result: "); +for(auto aa : result_vector) printf(" %f ", aa); +printf("\n"); + + std::vector gold = { 0.000000, 0.022222, 0.200000, 0.400000, 0.600000, 0.500000, 0.522222, 0.700000, 0.900000, 1.100000, 1.500000, 1.522223, 1.700000, + 1.900000, 2.100000, 2.500000, 2.522222, 2.700000, 2.900000, 3.100000, 3.500000, 3.522222, 3.700000, 3.900000, 4.100000, 20.000000, 20.022223, 20.200001, 20.400000, 20.600000, 20.500000, 20.522223, + 20.700001, 20.900000, 21.100000, 21.500000, 21.522223, 21.700001, 21.900000, 22.100000, 22.500000, 22.522223, 22.700001, 22.900000, 23.100000, 23.500000, 23.522223, 23.700001, + 23.900000, 24.100000, 5.888889, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 
0.000000, 0.000000, 0.000000, 0.000000, + 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 12.555555, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, + 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, + 0.000000, 0.000000 }; + float alpha = 0.5; + std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { + return std::max(0.0f, x) + std::min(0.0f, alpha * std::expm1(x / alpha)); + }); + EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); +} diff --git a/tools/build_and_test_onnxrt.sh b/tools/build_and_test_onnxrt.sh index 19147c84ddb..a3a8fdfbf61 100755 --- a/tools/build_and_test_onnxrt.sh +++ b/tools/build_and_test_onnxrt.sh @@ -36,8 +36,8 @@ export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" cd build/Linux/Release #Add test launcher for onnxrt tests -echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions2' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -echo 'InferenceSessionTests.Test3LayerNestedSubgraph' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -echo 'InferenceSessionTests.Test2LayerNestedSubgraph' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -../../../tools/ci_build/github/pai/pai_test_launcher.sh || (gdb ./onnxruntime_test_all core -batch -ex bt && exit 1) +# echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +# echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions2' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +# echo 'InferenceSessionTests.Test3LayerNestedSubgraph' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +# echo 
'InferenceSessionTests.Test2LayerNestedSubgraph' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +# ../../../tools/ci_build/github/pai/pai_test_launcher.sh || (gdb ./onnxruntime_test_all core -batch -ex bt && exit 1) From 69d0d444d10299550a97c0211f2bec38d1b5e100 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 24 Sep 2024 23:13:07 +0000 Subject: [PATCH 22/56] bug fixes and added roialign_half_pixel_verify_test which passes. Work in progress with debug code. --- ort_roialign.py | 11 +- src/include/migraphx/op/roialign.hpp | 49 ++++--- test/onnx/gen_onnx.py | 4 +- test/onnx/roialign_half_pixel_test.onnx | Bin 360 -> 360 bytes .../roialign_half_pixel_verify_test.cpp | 120 ++++++++++++++++++ test/onnx/verify/roialign_verify_test.cpp | 14 +- 6 files changed, 169 insertions(+), 29 deletions(-) create mode 100644 test/onnx/verify/roialign_half_pixel_verify_test.cpp diff --git a/ort_roialign.py b/ort_roialign.py index b7a1a770bd8..db06f24c07d 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -13,13 +13,16 @@ y=np.ones([2, 2, 4, 7], dtype='f') -# x = np.array([[[[2,3,4], [5,6, 7]]]], dtype='f') -rois=np.array([[0.1, 0.15, 0.6, 0.35], - [0.1, 0.15, 2.6, 1.35]], dtype='f') +# rois=np.array([[0.1, 0.15, 0.6, 0.35], +# [0.1, 0.15, 2.6, 1.35]], dtype='f') + +rois=np.array([ + [ 1.1, 0.73, 2.2, 1.13]], dtype='f') sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_half_pixel_test.onnx') res = sess.run(['y'], {'x': x, 'rois': rois, - 'batch_ind': [0, 1]}) + # 'batch_ind': [0, 1]}) + 'batch_ind': [0]}) print(res) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index c36b7f9b501..06ec14c0016 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -114,20 +114,25 @@ struct roialign { std::vector results(bin_grid_size[0] * bin_grid_size[1] * output_height * output_width); - +std::vector temp_lens = comp_s.lens(); +shape temp_s = 
{shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], temp_lens[2] }}; shape_for_each(comp_s, [&](const auto& idx_v, size_t index) { // The p and i indexes correspond to nested looping parameters in ORT that go in y, x order. The i[x] value is least significant // and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; std::array i = {idx_v[3], idx_v[2]};// <== these are always the same -printf(" IIIII other index %lu , %lu , %lu , %lu i=%lu \n", p[0], p[1], i[0], i[1], index); +printf("\n IIIII other index %lu , %lu , %lu , %lu i=%lu temp_index = %lu \n", p[0], p[1], i[0], i[1], index, temp_s.index({p[0], p[1], i[0], i[1]})); +printf(" my index= %lu reverse temp=%lu\n ", comp_s.index({p[1], p[0], i[1], i[0]}), temp_s.index({p[1], p[0], i[1], i[0]})); +printf(" more index= %lu reverse ...=%lu\n ", comp_s.index({p[0], p[1], i[0], i[1]}), temp_s.index({p[0], p[1], i[0], i[1]})); // xy is scaled coordinates of start point of ROI std::array xy{}; // low, high are floor and ceiling of the xy value (i.e. 
the bounds of the pixel it lies inside) std::array low{}; std::array high{}; + // size_t adj_index = temp_s.index({p[1], p[0], i[1], i[0]}); + for(auto ii : range(p.size())) { // if(ii == 0) @@ -136,48 +141,54 @@ printf(" IIIII other index %lu , %lu , %lu , %lu i=%lu \n", p[0], p[1], i[0], i // printf("y: " ); // for width & height dimensions, // transform the roi start point to scaled coordinates -printf(" roi_start[ii] %f p[ii] %lu bin_size[ii] %f (i[ii] + .5f) %f bin_grid_size[ii] %lu \n", -roi_start[ii], p[ii], bin_size[ii], (i[ii] + .5f), bin_grid_size[ii] ); +// printf(" roi_start[ii] %f p[ii] %lu bin_size[ii] %f (i[ii] + .5f) %f bin_grid_size[ii] %lu \n", +// roi_start[ii], p[ii], bin_size[ii], (i[ii] + .5f), bin_grid_size[ii] ); xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; -printf(" QQQQQQ L137 x=%f y=%f\n", xy[0], xy[1]); +// printf(" QQQQQQ L137 x=%f y=%f ", xy[0], xy[1]); xy[ii] = (coord_trans_mode != "half_pixel") ? (xy[ii] - 0.5f) : xy[ii]; -// printf("L139 %f ", xy[ii]); +// printf(" L139 %f ", xy[ii]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { -// printf("L142 results = pos_weight \n "); - results[index] = pos_weight{}; +// printf(" L142 results = pos_weight i=%lu dims=%lu, %lu \n ", index, dims[0], dims[1]); + // results[adj_index] = pos_weight{}; // all zeroes + results[index] = pos_weight{}; // all zeroes return; } xy[ii] = std::max(xy[ii], 0.0f); -// printf("L148 %f ", xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; +// printf(" L148 %f low[ii] %lu, dims[ii] %lu", xy[ii], low[ii], dims[ii]); if(low[ii] >= dims[ii] - 1) { -// printf("L154 %f ", xy[ii]); xy[ii] = high[ii] = low[ii] = dims[ii] - 1; +// printf(" L154 %f ", xy[ii]); } -// printf("\n"); +// printf(" \n"); } - // printf(" JJJJJ xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld i=%zu\n\n", - // xy[0], xy[1], dims[1], low[0], low[1], high[0], high[1], index); - results[index].pos = {low[0] * dims[1] + low[1], - low[0] * dims[1] 
+ high[1], - high[0] * dims[1] + low[1], - high[0] * dims[1] + high[1]}; + printf(" JJJJJ xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld i=%zu dims[0]=%lu \n\n", + xy[0], xy[1], dims[1], low[1], low[0], high[1], high[0], index, dims[0]); + results[index].pos = {low[1] * dims[0] + low[0], + low[1] * dims[0] + high[0], + high[1] * dims[0] + low[0], + high[1] * dims[0] + high[0]}; float lx = xy[0] - low[0]; float ly = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; - // printf(" HHHHH partial pixel values, index=%lu ly=%f, lx=%f, hy=%f, hx=%f\n\n", index, ly, lx, hy, hx); + // printf(" HHHHH partial pixel values, index=%lu pci=%lu ly=%f, lx=%f, hy=%f, hx=%f\n\n", index, temp_s.index({p[1], p[0], i[1], i[0]}), + // ly, lx, hy, hx); // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; }); +printf(" AAAAA here we are\n"); + for(int iix = 0; iix < results.size(); iix++) + printf(" SSSSS %d %lu %lu %lu %lu %f %f %f %f\n", iix, results[iix].pos[0], results[iix].pos[1], results[iix].pos[2], results[iix].pos[3], + results[iix].w[0], results[iix].w[1], results[iix].w[2], results[iix].w[3]); return results; } @@ -236,7 +247,7 @@ printf(" QQQQQQ L137 x=%f y=%f\n", xy[0], xy[1]); std::array out_dims = {out_lens[2], out_lens[3]}; const auto& x_lens = args.at(0).get_shape().lens(); // input dims of height and width - std::array in_dims = {x_lens[2], x_lens[3]}; + std::array in_dims = {x_lens[3], x_lens[2]}; auto roi_s = args.at(1).get_shape(); visit_all(result, args.at(0), args.at(1))([&](auto output, auto x, auto roi) { diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 8f0e9656c51..f95feec6bc2 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -10133,8 +10133,8 @@ def roialign_test(): @onnx_test() def roialign_half_pixel_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 4, 3]) - roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) - bi = 
helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [1, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [1]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 4, 3]) # half_pixel is the new mode we're developing for diff --git a/test/onnx/roialign_half_pixel_test.onnx b/test/onnx/roialign_half_pixel_test.onnx index b6ca215a9fd362ea9737b42897c1a291c8502e03..cf2d236317be872ab8ba51a28bb8d8c93c6591f5 100644 GIT binary patch delta 28 jcmaFC^nz)FCL<%`WGzM+HpV1rF2>4<|0GzPm;^)tZcqlN delta 28 jcmaFC^nz)FCL<%$WGzM+Hl`$LF2>4<|0GzPm;^)tZgK{x diff --git a/test/onnx/verify/roialign_half_pixel_verify_test.cpp b/test/onnx/verify/roialign_half_pixel_verify_test.cpp new file mode 100644 index 00000000000..03b7cd48ac5 --- /dev/null +++ b/test/onnx/verify/roialign_half_pixel_verify_test.cpp @@ -0,0 +1,120 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +// This passes its own test but doesn't match ort version of test +TEST_CASE(roialign_half_pixel_verify_test) +{ + migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); + p.compile(migraphx::make_target("ref")); + + migraphx::shape s{migraphx::shape::float_type, {2, 2, 4, 3}}; + std::vector data(2*2*4*3); + std::iota(data.begin(), data.end(), 0.f); + migraphx::parameter_map pp; + pp["x"] = migraphx::argument(s, data.data()); + pp["y"] = migraphx::argument(s, data.data()); // ? + + // migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; + // std::vector rois_data = {0.1, 0.15, 0.6, 0.35, + // 2.1, 1.73, 3.8, 2.13}; + // migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + // std::vector bi_data = {0, 1}; + + migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; + std::vector rois_data = { + 1.1, 0.73, 2.2, 1.13}; + migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index + std::vector bi_data = {0}; + + + pp["rois"] = migraphx::argument(srois, rois_data.data()); + pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + pp["y"] = migraphx::argument(s, data.data()); + + auto result = p.eval(pp).back(); + std::vector result_vector; + result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); + +printf(" result: \n"); +for(int i = 0; i < result_vector.size(); i++) +{ + printf(" %f ", result_vector[i]); + if(i % 9 == 8) + printf("\n"); +} +printf("\n"); + + std::vector gold={ + 0.000000, 0.022222, 0.200000, 0.044444, 0.066667, 0.244444, 0.400000, 0.422222, 0.600000, 0.800000, + 0.822222, 1.000000, 1.200000, 1.222222, 1.400000, 12.000000, 12.022223, 12.200000, 12.044445, 
12.066667, + 12.244445, 12.400000, 12.422222, 12.600000, 12.800000, 12.822222, 13.000000, 13.200000, 13.222222, 13.400000, + 0.911111, 3.200000, 6.200000, 1.911111, 4.200000, 7.200000, 2.829630, 5.022223, 8.022223, 2.000000, + 4.000000, 7.000000, 0.000000, 0.000000, 0.000000, 12.911111, 15.200000, 18.200001, 13.911111, 16.199999, + 19.200001, 14.829630, 17.022223, 20.022223, 14.000000, 16.000000, 19.000000, 0.000000, 0.000000, 0.000000 + }; + + EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); +} + + +// TEST_CASE(roialign_half_pixel_verify_test) +// { +// migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); +// p.compile(migraphx::make_target("ref")); + +// migraphx::shape s{migraphx::shape::float_type, {1, 1, 2, 3}}; +// std::vector data = {-5.5, 2.0, 100., 7.0, 0., -1.}; + +// migraphx::parameter_map pp; +// pp["x"] = migraphx::argument(s, data.data()); +// pp["y"] = migraphx::argument(s, data.data()); + +// // migraphx::shape sx{migraphx::shape::float_type, {10, 5, 4, 7}}; +// migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; +// std::vector rois_data = {0.1, 0.15, 0.6, 0.35}; +// migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index +// std::vector bi_data = {0}; + +// pp["rois"] = migraphx::argument(srois, rois_data.data()); +// pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + +// auto result = p.eval(pp).back(); +// std::vector result_vector; +// result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); + +// printf(" result: "); +// for(auto aa : result_vector) printf(" %f ", aa); +// printf("\n"); + +// std::vector gold(6); +// float alpha = 0.5; +// std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { +// return std::max(0.0f, x) + std::min(0.0f, alpha * std::expm1(x / alpha)); +// }); +// EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); +// } diff --git a/test/onnx/verify/roialign_verify_test.cpp 
b/test/onnx/verify/roialign_verify_test.cpp index b1c9b715af4..f2107280df0 100644 --- a/test/onnx/verify/roialign_verify_test.cpp +++ b/test/onnx/verify/roialign_verify_test.cpp @@ -39,11 +39,17 @@ TEST_CASE(roialign_verify_test) pp["x"] = migraphx::argument(s, data.data()); pp["y"] = migraphx::argument(s, data.data()); - migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; - std::vector rois_data = {0.1, 0.15, 0.6, 0.35, + // migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; + // std::vector rois_data = {0.1, 0.15, 0.6, 0.35, + // 2.1, 1.73, 3.8, 2.13}; + // migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + // std::vector bi_data = {0, 1}; + + migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; + std::vector rois_data = { 2.1, 1.73, 3.8, 2.13}; - migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index - std::vector bi_data = {0, 1}; + migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index + std::vector bi_data = {0}; pp["rois"] = migraphx::argument(srois, rois_data.data()); pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); From dbe18b552fa9fee276e4d090394c7289e8162593 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 25 Sep 2024 16:45:47 +0000 Subject: [PATCH 23/56] test cases 2 rois, fails --- ort_roialign.py | 12 +- test/onnx/gen_onnx.py | 6 +- test/onnx/roialign_half_pixel_test.onnx | Bin 360 -> 360 bytes .../roialign_half_pixel_verify_test.cpp | 111 +++++++++++++++--- 4 files changed, 102 insertions(+), 27 deletions(-) diff --git a/ort_roialign.py b/ort_roialign.py index db06f24c07d..296d69f20ab 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -13,16 +13,16 @@ y=np.ones([2, 2, 4, 7], dtype='f') -# rois=np.array([[0.1, 0.15, 0.6, 0.35], -# [0.1, 0.15, 2.6, 1.35]], dtype='f') +rois=np.array([[0.1, 0.15, 0.6, 0.35], + [0.1, 0.15, 2.6, 1.35]], dtype='f') -rois=np.array([ - [ 1.1, 0.73, 2.2, 1.13]], dtype='f') +# rois=np.array([ +# [ 1.1, 0.73, 2.2, 1.13]], dtype='f') 
sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_half_pixel_test.onnx') res = sess.run(['y'], {'x': x, 'rois': rois, - # 'batch_ind': [0, 1]}) - 'batch_ind': [0]}) + 'batch_ind': [0, 1]}) + # 'batch_ind': [0]}) print(res) diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index f95feec6bc2..997d8510c7a 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -10133,11 +10133,11 @@ def roialign_test(): @onnx_test() def roialign_half_pixel_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 4, 3]) - roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [1, 4]) - bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [1]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 4, 3]) - # half_pixel is the new mode we're developing for + # half_pixel is the newer mode for ROIAlign node = onnx.helper.make_node( 'RoiAlign', inputs=['x', 'rois', 'batch_ind'], diff --git a/test/onnx/roialign_half_pixel_test.onnx b/test/onnx/roialign_half_pixel_test.onnx index cf2d236317be872ab8ba51a28bb8d8c93c6591f5..b6ca215a9fd362ea9737b42897c1a291c8502e03 100644 GIT binary patch delta 28 jcmaFC^nz)FCL<%$WGzM+Hl`$LF2>4<|0GzPm;^)tZgK{x delta 28 jcmaFC^nz)FCL<%`WGzM+HpV1rF2>4<|0GzPm;^)tZcqlN diff --git a/test/onnx/verify/roialign_half_pixel_verify_test.cpp b/test/onnx/verify/roialign_half_pixel_verify_test.cpp index 03b7cd48ac5..6dcb45521c0 100644 --- a/test/onnx/verify/roialign_half_pixel_verify_test.cpp +++ b/test/onnx/verify/roialign_half_pixel_verify_test.cpp @@ -26,12 +26,10 @@ #include #include -// This passes its own test but doesn't match ort version of test TEST_CASE(roialign_half_pixel_verify_test) { migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); p.compile(migraphx::make_target("ref")); - migraphx::shape 
s{migraphx::shape::float_type, {2, 2, 4, 3}}; std::vector data(2*2*4*3); std::iota(data.begin(), data.end(), 0.f); @@ -39,17 +37,12 @@ TEST_CASE(roialign_half_pixel_verify_test) pp["x"] = migraphx::argument(s, data.data()); pp["y"] = migraphx::argument(s, data.data()); // ? - // migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; - // std::vector rois_data = {0.1, 0.15, 0.6, 0.35, - // 2.1, 1.73, 3.8, 2.13}; - // migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index - // std::vector bi_data = {0, 1}; - - migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; + migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; std::vector rois_data = { + 0.1, 0.15, 0.6, 0.35, 1.1, 0.73, 2.2, 1.13}; - migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index - std::vector bi_data = {0}; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + std::vector bi_data = {0, 1}; pp["rois"] = migraphx::argument(srois, rois_data.data()); @@ -70,13 +63,95 @@ for(int i = 0; i < result_vector.size(); i++) printf("\n"); std::vector gold={ - 0.000000, 0.022222, 0.200000, 0.044444, 0.066667, 0.244444, 0.400000, 0.422222, 0.600000, 0.800000, - 0.822222, 1.000000, 1.200000, 1.222222, 1.400000, 12.000000, 12.022223, 12.200000, 12.044445, 12.066667, - 12.244445, 12.400000, 12.422222, 12.600000, 12.800000, 12.822222, 13.000000, 13.200000, 13.222222, 13.400000, - 0.911111, 3.200000, 6.200000, 1.911111, 4.200000, 7.200000, 2.829630, 5.022223, 8.022223, 2.000000, - 4.000000, 7.000000, 0.000000, 0.000000, 0.000000, 12.911111, 15.200000, 18.200001, 13.911111, 16.199999, - 19.200001, 14.829630, 17.022223, 20.022223, 14.000000, 16.000000, 19.000000, 0.000000, 0.000000, 0.000000 - }; + 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, + 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, + 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, + 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, + 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, + 
4.22222227e-01, 5.33333302e-01, 6.44444466e-01, + 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, + 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, + 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, + 1.90476179e-02, 1.90476179e-02, 2.39858869e-02, + 1.07936502e-01, 2.19047621e-01, 3.30158740e-01, + 4.41269815e-01, 5.52380979e-01, 6.63492084e-01, + 1.71428561e-01, 1.71428561e-01, 1.76366836e-01, + 2.60317445e-01, 3.71428549e-01, 4.82539713e-01, + 5.93650818e-01, 7.04761863e-01, 8.15872967e-01, + 3.42857152e-01, 3.42857152e-01, 3.47795397e-01, + 4.31746036e-01, 5.42857111e-01, 6.53968275e-01, + 7.65079260e-01, 8.76190484e-01, 9.87301588e-01, + 5.14285743e-01, 5.14285743e-01, 5.19223928e-01, + 6.03174567e-01, 7.14285672e-01, 8.25396836e-01, + 9.36507940e-01, 1.04761910e+00, 1.15873003e+00, + + 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, + 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, + 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, + 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, + 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, + 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, + 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, + 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, + 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, + 1.20190477e+01, 1.20190477e+01, 1.20239868e+01, + 1.21079369e+01, 1.22190475e+01, 1.23301582e+01, + 1.24412699e+01, 1.25523796e+01, 1.26634922e+01, + 1.21714277e+01, 1.21714277e+01, 1.21763659e+01, + 1.22603178e+01, 1.23714285e+01, 1.24825401e+01, + 1.25936518e+01, 1.27047615e+01, 1.28158722e+01, + 1.23428583e+01, 1.23428583e+01, 1.23477964e+01, + 1.24317465e+01, 1.25428581e+01, 1.26539688e+01, + 1.27650795e+01, 1.28761902e+01, 1.29873009e+01, + 1.25142860e+01, 1.25142860e+01, 1.25192232e+01, + 1.26031752e+01, 1.27142859e+01, 1.28253975e+01, + 1.29365072e+01, 1.30476189e+01, 1.31587305e+01, + + + 2.41400356e+01, 2.46190472e+01, 2.51746025e+01, + 2.57301579e+01, 2.60857143e+01, 2.60857143e+01, + 0.00000000e+00, 0.00000000e+00, 
0.00000000e+00, + 2.49971752e+01, 2.54761906e+01, 2.60317459e+01, + 2.65873032e+01, 2.69428539e+01, 2.69428539e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 2.60257492e+01, 2.65047607e+01, 2.70603180e+01, + 2.76158714e+01, 2.79714279e+01, 2.79714279e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 2.70543232e+01, 2.75333328e+01, 2.80888901e+01, + 2.86444473e+01, 2.90000038e+01, 2.90000038e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 2.80828934e+01, 2.85619030e+01, 2.91174583e+01, + 2.96730137e+01, 3.00285721e+01, 3.00285721e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 2.91114635e+01, 2.95904770e+01, 3.01460342e+01, + 3.07015896e+01, 3.10571423e+01, 3.10571423e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 3.01400356e+01, 3.06190453e+01, 3.11746006e+01, + 3.17301598e+01, 3.20857124e+01, 3.20857124e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + + 3.61400337e+01, 3.66190453e+01, 3.71746063e+01, + 3.77301559e+01, 3.80857124e+01, 3.80857124e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 3.69971771e+01, 3.74761848e+01, 3.80317497e+01, + 3.85872993e+01, 3.89428558e+01, 3.89428558e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 3.80257492e+01, 3.85047646e+01, 3.90603180e+01, + 3.96158714e+01, 3.99714279e+01, 3.99714279e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 3.90543251e+01, 3.95333328e+01, 4.00888863e+01, + 4.06444435e+01, 4.10000038e+01, 4.10000038e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 4.00828934e+01, 4.05619049e+01, 4.11174622e+01, + 4.16730156e+01, 4.20285721e+01, 4.20285721e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 4.11114655e+01, 4.15904732e+01, 4.21460304e+01, + 4.27015839e+01, 4.30571404e+01, 4.30571404e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 4.21400299e+01, 4.26190529e+01, 4.31746025e+01, + 4.37301636e+01, 4.40857201e+01, 4.40857201e+01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00 + }; 
EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } From 4cb582e2522b5b61cae8d8d8c005e4b24b05ebe2 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 25 Sep 2024 18:28:19 +0000 Subject: [PATCH 24/56] created out of bounds test for roialign. Learned that existing code give correct result only for ROI in bounds. --- ort_roialign.py | 2 +- .../roialign_half_pixel_verify_test.cpp | 213 ++++++++---------- 2 files changed, 96 insertions(+), 119 deletions(-) diff --git a/ort_roialign.py b/ort_roialign.py index 296d69f20ab..cf02b431da4 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -14,7 +14,7 @@ y=np.ones([2, 2, 4, 7], dtype='f') rois=np.array([[0.1, 0.15, 0.6, 0.35], - [0.1, 0.15, 2.6, 1.35]], dtype='f') + [1.1, 0.73, 1.9, 1.13]], dtype='f') # rois=np.array([ # [ 1.1, 0.73, 2.2, 1.13]], dtype='f') diff --git a/test/onnx/verify/roialign_half_pixel_verify_test.cpp b/test/onnx/verify/roialign_half_pixel_verify_test.cpp index 6dcb45521c0..417988277b7 100644 --- a/test/onnx/verify/roialign_half_pixel_verify_test.cpp +++ b/test/onnx/verify/roialign_half_pixel_verify_test.cpp @@ -40,7 +40,7 @@ TEST_CASE(roialign_half_pixel_verify_test) migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; std::vector rois_data = { 0.1, 0.15, 0.6, 0.35, - 1.1, 0.73, 2.2, 1.13}; + 1.1, 0.73, 1.9, 1.13}; migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index std::vector bi_data = {0, 1}; @@ -63,133 +63,110 @@ for(int i = 0; i < result_vector.size(); i++) printf("\n"); std::vector gold={ - 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, - 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, - 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, - 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, - 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, - 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, - 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, - 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, - 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, - 1.90476179e-02, 
1.90476179e-02, 2.39858869e-02, - 1.07936502e-01, 2.19047621e-01, 3.30158740e-01, - 4.41269815e-01, 5.52380979e-01, 6.63492084e-01, - 1.71428561e-01, 1.71428561e-01, 1.76366836e-01, - 2.60317445e-01, 3.71428549e-01, 4.82539713e-01, - 5.93650818e-01, 7.04761863e-01, 8.15872967e-01, - 3.42857152e-01, 3.42857152e-01, 3.47795397e-01, - 4.31746036e-01, 5.42857111e-01, 6.53968275e-01, - 7.65079260e-01, 8.76190484e-01, 9.87301588e-01, - 5.14285743e-01, 5.14285743e-01, 5.19223928e-01, - 6.03174567e-01, 7.14285672e-01, 8.25396836e-01, - 9.36507940e-01, 1.04761910e+00, 1.15873003e+00, - - 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, - 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, - 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, - 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, - 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, - 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, - 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, - 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, - 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, - 1.20190477e+01, 1.20190477e+01, 1.20239868e+01, - 1.21079369e+01, 1.22190475e+01, 1.23301582e+01, - 1.24412699e+01, 1.25523796e+01, 1.26634922e+01, - 1.21714277e+01, 1.21714277e+01, 1.21763659e+01, - 1.22603178e+01, 1.23714285e+01, 1.24825401e+01, - 1.25936518e+01, 1.27047615e+01, 1.28158722e+01, - 1.23428583e+01, 1.23428583e+01, 1.23477964e+01, - 1.24317465e+01, 1.25428581e+01, 1.26539688e+01, - 1.27650795e+01, 1.28761902e+01, 1.29873009e+01, - 1.25142860e+01, 1.25142860e+01, 1.25192232e+01, - 1.26031752e+01, 1.27142859e+01, 1.28253975e+01, - 1.29365072e+01, 1.30476189e+01, 1.31587305e+01, - - - 2.41400356e+01, 2.46190472e+01, 2.51746025e+01, - 2.57301579e+01, 2.60857143e+01, 2.60857143e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 2.49971752e+01, 2.54761906e+01, 2.60317459e+01, - 2.65873032e+01, 2.69428539e+01, 2.69428539e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 2.60257492e+01, 2.65047607e+01, 2.70603180e+01, - 
2.76158714e+01, 2.79714279e+01, 2.79714279e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 2.70543232e+01, 2.75333328e+01, 2.80888901e+01, - 2.86444473e+01, 2.90000038e+01, 2.90000038e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 2.80828934e+01, 2.85619030e+01, 2.91174583e+01, - 2.96730137e+01, 3.00285721e+01, 3.00285721e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 2.91114635e+01, 2.95904770e+01, 3.01460342e+01, - 3.07015896e+01, 3.10571423e+01, 3.10571423e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 3.01400356e+01, 3.06190453e+01, 3.11746006e+01, - 3.17301598e+01, 3.20857124e+01, 3.20857124e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - - 3.61400337e+01, 3.66190453e+01, 3.71746063e+01, - 3.77301559e+01, 3.80857124e+01, 3.80857124e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 3.69971771e+01, 3.74761848e+01, 3.80317497e+01, - 3.85872993e+01, 3.89428558e+01, 3.89428558e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 3.80257492e+01, 3.85047646e+01, 3.90603180e+01, - 3.96158714e+01, 3.99714279e+01, 3.99714279e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 3.90543251e+01, 3.95333328e+01, 4.00888863e+01, - 4.06444435e+01, 4.10000038e+01, 4.10000038e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 4.00828934e+01, 4.05619049e+01, 4.11174622e+01, - 4.16730156e+01, 4.20285721e+01, 4.20285721e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 4.11114655e+01, 4.15904732e+01, 4.21460304e+01, - 4.27015839e+01, 4.30571404e+01, 4.30571404e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 4.21400299e+01, 4.26190529e+01, 4.31746025e+01, - 4.37301636e+01, 4.40857201e+01, 4.40857201e+01, - 0.00000000e+00, 0.00000000e+00, 0.00000000e+00 + 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, + 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, + 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 
0.422222, 0.533333, 0.644444, + 0.019048, 0.019048, 0.023986, 0.107937, 0.219048, 0.330159, 0.441270, 0.552381, 0.663492, + 0.171429, 0.171429, 0.176367, 0.260317, 0.371429, 0.482540, 0.593651, 0.704762, 0.815873, + 0.342857, 0.342857, 0.347795, 0.431746, 0.542857, 0.653968, 0.765079, 0.876190, 0.987302, + 0.514286, 0.514286, 0.519224, 0.603175, 0.714286, 0.825397, 0.936508, 1.047619, 1.158730, + 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, + 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, + 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, + 12.019048, 12.019048, 12.023986, 12.107937, 12.219048, 12.330158, 12.441270, 12.552382, 12.663492, + 12.171429, 12.171429, 12.176367, 12.260318, 12.371428, 12.482540, 12.593651, 12.704762, 12.815873, + 12.342857, 12.342857, 12.347795, 12.431746, 12.542857, 12.653969, 12.765079, 12.876190, 12.987302, + 12.514286, 12.514286, 12.519224, 12.603174, 12.714286, 12.825397, 12.936508, 13.047619, 13.158731, + 4.840318, 5.009453, 5.051429, 5.051429, 5.051429, 5.051429, 5.051429, 1.683810, 0.000000, + 5.183175, 5.352311, 5.394286, 5.394286, 5.394286, 5.394286, 5.394286, 1.798095, 0.000000, + 5.526032, 5.695168, 5.737143, 5.737143, 5.737143, 5.737143, 5.737143, 1.912381, 0.000000, + 5.868889, 6.038025, 6.080000, 6.080000, 6.080000, 6.080000, 6.080000, 2.026667, 0.000000, + 6.211746, 6.380882, 6.422857, 6.422857, 6.422857, 6.422857, 6.422857, 2.140952, 0.000000, + 6.554603, 6.723739, 6.765714, 6.765714, 6.765714, 6.765714, 6.765714, 2.255238, 0.000000, + 6.897460, 7.066596, 7.108572, 7.108572, 7.108572, 7.108572, 7.108572, 2.369524, 0.000000, + 16.840317, 17.009453, 17.051428, 17.051428, 17.051428, 17.051428, 17.051428, 5.683809, 0.000000, + 17.183174, 17.352310, 17.394285, 17.394285, 17.394285, 17.394285, 17.394285, 5.798095, 0.000000, + 17.526031, 17.695168, 17.737143, 17.737143, 
17.737143, 17.737143, 17.737143, 5.912381, 0.000000, + 17.868889, 18.038025, 18.080000, 18.080000, 18.080000, 18.080000, 18.080000, 6.026667, 0.000000, + 18.211746, 18.380882, 18.422857, 18.422857, 18.422857, 18.422857, 18.422857, 6.140953, 0.000000, + 18.554604, 18.723740, 18.765715, 18.765715, 18.765715, 18.765715, 18.765715, 6.255238, 0.000000, + 18.897461, 19.066597, 19.108572, 19.108572, 19.108572, 19.108572, 19.108572, 6.369524, 0.000000 }; EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } -// TEST_CASE(roialign_half_pixel_verify_test) -// { -// migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); -// p.compile(migraphx::make_target("ref")); +TEST_CASE(roialign_half_pixel_oob_verify_test) +{ + // One ROI extends outside of bounds of input array, + // when scaled by spatial_scale + migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); + p.compile(migraphx::make_target("ref")); + migraphx::shape s{migraphx::shape::float_type, {2, 2, 4, 3}}; + std::vector data(2*2*4*3); + std::iota(data.begin(), data.end(), 0.f); + migraphx::parameter_map pp; + pp["x"] = migraphx::argument(s, data.data()); + pp["y"] = migraphx::argument(s, data.data()); // ? 
-// migraphx::shape s{migraphx::shape::float_type, {1, 1, 2, 3}}; -// std::vector data = {-5.5, 2.0, 100., 7.0, 0., -1.}; + migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; + std::vector rois_data = { + 0.1, 0.15, 0.6, 0.35, + 1.1, 0.73, 2.5, 1.13}; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + std::vector bi_data = {0, 1}; + + + pp["rois"] = migraphx::argument(srois, rois_data.data()); + pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + pp["y"] = migraphx::argument(s, data.data()); -// migraphx::parameter_map pp; -// pp["x"] = migraphx::argument(s, data.data()); -// pp["y"] = migraphx::argument(s, data.data()); + auto result = p.eval(pp).back(); + std::vector result_vector; + result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); -// // migraphx::shape sx{migraphx::shape::float_type, {10, 5, 4, 7}}; -// migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; -// std::vector rois_data = {0.1, 0.15, 0.6, 0.35}; -// migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index -// std::vector bi_data = {0}; +printf(" result: \n"); +for(int i = 0; i < result_vector.size(); i++) +{ + printf(" %f ", result_vector[i]); + if(i % 9 == 8) + printf("\n"); +} +printf("\n"); -// pp["rois"] = migraphx::argument(srois, rois_data.data()); -// pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + std::vector gold={ + 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, + 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, + 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, + 0.019048, 0.019048, 0.023986, 0.107937, 0.219048, 0.330159, 0.441270, 0.552381, 0.663492, + 0.171429, 0.171429, 0.176367, 0.260317, 0.371429, 0.482540, 0.593651, 0.704762, 0.815873, + 0.342857, 0.342857, 0.347795, 0.431746, 0.542857, 0.653968, 0.765079, 0.876190, 0.987302, + 0.514286, 0.514286, 
0.519224, 0.603175, 0.714286, 0.825397, 0.936508, 1.047619, 1.158730, + 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, + 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, + 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, + 12.019048, 12.019048, 12.023986, 12.107937, 12.219048, 12.330158, 12.441270, 12.552382, 12.663492, + 12.171429, 12.171429, 12.176367, 12.260318, 12.371428, 12.482540, 12.593651, 12.704762, 12.815873, + 12.342857, 12.342857, 12.347795, 12.431746, 12.542857, 12.653969, 12.765079, 12.876190, 12.987302, + 12.514286, 12.514286, 12.519224, 12.603174, 12.714286, 12.825397, 12.936508, 13.047619, 13.158731, + 4.840318, 5.009453, 5.051429, 5.051429, 5.051429, 5.051429, 5.051429, 1.683810, 0.000000, + 5.183175, 5.352311, 5.394286, 5.394286, 5.394286, 5.394286, 5.394286, 1.798095, 0.000000, + 5.526032, 5.695168, 5.737143, 5.737143, 5.737143, 5.737143, 5.737143, 1.912381, 0.000000, + 5.868889, 6.038025, 6.080000, 6.080000, 6.080000, 6.080000, 6.080000, 2.026667, 0.000000, + 6.211746, 6.380882, 6.422857, 6.422857, 6.422857, 6.422857, 6.422857, 2.140952, 0.000000, + 6.554603, 6.723739, 6.765714, 6.765714, 6.765714, 6.765714, 6.765714, 2.255238, 0.000000, + 6.897460, 7.066596, 7.108572, 7.108572, 7.108572, 7.108572, 7.108572, 2.369524, 0.000000, + 16.840317, 17.009453, 17.051428, 17.051428, 17.051428, 17.051428, 17.051428, 5.683809, 0.000000, + 17.183174, 17.352310, 17.394285, 17.394285, 17.394285, 17.394285, 17.394285, 5.798095, 0.000000, + 17.526031, 17.695168, 17.737143, 17.737143, 17.737143, 17.737143, 17.737143, 5.912381, 0.000000, + 17.868889, 18.038025, 18.080000, 18.080000, 18.080000, 18.080000, 18.080000, 6.026667, 0.000000, + 18.211746, 18.380882, 18.422857, 18.422857, 18.422857, 18.422857, 18.422857, 6.140953, 0.000000, + 18.554604, 18.723740, 18.765715, 18.765715, 18.765715, 18.765715, 18.765715, 
6.255238, 0.000000, + 18.897461, 19.066597, 19.108572, 19.108572, 19.108572, 19.108572, 19.108572, 6.369524, 0.000000 + }; -// auto result = p.eval(pp).back(); -// std::vector result_vector; -// result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); + EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); +} -// printf(" result: "); -// for(auto aa : result_vector) printf(" %f ", aa); -// printf("\n"); -// std::vector gold(6); -// float alpha = 0.5; -// std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { -// return std::max(0.0f, x) + std::min(0.0f, alpha * std::expm1(x / alpha)); -// }); -// EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); -// } From 0469b83ef13efec8078a4fff549aec73f3fbd9f9 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 2 Oct 2024 19:00:40 +0000 Subject: [PATCH 25/56] work in progress --- ort_roialign.py | 57 +++----- src/include/migraphx/op/roialign.hpp | 62 ++++++--- test/onnx/gen_onnx.py | 57 ++++---- test/onnx/parse/roialign_test.cpp | 4 +- test/onnx/roialign_half_pixel_test.onnx | Bin 360 -> 360 bytes test/onnx/roialign_test.onnx | Bin 345 -> 345 bytes .../roialign_half_pixel_verify_test.cpp | 127 +++--------------- test/op_shape_test.cpp | 11 ++ 8 files changed, 121 insertions(+), 197 deletions(-) diff --git a/ort_roialign.py b/ort_roialign.py index cf02b431da4..6a7a3ee3dce 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -11,52 +11,27 @@ x = np.array(np.arange(2*2*4*3), dtype='f') x = np.reshape(x, [2, 2, 4, 3]) -y=np.ones([2, 2, 4, 7], dtype='f') - -rois=np.array([[0.1, 0.15, 0.6, 0.35], - [1.1, 0.73, 1.9, 1.13]], dtype='f') +y=np.ones([2, 2, 4, 3], dtype='f') + +# matches roialign_half_pixel_verify_test +# rois=np.array([[0.1, 0.15, 0.6, 0.35], +# [1.1, 0.73, 1.9, 1.13]], dtype='f') +# matches roialign_half_pixel_oob_verify_test +rois=np.array([ + [1.1, 0.73, 1.7, 1.13], + [1.1, 0.73, 2.6, 1.13] + # [1.1, 0.73, 2.6, 1.13] + ], dtype='f') # 
rois=np.array([ # [ 1.1, 0.73, 2.2, 1.13]], dtype='f') sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_half_pixel_test.onnx') +# sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_one_roi_asdf_test.onnx') res = sess.run(['y'], {'x': x, 'rois': rois, 'batch_ind': [0, 1]}) - # 'batch_ind': [0]}) -print(res) + # 'batch_ind': [0]}) +print(' ORT test model is roialign_one_roi_asdf_test.onnx, rois_data is \n',rois, + ' result is \n', res) - -# model_file = "test/onnx/roialign_test.onnx" -# onnx_model = onnx.load(model_file) -# onnx.checker.check_model(onnx_model) - - -# #define the priority order for the execution providers -# EP_list = ['CPUExecutionProvider'] - -# aa = np.asarray(np.arange(3*2*4*5), dtype='f') -# # bi = np.reshape(aa, [3, 2, 4, 5]) - -# # initialize the model.onnx -# sess = rt.InferenceSession(model_file, providers=EP_list) -# x, rois, batch_ind = (np.reshape(aa, [3, 2, 4, 5]), -# np.array([[0.1, 0.15, 0.6, 0.35], -# [2.1, 1.73, 3.8, 2.13]], dtype='f'), -# np.array([0, 1], dtype='int64')) - -# # Use the parameter names defined in the onnx file -# output = sess.run(None, {'x': x, -# 'rois': rois, -# 'batch_ind': batch_ind, -# }) - -# print(' output is ', output) - - -# # get the outputs metadata as a list of :class:`onnxruntime.NodeArg` -# output_name = sess.get_outputs()[0].name - -# # get the inputs metadata as a list of :class:`onnxruntime.NodeArg` -# input_name = sess.get_inputs()[0].name -# print("Names are ",input_name, output_name) - + \ No newline at end of file diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 06ec14c0016..ca8a2db1e3d 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -74,6 +74,11 @@ struct roialign auto type = inputs.at(0).type(); // check input correct + if(inputs.at(0).type() != shape::float_type or inputs.at(1).type() != shape::float_type or inputs.at(2).type() != shape::int64_type) + { + 
MIGRAPHX_THROW("ROIALIGN: incorrect type for input 1 or 2 or 3!"); + } + if(bi_lens.size() != 1) { MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!"); @@ -122,9 +127,9 @@ shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], tem // and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; std::array i = {idx_v[3], idx_v[2]};// <== these are always the same -printf("\n IIIII other index %lu , %lu , %lu , %lu i=%lu temp_index = %lu \n", p[0], p[1], i[0], i[1], index, temp_s.index({p[0], p[1], i[0], i[1]})); -printf(" my index= %lu reverse temp=%lu\n ", comp_s.index({p[1], p[0], i[1], i[0]}), temp_s.index({p[1], p[0], i[1], i[0]})); -printf(" more index= %lu reverse ...=%lu\n ", comp_s.index({p[0], p[1], i[0], i[1]}), temp_s.index({p[0], p[1], i[0], i[1]})); +// printf("\n IIIII other index %lu , %lu , %lu , %lu i=%lu temp_index = %lu \n", p[0], p[1], i[0], i[1], index, temp_s.index({p[0], p[1], i[0], i[1]})); +// printf(" my index= %lu reverse temp=%lu\n ", comp_s.index({p[1], p[0], i[1], i[0]}), temp_s.index({p[1], p[0], i[1], i[0]})); +// printf(" more index= %lu reverse ...=%lu\n ", comp_s.index({p[0], p[1], i[0], i[1]}), temp_s.index({p[0], p[1], i[0], i[1]})); // xy is scaled coordinates of start point of ROI std::array xy{}; // low, high are floor and ceiling of the xy value (i.e. 
the bounds of the pixel it lies inside) @@ -168,8 +173,8 @@ printf(" more index= %lu reverse ...=%lu\n ", comp_s.index({p[0], p[1], i[0], i } // printf(" \n"); } - printf(" JJJJJ xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld i=%zu dims[0]=%lu \n\n", - xy[0], xy[1], dims[1], low[1], low[0], high[1], high[0], index, dims[0]); + // printf(" JJJJJ xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld i=%zu dims[0]=%lu \n\n", + // xy[0], xy[1], dims[1], low[1], low[0], high[1], high[0], index, dims[0]); results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -183,12 +188,14 @@ printf(" more index= %lu reverse ...=%lu\n ", comp_s.index({p[0], p[1], i[0], i // ly, lx, hy, hx); // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; +// printf(" DDDDD index %d %f %f %f %f \n", pre_calc_index, +// float(pc.w1), float(pc.w2), float(pc.w3), float(pc.w4)); }); -printf(" AAAAA here we are\n"); - for(int iix = 0; iix < results.size(); iix++) - printf(" SSSSS %d %lu %lu %lu %lu %f %f %f %f\n", iix, results[iix].pos[0], results[iix].pos[1], results[iix].pos[2], results[iix].pos[3], - results[iix].w[0], results[iix].w[1], results[iix].w[2], results[iix].w[3]); +// // printf(" AAAAA here we are\n"); +// for(int iix = 0; iix < results.size(); iix++) +// printf(" SSSSS %d %lu %lu %lu %lu %f %f %f %f\n", iix, results[iix].pos[0], results[iix].pos[1], results[iix].pos[2], results[iix].pos[3], +// results[iix].w[0], results[iix].w[1], results[iix].w[2], results[iix].w[3]); return results; } @@ -211,11 +218,12 @@ printf(" AAAAA here we are\n"); double final(double x, std::size_t y) { return (y == 0) ? 
0.0 : (x / y); } }; + // Calculate a pooling value for 1 block of bin_grid_size*bin_grid_size weights template - std::tuple calc_pooling(const T& data, + double calc_pooling(const T& data, const std::array& bin_grid_size, const std::vector& pos_weights, - int64_t index, + int64_t& index, Op op) const { double output_val = op.init(); @@ -223,17 +231,26 @@ printf(" AAAAA here we are\n"); dfor(bin_grid_size[0], bin_grid_size[1])([&](auto, auto) { const auto& pc = pos_weights[index]; std::array wv; + // printf(" WWWWW "); std::transform( pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) { + + + +// std::cout << " YYYYY data starting at " << &(*(data)) ; +// printf(" %lu, %f->%f \n", pos, w, *(data + pos) * w); return *(data + pos) * w; }); + // for(double aa : wv) + // printf(" %d ", aa); + // printf("\n"); output_val = std::accumulate(wv.begin(), wv.end(), output_val, op); index += 1; }); output_val = op.final(output_val, count); - return {output_val, index}; + return output_val; } argument compute(const shape& output_shape, std::vector args) const @@ -274,7 +291,7 @@ printf(" AAAAA here we are\n"); roi_size[ii] = roi_ends[ii] - roi_starts[ii]; if(coord_trans_mode != "half_pixel") roi_size[ii] = std::max(roi_size[ii], 1.0f); -printf("\n KKKKK roi_size %f out_dims %lu \n", roi_size[ii] , out_dims[ii]); +// printf("\n KKKKK ii %ld roi_size %f roi_batch_ind %ld out_dims %lu \n", ii, roi_size[ii] , roi_batch_ind, out_dims[ii]); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (sampling_ratio > 0) ? 
sampling_ratio @@ -292,8 +309,14 @@ printf("\n KKKKK roi_size %f out_dims %lu \n", roi_size[ii] , out_dims[ii]); std::vector comp_lens1 = {channels, out_dims[0], out_dims[1]}; shape comp_s1{migraphx::shape::float_type, comp_lens1}; std::vector vec_index(channels, 0); - shape_for_each(comp_s1, [&](const auto& idx) { - auto c = idx[0]; +// printf(" XXXXX %lu (bottom_data + %d * %ld + %ld) * %lu * %lu\n",// ORT does this for 2 channels, 2 ROI +// static_cast((roi_batch_ind * channels + 0) * +// in_dims[0] * in_dims[1]), +// int(roi_batch_ind), channels, (size_t)0, in_dims[0], in_dims[1]); // offset pointer to data for this ROI (4 total) + + // Iterate through each dimension in [channels, out_dims[1], out_dims[2]] + shape_for_each(comp_s1, [&](const auto& idx) { + auto c = idx[0]; // channel count auto ph = idx[1]; auto pw = idx[2]; @@ -301,7 +324,13 @@ printf("\n KKKKK roi_size %f out_dims %lu \n", roi_size[ii] , out_dims[ii]); bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); double output_val; - std::tie(output_val, vec_index[c]) = +// printf(" UUUUU bottom_data %d %lu %lu pre_calc size=%lu vec_index %lu ", int(*offset_bottom_data), +// bin_grid_size[0], bin_grid_size[1], +// pre_calc.size(), vec_index[c]); + +// printf("cont. c=%ld ph = %ld pw = %ld n=%ld roi_batch_ind %ld\n", c, ph, pw, n, roi_batch_ind); + + output_val = (mode == migraphx::op::pooling_mode::average) ? 
this->calc_pooling(offset_bottom_data, bin_grid_size, @@ -313,6 +342,7 @@ printf("\n KKKKK roi_size %f out_dims %lu \n", roi_size[ii] , out_dims[ii]); pre_calc, vec_index[c], max_pool{}); +// printf(" TTTTT idx=%3ld output_val=%f\n", vec_index[c] % 9 - 1, output_val); output(n, c, ph, pw) = output_val; }); }); diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 997d8510c7a..3b7e4697556 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -10112,10 +10112,11 @@ def roialign_default_test(): @onnx_test() def roialign_test(): - x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 2, 4, 5]) - roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) - bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) - y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 2, 4, 5]) + # Roialign with output_half_pixel mode is backward-compatible. + x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 5, 4, 7]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [8, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [8]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [8, 4, 5, 5]) node = onnx.helper.make_node( 'RoiAlign', inputs=['x', 'rois', 'batch_ind'], @@ -10143,39 +10144,37 @@ def roialign_half_pixel_test(): inputs=['x', 'rois', 'batch_ind'], outputs=['y'], spatial_scale=2.0, - output_height=7, - output_width=9, - sampling_ratio=3, + output_height=2, + output_width=3, + sampling_ratio=2, mode="avg", coordinate_transformation_mode="half_pixel") return ([node], [x, roi, bi], [y]) +@onnx_test() +def roialign_half_pixel_roi_test(): + # Same as roialign_half_pixel_test but contains more ROIs than there + # are batch dimensions. 
+ x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 2, 3]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 4, 2, 2]) + # half_pixel is the new mode we're developing for + node = onnx.helper.make_node( + 'RoiAlign', + inputs=['x', 'rois', 'batch_ind'], + outputs=['y'], + spatial_scale=2.0, + output_height=2, + output_width=3, + sampling_ratio=2, + mode="avg", + coordinate_transformation_mode="half_pixel") - - -# @onnx_test() -# def roialign_half_pixel_test(): -# x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 2, 3]) -# roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [1, 4]) -# bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [1]) -# y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4, 2, 2]) - -# # half_pixel is the new mode we're developing for -# node = onnx.helper.make_node( -# 'RoiAlign', -# inputs=['x', 'rois', 'batch_ind'], -# outputs=['y'], -# spatial_scale=2.0, -# output_height=5, -# output_width=5, -# sampling_ratio=3, -# mode="avg", -# coordinate_transformation_mode="half_pixel") - -# return ([node], [x, roi, bi], [y]) + return ([node], [x, roi, bi], [y]) @onnx_test() diff --git a/test/onnx/parse/roialign_test.cpp b/test/onnx/parse/roialign_test.cpp index 52bb8681d4d..05f27b6473c 100644 --- a/test/onnx/parse/roialign_test.cpp +++ b/test/onnx/parse/roialign_test.cpp @@ -26,7 +26,7 @@ TEST_CASE(roialign_test) { - migraphx::shape sx{migraphx::shape::float_type, {3, 2, 4, 5}}; + migraphx::shape sx{migraphx::shape::float_type, {10, 5, 4, 7}}; migraphx::shape srois{migraphx::shape::float_type, {8, 4}}; migraphx::shape sbi{migraphx::shape::int64_type, {8}}; @@ -41,7 +41,7 @@ TEST_CASE(roialign_test) {{"coordinate_transformation_mode", "output_half_pixel"}, {"spatial_scale", 2.0f}, {"output_height", 5}, - {"output_width", 3}, + 
{"output_width", 5}, {"sampling_ratio", 3}}), x, rois, diff --git a/test/onnx/roialign_half_pixel_test.onnx b/test/onnx/roialign_half_pixel_test.onnx index b6ca215a9fd362ea9737b42897c1a291c8502e03..76daf3d0c0df8de4fe13e5e0133da6c3bb1e4cce 100644 GIT binary patch delta 55 zcmaFC^nz)^G+m|zj7(a>Ts--uB?YA=@#UE*B^eUTAVEWsE delta 55 zcmaFC^nz)^G+p)uj7(a>Ts--uB?YA=@#UE*B^eT&AVEC^0UUqWsKaAwDi14n`pkklbWxMj0ihBxx?jN+B^W Q5umUDP#8s<6O(`_0QMURegFUf diff --git a/test/onnx/verify/roialign_half_pixel_verify_test.cpp b/test/onnx/verify/roialign_half_pixel_verify_test.cpp index 417988277b7..579106b7a52 100644 --- a/test/onnx/verify/roialign_half_pixel_verify_test.cpp +++ b/test/onnx/verify/roialign_half_pixel_verify_test.cpp @@ -26,6 +26,7 @@ #include #include +// The half_pixel mode for the ROIAlign op TEST_CASE(roialign_half_pixel_verify_test) { migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); @@ -35,88 +36,15 @@ TEST_CASE(roialign_half_pixel_verify_test) std::iota(data.begin(), data.end(), 0.f); migraphx::parameter_map pp; pp["x"] = migraphx::argument(s, data.data()); - pp["y"] = migraphx::argument(s, data.data()); // ? 
- - migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; - std::vector rois_data = { - 0.1, 0.15, 0.6, 0.35, - 1.1, 0.73, 1.9, 1.13}; - migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index - std::vector bi_data = {0, 1}; - - - pp["rois"] = migraphx::argument(srois, rois_data.data()); - pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); pp["y"] = migraphx::argument(s, data.data()); - auto result = p.eval(pp).back(); - std::vector result_vector; - result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); - -printf(" result: \n"); -for(int i = 0; i < result_vector.size(); i++) -{ - printf(" %f ", result_vector[i]); - if(i % 9 == 8) - printf("\n"); -} -printf("\n"); - - std::vector gold={ - 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, - 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, - 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, - 0.019048, 0.019048, 0.023986, 0.107937, 0.219048, 0.330159, 0.441270, 0.552381, 0.663492, - 0.171429, 0.171429, 0.176367, 0.260317, 0.371429, 0.482540, 0.593651, 0.704762, 0.815873, - 0.342857, 0.342857, 0.347795, 0.431746, 0.542857, 0.653968, 0.765079, 0.876190, 0.987302, - 0.514286, 0.514286, 0.519224, 0.603175, 0.714286, 0.825397, 0.936508, 1.047619, 1.158730, - 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, - 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, - 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, - 12.019048, 12.019048, 12.023986, 12.107937, 12.219048, 12.330158, 12.441270, 12.552382, 12.663492, - 12.171429, 12.171429, 12.176367, 12.260318, 12.371428, 12.482540, 12.593651, 12.704762, 12.815873, - 12.342857, 12.342857, 12.347795, 12.431746, 12.542857, 12.653969, 12.765079, 12.876190, 
12.987302, - 12.514286, 12.514286, 12.519224, 12.603174, 12.714286, 12.825397, 12.936508, 13.047619, 13.158731, - 4.840318, 5.009453, 5.051429, 5.051429, 5.051429, 5.051429, 5.051429, 1.683810, 0.000000, - 5.183175, 5.352311, 5.394286, 5.394286, 5.394286, 5.394286, 5.394286, 1.798095, 0.000000, - 5.526032, 5.695168, 5.737143, 5.737143, 5.737143, 5.737143, 5.737143, 1.912381, 0.000000, - 5.868889, 6.038025, 6.080000, 6.080000, 6.080000, 6.080000, 6.080000, 2.026667, 0.000000, - 6.211746, 6.380882, 6.422857, 6.422857, 6.422857, 6.422857, 6.422857, 2.140952, 0.000000, - 6.554603, 6.723739, 6.765714, 6.765714, 6.765714, 6.765714, 6.765714, 2.255238, 0.000000, - 6.897460, 7.066596, 7.108572, 7.108572, 7.108572, 7.108572, 7.108572, 2.369524, 0.000000, - 16.840317, 17.009453, 17.051428, 17.051428, 17.051428, 17.051428, 17.051428, 5.683809, 0.000000, - 17.183174, 17.352310, 17.394285, 17.394285, 17.394285, 17.394285, 17.394285, 5.798095, 0.000000, - 17.526031, 17.695168, 17.737143, 17.737143, 17.737143, 17.737143, 17.737143, 5.912381, 0.000000, - 17.868889, 18.038025, 18.080000, 18.080000, 18.080000, 18.080000, 18.080000, 6.026667, 0.000000, - 18.211746, 18.380882, 18.422857, 18.422857, 18.422857, 18.422857, 18.422857, 6.140953, 0.000000, - 18.554604, 18.723740, 18.765715, 18.765715, 18.765715, 18.765715, 18.765715, 6.255238, 0.000000, - 18.897461, 19.066597, 19.108572, 19.108572, 19.108572, 19.108572, 19.108572, 6.369524, 0.000000 - }; - - EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); -} - - -TEST_CASE(roialign_half_pixel_oob_verify_test) -{ - // One ROI extends outside of bounds of input array, - // when scaled by spatial_scale - migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); - p.compile(migraphx::make_target("ref")); - migraphx::shape s{migraphx::shape::float_type, {2, 2, 4, 3}}; - std::vector data(2*2*4*3); - std::iota(data.begin(), data.end(), 0.f); - migraphx::parameter_map pp; - pp["x"] = migraphx::argument(s, data.data()); - 
pp["y"] = migraphx::argument(s, data.data()); // ? - migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; std::vector rois_data = { - 0.1, 0.15, 0.6, 0.35, - 1.1, 0.73, 2.5, 1.13}; + 1.1, 0.73, 1.7, 1.13, + 1.1, 0.73, 2.6, 1.13 + }; migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index - std::vector bi_data = {0, 1}; - + std::vector bi_data = {0, 1}; pp["rois"] = migraphx::argument(srois, rois_data.data()); pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); @@ -130,43 +58,24 @@ printf(" result: \n"); for(int i = 0; i < result_vector.size(); i++) { printf(" %f ", result_vector[i]); - if(i % 9 == 8) + if(i % 6 == 5) printf("\n"); } printf("\n"); - - std::vector gold={ - 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, - 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, - 0.000000, 0.000000, 0.004938, 0.088889, 0.200000, 0.311111, 0.422222, 0.533333, 0.644444, - 0.019048, 0.019048, 0.023986, 0.107937, 0.219048, 0.330159, 0.441270, 0.552381, 0.663492, - 0.171429, 0.171429, 0.176367, 0.260317, 0.371429, 0.482540, 0.593651, 0.704762, 0.815873, - 0.342857, 0.342857, 0.347795, 0.431746, 0.542857, 0.653968, 0.765079, 0.876190, 0.987302, - 0.514286, 0.514286, 0.519224, 0.603175, 0.714286, 0.825397, 0.936508, 1.047619, 1.158730, - 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, - 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, - 12.000000, 12.000000, 12.004938, 12.088889, 12.200000, 12.311111, 12.422222, 12.533334, 12.644444, - 12.019048, 12.019048, 12.023986, 12.107937, 12.219048, 12.330158, 12.441270, 12.552382, 12.663492, - 12.171429, 12.171429, 12.176367, 12.260318, 12.371428, 12.482540, 12.593651, 12.704762, 12.815873, - 12.342857, 12.342857, 12.347795, 12.431746, 12.542857, 12.653969, 12.765079, 12.876190, 12.987302, - 12.514286, 12.514286, 12.519224, 
12.603174, 12.714286, 12.825397, 12.936508, 13.047619, 13.158731, - 4.840318, 5.009453, 5.051429, 5.051429, 5.051429, 5.051429, 5.051429, 1.683810, 0.000000, - 5.183175, 5.352311, 5.394286, 5.394286, 5.394286, 5.394286, 5.394286, 1.798095, 0.000000, - 5.526032, 5.695168, 5.737143, 5.737143, 5.737143, 5.737143, 5.737143, 1.912381, 0.000000, - 5.868889, 6.038025, 6.080000, 6.080000, 6.080000, 6.080000, 6.080000, 2.026667, 0.000000, - 6.211746, 6.380882, 6.422857, 6.422857, 6.422857, 6.422857, 6.422857, 2.140952, 0.000000, - 6.554603, 6.723739, 6.765714, 6.765714, 6.765714, 6.765714, 6.765714, 2.255238, 0.000000, - 6.897460, 7.066596, 7.108572, 7.108572, 7.108572, 7.108572, 7.108572, 2.369524, 0.000000, - 16.840317, 17.009453, 17.051428, 17.051428, 17.051428, 17.051428, 17.051428, 5.683809, 0.000000, - 17.183174, 17.352310, 17.394285, 17.394285, 17.394285, 17.394285, 17.394285, 5.798095, 0.000000, - 17.526031, 17.695168, 17.737143, 17.737143, 17.737143, 17.737143, 17.737143, 5.912381, 0.000000, - 17.868889, 18.038025, 18.080000, 18.080000, 18.080000, 18.080000, 18.080000, 6.026667, 0.000000, - 18.211746, 18.380882, 18.422857, 18.422857, 18.422857, 18.422857, 18.422857, 6.140953, 0.000000, - 18.554604, 18.723740, 18.765715, 18.765715, 18.765715, 18.765715, 18.765715, 6.255238, 0.000000, - 18.897461, 19.066597, 19.108572, 19.108572, 19.108572, 19.108572, 19.108572, 6.369524, 0.000000 - }; + // Gold values were generated with onnxruntime + std::vector gold={ + 5.38, 5.4799995, 5.4799995, + 6.58, 6.68, 6.68, + 17.38, 17.48, 17.48, + 18.58, 18.68, 18.68, + 29.454998, 14.74, 0., + 30.654999, 15.34, 0., + 41.455, 20.74, 0., + 42.655003, 21.34, 0. 
+ }; EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } + diff --git a/test/op_shape_test.cpp b/test/op_shape_test.cpp index 8d08455d814..2b7d38fd3db 100644 --- a/test/op_shape_test.cpp +++ b/test/op_shape_test.cpp @@ -5161,11 +5161,22 @@ TEST_CASE(roialign_test) migraphx::shape sbi2{migraphx::shape::int64_type, {3}}; throws_shape(migraphx::make_op("roialign"), sx, srois, sbi2); + migraphx::shape sbi_float{migraphx::shape::float_type, {2}}; + throws_shape(migraphx::make_op("roialign"), sx, srois, sbi_float); + migraphx::shape srois1{migraphx::shape::float_type, {2, 4, 3}}; throws_shape(migraphx::make_op("roialign"), sx, srois1, sbi); + // wrong data types migraphx::shape srois2{migraphx::shape::float_type, {2, 3}}; throws_shape(migraphx::make_op("roialign"), sx, srois2, sbi); + + migraphx::shape srois_int{migraphx::shape::int32_type, {2, 3}}; + throws_shape(migraphx::make_op("roialign"), sx, srois_int, sbi); + + migraphx::shape sx_int{migraphx::shape::int64_type, {3, 4, 5, 6}}; + throws_shape(migraphx::make_op("roialign"), sx_int, srois, sbi); + } TEST_CASE(test_concat) From 1837f1add5254c549fa513f4c4b4a1f71f9df30b Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 2 Oct 2024 21:55:45 +0000 Subject: [PATCH 26/56] clean up debug code and tests work in progress --- src/include/migraphx/op/roialign.hpp | 77 +++++----------------------- test/op_shape_test.cpp | 9 ++++ 2 files changed, 21 insertions(+), 65 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index ca8a2db1e3d..f3f54af8028 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -74,11 +74,12 @@ struct roialign auto type = inputs.at(0).type(); // check input correct - if(inputs.at(0).type() != shape::float_type or inputs.at(1).type() != shape::float_type or inputs.at(2).type() != shape::int64_type) - { - MIGRAPHX_THROW("ROIALIGN: incorrect type for input 1 or 2 or 3!"); - } - + 
if(shape::is_integral(inputs.at(0).type())) + MIGRAPHX_THROW("ROIALIGN: incorrect type for input 1!"); + if(shape::is_integral(inputs.at(1).type())) + MIGRAPHX_THROW("ROIALIGN: incorrect type for input 2!"); + if(shape::is_integral(inputs.at(2).type())) + MIGRAPHX_THROW("ROIALIGN: incorrect type for input 3!"); if(bi_lens.size() != 1) { MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!"); @@ -119,62 +120,39 @@ struct roialign { std::vector results(bin_grid_size[0] * bin_grid_size[1] * output_height * output_width); -std::vector temp_lens = comp_s.lens(); -shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], temp_lens[2] }}; + shape_for_each(comp_s, [&](const auto& idx_v, size_t index) { // The p and i indexes correspond to nested looping parameters in ORT that go in y, x order. The i[x] value is least significant // and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; std::array i = {idx_v[3], idx_v[2]};// <== these are always the same -// printf("\n IIIII other index %lu , %lu , %lu , %lu i=%lu temp_index = %lu \n", p[0], p[1], i[0], i[1], index, temp_s.index({p[0], p[1], i[0], i[1]})); -// printf(" my index= %lu reverse temp=%lu\n ", comp_s.index({p[1], p[0], i[1], i[0]}), temp_s.index({p[1], p[0], i[1], i[0]})); -// printf(" more index= %lu reverse ...=%lu\n ", comp_s.index({p[0], p[1], i[0], i[1]}), temp_s.index({p[0], p[1], i[0], i[1]})); // xy is scaled coordinates of start point of ROI std::array xy{}; // low, high are floor and ceiling of the xy value (i.e. the bounds of the pixel it lies inside) + // from which we will interpolate. 
std::array low{}; std::array high{}; - // size_t adj_index = temp_s.index({p[1], p[0], i[1], i[0]}); - for(auto ii : range(p.size())) { - // if(ii == 0) - // printf("x: " ); - // else - // printf("y: " ); - // for width & height dimensions, - // transform the roi start point to scaled coordinates -// printf(" roi_start[ii] %f p[ii] %lu bin_size[ii] %f (i[ii] + .5f) %f bin_grid_size[ii] %lu \n", -// roi_start[ii], p[ii], bin_size[ii], (i[ii] + .5f), bin_grid_size[ii] ); - xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; -// printf(" QQQQQQ L137 x=%f y=%f ", xy[0], xy[1]); xy[ii] = (coord_trans_mode != "half_pixel") ? (xy[ii] - 0.5f) : xy[ii]; -// printf(" L139 %f ", xy[ii]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { -// printf(" L142 results = pos_weight i=%lu dims=%lu, %lu \n ", index, dims[0], dims[1]); - // results[adj_index] = pos_weight{}; // all zeroes - results[index] = pos_weight{}; // all zeroes + results[index] = pos_weight{}; return; } xy[ii] = std::max(xy[ii], 0.0f); low[ii] = xy[ii]; high[ii] = low[ii] + 1; -// printf(" L148 %f low[ii] %lu, dims[ii] %lu", xy[ii], low[ii], dims[ii]); if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; -// printf(" L154 %f ", xy[ii]); } -// printf(" \n"); } - // printf(" JJJJJ xy[0]=%f xy[1] = %f dims[1]=%lu low%ld-%ld high %ld-%ld i=%zu dims[0]=%lu \n\n", - // xy[0], xy[1], dims[1], low[1], low[0], high[1], high[0], index, dims[0]); results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -184,19 +162,10 @@ shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], tem float ly = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; - // printf(" HHHHH partial pixel values, index=%lu pci=%lu ly=%f, lx=%f, hy=%f, hx=%f\n\n", index, temp_s.index({p[1], p[0], i[1], i[0]}), - // ly, lx, hy, hx); + // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; 
-// printf(" DDDDD index %d %f %f %f %f \n", pre_calc_index, -// float(pc.w1), float(pc.w2), float(pc.w3), float(pc.w4)); - }); -// // printf(" AAAAA here we are\n"); -// for(int iix = 0; iix < results.size(); iix++) -// printf(" SSSSS %d %lu %lu %lu %lu %f %f %f %f\n", iix, results[iix].pos[0], results[iix].pos[1], results[iix].pos[2], results[iix].pos[3], -// results[iix].w[0], results[iix].w[1], results[iix].w[2], results[iix].w[3]); - return results; } @@ -231,19 +200,10 @@ shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], tem dfor(bin_grid_size[0], bin_grid_size[1])([&](auto, auto) { const auto& pc = pos_weights[index]; std::array wv; - // printf(" WWWWW "); std::transform( pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) { - - - -// std::cout << " YYYYY data starting at " << &(*(data)) ; -// printf(" %lu, %f->%f \n", pos, w, *(data + pos) * w); return *(data + pos) * w; }); - // for(double aa : wv) - // printf(" %d ", aa); - // printf("\n"); output_val = std::accumulate(wv.begin(), wv.end(), output_val, op); index += 1; }); @@ -291,7 +251,6 @@ shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], tem roi_size[ii] = roi_ends[ii] - roi_starts[ii]; if(coord_trans_mode != "half_pixel") roi_size[ii] = std::max(roi_size[ii], 1.0f); -// printf("\n KKKKK ii %ld roi_size %f roi_batch_ind %ld out_dims %lu \n", ii, roi_size[ii] , roi_batch_ind, out_dims[ii]); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (sampling_ratio > 0) ? 
sampling_ratio @@ -309,13 +268,8 @@ shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], tem std::vector comp_lens1 = {channels, out_dims[0], out_dims[1]}; shape comp_s1{migraphx::shape::float_type, comp_lens1}; std::vector vec_index(channels, 0); -// printf(" XXXXX %lu (bottom_data + %d * %ld + %ld) * %lu * %lu\n",// ORT does this for 2 channels, 2 ROI -// static_cast((roi_batch_ind * channels + 0) * -// in_dims[0] * in_dims[1]), -// int(roi_batch_ind), channels, (size_t)0, in_dims[0], in_dims[1]); // offset pointer to data for this ROI (4 total) - - // Iterate through each dimension in [channels, out_dims[1], out_dims[2]] - shape_for_each(comp_s1, [&](const auto& idx) { + + shape_for_each(comp_s1, [&](const auto& idx) { auto c = idx[0]; // channel count auto ph = idx[1]; auto pw = idx[2]; @@ -324,12 +278,6 @@ shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], tem bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); double output_val; -// printf(" UUUUU bottom_data %d %lu %lu pre_calc size=%lu vec_index %lu ", int(*offset_bottom_data), -// bin_grid_size[0], bin_grid_size[1], -// pre_calc.size(), vec_index[c]); - -// printf("cont. c=%ld ph = %ld pw = %ld n=%ld roi_batch_ind %ld\n", c, ph, pw, n, roi_batch_ind); - output_val = (mode == migraphx::op::pooling_mode::average) ? 
this->calc_pooling(offset_bottom_data, @@ -342,7 +290,6 @@ shape temp_s = {shape::float_type,{temp_lens[1], temp_lens[0], temp_lens[3], tem pre_calc, vec_index[c], max_pool{}); -// printf(" TTTTT idx=%3ld output_val=%f\n", vec_index[c] % 9 - 1, output_val); output(n, c, ph, pw) = output_val; }); }); diff --git a/test/op_shape_test.cpp b/test/op_shape_test.cpp index 2b7d38fd3db..6241b495ce2 100644 --- a/test/op_shape_test.cpp +++ b/test/op_shape_test.cpp @@ -5167,6 +5167,15 @@ TEST_CASE(roialign_test) migraphx::shape srois1{migraphx::shape::float_type, {2, 4, 3}}; throws_shape(migraphx::make_op("roialign"), sx, srois1, sbi); + // alternate data types + migraphx::shape sx_d{migraphx::shape::double_type, {3, 4, 5, 6}}; + migraphx::shape srois_d{migraphx::shape::double_type, {2, 4}}; + migraphx::shape sbi_int{migraphx::shape::int32_type, {2}}; + migraphx::shape sout_d{migraphx::shape::double_type, {2, 4, 1, 1}}; + // to do: debug why this commented-out test failed + // expect_shape(sout_d, migraphx::make_op("roialign"), sx_d, srois_d, sbi_int); + expect_shape(sout, migraphx::make_op("roialign"), sx_d, srois, sbi); + // wrong data types migraphx::shape srois2{migraphx::shape::float_type, {2, 3}}; throws_shape(migraphx::make_op("roialign"), sx, srois2, sbi); From 9196b2ea7e2afee80ff5ca6f9c407c9f1554b373 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 2 Oct 2024 22:25:34 +0000 Subject: [PATCH 27/56] fixed some tests/checks --- src/include/migraphx/op/roialign.hpp | 10 +++++----- test/op_shape_test.cpp | 16 +++++++--------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index f3f54af8028..1850131b52c 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -74,12 +74,12 @@ struct roialign auto type = inputs.at(0).type(); // check input correct - if(shape::is_integral(inputs.at(0).type())) - MIGRAPHX_THROW("ROIALIGN: incorrect type for 
input 1!"); + if(shape::is_integral(type)) + MIGRAPHX_THROW("ROIALIGN: incorrect type for input data! (should be non-integer)"); if(shape::is_integral(inputs.at(1).type())) - MIGRAPHX_THROW("ROIALIGN: incorrect type for input 2!"); - if(shape::is_integral(inputs.at(2).type())) - MIGRAPHX_THROW("ROIALIGN: incorrect type for input 3!"); + MIGRAPHX_THROW("ROIALIGN: incorrect data type for rois! (should be non-integer)"); + if(!shape::is_integral(inputs.at(2).type())) + MIGRAPHX_THROW("ROIALIGN: incorrect datatype for roi indices! (should be an integral type)"); if(bi_lens.size() != 1) { MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!"); diff --git a/test/op_shape_test.cpp b/test/op_shape_test.cpp index 6241b495ce2..ecf4efa121a 100644 --- a/test/op_shape_test.cpp +++ b/test/op_shape_test.cpp @@ -5161,31 +5161,29 @@ TEST_CASE(roialign_test) migraphx::shape sbi2{migraphx::shape::int64_type, {3}}; throws_shape(migraphx::make_op("roialign"), sx, srois, sbi2); - migraphx::shape sbi_float{migraphx::shape::float_type, {2}}; - throws_shape(migraphx::make_op("roialign"), sx, srois, sbi_float); - migraphx::shape srois1{migraphx::shape::float_type, {2, 4, 3}}; throws_shape(migraphx::make_op("roialign"), sx, srois1, sbi); + migraphx::shape srois2{migraphx::shape::float_type, {2, 3}}; + throws_shape(migraphx::make_op("roialign"), sx, srois2, sbi); + // alternate data types migraphx::shape sx_d{migraphx::shape::double_type, {3, 4, 5, 6}}; migraphx::shape srois_d{migraphx::shape::double_type, {2, 4}}; migraphx::shape sbi_int{migraphx::shape::int32_type, {2}}; migraphx::shape sout_d{migraphx::shape::double_type, {2, 4, 1, 1}}; - // to do: debug why this commented-out test failed - // expect_shape(sout_d, migraphx::make_op("roialign"), sx_d, srois_d, sbi_int); - expect_shape(sout, migraphx::make_op("roialign"), sx_d, srois, sbi); + expect_shape(sout_d, migraphx::make_op("roialign"), sx_d, srois_d, sbi_int); // wrong data types - migraphx::shape 
srois2{migraphx::shape::float_type, {2, 3}}; - throws_shape(migraphx::make_op("roialign"), sx, srois2, sbi); - migraphx::shape srois_int{migraphx::shape::int32_type, {2, 3}}; throws_shape(migraphx::make_op("roialign"), sx, srois_int, sbi); migraphx::shape sx_int{migraphx::shape::int64_type, {3, 4, 5, 6}}; throws_shape(migraphx::make_op("roialign"), sx_int, srois, sbi); + migraphx::shape sbi_float{migraphx::shape::float_type, {2}}; + throws_shape(migraphx::make_op("roialign"), sx, srois, sbi_float); + } TEST_CASE(test_concat) From 8f348b5a3b76ffb2db56f8567763eaa4e6f89937 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 2 Oct 2024 23:05:02 +0000 Subject: [PATCH 28/56] revert accidental change --- src/include/migraphx/check_shapes.hpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/include/migraphx/check_shapes.hpp b/src/include/migraphx/check_shapes.hpp index 073b2fe31f0..cbffb758057 100644 --- a/src/include/migraphx/check_shapes.hpp +++ b/src/include/migraphx/check_shapes.hpp @@ -235,14 +235,11 @@ struct check_shapes } /*! - * Check all shapes have the same layout, with minor differences allowed. + * Check all shapes have the same layout. 
*/ - const check_shapes& compatible_layout() const + const check_shapes& same_layout() const { - if(begin != end and this->any_of([&](const shape& s) { - return not migraphx::is_compatible_shape(s, *begin) and - find_permutation(s) != find_permutation(*begin); - })) + if(not this->same([](const shape& s) { return find_permutation(s); })) MIGRAPHX_THROW(prefix() + "Layouts do not match"); return *this; } @@ -335,7 +332,7 @@ struct check_shapes */ const check_shapes& not_broadcasted() const { - if(not this->all_of([](const shape& s) { return not s.broadcasted(); })) + if(not this->all_of([](const shape& s) { return s.standard() or not s.broadcasted(); })) MIGRAPHX_THROW(prefix() + "Shapes are broadcasted"); return *this; } From 4920232985df4bac771a42f560417bfba0075aa9 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 2 Oct 2024 23:12:38 +0000 Subject: [PATCH 29/56] revert unwanted changes --- src/include/migraphx/check_shapes.hpp | 2 +- src/include/migraphx/shape.hpp | 31 +++++---------------------- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/src/include/migraphx/check_shapes.hpp b/src/include/migraphx/check_shapes.hpp index cbffb758057..05118082ee8 100644 --- a/src/include/migraphx/check_shapes.hpp +++ b/src/include/migraphx/check_shapes.hpp @@ -332,7 +332,7 @@ struct check_shapes */ const check_shapes& not_broadcasted() const { - if(not this->all_of([](const shape& s) { return s.standard() or not s.broadcasted(); })) + if(not this->all_of([](const shape& s) { return not s.broadcasted(); })) MIGRAPHX_THROW(prefix() + "Shapes are broadcasted"); return *this; } diff --git a/src/include/migraphx/shape.hpp b/src/include/migraphx/shape.hpp index e7ff55dfcf1..290656f003d 100644 --- a/src/include/migraphx/shape.hpp +++ b/src/include/migraphx/shape.hpp @@ -62,8 +62,10 @@ struct MIGRAPHX_EXPORT shape m(int64_type, int64_t) \ m(uint32_type, uint32_t) \ m(uint64_type, uint64_t) \ - m(fp8e4m3fnuz_type, migraphx::fp8::fp8e4m3fnuz) - // clang-format on + 
m(fp8e4m3fnuz_type, migraphx::fp8::fp8e4m3fnuz) \ + m(fp8e4m3fn_type, migraphx::fp8::fp8e4m3fn) \ + m(fp8e5m2_type, migraphx::fp8::fp8e5m2) +// clang-format on #define MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES(x, t) x, enum type_t @@ -147,6 +149,7 @@ struct MIGRAPHX_EXPORT shape static std::string cpp_type(type_t t); static bool is_integral(type_t t); + static bool is_compatible(const shape& actual, const shape& expected); shape(); shape(type_t t); @@ -431,30 +434,6 @@ struct MIGRAPHX_EXPORT shape std::shared_ptr impl; }; -// "Almost identical" shapes. To support an MLIR feature, there is a limited -// case where shapes may both be standard but have non-identical strides. -static bool inline is_compatible_shape(const shape& actual, const shape& expected) -{ - // Check subshapes - if(expected.type() == shape::tuple_type) - return equal(actual.sub_shapes().begin(), - actual.sub_shapes().end(), - expected.sub_shapes().begin(), - &is_compatible_shape); - // Only the expected can be dynamic - if(expected.dynamic()) - return true; - if(actual == expected) - return true; - if(actual.type() != expected.type()) - return false; - // If both shapes are standard and lens match, they are considered compatible - // even if strides are different. 
- if(actual.standard() and expected.standard()) - return actual.lens() == expected.lens(); - return false; -} - /// Flatten subshapes to a single vector of non-tuple type of shapes MIGRAPHX_EXPORT std::vector flatten(const std::vector& shapes); From 61cc9a6d07b0e4fd25a66040df7398e72f908871 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 2 Oct 2024 23:18:02 +0000 Subject: [PATCH 30/56] revert unwanted changes --- src/shape.cpp | 24 +++++++++++++++++++ .../gpu/include/migraphx/gpu/convolution.hpp | 13 ++++++---- test/onnx/gen_onnx.py | 24 ------------------- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/shape.cpp b/src/shape.cpp index cfa3a1c2b43..657a131be70 100644 --- a/src/shape.cpp +++ b/src/shape.cpp @@ -267,6 +267,30 @@ bool shape::is_integral(shape::type_t t) return result; } +bool shape::is_compatible(const shape& actual, const shape& expected) +{ + // Check subshapes + if(expected.type() == shape::tuple_type) + return migraphx::equal(actual.sub_shapes(), expected.sub_shapes(), &is_compatible); + if(actual == expected) + return true; + if(actual.type() != expected.type()) + return false; + // Only the expected can be dynamic + if(expected.dynamic()) + return actual.ndim() == expected.ndim(); + if(actual.dynamic()) + return false; + if(actual.lens() != expected.lens()) + return false; + // Check strides from dimensions that are not 1 + return all_of(range(actual.lens().size()), [&](auto i) { + if(actual.lens()[i] == 1) + return true; + return actual.strides()[i] == expected.strides()[i]; + }); +} + shape::shape() : impl(shape_impl::default_shape()) {} shape::shape(type_t t) : impl(std::make_shared(t)) {} diff --git a/src/targets/gpu/include/migraphx/gpu/convolution.hpp b/src/targets/gpu/include/migraphx/gpu/convolution.hpp index 0738324af4a..1a6d1bc2497 100644 --- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp +++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp @@ -85,7 +85,7 @@ struct miopen_convolution 
check_shapes{conv_inputs, *this} .max_ndims(5) .packed_layouts({{0, 1, 2}, {0, 1, 2, 3}, {0, 2, 3, 1}, {0, 1, 2, 3, 4}}) - .compatible_layout(); + .same_layout(); return migraphx::compute_shape(op, conv_inputs); } @@ -180,6 +180,7 @@ struct miopen_convolution const auto& x_shape = inputs[0]; const auto& w_shape = inputs[1]; + unsigned long seed = 0; #ifdef MIGRAPHX_HAS_FIND_2_API { auto conv_problem = make_obj( @@ -192,8 +193,10 @@ struct miopen_convolution // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6 preallocate = true; #endif - auto x = preallocate ? to_gpu(generate_argument(x_shape)) : argument{inputs[0]}; - auto w = preallocate ? to_gpu(generate_argument(w_shape)) : argument{inputs[1]}; + auto x = preallocate ? to_gpu(generate_argument(x_shape, seed++, random_mode::random)) + : argument{inputs[0]}; + auto w = preallocate ? to_gpu(generate_argument(w_shape, seed++, random_mode::random)) + : argument{inputs[1]}; auto y = preallocate ? allocate_gpu(output_shape) : argument{inputs[2]}; auto workspace = preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape); @@ -233,8 +236,8 @@ struct miopen_convolution return shape{shape::int8_type, {workspace_size}}; } #else - auto x = to_gpu(generate_argument(x_shape)); - auto w = to_gpu(generate_argument(w_shape)); + auto x = to_gpu(generate_argument(x_shape, seed++, random_mode::random)); + auto w = to_gpu(generate_argument(w_shape, seed++, random_mode::random)); auto y = allocate_gpu(output_shape); auto workspace = allocate_gpu(workspace_shape); int algo_count = 1; diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 3b7e4697556..f80b59c3cb2 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -10153,30 +10153,6 @@ def roialign_half_pixel_test(): return ([node], [x, roi, bi], [y]) -@onnx_test() -def roialign_half_pixel_roi_test(): - # Same as roialign_half_pixel_test but contains more ROIs than there - # are batch dimensions. 
- x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 2, 3]) - roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) - bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) - y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 4, 2, 2]) - - # half_pixel is the new mode we're developing for - node = onnx.helper.make_node( - 'RoiAlign', - inputs=['x', 'rois', 'batch_ind'], - outputs=['y'], - spatial_scale=2.0, - output_height=2, - output_width=3, - sampling_ratio=2, - mode="avg", - coordinate_transformation_mode="half_pixel") - - return ([node], [x, roi, bi], [y]) - - @onnx_test() def round_half_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [4, 4]) From ae12b10c52875c81af47d1caf707c191fa887c2c Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 3 Oct 2024 19:33:45 +0000 Subject: [PATCH 31/56] format --- ort_roialign.py | 33 +++++------- src/include/migraphx/op/roialign.hpp | 51 +++++++++--------- test/onnx/gen_onnx.py | 20 +++---- .../roialign_half_pixel_verify_test.cpp | 54 ++++++++----------- test/onnx/verify/roialign_verify_test.cpp | 43 ++++++++------- test/op_shape_test.cpp | 1 - 6 files changed, 95 insertions(+), 107 deletions(-) diff --git a/ort_roialign.py b/ort_roialign.py index 6a7a3ee3dce..70ee2b410d6 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -1,4 +1,3 @@ - # Not for release. 
This test script is for develop/test only import onnx @@ -7,31 +6,27 @@ import numpy as np print(" version: ", onnx.__version__, rt.__version__) - -x = np.array(np.arange(2*2*4*3), dtype='f') +x = np.array(np.arange(2 * 2 * 4 * 3), dtype='f') x = np.reshape(x, [2, 2, 4, 3]) -y=np.ones([2, 2, 4, 3], dtype='f') +y = np.ones([2, 2, 4, 3], dtype='f') # matches roialign_half_pixel_verify_test # rois=np.array([[0.1, 0.15, 0.6, 0.35], # [1.1, 0.73, 1.9, 1.13]], dtype='f') # matches roialign_half_pixel_oob_verify_test -rois=np.array([ - [1.1, 0.73, 1.7, 1.13], - [1.1, 0.73, 2.6, 1.13] - # [1.1, 0.73, 2.6, 1.13] - ], dtype='f') +rois = np.array( + [[1.1, 0.73, 1.7, 1.13], [1.1, 0.73, 2.6, 1.13] + # [1.1, 0.73, 2.6, 1.13] + ], + dtype='f') # rois=np.array([ # [ 1.1, 0.73, 2.2, 1.13]], dtype='f') -sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_half_pixel_test.onnx') -# sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_one_roi_asdf_test.onnx') -res = sess.run(['y'], {'x': x, - 'rois': rois, - 'batch_ind': [0, 1]}) - # 'batch_ind': [0]}) -print(' ORT test model is roialign_one_roi_asdf_test.onnx, rois_data is \n',rois, - ' result is \n', res) - - \ No newline at end of file +sess = rt.InferenceSession( + '/workspace/AMDMIGraphX/test/onnx/roialign_half_pixel_test.onnx') +# sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_one_roi_asdf_test.onnx') +res = sess.run(['y'], {'x': x, 'rois': rois, 'batch_ind': [0, 1]}) +# 'batch_ind': [0]}) +print(' ORT test model is roialign_one_roi_asdf_test.onnx, rois_data is \n', + rois, ' result is \n', res) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 1850131b52c..98f0bdcba9e 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -76,10 +76,11 @@ struct roialign // check input correct if(shape::is_integral(type)) MIGRAPHX_THROW("ROIALIGN: incorrect type for input data! 
(should be non-integer)"); - if(shape::is_integral(inputs.at(1).type())) + if(shape::is_integral(inputs.at(1).type())) MIGRAPHX_THROW("ROIALIGN: incorrect data type for rois! (should be non-integer)"); if(!shape::is_integral(inputs.at(2).type())) - MIGRAPHX_THROW("ROIALIGN: incorrect datatype for roi indices! (should be an integral type)"); + MIGRAPHX_THROW( + "ROIALIGN: incorrect datatype for roi indices! (should be an integral type)"); if(bi_lens.size() != 1) { MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!"); @@ -122,15 +123,14 @@ struct roialign output_width); shape_for_each(comp_s, [&](const auto& idx_v, size_t index) { - - // The p and i indexes correspond to nested looping parameters in ORT that go in y, x order. The i[x] value is least significant - // and iterates the fastest. + // The p and i indexes correspond to nested looping parameters in ORT that go in y, x + // order. The i[x] value is least significant and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; - std::array i = {idx_v[3], idx_v[2]};// <== these are always the same + std::array i = {idx_v[3], idx_v[2]}; // <== these are always the same // xy is scaled coordinates of start point of ROI std::array xy{}; - // low, high are floor and ceiling of the xy value (i.e. the bounds of the pixel it lies inside) - // from which we will interpolate. + // low, high are floor and ceiling of the xy value (i.e. the bounds of the pixel it lies + // inside) from which we will interpolate. 
std::array low{}; std::array high{}; @@ -190,10 +190,10 @@ struct roialign // Calculate a pooling value for 1 block of bin_grid_size*bin_grid_size weights template double calc_pooling(const T& data, - const std::array& bin_grid_size, - const std::vector& pos_weights, - int64_t& index, - Op op) const + const std::array& bin_grid_size, + const std::vector& pos_weights, + int64_t& index, + Op op) const { double output_val = op.init(); const int64_t count = bin_grid_size[0] * bin_grid_size[1]; @@ -233,7 +233,7 @@ struct roialign const auto bottom_data = x.begin(); const auto roi_batch_ind = batch_indices[n]; // Do not use rounding; this implementation detail is critical - float offset = (coord_trans_mode == "half_pixel") ? 0.5 : 0.0; + float offset = (coord_trans_mode == "half_pixel") ? 0.5 : 0.0; std::array roi_starts = { static_cast(roi[roi_s.index({n, 0})] * spatial_scale - offset), static_cast(roi[roi_s.index({n, 1})] * spatial_scale - offset)}; @@ -270,7 +270,7 @@ struct roialign std::vector vec_index(channels, 0); shape_for_each(comp_s1, [&](const auto& idx) { - auto c = idx[0]; // channel count + auto c = idx[0]; // channel count auto ph = idx[1]; auto pw = idx[2]; @@ -278,18 +278,17 @@ struct roialign bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); double output_val; - output_val = - (mode == migraphx::op::pooling_mode::average) - ? this->calc_pooling(offset_bottom_data, - bin_grid_size, - pre_calc, - vec_index[c], - avg_pool{}) - : this->calc_pooling(offset_bottom_data, - bin_grid_size, - pre_calc, - vec_index[c], - max_pool{}); + output_val = (mode == migraphx::op::pooling_mode::average) + ? 
this->calc_pooling(offset_bottom_data, + bin_grid_size, + pre_calc, + vec_index[c], + avg_pool{}) + : this->calc_pooling(offset_bottom_data, + bin_grid_size, + pre_calc, + vec_index[c], + max_pool{}); output(n, c, ph, pw) = output_val; }); }); diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 1b041069caf..b838a8f065f 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -751,6 +751,7 @@ def celu_default_test(): return ([node], [x], [y]) + @onnx_test() def celu_verify_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 3]) @@ -10629,16 +10630,15 @@ def roialign_half_pixel_test(): y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 4, 3]) # half_pixel is the newer mode for ROIAlign - node = onnx.helper.make_node( - 'RoiAlign', - inputs=['x', 'rois', 'batch_ind'], - outputs=['y'], - spatial_scale=2.0, - output_height=2, - output_width=3, - sampling_ratio=2, - mode="avg", - coordinate_transformation_mode="half_pixel") + node = onnx.helper.make_node('RoiAlign', + inputs=['x', 'rois', 'batch_ind'], + outputs=['y'], + spatial_scale=2.0, + output_height=2, + output_width=3, + sampling_ratio=2, + mode="avg", + coordinate_transformation_mode="half_pixel") return ([node], [x, roi, bi], [y]) diff --git a/test/onnx/verify/roialign_half_pixel_verify_test.cpp b/test/onnx/verify/roialign_half_pixel_verify_test.cpp index 579106b7a52..ea570792249 100644 --- a/test/onnx/verify/roialign_half_pixel_verify_test.cpp +++ b/test/onnx/verify/roialign_half_pixel_verify_test.cpp @@ -26,56 +26,44 @@ #include #include -// The half_pixel mode for the ROIAlign op +// The half_pixel mode for the ROIAlign op TEST_CASE(roialign_half_pixel_verify_test) { migraphx::program p = read_onnx("roialign_half_pixel_test.onnx"); p.compile(migraphx::make_target("ref")); migraphx::shape s{migraphx::shape::float_type, {2, 2, 4, 3}}; - std::vector data(2*2*4*3); + std::vector data(2 * 2 * 4 * 3); std::iota(data.begin(), data.end(), 0.f); 
migraphx::parameter_map pp; - pp["x"] = migraphx::argument(s, data.data()); - pp["y"] = migraphx::argument(s, data.data()); + pp["x"] = migraphx::argument(s, data.data()); + pp["y"] = migraphx::argument(s, data.data()); migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; - std::vector rois_data = { - 1.1, 0.73, 1.7, 1.13, - 1.1, 0.73, 2.6, 1.13 - }; - migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + std::vector rois_data = {1.1, 0.73, 1.7, 1.13, 1.1, 0.73, 2.6, 1.13}; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index std::vector bi_data = {0, 1}; - pp["rois"] = migraphx::argument(srois, rois_data.data()); - pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); - pp["y"] = migraphx::argument(s, data.data()); + pp["rois"] = migraphx::argument(srois, rois_data.data()); + pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + pp["y"] = migraphx::argument(s, data.data()); auto result = p.eval(pp).back(); std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); -printf(" result: \n"); -for(int i = 0; i < result_vector.size(); i++) -{ - printf(" %f ", result_vector[i]); - if(i % 6 == 5) - printf("\n"); -} -printf("\n"); + printf(" result: \n"); + for(int i = 0; i < result_vector.size(); i++) + { + printf(" %f ", result_vector[i]); + if(i % 6 == 5) + printf("\n"); + } + printf("\n"); // Gold values were generated with onnxruntime - std::vector gold={ - 5.38, 5.4799995, 5.4799995, - 6.58, 6.68, 6.68, - 17.38, 17.48, 17.48, - 18.58, 18.68, 18.68, - 29.454998, 14.74, 0., - 30.654999, 15.34, 0., - 41.455, 20.74, 0., - 42.655003, 21.34, 0. 
- }; + std::vector gold = {5.38, 5.4799995, 5.4799995, 6.58, 6.68, 6.68, + 17.38, 17.48, 17.48, 18.58, 18.68, 18.68, + 29.454998, 14.74, 0., 30.654999, 15.34, 0., + 41.455, 20.74, 0., 42.655003, 21.34, 0.}; EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } - - - diff --git a/test/onnx/verify/roialign_verify_test.cpp b/test/onnx/verify/roialign_verify_test.cpp index f2107280df0..f99b7072c69 100644 --- a/test/onnx/verify/roialign_verify_test.cpp +++ b/test/onnx/verify/roialign_verify_test.cpp @@ -32,12 +32,12 @@ TEST_CASE(roialign_verify_test) p.compile(migraphx::make_target("ref")); migraphx::shape s{migraphx::shape::float_type, {3, 2, 4, 5}}; - std::vector data(3*5*4*2); + std::vector data(3 * 5 * 4 * 2); std::iota(data.begin(), data.end(), 0); migraphx::parameter_map pp; - pp["x"] = migraphx::argument(s, data.data()); - pp["y"] = migraphx::argument(s, data.data()); + pp["x"] = migraphx::argument(s, data.data()); + pp["y"] = migraphx::argument(s, data.data()); // migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; // std::vector rois_data = {0.1, 0.15, 0.6, 0.35, @@ -46,29 +46,36 @@ TEST_CASE(roialign_verify_test) // std::vector bi_data = {0, 1}; migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; - std::vector rois_data = { - 2.1, 1.73, 3.8, 2.13}; - migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index + std::vector rois_data = {2.1, 1.73, 3.8, 2.13}; + migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index std::vector bi_data = {0}; - pp["rois"] = migraphx::argument(srois, rois_data.data()); - pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + pp["rois"] = migraphx::argument(srois, rois_data.data()); + pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); auto result = p.eval(pp).back(); std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); -printf(" result: "); -for(auto aa : result_vector) printf(" %f ", aa); 
-printf("\n"); + printf(" result: "); + for(auto aa : result_vector) + printf(" %f ", aa); + printf("\n"); - std::vector gold = { 0.000000, 0.022222, 0.200000, 0.400000, 0.600000, 0.500000, 0.522222, 0.700000, 0.900000, 1.100000, 1.500000, 1.522223, 1.700000, - 1.900000, 2.100000, 2.500000, 2.522222, 2.700000, 2.900000, 3.100000, 3.500000, 3.522222, 3.700000, 3.900000, 4.100000, 20.000000, 20.022223, 20.200001, 20.400000, 20.600000, 20.500000, 20.522223, - 20.700001, 20.900000, 21.100000, 21.500000, 21.522223, 21.700001, 21.900000, 22.100000, 22.500000, 22.522223, 22.700001, 22.900000, 23.100000, 23.500000, 23.522223, 23.700001, - 23.900000, 24.100000, 5.888889, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, - 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 12.555555, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, - 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, - 0.000000, 0.000000 }; + std::vector gold = { + 0.000000, 0.022222, 0.200000, 0.400000, 0.600000, 0.500000, 0.522222, 0.700000, + 0.900000, 1.100000, 1.500000, 1.522223, 1.700000, 1.900000, 2.100000, 2.500000, + 2.522222, 2.700000, 2.900000, 3.100000, 3.500000, 3.522222, 3.700000, 3.900000, + 4.100000, 20.000000, 20.022223, 20.200001, 20.400000, 20.600000, 20.500000, 20.522223, + 20.700001, 20.900000, 21.100000, 21.500000, 21.522223, 21.700001, 21.900000, 22.100000, + 22.500000, 22.522223, 22.700001, 22.900000, 23.100000, 23.500000, 23.522223, 23.700001, + 23.900000, 24.100000, 5.888889, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 
12.555555, 0.000000, 0.000000, 0.000000, 0.000000, + 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, + 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, + 0.000000, 0.000000, 0.000000, 0.000000}; float alpha = 0.5; std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { return std::max(0.0f, x) + std::min(0.0f, alpha * std::expm1(x / alpha)); diff --git a/test/op_shape_test.cpp b/test/op_shape_test.cpp index ecf4efa121a..24b9afb1377 100644 --- a/test/op_shape_test.cpp +++ b/test/op_shape_test.cpp @@ -5183,7 +5183,6 @@ TEST_CASE(roialign_test) migraphx::shape sbi_float{migraphx::shape::float_type, {2}}; throws_shape(migraphx::make_op("roialign"), sx, srois, sbi_float); - } TEST_CASE(test_concat) From 717b03c279e5c860ac63f3dc729cf676dadcf9e5 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 3 Oct 2024 22:20:04 +0000 Subject: [PATCH 32/56] undo a return type change and a test error --- src/include/migraphx/op/roialign.hpp | 37 ++++++++++++++-------------- test/check_shapes_test.cpp | 4 +-- test/shape_test.cpp | 1 - 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 98f0bdcba9e..5d987c9c1ca 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -126,7 +126,7 @@ struct roialign // The p and i indexes correspond to nested looping parameters in ORT that go in y, x // order. The i[x] value is least significant and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; - std::array i = {idx_v[3], idx_v[2]}; // <== these are always the same + std::array i = {idx_v[3], idx_v[2]}; // these are always equal // xy is scaled coordinates of start point of ROI std::array xy{}; // low, high are floor and ceiling of the xy value (i.e. 
the bounds of the pixel it lies @@ -189,11 +189,11 @@ struct roialign // Calculate a pooling value for 1 block of bin_grid_size*bin_grid_size weights template - double calc_pooling(const T& data, - const std::array& bin_grid_size, - const std::vector& pos_weights, - int64_t& index, - Op op) const + std::tuple calc_pooling(const T& data, + const std::array& bin_grid_size, + const std::vector& pos_weights, + int64_t index, + Op op) const { double output_val = op.init(); const int64_t count = bin_grid_size[0] * bin_grid_size[1]; @@ -210,7 +210,7 @@ struct roialign output_val = op.final(output_val, count); - return output_val; + return {output_val, index}; } argument compute(const shape& output_shape, std::vector args) const @@ -278,17 +278,18 @@ struct roialign bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); double output_val; - output_val = (mode == migraphx::op::pooling_mode::average) - ? this->calc_pooling(offset_bottom_data, - bin_grid_size, - pre_calc, - vec_index[c], - avg_pool{}) - : this->calc_pooling(offset_bottom_data, - bin_grid_size, - pre_calc, - vec_index[c], - max_pool{}); + std::tie(output_val, vec_index[c]) = + (mode == migraphx::op::pooling_mode::average) + ? 
this->calc_pooling(offset_bottom_data, + bin_grid_size, + pre_calc, + vec_index[c], + avg_pool{}) + : this->calc_pooling(offset_bottom_data, + bin_grid_size, + pre_calc, + vec_index[c], + max_pool{}); output(n, c, ph, pw) = output_val; }); }); diff --git a/test/check_shapes_test.cpp b/test/check_shapes_test.cpp index 58241576648..42b514d02f8 100644 --- a/test/check_shapes_test.cpp +++ b/test/check_shapes_test.cpp @@ -53,7 +53,7 @@ TEST_CASE(same_layout_fail) EXPECT(test::throws([] { shape a{shape::float_type, {2, 3}}; shape b{shape::float_type, {2, 3}, {1, 2}}; - migraphx::check_shapes{{a, b}, ""}.compatible_layout(); + migraphx::check_shapes{{a, b}, ""}.same_layout(); })); } @@ -62,7 +62,7 @@ TEST_CASE(same_layout_pass) EXPECT(not test::throws([] { shape a{shape::float_type, {2, 3}, {1, 2}}; shape b{shape::float_type, {2, 3}, {1, 2}}; - migraphx::check_shapes{{a, b}, ""}.compatible_layout(); + migraphx::check_shapes{{a, b}, ""}.same_layout(); })); } diff --git a/test/shape_test.cpp b/test/shape_test.cpp index fd56faad95b..8a200f2c51c 100644 --- a/test/shape_test.cpp +++ b/test/shape_test.cpp @@ -826,7 +826,6 @@ TEST_CASE(tuple_copy) EXPECT(s3 == s2); migraphx::shape s4{{migraphx::shape{migraphx::shape::int8_type}, migraphx::shape{migraphx::shape::float_type}}}; - EXPECT(not is_compatible_shape(s1, s4)); EXPECT(s4 != s1); EXPECT(s4 != s2); EXPECT(s4 != s3); From 6fe841d2682ef682e30ae1b4ebf24e086fc25fac Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 3 Oct 2024 22:33:39 +0000 Subject: [PATCH 33/56] revert default test --- test/onnx/roialign_default_test.onnx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/onnx/roialign_default_test.onnx b/test/onnx/roialign_default_test.onnx index 3f54104fdd6..5b0165fc093 100644 --- a/test/onnx/roialign_default_test.onnx +++ b/test/onnx/roialign_default_test.onnx @@ -1,5 +1,4 @@ - -roialign_default_test:¥ + roialign_default_test:¥ ! 
x rois @@ -24,4 +23,4 @@    -B \ No newline at end of file +B \ No newline at end of file From 09adc29ef8c49b30afd8ad79b68029336f91d393 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 7 Oct 2024 15:41:41 +0000 Subject: [PATCH 34/56] debugging crash --- ort_roialign.py | 27 ++- src/include/migraphx/op/roialign.hpp | 20 ++- test/onnx/conv_relu_maxpool_test.onnx | Bin 316 -> 329 bytes test/onnx/gen_onnx.py | 9 +- test/onnx/roialign_half_pixel_test.onnx | Bin 360 -> 360 bytes test/onnx/roialign_test.onnx | Bin 345 -> 345 bytes test/onnx/verify/roialign_verify_test.cpp | 206 ++++++++++++++++++---- 7 files changed, 209 insertions(+), 53 deletions(-) diff --git a/ort_roialign.py b/ort_roialign.py index 70ee2b410d6..ec6e1920b8d 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -6,27 +6,22 @@ import numpy as np print(" version: ", onnx.__version__, rt.__version__) -x = np.array(np.arange(2 * 2 * 4 * 3), dtype='f') -x = np.reshape(x, [2, 2, 4, 3]) +x = np.array(np.arange(10 * 5 * 4 * 7), dtype='f') +x = np.reshape(x, [10, 5, 4, 7]) -y = np.ones([2, 2, 4, 3], dtype='f') +y = np.ones([10, 5, 4, 7], dtype='f') -# matches roialign_half_pixel_verify_test -# rois=np.array([[0.1, 0.15, 0.6, 0.35], -# [1.1, 0.73, 1.9, 1.13]], dtype='f') -# matches roialign_half_pixel_oob_verify_test rois = np.array( - [[1.1, 0.73, 1.7, 1.13], [1.1, 0.73, 2.6, 1.13] - # [1.1, 0.73, 2.6, 1.13] - ], + [ + [0.1, 0.15, 0.6, 0.35], + [2.1, 1.73, 3.8, 2.13] + ], dtype='f') -# rois=np.array([ -# [ 1.1, 0.73, 2.2, 1.13]], dtype='f') +themodel = 'roialign_test.onnx' sess = rt.InferenceSession( - '/workspace/AMDMIGraphX/test/onnx/roialign_half_pixel_test.onnx') -# sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/roialign_one_roi_asdf_test.onnx') + '/workspace/AMDMIGraphX/test/onnx/' + themodel) res = sess.run(['y'], {'x': x, 'rois': rois, 'batch_ind': [0, 1]}) -# 'batch_ind': [0]}) -print(' ORT test model is roialign_one_roi_asdf_test.onnx, rois_data is \n', + +print(' ORT test model 
is ' + themodel + ', rois_data is \n', rois, ' result is \n', res) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 5d987c9c1ca..1d19f3e514f 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -121,7 +121,8 @@ struct roialign { std::vector results(bin_grid_size[0] * bin_grid_size[1] * output_height * output_width); - +// printf(" bin grid %ldx%ld, height %lu width %lu\n", bin_grid_size[0], bin_grid_size[1], output_height, +// output_width); shape_for_each(comp_s, [&](const auto& idx_v, size_t index) { // The p and i indexes correspond to nested looping parameters in ORT that go in y, x // order. The i[x] value is least significant and iterates the fastest. @@ -138,9 +139,12 @@ struct roialign { xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; - xy[ii] = (coord_trans_mode != "half_pixel") ? (xy[ii] - 0.5f) : xy[ii]; +// printf(" QQQQQQ L137 x=%f y=%f ", xy[0], xy[1]); + // xy[ii] = (coord_trans_mode != "half_pixel") ? (xy[ii] - 0.5f) : xy[ii]; +// printf(" L139 %f ", xy[ii]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { +// printf(" L142 results = pos_weight i=%lu dims=%lu, %lu \n ", index, dims[0], dims[1]); results[index] = pos_weight{}; return; } @@ -148,10 +152,13 @@ struct roialign xy[ii] = std::max(xy[ii], 0.0f); low[ii] = xy[ii]; high[ii] = low[ii] + 1; +// printf(" L148 %f low[ii] %lu, dims[ii] %lu", xy[ii], low[ii], dims[ii]); if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; +// printf(" L154 %f ", xy[ii]); } +// printf(" \n"); } results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], @@ -162,10 +169,15 @@ struct roialign float ly = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; - +printf(" !!!!! 
%ld\n", index); // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; }); +// printf(" AAAAA here we are\n"); + // for(int iix = 0; iix < results.size(); iix++) + // printf(" SSSSS %ld %d\n", results.size(), iix); + // printf(" SSSSS %d %lu %lu %lu %lu %f %f %f %f\n", iix, results[iix].pos[0], results[iix].pos[1], results[iix].pos[2], results[iix].pos[3], + // results[iix].w[0], results[iix].w[1], results[iix].w[2], results[iix].w[3]); return results; } @@ -251,10 +263,12 @@ struct roialign roi_size[ii] = roi_ends[ii] - roi_starts[ii]; if(coord_trans_mode != "half_pixel") roi_size[ii] = std::max(roi_size[ii], 1.0f); +printf("\n KKKKK ii %ld roi_size %f roi_batch_ind %ld out_dims %lu \n", ii, roi_size[ii] , roi_batch_ind, out_dims[ii]); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]); +printf(" KLKLKL bin_grid_size= %ld x %ld\n", bin_grid_size[0], bin_grid_size[1]); } // we want to precalculate indices and weights shared by all channels, diff --git a/test/onnx/conv_relu_maxpool_test.onnx b/test/onnx/conv_relu_maxpool_test.onnx index f5bfe4c1514a128bbde7d847205baffbb35763fb..4403d8de5f70722d44e60d46fb26e575d7a959ca 100644 GIT binary patch delta 65 zcmdnPbdpJigHuQ>IX|x~z9==PG(I=6q98v%C%z=LxWsD3MDbPPI3y=t$YvA<00*)d Awg3PC delta 52 zcmX@fw1-KYgH=c{IX|x~z9==PR5!IEF}ENm)oQ^+nN|FJC8@qBce5F}0M$Se A)c^nh diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index b838a8f065f..14206f7abbc 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -10605,17 +10605,18 @@ def roialign_default_test(): def roialign_test(): # Roialign with output_half_pixel mode is backward-compatible. 
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 5, 4, 7]) - roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [8, 4]) - bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [8]) - y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [8, 4, 5, 5]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 4, 5, 5]) node = onnx.helper.make_node( 'RoiAlign', inputs=['x', 'rois', 'batch_ind'], outputs=['y'], spatial_scale=2.0, output_height=5, - output_width=5, + output_width=3, sampling_ratio=3, + # todo: max test mode="avg", coordinate_transformation_mode="output_half_pixel") diff --git a/test/onnx/roialign_half_pixel_test.onnx b/test/onnx/roialign_half_pixel_test.onnx index 76daf3d0c0df8de4fe13e5e0133da6c3bb1e4cce..4b4ff5dcb2f89884e01805636aec6b7122df873d 100644 GIT binary patch delta 16 XcmaFC^n!_rgLC42sf{w;jEuqnF8>6d delta 16 XcmaFC^n!_rgKOe_sf{w;jEtfHFAD^s diff --git a/test/onnx/roialign_test.onnx b/test/onnx/roialign_test.onnx index 0a60795f561572d993de35769d4b8aef8b520e49..eb6703f49d54786be995473bd87d0534717b30ee 100644 GIT binary patch delta 56 zcmcb~bd!mRgL5L&Qby*9D~cGICQCEQC^02Tb1_y5iE)W=FbWB9F>x>fF$)m0g0K^l GfG_|!><8Qc delta 56 zcmcb~bd!mRgKHwwQbyK^D~cF7CQCEQC~+i7b1_y5iE)W=FbWB9F>!DJF$)m0g0K^l GfG7YvoCos& diff --git a/test/onnx/verify/roialign_verify_test.cpp b/test/onnx/verify/roialign_verify_test.cpp index f99b7072c69..747171a8a7a 100644 --- a/test/onnx/verify/roialign_verify_test.cpp +++ b/test/onnx/verify/roialign_verify_test.cpp @@ -31,51 +31,197 @@ TEST_CASE(roialign_verify_test) migraphx::program p = read_onnx("roialign_test.onnx"); p.compile(migraphx::make_target("ref")); - migraphx::shape s{migraphx::shape::float_type, {3, 2, 4, 5}}; - std::vector data(3 * 5 * 4 * 2); + migraphx::shape s{migraphx::shape::float_type, {10, 5, 4, 7}}; + std::vector data(10 
* 5 * 4 * 7); std::iota(data.begin(), data.end(), 0); migraphx::parameter_map pp; pp["x"] = migraphx::argument(s, data.data()); pp["y"] = migraphx::argument(s, data.data()); - // migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; - // std::vector rois_data = {0.1, 0.15, 0.6, 0.35, - // 2.1, 1.73, 3.8, 2.13}; - // migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index - // std::vector bi_data = {0, 1}; - - migraphx::shape srois{migraphx::shape::float_type, {1, 4}}; - std::vector rois_data = {2.1, 1.73, 3.8, 2.13}; - migraphx::shape sbi{migraphx::shape::int64_type, {1}}; // batch_index - std::vector bi_data = {0}; - + migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; + std::vector rois_data = { + 2.1, 1.73, 3.8, 2.13, + 0.1, 0.15, 0.6, 0.35 + }; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + std::vector bi_data = {1, 0}; +printf("sdfkgjusdfgjk\n"); pp["rois"] = migraphx::argument(srois, rois_data.data()); pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); auto result = p.eval(pp).back(); +printf(" dfssdgf \n"); std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); - printf(" result: "); - for(auto aa : result_vector) - printf(" %f ", aa); - printf("\n"); +// printf(" result: "); +// for(int aa = 0; aa < result_vector.size(); aa++) +// { +// printf(" %f ", result_vector[aa]); +// if(aa % s.lens()[0] == s.lens()[0]-1) +// printf("\n"); +// } printf("\n"); std::vector gold = { - 0.000000, 0.022222, 0.200000, 0.400000, 0.600000, 0.500000, 0.522222, 0.700000, - 0.900000, 1.100000, 1.500000, 1.522223, 1.700000, 1.900000, 2.100000, 2.500000, - 2.522222, 2.700000, 2.900000, 3.100000, 3.500000, 3.522222, 3.700000, 3.900000, - 4.100000, 20.000000, 20.022223, 20.200001, 20.400000, 20.600000, 20.500000, 20.522223, - 20.700001, 20.900000, 21.100000, 21.500000, 21.522223, 21.700001, 21.900000, 22.100000, - 22.500000, 22.522223, 22.700001, 22.900000, 
23.100000, 23.500000, 23.522223, 23.700001, - 23.900000, 24.100000, 5.888889, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, - 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, - 0.000000, 6.000000, 0.000000, 0.000000, 0.000000, 0.000000, 6.000000, 0.000000, - 0.000000, 0.000000, 0.000000, 12.555555, 0.000000, 0.000000, 0.000000, 0.000000, - 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, - 0.000000, 0.000000, 12.666667, 0.000000, 0.000000, 0.000000, 0.000000, 12.666667, - 0.000000, 0.000000, 0.000000, 0.000000}; + 3.1666667, 3.5000002, 3.8333333, + 4.566667 , 4.9 , 5.2333336, + 5.9666677, 6.3 , 6.6333337, + 7.366667 , 7.7000003, 8.033334 , + 8.766666 , 9.100001 , 9.433333 , + + + 31.166666 , 31.5 , 31.833334 , + 32.566666 , 32.9 , 33.23333 , + 33.966667 , 34.300003 , 34.633335 , + 35.366665 , 35.699997 , 36.033337 , + 36.766666 , 37.100002 , 37.433334 , + + + 59.166668 , 59.5 , 59.833332 , + 60.566666 , 60.899998 , 61.23333 , + 61.966667 , 62.299995 , 62.633335 , + 63.366665 , 63.700005 , 64.03334 , + 64.76666 , 65.100006 , 65.433334 , + + + 87.166664 , 87.5 , 87.83334 , + 88.566666 , 88.899994 , 89.23333 , + 89.96666 , 90.30001 , 90.63333 , + 91.36667 , 91.7 , 92.033325 , + 92.766655 , 93.100006 , 93.433334 , + + +115.166664 , 115.5 , 115.833336 , + 116.56668 , 116.899994 , 117.23333 , + 117.96666 , 118.30001 , 118.63333 , + 119.36667 , 119.700005 , 120.03334 , + 120.766655 , 121.100006 , 121.433334 , + + + +165.76666 , 166.80742 , 55.666668 , + 165.76666 , 166.80742 , 55.666668 , + 110.51111 , 111.20494 , 37.11111 , + 0. , 0. , 0. , + 0. , 0. , 0. , + + +193.76666 , 194.80742 , 65. , + 193.76666 , 194.80742 , 65. , + 129.17778 , 129.87161 , 43.333332 , + 0. , 0. , 0. , + 0. , 0. , 0. , + + +221.76668 , 222.80742 , 74.333336 , + 221.76668 , 222.80742 , 74.333336 , + 147.84445 , 148.53827 , 49.555557 , + 0. , 0. , 0. , + 0. , 0. , 0. 
, + + +249.76668 , 250.8074 , 83.666664 , + 249.76668 , 250.8074 , 83.666664 , + 166.51111 , 167.20494 , 55.77778 , + 0. , 0. , 0. , + 0. , 0. , 0. , + + +277.7667 , 278.8074 , 93. , + 277.7667 , 278.8074 , 93. , + 185.17778 , 185.87161 , 62. , + 0. , 0. , 0. , + 0. , 0. , 0. +// 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, +// 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, +// 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, +// 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, +// 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, +// 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, +// 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, +// 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, +// 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, +// 1.90476179e-02, 1.90476179e-02, 2.39858869e-02, +// 1.07936502e-01, 2.19047621e-01, 3.30158740e-01, +// 4.41269815e-01, 5.52380979e-01, 6.63492084e-01, +// 1.71428561e-01, 1.71428561e-01, 1.76366836e-01, +// 2.60317445e-01, 3.71428549e-01, 4.82539713e-01, +// 5.93650818e-01, 7.04761863e-01, 8.15872967e-01, +// 3.42857152e-01, 3.42857152e-01, 3.47795397e-01, +// 4.31746036e-01, 5.42857111e-01, 6.53968275e-01, +// 7.65079260e-01, 8.76190484e-01, 9.87301588e-01, +// 5.14285743e-01, 5.14285743e-01, 5.19223928e-01, +// 6.03174567e-01, 7.14285672e-01, 8.25396836e-01, +// 9.36507940e-01, 1.04761910e+00, 1.15873003e+00, + +// 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, +// 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, +// 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, +// 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, +// 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, +// 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, +// 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, +// 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, +// 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, +// 1.20190477e+01, 1.20190477e+01, 1.20239868e+01, +// 1.21079369e+01, 1.22190475e+01, 1.23301582e+01, +// 1.24412699e+01, 1.25523796e+01, 1.26634922e+01, +// 
1.21714277e+01, 1.21714277e+01, 1.21763659e+01, +// 1.22603178e+01, 1.23714285e+01, 1.24825401e+01, +// 1.25936518e+01, 1.27047615e+01, 1.28158722e+01, +// 1.23428583e+01, 1.23428583e+01, 1.23477964e+01, +// 1.24317465e+01, 1.25428581e+01, 1.26539688e+01, +// 1.27650795e+01, 1.28761902e+01, 1.29873009e+01, +// 1.25142860e+01, 1.25142860e+01, 1.25192232e+01, +// 1.26031752e+01, 1.27142859e+01, 1.28253975e+01, +// 1.29365072e+01, 1.30476189e+01, 1.31587305e+01, + + +// 2.88403187e+01, 2.90094528e+01, 2.90514297e+01, +// 2.90514297e+01, 2.90514297e+01, 2.90514297e+01, +// 2.90514297e+01, 9.68380928e+00, 0.00000000e+00, +// 2.91831741e+01, 2.93523083e+01, 2.93942871e+01, +// 2.93942871e+01, 2.93942871e+01, 2.93942871e+01, +// 2.93942871e+01, 9.79809570e+00, 0.00000000e+00, +// 2.95260353e+01, 2.96951675e+01, 2.97371426e+01, +// 2.97371426e+01, 2.97371426e+01, 2.97371426e+01, +// 2.97371426e+01, 9.91238022e+00, 0.00000000e+00, +// 2.98688869e+01, 3.00380211e+01, 3.00799999e+01, +// 3.00799999e+01, 3.00799999e+01, 3.00799999e+01, +// 3.00799999e+01, 1.00266676e+01, 0.00000000e+00, +// 3.02117481e+01, 3.03808823e+01, 3.04228554e+01, +// 3.04228554e+01, 3.04228554e+01, 3.04228554e+01, +// 3.04228554e+01, 1.01409521e+01, 0.00000000e+00, +// 3.05546055e+01, 3.07237377e+01, 3.07657166e+01, +// 3.07657166e+01, 3.07657166e+01, 3.07657166e+01, +// 3.07657166e+01, 1.02552385e+01, 0.00000000e+00, +// 3.08974609e+01, 3.10665970e+01, 3.11085720e+01, +// 3.11085720e+01, 3.11085720e+01, 3.11085720e+01, +// 3.11085720e+01, 1.03695240e+01, 0.00000000e+00, + +// 4.08403168e+01, 4.10094528e+01, 4.10514259e+01, +// 4.10514259e+01, 4.10514259e+01, 4.10514259e+01, +// 4.10514259e+01, 1.36838093e+01, 0.00000000e+00, +// 4.11831741e+01, 4.13523102e+01, 4.13942871e+01, +// 4.13942871e+01, 4.13942871e+01, 4.13942871e+01, +// 4.13942871e+01, 1.37980957e+01, 0.00000000e+00, +// 4.15260315e+01, 4.16951675e+01, 4.17371483e+01, +// 4.17371483e+01, 4.17371483e+01, 4.17371483e+01, +// 4.17371483e+01, 
1.39123802e+01, 0.00000000e+00, +// 4.18688889e+01, 4.20380211e+01, 4.20799980e+01, +// 4.20799980e+01, 4.20799980e+01, 4.20799980e+01, +// 4.20799980e+01, 1.40266676e+01, 0.00000000e+00, +// 4.22117462e+01, 4.23808823e+01, 4.24228554e+01, +// 4.24228554e+01, 4.24228554e+01, 4.24228554e+01, +// 4.24228554e+01, 1.41409521e+01, 0.00000000e+00, +// 4.25546036e+01, 4.27237396e+01, 4.27657166e+01, +// 4.27657166e+01, 4.27657166e+01, 4.27657166e+01, +// 4.27657166e+01, 1.42552385e+01, 0.00000000e+00, +// 4.28974609e+01, 4.30666008e+01, 4.31085777e+01, +// 4.31085777e+01, 4.31085777e+01, 4.31085777e+01, +// 4.31085777e+01, 1.43695240e+01, 0.00000000e+00 +}; float alpha = 0.5; std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { return std::max(0.0f, x) + std::min(0.0f, alpha * std::expm1(x / alpha)); From fb30afbf448cea4048f93586671f55f243db2123 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 7 Oct 2024 21:42:30 +0000 Subject: [PATCH 35/56] probably fixed --- ort_roialign.py | 5 +- src/include/migraphx/op/roialign.hpp | 5 +- test/onnx/verify/roialign_verify_test.cpp | 233 ++++++---------------- 3 files changed, 71 insertions(+), 172 deletions(-) diff --git a/ort_roialign.py b/ort_roialign.py index ec6e1920b8d..eb60fb05352 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -14,14 +14,15 @@ rois = np.array( [ [0.1, 0.15, 0.6, 0.35], - [2.1, 1.73, 3.8, 2.13] + [2.1, 1.73, 3.8, 2.13] ], dtype='f') themodel = 'roialign_test.onnx' sess = rt.InferenceSession( '/workspace/AMDMIGraphX/test/onnx/' + themodel) -res = sess.run(['y'], {'x': x, 'rois': rois, 'batch_ind': [0, 1]}) +res = sess.run(['y'], {'x': x, 'rois': rois, 'batch_ind': [1, 0]}) print(' ORT test model is ' + themodel + ', rois_data is \n', rois, ' result is \n', res) + \ No newline at end of file diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 1d19f3e514f..0fc68736fa8 100644 --- a/src/include/migraphx/op/roialign.hpp +++ 
b/src/include/migraphx/op/roialign.hpp @@ -169,7 +169,6 @@ struct roialign float ly = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; -printf(" !!!!! %ld\n", index); // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; }); @@ -263,12 +262,12 @@ printf(" !!!!! %ld\n", index); roi_size[ii] = roi_ends[ii] - roi_starts[ii]; if(coord_trans_mode != "half_pixel") roi_size[ii] = std::max(roi_size[ii], 1.0f); -printf("\n KKKKK ii %ld roi_size %f roi_batch_ind %ld out_dims %lu \n", ii, roi_size[ii] , roi_batch_ind, out_dims[ii]); +// printf("\n KKKKK ii %ld roi_size %f roi_batch_ind %ld out_dims %lu \n", ii, roi_size[ii] , roi_batch_ind, out_dims[ii]); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]); -printf(" KLKLKL bin_grid_size= %ld x %ld\n", bin_grid_size[0], bin_grid_size[1]); +// printf(" KLKLKL bin_grid_size= %ld x %ld\n", bin_grid_size[0], bin_grid_size[1]); } // we want to precalculate indices and weights shared by all channels, diff --git a/test/onnx/verify/roialign_verify_test.cpp b/test/onnx/verify/roialign_verify_test.cpp index 747171a8a7a..1ed1c52ccc4 100644 --- a/test/onnx/verify/roialign_verify_test.cpp +++ b/test/onnx/verify/roialign_verify_test.cpp @@ -41,17 +41,16 @@ TEST_CASE(roialign_verify_test) migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; std::vector rois_data = { - 2.1, 1.73, 3.8, 2.13, - 0.1, 0.15, 0.6, 0.35 + 0.1, 0.15, 0.6, 0.35, + 2.1, 1.73, 3.8, 2.13 }; migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index - std::vector bi_data = {1, 0}; -printf("sdfkgjusdfgjk\n"); + std::vector bi_data = {1, 0}; + pp["rois"] = migraphx::argument(srois, rois_data.data()); pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); auto result = p.eval(pp).back(); -printf(" dfssdgf \n"); std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); 
}); @@ -59,172 +58,72 @@ printf(" dfssdgf \n"); // for(int aa = 0; aa < result_vector.size(); aa++) // { // printf(" %f ", result_vector[aa]); -// if(aa % s.lens()[0] == s.lens()[0]-1) +// if(aa % 15 == 15-1) // printf("\n"); // } printf("\n"); std::vector gold = { - 3.1666667, 3.5000002, 3.8333333, - 4.566667 , 4.9 , 5.2333336, - 5.9666677, 6.3 , 6.6333337, - 7.366667 , 7.7000003, 8.033334 , - 8.766666 , 9.100001 , 9.433333 , - - - 31.166666 , 31.5 , 31.833334 , - 32.566666 , 32.9 , 33.23333 , - 33.966667 , 34.300003 , 34.633335 , - 35.366665 , 35.699997 , 36.033337 , - 36.766666 , 37.100002 , 37.433334 , - - - 59.166668 , 59.5 , 59.833332 , - 60.566666 , 60.899998 , 61.23333 , - 61.966667 , 62.299995 , 62.633335 , - 63.366665 , 63.700005 , 64.03334 , - 64.76666 , 65.100006 , 65.433334 , - - - 87.166664 , 87.5 , 87.83334 , - 88.566666 , 88.899994 , 89.23333 , - 89.96666 , 90.30001 , 90.63333 , - 91.36667 , 91.7 , 92.033325 , - 92.766655 , 93.100006 , 93.433334 , - - -115.166664 , 115.5 , 115.833336 , - 116.56668 , 116.899994 , 117.23333 , - 117.96666 , 118.30001 , 118.63333 , - 119.36667 , 119.700005 , 120.03334 , - 120.766655 , 121.100006 , 121.433334 , - - - -165.76666 , 166.80742 , 55.666668 , - 165.76666 , 166.80742 , 55.666668 , - 110.51111 , 111.20494 , 37.11111 , - 0. , 0. , 0. , - 0. , 0. , 0. , - - -193.76666 , 194.80742 , 65. , - 193.76666 , 194.80742 , 65. , - 129.17778 , 129.87161 , 43.333332 , - 0. , 0. , 0. , - 0. , 0. , 0. , - - -221.76668 , 222.80742 , 74.333336 , - 221.76668 , 222.80742 , 74.333336 , - 147.84445 , 148.53827 , 49.555557 , - 0. , 0. , 0. , - 0. , 0. , 0. , - - -249.76668 , 250.8074 , 83.666664 , - 249.76668 , 250.8074 , 83.666664 , - 166.51111 , 167.20494 , 55.77778 , - 0. , 0. , 0. , - 0. , 0. , 0. , - - -277.7667 , 278.8074 , 93. , - 277.7667 , 278.8074 , 93. , - 185.17778 , 185.87161 , 62. , - 0. , 0. , 0. , - 0. , 0. , 0. 
-// 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, -// 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, -// 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, -// 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, -// 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, -// 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, -// 0.00000000e+00, 0.00000000e+00, 4.93826950e-03, -// 8.88888836e-02, 2.00000003e-01, 3.11111122e-01, -// 4.22222227e-01, 5.33333302e-01, 6.44444466e-01, -// 1.90476179e-02, 1.90476179e-02, 2.39858869e-02, -// 1.07936502e-01, 2.19047621e-01, 3.30158740e-01, -// 4.41269815e-01, 5.52380979e-01, 6.63492084e-01, -// 1.71428561e-01, 1.71428561e-01, 1.76366836e-01, -// 2.60317445e-01, 3.71428549e-01, 4.82539713e-01, -// 5.93650818e-01, 7.04761863e-01, 8.15872967e-01, -// 3.42857152e-01, 3.42857152e-01, 3.47795397e-01, -// 4.31746036e-01, 5.42857111e-01, 6.53968275e-01, -// 7.65079260e-01, 8.76190484e-01, 9.87301588e-01, -// 5.14285743e-01, 5.14285743e-01, 5.19223928e-01, -// 6.03174567e-01, 7.14285672e-01, 8.25396836e-01, -// 9.36507940e-01, 1.04761910e+00, 1.15873003e+00, - -// 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, -// 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, -// 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, -// 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, -// 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, -// 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, -// 1.20000000e+01, 1.20000000e+01, 1.20049391e+01, -// 1.20888891e+01, 1.21999998e+01, 1.23111115e+01, -// 1.24222221e+01, 1.25333328e+01, 1.26444445e+01, -// 1.20190477e+01, 1.20190477e+01, 1.20239868e+01, -// 1.21079369e+01, 1.22190475e+01, 1.23301582e+01, -// 1.24412699e+01, 1.25523796e+01, 1.26634922e+01, -// 1.21714277e+01, 1.21714277e+01, 1.21763659e+01, -// 1.22603178e+01, 1.23714285e+01, 1.24825401e+01, -// 1.25936518e+01, 1.27047615e+01, 1.28158722e+01, -// 1.23428583e+01, 1.23428583e+01, 1.23477964e+01, -// 1.24317465e+01, 1.25428581e+01, 1.26539688e+01, -// 1.27650795e+01, 
1.28761902e+01, 1.29873009e+01, -// 1.25142860e+01, 1.25142860e+01, 1.25192232e+01, -// 1.26031752e+01, 1.27142859e+01, 1.28253975e+01, -// 1.29365072e+01, 1.30476189e+01, 1.31587305e+01, - - -// 2.88403187e+01, 2.90094528e+01, 2.90514297e+01, -// 2.90514297e+01, 2.90514297e+01, 2.90514297e+01, -// 2.90514297e+01, 9.68380928e+00, 0.00000000e+00, -// 2.91831741e+01, 2.93523083e+01, 2.93942871e+01, -// 2.93942871e+01, 2.93942871e+01, 2.93942871e+01, -// 2.93942871e+01, 9.79809570e+00, 0.00000000e+00, -// 2.95260353e+01, 2.96951675e+01, 2.97371426e+01, -// 2.97371426e+01, 2.97371426e+01, 2.97371426e+01, -// 2.97371426e+01, 9.91238022e+00, 0.00000000e+00, -// 2.98688869e+01, 3.00380211e+01, 3.00799999e+01, -// 3.00799999e+01, 3.00799999e+01, 3.00799999e+01, -// 3.00799999e+01, 1.00266676e+01, 0.00000000e+00, -// 3.02117481e+01, 3.03808823e+01, 3.04228554e+01, -// 3.04228554e+01, 3.04228554e+01, 3.04228554e+01, -// 3.04228554e+01, 1.01409521e+01, 0.00000000e+00, -// 3.05546055e+01, 3.07237377e+01, 3.07657166e+01, -// 3.07657166e+01, 3.07657166e+01, 3.07657166e+01, -// 3.07657166e+01, 1.02552385e+01, 0.00000000e+00, -// 3.08974609e+01, 3.10665970e+01, 3.11085720e+01, -// 3.11085720e+01, 3.11085720e+01, 3.11085720e+01, -// 3.11085720e+01, 1.03695240e+01, 0.00000000e+00, - -// 4.08403168e+01, 4.10094528e+01, 4.10514259e+01, -// 4.10514259e+01, 4.10514259e+01, 4.10514259e+01, -// 4.10514259e+01, 1.36838093e+01, 0.00000000e+00, -// 4.11831741e+01, 4.13523102e+01, 4.13942871e+01, -// 4.13942871e+01, 4.13942871e+01, 4.13942871e+01, -// 4.13942871e+01, 1.37980957e+01, 0.00000000e+00, -// 4.15260315e+01, 4.16951675e+01, 4.17371483e+01, -// 4.17371483e+01, 4.17371483e+01, 4.17371483e+01, -// 4.17371483e+01, 1.39123802e+01, 0.00000000e+00, -// 4.18688889e+01, 4.20380211e+01, 4.20799980e+01, -// 4.20799980e+01, 4.20799980e+01, 4.20799980e+01, -// 4.20799980e+01, 1.40266676e+01, 0.00000000e+00, -// 4.22117462e+01, 4.23808823e+01, 4.24228554e+01, -// 4.24228554e+01, 4.24228554e+01, 
4.24228554e+01, -// 4.24228554e+01, 1.41409521e+01, 0.00000000e+00, -// 4.25546036e+01, 4.27237396e+01, 4.27657166e+01, -// 4.27657166e+01, 4.27657166e+01, 4.27657166e+01, -// 4.27657166e+01, 1.42552385e+01, 0.00000000e+00, -// 4.28974609e+01, 4.30666008e+01, 4.31085777e+01, -// 4.31085777e+01, 4.31085777e+01, 4.31085777e+01, -// 4.31085777e+01, 1.43695240e+01, 0.00000000e+00 + 143.16667 , 143.49998 , 143.83333 , + 144.56667 , 144.9 , 145.23334 , + 145.96667 , 146.3 , 146.63333 , + 147.36667 , 147.70001 , 148.03334 , + 148.76666 , 149.09999 , 149.43333 , + + 171.16667 , 171.5 , 171.83333 , + 172.56667 , 172.90001 , 173.23334 , + 173.96667 , 174.3 , 174.63333 , + 175.36667 , 175.70001 , 176.03333 , + 176.76666 , 177.09999 , 177.43335 , + + 199.16667 , 199.5 , 199.83333 , + 200.56667 , 200.90001 , 201.23334 , + 201.96666 , 202.3 , 202.63333 , + 203.36665 , 203.70001 , 204.03333 , + 204.76668 , 205.09999 , 205.43333 , + + 227.16667 , 227.5 , 227.83333 , + 228.56668 , 228.90001 , 229.23332 , + 229.96669 , 230.29999 , 230.63333 , + 231.36664 , 231.70001 , 232.03334 , + 232.76668 , 233.09999 , 233.43332 , + + 255.16667 , 255.5 , 255.83333 , + 256.56668 , 256.90002 , 257.2333 , + 257.96667 , 258.3 , 258.63333 , + 259.36664 , 259.69998 , 260.03333 , + 260.7667 , 261.09998 , 261.43338 , + + + 25.766665, 26.807405, 9. , + 25.766665, 26.807405, 9. , + 17.177776, 17.871605, 6. , + 0. , 0. , 0. , + 0. , 0. , 0. , + + 53.766666, 54.807407, 18.333334, + 53.766666, 54.807407, 18.333334, + 35.844444, 36.538273, 12.222222, + 0. , 0. , 0. , + 0. , 0. , 0. , + + 81.76667 , 82.8074 , 27.666666, + 81.76667 , 82.8074 , 27.666666, + 54.51111 , 55.204937, 18.444445, + 0. , 0. , 0. , + 0. , 0. , 0. , + + 109.76667 , 110.8074 , 37. , + 109.76667 , 110.8074 , 37. , + 73.17777 , 73.871605, 24.666666, + 0. , 0. , 0. , + 0. , 0. , 0. , + + 137.76666 , 138.80742 , 46.333332, + 137.76666 , 138.80742 , 46.333332, + 91.844444, 92.53828 , 30.88889 , + 0. , 0. , 0. , + 0. , 0. , 0. 
}; - float alpha = 0.5; - std::transform(data.begin(), data.end(), gold.begin(), [&](auto x) { - return std::max(0.0f, x) + std::min(0.0f, alpha * std::expm1(x / alpha)); - }); + EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } From 4c11f71987beb428544e3644cc6568dfb21a8544 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 7 Oct 2024 21:54:25 +0000 Subject: [PATCH 36/56] clean up debug code --- src/include/migraphx/op/roialign.hpp | 16 ---- test/onnx/verify/roialign_verify_test.cpp | 106 +++++++--------------- 2 files changed, 32 insertions(+), 90 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 0fc68736fa8..de5a58aaa81 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -121,8 +121,6 @@ struct roialign { std::vector results(bin_grid_size[0] * bin_grid_size[1] * output_height * output_width); -// printf(" bin grid %ldx%ld, height %lu width %lu\n", bin_grid_size[0], bin_grid_size[1], output_height, -// output_width); shape_for_each(comp_s, [&](const auto& idx_v, size_t index) { // The p and i indexes correspond to nested looping parameters in ORT that go in y, x // order. The i[x] value is least significant and iterates the fastest. @@ -139,12 +137,8 @@ struct roialign { xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; -// printf(" QQQQQQ L137 x=%f y=%f ", xy[0], xy[1]); - // xy[ii] = (coord_trans_mode != "half_pixel") ? 
(xy[ii] - 0.5f) : xy[ii]; -// printf(" L139 %f ", xy[ii]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { -// printf(" L142 results = pos_weight i=%lu dims=%lu, %lu \n ", index, dims[0], dims[1]); results[index] = pos_weight{}; return; } @@ -152,13 +146,10 @@ struct roialign xy[ii] = std::max(xy[ii], 0.0f); low[ii] = xy[ii]; high[ii] = low[ii] + 1; -// printf(" L148 %f low[ii] %lu, dims[ii] %lu", xy[ii], low[ii], dims[ii]); if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; -// printf(" L154 %f ", xy[ii]); } -// printf(" \n"); } results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], @@ -172,11 +163,6 @@ struct roialign // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; }); -// printf(" AAAAA here we are\n"); - // for(int iix = 0; iix < results.size(); iix++) - // printf(" SSSSS %ld %d\n", results.size(), iix); - // printf(" SSSSS %d %lu %lu %lu %lu %f %f %f %f\n", iix, results[iix].pos[0], results[iix].pos[1], results[iix].pos[2], results[iix].pos[3], - // results[iix].w[0], results[iix].w[1], results[iix].w[2], results[iix].w[3]); return results; } @@ -262,12 +248,10 @@ struct roialign roi_size[ii] = roi_ends[ii] - roi_starts[ii]; if(coord_trans_mode != "half_pixel") roi_size[ii] = std::max(roi_size[ii], 1.0f); -// printf("\n KKKKK ii %ld roi_size %f roi_batch_ind %ld out_dims %lu \n", ii, roi_size[ii] , roi_batch_ind, out_dims[ii]); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (sampling_ratio > 0) ? 
sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]); -// printf(" KLKLKL bin_grid_size= %ld x %ld\n", bin_grid_size[0], bin_grid_size[1]); } // we want to precalculate indices and weights shared by all channels, diff --git a/test/onnx/verify/roialign_verify_test.cpp b/test/onnx/verify/roialign_verify_test.cpp index 1ed1c52ccc4..051080adf25 100644 --- a/test/onnx/verify/roialign_verify_test.cpp +++ b/test/onnx/verify/roialign_verify_test.cpp @@ -40,11 +40,8 @@ TEST_CASE(roialign_verify_test) pp["y"] = migraphx::argument(s, data.data()); migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; - std::vector rois_data = { - 0.1, 0.15, 0.6, 0.35, - 2.1, 1.73, 3.8, 2.13 - }; - migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + std::vector rois_data = {0.1, 0.15, 0.6, 0.35, 2.1, 1.73, 3.8, 2.13}; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; std::vector bi_data = {1, 0}; pp["rois"] = migraphx::argument(srois, rois_data.data()); @@ -54,76 +51,37 @@ TEST_CASE(roialign_verify_test) std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); -// printf(" result: "); -// for(int aa = 0; aa < result_vector.size(); aa++) -// { -// printf(" %f ", result_vector[aa]); -// if(aa % 15 == 15-1) -// printf("\n"); -// } printf("\n"); - + // gold results were generated with onnxruntime std::vector gold = { - 143.16667 , 143.49998 , 143.83333 , - 144.56667 , 144.9 , 145.23334 , - 145.96667 , 146.3 , 146.63333 , - 147.36667 , 147.70001 , 148.03334 , - 148.76666 , 149.09999 , 149.43333 , - - 171.16667 , 171.5 , 171.83333 , - 172.56667 , 172.90001 , 173.23334 , - 173.96667 , 174.3 , 174.63333 , - 175.36667 , 175.70001 , 176.03333 , - 176.76666 , 177.09999 , 177.43335 , - - 199.16667 , 199.5 , 199.83333 , - 200.56667 , 200.90001 , 201.23334 , - 201.96666 , 202.3 , 202.63333 , - 203.36665 , 203.70001 , 204.03333 , - 204.76668 , 205.09999 , 205.43333 , - - 227.16667 , 227.5 , 227.83333 , - 228.56668 , 
228.90001 , 229.23332 , - 229.96669 , 230.29999 , 230.63333 , - 231.36664 , 231.70001 , 232.03334 , - 232.76668 , 233.09999 , 233.43332 , - - 255.16667 , 255.5 , 255.83333 , - 256.56668 , 256.90002 , 257.2333 , - 257.96667 , 258.3 , 258.63333 , - 259.36664 , 259.69998 , 260.03333 , - 260.7667 , 261.09998 , 261.43338 , - - - 25.766665, 26.807405, 9. , - 25.766665, 26.807405, 9. , - 17.177776, 17.871605, 6. , - 0. , 0. , 0. , - 0. , 0. , 0. , - - 53.766666, 54.807407, 18.333334, - 53.766666, 54.807407, 18.333334, - 35.844444, 36.538273, 12.222222, - 0. , 0. , 0. , - 0. , 0. , 0. , - - 81.76667 , 82.8074 , 27.666666, - 81.76667 , 82.8074 , 27.666666, - 54.51111 , 55.204937, 18.444445, - 0. , 0. , 0. , - 0. , 0. , 0. , - - 109.76667 , 110.8074 , 37. , - 109.76667 , 110.8074 , 37. , - 73.17777 , 73.871605, 24.666666, - 0. , 0. , 0. , - 0. , 0. , 0. , - - 137.76666 , 138.80742 , 46.333332, - 137.76666 , 138.80742 , 46.333332, - 91.844444, 92.53828 , 30.88889 , - 0. , 0. , 0. , - 0. , 0. , 0. -}; + 143.16667, 143.49998, 143.83333, 144.56667, 144.9, 145.23334, 145.96667, 146.3, + 146.63333, 147.36667, 147.70001, 148.03334, 148.76666, 149.09999, 149.43333, + + 171.16667, 171.5, 171.83333, 172.56667, 172.90001, 173.23334, 173.96667, 174.3, + 174.63333, 175.36667, 175.70001, 176.03333, 176.76666, 177.09999, 177.43335, + + 199.16667, 199.5, 199.83333, 200.56667, 200.90001, 201.23334, 201.96666, 202.3, + 202.63333, 203.36665, 203.70001, 204.03333, 204.76668, 205.09999, 205.43333, + + 227.16667, 227.5, 227.83333, 228.56668, 228.90001, 229.23332, 229.96669, 230.29999, + 230.63333, 231.36664, 231.70001, 232.03334, 232.76668, 233.09999, 233.43332, + + 255.16667, 255.5, 255.83333, 256.56668, 256.90002, 257.2333, 257.96667, 258.3, + 258.63333, 259.36664, 259.69998, 260.03333, 260.7667, 261.09998, 261.43338, + + 25.766665, 26.807405, 9., 25.766665, 26.807405, 9., 17.177776, 17.871605, + 6., 0., 0., 0., 0., 0., 0., + + 53.766666, 54.807407, 18.333334, 53.766666, 54.807407, 18.333334, 
35.844444, 36.538273, + 12.222222, 0., 0., 0., 0., 0., 0., + + 81.76667, 82.8074, 27.666666, 81.76667, 82.8074, 27.666666, 54.51111, 55.204937, + 18.444445, 0., 0., 0., 0., 0., 0., + + 109.76667, 110.8074, 37., 109.76667, 110.8074, 37., 73.17777, 73.871605, + 24.666666, 0., 0., 0., 0., 0., 0., + + 137.76666, 138.80742, 46.333332, 137.76666, 138.80742, 46.333332, 91.844444, 92.53828, + 30.88889, 0., 0., 0., 0., 0., 0.}; EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } From 0b0bcb67d8d3feeaf20d00195c98c975cd48dd66 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 7 Oct 2024 22:43:51 +0000 Subject: [PATCH 37/56] fix roialign_test onnx test to reflect changed test file --- test/onnx/parse/roialign_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/onnx/parse/roialign_test.cpp b/test/onnx/parse/roialign_test.cpp index 05f27b6473c..346346727b2 100644 --- a/test/onnx/parse/roialign_test.cpp +++ b/test/onnx/parse/roialign_test.cpp @@ -27,8 +27,8 @@ TEST_CASE(roialign_test) { migraphx::shape sx{migraphx::shape::float_type, {10, 5, 4, 7}}; - migraphx::shape srois{migraphx::shape::float_type, {8, 4}}; - migraphx::shape sbi{migraphx::shape::int64_type, {8}}; + migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; migraphx::program p; auto* mm = p.get_main_module(); @@ -41,7 +41,7 @@ TEST_CASE(roialign_test) {{"coordinate_transformation_mode", "output_half_pixel"}, {"spatial_scale", 2.0f}, {"output_height", 5}, - {"output_width", 5}, + {"output_width", 3}, {"sampling_ratio", 3}}), x, rois, From 9d658a72cebb59bba89bf81a80c73e1b680c7e9d Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 7 Oct 2024 23:50:10 +0000 Subject: [PATCH 38/56] Update Onnx test models to allow specified op set; add roialign default parsing tests with op set; re-add a new verify test that wasn't commmitted yet --- test/onnx/gen_onnx.py | 28 +++++++++++++++++++++--- 
test/onnx/roialign_default_test.onnx | Bin 197 -> 199 bytes test/onnx/roialign_default_test_12.onnx | Bin 0 -> 205 bytes test/verify/roialign_verify_test.cpp | 0 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 test/onnx/roialign_default_test_12.onnx create mode 100644 test/verify/roialign_verify_test.cpp diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 14206f7abbc..27cc178dd96 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -31,10 +31,12 @@ from onnx.numpy_helper import from_array -def onnx_test(external_data=False): +def onnx_test(external_data=False, opset_imports=None): def create_onnx_test(op_test): def run_test(): op_info = op_test() + opset_id = [helper.make_operatorsetid('', opset_imports)] if opset_imports is not None else None + if len(op_info) > 3: graph_def = helper.make_graph(op_info[0], op_test.__name__, @@ -45,7 +47,9 @@ def run_test(): graph_def = helper.make_graph(op_info[0], op_test.__name__, op_info[1], op_info[2]) model_def = helper.make_model(graph_def, - producer_name=op_test.__name__) + producer_name=op_test.__name__, + opset_imports=opset_id + ) onnx.save_model(model_def, '{}.onnx'.format(op_test.__name__), save_as_external_data=external_data, @@ -10587,8 +10591,26 @@ def rnn_r_3arg_layout_test(): return ([node], [seq, w, r], [hs, output]) -@onnx_test() +@onnx_test(external_data=False, opset_imports=16) def roialign_default_test(): + # The op. ROIAlign had an attribute coordinate_transformation_mode added + # as of Onnx opset 16; we make opset-specific test models which give + # different default values. 
+ x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 4, 7, 8]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [8, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [8]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [8, 4, 1, 1]) + + node = onnx.helper.make_node('RoiAlign', + inputs=['x', 'rois', 'batch_ind'], + outputs=['y']) + + return ([node], [x, roi, bi], [y]) + + +@onnx_test(external_data=False, opset_imports=12) +def roialign_default_test_12(): + # Same model as in roialign_default_test() but with an older opset specified x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 4, 7, 8]) roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [8, 4]) bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [8]) diff --git a/test/onnx/roialign_default_test.onnx b/test/onnx/roialign_default_test.onnx index 5b0165fc093f58fb94b2efcd3c55023ec430af2d..cc47b78b9dfbfa97583e635b7268cee0598a4f51 100644 GIT binary patch delta 12 TcmX@gc${&)&rHn8OwWr?Nli;E%_)g5Ni8mkH#D+Z!N{e^#aO|`0#s4V#hH{?lAIBr znU^BOSgFJjl%MGc(uG@plr&JYkQkQ;2cwVx7ZV2;5VHU=I}me3i9w7N;^X20%5#9k mK=M)I$QE&Nad5B;fi!U>0Zpq!G7YR6D8UHAPApsu0z3dznkkt8 literal 0 HcmV?d00001 diff --git a/test/verify/roialign_verify_test.cpp b/test/verify/roialign_verify_test.cpp new file mode 100644 index 00000000000..e69de29bb2d From c54c1399fda508ae1060f36ef7383367d4b8929b Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 7 Oct 2024 23:53:09 +0000 Subject: [PATCH 39/56] add 1 file to previous commit --- test/onnx/parse/roialign_default_test.cpp | 27 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/test/onnx/parse/roialign_default_test.cpp b/test/onnx/parse/roialign_default_test.cpp index b4869740a57..ceb4b12ccba 100644 --- a/test/onnx/parse/roialign_default_test.cpp +++ b/test/onnx/parse/roialign_default_test.cpp @@ -36,16 +36,33 @@ TEST_CASE(roialign_default_test) auto rois = 
mm->add_parameter("rois", srois); auto bi = mm->add_parameter("batch_ind", sbi); - // Due to the onnx model using opset 12, the coordinate_transformation_mode should be set to - // output_half_pixel + // Depending on whether the model was built for opset 16 or earlier, the default + // coordinate_transformation_mode is different. These models had opset specified + // when they were created.. auto r = mm->add_instruction( - migraphx::make_op("roialign", {{"coordinate_transformation_mode", "output_half_pixel"}}), + migraphx::make_op("roialign", {{"coordinate_transformation_mode", "half_pixel"}}), x, rois, bi); mm->add_return({r}); - auto prog = read_onnx("roialign_default_test.onnx"); - EXPECT(p == prog); + + + migraphx::program p_12; + auto* mm_12 = p_12.get_main_module(); + auto x_12 = mm_12->add_parameter("x", sx); + auto rois_12 = mm_12->add_parameter("rois", srois); + auto bi_12 = mm_12->add_parameter("batch_ind", sbi); + + auto r_12 = mm_12->add_instruction( + migraphx::make_op("roialign", {{"coordinate_transformation_mode", "output_half_pixel"}}), + x_12, + rois_12, + bi_12); + mm_12->add_return({r_12}); + auto prog_12 = read_onnx("roialign_default_test_12.onnx"); + EXPECT(p_12 == prog_12); + + } From 6fae7d5716e609a0da335694f331a210c6773ff2 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 8 Oct 2024 15:23:18 +0000 Subject: [PATCH 40/56] file cleanup --- src/program.cpp | 1 - test/onnx/conv_relu_maxpool_test.onnx | Bin 329 -> 316 bytes test/onnx/parse/roialign_default_test.cpp | 10 ++++------ test/verify/roialign_verify_test.cpp | 0 4 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 test/verify/roialign_verify_test.cpp diff --git a/src/program.cpp b/src/program.cpp index f22a3322037..25cb16cc950 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/test/onnx/conv_relu_maxpool_test.onnx b/test/onnx/conv_relu_maxpool_test.onnx index 
4403d8de5f70722d44e60d46fb26e575d7a959ca..f5bfe4c1514a128bbde7d847205baffbb35763fb 100644 GIT binary patch delta 52 zcmX@fw1-KYgH=c{IX|x~z9==PR5!IEF}ENm)oQ^+nN|FJC8@qBce5F}0M$Se A)c^nh delta 65 zcmdnPbdpJigHuQ>IX|x~z9==PG(I=6q98v%C%z=LxWsD3MDbPPI3y=t$YvA<00*)d Awg3PC diff --git a/test/onnx/parse/roialign_default_test.cpp b/test/onnx/parse/roialign_default_test.cpp index ceb4b12ccba..410d7ed62d4 100644 --- a/test/onnx/parse/roialign_default_test.cpp +++ b/test/onnx/parse/roialign_default_test.cpp @@ -36,9 +36,9 @@ TEST_CASE(roialign_default_test) auto rois = mm->add_parameter("rois", srois); auto bi = mm->add_parameter("batch_ind", sbi); - // Depending on whether the model was built for opset 16 or earlier, the default - // coordinate_transformation_mode is different. These models had opset specified - // when they were created.. + // Depending on whether the model was built for Onnx opset 16 or earlier, the default + // coordinate_transformation_mode is different. These model files had explicit opset given + // when they were created. auto r = mm->add_instruction( migraphx::make_op("roialign", {{"coordinate_transformation_mode", "half_pixel"}}), x, @@ -48,7 +48,7 @@ TEST_CASE(roialign_default_test) auto prog = read_onnx("roialign_default_test.onnx"); EXPECT(p == prog); - + // Opset 12 program migraphx::program p_12; auto* mm_12 = p_12.get_main_module(); auto x_12 = mm_12->add_parameter("x", sx); @@ -63,6 +63,4 @@ TEST_CASE(roialign_default_test) mm_12->add_return({r_12}); auto prog_12 = read_onnx("roialign_default_test_12.onnx"); EXPECT(p_12 == prog_12); - - } diff --git a/test/verify/roialign_verify_test.cpp b/test/verify/roialign_verify_test.cpp deleted file mode 100644 index e69de29bb2d..00000000000 From c4565bdf8da3164266dbac2144ba37a0830e3aa5 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 8 Oct 2024 19:33:06 +0000 Subject: [PATCH 41/56] first-try updates to gpu roialign plus misc. 
cleanup; WIP doesn't pass test_verify --- docs/dev/onnx_operators.rst | 2 +- src/include/migraphx/op/roialign.hpp | 2 +- .../include/migraphx/kernels/roialign.hpp | 28 +++++++++++-------- test/onnx/gen_onnx.py | 4 +-- test/verify/test_roialign.cpp | 18 ++++++------ 5 files changed, 31 insertions(+), 23 deletions(-) diff --git a/docs/dev/onnx_operators.rst b/docs/dev/onnx_operators.rst index fc621b4f894..a87af00e755 100644 --- a/docs/dev/onnx_operators.rst +++ b/docs/dev/onnx_operators.rst @@ -697,7 +697,7 @@ Operator Support Matrix | | | | functions are | | | | | not enabled | +--------------------------+-----------+-----------------+------------------------------+ -| RoiAlign | ✅ | FP8, FP16, | | +| RoiAlign | ✅ | FP8, FP16, | | | | | FP32, FP64 | | +--------------------------+-----------+-----------------+------------------------------+ | Round | ✅ | FP8, FP16, | | diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index de5a58aaa81..4231f33621f 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -230,7 +230,7 @@ struct roialign const auto bottom_data = x.begin(); const auto roi_batch_ind = batch_indices[n]; // Do not use rounding; this implementation detail is critical - float offset = (coord_trans_mode == "half_pixel") ? 0.5 : 0.0; + const float offset = (coord_trans_mode == "half_pixel") ? 
0.5 : 0.0; std::array roi_starts = { static_cast(roi[roi_s.index({n, 0})] * spatial_scale - offset), static_cast(roi[roi_s.index({n, 1})] * spatial_scale - offset)}; diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index b7d7216c690..92b40028080 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -24,6 +24,7 @@ #ifndef MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP #define MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP +#include #include #include #include @@ -87,18 +88,19 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( xy[ii] = high[ii] = low[ii] = dims[ii] - 1; } } - array locs = {low[0] * dims[1] + low[1], - low[0] * dims[1] + high[1], - high[0] * dims[1] + low[1], - high[0] * dims[1] + high[1]}; + array locs = {low[1] * dims[0] + low[0], + low[1] * dims[0] + high[0], + high[1] * dims[0] + low[0], + high[1] * dims[0] + high[0]}; - float ly = xy[0] - low[0]; - float lx = xy[1] - low[1]; + float lx = xy[0] - low[0]; + float ly = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; // do calculations in floating point and convert final result to required type array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; + // todo: Should we change the order of these indices? auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); return implicit_conversion(pooling(v01, v23)); @@ -177,12 +179,15 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, const auto offset_rois = rois + (n * roi_column_num); const int batch_ind = ind[n]; + // todo: did roi_offset get initialized to -0.5 in src/targets/gpu/jit/roialign.cpp? 
array roi_starts = { - static_cast(offset_rois[1]) * static_cast(s.spatial_scale), - static_cast(offset_rois[0]) * static_cast(s.spatial_scale)}; + static_cast(offset_rois[0]) * static_cast(s.spatial_scale) - s.roi_offset, + static_cast(offset_rois[1]) * static_cast(s.spatial_scale) - + s.roi_offset}; array roi_ends = { - static_cast(offset_rois[3]) * static_cast(s.spatial_scale), - static_cast(offset_rois[2]) * static_cast(s.spatial_scale)}; + static_cast(offset_rois[2]) * static_cast(s.spatial_scale) - s.roi_offset, + static_cast(offset_rois[3]) * static_cast(s.spatial_scale) - + s.roi_offset}; array roi_size{}; array bin_size{}; @@ -191,7 +196,8 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, for(index_int ii = 0; ii < roi_size.size(); ++ii) { roi_size[ii] = roi_ends[ii] - roi_starts[ii]; - roi_size[ii] = migraphx::max(roi_size[ii], 1.0f); + if(s.roi_offset == 0.f) + roi_size[ii] = migraphx::max(roi_size[ii], 1.0f); bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_grid_size[ii] = (s.sampling_ratio > 0) diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 27cc178dd96..bc5639d0424 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -10591,7 +10591,7 @@ def rnn_r_3arg_layout_test(): return ([node], [seq, w, r], [hs, output]) -@onnx_test(external_data=False, opset_imports=16) +@onnx_test(opset_imports=16) def roialign_default_test(): # The op. 
ROIAlign had an attribute coordinate_transformation_mode added # as of Onnx opset 16; we make opset-specific test models which give @@ -10608,7 +10608,7 @@ def roialign_default_test(): return ([node], [x, roi, bi], [y]) -@onnx_test(external_data=False, opset_imports=12) +@onnx_test(opset_imports=12) def roialign_default_test_12(): # Same model as in roialign_default_test() but with an older opset specified x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 4, 7, 8]) diff --git a/test/verify/test_roialign.cpp b/test/verify/test_roialign.cpp index 6314491e10d..9a0f706c93d 100644 --- a/test/verify/test_roialign.cpp +++ b/test/verify/test_roialign.cpp @@ -44,14 +44,16 @@ struct test_roialign : verify_program> auto x = mm->add_parameter("x", x_s); auto roi = mm->add_parameter("roi", roi_s); auto ind = mm->add_literal(migraphx::literal(ind_s, ind_vec)); - auto r = mm->add_instruction(migraphx::make_op("roialign", - {{"spatial_scale", 1.0}, - {"output_height", 5}, - {"output_width", 5}, - {"sampling_ratio", 2}}), - x, - roi, - ind); + auto r = mm->add_instruction( + migraphx::make_op("roialign", + {{"spatial_scale", 1.1}, + {"output_height", 5}, + {"output_width", 3}, + {"sampling_ratio", 2}, + {"coordinate_transformation_mode", "half_pixel"}}), + x, + roi, + ind); mm->add_return({r}); return p; From 3978d410f1e717c95937cc7dff76aa8bb204cd68 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 9 Oct 2024 22:28:57 +0000 Subject: [PATCH 42/56] work in progress --- src/include/migraphx/op/roialign.hpp | 15 +++++ .../include/migraphx/kernels/roialign.hpp | 67 ++++++++++++++----- test/verify/test_roialign.cpp | 46 +++++++++++-- 3 files changed, 107 insertions(+), 21 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 4231f33621f..a201d443bc1 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -135,22 +135,30 @@ struct roialign for(auto ii : range(p.size())) { +// 
printf(" ttttt roi_start[%d] = %f p=%lu bin_size = %f i[%d] + .5f = %f bin_grid_size = %lu\n", ii, +// roi_start[ii], p[ii], bin_size[ii], ii, (i[ii] + .5f), bin_grid_size[ii]); + xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; +// printf(" uuuuu xy[%d]: %f\n", ii, xy[ii]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { results[index] = pos_weight{}; +// printf(" vvvvv xy[%d]: %f\n", ii, xy[ii]); return; } xy[ii] = std::max(xy[ii], 0.0f); +// printf(" wwwww xy[%d]: %f\n", ii, xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; +// printf(" xxxxx xy[%d]: %f\n", ii, xy[ii]); } } +printf(" fufufu xy: %f, %f index %d\n", xy[0], xy[1], index); results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -226,6 +234,8 @@ struct roialign visit_all(result, args.at(0), args.at(1))([&](auto output, auto x, auto roi) { const auto* batch_indices = args.at(2).cast(); +// printf(" UUUUU roi = %f, %f roi_s.index(0, 0) = %zu roi_s.index(0, 1) = %zu (1, 0)=%zu (1, 1)=%zu\n", roi[0], roi[1], +// roi_s.index({0, 0}), roi_s.index({0, 1}), roi_s.index({1, 0}), roi_s.index({1, 1})) ; par_for(n_rois, [&](auto n) { const auto bottom_data = x.begin(); const auto roi_batch_ind = batch_indices[n]; @@ -238,6 +248,7 @@ struct roialign static_cast(roi[roi_s.index({n, 2})] * spatial_scale - offset), static_cast(roi[roi_s.index({n, 3})] * spatial_scale - offset)}; + // Force malformed ROIs to be 1x1, output_half_pixel transform mode std::array roi_size{}; std::array bin_size{}; @@ -274,6 +285,9 @@ struct roialign const auto offset_bottom_data = bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); +// std::cout << " VVVVV offset_bottom_data: " << offset_bottom_data[0] << "\n" ; +// std::cout << " WWWWW offset_bottom_data_asdf: " << static_cast((roi_batch_ind * channels + c) * +// in_dims[0] * in_dims[1]) 
<< "\n" ; double output_val; std::tie(output_val, vec_index[c]) = (mode == migraphx::op::pooling_mode::average) @@ -287,6 +301,7 @@ struct roialign pre_calc, vec_index[c], max_pool{}); +// printf(" XXXXX output_val: %f \n", output_val) ; output(n, c, ph, pw) = output_val; }); }); diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index 92b40028080..5f12b74d714 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -25,6 +25,7 @@ #define MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP #include +#include #include #include #include @@ -75,34 +76,47 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( array high{}; for(index_int ii = 0; ii < xy.size(); ++ii) { + println_once(" fffff xy: ", xy[ii]); if(xy[ii] < -1.0f or xy[ii] > dims[ii]) { + println_once(" ggggg xy: ", xy[ii]); return implicit_conversion(0); } xy[ii] = migraphx::max(xy[ii], 0.0f); + println_once(" hhhhh xy: ", xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; + println_once(" iiiii xy: ", xy[ii]); } + println_once(" FFFFF xy: ", xy[ii]); } - array locs = {low[1] * dims[0] + low[0], +println(" FUFUFU xy: ", xy); + array locs = {low[1] * dims[0] + low[0], // new low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], high[1] * dims[0] + high[0]}; - - float lx = xy[0] - low[0]; - float ly = xy[1] - low[1]; +// array locs = {low[0] * dims[1] + low[1], //old +// low[0] * dims[1] + high[1], +// high[0] * dims[1] + low[1], +// high[0] * dims[1] + high[1]}; + // float lx = xy[0] - low[0]; // new + // float ly = xy[1] - low[1]; + float ly = xy[0] - low[0]; + float lx = xy[1] - low[1]; float hy = 1.0f - ly; float hx = 1.0f - lx; // do calculations in floating point and convert final result to required type array ws = {hy * hx, hy * lx, ly * hx, ly 
* lx}; // todo: Should we change the order of these indices? - auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); - auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); + // auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); + // auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); + auto v01 = pooling(data[locs[1]] * ws[1], data[locs[0]] * ws[0]); + auto v23 = pooling(data[locs[3]] * ws[3], data[locs[2]] * ws[2]); return implicit_conversion(pooling(v01, v23)); } @@ -121,9 +135,18 @@ MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, const int64_t count = bin_grid_size[0] * bin_grid_size[1]; dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { array id = {iy, ix}; +println_once(" eeeee roi_starts: ", roi_starts); +println(" eeeee idx: ", idx); +println_once(" eeeee bin_size: ", bin_size); +println_once(" eeeee (id + 0.5f): ", (id + 0.5f)); +println_once(" eeeee bin_grid_size: ", bin_grid_size); +println_once(" eeeee roi_offset: ", roi_offset); + // array locs = + // roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset; // old array locs = - roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset; - + roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size; // new +print(" EEEEE locs: ", locs); +println("", ""); auto val = bilinear_interpolate(data, dims, locs, op); output_val = op(output_val, val); }); @@ -179,20 +202,26 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, const auto offset_rois = rois + (n * roi_column_num); const int batch_ind = ind[n]; - // todo: did roi_offset get initialized to -0.5 in src/targets/gpu/jit/roialign.cpp? 
+ // Note that roi_offset in src/targets/gpu/jit/roialign.cpp uses a negative value, so we add it here +println_once(" AAAAA s.roi_offset: ", s.roi_offset); array roi_starts = { - static_cast(offset_rois[0]) * static_cast(s.spatial_scale) - s.roi_offset, - static_cast(offset_rois[1]) * static_cast(s.spatial_scale) - + static_cast(offset_rois[0]) * static_cast(s.spatial_scale) + s.roi_offset, + static_cast(offset_rois[1]) * static_cast(s.spatial_scale) + s.roi_offset}; +// static_cast(offset_rois[1]) * static_cast(s.spatial_scale), +// static_cast(offset_rois[0]) * static_cast(s.spatial_scale)}; + array roi_ends = { - static_cast(offset_rois[2]) * static_cast(s.spatial_scale) - s.roi_offset, - static_cast(offset_rois[3]) * static_cast(s.spatial_scale) - + static_cast(offset_rois[2]) * static_cast(s.spatial_scale) + s.roi_offset, + static_cast(offset_rois[3]) * static_cast(s.spatial_scale) + s.roi_offset}; - + // static_cast(offset_rois[3]) * static_cast(s.spatial_scale), + // static_cast(offset_rois[2]) * static_cast(s.spatial_scale)}; array roi_size{}; array bin_size{}; array bin_grid_size{}; + for(index_int ii = 0; ii < roi_size.size(); ++ii) { roi_size[ii] = roi_ends[ii] - roi_starts[ii]; @@ -204,8 +233,9 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, ? 
s.sampling_ratio : migraphx::ceil(roi_size[ii] / out_dims[ii]); } - +// const auto offset_asdf = ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); +// println_once(" CCCCC offset_asdf: ", offset_asdf); if constexpr(s.is_avg_pooling) { y_t[i] = calc_pooling(offset_x, @@ -216,6 +246,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, in_dims, s.roi_offset, avg_pool{}); +// println_once(" ddddd roi_starts[0]: ", roi_starts[0]); looks good here +// println_once(" ddddd1 roi_starts[1]: ", roi_starts[1]); +// print(" DDDDD i: ", i) ; +// println(" y_t[i]: ", y_t[i]) ; // these are all y_t[i]: 0.500000 make sense? } else { @@ -227,6 +261,9 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, in_dims, s.roi_offset, max_pool{}); +println(" EEEEE i: ", i) ;// EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.346875 EEEEE locs: -0.346875 EEEEE locs: -0.346875 EEEEE locs: -0.346875 EEEEE locs: -0.346875, -0.212812, -0.364062, -0.515312, -0.666562, -0.817813, -0.212812, -0.364062, -0.515312, -0.666562, -0.817813, -0.212812, -0.364062, -0.515312, -0.666562, -0.817813 FFFFF xy: 0.000000 + +print(" y_t[i]: ", y_t[i]) ; } } } diff --git a/test/verify/test_roialign.cpp b/test/verify/test_roialign.cpp index 9a0f706c93d..d9a93a8db05 100644 --- a/test/verify/test_roialign.cpp +++ b/test/verify/test_roialign.cpp @@ -27,6 +27,39 @@ #include #include +template +struct test_roialign_half_pixel : verify_program> +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + migraphx::shape x_s{DType, {1, 1, 2, 2}}; + + migraphx::shape roi_s{DType, {1, 4}}; + + migraphx::shape 
ind_s{migraphx::shape::int64_type, {1}}; + std::vector ind_vec = {0}; + + auto x = mm->add_parameter("x", x_s); + auto roi = mm->add_parameter("roi", roi_s); + auto ind = mm->add_literal(migraphx::literal(ind_s, ind_vec)); + auto r = mm->add_instruction( + migraphx::make_op("roialign", + {{"spatial_scale", 1.1}, + {"output_height", 5}, + {"output_width", 3}, + {"sampling_ratio", 2}, + {"coordinate_transformation_mode", "half_pixel"}}), + x, + roi, + ind); + mm->add_return({r}); + + return p; + } +}; + template struct test_roialign : verify_program> { @@ -48,9 +81,9 @@ struct test_roialign : verify_program> migraphx::make_op("roialign", {{"spatial_scale", 1.1}, {"output_height", 5}, - {"output_width", 3}, + {"output_width", 2}, {"sampling_ratio", 2}, - {"coordinate_transformation_mode", "half_pixel"}}), + {"coordinate_transformation_mode", "output_half_pixel"}}), x, roi, ind); @@ -60,8 +93,9 @@ struct test_roialign : verify_program> } }; +template struct test_roialign_half_pixel; template struct test_roialign; -template struct test_roialign; -template struct test_roialign; -template struct test_roialign; -template struct test_roialign; +// template struct test_roialign; commented out for debug +// template struct test_roialign; +// template struct test_roialign; +// template struct test_roialign; From d6dd2e1cd4fa10f3d5193bd41d33e937925ac859 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 9 Oct 2024 22:47:24 +0000 Subject: [PATCH 43/56] work in progress --- src/include/migraphx/op/roialign.hpp | 4 ++-- .../kernels/include/migraphx/kernels/roialign.hpp | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index a201d443bc1..38120c99918 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -158,7 +158,7 @@ struct roialign // printf(" xxxxx xy[%d]: %f\n", ii, xy[ii]); } } -printf(" fufufu xy: %f, %f index %d\n", 
xy[0], xy[1], index); +printf(" fufufu xy: %f, %f\n", xy[0], xy[1]); results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -272,7 +272,7 @@ printf(" fufufu xy: %f, %f index %d\n", xy[0], xy[1], index); shape comp_s{shape::float_type, comp_lens}; auto pre_calc = this->calc_pos_weight(in_dims, comp_s, roi_starts, bin_size, bin_grid_size); - +// The array returned here should correspond to the GGGGG and HHHHH values in the GPU std::vector comp_lens1 = {channels, out_dims[0], out_dims[1]}; shape comp_s1{migraphx::shape::float_type, comp_lens1}; std::vector vec_index(channels, 0); diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index 5f12b74d714..6d3f5931285 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -76,23 +76,23 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( array high{}; for(index_int ii = 0; ii < xy.size(); ++ii) { - println_once(" fffff xy: ", xy[ii]); + // println_once(" fffff xy: ", xy[ii]); if(xy[ii] < -1.0f or xy[ii] > dims[ii]) { - println_once(" ggggg xy: ", xy[ii]); + // println_once(" ggggg xy: ", xy[ii]); return implicit_conversion(0); } xy[ii] = migraphx::max(xy[ii], 0.0f); - println_once(" hhhhh xy: ", xy[ii]); + // println_once(" hhhhh xy: ", xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; - println_once(" iiiii xy: ", xy[ii]); + // println_once(" iiiii xy: ", xy[ii]); } - println_once(" FFFFF xy: ", xy[ii]); + // println_once(" FFFFF xy: ", xy[ii]); } println(" FUFUFU xy: ", xy); array locs = {low[1] * dims[0] + low[0], // new @@ -112,6 +112,11 @@ println(" FUFUFU xy: ", xy); // do calculations in floating point and convert final result to required type array ws = {hy * hx, hy * lx, ly * hx, ly * 
lx}; + //debug + array pooling_input01 = {data[locs[1]] * ws[1], data[locs[0]] * ws[0]}; + array pooling_input23 = {data[locs[3]] * ws[3], data[locs[2]] * ws[2]}; +println(" GGGGG pooling_input01", pooling_input01); +println(" HHHHH pooling_input23", pooling_input23); // todo: Should we change the order of these indices? // auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); // auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); From d425d5644611e1da1e87d02d27904207b0b5f7b8 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Thu, 10 Oct 2024 21:18:14 +0000 Subject: [PATCH 44/56] work in progress; a lot of debug code --- src/include/migraphx/op/roialign.hpp | 24 +++++----- .../include/migraphx/kernels/roialign.hpp | 45 +++++++++++-------- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 38120c99918..05ce5d9a65c 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -135,30 +135,24 @@ struct roialign for(auto ii : range(p.size())) { -// printf(" ttttt roi_start[%d] = %f p=%lu bin_size = %f i[%d] + .5f = %f bin_grid_size = %lu\n", ii, -// roi_start[ii], p[ii], bin_size[ii], ii, (i[ii] + .5f), bin_grid_size[ii]); - xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; -// printf(" uuuuu xy[%d]: %f\n", ii, xy[ii]); +printf(" FUFUFU index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { results[index] = pos_weight{}; -// printf(" vvvvv xy[%d]: %f\n", ii, xy[ii]); return; } xy[ii] = std::max(xy[ii], 0.0f); -// printf(" wwwww xy[%d]: %f\n", ii, xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; -// printf(" xxxxx xy[%d]: %f\n", ii, xy[ii]); } } -printf(" fufufu xy: %f, %f\n", xy[0], xy[1]); +printf(" FFFFF index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); 
results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -202,13 +196,19 @@ printf(" fufufu xy: %f, %f\n", xy[0], xy[1]); { double output_val = op.init(); const int64_t count = bin_grid_size[0] * bin_grid_size[1]; - dfor(bin_grid_size[0], bin_grid_size[1])([&](auto, auto) { + dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { const auto& pc = pos_weights[index]; std::array wv; +// printf(" HHHHH dfor index: (%lu, %lu)\n", iy, ix); +// printf(" GGGGG transform: "); +printf(" IIIII transform ws: "); std::transform( pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) { +printf(" %f ", w); +// printf(" %f ", *(data + pos) * w); return *(data + pos) * w; }); +printf("\n"); output_val = std::accumulate(wv.begin(), wv.end(), output_val, op); index += 1; }); @@ -234,8 +234,6 @@ printf(" fufufu xy: %f, %f\n", xy[0], xy[1]); visit_all(result, args.at(0), args.at(1))([&](auto output, auto x, auto roi) { const auto* batch_indices = args.at(2).cast(); -// printf(" UUUUU roi = %f, %f roi_s.index(0, 0) = %zu roi_s.index(0, 1) = %zu (1, 0)=%zu (1, 1)=%zu\n", roi[0], roi[1], -// roi_s.index({0, 0}), roi_s.index({0, 1}), roi_s.index({1, 0}), roi_s.index({1, 1})) ; par_for(n_rois, [&](auto n) { const auto bottom_data = x.begin(); const auto roi_batch_ind = batch_indices[n]; @@ -285,9 +283,7 @@ printf(" fufufu xy: %f, %f\n", xy[0], xy[1]); const auto offset_bottom_data = bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); -// std::cout << " VVVVV offset_bottom_data: " << offset_bottom_data[0] << "\n" ; -// std::cout << " WWWWW offset_bottom_data_asdf: " << static_cast((roi_batch_ind * channels + c) * -// in_dims[0] * in_dims[1]) << "\n" ; +printf(" KKKKK n, c, ph, pw = %lu %lu %lu %lu\n", n, c, ph, pw); double output_val; std::tie(output_val, vec_index[c]) = (mode == migraphx::op::pooling_mode::average) diff --git 
a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index 6d3f5931285..caecf6dd4bc 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -79,22 +79,20 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( // println_once(" fffff xy: ", xy[ii]); if(xy[ii] < -1.0f or xy[ii] > dims[ii]) { - // println_once(" ggggg xy: ", xy[ii]); + // println_once(" g@gggg xy: ", xy[ii]); return implicit_conversion(0); } xy[ii] = migraphx::max(xy[ii], 0.0f); - // println_once(" hhhhh xy: ", xy[ii]); + // println_once(" h@hhhh xy: ", xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) { xy[ii] = high[ii] = low[ii] = dims[ii] - 1; - // println_once(" iiiii xy: ", xy[ii]); } - // println_once(" FFFFF xy: ", xy[ii]); } -println(" FUFUFU xy: ", xy); +println_once(" fffff xy: ", xy); array locs = {low[1] * dims[0] + low[0], // new low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -110,13 +108,16 @@ println(" FUFUFU xy: ", xy); float hy = 1.0f - ly; float hx = 1.0f - lx; // do calculations in floating point and convert final result to required type - array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; + // array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; //old + array ws = {hy * hx, ly * hx, hy * lx, ly * lx}; // new //debug - array pooling_input01 = {data[locs[1]] * ws[1], data[locs[0]] * ws[0]}; - array pooling_input23 = {data[locs[3]] * ws[3], data[locs[2]] * ws[2]}; -println(" GGGGG pooling_input01", pooling_input01); -println(" HHHHH pooling_input23", pooling_input23); +// array pooling_input01 = {data[locs[1]] * ws[1], data[locs[0]] * ws[0]}; +// array pooling_input23 = {data[locs[3]] * ws[3], data[locs[2]] * ws[2]}; +// println(" ggggg pooling_input01", pooling_input01); +// println(" hhhhh pooling_input23", pooling_input23); +println(" iiiii ws: ", ws); 
+println(); // todo: Should we change the order of these indices? // auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); // auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); @@ -140,18 +141,24 @@ MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, const int64_t count = bin_grid_size[0] * bin_grid_size[1]; dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { array id = {iy, ix}; +println_once(" hhhhh id: ", id); +(void) roi_offset; +println("How does locs increment? 12 steps in idx = 1 step in ref version", ""); println_once(" eeeee roi_starts: ", roi_starts); println(" eeeee idx: ", idx); println_once(" eeeee bin_size: ", bin_size); println_once(" eeeee (id + 0.5f): ", (id + 0.5f)); println_once(" eeeee bin_grid_size: ", bin_grid_size); +array zap = idx * bin_size; +println("idx * bin_size: ", zap); +array zap2 = bin_size * (id + 0.5f) / bin_grid_size; +println("(id + 0.5f) / bin_grid_size: ", zap2); println_once(" eeeee roi_offset: ", roi_offset); // array locs = - // roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset; // old + // roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset; // old leads to all 0's array locs = roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size; // new -print(" EEEEE locs: ", locs); -println("", ""); +println(" eeeeeEEE locs: ", locs); auto val = bilinear_interpolate(data, dims, locs, op); output_val = op(output_val, val); }); @@ -208,7 +215,8 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, const int batch_ind = ind[n]; // Note that roi_offset in src/targets/gpu/jit/roialign.cpp uses a negative value, so we add it here -println_once(" AAAAA s.roi_offset: ", s.roi_offset); +println(" aaaaa idx: ", idx); +// println(" out_dims ", out_lens); array roi_starts = { static_cast(offset_rois[0]) * static_cast(s.spatial_scale) + s.roi_offset, static_cast(offset_rois[1]) * 
static_cast(s.spatial_scale) + @@ -238,9 +246,11 @@ println_once(" AAAAA s.roi_offset: ", s.roi_offset); ? s.sampling_ratio : migraphx::ceil(roi_size[ii] / out_dims[ii]); } -// const auto offset_asdf = ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); +array zap = {n, c, ph, pw}; + +println(" kkkkk n, c, ph, pw: ", zap); + const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); -// println_once(" CCCCC offset_asdf: ", offset_asdf); if constexpr(s.is_avg_pooling) { y_t[i] = calc_pooling(offset_x, @@ -266,9 +276,8 @@ println_once(" AAAAA s.roi_offset: ", s.roi_offset); in_dims, s.roi_offset, max_pool{}); -println(" EEEEE i: ", i) ;// EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.805208 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.576042 EEEEE locs: -0.346875 EEEEE locs: -0.346875 EEEEE locs: -0.346875 EEEEE locs: -0.346875 EEEEE locs: -0.346875, -0.212812, -0.364062, -0.515312, -0.666562, -0.817813, -0.212812, -0.364062, -0.515312, -0.666562, -0.817813, -0.212812, -0.364062, -0.515312, -0.666562, -0.817813 FFFFF xy: 0.000000 -print(" y_t[i]: ", y_t[i]) ; +// print(" y_t[i]: ", y_t[i]) ; } } } From 682b6532db5234d2c22aaf4370668811b835bb3a Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Fri, 11 Oct 2024 20:01:01 +0000 Subject: [PATCH 45/56] work in progress, gpu kernel closer to correct. Gives correct results but in mixed up order. --- src/include/migraphx/op/roialign.hpp | 43 +++++++++---- .../include/migraphx/kernels/roialign.hpp | 64 ++++++------------- test/verify/test_roialign.cpp | 2 +- 3 files changed, 51 insertions(+), 58 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 05ce5d9a65c..3e6e31d9524 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -126,18 +126,28 @@ struct roialign // order. 
The i[x] value is least significant and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; std::array i = {idx_v[3], idx_v[2]}; // these are always equal +printf(" EEEEE p, i-index %lu %lu %lu %lu ( %lu %lu %lu %lu)\n", p[0], p[1], i[0], i[1], +idx_v[0], idx_v[1], idx_v[2], idx_v[3]); + + + + + // xy is scaled coordinates of start point of ROI std::array xy{}; // low, high are floor and ceiling of the xy value (i.e. the bounds of the pixel it lies // inside) from which we will interpolate. std::array low{}; std::array high{}; - +float asdf=-1.f; for(auto ii : range(p.size())) { xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; -printf(" FUFUFU index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); +// initial calculated values, before adjustments +if(ii == 0 ) asdf = xy[0]; +if(ii == 1) +printf(" IIIII index %lu xy: (%f, %f)\n", index, asdf, xy[1]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { results[index] = pos_weight{}; @@ -152,7 +162,7 @@ printf(" FUFUFU index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); xy[ii] = high[ii] = low[ii] = dims[ii] - 1; } } -printf(" FFFFF index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); +// printf(" FFFFF index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -164,6 +174,12 @@ printf(" FFFFF index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); float hx = 1.0f - lx; // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; + printf(" AAAAA index %lu results.w: %f, %f, %f, %f\n", index, + results[index].w[0], + results[index].w[1], + results[index].w[2], + results[index].w[3] + ); }); return results; } @@ -191,24 +207,19 @@ printf(" FFFFF index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); std::tuple calc_pooling(const T& data, const std::array& bin_grid_size, const std::vector& pos_weights, - int64_t index, + int64_t index, // index to c Op op) const { double output_val 
= op.init(); const int64_t count = bin_grid_size[0] * bin_grid_size[1]; dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { +printf(" IIIIIKKKKK iy, ix, index = %lu %lu %ld\n", iy, ix, index ); const auto& pc = pos_weights[index]; - std::array wv; -// printf(" HHHHH dfor index: (%lu, %lu)\n", iy, ix); -// printf(" GGGGG transform: "); -printf(" IIIII transform ws: "); + std::array wv; std::transform( pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) { -printf(" %f ", w); -// printf(" %f ", *(data + pos) * w); return *(data + pos) * w; }); -printf("\n"); output_val = std::accumulate(wv.begin(), wv.end(), output_val, op); index += 1; }); @@ -270,7 +281,7 @@ printf("\n"); shape comp_s{shape::float_type, comp_lens}; auto pre_calc = this->calc_pos_weight(in_dims, comp_s, roi_starts, bin_size, bin_grid_size); -// The array returned here should correspond to the GGGGG and HHHHH values in the GPU + std::vector comp_lens1 = {channels, out_dims[0], out_dims[1]}; shape comp_s1{migraphx::shape::float_type, comp_lens1}; std::vector vec_index(channels, 0); @@ -280,11 +291,15 @@ printf("\n"); auto ph = idx[1]; auto pw = idx[2]; +// n anc c are 0 because that's the size of the test case +printf(" IIIII n, c, ph, pw = %lu %lu %lu %lu\n", n, c, ph, pw); + const auto offset_bottom_data = bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); -printf(" KKKKK n, c, ph, pw = %lu %lu %lu %lu\n", n, c, ph, pw); + double output_val; +printf(" IIIIIc vec_index[c] = %ld\n", vec_index[c]); std::tie(output_val, vec_index[c]) = (mode == migraphx::op::pooling_mode::average) ? 
this->calc_pooling(offset_bottom_data, @@ -297,7 +312,7 @@ printf(" KKKKK n, c, ph, pw = %lu %lu %lu %lu\n", n, c, ph, pw); pre_calc, vec_index[c], max_pool{}); -// printf(" XXXXX output_val: %f \n", output_val) ; +printf(" DDDDD %f\n", output_val); output(n, c, ph, pw) = output_val; }); }); diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index caecf6dd4bc..9d60e705f0f 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -76,15 +76,12 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( array high{}; for(index_int ii = 0; ii < xy.size(); ++ii) { - // println_once(" fffff xy: ", xy[ii]); if(xy[ii] < -1.0f or xy[ii] > dims[ii]) { - // println_once(" g@gggg xy: ", xy[ii]); return implicit_conversion(0); } xy[ii] = migraphx::max(xy[ii], 0.0f); - // println_once(" h@hhhh xy: ", xy[ii]); low[ii] = xy[ii]; high[ii] = low[ii] + 1; if(low[ii] >= dims[ii] - 1) @@ -92,40 +89,25 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( xy[ii] = high[ii] = low[ii] = dims[ii] - 1; } } -println_once(" fffff xy: ", xy); array locs = {low[1] * dims[0] + low[0], // new low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], high[1] * dims[0] + high[0]}; -// array locs = {low[0] * dims[1] + low[1], //old -// low[0] * dims[1] + high[1], -// high[0] * dims[1] + low[1], -// high[0] * dims[1] + high[1]}; - // float lx = xy[0] - low[0]; // new - // float ly = xy[1] - low[1]; - float ly = xy[0] - low[0]; - float lx = xy[1] - low[1]; + + float lx = xy[0] - low[0]; // new + float ly = xy[1] - low[1]; +\ float hy = 1.0f - ly; float hx = 1.0f - lx; // do calculations in floating point and convert final result to required type - // array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; //old - array ws = {hy * hx, ly * hx, hy * lx, ly * lx}; // new + array ws = {hy * 
hx, hy * lx, ly * hx, ly * lx}; //old - //debug -// array pooling_input01 = {data[locs[1]] * ws[1], data[locs[0]] * ws[0]}; -// array pooling_input23 = {data[locs[3]] * ws[3], data[locs[2]] * ws[2]}; -// println(" ggggg pooling_input01", pooling_input01); -// println(" hhhhh pooling_input23", pooling_input23); -println(" iiiii ws: ", ws); -println(); - // todo: Should we change the order of these indices? - // auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); - // auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); auto v01 = pooling(data[locs[1]] * ws[1], data[locs[0]] * ws[0]); auto v23 = pooling(data[locs[3]] * ws[3], data[locs[2]] * ws[2]); return implicit_conversion(pooling(v01, v23)); } +// Calculate a single pooled output value template MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, const array& roi_starts, @@ -136,29 +118,24 @@ MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, float roi_offset, Op op) { + // for one idx (output height and width coordinates) we iterate through all bin_grid values using in_dtype = typename Iterator::value_type; in_dtype output_val = in_dtype{op.init()}; const int64_t count = bin_grid_size[0] * bin_grid_size[1]; dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { array id = {iy, ix}; -println_once(" hhhhh id: ", id); +println_once(" jjjjj id: ", id); (void) roi_offset; -println("How does locs increment? 
12 steps in idx = 1 step in ref version", ""); -println_once(" eeeee roi_starts: ", roi_starts); +println_once(" jjjjj roi_starts: ", roi_starts); println(" eeeee idx: ", idx); -println_once(" eeeee bin_size: ", bin_size); -println_once(" eeeee (id + 0.5f): ", (id + 0.5f)); -println_once(" eeeee bin_grid_size: ", bin_grid_size); -array zap = idx * bin_size; -println("idx * bin_size: ", zap); -array zap2 = bin_size * (id + 0.5f) / bin_grid_size; -println("(id + 0.5f) / bin_grid_size: ", zap2); -println_once(" eeeee roi_offset: ", roi_offset); - // array locs = - // roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset; // old leads to all 0's + array locs = roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size; // new -println(" eeeeeEEE locs: ", locs); +// idx same as ph, pw + array asdf_idx = {float(iy), float(ix), float(idx[0]), float(idx[1]),locs[0], locs[1]}; +// put idx, ix, iy, and locs into a single array to debug together + +println(" iiiii asdf_idx/locs: ", asdf_idx); auto val = bilinear_interpolate(data, dims, locs, op); output_val = op(output_val, val); }); @@ -192,7 +169,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, auto channel_num = x_lens[1]; // input dims of height and width, in all 2-dim arrays, the first dim // is for height and second dim is for width - array in_dims = {x_lens[2], x_lens[3]}; + array in_dims = {x_lens[3], x_lens[2]}; const auto stride = index.nglobal(); auto out_s = y_t.get_shape(); @@ -202,6 +179,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, // is for height and second dim is for width const auto& out_lens = out_s.lens; array out_dims = {out_lens[2], out_lens[3]}; +println_once(" aaaaa stride: ", stride); for(index_int i = index.global; i < out_s.elements(); i += stride) { @@ -215,8 +193,6 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, const int batch_ind = ind[n]; // Note 
that roi_offset in src/targets/gpu/jit/roialign.cpp uses a negative value, so we add it here -println(" aaaaa idx: ", idx); -// println(" out_dims ", out_lens); array roi_starts = { static_cast(offset_rois[0]) * static_cast(s.spatial_scale) + s.roi_offset, static_cast(offset_rois[1]) * static_cast(s.spatial_scale) + @@ -251,6 +227,8 @@ array zap = {n, c, ph, pw}; println(" kkkkk n, c, ph, pw: ", zap); const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); +array reindex = {n, c, pw, ph};//;; rearrange the gpu indices to what the ref indices would be +// and insert that location in y_t if constexpr(s.is_avg_pooling) { y_t[i] = calc_pooling(offset_x, @@ -263,8 +241,8 @@ println(" kkkkk n, c, ph, pw: ", zap); avg_pool{}); // println_once(" ddddd roi_starts[0]: ", roi_starts[0]); looks good here // println_once(" ddddd1 roi_starts[1]: ", roi_starts[1]); -// print(" DDDDD i: ", i) ; -// println(" y_t[i]: ", y_t[i]) ; // these are all y_t[i]: 0.500000 make sense? +print(" ddddd i: ", i) ; +println(" y_t[i]: ", y_t[i]) ; // these are all y_t[i]: 0.500000 make sense? 
} else { diff --git a/test/verify/test_roialign.cpp b/test/verify/test_roialign.cpp index d9a93a8db05..c036dbb5e23 100644 --- a/test/verify/test_roialign.cpp +++ b/test/verify/test_roialign.cpp @@ -49,7 +49,7 @@ struct test_roialign_half_pixel : verify_program {{"spatial_scale", 1.1}, {"output_height", 5}, {"output_width", 3}, - {"sampling_ratio", 2}, + {"sampling_ratio", 3}, {"coordinate_transformation_mode", "half_pixel"}}), x, roi, From 4e122bc03a8e4d4f2f0c19787a98c27ffa19515e Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Fri, 11 Oct 2024 23:14:49 +0000 Subject: [PATCH 46/56] work in progress --- .../include/migraphx/kernels/roialign.hpp | 72 +++++++++++++++---- test/verify/test_roialign.cpp | 2 +- 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index 9d60e705f0f..80d2bd7bffe 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -124,10 +124,10 @@ MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, const int64_t count = bin_grid_size[0] * bin_grid_size[1]; dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { array id = {iy, ix}; -println_once(" jjjjj id: ", id); +// println_once(" jjjjj id: ", id); (void) roi_offset; -println_once(" jjjjj roi_starts: ", roi_starts); -println(" eeeee idx: ", idx); +// println_once(" jjjjj roi_starts: ", roi_starts); +// println(" eeeee idx: ", idx); array locs = roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size; // new @@ -135,7 +135,7 @@ println(" eeeee idx: ", idx); array asdf_idx = {float(iy), float(ix), float(idx[0]), float(idx[1]),locs[0], locs[1]}; // put idx, ix, iy, and locs into a single array to debug together -println(" iiiii asdf_idx/locs: ", asdf_idx); +// println(" iiiii asdf_idx/locs: ", asdf_idx); auto val = bilinear_interpolate(data, 
dims, locs, op); output_val = op(output_val, val); }); @@ -197,15 +197,12 @@ println_once(" aaaaa stride: ", stride); static_cast(offset_rois[0]) * static_cast(s.spatial_scale) + s.roi_offset, static_cast(offset_rois[1]) * static_cast(s.spatial_scale) + s.roi_offset}; -// static_cast(offset_rois[1]) * static_cast(s.spatial_scale), -// static_cast(offset_rois[0]) * static_cast(s.spatial_scale)}; array roi_ends = { static_cast(offset_rois[2]) * static_cast(s.spatial_scale) + s.roi_offset, static_cast(offset_rois[3]) * static_cast(s.spatial_scale) + s.roi_offset}; - // static_cast(offset_rois[3]) * static_cast(s.spatial_scale), - // static_cast(offset_rois[2]) * static_cast(s.spatial_scale)}; + array roi_size{}; array bin_size{}; array bin_grid_size{}; @@ -227,8 +224,10 @@ array zap = {n, c, ph, pw}; println(" kkkkk n, c, ph, pw: ", zap); const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); -array reindex = {n, c, pw, ph};//;; rearrange the gpu indices to what the ref indices would be -// and insert that location in y_t +// array reindex = {size_t(n), size_t(c), size_t(pw), size_t(ph)};//;; rearrange the gpu indices to what the ref indices would be +// migraphx::shape reindex_shape(reindex); +// and insert that location in y_t + if constexpr(s.is_avg_pooling) { y_t[i] = calc_pooling(offset_x, @@ -239,10 +238,54 @@ array reindex = {n, c, pw, ph};//;; rearrange the gpu indices to what t in_dims, s.roi_offset, avg_pool{}); -// println_once(" ddddd roi_starts[0]: ", roi_starts[0]); looks good here -// println_once(" ddddd1 roi_starts[1]: ", roi_starts[1]); -print(" ddddd i: ", i) ; -println(" y_t[i]: ", y_t[i]) ; // these are all y_t[i]: 0.500000 make sense? +// what are the indices corresponding to i? 
+ + std::size_t jj = 0; + // std::size_t ss = 1; +array m_lens{out_lens[0], out_lens[1], out_lens[3], out_lens[2]}; +array m_strides; +m_strides[3] = 1; + for(auto k: {2, 1, 0}) + { + m_strides[k] = m_strides[k+1] * m_lens[k+1]; + + } +println_once(" m_lens: ", m_lens); +println_once(" m_strides: ", m_strides); + // for(auto k : {3, 2, 1, 0}) + // { + // std::size_t stride2 = m_strides[k]; + // std::size_t len = m_lens[k]; + // std::size_t idxx = (i % (ss * len)) / ss; + // jj += stride2 * idxx; + // ss *= len; + // } + // println(" jj2: ", jj); + +size_t pp = i; +jj = (pp/m_strides[0])*m_strides[0]; +pp = pp % m_strides[1]; +jj += (pp/m_strides[1])*m_strides[1]; +pp %= m_strides[2]; +jj += (pp/m_strides[2])*m_strides[2]; +pp %= m_strides[3]; +jj += pp; + + +// jj = i/m_strides[2] + (i%m_strides[2])*m_lens[2] + (i/m_strides[1])*m_strides[1] + (i/m_strides[0])*m_strides[0]; +// jj = (i % m_strides[1]) + +array zapzap = {float(n), float(c), float(ph), float(pw), y_t[i], float(i), static_cast(jj)}; +// array zapzap = {i, jj}; + +/** + * I want to turn 0->0, + * 1->5, + * 2->10, + * 3->1, + * i.e. (i%3) * 5 + (i/3) but accounting for n and c too. 
+ */ +println(" ddddd y_t[i]: ", zapzap) ; } else { @@ -255,7 +298,6 @@ println(" y_t[i]: ", y_t[i]) ; // these are all y_t[i]: 0.500000 make s s.roi_offset, max_pool{}); -// print(" y_t[i]: ", y_t[i]) ; } } } diff --git a/test/verify/test_roialign.cpp b/test/verify/test_roialign.cpp index c036dbb5e23..e8878c6c8e8 100644 --- a/test/verify/test_roialign.cpp +++ b/test/verify/test_roialign.cpp @@ -34,7 +34,7 @@ struct test_roialign_half_pixel : verify_program { migraphx::program p; auto* mm = p.get_main_module(); - migraphx::shape x_s{DType, {1, 1, 2, 2}}; + migraphx::shape x_s{DType, {1, 7, 2, 2}}; migraphx::shape roi_s{DType, {1, 4}}; From dbd28a4395479a6639d47d37aed4dd5a08c41f04 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 14 Oct 2024 16:13:39 +0000 Subject: [PATCH 47/56] work in progress with GPU output indexes --- src/include/migraphx/op/roialign.hpp | 32 ++++++------ .../include/migraphx/kernels/roialign.hpp | 51 ++++++++++--------- test/verify/test_roialign.cpp | 2 +- 3 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index 3e6e31d9524..a9885cdec8b 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -126,8 +126,8 @@ struct roialign // order. The i[x] value is least significant and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; std::array i = {idx_v[3], idx_v[2]}; // these are always equal -printf(" EEEEE p, i-index %lu %lu %lu %lu ( %lu %lu %lu %lu)\n", p[0], p[1], i[0], i[1], -idx_v[0], idx_v[1], idx_v[2], idx_v[3]); +// printf(" EEEEE p, i-index %lu %lu %lu %lu ( %lu %lu %lu %lu)\n", p[0], p[1], i[0], i[1], +// idx_v[0], idx_v[1], idx_v[2], idx_v[3]); @@ -139,15 +139,15 @@ idx_v[0], idx_v[1], idx_v[2], idx_v[3]); // inside) from which we will interpolate. 
std::array low{}; std::array high{}; -float asdf=-1.f; +// float asdf=-1.f; for(auto ii : range(p.size())) { xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; // initial calculated values, before adjustments -if(ii == 0 ) asdf = xy[0]; -if(ii == 1) -printf(" IIIII index %lu xy: (%f, %f)\n", index, asdf, xy[1]); +// if(ii == 0 ) asdf = xy[0]; +// if(ii == 1) +// printf(" IIIII index %lu xy: (%f, %f)\n", index, asdf, xy[1]); if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { results[index] = pos_weight{}; @@ -174,12 +174,12 @@ printf(" IIIII index %lu xy: (%f, %f)\n", index, asdf, xy[1]); float hx = 1.0f - lx; // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; - printf(" AAAAA index %lu results.w: %f, %f, %f, %f\n", index, - results[index].w[0], - results[index].w[1], - results[index].w[2], - results[index].w[3] - ); + // printf(" AAAAA index %lu results.w: %f, %f, %f, %f\n", index, + // results[index].w[0], + // results[index].w[1], + // results[index].w[2], + // results[index].w[3] + // ); }); return results; } @@ -213,7 +213,7 @@ printf(" IIIII index %lu xy: (%f, %f)\n", index, asdf, xy[1]); double output_val = op.init(); const int64_t count = bin_grid_size[0] * bin_grid_size[1]; dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { -printf(" IIIIIKKKKK iy, ix, index = %lu %lu %ld\n", iy, ix, index ); +// printf(" IIIIIKKKKK iy, ix, index = %lu %lu %ld\n", iy, ix, index ); const auto& pc = pos_weights[index]; std::array wv; std::transform( @@ -292,14 +292,14 @@ printf(" IIIIIKKKKK iy, ix, index = %lu %lu %ld\n", iy, ix, index ); auto pw = idx[2]; // n anc c are 0 because that's the size of the test case -printf(" IIIII n, c, ph, pw = %lu %lu %lu %lu\n", n, c, ph, pw); +// printf(" IIIII n, c, ph, pw = %lu %lu %lu %lu\n", n, c, ph, pw); const auto offset_bottom_data = bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); double output_val; -printf(" 
IIIIIc vec_index[c] = %ld\n", vec_index[c]); +// printf(" IIIIIc vec_index[c] = %ld\n", vec_index[c]); std::tie(output_val, vec_index[c]) = (mode == migraphx::op::pooling_mode::average) ? this->calc_pooling(offset_bottom_data, @@ -312,7 +312,7 @@ printf(" IIIIIc vec_index[c] = %ld\n", vec_index[c]); pre_calc, vec_index[c], max_pool{}); -printf(" DDDDD %f\n", output_val); +printf(" DDDDD index %lu %f\n", output_shape.index({n, c, ph, pw}), output_val); output(n, c, ph, pw) = output_val; }); }); diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index 80d2bd7bffe..c275eb3e11c 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -219,9 +219,9 @@ println_once(" aaaaa stride: ", stride); ? s.sampling_ratio : migraphx::ceil(roi_size[ii] / out_dims[ii]); } -array zap = {n, c, ph, pw}; +// array zap = {n, c, ph, pw}; -println(" kkkkk n, c, ph, pw: ", zap); +// println(" kkkkk n, c, ph, pw: ", zap); const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); // array reindex = {size_t(n), size_t(c), size_t(pw), size_t(ph)};//;; rearrange the gpu indices to what the ref indices would be @@ -230,14 +230,6 @@ println(" kkkkk n, c, ph, pw: ", zap); if constexpr(s.is_avg_pooling) { - y_t[i] = calc_pooling(offset_x, - roi_starts, - bin_size, - {ph, pw}, - bin_grid_size, - in_dims, - s.roi_offset, - avg_pool{}); // what are the indices corresponding to i? 
std::size_t jj = 0; @@ -264,28 +256,37 @@ println_once(" m_strides: ", m_strides); size_t pp = i; jj = (pp/m_strides[0])*m_strides[0]; -pp = pp % m_strides[1]; +pp = pp % m_strides[0]; jj += (pp/m_strides[1])*m_strides[1]; -pp %= m_strides[2]; -jj += (pp/m_strides[2])*m_strides[2]; -pp %= m_strides[3]; +pp %= m_strides[1]; +println(" i, pp: ", 10000*i + pp); + +println(" pp/m_strides[2], pp % m_strides[2]",1000000*(pp/m_lens[2] + (pp % m_lens[2])*m_strides[2]) + + 10000*(pp/m_lens[2]) + (pp%m_lens[2]) + 100000000); +pp = pp/m_lens[2] + (pp % m_lens[2])*m_strides[2]; +println(" jj, pp: ", jj * 10000 + pp); // <===== may still be relevant +// jj += (pp/m_strides[2])*m_strides[2]; +// pp %= m_strides[2]; jj += pp; -// jj = i/m_strides[2] + (i%m_strides[2])*m_lens[2] + (i/m_strides[1])*m_strides[1] + (i/m_strides[0])*m_strides[0]; +// jj = i/m_strides[2] + (i%m_strides[2])*m_lens[2] // jj = (i % m_strides[1]) -array zapzap = {float(n), float(c), float(ph), float(pw), y_t[i], float(i), static_cast(jj)}; -// array zapzap = {i, jj}; -/** - * I want to turn 0->0, - * 1->5, - * 2->10, - * 3->1, - * i.e. (i%3) * 5 + (i/3) but accounting for n and c too. 
- */ -println(" ddddd y_t[i]: ", zapzap) ; +y_t[jj] = calc_pooling(offset_x, + // y_t[i] = calc_pooling(offset_x, + roi_starts, + bin_size, + {ph, pw}, + bin_grid_size, + in_dims, + s.roi_offset, + avg_pool{}); +array zapzap = {float(n), float(c), float(ph), float(pw), float(i), static_cast(jj), y_t[jj]}; + + +println(" ddddd y_t[jj]: ", zapzap) ; } else { diff --git a/test/verify/test_roialign.cpp b/test/verify/test_roialign.cpp index e8878c6c8e8..8833618a079 100644 --- a/test/verify/test_roialign.cpp +++ b/test/verify/test_roialign.cpp @@ -34,7 +34,7 @@ struct test_roialign_half_pixel : verify_program { migraphx::program p; auto* mm = p.get_main_module(); - migraphx::shape x_s{DType, {1, 7, 2, 2}}; + migraphx::shape x_s{DType, {1, 5, 2, 2}}; migraphx::shape roi_s{DType, {1, 4}}; From 7c9175783fdb329e5c488d14829cad341a74db6d Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 14 Oct 2024 16:58:50 +0000 Subject: [PATCH 48/56] fixed GPU kernel and cleaned up debug code. Passes all test_verify test_roialign* cases. 
still contains output index workaround --- ort_roialign.py | 15 +- src/include/migraphx/op/roialign.hpp | 32 +---- .../include/migraphx/kernels/roialign.hpp | 129 +++++------------- test/onnx/gen_onnx.py | 6 +- test/verify/test_roialign.cpp | 16 +-- test/verify/test_roialign_nondefault.cpp | 1 - 6 files changed, 55 insertions(+), 144 deletions(-) diff --git a/ort_roialign.py b/ort_roialign.py index eb60fb05352..817e5fe2e84 100644 --- a/ort_roialign.py +++ b/ort_roialign.py @@ -11,18 +11,11 @@ y = np.ones([10, 5, 4, 7], dtype='f') -rois = np.array( - [ - [0.1, 0.15, 0.6, 0.35], - [2.1, 1.73, 3.8, 2.13] - ], - dtype='f') +rois = np.array([[0.1, 0.15, 0.6, 0.35], [2.1, 1.73, 3.8, 2.13]], dtype='f') themodel = 'roialign_test.onnx' -sess = rt.InferenceSession( - '/workspace/AMDMIGraphX/test/onnx/' + themodel) +sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/' + themodel) res = sess.run(['y'], {'x': x, 'rois': rois, 'batch_ind': [1, 0]}) -print(' ORT test model is ' + themodel + ', rois_data is \n', - rois, ' result is \n', res) - \ No newline at end of file +print(' ORT test model is ' + themodel + ', rois_data is \n', rois, + ' result is \n', res) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index a9885cdec8b..ded80e5080f 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -126,12 +126,6 @@ struct roialign // order. The i[x] value is least significant and iterates the fastest. std::array p = {idx_v[1], idx_v[0]}; std::array i = {idx_v[3], idx_v[2]}; // these are always equal -// printf(" EEEEE p, i-index %lu %lu %lu %lu ( %lu %lu %lu %lu)\n", p[0], p[1], i[0], i[1], -// idx_v[0], idx_v[1], idx_v[2], idx_v[3]); - - - - // xy is scaled coordinates of start point of ROI std::array xy{}; @@ -139,15 +133,11 @@ struct roialign // inside) from which we will interpolate. 
std::array low{}; std::array high{}; -// float asdf=-1.f; for(auto ii : range(p.size())) { xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] + (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii]; -// initial calculated values, before adjustments -// if(ii == 0 ) asdf = xy[0]; -// if(ii == 1) -// printf(" IIIII index %lu xy: (%f, %f)\n", index, asdf, xy[1]); + if(xy[ii] < -1.0 or xy[ii] > dims[ii]) { results[index] = pos_weight{}; @@ -162,7 +152,6 @@ struct roialign xy[ii] = high[ii] = low[ii] = dims[ii] - 1; } } -// printf(" FFFFF index %lu xy: (%f, %f)\n", index, xy[0], xy[1]); results[index].pos = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], @@ -174,12 +163,6 @@ struct roialign float hx = 1.0f - lx; // save weights and indices results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx}; - // printf(" AAAAA index %lu results.w: %f, %f, %f, %f\n", index, - // results[index].w[0], - // results[index].w[1], - // results[index].w[2], - // results[index].w[3] - // ); }); return results; } @@ -207,15 +190,14 @@ struct roialign std::tuple calc_pooling(const T& data, const std::array& bin_grid_size, const std::vector& pos_weights, - int64_t index, // index to c + int64_t index, // index to c Op op) const { double output_val = op.init(); const int64_t count = bin_grid_size[0] * bin_grid_size[1]; - dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { -// printf(" IIIIIKKKKK iy, ix, index = %lu %lu %ld\n", iy, ix, index ); + dfor(bin_grid_size[0], bin_grid_size[1])([&](auto, auto) { const auto& pc = pos_weights[index]; - std::array wv; + std::array wv; std::transform( pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) { return *(data + pos) * w; @@ -257,7 +239,6 @@ struct roialign static_cast(roi[roi_s.index({n, 2})] * spatial_scale - offset), static_cast(roi[roi_s.index({n, 3})] * spatial_scale - offset)}; - // Force malformed ROIs to be 1x1, output_half_pixel transform mode std::array roi_size{}; std::array 
bin_size{}; @@ -291,15 +272,11 @@ struct roialign auto ph = idx[1]; auto pw = idx[2]; -// n anc c are 0 because that's the size of the test case -// printf(" IIIII n, c, ph, pw = %lu %lu %lu %lu\n", n, c, ph, pw); - const auto offset_bottom_data = bottom_data + static_cast((roi_batch_ind * channels + c) * in_dims[0] * in_dims[1]); double output_val; -// printf(" IIIIIc vec_index[c] = %ld\n", vec_index[c]); std::tie(output_val, vec_index[c]) = (mode == migraphx::op::pooling_mode::average) ? this->calc_pooling(offset_bottom_data, @@ -312,7 +289,6 @@ struct roialign pre_calc, vec_index[c], max_pool{}); -printf(" DDDDD index %lu %f\n", output_shape.index({n, c, ph, pw}), output_val); output(n, c, ph, pw) = output_val; }); }); diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index c275eb3e11c..22721aca2d6 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -89,18 +89,18 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( xy[ii] = high[ii] = low[ii] = dims[ii] - 1; } } - array locs = {low[1] * dims[0] + low[0], // new + array locs = {low[1] * dims[0] + low[0], // new low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], high[1] * dims[0] + high[0]}; - float lx = xy[0] - low[0]; // new + float lx = xy[0] - low[0]; // new float ly = xy[1] - low[1]; -\ + float hy = 1.0f - ly; float hx = 1.0f - lx; // do calculations in floating point and convert final result to required type - array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; //old + array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; // old auto v01 = pooling(data[locs[1]] * ws[1], data[locs[0]] * ws[0]); auto v23 = pooling(data[locs[3]] * ws[3], data[locs[2]] * ws[2]); @@ -115,7 +115,6 @@ MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, const array& idx, const array& bin_grid_size, const array& 
dims, - float roi_offset, Op op) { // for one idx (output height and width coordinates) we iterate through all bin_grid values @@ -124,18 +123,9 @@ MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, const int64_t count = bin_grid_size[0] * bin_grid_size[1]; dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { array id = {iy, ix}; -// println_once(" jjjjj id: ", id); -(void) roi_offset; -// println_once(" jjjjj roi_starts: ", roi_starts); -// println(" eeeee idx: ", idx); - - array locs = - roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size; // new -// idx same as ph, pw - array asdf_idx = {float(iy), float(ix), float(idx[0]), float(idx[1]),locs[0], locs[1]}; -// put idx, ix, iy, and locs into a single array to debug together - -// println(" iiiii asdf_idx/locs: ", asdf_idx); + array locs = roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size; + array asdf_idx = { + float(iy), float(ix), float(idx[0]), float(idx[1]), locs[0], locs[1]}; auto val = bilinear_interpolate(data, dims, locs, op); output_val = op(output_val, val); }); @@ -179,7 +169,17 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, // is for height and second dim is for width const auto& out_lens = out_s.lens; array out_dims = {out_lens[2], out_lens[3]}; -println_once(" aaaaa stride: ", stride); + + // Compute lens and strides vectors for use in reindexing output. + // Todo: look for a less indirect way to reconcile the ordering of iteration + // between this op. and the reference. 
+ array m_lens{out_lens[0], out_lens[1], out_lens[3], out_lens[2]}; + array m_strides; + m_strides[3] = 1; + for(auto k : {2, 1, 0}) + { + m_strides[k] = m_strides[k + 1] * m_lens[k + 1]; + } for(index_int i = index.global; i < out_s.elements(); i += stride) { @@ -192,7 +192,8 @@ println_once(" aaaaa stride: ", stride); const auto offset_rois = rois + (n * roi_column_num); const int batch_ind = ind[n]; - // Note that roi_offset in src/targets/gpu/jit/roialign.cpp uses a negative value, so we add it here + // Note that roi_offset in src/targets/gpu/jit/roialign.cpp uses a negative value, so we add + // rather than subtract it here array roi_starts = { static_cast(offset_rois[0]) * static_cast(s.spatial_scale) + s.roi_offset, static_cast(offset_rois[1]) * static_cast(s.spatial_scale) + @@ -207,7 +208,6 @@ println_once(" aaaaa stride: ", stride); array bin_size{}; array bin_grid_size{}; - for(index_int ii = 0; ii < roi_size.size(); ++ii) { roi_size[ii] = roi_ends[ii] - roi_starts[ii]; @@ -219,86 +219,29 @@ println_once(" aaaaa stride: ", stride); ? s.sampling_ratio : migraphx::ceil(roi_size[ii] / out_dims[ii]); } -// array zap = {n, c, ph, pw}; - -// println(" kkkkk n, c, ph, pw: ", zap); - const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); -// array reindex = {size_t(n), size_t(c), size_t(pw), size_t(ph)};//;; rearrange the gpu indices to what the ref indices would be -// migraphx::shape reindex_shape(reindex); -// and insert that location in y_t + + // + // Reindexing. Calculations to this point did not iterate in the same order as + // in the reference op; we now calculate the output index corresponding to i + // + size_t pp = i; + size_t jj = (pp / m_strides[0]) * m_strides[0]; + pp = pp % m_strides[0]; + jj += (pp / m_strides[1]) * m_strides[1]; + pp %= m_strides[1]; + pp = pp / m_lens[2] + (pp % m_lens[2]) * m_strides[2]; + jj += pp; if constexpr(s.is_avg_pooling) { -// what are the indices corresponding to i? 
- - std::size_t jj = 0; - // std::size_t ss = 1; -array m_lens{out_lens[0], out_lens[1], out_lens[3], out_lens[2]}; -array m_strides; -m_strides[3] = 1; - for(auto k: {2, 1, 0}) - { - m_strides[k] = m_strides[k+1] * m_lens[k+1]; - - } -println_once(" m_lens: ", m_lens); -println_once(" m_strides: ", m_strides); - // for(auto k : {3, 2, 1, 0}) - // { - // std::size_t stride2 = m_strides[k]; - // std::size_t len = m_lens[k]; - // std::size_t idxx = (i % (ss * len)) / ss; - // jj += stride2 * idxx; - // ss *= len; - // } - // println(" jj2: ", jj); - -size_t pp = i; -jj = (pp/m_strides[0])*m_strides[0]; -pp = pp % m_strides[0]; -jj += (pp/m_strides[1])*m_strides[1]; -pp %= m_strides[1]; -println(" i, pp: ", 10000*i + pp); - -println(" pp/m_strides[2], pp % m_strides[2]",1000000*(pp/m_lens[2] + (pp % m_lens[2])*m_strides[2]) - + 10000*(pp/m_lens[2]) + (pp%m_lens[2]) + 100000000); -pp = pp/m_lens[2] + (pp % m_lens[2])*m_strides[2]; -println(" jj, pp: ", jj * 10000 + pp); // <===== may still be relevant -// jj += (pp/m_strides[2])*m_strides[2]; -// pp %= m_strides[2]; -jj += pp; - - -// jj = i/m_strides[2] + (i%m_strides[2])*m_lens[2] -// jj = (i % m_strides[1]) - - -y_t[jj] = calc_pooling(offset_x, - // y_t[i] = calc_pooling(offset_x, - roi_starts, - bin_size, - {ph, pw}, - bin_grid_size, - in_dims, - s.roi_offset, - avg_pool{}); -array zapzap = {float(n), float(c), float(ph), float(pw), float(i), static_cast(jj), y_t[jj]}; - - -println(" ddddd y_t[jj]: ", zapzap) ; + y_t[jj] = calc_pooling( + offset_x, roi_starts, bin_size, {ph, pw}, bin_grid_size, in_dims, avg_pool{}); } else { - y_t[i] = calc_pooling(offset_x, - roi_starts, - bin_size, - {ph, pw}, - bin_grid_size, - in_dims, - s.roi_offset, - max_pool{}); - + y_t[jj] = calc_pooling( + offset_x, roi_starts, bin_size, {ph, pw}, bin_grid_size, in_dims, max_pool{}); } } } diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index bc5639d0424..ca8f549f89d 100644 --- a/test/onnx/gen_onnx.py +++ 
b/test/onnx/gen_onnx.py @@ -35,7 +35,8 @@ def onnx_test(external_data=False, opset_imports=None): def create_onnx_test(op_test): def run_test(): op_info = op_test() - opset_id = [helper.make_operatorsetid('', opset_imports)] if opset_imports is not None else None + opset_id = [helper.make_operatorsetid('', opset_imports) + ] if opset_imports is not None else None if len(op_info) > 3: graph_def = helper.make_graph(op_info[0], @@ -48,8 +49,7 @@ def run_test(): op_info[1], op_info[2]) model_def = helper.make_model(graph_def, producer_name=op_test.__name__, - opset_imports=opset_id - ) + opset_imports=opset_id) onnx.save_model(model_def, '{}.onnx'.format(op_test.__name__), save_as_external_data=external_data, diff --git a/test/verify/test_roialign.cpp b/test/verify/test_roialign.cpp index 8833618a079..e957920af1a 100644 --- a/test/verify/test_roialign.cpp +++ b/test/verify/test_roialign.cpp @@ -34,12 +34,12 @@ struct test_roialign_half_pixel : verify_program { migraphx::program p; auto* mm = p.get_main_module(); - migraphx::shape x_s{DType, {1, 5, 2, 2}}; + migraphx::shape x_s{DType, {2, 7, 2, 2}}; - migraphx::shape roi_s{DType, {1, 4}}; + migraphx::shape roi_s{DType, {2, 4}}; - migraphx::shape ind_s{migraphx::shape::int64_type, {1}}; - std::vector ind_vec = {0}; + migraphx::shape ind_s{migraphx::shape::int64_type, {2}}; + std::vector ind_vec = {1, 0}; auto x = mm->add_parameter("x", x_s); auto roi = mm->add_parameter("roi", roi_s); @@ -95,7 +95,7 @@ struct test_roialign : verify_program> template struct test_roialign_half_pixel; template struct test_roialign; -// template struct test_roialign; commented out for debug -// template struct test_roialign; -// template struct test_roialign; -// template struct test_roialign; +template struct test_roialign; +template struct test_roialign; +template struct test_roialign; +template struct test_roialign; diff --git a/test/verify/test_roialign_nondefault.cpp b/test/verify/test_roialign_nondefault.cpp index 
d4785014512..ac9be3b7281 100644 --- a/test/verify/test_roialign_nondefault.cpp +++ b/test/verify/test_roialign_nondefault.cpp @@ -40,7 +40,6 @@ struct test_roialign_nondefault : verify_program migraphx::shape ind_s{migraphx::shape::int64_type, {5}}; std::vector ind_vec = {0, 2, 3, 4, 1}; - auto x = mm->add_parameter("x", x_s); auto roi = mm->add_parameter("roi", roi_s); auto ind = mm->add_literal(migraphx::literal(ind_s, ind_vec)); From 2da60d1b6ae1b3dd75c24aa5d11112e1033a98ee Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Mon, 14 Oct 2024 17:12:06 +0000 Subject: [PATCH 49/56] removed a debug file --- ort_roialign.py | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 ort_roialign.py diff --git a/ort_roialign.py b/ort_roialign.py deleted file mode 100644 index 817e5fe2e84..00000000000 --- a/ort_roialign.py +++ /dev/null @@ -1,21 +0,0 @@ -# Not for release. This test script is for develop/test only - -import onnx -import onnxruntime as rt -# from https://onnxruntime.ai/docs/get-started/with-python.html -import numpy as np -print(" version: ", onnx.__version__, rt.__version__) - -x = np.array(np.arange(10 * 5 * 4 * 7), dtype='f') -x = np.reshape(x, [10, 5, 4, 7]) - -y = np.ones([10, 5, 4, 7], dtype='f') - -rois = np.array([[0.1, 0.15, 0.6, 0.35], [2.1, 1.73, 3.8, 2.13]], dtype='f') - -themodel = 'roialign_test.onnx' -sess = rt.InferenceSession('/workspace/AMDMIGraphX/test/onnx/' + themodel) -res = sess.run(['y'], {'x': x, 'rois': rois, 'batch_ind': [1, 0]}) - -print(' ORT test model is ' + themodel + ', rois_data is \n', rois, - ' result is \n', res) From 6f9475889510a0dea49b17c9bee4cda58139aaa5 Mon Sep 17 00:00:00 2001 From: Brian Pickrell <95253842+bpickrel@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:14:51 -0700 Subject: [PATCH 50/56] comment Co-authored-by: spolifroni-amd --- test/onnx/parse/roialign_default_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/onnx/parse/roialign_default_test.cpp 
b/test/onnx/parse/roialign_default_test.cpp index 410d7ed62d4..b5a940e7927 100644 --- a/test/onnx/parse/roialign_default_test.cpp +++ b/test/onnx/parse/roialign_default_test.cpp @@ -37,7 +37,7 @@ TEST_CASE(roialign_default_test) auto bi = mm->add_parameter("batch_ind", sbi); // Depending on whether the model was built for Onnx opset 16 or earlier, the default - // coordinate_transformation_mode is different. These model files had explicit opset given + // coordinate_transformation_mode will be different. These model files had explicit opset given // when they were created. auto r = mm->add_instruction( migraphx::make_op("roialign", {{"coordinate_transformation_mode", "half_pixel"}}), From c1000cfa31611a9d16566b5af19d625426df3338 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 15 Oct 2024 15:54:32 +0000 Subject: [PATCH 51/56] misc. cleanup; fixed one Jenkins-only fail; added one more shape check --- src/include/migraphx/op/roialign.hpp | 7 +++++-- .../include/migraphx/kernels/roialign.hpp | 18 ++++++++---------- test/op_shape_test.cpp | 5 +++++ test/verify/test_roialign.cpp | 2 +- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/include/migraphx/op/roialign.hpp b/src/include/migraphx/op/roialign.hpp index ded80e5080f..76b7c8b967e 100644 --- a/src/include/migraphx/op/roialign.hpp +++ b/src/include/migraphx/op/roialign.hpp @@ -81,6 +81,8 @@ struct roialign if(!shape::is_integral(inputs.at(2).type())) MIGRAPHX_THROW( "ROIALIGN: incorrect datatype for roi indices! 
(should be an integral type)"); + if(x_lens.size() != 4) + MIGRAPHX_THROW("ROIALIGN: data input must have 4 dimensions n, c, h, w"); if(bi_lens.size() != 1) { MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!"); @@ -230,7 +232,8 @@ struct roialign par_for(n_rois, [&](auto n) { const auto bottom_data = x.begin(); const auto roi_batch_ind = batch_indices[n]; - // Do not use rounding; this implementation detail is critical + // Do not use rounding here even if data is a quantized type; this + // implementation detail is critical const float offset = (coord_trans_mode == "half_pixel") ? 0.5 : 0.0; std::array roi_starts = { static_cast(roi[roi_s.index({n, 0})] * spatial_scale - offset), @@ -239,7 +242,7 @@ struct roialign static_cast(roi[roi_s.index({n, 2})] * spatial_scale - offset), static_cast(roi[roi_s.index({n, 3})] * spatial_scale - offset)}; - // Force malformed ROIs to be 1x1, output_half_pixel transform mode + // Force malformed ROIs to be 1x1, if in output_half_pixel transform mode std::array roi_size{}; std::array bin_size{}; std::array bin_grid_size{}; diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp index 22721aca2d6..769c7c978bf 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ -24,8 +24,8 @@ #ifndef MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP #define MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP -#include -#include +// #include +// #include #include #include #include @@ -89,21 +89,21 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( xy[ii] = high[ii] = low[ii] = dims[ii] - 1; } } - array locs = {low[1] * dims[0] + low[0], // new + array locs = {low[1] * dims[0] + low[0], low[1] * dims[0] + high[0], high[1] * dims[0] + low[0], high[1] * dims[0] + high[0]}; - float lx = xy[0] - low[0]; // new + float lx = xy[0] - low[0]; float ly = xy[1] - low[1]; 
float hy = 1.0f - ly; float hx = 1.0f - lx; // do calculations in floating point and convert final result to required type - array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; // old + array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; - auto v01 = pooling(data[locs[1]] * ws[1], data[locs[0]] * ws[0]); - auto v23 = pooling(data[locs[3]] * ws[3], data[locs[2]] * ws[2]); + auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); + auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); return implicit_conversion(pooling(v01, v23)); } @@ -124,8 +124,6 @@ MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { array id = {iy, ix}; array locs = roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size; - array asdf_idx = { - float(iy), float(ix), float(idx[0]), float(idx[1]), locs[0], locs[1]}; auto val = bilinear_interpolate(data, dims, locs, op); output_val = op(output_val, val); }); @@ -176,7 +174,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, array m_lens{out_lens[0], out_lens[1], out_lens[3], out_lens[2]}; array m_strides; m_strides[3] = 1; - for(auto k : {2, 1, 0}) + for(int k = 2; k >= 0; k--) { m_strides[k] = m_strides[k + 1] * m_lens[k + 1]; } diff --git a/test/op_shape_test.cpp b/test/op_shape_test.cpp index 24b9afb1377..819d05f9556 100644 --- a/test/op_shape_test.cpp +++ b/test/op_shape_test.cpp @@ -5155,6 +5155,11 @@ TEST_CASE(roialign_test) expect_shape(sout, migraphx::make_op("roialign"), sx, srois, sbi); + // data input must be 4 dimensions + migraphx::shape sx2{migraphx::shape::float_type, {2, 3, 4, 5, 6}}; + throws_shape(migraphx::make_op("roialign"), sx2, srois, sbi); + + // batch index must be 1 dimension migraphx::shape sbi1{migraphx::shape::int64_type, {2, 3}}; throws_shape(migraphx::make_op("roialign"), sx, srois, sbi1); diff --git a/test/verify/test_roialign.cpp b/test/verify/test_roialign.cpp index 
e957920af1a..88864631e87 100644 --- a/test/verify/test_roialign.cpp +++ b/test/verify/test_roialign.cpp @@ -34,7 +34,7 @@ struct test_roialign_half_pixel : verify_program { migraphx::program p; auto* mm = p.get_main_module(); - migraphx::shape x_s{DType, {2, 7, 2, 2}}; + migraphx::shape x_s{DType, {5, 7, 2, 2}}; migraphx::shape roi_s{DType, {2, 4}}; From 379dcef6bc694b89c1bce92dd1ba54f03e886d08 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 22 Oct 2024 17:14:17 +0000 Subject: [PATCH 52/56] revert debugging changes --- test/verify/test_roialign_nondefault.cpp | 1 + tools/build_and_test_onnxrt.sh | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/test/verify/test_roialign_nondefault.cpp b/test/verify/test_roialign_nondefault.cpp index ac9be3b7281..d4785014512 100644 --- a/test/verify/test_roialign_nondefault.cpp +++ b/test/verify/test_roialign_nondefault.cpp @@ -40,6 +40,7 @@ struct test_roialign_nondefault : verify_program migraphx::shape ind_s{migraphx::shape::int64_type, {5}}; std::vector ind_vec = {0, 2, 3, 4, 1}; + auto x = mm->add_parameter("x", x_s); auto roi = mm->add_parameter("roi", roi_s); auto ind = mm->add_literal(migraphx::literal(ind_s, ind_vec)); diff --git a/tools/build_and_test_onnxrt.sh b/tools/build_and_test_onnxrt.sh index a3a8fdfbf61..19147c84ddb 100755 --- a/tools/build_and_test_onnxrt.sh +++ b/tools/build_and_test_onnxrt.sh @@ -36,8 +36,8 @@ export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" cd build/Linux/Release #Add test launcher for onnxrt tests -# echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -# echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions2' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -# echo 'InferenceSessionTests.Test3LayerNestedSubgraph' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -# echo 'InferenceSessionTests.Test2LayerNestedSubgraph' >> 
../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt -# ../../../tools/ci_build/github/pai/pai_test_launcher.sh || (gdb ./onnxruntime_test_all core -batch -ex bt && exit 1) +echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +echo 'InferenceSessionTests.CheckRunProfilerWithSessionOptions2' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +echo 'InferenceSessionTests.Test3LayerNestedSubgraph' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +echo 'InferenceSessionTests.Test2LayerNestedSubgraph' >> ../../../tools/ci_build/github/pai/migraphx-excluded-tests.txt +../../../tools/ci_build/github/pai/pai_test_launcher.sh || (gdb ./onnxruntime_test_all core -batch -ex bt && exit 1) From 4c4846edc88024b63356da74bdb9d3e7b851c549 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Tue, 22 Oct 2024 17:21:25 +0000 Subject: [PATCH 53/56] clean up debug code --- test/onnx/verify/roialign_half_pixel_verify_test.cpp | 8 -------- test/onnx/verify/roialign_verify_test.cpp | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/test/onnx/verify/roialign_half_pixel_verify_test.cpp b/test/onnx/verify/roialign_half_pixel_verify_test.cpp index ea570792249..62c91ee63b5 100644 --- a/test/onnx/verify/roialign_half_pixel_verify_test.cpp +++ b/test/onnx/verify/roialign_half_pixel_verify_test.cpp @@ -51,14 +51,6 @@ TEST_CASE(roialign_half_pixel_verify_test) std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); - printf(" result: \n"); - for(int i = 0; i < result_vector.size(); i++) - { - printf(" %f ", result_vector[i]); - if(i % 6 == 5) - printf("\n"); - } - printf("\n"); // Gold values were generated with onnxruntime std::vector gold = {5.38, 5.4799995, 5.4799995, 6.58, 6.68, 6.68, 17.38, 17.48, 17.48, 18.58, 18.68, 18.68, diff --git a/test/onnx/verify/roialign_verify_test.cpp 
b/test/onnx/verify/roialign_verify_test.cpp index 051080adf25..ea9d84e7e8a 100644 --- a/test/onnx/verify/roialign_verify_test.cpp +++ b/test/onnx/verify/roialign_verify_test.cpp @@ -51,7 +51,7 @@ TEST_CASE(roialign_verify_test) std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); - // gold results were generated with onnxruntime + // gold values were generated with onnxruntime std::vector gold = { 143.16667, 143.49998, 143.83333, 144.56667, 144.9, 145.23334, 145.96667, 146.3, 146.63333, 147.36667, 147.70001, 148.03334, 148.76666, 149.09999, 149.43333, From e9fd0fa7ccba3e3a235df23f802fae7820d7ad08 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 23 Oct 2024 18:10:25 +0000 Subject: [PATCH 54/56] work in progress --- docs/dev/onnx_operators.rst | 7 +++++-- test/onnx/parse/roialign_default_test.cpp | 13 +++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/dev/onnx_operators.rst b/docs/dev/onnx_operators.rst index a87af00e755..7d58431ee39 100644 --- a/docs/dev/onnx_operators.rst +++ b/docs/dev/onnx_operators.rst @@ -697,8 +697,11 @@ Operator Support Matrix | | | | functions are | | | | | not enabled | +--------------------------+-----------+-----------------+------------------------------+ -| RoiAlign | ✅ | FP8, FP16, | | -| | | FP32, FP64 | | +| RoiAlign | ✅ | FP8, FP16, | ``X``, | +| | | FP32, FP64, | ``ROI`` take any floating- | +| | | UINT8, UINT16, | point type; | +| | | UINT32, UINT64, | ``batch_indices`` | +| | | | takes any integral type | +--------------------------+-----------+-----------------+------------------------------+ | Round | ✅ | FP8, FP16, | | | | | FP32, FP64 | | diff --git a/test/onnx/parse/roialign_default_test.cpp b/test/onnx/parse/roialign_default_test.cpp index b5a940e7927..f2f4426d485 100644 --- a/test/onnx/parse/roialign_default_test.cpp +++ b/test/onnx/parse/roialign_default_test.cpp @@ -35,7 +35,7 @@ TEST_CASE(roialign_default_test) auto x = 
mm->add_parameter("x", sx); auto rois = mm->add_parameter("rois", srois); auto bi = mm->add_parameter("batch_ind", sbi); - +asdf // Depending on whether the model was built for Onnx opset 16 or earlier, the default // coordinate_transformation_mode will be different. These model files had explicit opset given // when they were created. @@ -47,6 +47,15 @@ TEST_CASE(roialign_default_test) mm->add_return({r}); auto prog = read_onnx("roialign_default_test.onnx"); EXPECT(p == prog); +} + + +TEST_CASE(roialign_default_12_test) +{ + // opset 12 version + migraphx::shape sx{migraphx::shape::float_type, {10, 4, 7, 8}}; + migraphx::shape srois{migraphx::shape::float_type, {8, 4}}; + migraphx::shape sbi{migraphx::shape::int64_type, {8}}; // Opset 12 program migraphx::program p_12; @@ -62,5 +71,5 @@ TEST_CASE(roialign_default_test) bi_12); mm_12->add_return({r_12}); auto prog_12 = read_onnx("roialign_default_test_12.onnx"); - EXPECT(p_12 == prog_12); + EXPECT(p == prog_12); } From 174a5b7898a4be27d3caab928da00948aa5f87b0 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 23 Oct 2024 21:13:30 +0000 Subject: [PATCH 55/56] split test into 2 cases --- test/onnx/parse/roialign_default_test.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/onnx/parse/roialign_default_test.cpp b/test/onnx/parse/roialign_default_test.cpp index f2f4426d485..9a14778c12c 100644 --- a/test/onnx/parse/roialign_default_test.cpp +++ b/test/onnx/parse/roialign_default_test.cpp @@ -35,7 +35,6 @@ TEST_CASE(roialign_default_test) auto x = mm->add_parameter("x", sx); auto rois = mm->add_parameter("rois", srois); auto bi = mm->add_parameter("batch_ind", sbi); -asdf // Depending on whether the model was built for Onnx opset 16 or earlier, the default // coordinate_transformation_mode will be different. These model files had explicit opset given // when they were created. 
@@ -71,5 +70,5 @@ TEST_CASE(roialign_default_12_test) bi_12); mm_12->add_return({r_12}); auto prog_12 = read_onnx("roialign_default_test_12.onnx"); - EXPECT(p == prog_12); + EXPECT(p_12 == prog_12); } From 0f25c4f222563743fc0ed97cd331c4dbedd50811 Mon Sep 17 00:00:00 2001 From: Brian Pickrell Date: Wed, 23 Oct 2024 21:49:16 +0000 Subject: [PATCH 56/56] add roialign verify test for max pooling; doesn't pass --- test/onnx/gen_onnx.py | 21 ++++++++++ test/onnx/roialign_half_pixel_max_test.onnx | Bin 0 -> 368 bytes .../roialign_half_pixel_verify_test.cpp | 38 ++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 test/onnx/roialign_half_pixel_max_test.onnx diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index ca8f549f89d..b8e1945e3ea 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -10666,6 +10666,27 @@ def roialign_half_pixel_test(): return ([node], [x, roi, bi], [y]) +@onnx_test() +def roialign_half_pixel_max_test(): + x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 4, 3]) + roi = helper.make_tensor_value_info('rois', TensorProto.FLOAT, [2, 4]) + bi = helper.make_tensor_value_info('batch_ind', TensorProto.INT64, [2]) + y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 4, 3]) + + # half_pixel is the newer mode for ROIAlign + node = onnx.helper.make_node('RoiAlign', + inputs=['x', 'rois', 'batch_ind'], + outputs=['y'], + spatial_scale=2.0, + output_height=2, + output_width=3, + sampling_ratio=2, + mode="max", + coordinate_transformation_mode="half_pixel") + + return ([node], [x, roi, bi], [y]) + + @onnx_test() def round_half_test(): x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [4, 4]) diff --git a/test/onnx/roialign_half_pixel_max_test.onnx b/test/onnx/roialign_half_pixel_max_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fc192568d6f8bcb0b93ac275aae3541be33e1ace GIT binary patch literal 368 
zcmaJ-%TB{E5R9EjxT`j*+^5uvLyx&|=LCEL$6PEoabru49oeo(|0X}e2k=)ovKWdjsW$L#WfF;BM+5i%G4BH4&>FU z=kc_7fcwf?S1ZGVV8L+S|-zKbK*%vW5CRgxr61491?EO#*+UBi+f qItNVft+LqDa(#_WbC^b+d~@aI_aguZxufF&K1Q|6UurLSzW4+v<6gu7 literal 0 HcmV?d00001 diff --git a/test/onnx/verify/roialign_half_pixel_verify_test.cpp b/test/onnx/verify/roialign_half_pixel_verify_test.cpp index 62c91ee63b5..2653471af1b 100644 --- a/test/onnx/verify/roialign_half_pixel_verify_test.cpp +++ b/test/onnx/verify/roialign_half_pixel_verify_test.cpp @@ -59,3 +59,41 @@ TEST_CASE(roialign_half_pixel_verify_test) EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } + +// The half_pixel mode for the ROIAlign op, max pooling +TEST_CASE(roialign_half_pixel_max_verify_test) +{ + migraphx::program p = read_onnx("roialign_half_pixel_max_test.onnx"); + p.compile(migraphx::make_target("ref")); + migraphx::shape s{migraphx::shape::float_type, {2, 2, 4, 3}}; + std::vector data(2 * 2 * 4 * 3); + std::iota(data.begin(), data.end(), 0.f); + migraphx::parameter_map pp; + pp["x"] = migraphx::argument(s, data.data()); + pp["y"] = migraphx::argument(s, data.data()); + + migraphx::shape srois{migraphx::shape::float_type, {2, 4}}; + std::vector rois_data = {1.1, 0.73, 1.7, 1.13, 1.1, 0.73, 2.6, 1.13}; + migraphx::shape sbi{migraphx::shape::int64_type, {2}}; // batch_index + std::vector bi_data = {0, 1}; + + pp["rois"] = migraphx::argument(srois, rois_data.data()); + pp["batch_ind"] = migraphx::argument(sbi, bi_data.data()); + pp["y"] = migraphx::argument(s, data.data()); + + auto result = p.eval(pp).back(); + std::vector result_vector; + result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); + + // Gold values were generated with onnxruntime + std::vector gold = { 4.7 , 4.7 , 4.7 ,5.2799997, 5.2799997, 5.2799997, + + 15.979999 , 15.979999 , 15.979999 , 13.199999 , 13.199999 , 13.199999 , + + + 27.477499 , 27.477499 , 0. ,19.440002 , 19.440002 , 0. , + + 38.8475 , 38.8475 , 0. 
, 26.730003 , 26.730003 , 0. }; + + EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); +}