-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add OV GQA op and decomposition pass
Fix interleave logic in decomposition Add ONNX frontend tests
- Loading branch information
Showing
15 changed files
with
1,957 additions
and
261 deletions.
There are no files selected for viewing
24 changes: 24 additions & 0 deletions
24
...formations/include/transformations/op_conversions/group_query_attention_decomposition.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// Copyright (C) 2018-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "openvino/op/group_query_attention.hpp" | ||
#include "openvino/pass/matcher_pass.hpp" | ||
#include "transformations_visibility.hpp" | ||
|
||
namespace ov { | ||
namespace pass { | ||
|
||
class TRANSFORMATIONS_API GroupQueryAttentionDecomposition; | ||
|
||
} // namespace pass | ||
} // namespace ov | ||
|
||
/// \brief Matcher pass registered under the RTTI name "GroupQueryAttentionDecomposition".
///
/// Only the declaration lives here; the constructor and `decompose` are defined
/// out of line.
class ov::pass::GroupQueryAttentionDecomposition : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("GroupQueryAttentionDecomposition", "0");

    /// \brief Creates the pass.
    GroupQueryAttentionDecomposition();

    /// \brief Returns the outputs computed for the given GroupQueryAttention node.
    /// \param node The ov::op::v15::GroupQueryAttention node to process.
    /// \return The resulting outputs as an ov::OutputVector.
    /// NOTE(review): presumably these outputs replace the node's own outputs in the
    /// graph - confirm against the definition in the .cpp.
    ov::OutputVector decompose(std::shared_ptr<ov::op::v15::GroupQueryAttention> node);
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
322 changes: 322 additions & 0 deletions
322
...ransformations/src/transformations/op_conversions/group_query_attention_decomposition.cpp
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// Copyright (C) 2018-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
#pragma once | ||
|
||
#include "openvino/op/op.hpp" | ||
|
||
namespace ov { | ||
namespace op { | ||
namespace v15 { | ||
|
||
/// \brief GroupQueryAttention operation (opset15).
///
/// This is an experimental operation that is implemented in the plugins.
///
/// The op carries five attributes: the number of query heads, the number of
/// key/value heads, a scale factor, and two flags controlling rotary handling.
class OPENVINO_API GroupQueryAttention : public Op {
public:
    OPENVINO_OP("GroupQueryAttention", "opset15", op::Op);

    /// \brief Creates an op with no inputs and all attributes at their default
    ///        (zero / false) values.
    GroupQueryAttention() = default;

    /// \brief Creates the op from its inputs and attribute values.
    /// \param args               Inputs of the operation.
    /// \param num_heads          Value stored as the `num_heads` attribute.
    /// \param kv_num_heads       Value stored as the `kv_num_heads` attribute.
    /// \param scale              Value stored as the `scale` attribute.
    /// \param do_rotary          Value stored as the `do_rotary` attribute.
    /// \param rotary_interleaved Value stored as the `rotary_interleaved` attribute.
    GroupQueryAttention(const ov::OutputVector& args,
                        unsigned int num_heads,
                        unsigned int kv_num_heads,
                        float scale,
                        bool do_rotary,
                        bool rotary_interleaved);

    void validate_and_infer_types() override;
    bool visit_attributes(AttributeVisitor& visitor) override;
    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

    /// \return The stored `num_heads` attribute.
    unsigned int get_num_heads() const {
        return m_num_heads;
    }

    /// \return The stored `kv_num_heads` attribute.
    unsigned int get_kv_num_heads() const {
        return m_kv_num_heads;
    }

    /// \return The stored `scale` attribute.
    float get_scale() const {
        return m_scale;
    }

    /// \return The stored `do_rotary` attribute.
    bool get_do_rotary() const {
        return m_do_rotary;
    }

    /// \return The stored `rotary_interleaved` attribute.
    bool get_rotary_interleaved() const {
        return m_rotary_interleaved;
    }

private:
    // Every member has a default initializer: the defaulted constructor would
    // otherwise leave the head counts indeterminate, and reading them through the
    // getters or visit_attributes() would be undefined behaviour.
    unsigned int m_num_heads = 0;
    unsigned int m_kv_num_heads = 0;
    float m_scale = 0;
    bool m_do_rotary = false;
    bool m_rotary_interleaved = false;
};
|
||
} // namespace v15 | ||
} // namespace op | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright (C) 2018-2025 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "openvino/op/op.hpp" | ||
|
||
namespace ov { | ||
namespace op { | ||
namespace v15 { | ||
|
||
/// \brief Represents a missing optional input or output of an ONNX node | ||
/// | ||
/// Some ONNX operators have inputs or outputs that are marked as optional, | ||
/// which means that a referring node MAY forgo providing values for such inputs | ||
/// or computing these outputs. | ||
/// An empty string is used in place of a name of such input or output. | ||
/// | ||
/// More: | ||
/// https://github.com/onnx/onnx/blob/master/docs/IR.md#optional-inputs-and-outputs | ||
class OPENVINO_API Null : public Op {
public:
    OPENVINO_OP("Null", "opset15", op::Op);

    /// \brief Creates the placeholder and declares exactly one output on it.
    Null() {
        set_output_size(1);
    }

    /// \brief Checks whether a raw node pointer refers to a Null placeholder.
    /// \return true when \p node can be cast to Null, false otherwise.
    static bool is_null(const ov::Node* node) {
        const auto placeholder = ov::as_type<const Null>(node);
        return placeholder != nullptr;
    }

    /// \brief Shared-pointer overload; forwards the owned raw pointer.
    static bool is_null(const std::shared_ptr<ov::Node>& node) {
        return is_null(node.get());
    }

    /// \brief Output overload; checks the node that produces \p output.
    static bool is_null(const Output<ov::Node>& output) {
        return is_null(output.get_node());
    }

    /// \brief Returns a brand-new Null node; \p new_args is not used.
    std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
        return std::make_shared<Null>();
    }
};
} // namespace v15 | ||
} // namespace op | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// Copyright (C) 2018-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "openvino/op/group_query_attention.hpp" | ||
|
||
#include "itt.hpp" | ||
#include "openvino/op/null.hpp" | ||
|
||
using namespace std; | ||
namespace ov { | ||
namespace op { | ||
namespace v15 { | ||
|
||
GroupQueryAttention::GroupQueryAttention(const OutputVector& args, | ||
unsigned int num_heads, | ||
unsigned int kv_num_heads, | ||
float scale, | ||
bool do_rotary, | ||
bool rotary_interleaved) | ||
: Op(args), | ||
m_num_heads(num_heads), | ||
m_kv_num_heads(kv_num_heads), | ||
m_scale(scale), | ||
m_do_rotary(do_rotary), | ||
m_rotary_interleaved(rotary_interleaved) { | ||
constructor_validate_and_infer_types(); | ||
} | ||
|
||
int64_t get_head_size(const PartialShape& input_shape, int num_heads, int kv_num_heads) { | ||
return input_shape[2].get_length() / (num_heads + kv_num_heads * 2); | ||
} | ||
|
||
std::vector<int64_t> get_qkv_sizes(const PartialShape& input_shape, int num_heads, int kv_num_heads) { | ||
int64_t per_head_size = get_head_size(input_shape, num_heads, kv_num_heads); | ||
const std::vector<int64_t> qkv_sizes = {num_heads * per_head_size, | ||
kv_num_heads * per_head_size, | ||
kv_num_heads * per_head_size}; | ||
return qkv_sizes; | ||
} | ||
|
||
void GroupQueryAttention::validate_and_infer_types() { | ||
OV_OP_SCOPE(v15_GroupQueryAttention_validate_and_infer_types); | ||
PartialShape input_shape = get_input_partial_shape(0); | ||
Dimension batch_size = input_shape[0]; | ||
Dimension sequence_len = input_shape[1]; | ||
Dimension head_size; | ||
if (Null::is_null(input_value(1)) && Null::is_null(input_value(2))) { | ||
head_size = get_head_size(input_shape, m_num_heads, m_kv_num_heads); | ||
} else { | ||
head_size = input_shape[2].get_length() / m_num_heads; | ||
} | ||
Dimension output_kv_len; | ||
PartialShape kv_past_shape = get_input_partial_shape(3); | ||
// FIXME: https://github.com/openvinotoolkit/openvino/pull/27648 | ||
if (kv_past_shape[2].is_static()) { | ||
output_kv_len = kv_past_shape[2] + sequence_len; | ||
} else { | ||
output_kv_len = ov::Dimension(); | ||
} | ||
auto element_type = get_input_element_type(0); | ||
NODE_VALIDATION_CHECK(this, | ||
element_type == element::f32 || element_type == element::f16, | ||
"GroupQueryAttention only suuports f32 and f16"); | ||
set_output_type(0, element_type, PartialShape{batch_size, sequence_len, head_size * m_num_heads}); | ||
set_output_type(1, element_type, PartialShape{batch_size, m_kv_num_heads, output_kv_len, head_size}); | ||
set_output_type(2, element_type, PartialShape{batch_size, m_kv_num_heads, output_kv_len, head_size}); | ||
} | ||
|
||
/// \brief Passes each attribute member to \p visitor under its attribute name.
///
/// The attributes are visited in this fixed order: do_rotary, kv_num_heads,
/// num_heads, rotary_interleaved, scale.
/// \param visitor Visitor that receives every (name, member) pair.
/// \return Always true.
bool GroupQueryAttention::visit_attributes(AttributeVisitor& visitor) {
    OV_OP_SCOPE(v15_GroupQueryAttention_visit_attributes);

    visitor.on_attribute("do_rotary", m_do_rotary);
    visitor.on_attribute("kv_num_heads", m_kv_num_heads);
    visitor.on_attribute("num_heads", m_num_heads);
    visitor.on_attribute("rotary_interleaved", m_rotary_interleaved);
    visitor.on_attribute("scale", m_scale);

    return true;
}
|
||
/// \brief Creates a new GroupQueryAttention that uses \p new_args as inputs and
///        copies all five attribute values from this node.
/// \param new_args Inputs for the new node.
/// \return The newly created node.
std::shared_ptr<ov::Node> GroupQueryAttention::clone_with_new_inputs(const ov::OutputVector& new_args) const {
    OV_OP_SCOPE(v15_GroupQueryAttention_clone_with_new_inputs);
    std::shared_ptr<ov::Node> copy = std::make_shared<GroupQueryAttention>(new_args,
                                                                           m_num_heads,
                                                                           m_kv_num_heads,
                                                                           m_scale,
                                                                           m_do_rotary,
                                                                           m_rotary_interleaved);
    return copy;
}
|
||
} // namespace v15 | ||
} // namespace op | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.