Skip to content

Commit

Permalink
Add OV GQA op and decomposition pass
Browse files Browse the repository at this point in the history
Fix interleave logic in decomposition

Add ONNX frontend tests
  • Loading branch information
wine99 committed Jan 26, 2025
1 parent d5ed312 commit 911691b
Show file tree
Hide file tree
Showing 15 changed files with 1,957 additions and 261 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/op/group_query_attention.hpp"
#include "openvino/pass/matcher_pass.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace pass {

class TRANSFORMATIONS_API GroupQueryAttentionDecomposition;

} // namespace pass
} // namespace ov

/// \brief Decomposes ov::op::v15::GroupQueryAttention into a subgraph of standard
/// OpenVINO operations so that plugins without a native GQA kernel can execute it.
class ov::pass::GroupQueryAttentionDecomposition : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("GroupQueryAttentionDecomposition", "0");
GroupQueryAttentionDecomposition();
// Builds the replacement subgraph for `node` and returns the outputs that
// substitute the original node's three outputs (attention output, K cache, V cache).
ov::OutputVector decompose(std::shared_ptr<ov::op::v15::GroupQueryAttention> node);
};
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
#include "transformations/op_conversions/eye_decomposition.hpp"
#include "transformations/op_conversions/gelu7_downgrade.hpp"
#include "transformations/op_conversions/group_normalization_decomposition.hpp"
#include "transformations/op_conversions/group_query_attention_decomposition.hpp"
#include "transformations/op_conversions/hsigmoid_decomposition.hpp"
#include "transformations/op_conversions/hswish_decomposition.hpp"
#include "transformations/op_conversions/log_softmax_decomposition.hpp"
Expand Down Expand Up @@ -156,6 +157,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model
REGISTER_DISABLED_PASS(manager, ConvertInterpolate1ToInterpolate4)

auto decomp = manager.register_pass<GraphRewrite>();
ADD_MATCHER(decomp, GroupQueryAttentionDecomposition)
ADD_MATCHER(decomp, ScaledDotProductAttentionDecomposition)
ADD_MATCHER(decomp, Gelu7Downgrade)
ADD_MATCHER(decomp, BidirectionalSequenceDecomposition)
Expand Down

Large diffs are not rendered by default.

54 changes: 54 additions & 0 deletions src/core/include/openvino/op/group_query_attention.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once

#include "openvino/op/op.hpp"

namespace ov {
namespace op {
namespace v15 {

// This is an experimental operation that is implemented in the plugins.
/// \brief Group Query Attention operation (mirrors the ONNX contrib GroupQueryAttention).
///
/// This is an experimental operation that is implemented in the plugins.
class OPENVINO_API GroupQueryAttention : public Op {
public:
    OPENVINO_OP("GroupQueryAttention", "opset15", op::Op);

    GroupQueryAttention() = default;
    /// \param args                Operation inputs (query/key/value, KV caches, etc.).
    /// \param num_heads           Number of attention heads for Q.
    /// \param kv_num_heads        Number of attention heads for K and V (grouping factor).
    /// \param scale               Attention score scaling; 0 means "use default".
    /// \param do_rotary           Whether rotary position embedding is applied.
    /// \param rotary_interleaved  Whether rotary embedding uses interleaved layout.
    GroupQueryAttention(const ov::OutputVector& args,
                        unsigned int num_heads,
                        unsigned int kv_num_heads,
                        float scale,
                        bool do_rotary,
                        bool rotary_interleaved);
    void validate_and_infer_types() override;
    bool visit_attributes(AttributeVisitor& visitor) override;
    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

    unsigned int get_num_heads() const {
        return m_num_heads;
    }
    unsigned int get_kv_num_heads() const {
        return m_kv_num_heads;
    }
    float get_scale() const {
        return m_scale;
    }
    bool get_do_rotary() const {
        return m_do_rotary;
    }
    bool get_rotary_interleaved() const {
        return m_rotary_interleaved;
    }

private:
    // In-class initializers: a default-constructed node (e.g. during deserialization)
    // must not carry indeterminate head counts.
    unsigned int m_num_heads = 0;
    unsigned int m_kv_num_heads = 0;
    float m_scale = 0;
    bool m_do_rotary = false;
    bool m_rotary_interleaved = false;
};

} // namespace v15
} // namespace op
} // namespace ov
47 changes: 47 additions & 0 deletions src/core/include/openvino/op/null.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/op/op.hpp"

namespace ov {
namespace op {
namespace v15 {

/// \brief Represents a missing optional input or output of an ONNX node
///
/// Some ONNX operators have inputs or outputs that are marked as optional,
/// which means that a referring node MAY forgo providing values for such inputs
/// or computing these outputs.
/// An empty string is used in place of a name of such input or output.
///
/// More:
/// https://github.com/onnx/onnx/blob/master/docs/IR.md#optional-inputs-and-outputs
class OPENVINO_API Null : public Op {
public:
    OPENVINO_OP("Null", "opset15", op::Op);
    Null() {
        // A Null still exposes one (dummy) output so it can be wired as an input.
        set_output_size(1);
    }

    /// \brief Returns true when \p node is a v15::Null placeholder.
    static bool is_null(const ov::Node* node) {
        return ov::as_type<const ov::op::v15::Null>(node) != nullptr;
    }

    static bool is_null(const std::shared_ptr<ov::Node>& node) {
        return is_null(node.get());
    }

    static bool is_null(const Output<ov::Node>& output) {
        return is_null(output.get_node());
    }

    // `override` already implies virtual; the redundant `virtual` keyword is dropped.
    // Null carries no state, so the new arguments are intentionally ignored.
    std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
        return std::make_shared<ov::op::v15::Null>();
    }
};
} // namespace v15
} // namespace op
} // namespace ov
2 changes: 2 additions & 0 deletions src/core/include/openvino/op/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@
#include "openvino/op/roll.hpp"
#include "openvino/op/round.hpp"
#include "openvino/op/scaled_dot_product_attention.hpp"
#include "openvino/op/null.hpp"
#include "openvino/op/group_query_attention.hpp"
#include "openvino/op/scatter_elements_update.hpp"
#include "openvino/op/scatter_nd_update.hpp"
#include "openvino/op/scatter_update.hpp"
Expand Down
2 changes: 2 additions & 0 deletions src/core/include/openvino/opsets/opset15_tbl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,5 @@ _OPENVINO_OP_REG(BitwiseLeftShift, ov::op::v15)
_OPENVINO_OP_REG(BitwiseRightShift, ov::op::v15)
_OPENVINO_OP_REG(SliceScatter, ov::op::v15)
_OPENVINO_OP_REG(SearchSorted, ov::op::v15)
_OPENVINO_OP_REG(GroupQueryAttention, ov::op::v15)
_OPENVINO_OP_REG(Null, ov::op::v15)
92 changes: 92 additions & 0 deletions src/core/src/op/group_query_attention.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/op/group_query_attention.hpp"

#include "itt.hpp"
#include "openvino/op/null.hpp"

using namespace std;
namespace ov {
namespace op {
namespace v15 {

// Constructs the node from its inputs and attributes and runs shape/type inference.
// `scale` of 0 is treated as "use the default scaling" downstream.
GroupQueryAttention::GroupQueryAttention(const OutputVector& args,
unsigned int num_heads,
unsigned int kv_num_heads,
float scale,
bool do_rotary,
bool rotary_interleaved)
: Op(args),
m_num_heads(num_heads),
m_kv_num_heads(kv_num_heads),
m_scale(scale),
m_do_rotary(do_rotary),
m_rotary_interleaved(rotary_interleaved) {
constructor_validate_and_infer_types();
}

// Per-head size when input 0 packs Q, K and V along the hidden dimension:
// hidden = (num_heads + 2 * kv_num_heads) * head_size.
int64_t get_head_size(const PartialShape& input_shape, int num_heads, int kv_num_heads) {
    const int64_t hidden_size = input_shape[2].get_length();
    const int64_t total_heads = num_heads + 2 * kv_num_heads;
    return hidden_size / total_heads;
}

std::vector<int64_t> get_qkv_sizes(const PartialShape& input_shape, int num_heads, int kv_num_heads) {
int64_t per_head_size = get_head_size(input_shape, num_heads, kv_num_heads);
const std::vector<int64_t> qkv_sizes = {num_heads * per_head_size,
kv_num_heads * per_head_size,
kv_num_heads * per_head_size};
return qkv_sizes;
}

void GroupQueryAttention::validate_and_infer_types() {
    OV_OP_SCOPE(v15_GroupQueryAttention_validate_and_infer_types);
    PartialShape input_shape = get_input_partial_shape(0);
    Dimension batch_size = input_shape[0];
    Dimension sequence_len = input_shape[1];
    Dimension head_size;
    // When inputs 1 and 2 (K and V) are Null placeholders, input 0 packs Q, K
    // and V along the hidden dimension; otherwise input 0 holds only Q.
    if (Null::is_null(input_value(1)) && Null::is_null(input_value(2))) {
        head_size = get_head_size(input_shape, m_num_heads, m_kv_num_heads);
    } else {
        head_size = input_shape[2].get_length() / m_num_heads;
    }
    Dimension output_kv_len;
    PartialShape kv_past_shape = get_input_partial_shape(3);
    // FIXME: https://github.com/openvinotoolkit/openvino/pull/27648
    if (kv_past_shape[2].is_static()) {
        output_kv_len = kv_past_shape[2] + sequence_len;
    } else {
        output_kv_len = ov::Dimension();
    }
    auto element_type = get_input_element_type(0);
    NODE_VALIDATION_CHECK(this,
                          element_type == element::f32 || element_type == element::f16,
                          "GroupQueryAttention only supports f32 and f16");
    // Output 0: attention result [batch, seq, hidden]; outputs 1/2: updated K/V
    // caches [batch, kv_heads, past+seq, head_size].
    set_output_type(0, element_type, PartialShape{batch_size, sequence_len, head_size * m_num_heads});
    set_output_type(1, element_type, PartialShape{batch_size, m_kv_num_heads, output_kv_len, head_size});
    set_output_type(2, element_type, PartialShape{batch_size, m_kv_num_heads, output_kv_len, head_size});
}

// Serializes/deserializes the node's attributes. NOTE(review): visitation order
// is kept as-is — presumably serialized output depends on it; confirm before reordering.
bool GroupQueryAttention::visit_attributes(AttributeVisitor& visitor) {
OV_OP_SCOPE(v15_GroupQueryAttention_visit_attributes);
visitor.on_attribute("do_rotary", m_do_rotary);
visitor.on_attribute("kv_num_heads", m_kv_num_heads);
visitor.on_attribute("num_heads", m_num_heads);
visitor.on_attribute("rotary_interleaved", m_rotary_interleaved);
visitor.on_attribute("scale", m_scale);
return true;
}

// Creates a copy of this node wired to `new_args`, preserving all attributes.
std::shared_ptr<ov::Node> GroupQueryAttention::clone_with_new_inputs(const ov::OutputVector& new_args) const {
    OV_OP_SCOPE(v15_GroupQueryAttention_clone_with_new_inputs);
    // Standard OpenVINO clone guard: reject an argument count different from the original.
    check_new_args_count(this, new_args);
    return std::make_shared<GroupQueryAttention>(new_args,
                                                 m_num_heads,
                                                 m_kv_num_heads,
                                                 m_scale,
                                                 m_do_rotary,
                                                 m_rotary_interleaved);
}

} // namespace v15
} // namespace op
} // namespace ov
2 changes: 1 addition & 1 deletion src/core/tests/opset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ INSTANTIATE_TEST_SUITE_P(opset,
OpsetTestParams{ov::get_opset12, 178},
OpsetTestParams{ov::get_opset13, 186},
OpsetTestParams{ov::get_opset14, 188},
OpsetTestParams{ov::get_opset15, 199},
OpsetTestParams{ov::get_opset15, 201},
OpsetTestParams{ov::get_opset16, 4}),
OpsetTestNameGenerator{});

Expand Down
Loading

0 comments on commit 911691b

Please sign in to comment.