Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ggml : move rope type enum to ggml.h #8949

Merged
merged 9 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@
#define GGML_EXIT_SUCCESS 0
#define GGML_EXIT_ABORTED 1

+ #define GGML_ROPE_TYPE_NEOX 2
+
#define GGUF_MAGIC "GGUF"

#define GGUF_VERSION 3
Expand Down Expand Up @@ -1453,8 +1455,8 @@ extern "C" {
struct ggml_tensor * b);

// rotary position embedding
- // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
- // if mode & 2 == 1, GPT-NeoX style
danbev marked this conversation as resolved.
Show resolved Hide resolved
+ // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
+ // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
//
// b is an int32 vector with size a->ne[2], it contains the positions
GGML_API struct ggml_tensor * ggml_rope(
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cann/aclnn_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2881,7 +2881,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast,
beta_slow, corr_dims);

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

// init cos/sin cache
ggml_cann_pool_alloc sin_allocator(
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cuda/rope.cu
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

const int32_t * pos = (const int32_t *) src1_d;

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -2313,7 +2313,7 @@ static enum ggml_status ggml_metal_graph_compute(
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

id<MTLComputePipelineState> pipeline = nil;

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-sycl/rope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ void ggml_sycl_op_rope(
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

const int32_t * pos = (const int32_t *) src1_dd;

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4050,7 +4050,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
case GGML_OP_ROPE:
{
const int mode = ((const int32_t *) dst->op_params)[2];
- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

if (is_neox) {
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -14093,7 +14093,7 @@ static void ggml_compute_forward_rope_f32(
float corr_dims[2];
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

const float * freq_factors = NULL;
if (src2 != NULL) {
Expand Down Expand Up @@ -14218,7 +14218,7 @@ static void ggml_compute_forward_rope_f16(
float corr_dims[2];
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

const float * freq_factors = NULL;
if (src2 != NULL) {
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/kompute-shaders/op_rope_f16.comp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ void main() {
const uint i2 = gl_WorkGroupID.y;
const uint i1 = gl_WorkGroupID.x;

- const bool is_neox = (pcs.mode & 2) != 0;
+ const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0;

float corr_dims[2];
rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims);
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/kompute-shaders/op_rope_f32.comp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ void main() {
const uint i2 = gl_WorkGroupID.y;
const uint i1 = gl_WorkGroupID.x;

- const bool is_neox = (pcs.mode & 2) != 0;
+ const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0;

float corr_dims[2];
rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims);
Expand Down
2 changes: 2 additions & 0 deletions ggml/src/kompute-shaders/rope_common.comp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "common.comp"

+ #define GGML_ROPE_TYPE_NEOX 2
+
// TODO: use a local size of 32 or more (Metal uses 1024)
layout(local_size_x = 1) in;

Expand Down
7 changes: 2 additions & 5 deletions include/llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,10 @@ extern "C" {
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
};

- // note: these values should be synchronized with ggml_rope
- // TODO: maybe move this enum to ggml.h (ggml_rope_type)
enum llama_rope_type {
LLAMA_ROPE_TYPE_NONE = -1,
- LLAMA_ROPE_TYPE_NORM = 0,
- LLAMA_ROPE_TYPE_NEOX = 2,
- LLAMA_ROPE_TYPE_GLM = 4,
+ LLAMA_ROPE_TYPE_NORM = 0,
+ LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
};

enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
Expand Down
Loading