Skip to content

【训练营】add operators: clip, gather, where, reduce_min, reduce_max, reduce_mean #160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/infini_operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,10 @@
#include "ops/rms_norm/rms_norm.h"
#include "ops/rotary_embedding/rotary_embedding.h"
#include "ops/swiglu/swiglu.h"
#include "ops/clip/clip.h"
#include "ops/where/where.h"
#include "ops/gather/gather.h"
#include "ops/reduce_max/reduce_max.h"
#include "ops/reduce_mean/reduce_mean.h"
#include "ops/reduce_min/reduce_min.h"
#include "tensor/tensor_descriptor.h"
27 changes: 27 additions & 0 deletions include/ops/clip/clip.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef CLIP_H
#define CLIP_H

#include "../../export.h"
#include "../../operators.h"

/**
 * Public view of a clip operator descriptor, plus the handle type used by the
 * API below. Only the target device is exposed through this header.
 */
typedef struct ClipDescriptor {
    Device device;
} ClipDescriptor, *infiniopClipDescriptor_t;

/**
 * Create a clip descriptor for the given output/input tensor descriptors.
 *
 * @param handle   Library handle.
 * @param desc_ptr Receives the created descriptor.
 * @param output   Descriptor of the result tensor.
 * @param input    Descriptor of the tensor to be clipped.
 * @param min      Pointer to the lower bound.
 * @param max      Pointer to the upper bound.
 * @return An infiniopStatus_t status code.
 *
 * NOTE(review): operatorspy/tests/clip.py passes NULL for a bound it does not
 * want applied, so NULL presumably means "unbounded on that side" — confirm
 * against the implementation.
 */
__C __export infiniopStatus_t infiniopCreateClipDescriptor(
    infiniopHandle_t handle,
    infiniopClipDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output,
    infiniopTensorDescriptor_t input,
    float *min,
    float *max);

/**
 * Run the clip operator described by `desc`.
 *
 * @param desc   Descriptor obtained from infiniopCreateClipDescriptor.
 * @param output Destination buffer (written).
 * @param input  Source buffer (read-only).
 * @param stream Opaque stream argument; the CPU test passes NULL.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopClip(
    infiniopClipDescriptor_t desc,
    void *output,
    const void *input,
    void *stream);

/**
 * Destroy a descriptor created by infiniopCreateClipDescriptor.
 *
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopDestroyClipDescriptor(
    infiniopClipDescriptor_t desc);

#endif /* CLIP_H */
29 changes: 29 additions & 0 deletions include/ops/gather/gather.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef GATHER_H
#define GATHER_H

#include "../../export.h"
#include "../../operators.h"
/* <stdint.h> rather than <cstdint>: this header declares a C-linkage API and
 * must stay includable from plain C translation units as well as C++. */
#include <stdint.h>

/**
 * Public view of a gather operator descriptor. Only the target device is
 * exposed through this header.
 */
typedef struct GatherDescriptor {
    Device device;
} GatherDescriptor;

/** Handle type used by the gather API. */
typedef GatherDescriptor *infiniopGatherDescriptor_t;

/**
 * Create a gather descriptor.
 *
 * @param handle   Library handle.
 * @param desc_ptr Receives the created descriptor.
 * @param output   Descriptor of the result tensor.
 * @param input    Descriptor of the tensor to gather from.
 * @param indices  Descriptor of the index tensor.
 * @param axis     Axis of `input` the indices refer to.
 *                 NOTE(review): whether negative values are accepted is not
 *                 visible from this header — confirm against the implementation.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopCreateGatherDescriptor(infiniopHandle_t handle,
                                                             infiniopGatherDescriptor_t *desc_ptr,
                                                             infiniopTensorDescriptor_t output,
                                                             infiniopTensorDescriptor_t input,
                                                             infiniopTensorDescriptor_t indices,
                                                             int64_t axis);

/**
 * Run the gather operator described by `desc`.
 *
 * @param desc    Descriptor obtained from infiniopCreateGatherDescriptor.
 * @param output  Destination buffer (written).
 * @param input   Source buffer (read-only).
 * @param indices Index buffer (read-only).
 * @param stream  Opaque stream argument.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopGather(infiniopGatherDescriptor_t desc,
                                             void *output,
                                             void const *input,
                                             void const *indices,
                                             void *stream);

/**
 * Destroy a descriptor created by infiniopCreateGatherDescriptor.
 *
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopDestroyGatherDescriptor(infiniopGatherDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/ops/reduce_max/reduce_max.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef REDUCE_MAX_H
#define REDUCE_MAX_H

#include "../../export.h"
#include "../../operators.h"
/* int64_t / uint64_t are used below; include the header that defines them so
 * this file does not depend on include order or transitive includes. */
#include <stdint.h>

/**
 * Public view of a ReduceMax descriptor. Only the target device is exposed
 * through this header.
 */
typedef struct ReduceMaxDescriptor {
    Device device;
} ReduceMaxDescriptor;

/** Handle type used by the ReduceMax API. */
typedef ReduceMaxDescriptor *infiniopReduceMaxDescriptor_t;

/**
 * Create a ReduceMax descriptor.
 *
 * @param handle    Library handle.
 * @param desc_ptr  Receives the created descriptor.
 * @param y         Descriptor of the result tensor.
 * @param x         Descriptor of the tensor being reduced.
 * @param axes      Array of axis indices to reduce over.
 * @param n_axes    Number of entries in `axes`.
 * @param keep_dims Integer flag. NOTE(review): presumably non-zero keeps the
 *                  reduced axes in the output shape — confirm against the
 *                  implementation.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopCreateReduceMaxDescriptor(infiniopHandle_t handle,
                                                                infiniopReduceMaxDescriptor_t *desc_ptr,
                                                                infiniopTensorDescriptor_t y,
                                                                infiniopTensorDescriptor_t x,
                                                                int64_t const *axes,
                                                                uint64_t n_axes,
                                                                int keep_dims);

/**
 * Run the reduction described by `desc`, reading from `x` and writing to `y`.
 *
 * @param stream Opaque stream argument.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopReduceMax(infiniopReduceMaxDescriptor_t desc, void *y, void const *x, void *stream);

/**
 * Destroy a descriptor created by infiniopCreateReduceMaxDescriptor.
 *
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopDestroyReduceMaxDescriptor(infiniopReduceMaxDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/ops/reduce_mean/reduce_mean.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef REDUCE_MEAN_H
#define REDUCE_MEAN_H

#include "../../export.h"
#include "../../operators.h"
/* int64_t / uint64_t are used below; include the header that defines them so
 * this file does not depend on include order or transitive includes. */
#include <stdint.h>

/**
 * Public view of a ReduceMean descriptor. Only the target device is exposed
 * through this header.
 */
typedef struct ReduceMeanDescriptor {
    Device device;
} ReduceMeanDescriptor;

/** Handle type used by the ReduceMean API. */
typedef ReduceMeanDescriptor *infiniopReduceMeanDescriptor_t;

/**
 * Create a ReduceMean descriptor.
 *
 * @param handle    Library handle.
 * @param desc_ptr  Receives the created descriptor.
 * @param y         Descriptor of the result tensor.
 * @param x         Descriptor of the tensor being reduced.
 * @param axes      Array of axis indices to reduce over.
 * @param n_axes    Number of entries in `axes`.
 * @param keep_dims Integer flag. NOTE(review): presumably non-zero keeps the
 *                  reduced axes in the output shape — confirm against the
 *                  implementation.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopCreateReduceMeanDescriptor(infiniopHandle_t handle,
                                                                 infiniopReduceMeanDescriptor_t *desc_ptr,
                                                                 infiniopTensorDescriptor_t y,
                                                                 infiniopTensorDescriptor_t x,
                                                                 int64_t const *axes,
                                                                 uint64_t n_axes,
                                                                 int keep_dims);

/**
 * Run the reduction described by `desc`, reading from `x` and writing to `y`.
 *
 * @param stream Opaque stream argument.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopReduceMean(infiniopReduceMeanDescriptor_t desc, void *y, void const *x, void *stream);

/**
 * Destroy a descriptor created by infiniopCreateReduceMeanDescriptor.
 *
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopDestroyReduceMeanDescriptor(infiniopReduceMeanDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/ops/reduce_min/reduce_min.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef REDUCE_MIN_H
#define REDUCE_MIN_H

#include "../../export.h"
#include "../../operators.h"
/* int64_t / uint64_t are used below; include the header that defines them so
 * this file does not depend on include order or transitive includes. */
#include <stdint.h>

/**
 * Public view of a ReduceMin descriptor. Only the target device is exposed
 * through this header.
 */
typedef struct ReduceMinDescriptor {
    Device device;
} ReduceMinDescriptor;

/** Handle type used by the ReduceMin API. */
typedef ReduceMinDescriptor *infiniopReduceMinDescriptor_t;

/**
 * Create a ReduceMin descriptor.
 *
 * @param handle    Library handle.
 * @param desc_ptr  Receives the created descriptor.
 * @param y         Descriptor of the result tensor.
 * @param x         Descriptor of the tensor being reduced.
 * @param axes      Array of axis indices to reduce over.
 * @param n_axes    Number of entries in `axes`.
 * @param keep_dims Integer flag. NOTE(review): presumably non-zero keeps the
 *                  reduced axes in the output shape — confirm against the
 *                  implementation.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopCreateReduceMinDescriptor(infiniopHandle_t handle,
                                                                infiniopReduceMinDescriptor_t *desc_ptr,
                                                                infiniopTensorDescriptor_t y,
                                                                infiniopTensorDescriptor_t x,
                                                                int64_t const *axes,
                                                                uint64_t n_axes,
                                                                int keep_dims);

/**
 * Run the reduction described by `desc`, reading from `x` and writing to `y`.
 *
 * @param stream Opaque stream argument.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopReduceMin(infiniopReduceMinDescriptor_t desc, void *y, void const *x, void *stream);

/**
 * Destroy a descriptor created by infiniopCreateReduceMinDescriptor.
 *
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopDestroyReduceMinDescriptor(infiniopReduceMinDescriptor_t desc);

#endif
29 changes: 29 additions & 0 deletions include/ops/where/where.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef WHERE_H
#define WHERE_H

#include "../../export.h"
#include "../../operators.h"

/**
 * Public view of a where operator descriptor, plus the handle type used by
 * the API below. Only the target device is exposed through this header.
 */
typedef struct WhereDescriptor {
    Device device;
} WhereDescriptor, *infiniopWhereDescriptor_t;

/**
 * Create a where descriptor.
 *
 * @param handle    Library handle.
 * @param desc_ptr  Receives the created descriptor.
 * @param output    Descriptor of the result tensor.
 * @param condition Descriptor of the condition tensor.
 * @param x         Descriptor of the first value tensor.
 * @param y         Descriptor of the second value tensor.
 * @return An infiniopStatus_t status code.
 *
 * NOTE(review): presumably the result takes elements of `x` where `condition`
 * holds and of `y` elsewhere — the selection rule and any broadcasting are
 * not visible from this header; confirm against the implementation.
 */
__C __export infiniopStatus_t infiniopCreateWhereDescriptor(
    infiniopHandle_t handle,
    infiniopWhereDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output,
    infiniopTensorDescriptor_t condition,
    infiniopTensorDescriptor_t x,
    infiniopTensorDescriptor_t y);

/**
 * Run the where operator described by `desc`.
 *
 * @param desc      Descriptor obtained from infiniopCreateWhereDescriptor.
 * @param output    Destination buffer (written).
 * @param condition Condition buffer (read-only).
 * @param x         First value buffer (read-only).
 * @param y         Second value buffer (read-only).
 * @param stream    Opaque stream argument.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopWhere(
    infiniopWhereDescriptor_t desc,
    void *output,
    const void *condition,
    const void *x,
    const void *y,
    void *stream);

/**
 * Destroy a descriptor created by infiniopCreateWhereDescriptor.
 *
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopDestroyWhereDescriptor(
    infiniopWhereDescriptor_t desc);

#endif /* WHERE_H */
133 changes: 133 additions & 0 deletions operatorspy/tests/clip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from ctypes import POINTER, Structure, c_int32, c_void_p, c_float
import ctypes
import sys
import os

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from operatorspy import (
open_lib,
to_tensor,
DeviceEnum,
infiniopHandle_t,
infiniopTensorDescriptor_t,
create_handle,
destroy_handle,
check_error,
)

from operatorspy.tests.test_utils import get_args
from enum import Enum, auto
import torch
import numpy as np


class ClipDescriptor(Structure):
    """ctypes counterpart of the C ``ClipDescriptor`` struct from clip.h."""

    # Single member: the device identifier, marshalled as a 32-bit integer.
    _fields_ = [
        ("device", ctypes.c_int32),
    ]


# Pointer-to-descriptor type handed to the infiniopClip* entry points.
infiniopClipDescriptor_t = ctypes.POINTER(ClipDescriptor)


def clip(x, min, max):
    """Reference result: clamp ``x`` between ``min`` and ``max`` using PyTorch.

    ``torch.clip`` is an alias of ``torch.clamp``, so this is the same call.
    """
    return torch.clamp(x, min=min, max=max)


def test(
    lib,
    handle,
    torch_device,
    c_shape,
    min,
    max,
    tensor_dtype=torch.float16,
):
    """Run one Clip case through the library and compare it with ``torch.clip``.

    Args:
        lib: Loaded operator library with the infiniopClip* signatures declared.
        handle: Library handle for the device under test.
        torch_device: Torch device string the tensors are placed on.
        c_shape: Shape of the input and output tensors.
        min: Lower bound, or ``None`` for no lower bound.
        max: Upper bound, or ``None`` for no upper bound.
        tensor_dtype: Floating-point dtype of the tensors.

    Raises:
        AssertionError: If the library output differs from the reference.
    """
    print(
        f"Testing Clip on {torch_device} with c_shape:{c_shape} dtype:{tensor_dtype}"
    )

    # Compare against None explicitly: a bound of 0.0 is falsy but is still a
    # real bound and must be forwarded to the library.
    has_min = min is not None
    has_max = max is not None

    x = torch.rand(c_shape, dtype=tensor_dtype).to(torch_device)
    y = torch.empty(c_shape, dtype=tensor_dtype).to(torch_device)

    # A missing bound falls back to the dtype's extreme value for the reference.
    limits = torch.finfo(tensor_dtype)
    min_val = torch.tensor(
        min if has_min else limits.min, dtype=tensor_dtype
    ).to(torch_device)
    max_val = torch.tensor(
        max if has_max else limits.max, dtype=tensor_dtype
    ).to(torch_device)

    ans = clip(x, min_val, max_val)

    input_tensor = to_tensor(x, lib)
    output_tensor = to_tensor(y, lib)
    descriptor = infiniopClipDescriptor_t()

    # Read the bounds back from the dtype-converted tensors so the library gets
    # exactly the (possibly rounded) values the reference used.
    min_c = c_float(min_val.item())
    max_c = c_float(max_val.item())

    check_error(
        lib.infiniopCreateClipDescriptor(
            handle,
            ctypes.byref(descriptor),
            output_tensor.descriptor,
            input_tensor.descriptor,
            ctypes.byref(min_c) if has_min else None,
            ctypes.byref(max_c) if has_max else None,
        )
    )

    input_tensor.descriptor.contents.invalidate()
    output_tensor.descriptor.contents.invalidate()

    check_error(
        lib.infiniopClip(descriptor, output_tensor.data, input_tensor.data, None)
    )

    # Clipping only selects existing values, so an exact match is expected.
    assert torch.allclose(y, ans, atol=0, rtol=0)
    check_error(lib.infiniopDestroyClipDescriptor(descriptor))


def test_cpu(lib, test_cases):
    """Run every ``(c_shape, min, max)`` case on the CPU in float16, then float32."""
    handle = create_handle(lib, DeviceEnum.DEVICE_CPU)
    for shape, lower, upper in test_cases:
        for dtype in (torch.float16, torch.float32):
            test(lib, handle, "cpu", shape, lower, upper, tensor_dtype=dtype)
    destroy_handle(lib, handle)


if __name__ == "__main__":
    # get_args() is still called although only the CPU path runs below;
    # presumably it parses the shared test command-line flags.
    args = get_args()
    lib = open_lib()

    # Argument types of each C entry point, so ctypes marshals calls correctly.
    signatures = {
        "infiniopCreateClipDescriptor": [
            infiniopHandle_t,
            POINTER(infiniopClipDescriptor_t),
            infiniopTensorDescriptor_t,
            infiniopTensorDescriptor_t,
            POINTER(c_float),
            POINTER(c_float),
        ],
        "infiniopClip": [
            infiniopClipDescriptor_t,
            c_void_p,
            c_void_p,
            c_void_p,
        ],
        "infiniopDestroyClipDescriptor": [
            infiniopClipDescriptor_t,
        ],
    }
    for symbol, argtypes in signatures.items():
        entry_point = getattr(lib, symbol)
        # Every entry point returns a 32-bit status code.
        entry_point.restype = c_int32
        entry_point.argtypes = argtypes

    test_cases = [
        # (c_shape, min, max); None means no bound on that side
        ((1, 3), 0.2, 0.4),
        ((3, 3), -0.1, 0.7),
        ((2, 20, 3), 0.5, 0.9),
        ((32, 20, 512), -0.2, 0.9),
        ((32, 256, 112, 112), 0.1, None),
        ((3, 2, 4, 5), None, None),
    ]

    test_cpu(lib, test_cases)
    print("\033[92mTest passed!\033[0m")
Loading