Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions benchmark/test_unary_pointwise_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,38 @@ def test_glu_backward_perf():
is_backward=True,
)
bench.run()


class BinaryPointwiseBenchmark(Benchmark):
def set_more_shapes(self):
    """Return the extra benchmark shapes: 2-D first, then 3-D.

    The 2-D shapes are ``(1024, 2**e)`` for ``e`` in ``range(0, 20, 4)`` and
    the 3-D shapes are ``(64, 64, 2**e)`` for ``e`` in ``range(0, 15, 4)``.
    """
    wide_rows = [(1024, 1 << exp) for exp in range(0, 20, 4)]
    cubes = [(64, 64, 1 << exp) for exp in range(0, 15, 4)]
    return [*wide_rows, *cubes]

def get_input_iter(self, cur_dtype) -> Generator:
for shape in self.shapes:
inp1 = generate_tensor_input(shape, cur_dtype, self.device)
shift_amount = torch.randint(
0, 8, shape, dtype=cur_dtype, device=self.device
)
Comment on lines +205 to +207
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The range of shift amounts [0, 8) is a bit narrow for integer types larger than 8 bits. To ensure the performance benchmark covers a more realistic range of inputs, consider using a range for the shift amount that is dependent on the cur_dtype, up to the number of bits in the type.

Suggested change
shift_amount = torch.randint(
0, 8, shape, dtype=cur_dtype, device=self.device
)
shift_amount = torch.randint(
0, torch.iinfo(cur_dtype).bits, shape, dtype=cur_dtype, device=self.device
)

yield inp1, shift_amount


@pytest.mark.bitwise_left_shift
def test_bitwise_left_shift_perf():
    """Run the binary pointwise benchmark for ``torch.bitwise_left_shift``."""
    benchmark_config = {
        "op_name": "bitwise_left_shift",
        "torch_op": torch.bitwise_left_shift,
        "dtypes": INT_DTYPES,
    }
    BinaryPointwiseBenchmark(**benchmark_config).run()


@pytest.mark.bitwise_right_shift
def test_bitwise_right_shift_perf():
    """Run the binary pointwise benchmark for ``torch.bitwise_right_shift``."""
    benchmark_config = {
        "op_name": "bitwise_right_shift",
        "torch_op": torch.bitwise_right_shift,
        "dtypes": INT_DTYPES,
    }
    BinaryPointwiseBenchmark(**benchmark_config).run()
2 changes: 2 additions & 0 deletions src/flag_gems/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def enable(
("bitwise_and.Tensor", bitwise_and_tensor),
("bitwise_and_.Scalar", bitwise_and_scalar_),
("bitwise_and_.Tensor", bitwise_and_tensor_),
("bitwise_left_shift", bitwise_left_shift),
("bitwise_right_shift", bitwise_right_shift),
("bitwise_not", bitwise_not),
("bitwise_not_", bitwise_not_),
("bitwise_or.Scalar", bitwise_or_scalar),
Expand Down
4 changes: 4 additions & 0 deletions src/flag_gems/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
bitwise_and_tensor,
bitwise_and_tensor_,
)
from flag_gems.ops.bitwise_left_shift import bitwise_left_shift
from flag_gems.ops.bitwise_not import bitwise_not, bitwise_not_
from flag_gems.ops.bitwise_or import (
bitwise_or_scalar,
Expand All @@ -32,6 +33,7 @@
bitwise_or_tensor,
bitwise_or_tensor_,
)
from flag_gems.ops.bitwise_right_shift import bitwise_right_shift
from flag_gems.ops.bmm import bmm
from flag_gems.ops.cat import cat
from flag_gems.ops.celu import celu, celu_
Expand Down Expand Up @@ -233,6 +235,8 @@
"bitwise_and_scalar_tensor",
"bitwise_and_tensor",
"bitwise_and_tensor_",
"bitwise_left_shift",
"bitwise_right_shift",
"bitwise_not",
"bitwise_not_",
"bitwise_or_scalar",
Expand Down
18 changes: 18 additions & 0 deletions src/flag_gems/ops/bitwise_left_shift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import logging

import triton

from flag_gems.utils import pointwise_dynamic

logger = logging.getLogger(__name__)


@pointwise_dynamic(promotion_methods=[(0, 1, "DEFAULT")])
@triton.jit
def bitwise_left_shift_kernel(a, b):
    # Shift `a` left by `b` bits. The jitted function is wrapped by
    # pointwise_dynamic (presumably applied element-wise over the operands;
    # confirm in flag_gems.utils).
    shifted = a << b
    return shifted


def bitwise_left_shift(self, other, *, out=None):
    """Compute ``self << other`` via ``bitwise_left_shift_kernel``.

    Args:
        self: Left operand (the values being shifted).
        other: Right operand (the shift amounts).
        out: Optional destination, forwarded to the kernel as ``out``.

    Returns:
        Whatever ``bitwise_left_shift_kernel`` returns for these arguments.
    """
    logger.debug("GEMS BITWISE_LEFT_SHIFT")
    shifted = bitwise_left_shift_kernel(self, other, out=out)
    return shifted
17 changes: 17 additions & 0 deletions src/flag_gems/ops/bitwise_right_shift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import logging

import triton

from flag_gems.utils import pointwise_dynamic

logger = logging.getLogger(__name__)


@pointwise_dynamic(promotion_methods=[(0, 1, "DEFAULT")])
@triton.jit
def bitwise_right_shift_kernel(a, b):
    # Shift `a` right by `b` bits. The jitted function is wrapped by
    # pointwise_dynamic (presumably applied element-wise over the operands;
    # confirm in flag_gems.utils).
    shifted = a >> b
    return shifted


def bitwise_right_shift(self, other, *, out=None):
    """Compute ``self >> other`` via ``bitwise_right_shift_kernel``.

    Args:
        self: Left operand (the values being shifted).
        other: Right operand (the shift amounts).
        out: Optional destination, forwarded to the kernel as ``out``.

    Returns:
        Whatever ``bitwise_right_shift_kernel`` returns for these arguments.
    """
    # Emit the same kind of debug trace as bitwise_left_shift so both shift
    # ops show up when debug logging is enabled.
    logger.debug("GEMS BITWISE_RIGHT_SHIFT")
    return bitwise_right_shift_kernel(self, other, out=out)
Comment on lines +16 to +17
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For consistency with bitwise_left_shift and to aid in debugging, it would be good to add a debug log message here.

Suggested change
def bitwise_right_shift(self, other, *, out=None):
return bitwise_right_shift_kernel(self, other, out=out)
def bitwise_right_shift(self, other, *, out=None):
logger.debug("GEMS BITWISE_RIGHT_SHIFT")
return bitwise_right_shift_kernel(self, other, out=out)

83 changes: 83 additions & 0 deletions tests/test_unary_pointwise_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,89 @@ def test_accuracy_angle(shape, dtype):
gems_assert_close(res_out, ref_out, dtype_out)


# (shape_a, shape_b) operand-shape pairs used by the out-of-place shift tests.
BITWISE_SHAPES = [
    # Identical shapes.
    ((512, 1024), (512, 1024)),
    # Second operand has a size-1 dimension.
    ((256, 512), (1, 512)),
    ((256, 512), (256, 1)),
    # First operand has a size-1 dimension.
    ((1, 512), (256, 512)),
    ((256, 1), (256, 512)),
    # One operand is 0-dimensional.
    ((1024,), ()),
    ((), (1024,)),
]


@pytest.mark.bitwise_left_shift
@pytest.mark.parametrize("shapes", BITWISE_SHAPES)
@pytest.mark.parametrize("dtype", ALL_INT_DTYPES + [torch.uint8])
def test_accuracy_bitwise_left_shift(shapes, dtype):
shape_a, shape_b = shapes
res_a = torch.randint(0, 100, shape_a, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The range of shift amounts [0, 8) is a bit narrow for integer types larger than 8 bits. To improve test coverage, consider using a range that is dependent on the dtype, up to the number of bits in the type. This will ensure that shifts by larger amounts are also tested.

Suggested change
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, torch.iinfo(dtype).bits, shape_b, dtype=dtype, device=flag_gems.device)

ref_a = to_reference(res_a)
ref_b = to_reference(res_b)

ref_out = torch.bitwise_left_shift(ref_a, ref_b)
with flag_gems.use_gems():
res_out = torch.bitwise_left_shift(res_a, res_b)
gems_assert_close(res_out, ref_out, dtype)


@pytest.mark.bitwise_right_shift
@pytest.mark.parametrize("shapes", BITWISE_SHAPES)
@pytest.mark.parametrize("dtype", ALL_INT_DTYPES + [torch.uint8])
def test_accuracy_bitwise_right_shift(shapes, dtype):
shape_a, shape_b = shapes
res_a = torch.randint(0, 100, shape_a, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The range of shift amounts [0, 8) is a bit narrow for integer types larger than 8 bits. To improve test coverage, consider using a range that is dependent on the dtype, up to the number of bits in the type. This will ensure that shifts by larger amounts are also tested.

Suggested change
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, torch.iinfo(dtype).bits, shape_b, dtype=dtype, device=flag_gems.device)

ref_a = to_reference(res_a)
ref_b = to_reference(res_b)

ref_out = torch.bitwise_right_shift(ref_a, ref_b)
with flag_gems.use_gems():
res_out = torch.bitwise_right_shift(res_a, res_b)
gems_assert_close(res_out, ref_out, dtype)


# (shape_a, shape_b) operand-shape pairs used by the in-place shift tests.
# In every pair, broadcasting shape_b against shape_a yields shape_a.
INPLACE_BITWISE_SHAPES = [
    # Identical shapes.
    ((512, 1024), (512, 1024)),
    # Second operand has a size-1 dimension.
    ((256, 512), (1, 512)),
    ((256, 512), (256, 1)),
    # Second operand is 0-dimensional.
    ((1024,), ()),
]


@pytest.mark.bitwise_left_shift
@pytest.mark.parametrize("shapes", INPLACE_BITWISE_SHAPES)
@pytest.mark.parametrize("dtype", ALL_INT_DTYPES + [torch.uint8])
def test_accuracy_bitwise_left_shift_(shapes, dtype):
shape_a, shape_b = shapes
res_a = torch.randint(0, 100, shape_a, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The range of shift amounts [0, 8) is a bit narrow for integer types larger than 8 bits. To improve test coverage, consider using a range that is dependent on the dtype, up to the number of bits in the type. This will ensure that shifts by larger amounts are also tested for the in-place operation.

Suggested change
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, torch.iinfo(dtype).bits, shape_b, dtype=dtype, device=flag_gems.device)

ref_a = to_reference(res_a.clone())
ref_b = to_reference(res_b)

ref_a.bitwise_left_shift_(ref_b)
with flag_gems.use_gems():
res_a.bitwise_left_shift_(res_b)
gems_assert_close(res_a, ref_a, dtype)


@pytest.mark.bitwise_right_shift
@pytest.mark.parametrize("shapes", INPLACE_BITWISE_SHAPES)
@pytest.mark.parametrize("dtype", ALL_INT_DTYPES + [torch.uint8])
def test_accuracy_bitwise_right_shift_(shapes, dtype):
shape_a, shape_b = shapes
res_a = torch.randint(0, 100, shape_a, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The range of shift amounts [0, 8) is a bit narrow for integer types larger than 8 bits. To improve test coverage, consider using a range that is dependent on the dtype, up to the number of bits in the type. This will ensure that shifts by larger amounts are also tested for the in-place operation.

Suggested change
res_b = torch.randint(0, 8, shape_b, dtype=dtype, device=flag_gems.device)
res_b = torch.randint(0, torch.iinfo(dtype).bits, shape_b, dtype=dtype, device=flag_gems.device)

ref_a = to_reference(res_a.clone())
ref_b = to_reference(res_b)

ref_a.bitwise_right_shift_(ref_b)
with flag_gems.use_gems():
res_a.bitwise_right_shift_(res_b)
gems_assert_close(res_a, ref_a, dtype)


@pytest.mark.bitwise_not
@pytest.mark.parametrize("shape", POINTWISE_SHAPES)
@pytest.mark.parametrize("dtype", INT_DTYPES + BOOL_TYPES)
Expand Down
Loading