Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/bytecode/encodings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1519,14 +1519,16 @@ function encode_XOrIOp!(cb::CodeBuilder, result_type::TypeId, lhs::Value, rhs::V
end

"""
encode_ShLIOp!(cb, result_type, lhs, rhs) -> Value
encode_ShLIOp!(cb, result_type, lhs, rhs; overflow) -> Value

Shift left.
Opcode: 96
"""
function encode_ShLIOp!(cb::CodeBuilder, result_type::TypeId, lhs::Value, rhs::Value)
function encode_ShLIOp!(cb::CodeBuilder, result_type::TypeId, lhs::Value, rhs::Value;
overflow::IntegerOverflow=OverflowNone)
encode_varint!(cb.buf, Opcode.ShLIOp)
encode_typeid!(cb.buf, result_type)
encode_enum!(cb.buf, overflow)
encode_operand!(cb.buf, lhs)
encode_operand!(cb.buf, rhs)
return new_op!(cb)
Expand Down
39 changes: 3 additions & 36 deletions src/compiler/intrinsics/arithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -335,18 +335,7 @@ function tfunc(𝕃, ::typeof(Intrinsics.andi), @nospecialize(x), @nospecialize(
return CC.widenconst(x)
end
# Lower the `andi` intrinsic (bitwise AND) to bytecode.
#
# `ctx` is the code-generation context and `args` holds the two operand
# expressions of the call. The lowering is delegated to the shared
# `emit_binop!` helper with `encode_AndIOp!` as the encoder, so the op is
# encoded exactly once and the helper's result is what this method returns.
#
# NOTE(review): `emit_binop!` is defined outside this view; it presumably
# resolves both operands and derives the result tile type from the lhs, as the
# hand-written lowering that used to live here did — confirm against the helper.
function emit_intrinsic!(ctx::CGCtx, ::typeof(Intrinsics.andi), args)
    return emit_binop!(ctx, args, encode_AndIOp!)
end

# cuda_tile.ori
Expand All @@ -361,35 +350,13 @@ function tfunc(𝕃, ::typeof(Intrinsics.ori), @nospecialize(x), @nospecialize(y
return CC.widenconst(x)
end
# Lower the `ori` intrinsic (bitwise OR) to bytecode.
#
# `ctx` is the code-generation context and `args` holds the two operand
# expressions of the call. The lowering is delegated to the shared
# `emit_binop!` helper with `encode_OrIOp!` as the encoder, so the op is
# encoded exactly once and the helper's result is what this method returns.
#
# NOTE(review): `emit_binop!` is defined outside this view; it presumably
# resolves both operands and derives the result tile type from the lhs, as the
# hand-written lowering that used to live here did — confirm against the helper.
function emit_intrinsic!(ctx::CGCtx, ::typeof(Intrinsics.ori), args)
    return emit_binop!(ctx, args, encode_OrIOp!)
end

# cuda_tile.xori
@intrinsic xori(x::T, y::T) where {T<:Integer}
@intrinsic xori(a::Tile{T}, b::Tile{T}) where {T<:Integer}
# Type function for `xori`: the result type is the widened type of the first
# operand. Both declared signatures above require the two operands to share
# the same `T`, so the second operand is not consulted.
function tfunc(𝕃, ::typeof(Intrinsics.xori), @nospecialize(x), @nospecialize(y))
    return CC.widenconst(x)
end
# Lower the `xori` intrinsic (bitwise XOR) to bytecode.
#
# `ctx` is the code-generation context and `args` holds the two operand
# expressions of the call. The lowering is delegated to the shared
# `emit_binop!` helper with `encode_XOrIOp!` as the encoder, so the op is
# encoded exactly once and the helper's result is what this method returns.
#
# NOTE(review): `emit_binop!` is defined outside this view; it presumably
# resolves both operands and derives the result tile type from the lhs, as the
# hand-written lowering that used to live here did — confirm against the helper.
function emit_intrinsic!(ctx::CGCtx, ::typeof(Intrinsics.xori), args)
    return emit_binop!(ctx, args, encode_XOrIOp!)
end
37 changes: 36 additions & 1 deletion test/codegen/operations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1387,7 +1387,42 @@
#=========================================================================
8.9 Bitwise
=========================================================================#
# andi, ori, xori, shli, shri: check that each op appears in the generated code.
# Codegen checks for the integer bitwise ops. Each inner testset compiles a
# small kernel through `code_tiled` and uses `@check` directives to look for
# the op mnemonics "andi", "ori", "xori", "shli" and "shri".
# NOTE(review): presumably FileCheck semantics, i.e. the `@check` strings must
# appear in the output in the order written — confirm against `@filecheck`.
@testset "Bitwise" begin
# Array spec shared by every Int32 argument below.
# NOTE(review): the meaning of the `16` and `true` arguments is not visible
# here (alignment / contiguity?) — confirm against `ct.ArraySpec`.
spec_i32 = ct.ArraySpec{1}(16, true)

# Two-input kernel: loads one 16-element tile from each array and applies
# `&`, `|` and `xor` elementwise via `map`. `Base.donotdelete` keeps each
# otherwise-unused result alive so the compiler cannot drop the op.
@testset "andi, ori, xori" begin
@test @filecheck begin
@check_label "entry"
code_tiled(Tuple{ct.TileArray{Int32,1,spec_i32}, ct.TileArray{Int32,1,spec_i32}}) do a, b
pid = ct.bid(1)
ta = ct.load(a, pid, (16,))
tb = ct.load(b, pid, (16,))
@check "andi"
Base.donotdelete(map(&, ta, tb))
@check "ori"
Base.donotdelete(map(|, ta, tb))
@check "xori"
Base.donotdelete(map(xor, ta, tb))
return
end
end
end

# Single-input kernel: shifts every element left by 4 and right by 8, with
# the shift amounts given as Int32 constants.
@testset "shli, shri" begin
@test @filecheck begin
@check_label "entry"
code_tiled(Tuple{ct.TileArray{Int32,1,spec_i32}}) do a
pid = ct.bid(1)
tile = ct.load(a, pid, (16,))
@check "shli"
Base.donotdelete(map(x -> x << Int32(4), tile))
@check "shri"
Base.donotdelete(map(x -> x >> Int32(8), tile))
return
end
end
end
end

#=========================================================================
8.10 Atomics
Expand Down
125 changes: 125 additions & 0 deletions test/execution/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1216,4 +1216,129 @@ end
b3 = CUDA.zeros(Float32, 64)
ct.launch(multi_early_return, 4, a, b3, Int32(1), Int32(0))
@test all(Array(b3) .== 0.0f0)
end

@testset "bitwise operations" begin

@testset "andi (bitwise AND)" begin
    # Kernel: load one 16-element tile from each input at the index given by
    # `ct.bid(1)`, AND the tiles elementwise, store the result at that index.
    function bitwise_and_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1},
                                c::ct.TileArray{Int32,1})
        block = ct.bid(1)
        lhs = ct.load(a, block, (16,))
        rhs = ct.load(b, block, (16,))
        ct.store(c, block, map(&, lhs, rhs))
        return
    end

    n, tile_size = 1024, 16
    value_range = Int32(0):typemax(Int32)

    # Keep the host copies: they serve as the CPU reference after the launch.
    host_a = rand(value_range, n)
    host_b = rand(value_range, n)
    a = CuArray(host_a)
    b = CuArray(host_b)
    c = CUDA.zeros(Int32, n)

    ct.launch(bitwise_and_kernel, cld(n, tile_size), a, b, c)

    @test Array(c) == host_a .& host_b
end

@testset "ori (bitwise OR)" begin
    # Kernel: load one 16-element tile from each input at the index given by
    # `ct.bid(1)`, OR the tiles elementwise, store the result at that index.
    function bitwise_or_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1},
                               c::ct.TileArray{Int32,1})
        block = ct.bid(1)
        lhs = ct.load(a, block, (16,))
        rhs = ct.load(b, block, (16,))
        ct.store(c, block, map(|, lhs, rhs))
        return
    end

    n, tile_size = 1024, 16
    value_range = Int32(0):typemax(Int32)

    # Keep the host copies: they serve as the CPU reference after the launch.
    host_a = rand(value_range, n)
    host_b = rand(value_range, n)
    a = CuArray(host_a)
    b = CuArray(host_b)
    c = CUDA.zeros(Int32, n)

    ct.launch(bitwise_or_kernel, cld(n, tile_size), a, b, c)

    @test Array(c) == host_a .| host_b
end

@testset "xori (bitwise XOR)" begin
    # Kernel: load one 16-element tile from each input at the index given by
    # `ct.bid(1)`, XOR the tiles elementwise, store the result at that index.
    function bitwise_xor_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1},
                                c::ct.TileArray{Int32,1})
        block = ct.bid(1)
        lhs = ct.load(a, block, (16,))
        rhs = ct.load(b, block, (16,))
        ct.store(c, block, map(xor, lhs, rhs))
        return
    end

    n, tile_size = 1024, 16
    value_range = Int32(0):typemax(Int32)

    # Keep the host copies: they serve as the CPU reference after the launch.
    host_a = rand(value_range, n)
    host_b = rand(value_range, n)
    a = CuArray(host_a)
    b = CuArray(host_b)
    c = CUDA.zeros(Int32, n)

    ct.launch(bitwise_xor_kernel, cld(n, tile_size), a, b, c)

    @test Array(c) == host_a .⊻ host_b
end

@testset "shli (shift left)" begin
    # Kernel: load one 16-element tile at the index given by `ct.bid(1)`,
    # shift every element left by 4 bits, store the result at that index.
    function shift_left_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1})
        block = ct.bid(1)
        loaded = ct.load(a, block, (16,))
        ct.store(b, block, map(x -> x << Int32(4), loaded))
        return
    end

    n, tile_size = 1024, 16

    # Inputs are capped at 0x0fff_ffff, so the top four bits are clear before
    # the shift by 4. The host copy is the CPU reference after the launch.
    host_a = rand(Int32(0):Int32(0x0fff_ffff), n)
    a = CuArray(host_a)
    b = CUDA.zeros(Int32, n)

    ct.launch(shift_left_kernel, cld(n, tile_size), a, b)

    @test Array(b) == host_a .<< Int32(4)
end

@testset "shri (shift right)" begin
    # Kernel: load one 16-element tile at the index given by `ct.bid(1)`,
    # shift every element right by 8 bits, store the result at that index.
    function shift_right_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1})
        block = ct.bid(1)
        loaded = ct.load(a, block, (16,))
        ct.store(b, block, map(x -> x >> Int32(8), loaded))
        return
    end

    n, tile_size = 1024, 16

    # Inputs are drawn from the non-negative Int32 range. The host copy is
    # the CPU reference after the launch.
    host_a = rand(Int32(0):typemax(Int32), n)
    a = CuArray(host_a)
    b = CUDA.zeros(Int32, n)

    ct.launch(shift_right_kernel, cld(n, tile_size), a, b)

    @test Array(b) == host_a .>> Int32(8)
end

@testset "combined bitwise ops" begin
    # (a & b) | (a ⊻ b) — exercises AND, OR and XOR in a single kernel.
    function combined_bitwise_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1},
                                     c::ct.TileArray{Int32,1})
        block = ct.bid(1)
        lhs = ct.load(a, block, (16,))
        rhs = ct.load(b, block, (16,))
        both = map(&, lhs, rhs)
        differing = map(xor, lhs, rhs)
        ct.store(c, block, map(|, both, differing))
        return
    end

    n, tile_size = 1024, 16
    value_range = Int32(0):typemax(Int32)

    # Keep the host copies: they serve as the CPU reference after the launch.
    host_a = rand(value_range, n)
    host_b = rand(value_range, n)
    a = CuArray(host_a)
    b = CuArray(host_b)
    c = CUDA.zeros(Int32, n)

    ct.launch(combined_bitwise_kernel, cld(n, tile_size), a, b, c)

    @test Array(c) == (host_a .& host_b) .| (host_a .⊻ host_b)
end

end
Loading