# Patch summary (text before the first `diff --git` header is commentary only):
# - src/bytecode/encodings.jl: encode_ShLIOp! gains an `overflow::IntegerOverflow`
#   keyword (default `OverflowNone`) and writes it with encode_enum! after the
#   result type id and before the lhs/rhs operands.
# - src/compiler/intrinsics/arithmetic.jl: the andi/ori/xori emit_intrinsic! bodies
#   are replaced by emit_binop!(ctx, args, encode_*Op!). emit_binop! is not defined
#   in this patch -- presumably it already exists in the target tree; TODO confirm.
# - test/codegen/operations.jl: replaces the "TODO: andi" placeholder with filecheck
#   testsets for andi/ori/xori and shli/shri.
# - test/execution/basic.jl: adds execution testsets (CuArray inputs, ct.launch) for
#   and/or/xor/shift-left/shift-right plus a combined kernel; the new end of file has
#   no trailing newline.
# NOTE(review): hunk bodies below are kept verbatim; context and removed lines must
# match the target files exactly, so only an added comment line was touched.
diff --git a/src/bytecode/encodings.jl b/src/bytecode/encodings.jl index b759241..65e6356 100644 --- a/src/bytecode/encodings.jl +++ b/src/bytecode/encodings.jl @@ -1519,14 +1519,16 @@ function encode_XOrIOp!(cb::CodeBuilder, result_type::TypeId, lhs::Value, rhs::V end """ - encode_ShLIOp!(cb, result_type, lhs, rhs) -> Value + encode_ShLIOp!(cb, result_type, lhs, rhs; overflow) -> Value Shift left. Opcode: 96 """ -function encode_ShLIOp!(cb::CodeBuilder, result_type::TypeId, lhs::Value, rhs::Value) +function encode_ShLIOp!(cb::CodeBuilder, result_type::TypeId, lhs::Value, rhs::Value; + overflow::IntegerOverflow=OverflowNone) encode_varint!(cb.buf, Opcode.ShLIOp) encode_typeid!(cb.buf, result_type) + encode_enum!(cb.buf, overflow) encode_operand!(cb.buf, lhs) encode_operand!(cb.buf, rhs) return new_op!(cb) diff --git a/src/compiler/intrinsics/arithmetic.jl b/src/compiler/intrinsics/arithmetic.jl index 803639c..31649e4 100644 --- a/src/compiler/intrinsics/arithmetic.jl +++ b/src/compiler/intrinsics/arithmetic.jl @@ -335,18 +335,7 @@ function tfunc(𝕃, ::typeof(Intrinsics.andi), @nospecialize(x), @nospecialize( return CC.widenconst(x) end function emit_intrinsic!(ctx::CGCtx, ::typeof(Intrinsics.andi), args) - cb = ctx.cb - tt = ctx.tt - - lhs = @something emit_value!(ctx, args[1]) throw(IRError("andi: cannot resolve lhs")) - rhs = @something emit_value!(ctx, args[2]) throw(IRError("andi: cannot resolve rhs")) - - lhs_type = CC.widenconst(lhs.jltype) - dtype = julia_to_tile_dtype!(tt, eltype(lhs_type)) - result_type_id = tile_type!(tt, dtype, lhs.shape) - - result = encode_AndIOp!(cb, result_type_id, lhs.v, rhs.v) - CGVal(result, result_type_id, lhs.jltype, lhs.shape) + emit_binop!(ctx, args, encode_AndIOp!) 
end # cuda_tile.ori @@ -361,18 +350,7 @@ function tfunc(𝕃, ::typeof(Intrinsics.ori), @nospecialize(x), @nospecialize(y return CC.widenconst(x) end function emit_intrinsic!(ctx::CGCtx, ::typeof(Intrinsics.ori), args) - cb = ctx.cb - tt = ctx.tt - - lhs = @something emit_value!(ctx, args[1]) throw(IRError("ori: cannot resolve lhs")) - rhs = @something emit_value!(ctx, args[2]) throw(IRError("ori: cannot resolve rhs")) - - lhs_type = CC.widenconst(lhs.jltype) - dtype = julia_to_tile_dtype!(tt, eltype(lhs_type)) - result_type_id = tile_type!(tt, dtype, lhs.shape) - - result = encode_OrIOp!(cb, result_type_id, lhs.v, rhs.v) - CGVal(result, result_type_id, lhs.jltype, lhs.shape) + emit_binop!(ctx, args, encode_OrIOp!) end # cuda_tile.xori @@ -380,16 +358,5 @@ end @intrinsic xori(a::Tile{T}, b::Tile{T}) where {T<:Integer} tfunc(𝕃, ::typeof(Intrinsics.xori), @nospecialize(x), @nospecialize(y)) = CC.widenconst(x) function emit_intrinsic!(ctx::CGCtx, ::typeof(Intrinsics.xori), args) - cb = ctx.cb - tt = ctx.tt - - lhs = @something emit_value!(ctx, args[1]) throw(IRError("xori: cannot resolve lhs")) - rhs = @something emit_value!(ctx, args[2]) throw(IRError("xori: cannot resolve rhs")) - - lhs_type = CC.widenconst(lhs.jltype) - dtype = julia_to_tile_dtype!(tt, eltype(lhs_type)) - result_type_id = tile_type!(tt, dtype, lhs.shape) - - result = encode_XOrIOp!(cb, result_type_id, lhs.v, rhs.v) - CGVal(result, result_type_id, lhs.jltype, lhs.shape) + emit_binop!(ctx, args, encode_XOrIOp!) 
end diff --git a/test/codegen/operations.jl b/test/codegen/operations.jl index 3483e93..e02aa53 100644 --- a/test/codegen/operations.jl +++ b/test/codegen/operations.jl @@ -1387,7 +1387,42 @@ #========================================================================= 8.9 Bitwise =========================================================================# - # TODO: andi - bitwise AND + @testset "Bitwise" begin + spec_i32 = ct.ArraySpec{1}(16, true) + + @testset "andi, ori, xori" begin + @test @filecheck begin + @check_label "entry" + code_tiled(Tuple{ct.TileArray{Int32,1,spec_i32}, ct.TileArray{Int32,1,spec_i32}}) do a, b + pid = ct.bid(1) + ta = ct.load(a, pid, (16,)) + tb = ct.load(b, pid, (16,)) + @check "andi" + Base.donotdelete(map(&, ta, tb)) + @check "ori" + Base.donotdelete(map(|, ta, tb)) + @check "xori" + Base.donotdelete(map(xor, ta, tb)) + return + end + end + end + + @testset "shli, shri" begin + @test @filecheck begin + @check_label "entry" + code_tiled(Tuple{ct.TileArray{Int32,1,spec_i32}}) do a + pid = ct.bid(1) + tile = ct.load(a, pid, (16,)) + @check "shli" + Base.donotdelete(map(x -> x << Int32(4), tile)) + @check "shri" + Base.donotdelete(map(x -> x >> Int32(8), tile)) + return + end + end + end + end #========================================================================= 8.10 Atomics diff --git a/test/execution/basic.jl b/test/execution/basic.jl index 5c9a8ae..79e0fd9 100644 --- a/test/execution/basic.jl +++ b/test/execution/basic.jl @@ -1216,4 +1216,129 @@ end b3 = CUDA.zeros(Float32, 64) ct.launch(multi_early_return, 4, a, b3, Int32(1), Int32(0)) @test all(Array(b3) .== 0.0f0) +end + +@testset "bitwise operations" begin + +@testset "andi (bitwise AND)" begin + function bitwise_and_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1}, + c::ct.TileArray{Int32,1}) + pid = ct.bid(1) + ta = ct.load(a, pid, (16,)) + tb = ct.load(b, pid, (16,)) + ct.store(c, pid, map(&, ta, tb)) + return + end + + n = 1024 + tile_size = 16 + a = 
CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + b = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + c = CUDA.zeros(Int32, n) + + ct.launch(bitwise_and_kernel, cld(n, tile_size), a, b, c) + + @test Array(c) == Array(a) .& Array(b) +end + +@testset "ori (bitwise OR)" begin + function bitwise_or_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1}, + c::ct.TileArray{Int32,1}) + pid = ct.bid(1) + ta = ct.load(a, pid, (16,)) + tb = ct.load(b, pid, (16,)) + ct.store(c, pid, map(|, ta, tb)) + return + end + + n = 1024 + tile_size = 16 + a = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + b = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + c = CUDA.zeros(Int32, n) + + ct.launch(bitwise_or_kernel, cld(n, tile_size), a, b, c) + + @test Array(c) == Array(a) .| Array(b) +end + +@testset "xori (bitwise XOR)" begin + function bitwise_xor_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1}, + c::ct.TileArray{Int32,1}) + pid = ct.bid(1) + ta = ct.load(a, pid, (16,)) + tb = ct.load(b, pid, (16,)) + ct.store(c, pid, map(xor, ta, tb)) + return + end + + n = 1024 + tile_size = 16 + a = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + b = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + c = CUDA.zeros(Int32, n) + + ct.launch(bitwise_xor_kernel, cld(n, tile_size), a, b, c) + + @test Array(c) == Array(a) .⊻ Array(b) +end + +@testset "shli (shift left)" begin + function shift_left_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1}) + pid = ct.bid(1) + tile = ct.load(a, pid, (16,)) + ct.store(b, pid, map(x -> x << Int32(4), tile)) + return + end + + n = 1024 + tile_size = 16 + a = CuArray(rand(Int32(0):Int32(0x0fff_ffff), n)) + b = CUDA.zeros(Int32, n) + + ct.launch(shift_left_kernel, cld(n, tile_size), a, b) + + @test Array(b) == Array(a) .<< Int32(4) +end + +@testset "shri (shift right)" begin + function shift_right_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1}) + pid = ct.bid(1) + tile = ct.load(a, pid, (16,)) + ct.store(b, pid, map(x -> x >> Int32(8), tile)) 
+ return + end + + n = 1024 + tile_size = 16 + a = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + b = CUDA.zeros(Int32, n) + + ct.launch(shift_right_kernel, cld(n, tile_size), a, b) + + @test Array(b) == Array(a) .>> Int32(8) +end + +@testset "combined bitwise ops" begin + # (a & b) | (a ^ b) — exercises all three ops in a single kernel + function combined_bitwise_kernel(a::ct.TileArray{Int32,1}, b::ct.TileArray{Int32,1}, + c::ct.TileArray{Int32,1}) + pid = ct.bid(1) + ta = ct.load(a, pid, (16,)) + tb = ct.load(b, pid, (16,)) + ct.store(c, pid, map(|, map(&, ta, tb), map(xor, ta, tb))) + return + end + + n = 1024 + tile_size = 16 + a = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + b = CuArray(rand(Int32(0):Int32(0x7fff_ffff), n)) + c = CUDA.zeros(Int32, n) + + ct.launch(combined_bitwise_kernel, cld(n, tile_size), a, b, c) + + @test Array(c) == (Array(a) .& Array(b)) .| (Array(a) .⊻ Array(b)) +end + end \ No newline at end of file