Skip to content

Commit 6ddb3a6

Browse files
authored
[AMDGPU] Add another test showing unwanted VALU codegen (#145062)
1 parent 945ce1a commit 6ddb3a6

File tree

1 file changed

+37
-16
lines changed

1 file changed

+37
-16
lines changed

llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -410,19 +410,40 @@ define amdgpu_cs half @srcmods_neg_f16(half inreg %src) {
410410
ret half %result
411411
}
412412

413-
declare half @llvm.exp2.f16(half)
414-
declare float @llvm.exp2.f32(float)
415-
declare half @llvm.amdgcn.exp2.f16(half)
416-
declare float @llvm.amdgcn.exp2.f32(float)
417-
declare half @llvm.log2.f16(half)
418-
declare float @llvm.log2.f32(float)
419-
declare half @llvm.amdgcn.log.f16(half)
420-
declare float @llvm.amdgcn.log.f32(float)
421-
declare half @llvm.amdgcn.rcp.f16(half)
422-
declare float @llvm.amdgcn.rcp.f32(float)
423-
declare half @llvm.sqrt.f16(half)
424-
declare float @llvm.sqrt.f32(float)
425-
declare half @llvm.amdgcn.sqrt.f16(half)
426-
declare float @llvm.amdgcn.sqrt.f32(float)
427-
declare half @llvm.fabs.f16(half)
428-
declare float @llvm.fabs.f32(float)
413+
; TODO: SelectionDAG should avoid generating the VALU v_rcp_iflag_f32 here and select the pseudo-scalar v_s_rcp_f32 instead, as the GlobalISel output does.
414+
; Approximate (afn) f32 division of an inreg float by an inreg i32 that is
; first converted with uitofp. The SDAG checks expect the reciprocal and the
; multiply on VGPRs (v_rcp_iflag_f32, v_mul_f32), while the GISEL checks expect
; them on SGPRs (v_s_rcp_f32, s_mul_f32) followed by a copy of the result to v0.
define amdgpu_cs float @fdiv_f32_i32(float inreg %a, i32 inreg %b) {
415+
; GFX12-SDAG-LABEL: fdiv_f32_i32:
416+
; GFX12-SDAG: ; %bb.0:
417+
; GFX12-SDAG-NEXT: s_cvt_f32_u32 s1, s1
418+
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(TRANS32_DEP_1)
419+
; GFX12-SDAG-NEXT: v_rcp_iflag_f32_e32 v0, s1
420+
; GFX12-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0
421+
; GFX12-SDAG-NEXT: ; return to shader part epilog
422+
;
423+
; GFX12-GISEL-LABEL: fdiv_f32_i32:
424+
; GFX12-GISEL: ; %bb.0:
425+
; GFX12-GISEL-NEXT: s_cvt_f32_u32 s1, s1
426+
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(TRANS32_DEP_1)
427+
; GFX12-GISEL-NEXT: v_s_rcp_f32 s1, s1
428+
; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
429+
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
430+
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
431+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
432+
; GFX12-GISEL-NEXT: ; return to shader part epilog
433+
%uint = uitofp i32 %b to float
434+
%result = fdiv afn float %a, %uint
435+
ret float %result
436+
}
437+
438+
; Approximate (afn) f16 division of an inreg half by an inreg i16 that is first
; converted with uitofp. The checks expect the convert, reciprocal and multiply
; to all be emitted as VALU instructions (v_cvt_f16_u16, v_rcp_f16, v_mul_f16).
; NOTE(review): the RUN lines are not visible here; presumably the common GFX12
; prefix means every run configuration produces this same output - confirm.
define amdgpu_cs half @fdiv_f16_i16(half inreg %a, i16 inreg %b) {
439+
; GFX12-LABEL: fdiv_f16_i16:
440+
; GFX12: ; %bb.0:
441+
; GFX12-NEXT: v_cvt_f16_u16_e32 v0, s1
442+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
443+
; GFX12-NEXT: v_rcp_f16_e32 v0, v0
444+
; GFX12-NEXT: v_mul_f16_e32 v0, s0, v0
445+
; GFX12-NEXT: ; return to shader part epilog
446+
%uint = uitofp i16 %b to half
447+
%result = fdiv afn half %a, %uint
448+
ret half %result
449+
}

0 commit comments

Comments
 (0)