@@ -410,19 +410,40 @@ define amdgpu_cs half @srcmods_neg_f16(half inreg %src) {
410
410
ret half %result
411
411
}
412
412
413
- declare half @llvm.exp2.f16 (half )
414
- declare float @llvm.exp2.f32 (float )
415
- declare half @llvm.amdgcn.exp2.f16 (half )
416
- declare float @llvm.amdgcn.exp2.f32 (float )
417
- declare half @llvm.log2.f16 (half )
418
- declare float @llvm.log2.f32 (float )
419
- declare half @llvm.amdgcn.log.f16 (half )
420
- declare float @llvm.amdgcn.log.f32 (float )
421
- declare half @llvm.amdgcn.rcp.f16 (half )
422
- declare float @llvm.amdgcn.rcp.f32 (float )
423
- declare half @llvm.sqrt.f16 (half )
424
- declare float @llvm.sqrt.f32 (float )
425
- declare half @llvm.amdgcn.sqrt.f16 (half )
426
- declare float @llvm.amdgcn.sqrt.f32 (float )
427
- declare half @llvm.fabs.f16 (half )
428
- declare float @llvm.fabs.f32 (float )
413
+ ; Tests 'fdiv afn' of a float by a uitofp-converted i32. TODO: SelectionDAG should avoid generating v_rcp_iflag_f32 here (the GlobalISel checks below expect v_s_rcp_f32 for the same input).
414
+ define amdgpu_cs float @fdiv_f32_i32 (float inreg %a , i32 inreg %b ) {
415
+ ; GFX12-SDAG-LABEL: fdiv_f32_i32:
416
+ ; GFX12-SDAG: ; %bb.0:
417
+ ; GFX12-SDAG-NEXT: s_cvt_f32_u32 s1, s1
418
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(TRANS32_DEP_1)
419
+ ; GFX12-SDAG-NEXT: v_rcp_iflag_f32_e32 v0, s1
420
+ ; GFX12-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0
421
+ ; GFX12-SDAG-NEXT: ; return to shader part epilog
422
+ ;
423
+ ; GFX12-GISEL-LABEL: fdiv_f32_i32:
424
+ ; GFX12-GISEL: ; %bb.0:
425
+ ; GFX12-GISEL-NEXT: s_cvt_f32_u32 s1, s1
426
+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(TRANS32_DEP_1)
427
+ ; GFX12-GISEL-NEXT: v_s_rcp_f32 s1, s1
428
+ ; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
429
+ ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
430
+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
431
+ ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
432
+ ; GFX12-GISEL-NEXT: ; return to shader part epilog
433
+ %uint = uitofp i32 %b to float
434
+ %result = fdiv afn float %a , %uint
435
+ ret float %result
436
+ }
437
+ ; Tests 'fdiv afn' of a half by a uitofp-converted i16; checked under the single GFX12 prefix, expecting v_cvt_f16_u16, v_rcp_f16 and v_mul_f16.
438
+ define amdgpu_cs half @fdiv_f16_i16 (half inreg %a , i16 inreg %b ) {
439
+ ; GFX12-LABEL: fdiv_f16_i16:
440
+ ; GFX12: ; %bb.0:
441
+ ; GFX12-NEXT: v_cvt_f16_u16_e32 v0, s1
442
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
443
+ ; GFX12-NEXT: v_rcp_f16_e32 v0, v0
444
+ ; GFX12-NEXT: v_mul_f16_e32 v0, s0, v0
445
+ ; GFX12-NEXT: ; return to shader part epilog
446
+ %uint = uitofp i16 %b to half
447
+ %result = fdiv afn half %a , %uint
448
+ ret half %result
449
+ }
0 commit comments