Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5925,15 +5925,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/*ZeroPurifies=*/true, /*EltSizeInBits=*/16);
break;

// TODO: Dot Product of BF16 Pairs Accumulated Into Packed Single
// Precision
// <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128
// (<4 x float>, <8 x bfloat>, <8 x bfloat>)
// <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256
// (<8 x float>, <16 x bfloat>, <16 x bfloat>)
// <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512
// (<16 x float>, <32 x bfloat>, <32 x bfloat>)
// handleVectorPmaddIntrinsic() currently only handles integer types.
// Dot Product of BF16 Pairs Accumulated Into Packed Single
// Precision
// <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128
// (<4 x float>, <8 x bfloat>, <8 x bfloat>)
// <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256
// (<8 x float>, <16 x bfloat>, <16 x bfloat>)
// <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512
// (<16 x float>, <32 x bfloat>, <32 x bfloat>)
case Intrinsic::x86_avx512bf16_dpbf16ps_128:
case Intrinsic::x86_avx512bf16_dpbf16ps_256:
case Intrinsic::x86_avx512bf16_dpbf16ps_512:
handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2,
/*ZeroPurifies=*/false);
break;

case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse2_cmp_sd:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
; Strictly handled:
; - llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B)
; - llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A)
; - llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <32 x bfloat> %A, <32 x bfloat> %B)
;
; Heuristically handled: (none)

Expand Down Expand Up @@ -241,25 +240,20 @@ define <16 x float> @test_mm512_dpbf16ps_512(<16 x float> %E, <32 x bfloat> %A,
; CHECK-LABEL: define <16 x float> @test_mm512_dpbf16ps_512(
; CHECK-SAME: <16 x float> [[E:%.*]], <32 x bfloat> [[A:%.*]], <32 x bfloat> [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 64), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 128), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP0]] to i512
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i1> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP6]] to <16 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP11]]
; CHECK-NEXT: [[TMP8:%.*]] = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> [[E]], <32 x bfloat> [[A]], <32 x bfloat> [[B]])
; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store <16 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[TMP8]]
;
entry:
Expand All @@ -271,31 +265,26 @@ define <16 x float> @test_mm512_maskz_dpbf16ps_512(<16 x float> %E, <32 x bfloat
; CHECK-LABEL: define <16 x float> @test_mm512_maskz_dpbf16ps_512(
; CHECK-SAME: <16 x float> [[E:%.*]], <32 x bfloat> [[A:%.*]], <32 x bfloat> [[B:%.*]], i16 zeroext [[U:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 64), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 128), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr getelementptr (i8, ptr @__msan_param_tls, i64 192), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP0]] to i512
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = or <32 x i1> [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP7]] to <16 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32>
; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP18]]
; CHECK-NEXT: [[TMP9:%.*]] = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> [[E]], <32 x bfloat> [[A]], <32 x bfloat> [[B]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[U]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP21]], <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP21]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP16]], <16 x i32> [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> zeroinitializer
Expand All @@ -312,32 +301,27 @@ define <16 x float> @test_mm512_mask_dpbf16ps_512(i16 zeroext %U, <16 x float> %
; CHECK-LABEL: define <16 x float> @test_mm512_mask_dpbf16ps_512(
; CHECK-SAME: i16 zeroext [[U:%.*]], <16 x float> [[E:%.*]], <32 x bfloat> [[A:%.*]], <32 x bfloat> [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 72), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 136), align 8
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP0]] to i512
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = or <32 x i1> [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP7]] to <16 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32>
; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> [[E]], <32 x bfloat> [[A]], <32 x bfloat> [[B]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[U]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP0]]
; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP21]], <16 x i32> [[TMP0]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[E]] to <16 x i32>
; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP21]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP0]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[E]]
Expand Down
Loading