From 9071951d161178305e67956d3b221a1d8dbbb2bc Mon Sep 17 00:00:00 2001 From: Shahzad Malik Muzaffar Date: Wed, 22 May 2024 08:10:57 +0200 Subject: [PATCH] Add numerical sanitizer --- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 + llvm/include/llvm/IR/Attributes.td | 4 + .../NumericalStabilitySanitizer.h | 40 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 + llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 + llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 2 + .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../NumericalStabilitySanitizer.cpp | 2263 +++++++++++++++++ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 + llvm/test/Bitcode/compatibility.ll | 7 +- .../NumericalStabilitySanitizer/basic.ll | 917 +++++++ .../NumericalStabilitySanitizer/cfg.ll | 113 + .../NumericalStabilitySanitizer/invoke.ll | 148 ++ .../NumericalStabilitySanitizer/memory.ll | 405 +++ .../non_float_store.ll | 20 + .../scalable_vector.ll | 16 + 17 files changed, 3941 insertions(+), 2 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h create mode 100644 llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/scalable_vector.ll diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index c6f0ddf29a6da8..4fda602d67ace2 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -724,6 +724,7 @@ enum AttributeKindCodes { ATTR_KIND_WRITABLE = 89, ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE = 90, ATTR_KIND_DEAD_ON_UNWIND = 91, + ATTR_KIND_SANITIZE_NUMERICAL_STABILITY = 92, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index d22eb76d2292d5..2c328baa0bd4de 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -279,6 +279,9 @@ def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress", [FnAttr]>; /// MemTagSanitizer is on. def SanitizeMemTag : EnumAttr<"sanitize_memtag", [FnAttr]>; +/// NumericalStabilitySanitizer is on. +def SanitizeNumericalStability : EnumAttr<"sanitize_numericalstability", [FnAttr]>; + /// Speculative Load Hardening is enabled. 
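For reference only, a sketch (not part of the patch) of how instrumentation code can key off the attribute added above; `Attribute::SanitizeNumericalStability` is the enum value introduced by this change, and the helper name is hypothetical:

    // Sketch: gate nsan instrumentation on the new function attribute.
    static bool wantsNsan(const llvm::Function &F) {
      return F.hasFnAttribute(llvm::Attribute::SanitizeNumericalStability);
    }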
/// /// Note that this uses the default compatibility (always compatible during @@ -366,6 +369,7 @@ def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; +def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; diff --git a/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h new file mode 100644 index 00000000000000..89a6019edd3982 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h @@ -0,0 +1,40 @@ +//===- NumericalStabilitySanitizer.h - NSan Pass ---------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the numerical stability sanitizer (nsan) pass. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABIITYSANITIZER_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABIITYSANITIZER_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// Inserts NumericalStabilitySanitizer instrumentation. +// FunctionPass *createNumericalStabilitySanitizerLegacyPassPass(); + +/// A function pass for nsan instrumentation. +/// +/// Instruments functions to duplicate floating point computations in a +/// higher-precision type. +/// This function pass inserts calls to runtime library functions. If the +/// functions aren't declared yet, the pass inserts the declarations. 
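The pass declared next has both function and module `run()` overloads; as a usage sketch (not part of the patch; the pass-manager types and the adaptor are standard LLVM new-pass-manager API, the helper name is hypothetical):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
    #include <utility>

    // Hypothetical helper: schedule the module part (ctor insertion) once,
    // then the per-function instrumentation on every function.
    void addNsanPasses(llvm::ModulePassManager &MPM) {
      MPM.addPass(llvm::NumericalStabilitySanitizerPass());
      llvm::FunctionPassManager FPM;
      FPM.addPass(llvm::NumericalStabilitySanitizerPass());
      MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM)));
    }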
+struct NumericalStabilitySanitizerPass + : public PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABIITYSANITIZER_H diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index a027d0c21ba0bb..af8d597dfb9ef4 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2060,6 +2060,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::SanitizeThread; case bitc::ATTR_KIND_SANITIZE_MEMORY: return Attribute::SanitizeMemory; + case bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY: + return Attribute::SanitizeNumericalStability; case bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING: return Attribute::SpeculativeLoadHardening; case bitc::ATTR_KIND_SWIFT_ERROR: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index a5fc267b1883bf..a4221b991c633e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -806,6 +806,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_SANITIZE_THREAD; case Attribute::SanitizeMemory: return bitc::ATTR_KIND_SANITIZE_MEMORY; + case Attribute::SanitizeNumericalStability: + return bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY; case Attribute::SpeculativeLoadHardening: return bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING; case Attribute::SwiftError: diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 000594f0e7f4b5..d3926817edd1e5 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -169,6 +169,7 @@ #include "llvm/Transforms/Instrumentation/KCFI.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index e59795c7b0840e..3c9918623bbbdf 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -92,6 +92,7 @@ MODULE_PASS("metarenamer", MetaRenamerPass()) MODULE_PASS("module-inline", ModuleInlinerPass()) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) +MODULE_PASS("nsan-module", NumericalStabilitySanitizerPass()) MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("openmp-opt-postlink", @@ -377,6 +378,7 @@ FUNCTION_PASS("move-auto-init", MoveAutoInitPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) +FUNCTION_PASS("nsan", NumericalStabilitySanitizerPass()) FUNCTION_PASS("objc-arc", ObjCARCOptPass()) FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass()) FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass()) diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 424f1d43360677..f4342a8e1f187d 100644 --- 
a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMInstrumentation BlockCoverageInference.cpp MemProfiler.cpp MemorySanitizer.cpp + NumericalStabilitySanitizer.cpp IndirectCallPromotion.cpp Instrumentation.cpp InstrOrderFile.cpp diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp new file mode 100644 index 00000000000000..9d7338e6e866cb --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -0,0 +1,2263 @@ +//===-- NumericalStabilitySanitizer.cpp -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of NumericalStabilitySanitizer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h" + +#include +#include + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "nsan" + +STATISTIC(NumInstrumentedFTLoads, + "Number of instrumented floating-point loads"); + +STATISTIC(NumInstrumentedFTCalls, + "Number of instrumented floating-point calls"); +STATISTIC(NumInstrumentedFTRets, + "Number of instrumented floating-point returns"); +STATISTIC(NumInstrumentedFTStores, + "Number of instrumented floating-point stores"); +STATISTIC(NumInstrumentedNonFTStores, + "Number of instrumented non floating-point stores"); +STATISTIC( + NumInstrumentedNonFTMemcpyStores, + "Number of instrumented non floating-point stores with memcpy semantics"); +STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps"); + +// Using smaller shadow types types can help improve speed. For example, `dlq` +// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to +// `dqq`. +static cl::opt ClShadowMapping( + "nsan-shadow-type-mapping", cl::init("dqq"), + cl::desc("One shadow type id for each of `float`, `double`, `long double`. " + "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and " + "ppc_fp128 (extended double) respectively. 
The default is to "
+                  "shadow `float` as `double`, and `double` and `x86_fp80` as "
+                  "`fp128`"),
+    cl::Hidden);
+
+static cl::opt<bool>
+    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
+                     cl::desc("Instrument floating-point comparisons"),
+                     cl::Hidden);
+
+static cl::opt<std::string> ClCheckFunctionsFilter(
+    "check-functions-filter",
+    cl::desc("Only emit checks for arguments of functions "
+             "whose names match the given regular expression"),
+    cl::value_desc("regex"));
+
+static cl::opt<bool> ClTruncateFCmpEq(
+    "nsan-truncate-fcmp-eq", cl::init(true),
+    cl::desc(
+        "This flag controls the behaviour of fcmp equality comparisons: "
+        "For equality comparisons such as `x == 0.0f`, we can perform the "
+        "shadow check in the shadow (`(x_shadow == 0.0) == (x == 0.0f)`) or app "
+        "domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
+        "catch the case when `x_shadow` is accurate enough (and therefore "
+        "close enough to zero) so that `trunc(x_shadow)` is zero even though "
+        "both `x` and `x_shadow` are not. "),
+    cl::Hidden);
+
+// When there is external, uninstrumented code writing to memory, the shadow
+// memory can get out of sync with the application memory. Enabling this flag
+// emits consistency checks for loads to catch this situation.
+// When everything is instrumented, this is not strictly necessary because any
+// load should have a corresponding store, but can help debug cases when the
+// framework did a bad job at tracking shadow memory modifications by failing
+// on load rather than store.
+// FIXME: provide a way to resume computations from the FT value when the load
+// is inconsistent. This ensures that further computations are not polluted.
+static cl::opt<bool> ClCheckLoads("nsan-check-loads", cl::init(false),
+                                  cl::desc("Check floating-point loads"),
+                                  cl::Hidden);
+
+static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
+                                   cl::desc("Check floating-point stores"),
+                                   cl::Hidden);
+
+static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
+                                cl::desc("Check floating-point return values"),
+                                cl::Hidden);
+
+// LLVM may store constant floats as bitcasted ints.
+// It's not really necessary to shadow such stores;
+// if the shadow value is unknown the framework will re-extend it on load
+// anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
+// impossible to determine the floating-point type based on the size.
+// However, for debugging purposes it can be useful to model such stores.
+static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
+    "nsan-propagate-non-ft-const-stores-as-ft", cl::init(false),
+    cl::desc(
+        "Propagate non floating-point const stores as floating point values. "
+        "For debugging purposes only"),
+    cl::Hidden);
+
+static constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
+static constexpr StringLiteral kNsanInitName("__nsan_init");
+
+// The following values must be kept in sync with the runtime.
+static constexpr const int kShadowScale = 2;
+static constexpr const int kMaxVectorWidth = 8;
+static constexpr const int kMaxNumArgs = 128;
+static constexpr const int kMaxShadowTypeSizeBytes = 16; // fp128
+
+namespace {
+
+// Defines the characteristics (type id, type, and floating-point semantics)
+// attached to each possible shadow type.
+class ShadowTypeConfig {
+public:
+  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
+  // The floating-point semantics of the shadow type.
+  virtual const fltSemantics &semantics() const = 0;
+
+  // The LLVM Type corresponding to the shadow type.
+  virtual Type *getType(LLVMContext &Context) const = 0;
+
+  // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
+  virtual char getNsanTypeId() const = 0;
+
+  virtual ~ShadowTypeConfig() {}
+};
+
+template <char NsanTypeId>
+class ShadowTypeConfigImpl : public ShadowTypeConfig {
+public:
+  char getNsanTypeId() const override { return NsanTypeId; }
+  static constexpr const char kNsanTypeId = NsanTypeId;
+};
+
+// `double` (`d`) shadow type.
+class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::IEEEdouble();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getDoubleTy(Context);
+  }
+};
+
+// `x86_fp80` (`l`) shadow type: X86 long double.
+class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::x87DoubleExtended();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getX86_FP80Ty(Context);
+  }
+};
+
+// `fp128` (`q`) shadow type.
+class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
+  const fltSemantics &semantics() const override { return APFloat::IEEEquad(); }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getFP128Ty(Context);
+  }
+};
+
+// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
+class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::PPCDoubleDouble();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getPPC_FP128Ty(Context);
+  }
+};
+
+// Creates a ShadowTypeConfig given its type id.
+std::unique_ptr<ShadowTypeConfig>
+ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
+  switch (TypeId) {
+  case F64ShadowConfig::kNsanTypeId:
+    return std::make_unique<F64ShadowConfig>();
+  case F80ShadowConfig::kNsanTypeId:
+    return std::make_unique<F80ShadowConfig>();
+  case F128ShadowConfig::kNsanTypeId:
+    return std::make_unique<F128ShadowConfig>();
+  case PPC128ShadowConfig::kNsanTypeId:
+    return std::make_unique<PPC128ShadowConfig>();
+  }
+  errs() << "nsan: invalid shadow type id '" << TypeId << "'\n";
+  return nullptr;
+}
+
+// An enum corresponding to shadow value types. Used as indices in arrays, so
+// not an `enum class`.
+enum FTValueType { kFloat, kDouble, kLongDouble, kNumValueTypes };
+
+static FTValueType semanticsToFTValueType(const fltSemantics &Sem) {
+  if (&Sem == &APFloat::IEEEsingle()) {
+    return kFloat;
+  } else if (&Sem == &APFloat::IEEEdouble()) {
+    return kDouble;
+  } else if (&Sem == &APFloat::x87DoubleExtended()) {
+    return kLongDouble;
+  }
+  llvm_unreachable("semantics are not one of the handled types");
+}
+
+// If `FT` corresponds to a primitive FTValueType, return it.
+static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
+  if (FT->isFloatTy())
+    return kFloat;
+  if (FT->isDoubleTy())
+    return kDouble;
+  if (FT->isX86_FP80Ty())
+    return kLongDouble;
+  return {};
+}
+
+// Returns the LLVM type for an FTValueType.
+static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
+  switch (VT) {
+  case kFloat:
+    return Type::getFloatTy(Context);
+  case kDouble:
+    return Type::getDoubleTy(Context);
+  case kLongDouble:
+    return Type::getX86_FP80Ty(Context);
+  case kNumValueTypes:
+    return nullptr;
+  }
+}
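For illustration only (not part of the patch): under the default `dqq` mapping the file-local helpers above compose as follows; `Ctx` is assumed to be an existing `LLVMContext`:

    // Sketch: with "dqq", `double` values are shadowed as fp128.
    std::unique_ptr<ShadowTypeConfig> Cfg =
        ShadowTypeConfig::fromNsanTypeId('q');          // F128ShadowConfig
    llvm::Type *ShadowTy = Cfg->getType(Ctx);           // fp128
    const llvm::fltSemantics &Sem = Cfg->semantics();   // APFloat::IEEEquad()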
+// Returns the type name for an FTValueType.
+static const char *typeNameFromFTValueType(FTValueType VT) {
+  switch (VT) {
+  case kFloat:
+    return "float";
+  case kDouble:
+    return "double";
+  case kLongDouble:
+    return "longdouble";
+  case kNumValueTypes:
+    return nullptr;
+  }
+}
+
+// A specific mapping configuration of application type to shadow type for nsan
+// (see the -nsan-shadow-type-mapping flag).
+class MappingConfig {
+public:
+  bool initialize(LLVMContext *C) {
+    if (ClShadowMapping.size() != 3) {
+      errs() << "Invalid nsan mapping: " << ClShadowMapping << "\n";
+    }
+    Context = C;
+    unsigned ShadowTypeSizeBits[kNumValueTypes];
+    for (int VT = 0; VT < kNumValueTypes; ++VT) {
+      auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
+      if (Config == nullptr)
+        return false;
+      const unsigned AppTypeSize =
+          typeFromFTValueType(static_cast<FTValueType>(VT), *C)
+              ->getScalarSizeInBits();
+      const unsigned ShadowTypeSize =
+          Config->getType(*C)->getScalarSizeInBits();
+      // Check that the shadow type size is at most kShadowScale times the
+      // application type size, so that shadow memory computations are valid.
+      if (ShadowTypeSize > kShadowScale * AppTypeSize) {
+        errs() << "Invalid nsan mapping f" << AppTypeSize << "->f"
+               << ShadowTypeSize << ": The shadow type size should be at most "
+               << kShadowScale << " times the application type size\n";
+        return false;
+      }
+      ShadowTypeSizeBits[VT] = ShadowTypeSize;
+      Configs[VT] = std::move(Config);
+    }
+
+    // Check that the mapping is monotonic. This is required because if one
+    // does an fpextend of `float->long double` in application code, nsan is
+    // going to do an fpextend of `shadow(float) -> shadow(long double)` in
+    // shadow code. This will fail in `qql` mode, since nsan would be
+    // fpextending `f128->f80`, which is invalid.
+    // FIXME: Relax this.
+    if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
+        ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble]) {
+      errs() << "Invalid nsan mapping: { float->f" << ShadowTypeSizeBits[kFloat]
+             << "; double->f" << ShadowTypeSizeBits[kDouble]
+             << "; long double->f" << ShadowTypeSizeBits[kLongDouble] << " }\n";
+      return false;
+    }
+    return true;
+  }
+
+  const ShadowTypeConfig &byValueType(FTValueType VT) const {
+    assert(VT < FTValueType::kNumValueTypes && "invalid value type");
+    return *Configs[VT];
+  }
+
+  const ShadowTypeConfig &bySemantics(const fltSemantics &Sem) const {
+    return byValueType(semanticsToFTValueType(Sem));
+  }
+
+  // Returns the extended shadow type for a given application type.
+  Type *getExtendedFPType(Type *FT) const {
+    if (const auto VT = ftValueTypeFromType(FT))
+      return Configs[*VT]->getType(*Context);
+    if (FT->isVectorTy()) {
+      auto *VecTy = cast<VectorType>(FT);
+      // FIXME: add support for scalable vector types.
+      if (VecTy->isScalableTy())
+        return nullptr;
+      Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
+      return ExtendedScalar
+                 ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
+                 : nullptr;
+    }
+    return nullptr;
+  }
+
+private:
+  LLVMContext *Context = nullptr;
+  std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
+};
+
+// The memory extents of a type specify how many elements of a given
+// FTValueType need to be stored when storing this type.
+struct MemoryExtents { + FTValueType ValueType; + uint64_t NumElts; +}; +static MemoryExtents getMemoryExtentsOrDie(Type *FT) { + if (const auto VT = ftValueTypeFromType(FT)) + return {*VT, 1}; + if (FT->isVectorTy()) { + auto *VecTy = cast(FT); + const auto ScalarExtents = getMemoryExtentsOrDie(VecTy->getElementType()); + return {ScalarExtents.ValueType, + ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()}; + } + llvm_unreachable("invalid value type"); +} + +// The location of a check. Passed as parameters to runtime checking functions. +class CheckLoc { +public: + // Creates a location that references an application memory location. + static CheckLoc makeStore(Value *Address) { + CheckLoc Result(kStore); + Result.Address = Address; + return Result; + } + static CheckLoc makeLoad(Value *Address) { + CheckLoc Result(kLoad); + Result.Address = Address; + return Result; + } + + // Creates a location that references an argument, given by id. + static CheckLoc makeArg(int ArgId) { + CheckLoc Result(kArg); + Result.ArgId = ArgId; + return Result; + } + + // Creates a location that references the return value of a function. + static CheckLoc makeRet() { return CheckLoc(kRet); } + + // Creates a location that references a vector insert. + static CheckLoc makeInsert() { return CheckLoc(kInsert); } + + // Returns the CheckType of location this refers to, as an integer-typed LLVM + // IR value. + Value *getType(LLVMContext &C) const { + return ConstantInt::get(Type::getInt32Ty(C), static_cast(CheckTy)); + } + + // Returns a CheckType-specific value representing details of the location + // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM + // IR value. + Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const { + switch (CheckTy) { + case kUnknown: + llvm_unreachable("unknown type"); + case kRet: + case kInsert: + return ConstantInt::get(IntptrTy, 0); + case kArg: + return ConstantInt::get(IntptrTy, ArgId); + case kLoad: + case kStore: + return Builder.CreatePtrToInt(Address, IntptrTy); + } + } + +private: + // Must be kept in sync with the runtime. + enum CheckType { + kUnknown = 0, + kRet, + kArg, + kLoad, + kStore, + kInsert, + }; + explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {} + + const CheckType CheckTy; + Value *Address = nullptr; + int ArgId = -1; +}; + +// A map of LLVM IR values to shadow LLVM IR values. +class ValueToShadowMap { +public: + explicit ValueToShadowMap(MappingConfig *Config) : Config(Config) {} + + // Sets the shadow value for a value. Asserts that the value does not already + // have a value. + void setShadow(Value *V, Value *Shadow) { + assert(V); + assert(Shadow); + const bool Inserted = Map.emplace(V, Shadow).second; +#ifdef LLVM_ENABLE_DUMP + if (!Inserted) { + if (const auto *const I = dyn_cast(V)) + I->getParent()->getParent()->dump(); + errs() << "duplicate shadow (" << V << "): "; + V->dump(); + } +#endif + assert(Inserted && "duplicate shadow"); + (void)Inserted; + } + + // Returns true if the value already has a shadow (including if the value is a + // constant). If true, calling getShadow() is valid. + bool hasShadow(Value *V) const { + return isa(V) || (Map.find(V) != Map.end()); + } + + // Returns the shadow value for a given value. Asserts that the value has + // a shadow value. Lazily creates shadows for constant values. 
+ Value *getShadow(Value *V) const { + assert(V); + if (Constant *C = dyn_cast(V)) + return getShadowConstant(C); + const auto ShadowValIt = Map.find(V); + assert(ShadowValIt != Map.end() && "shadow val does not exist"); + assert(ShadowValIt->second && "shadow val is null"); + return ShadowValIt->second; + } + + bool empty() const { return Map.empty(); } + +private: + // Extends a constant application value to its shadow counterpart. + APFloat extendConstantFP(APFloat CV) const { + bool LosesInfo = false; + CV.convert(Config->bySemantics(CV.getSemantics()).semantics(), + APFloatBase::rmTowardZero, &LosesInfo); + return CV; + } + + // Returns the shadow constant for the given application constant. + Constant *getShadowConstant(Constant *C) const { + if (UndefValue *U = dyn_cast(C)) { + return UndefValue::get(Config->getExtendedFPType(U->getType())); + } + if (ConstantFP *CFP = dyn_cast(C)) { + // Floating-point constants. + return ConstantFP::get(Config->getExtendedFPType(CFP->getType()), + extendConstantFP(CFP->getValueAPF())); + } + // Vector, array, or aggregate constants. + if (C->getType()->isVectorTy()) { + SmallVector Elements; + for (int I = 0, E = cast(C->getType()) + ->getElementCount() + .getFixedValue(); + I < E; ++I) + Elements.push_back(getShadowConstant(C->getAggregateElement(I))); + return ConstantVector::get(Elements); + } + llvm_unreachable("unimplemented"); + } + + MappingConfig *const Config; + std::unordered_map Map; +}; + +/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library +/// API function declarations into the module if they don't exist already. +/// Instantiating ensures the __nsan_init function is in the list of global +/// constructors for the module. +class NumericalStabilitySanitizer { +public: + bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); + +private: + void initialize(Module &M); + bool instrumentMemIntrinsic(MemIntrinsic *MI); + void maybeAddSuffixForNsanInterface(CallBase *CI); + bool addrPointsToConstantData(Value *Addr); + void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI, + ValueToShadowMap &Map); + Value *createShadowValueWithOperandsAvailable(Instruction &Inst, + const TargetLibraryInfo &TLI, + const ValueToShadowMap &Map); + PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI); + void createShadowArguments(Function &F, const TargetLibraryInfo &TLI, + ValueToShadowMap &Map); + + void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI, + const ValueToShadowMap &Map); + + void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI, + const ValueToShadowMap &Map); + Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder, + CheckLoc Loc); + Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder, + CheckLoc Loc); + void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map); + + // Value creation handlers. 
+ Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT); + Value *handleTrunc(FPTruncInst &Trunc, Type *VT, Type *ExtendedVT, + const ValueToShadowMap &Map); + Value *handleExt(FPExtInst &Ext, Type *VT, Type *ExtendedVT, + const ValueToShadowMap &Map); + Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT, + const TargetLibraryInfo &TLI, + const ValueToShadowMap &Map, IRBuilder<> &Builder); + Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT, + const TargetLibraryInfo &TLI, + const ValueToShadowMap &Map, + IRBuilder<> &Builder); + + // Value propagation handlers. + void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT, + const ValueToShadowMap &Map); + void propagateNonFTStore(StoreInst &Store, Type *VT, + const ValueToShadowMap &Map); + + MappingConfig Config; + LLVMContext *Context = nullptr; + IntegerType *IntptrTy = nullptr; + FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes]; + FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes]; + FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes]; + FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes]; + FunctionCallee NsanCopyValues; + FunctionCallee NsanSetValueUnknown; + FunctionCallee NsanGetRawShadowTypePtr; + FunctionCallee NsanGetRawShadowPtr; + GlobalValue *NsanShadowRetTag; + + Type *NsanShadowRetType; + GlobalValue *NsanShadowRetPtr; + + GlobalValue *NsanShadowArgsTag; + + Type *NsanShadowArgsType; + GlobalValue *NsanShadowArgsPtr; + + std::optional CheckFunctionsFilter; +}; + +void insertModuleCtor(Module &M) { + getOrCreateSanitizerCtorAndInitFunctions( + M, kNsanModuleCtorName, kNsanInitName, /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); +} + +} // end anonymous namespace + +PreservedAnalyses +NumericalStabilitySanitizerPass::run(Function &F, + FunctionAnalysisManager &FAM) { + NumericalStabilitySanitizer Nsan; + if (Nsan.sanitizeFunction(F, FAM.getResult(F))) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + +PreservedAnalyses +NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) { + insertModuleCtor(M); + return PreservedAnalyses::none(); +} + +static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) { + return dyn_cast(M.getOrInsertGlobal(Name, Ty, [&M, Ty, Name] { + return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, + nullptr, Name, nullptr, + GlobalVariable::InitialExecTLSModel); + })); +} + +void NumericalStabilitySanitizer::initialize(Module &M) { + const DataLayout &DL = M.getDataLayout(); + Context = &M.getContext(); + IntptrTy = DL.getIntPtrType(*Context); + Type *PtrTy = PointerType::getUnqual(*Context); + Type *Int32Ty = Type::getInt32Ty(*Context); + Type *Int1Ty = Type::getInt1Ty(*Context); + Type *VoidTy = Type::getVoidTy(*Context); + + AttributeList Attr; + Attr = Attr.addFnAttribute(*Context, Attribute::NoUnwind); + // Initialize the runtime values (functions and global variables). + for (int I = 0; I < kNumValueTypes; ++I) { + const FTValueType VT = static_cast(I); + const char *const VTName = typeNameFromFTValueType(VT); + Type *const VTTy = typeFromFTValueType(VT, *Context); + + // Load/store. 
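  // (Illustration only, not part of the patch: the runtime entry points
  // declared below are expected to have roughly these C signatures, inferred
  // from the getOrInsertFunction calls that follow; the authoritative
  // declarations live in the nsan runtime and may differ:
  //   void *__nsan_get_shadow_ptr_for_float_store(void *App, uintptr_t NElts);
  //   void *__nsan_get_shadow_ptr_for_float_load(void *App, uintptr_t NElts);
  //   int __nsan_internal_check_float_d(float Val, double Shadow,
  //                                     int CheckType, uintptr_t Loc);
  // with one variant per application/shadow type pair.)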
+ const std::string GetterPrefix = + std::string("__nsan_get_shadow_ptr_for_") + VTName; + NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction( + GetterPrefix + "_store", Attr, PtrTy, PtrTy, IntptrTy); + NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction( + GetterPrefix + "_load", Attr, PtrTy, PtrTy, IntptrTy); + + // Check. + const auto &ShadowConfig = Config.byValueType(VT); + Type *ShadowTy = ShadowConfig.getType(*Context); + NsanCheckValue[VT] = + M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName + + "_" + ShadowConfig.getNsanTypeId(), + Attr, Int32Ty, VTTy, ShadowTy, Int32Ty, IntptrTy); + NsanFCmpFail[VT] = M.getOrInsertFunction( + std::string("__nsan_fcmp_fail_") + VTName + "_" + + ShadowConfig.getNsanTypeId(), + Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty); + } + + NsanCopyValues = M.getOrInsertFunction("__nsan_copy_values", Attr, VoidTy, + PtrTy, PtrTy, IntptrTy); + NsanSetValueUnknown = M.getOrInsertFunction("__nsan_set_value_unknown", Attr, + VoidTy, PtrTy, IntptrTy); + + // FIXME: Add attributes nofree, nosync, readnone, readonly, + NsanGetRawShadowTypePtr = M.getOrInsertFunction( + "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy); + NsanGetRawShadowPtr = M.getOrInsertFunction( + "__nsan_internal_get_raw_shadow_ptr", Attr, PtrTy, PtrTy); + + NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy); + + NsanShadowRetType = ArrayType::get(Type::getInt8Ty(*Context), + kMaxVectorWidth * kMaxShadowTypeSizeBytes); + NsanShadowRetPtr = + createThreadLocalGV("__nsan_shadow_ret_ptr", M, NsanShadowRetType); + + NsanShadowArgsTag = + createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy); + + NsanShadowArgsType = + ArrayType::get(Type::getInt8Ty(*Context), + kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes); + + NsanShadowArgsPtr = + createThreadLocalGV("__nsan_shadow_args_ptr", M, NsanShadowArgsType); + + if (!ClCheckFunctionsFilter.empty()) { + Regex R = Regex(ClCheckFunctionsFilter); + std::string RegexError; + assert(R.isValid(RegexError)); + CheckFunctionsFilter = std::move(R); + } +} + +// Returns true if the given LLVM Value points to constant data (typically, a +// global variable reference). +bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) { + // If this is a GEP, just analyze its pointer operand. + if (GetElementPtrInst *GEP = dyn_cast(Addr)) + Addr = GEP->getPointerOperand(); + + if (GlobalVariable *GV = dyn_cast(Addr)) { + return GV->isConstant(); + } + return false; +} + +// This instruments the function entry to create shadow arguments. +// Pseudocode: +// if (this_fn_ptr == __nsan_shadow_args_tag) { +// s(arg0) = LOAD(__nsan_shadow_args); +// s(arg1) = LOAD(__nsan_shadow_args + sizeof(arg0)); +// ... +// __nsan_shadow_args_tag = 0; +// } else { +// s(arg0) = fext(arg0); +// s(arg1) = fext(arg1); +// ... +// } +void NumericalStabilitySanitizer::createShadowArguments( + Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) { + assert(!F.getIntrinsicID() && "found a definition of an intrinsic"); + + // Do not bother if there are no FP args. + if (all_of(F.args(), [this](const Argument &Arg) { + return Config.getExtendedFPType(Arg.getType()) == nullptr; + })) + return; + + const DataLayout &DL = F.getParent()->getDataLayout(); + IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI()); + // The function has shadow args if the shadow args tag matches the function + // address. 
+ Value *HasShadowArgs = Builder.CreateICmpEQ( + Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false), + Builder.CreatePtrToInt(&F, IntptrTy)); + + unsigned ShadowArgsOffsetBytes = 0; + for (Argument &Arg : F.args()) { + Type *const VT = Arg.getType(); + Type *const ExtendedVT = Config.getExtendedFPType(VT); + if (ExtendedVT == nullptr) + continue; // Not an FT value. + Value *L = Builder.CreateAlignedLoad( + ExtendedVT, + Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0, + ShadowArgsOffsetBytes), + Align(1), /*isVolatile=*/false); + Value *Shadow = Builder.CreateSelect( + HasShadowArgs, L, + Builder.CreateCast(Instruction::FPExt, &Arg, ExtendedVT)); + Map.setShadow(&Arg, Shadow); + TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT); + assert(!SlotSize.isScalable() && "unsupported"); + ShadowArgsOffsetBytes += SlotSize; + } + Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag); +} + +// Returns true if the instrumentation should emit code to check arguments +// before a function call. +static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI, + const std::optional &CheckFunctionsFilter) { + + Function *Fn = CI.getCalledFunction(); + + if (CheckFunctionsFilter) { + // Skip checking args of indirect calls. + if (Fn == nullptr) + return false; + if (CheckFunctionsFilter->match(Fn->getName())) + return true; + return false; + } + + if (Fn == nullptr) + return true; // Always check args of indirect calls. + + // Never check nsan functions, the user called them for a reason. + if (Fn->getName().starts_with("__nsan_")) + return false; + + const auto ID = Fn->getIntrinsicID(); + LibFunc LFunc = LibFunc::NumLibFuncs; + // Always check args of unknown functions. + if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc)) + return true; + + // Do not check args of an `fabs` call that is used for a comparison. + // This is typically used for `fabs(a-b) < tolerance`, where what matters is + // the result of the comparison, which is already caught be the fcmp checks. + if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf || + LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl) + for (const auto &U : CI.users()) + if (isa(U)) + return false; + + return true; // Default is check. +} + +// Populates the shadow call stack (which contains shadow values for every +// floating-point parameter to the function). +void NumericalStabilitySanitizer::populateShadowStack( + CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) { + // Do not create a shadow stack for inline asm. + if (CI.isInlineAsm()) + return; + + // Do not bother if there are no FP args. + if (all_of(CI.operands(), [this](const Value *Arg) { + return Config.getExtendedFPType(Arg->getType()) == nullptr; + })) + return; + + IRBuilder<> Builder(&CI); + SmallVector ArgShadows; + const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter); + int ArgId = -1; + for (Value *Arg : CI.operands()) { + ++ArgId; + if (Config.getExtendedFPType(Arg->getType()) == nullptr) + continue; // Not an FT value. + Value *ArgShadow = Map.getShadow(Arg); + ArgShadows.push_back(ShouldCheckArgs ? emitCheck(Arg, ArgShadow, Builder, + CheckLoc::makeArg(ArgId)) + : ArgShadow); + } + + // Do not create shadow stacks for intrinsics/known lib funcs. + if (Function *Fn = CI.getCalledFunction()) { + LibFunc LFunc; + if (Fn->isIntrinsic() || TLI.getLibFunc(*Fn, LFunc)) + return; + } + + const DataLayout &DL = CI.getModule()->getDataLayout(); + // Set the shadow stack tag. 
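  // (Illustration only, not part of the patch: for a call `double f(double x)`,
  // the code below stores s(x) into __nsan_shadow_args_ptr and writes the
  // callee address into __nsan_shadow_args_tag; the instrumented callee entry
  // (see createShadowArguments above) compares the tag against its own
  // address and, on a match, loads s(x) from the slot instead of re-extending
  // x, then resets the tag to 0.)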
+ Builder.CreateStore(CI.getCalledOperand(), NsanShadowArgsTag); + TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(0); + + unsigned ShadowArgId = 0; + for (const Value *Arg : CI.operands()) { + Type *const VT = Arg->getType(); + Type *const ExtendedVT = Config.getExtendedFPType(VT); + if (ExtendedVT == nullptr) + continue; // Not an FT value. + Builder.CreateAlignedStore( + ArgShadows[ShadowArgId++], + Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0, + ShadowArgsOffsetBytes), + Align(1), /*isVolatile=*/false); + TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT); + assert(!SlotSize.isScalable() && "unsupported"); + ShadowArgsOffsetBytes += SlotSize; + } +} + +// Internal part of emitCheck(). Returns a value that indicates whether +// computation should continue with the shadow or resume by re-fextending the +// value. +enum ContinuationType { // Keep in sync with runtime. + kContinueWithShadow = 0, + kResumeFromValue = 1, +}; +Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV, + IRBuilder<> &Builder, + CheckLoc Loc) { + // Do not emit checks for constant values, this is redundant. + if (isa(V)) + return ConstantInt::get(Builder.getInt32Ty(), kContinueWithShadow); + + Type *const Ty = V->getType(); + if (const auto VT = ftValueTypeFromType(Ty)) + return Builder.CreateCall( + NsanCheckValue[*VT], + {V, ShadowV, Loc.getType(*Context), Loc.getValue(IntptrTy, Builder)}); + + if (Ty->isVectorTy()) { + auto *VecTy = cast(Ty); + // We currently skip scalable vector types in MappingConfig, + // thus we should not encounter any such types here. + assert(!VecTy->isScalableTy() && + "Scalable vector types are not supported yet"); + Value *CheckResult = nullptr; + for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) { + // We resume if any element resumes. Another option would be to create a + // vector shuffle with the array of ContinueWithShadow, but that is too + // complex. + Value *ExtractV = Builder.CreateExtractElement(V, I); + Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I); + Value *ComponentCheckResult = + emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc); + CheckResult = CheckResult + ? Builder.CreateOr(CheckResult, ComponentCheckResult) + : ComponentCheckResult; + } + return CheckResult; + } + if (Ty->isArrayTy()) { + Value *CheckResult = nullptr; + for (int I = 0, E = Ty->getArrayNumElements(); I < E; ++I) { + Value *ExtractV = Builder.CreateExtractElement(V, I); + Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I); + Value *ComponentCheckResult = + emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc); + CheckResult = CheckResult + ? Builder.CreateOr(CheckResult, ComponentCheckResult) + : ComponentCheckResult; + } + return CheckResult; + } + if (Ty->isStructTy()) { + Value *CheckResult = nullptr; + for (int I = 0, E = Ty->getStructNumElements(); I < E; ++I) { + if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr) + continue; // Only check FT values. + Value *ExtractV = Builder.CreateExtractValue(V, I); + Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I); + Value *ComponentCheckResult = + emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc); + CheckResult = CheckResult + ? 
Builder.CreateOr(CheckResult, ComponentCheckResult) + : ComponentCheckResult; + } + if (!CheckResult) + return ConstantInt::get(Builder.getInt32Ty(), kContinueWithShadow); + return CheckResult; + } + + llvm_unreachable("not implemented"); +} + +// Inserts a runtime check of V against its shadow value ShadowV. +// We check values whenever they escape: on return, call, stores, and +// insertvalue. +// Returns the shadow value that should be used to continue the computations, +// depending on the answer from the runtime. +// FIXME: Should we check on select ? phi ? +Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV, + IRBuilder<> &Builder, + CheckLoc Loc) { + // Do not emit checks for constant values, this is redundant. + if (isa(V)) + return ShadowV; + + if (Instruction *Inst = dyn_cast(V)) { + Function *F = Inst->getFunction(); + if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName())) { + return ShadowV; + } + } + + Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc); + Value *ICmpEQ = Builder.CreateICmpEQ( + CheckResult, ConstantInt::get(Builder.getInt32Ty(), kResumeFromValue)); + return Builder.CreateSelect( + ICmpEQ, Builder.CreateFPExt(V, Config.getExtendedFPType(V->getType())), + ShadowV); +} + +static Instruction *getNextInstructionOrDie(Instruction &Inst) { + assert(Inst.getNextNode() && "instruction is a terminator"); + return Inst.getNextNode(); +} + +// Inserts a check that fcmp on shadow values are consistent with that on base +// values. +void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp, + const ValueToShadowMap &Map) { + if (!ClInstrumentFCmp) + return; + + Function *F = FCmp.getFunction(); + if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName())) { + return; + } + + Value *LHS = FCmp.getOperand(0); + if (Config.getExtendedFPType(LHS->getType()) == nullptr) + return; + Value *RHS = FCmp.getOperand(1); + + // Split the basic block. On mismatch, we'll jump to the new basic block with + // a call to the runtime for error reporting. + BasicBlock *FCmpBB = FCmp.getParent(); + BasicBlock *NextBB = FCmpBB->splitBasicBlock(getNextInstructionOrDie(FCmp)); + // Remove the newly created terminator unconditional branch. + FCmpBB->back().eraseFromParent(); + BasicBlock *FailBB = + BasicBlock::Create(*Context, "", FCmpBB->getParent(), NextBB); + + // Create the shadow fcmp and comparison between the fcmps. + IRBuilder<> FCmpBuilder(FCmpBB); + FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc()); + Value *ShadowLHS = Map.getShadow(LHS); + Value *ShadowRHS = Map.getShadow(RHS); + // See comment on ClTruncateFCmpEq. + if (FCmp.isEquality() && ClTruncateFCmpEq) { + Type *Ty = ShadowLHS->getType(); + ShadowLHS = FCmpBuilder.CreateFPExt( + FCmpBuilder.CreateFPTrunc(ShadowLHS, LHS->getType()), Ty); + ShadowRHS = FCmpBuilder.CreateFPExt( + FCmpBuilder.CreateFPTrunc(ShadowRHS, RHS->getType()), Ty); + } + Value *ShadowFCmp = + FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS); + Value *OriginalAndShadowFcmpMatch = + FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp); + + if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) { + // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1, + // where an element is true if the corresponding elements in original and + // shadow are the same. We want all elements to be 1. 
+ OriginalAndShadowFcmpMatch = + FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch); + } + + FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB); + + // Fill in FailBB. + IRBuilder<> FailBuilder(FailBB); + FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc()); + + const auto EmitFailCall = [this, &FCmp, &FCmpBuilder, + &FailBuilder](Value *L, Value *R, Value *ShadowL, + Value *ShadowR, Value *Result, + Value *ShadowResult) { + Type *FT = L->getType(); + FunctionCallee *Callee = nullptr; + if (FT->isFloatTy()) { + Callee = &(NsanFCmpFail[kFloat]); + } else if (FT->isDoubleTy()) { + Callee = &(NsanFCmpFail[kDouble]); + } else if (FT->isX86_FP80Ty()) { + // FIXME: make NsanFCmpFailLongDouble work. + Callee = &(NsanFCmpFail[kDouble]); + L = FailBuilder.CreateCast(Instruction::FPTrunc, L, + Type::getDoubleTy(*Context)); + R = FailBuilder.CreateCast(Instruction::FPTrunc, L, + Type::getDoubleTy(*Context)); + } else { + llvm_unreachable("not implemented"); + } + FailBuilder.CreateCall(*Callee, {L, R, ShadowL, ShadowR, + ConstantInt::get(FCmpBuilder.getInt32Ty(), + FCmp.getPredicate()), + Result, ShadowResult}); + }; + if (LHS->getType()->isVectorTy()) { + for (int I = 0, E = cast(LHS->getType()) + ->getElementCount() + .getFixedValue(); + I < E; ++I) { + Value *ExtractLHS = FailBuilder.CreateExtractElement(LHS, I); + Value *ExtractRHS = FailBuilder.CreateExtractElement(RHS, I); + Value *ExtractShaodwLHS = FailBuilder.CreateExtractElement(ShadowLHS, I); + Value *ExtractShaodwRHS = FailBuilder.CreateExtractElement(ShadowRHS, I); + Value *ExtractFCmp = FailBuilder.CreateExtractElement(&FCmp, I); + Value *ExtractShadowFCmp = + FailBuilder.CreateExtractElement(ShadowFCmp, I); + EmitFailCall(ExtractLHS, ExtractRHS, ExtractShaodwLHS, ExtractShaodwRHS, + ExtractFCmp, ExtractShadowFCmp); + } + } else { + EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp); + } + FailBuilder.CreateBr(NextBB); + + ++NumInstrumentedFCmp; +} + +// Creates a shadow phi value for any phi that defines a value of FT type. +PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi( + PHINode &Phi, const TargetLibraryInfo &TLI) { + Type *const VT = Phi.getType(); + Type *const ExtendedVT = Config.getExtendedFPType(VT); + if (ExtendedVT == nullptr) + return nullptr; // Not an FT value. + // The phi operands are shadow values and are not available when the phi is + // created. They will be populated in a final phase, once all shadow values + // have been created. + PHINode *Shadow = PHINode::Create(ExtendedVT, Phi.getNumIncomingValues()); + Shadow->insertAfter(&Phi); + return Shadow; +} + +Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT, + Type *ExtendedVT) { + IRBuilder<> Builder(getNextInstructionOrDie(Load)); + Builder.SetCurrentDebugLocation(Load.getDebugLoc()); + if (addrPointsToConstantData(Load.getPointerOperand())) { + // No need to look into the shadow memory, the value is a constant. Just + // convert from FT to 2FT. + return Builder.CreateFPExt(&Load, ExtendedVT); + } + + // if (%shadowptr == &) + // %shadow = fpext %v + // else + // %shadow = load (ptrcast %shadow_ptr)) + // Considered options here: + // - Have `NsanGetShadowPtrForLoad` return a fixed address + // &__nsan_unknown_value_shadow_address that is valid to load from, and + // use a select. This has the advantage that the generated IR is simpler. + // - Have `NsanGetShadowPtrForLoad` return nullptr. 
Because `select` does + // not short-circuit, dereferencing the returned pointer is no longer an + // option, have to split and create a separate basic block. This has the + // advantage of being easier to debug because it crashes if we ever mess + // up. + + const auto Extents = getMemoryExtentsOrDie(VT); + Value *ShadowPtr = Builder.CreateCall( + NsanGetShadowPtrForLoad[Extents.ValueType], + {Load.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)}); + ++NumInstrumentedFTLoads; + + // Split the basic block. + BasicBlock *LoadBB = Load.getParent(); + BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint()); + // Create the two options for creating the shadow value. + BasicBlock *ShadowLoadBB = + BasicBlock::Create(*Context, "", LoadBB->getParent(), NextBB); + BasicBlock *FExtBB = + BasicBlock::Create(*Context, "", LoadBB->getParent(), NextBB); + + // Replace the newly created terminator unconditional branch by a conditional + // branch to one of the options. + { + LoadBB->back().eraseFromParent(); + IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated. + LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc()); + LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB, + ShadowLoadBB); + } + + // Fill in ShadowLoadBB. + IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB); + ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc()); + Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad( + ExtendedVT, ShadowPtr, Align(1), Load.isVolatile()); + if (ClCheckLoads) { + ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder, + CheckLoc::makeLoad(Load.getPointerOperand())); + } + ShadowLoadBBBuilder.CreateBr(NextBB); + + // Fill in FExtBB. + IRBuilder<> FExtBBBuilder(FExtBB); + FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc()); + Value *const FExt = + FExtBBBuilder.CreateCast(Instruction::FPExt, &Load, ExtendedVT); + FExtBBBuilder.CreateBr(NextBB); + + // The shadow value come from any of the options. + IRBuilder<> NextBBBuilder(&*NextBB->begin()); + NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc()); + PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2); + ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB); + ShadowPhi->addIncoming(FExt, FExtBB); + return ShadowPhi; +} + +Value *NumericalStabilitySanitizer::handleTrunc(FPTruncInst &Trunc, Type *VT, + Type *ExtendedVT, + const ValueToShadowMap &Map) { + Value *const OrigSource = Trunc.getOperand(0); + Type *const OrigSourceTy = OrigSource->getType(); + Type *const ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy); + + // When truncating: + // - (A) If the source has a shadow, we truncate from the shadow, else we + // truncate from the original source. + // - (B) If the shadow of the source is larger than the shadow of the dest, + // we still need a truncate. Else, the shadow of the source is the same + // type as the shadow of the dest (because mappings are non-decreasing), so + // we don't need to emit a truncate. 
+ // Examples, + // with a mapping of {f32->f64;f64->f80;f80->f128} + // fptrunc double %1 to float -> fptrunc x86_fp80 s(%1) to double + // fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double + // fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double + // fptrunc x86_fp80 %1 to double -> x86_fp80 s(%1) + // fptrunc fp128 %1 to double -> fptrunc fp128 %1 to x86_fp80 + // fptrunc fp128 %1 to x86_fp80 -> fp128 %1 + // with a mapping of {f32->f64;f64->f128;f80->f128} + // fptrunc double %1 to float -> fptrunc fp128 s(%1) to double + // fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double + // fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double + // fptrunc x86_fp80 %1 to double -> fp128 %1 + // fptrunc fp128 %1 to double -> fp128 %1 + // fptrunc fp128 %1 to x86_fp80 -> fp128 %1 + // with a mapping of {f32->f32;f64->f32;f80->f64} + // fptrunc double %1 to float -> float s(%1) + // fptrunc x86_fp80 %1 to float -> fptrunc double s(%1) to float + // fptrunc fp128 %1 to float -> fptrunc fp128 %1 to float + // fptrunc x86_fp80 %1 to double -> fptrunc double s(%1) to float + // fptrunc fp128 %1 to double -> fptrunc fp128 %1 to float + // fptrunc fp128 %1 to x86_fp80 -> fptrunc fp128 %1 to double + + // See (A) above. + Value *const Source = + ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource; + Type *const SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy; + // See (B) above. + if (SourceTy == ExtendedVT) + return Source; + + Instruction *const Shadow = + CastInst::Create(Instruction::FPTrunc, Source, ExtendedVT); + Shadow->insertAfter(&Trunc); + return Shadow; +} + +Value *NumericalStabilitySanitizer::handleExt(FPExtInst &Ext, Type *VT, + Type *ExtendedVT, + const ValueToShadowMap &Map) { + Value *const OrigSource = Ext.getOperand(0); + Type *const OrigSourceTy = OrigSource->getType(); + Type *const ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy); + // When extending: + // - (A) If the source has a shadow, we extend from the shadow, else we + // extend from the original source. + // - (B) If the shadow of the dest is larger than the shadow of the source, + // we still need an extend. Else, the shadow of the source is the same + // type as the shadow of the dest (because mappings are non-decreasing), so + // we don't need to emit an extend. + // Examples, + // with a mapping of {f32->f64;f64->f80;f80->f128} + // fpext half %1 to float -> fpext half %1 to double + // fpext half %1 to double -> fpext half %1 to x86_fp80 + // fpext half %1 to x86_fp80 -> fpext half %1 to fp128 + // fpext float %1 to double -> double s(%1) + // fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128 + // fpext double %1 to x86_fp80 -> fpext x86_fp80 s(%1) to fp128 + // with a mapping of {f32->f64;f64->f128;f80->f128} + // fpext half %1 to float -> fpext half %1 to double + // fpext half %1 to double -> fpext half %1 to fp128 + // fpext half %1 to x86_fp80 -> fpext half %1 to fp128 + // fpext float %1 to double -> fpext double s(%1) to fp128 + // fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128 + // fpext double %1 to x86_fp80 -> fp128 s(%1) + // with a mapping of {f32->f32;f64->f32;f80->f64} + // fpext half %1 to float -> fpext half %1 to float + // fpext half %1 to double -> fpext half %1 to float + // fpext half %1 to x86_fp80 -> fpext half %1 to double + // fpext float %1 to double -> s(%1) + // fpext float %1 to x86_fp80 -> fpext float s(%1) to double + // fpext double %1 to x86_fp80 -> fpext float s(%1) to double + + // See (A) above. 
+ Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource; + Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy; + // See (B) above. + if (SourceTy == ExtendedVT) + return Source; + + Instruction *Shadow = + CastInst::Create(Instruction::FPExt, Source, ExtendedVT); + Shadow->insertAfter(&Ext); + return Shadow; +} + +namespace { + +// FIXME: This should be tablegen-ed. + +struct KnownIntrinsic { + struct WidenedIntrinsic { + const char *NarrowName; + Intrinsic::ID ID; // wide id. + using FnTypeFactory = FunctionType *(*)(LLVMContext &); + FnTypeFactory MakeFnTy; + }; + + static const char *get(LibFunc LFunc); + + // Given an intrinsic with an `FT` argument, try to find a wider intrinsic + // that applies the same operation on the shadow argument. + // Options are: + // - pass in the ID and full function type, + // - pass in the name, which includes the function type through mangling. + static const WidenedIntrinsic *widen(StringRef Name); + +private: + struct LFEntry { + LibFunc LFunc; + const char *IntrinsicName; + }; + static const LFEntry kLibfuncIntrinsics[]; + + static const WidenedIntrinsic kWidenedIntrinsics[]; +}; + +FunctionType *Make_Double_Double(LLVMContext &C) { + return FunctionType::get(Type::getDoubleTy(C), {Type::getDoubleTy(C)}, false); +} + +FunctionType *Make_X86FP80_X86FP80(LLVMContext &C) { + return FunctionType::get(Type::getX86_FP80Ty(C), {Type::getX86_FP80Ty(C)}, + false); +} + +FunctionType *Make_Double_DoubleI32(LLVMContext &C) { + return FunctionType::get(Type::getDoubleTy(C), + {Type::getDoubleTy(C), Type::getInt32Ty(C)}, false); +} + +FunctionType *Make_X86FP80_X86FP80I32(LLVMContext &C) { + return FunctionType::get(Type::getX86_FP80Ty(C), + {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)}, + false); +} + +FunctionType *Make_Double_DoubleDouble(LLVMContext &C) { + return FunctionType::get(Type::getDoubleTy(C), + {Type::getDoubleTy(C), Type::getDoubleTy(C)}, false); +} + +FunctionType *Make_X86FP80_X86FP80X86FP80(LLVMContext &C) { + return FunctionType::get(Type::getX86_FP80Ty(C), + {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)}, + false); +} + +FunctionType *Make_Double_DoubleDoubleDouble(LLVMContext &C) { + return FunctionType::get( + Type::getDoubleTy(C), + {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)}, + false); +} + +FunctionType *Make_X86FP80_X86FP80X86FP80X86FP80(LLVMContext &C) { + return FunctionType::get( + Type::getX86_FP80Ty(C), + {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)}, + false); +} + +const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = { + // FIXME: Right now we ignore vector intrinsics. + // This is hard because we have to model the semantics of the intrinsics, + // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back. + // Intrinsics that take any non-vector FT types: + // NOTE: Right now because of https://bugs.llvm.org/show_bug.cgi?id=45399 + // for f128 we need to use Make_X86FP80_X86FP80 (go to a lower precision and + // come back). 
+    {"llvm.sqrt.f32", Intrinsic::sqrt, Make_Double_Double},
+    {"llvm.sqrt.f64", Intrinsic::sqrt, Make_X86FP80_X86FP80},
+    {"llvm.sqrt.f80", Intrinsic::sqrt, Make_X86FP80_X86FP80},
+    {"llvm.powi.f32", Intrinsic::powi, Make_Double_DoubleI32},
+    {"llvm.powi.f64", Intrinsic::powi, Make_X86FP80_X86FP80I32},
+    {"llvm.powi.f80", Intrinsic::powi, Make_X86FP80_X86FP80I32},
+    {"llvm.sin.f32", Intrinsic::sin, Make_Double_Double},
+    {"llvm.sin.f64", Intrinsic::sin, Make_X86FP80_X86FP80},
+    {"llvm.sin.f80", Intrinsic::sin, Make_X86FP80_X86FP80},
+    {"llvm.cos.f32", Intrinsic::cos, Make_Double_Double},
+    {"llvm.cos.f64", Intrinsic::cos, Make_X86FP80_X86FP80},
+    {"llvm.cos.f80", Intrinsic::cos, Make_X86FP80_X86FP80},
+    {"llvm.pow.f32", Intrinsic::pow, Make_Double_DoubleDouble},
+    {"llvm.pow.f64", Intrinsic::pow, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.pow.f80", Intrinsic::pow, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.exp.f32", Intrinsic::exp, Make_Double_Double},
+    {"llvm.exp.f64", Intrinsic::exp, Make_X86FP80_X86FP80},
+    {"llvm.exp.f80", Intrinsic::exp, Make_X86FP80_X86FP80},
+    {"llvm.exp2.f32", Intrinsic::exp2, Make_Double_Double},
+    {"llvm.exp2.f64", Intrinsic::exp2, Make_X86FP80_X86FP80},
+    {"llvm.exp2.f80", Intrinsic::exp2, Make_X86FP80_X86FP80},
+    {"llvm.log.f32", Intrinsic::log, Make_Double_Double},
+    {"llvm.log.f64", Intrinsic::log, Make_X86FP80_X86FP80},
+    {"llvm.log.f80", Intrinsic::log, Make_X86FP80_X86FP80},
+    {"llvm.log10.f32", Intrinsic::log10, Make_Double_Double},
+    {"llvm.log10.f64", Intrinsic::log10, Make_X86FP80_X86FP80},
+    {"llvm.log10.f80", Intrinsic::log10, Make_X86FP80_X86FP80},
+    {"llvm.log2.f32", Intrinsic::log2, Make_Double_Double},
+    {"llvm.log2.f64", Intrinsic::log2, Make_X86FP80_X86FP80},
+    {"llvm.log2.f80", Intrinsic::log2, Make_X86FP80_X86FP80},
+    {"llvm.fma.f32", Intrinsic::fma, Make_Double_DoubleDoubleDouble},
+    {"llvm.fmuladd.f32", Intrinsic::fmuladd, Make_Double_DoubleDoubleDouble},
+    {"llvm.fma.f64", Intrinsic::fma, Make_X86FP80_X86FP80X86FP80X86FP80},
+    {"llvm.fmuladd.f64", Intrinsic::fmuladd, Make_X86FP80_X86FP80X86FP80X86FP80},
+    {"llvm.fma.f80", Intrinsic::fma, Make_X86FP80_X86FP80X86FP80X86FP80},
+    {"llvm.fabs.f32", Intrinsic::fabs, Make_Double_Double},
+    {"llvm.fabs.f64", Intrinsic::fabs, Make_X86FP80_X86FP80},
+    {"llvm.fabs.f80", Intrinsic::fabs, Make_X86FP80_X86FP80},
+    {"llvm.minnum.f32", Intrinsic::minnum, Make_Double_DoubleDouble},
+    {"llvm.minnum.f64", Intrinsic::minnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.minnum.f80", Intrinsic::minnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maxnum.f32", Intrinsic::maxnum, Make_Double_DoubleDouble},
+    {"llvm.maxnum.f64", Intrinsic::maxnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maxnum.f80", Intrinsic::maxnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.minimum.f32", Intrinsic::minimum, Make_Double_DoubleDouble},
+    {"llvm.minimum.f64", Intrinsic::minimum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.minimum.f80", Intrinsic::minimum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maximum.f32", Intrinsic::maximum, Make_Double_DoubleDouble},
+    {"llvm.maximum.f64", Intrinsic::maximum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maximum.f80", Intrinsic::maximum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.copysign.f32", Intrinsic::copysign, Make_Double_DoubleDouble},
+    {"llvm.copysign.f64", Intrinsic::copysign, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.copysign.f80", Intrinsic::copysign, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.floor.f32", Intrinsic::floor, Make_Double_Double},
+    {"llvm.floor.f64", Intrinsic::floor, Make_X86FP80_X86FP80},
+    {"llvm.floor.f80", Intrinsic::floor, Make_X86FP80_X86FP80},
+    {"llvm.ceil.f32", Intrinsic::ceil, Make_Double_Double},
+    {"llvm.ceil.f64", Intrinsic::ceil, Make_X86FP80_X86FP80},
+    {"llvm.ceil.f80", Intrinsic::ceil, Make_X86FP80_X86FP80},
+    {"llvm.trunc.f32", Intrinsic::trunc, Make_Double_Double},
+    {"llvm.trunc.f64", Intrinsic::trunc, Make_X86FP80_X86FP80},
+    {"llvm.trunc.f80", Intrinsic::trunc, Make_X86FP80_X86FP80},
+    {"llvm.rint.f32", Intrinsic::rint, Make_Double_Double},
+    {"llvm.rint.f64", Intrinsic::rint, Make_X86FP80_X86FP80},
+    {"llvm.rint.f80", Intrinsic::rint, Make_X86FP80_X86FP80},
+    {"llvm.nearbyint.f32", Intrinsic::nearbyint, Make_Double_Double},
+    {"llvm.nearbyint.f64", Intrinsic::nearbyint, Make_X86FP80_X86FP80},
+    {"llvm.nearbyint.f80", Intrinsic::nearbyint, Make_X86FP80_X86FP80},
+    {"llvm.round.f32", Intrinsic::round, Make_Double_Double},
+    {"llvm.round.f64", Intrinsic::round, Make_X86FP80_X86FP80},
+    {"llvm.round.f80", Intrinsic::round, Make_X86FP80_X86FP80},
+    {"llvm.lround.f32", Intrinsic::lround, Make_Double_Double},
+    {"llvm.lround.f64", Intrinsic::lround, Make_X86FP80_X86FP80},
+    {"llvm.lround.f80", Intrinsic::lround, Make_X86FP80_X86FP80},
+    {"llvm.llround.f32", Intrinsic::llround, Make_Double_Double},
+    {"llvm.llround.f64", Intrinsic::llround, Make_X86FP80_X86FP80},
+    {"llvm.llround.f80", Intrinsic::llround, Make_X86FP80_X86FP80},
+    {"llvm.lrint.f32", Intrinsic::lrint, Make_Double_Double},
+    {"llvm.lrint.f64", Intrinsic::lrint, Make_X86FP80_X86FP80},
+    {"llvm.lrint.f80", Intrinsic::lrint, Make_X86FP80_X86FP80},
+    {"llvm.llrint.f32", Intrinsic::llrint, Make_Double_Double},
+    {"llvm.llrint.f64", Intrinsic::llrint, Make_X86FP80_X86FP80},
+    {"llvm.llrint.f80", Intrinsic::llrint, Make_X86FP80_X86FP80},
+};
+
+const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
+    {LibFunc_sqrtf, "llvm.sqrt.f32"},           //
+    {LibFunc_sqrt, "llvm.sqrt.f64"},            //
+    {LibFunc_sqrtl, "llvm.sqrt.f80"},           //
+    {LibFunc_sinf, "llvm.sin.f32"},             //
+    {LibFunc_sin, "llvm.sin.f64"},              //
+    {LibFunc_sinl, "llvm.sin.f80"},             //
+    {LibFunc_cosf, "llvm.cos.f32"},             //
+    {LibFunc_cos, "llvm.cos.f64"},              //
+    {LibFunc_cosl, "llvm.cos.f80"},             //
+    {LibFunc_powf, "llvm.pow.f32"},             //
+    {LibFunc_pow, "llvm.pow.f64"},              //
+    {LibFunc_powl, "llvm.pow.f80"},             //
+    {LibFunc_expf, "llvm.exp.f32"},             //
+    {LibFunc_exp, "llvm.exp.f64"},              //
+    {LibFunc_expl, "llvm.exp.f80"},             //
+    {LibFunc_exp2f, "llvm.exp2.f32"},           //
+    {LibFunc_exp2, "llvm.exp2.f64"},            //
+    {LibFunc_exp2l, "llvm.exp2.f80"},           //
+    {LibFunc_logf, "llvm.log.f32"},             //
+    {LibFunc_log, "llvm.log.f64"},              //
+    {LibFunc_logl, "llvm.log.f80"},             //
+    {LibFunc_log10f, "llvm.log10.f32"},         //
+    {LibFunc_log10, "llvm.log10.f64"},          //
+    {LibFunc_log10l, "llvm.log10.f80"},         //
+    {LibFunc_log2f, "llvm.log2.f32"},           //
+    {LibFunc_log2, "llvm.log2.f64"},            //
+    {LibFunc_log2l, "llvm.log2.f80"},           //
+    {LibFunc_fabsf, "llvm.fabs.f32"},           //
+    {LibFunc_fabs, "llvm.fabs.f64"},            //
+    {LibFunc_fabsl, "llvm.fabs.f80"},           //
+    {LibFunc_copysignf, "llvm.copysign.f32"},   //
+    {LibFunc_copysign, "llvm.copysign.f64"},    //
+    {LibFunc_copysignl, "llvm.copysign.f80"},   //
+    {LibFunc_floorf, "llvm.floor.f32"},         //
+    {LibFunc_floor, "llvm.floor.f64"},          //
+    {LibFunc_floorl, "llvm.floor.f80"},         //
+    {LibFunc_fmaxf, "llvm.maxnum.f32"},         //
+    {LibFunc_fmax, "llvm.maxnum.f64"},          //
+    {LibFunc_fmaxl, "llvm.maxnum.f80"},         //
+    {LibFunc_fminf, "llvm.minnum.f32"},         //
+    {LibFunc_fmin, "llvm.minnum.f64"},          //
+    {LibFunc_fminl, "llvm.minnum.f80"},         //
+    {LibFunc_ceilf, "llvm.ceil.f32"},           //
+    {LibFunc_ceil, "llvm.ceil.f64"},            //
+    {LibFunc_ceill, "llvm.ceil.f80"},           //
+    {LibFunc_truncf, "llvm.trunc.f32"},         //
+    {LibFunc_trunc, "llvm.trunc.f64"},          //
+    {LibFunc_truncl, "llvm.trunc.f80"},         //
+    {LibFunc_rintf, "llvm.rint.f32"},           //
+    {LibFunc_rint, "llvm.rint.f64"},            //
+    {LibFunc_rintl, "llvm.rint.f80"},           //
+    {LibFunc_nearbyintf, "llvm.nearbyint.f32"}, //
+    {LibFunc_nearbyint, "llvm.nearbyint.f64"},  //
+    {LibFunc_nearbyintl, "llvm.nearbyint.f80"}, //
+    {LibFunc_roundf, "llvm.round.f32"},         //
+    {LibFunc_round, "llvm.round.f64"},          //
+    {LibFunc_roundl, "llvm.round.f80"},         //
+};
+
+const char *KnownIntrinsic::get(LibFunc LFunc) {
+  for (const auto &E : kLibfuncIntrinsics) {
+    if (E.LFunc == LFunc)
+      return E.IntrinsicName;
+  }
+  return nullptr;
+}
+
+const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
+  for (const auto &E : kWidenedIntrinsics) {
+    if (E.NarrowName == Name)
+      return &E;
+  }
+  return nullptr;
+}
+
+} // namespace
+
+// Returns the name of the LLVM intrinsic corresponding to the given function.
+static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
+                                           const TargetLibraryInfo &TLI) {
+  LibFunc LFunc;
+  if (!TLI.getLibFunc(Fn, LFunc))
+    return nullptr;
+
+  if (const char *Name = KnownIntrinsic::get(LFunc))
+    return Name;
+
+  errs() << "FIXME: LibFunc: " << TLI.getName(LFunc) << "\n";
+  return nullptr;
+}
+
+// Try to handle a known function call.
+Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
+    CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
+    const ValueToShadowMap &Map, IRBuilder<> &Builder) {
+  Function *const Fn = Call.getCalledFunction();
+  if (Fn == nullptr)
+    return nullptr;
+
+  Intrinsic::ID WidenedId = Intrinsic::ID();
+  FunctionType *WidenedFnTy = nullptr;
+  if (const auto ID = Fn->getIntrinsicID()) {
+    const auto *const Widened = KnownIntrinsic::widen(Fn->getName());
+    if (Widened) {
+      WidenedId = Widened->ID;
+      WidenedFnTy = Widened->MakeFnTy(*Context);
+    } else {
+      // If we don't know how to widen the intrinsic, we have no choice but to
+      // call the non-wide version on a truncated shadow and extend again
+      // afterwards.
+      WidenedId = ID;
+      WidenedFnTy = Fn->getFunctionType();
+    }
+  } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) {
+    // We might have a call to a library function that we can replace with a
+    // wider intrinsic.
+    const auto *Widened = KnownIntrinsic::widen(Name);
+    assert(Widened && "make sure KnownIntrinsic entries are consistent");
+    WidenedId = Widened->ID;
+    WidenedFnTy = Widened->MakeFnTy(*Context);
+  } else {
+    // This is not a known library function or intrinsic.
+    return nullptr;
+  }
+
+  // Check that the widened intrinsic is valid.
+  SmallVector<Intrinsic::IITDescriptor> Table;
+  getIntrinsicInfoTableEntries(WidenedId, Table);
+  SmallVector<Type *> ArgTys;
+  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+  const Intrinsic::MatchIntrinsicTypesResult Res =
+      Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys);
+  assert(Res == Intrinsic::MatchIntrinsicTypes_Match &&
+         "invalid widened intrinsic");
+  (void)Res;
+
+  // For known intrinsic functions, we create a second call to the same
+  // intrinsic with a different type.
+  SmallVector<Value *> Args;
+  // The last operand is the intrinsic itself, skip it.
+  for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) {
+    Value *Arg = Call.getOperand(I);
+    Type *const OrigArgTy = Arg->getType();
+    Type *const IntrinsicArgTy = WidenedFnTy->getParamType(I);
+    if (OrigArgTy == IntrinsicArgTy) {
+      Args.push_back(Arg); // The arg is passed as is.
+      continue;
+    }
+    Type *const ShadowArgTy = Config.getExtendedFPType(Arg->getType());
+    assert(ShadowArgTy &&
+           "don't know how to get the shadow value for a non-FT");
+    Value *Shadow = Map.getShadow(Arg);
+    if (ShadowArgTy == IntrinsicArgTy) {
+      // The shadow is the right type for the intrinsic.
+      assert(Shadow->getType() == ShadowArgTy);
+      Args.push_back(Shadow);
+      continue;
+    }
+    // There is no intrinsic with this level of precision, truncate the shadow.
+    Args.push_back(
+        Builder.CreateCast(Instruction::FPTrunc, Shadow, IntrinsicArgTy));
+  }
+  Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
+  return WidenedFnTy->getReturnType() == ExtendedVT
+             ? IntrinsicCall
+             : Builder.CreateCast(Instruction::FPExt, IntrinsicCall,
+                                  ExtendedVT);
+}
+
+// Handle a CallBase, i.e. a function call, an inline asm sequence, or an
+// invoke.
+Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
+                                                   Type *ExtendedVT,
+                                                   const TargetLibraryInfo &TLI,
+                                                   const ValueToShadowMap &Map,
+                                                   IRBuilder<> &Builder) {
+  // We cannot look inside inline asm, just expand the result again.
+  if (Call.isInlineAsm()) {
+    return Builder.CreateFPExt(&Call, ExtendedVT);
+  }
+
+  // Intrinsics and library functions (e.g. sin, exp) are handled
+  // specifically, because we know their semantics and can do better than
+  // blindly calling them (e.g. compute the sine in the actual shadow domain).
+  if (Value *V =
+          maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
+    return V;
+
+  // If the return tag matches that of the called function, read the extended
+  // return value from the shadow ret ptr. Else, just extend the return value.
+  // (Either the callee is not instrumented and never sets NsanShadowRetTag,
+  // or it is and it will always do so when returning an FT value.)
+  Value *L =
+      Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false);
+  Value *HasShadowRet = Builder.CreateICmpEQ(
+      L, Builder.CreatePtrToInt(Call.getCalledOperand(), IntptrTy));
+
+  Value *ShadowRetVal = Builder.CreateLoad(
+      ExtendedVT,
+      Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0),
+      /*isVolatile=*/false);
+  Value *Shadow = Builder.CreateSelect(HasShadowRet, ShadowRetVal,
+                                       Builder.CreateFPExt(&Call, ExtendedVT));
+  ++NumInstrumentedFTCalls;
+  return Shadow;
+}
+
+// Creates a shadow value for the given FT value. At that point all operands
+// are guaranteed to be available.
+Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
+    Instruction &Inst, const TargetLibraryInfo &TLI,
+    const ValueToShadowMap &Map) {
+  Type *const VT = Inst.getType();
+  Type *const ExtendedVT = Config.getExtendedFPType(VT);
+  assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
+
+  if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) {
+    return handleLoad(*Load, VT, ExtendedVT);
+  }
+  if (CallInst *Call = dyn_cast<CallInst>(&Inst)) {
+    // Insert after the call.
+    BasicBlock::iterator It(Inst);
+    IRBuilder<> Builder(Call->getParent(), ++It);
+    Builder.SetCurrentDebugLocation(Call->getDebugLoc());
+    return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
+  }
+  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(&Inst)) {
+    // The Invoke terminates the basic block, create a new basic block in
+    // between the successful invoke and the next block.
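+    // For illustration, for
+    //   %r = invoke float @f() to label %cont unwind label %lpad
+    // a new block is created on the edge to %cont; the shadow of %r is
+    // computed there (reading the shadow return slot if @f set it) before
+    // branching to %cont. Names are illustrative only.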
+    BasicBlock *InvokeBB = Invoke->getParent();
+    BasicBlock *NextBB = Invoke->getNormalDest();
+    BasicBlock *NewBB =
+        BasicBlock::Create(*Context, "", NextBB->getParent(), NextBB);
+    Inst.replaceSuccessorWith(NextBB, NewBB);
+
+    IRBuilder<> Builder(NewBB);
+    Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
+    Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map, Builder);
+    Builder.CreateBr(NextBB);
+    NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
+    return Shadow;
+  }
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*BinOp));
+    Builder.SetCurrentDebugLocation(BinOp->getDebugLoc());
+    return Builder.CreateBinOp(BinOp->getOpcode(),
+                               Map.getShadow(BinOp->getOperand(0)),
+                               Map.getShadow(BinOp->getOperand(1)));
+  }
+  if (UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*UnaryOp));
+    Builder.SetCurrentDebugLocation(UnaryOp->getDebugLoc());
+    return Builder.CreateUnOp(UnaryOp->getOpcode(),
+                              Map.getShadow(UnaryOp->getOperand(0)));
+  }
+  if (FPTruncInst *Trunc = dyn_cast<FPTruncInst>(&Inst)) {
+    return handleTrunc(*Trunc, VT, ExtendedVT, Map);
+  }
+  if (FPExtInst *Ext = dyn_cast<FPExtInst>(&Inst)) {
+    return handleExt(*Ext, VT, ExtendedVT, Map);
+  }
+  if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
+    CastInst *Cast = dyn_cast<CastInst>(&Inst);
+    IRBuilder<> Builder(getNextInstructionOrDie(*Cast));
+    Builder.SetCurrentDebugLocation(Cast->getDebugLoc());
+    return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
+                              ExtendedVT);
+  }
+
+  if (SelectInst *S = dyn_cast<SelectInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*S));
+    Builder.SetCurrentDebugLocation(S->getDebugLoc());
+    return Builder.CreateSelect(S->getCondition(),
+                                Map.getShadow(S->getTrueValue()),
+                                Map.getShadow(S->getFalseValue()));
+  }
+
+  if (ExtractElementInst *Extract = dyn_cast<ExtractElementInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Extract));
+    Builder.SetCurrentDebugLocation(Extract->getDebugLoc());
+    return Builder.CreateExtractElement(
+        Map.getShadow(Extract->getVectorOperand()),
+        Extract->getIndexOperand());
+  }
+
+  if (InsertElementInst *Insert = dyn_cast<InsertElementInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Insert));
+    Builder.SetCurrentDebugLocation(Insert->getDebugLoc());
+    return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
+                                       Map.getShadow(Insert->getOperand(1)),
+                                       Insert->getOperand(2));
+  }
+
+  if (ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Shuffle));
+    Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
+    return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
+                                       Map.getShadow(Shuffle->getOperand(1)),
+                                       Shuffle->getShuffleMask());
+  }
+
+  if (ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Extract));
+    Builder.SetCurrentDebugLocation(Extract->getDebugLoc());
+    // FIXME: We could make aggregate objects first-class citizens. For now we
+    // just extend the extracted value.
+    return Builder.CreateFPExt(Extract, ExtendedVT);
+  }
+
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*BC));
+    Builder.SetCurrentDebugLocation(BC->getDebugLoc());
+    return Builder.CreateCast(Instruction::FPExt, BC, ExtendedVT);
+  }
+
+  errs() << "FIXME: implement " << Inst.getOpcodeName() << "\n";
+  llvm_unreachable("not implemented");
+}
+
+// Creates a shadow value for an instruction that defines a value of FT type.
+// FT operands that do not already have shadow values are created recursively.
+// The DFS is guaranteed to not loop as phis and arguments already have
+// shadows.
+void NumericalStabilitySanitizer::maybeCreateShadowValue(
+    Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
+  Type *const VT = Root.getType();
+  Type *const ExtendedVT = Config.getExtendedFPType(VT);
+  if (ExtendedVT == nullptr)
+    return; // Not an FT value.
+
+  if (Map.hasShadow(&Root))
+    return; // Shadow already exists.
+
+  assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
+
+  std::vector<Instruction *> DfsStack(1, &Root);
+  while (!DfsStack.empty()) {
+    // Ensure that all operands to the instruction have shadows before
+    // proceeding.
+    Instruction *I = DfsStack.back();
+    // The shadow for the instruction might have been created deeper in the
+    // DFS, see `forward_use_with_two_uses` test.
+    if (Map.hasShadow(I)) {
+      DfsStack.pop_back();
+      continue;
+    }
+
+    bool MissingShadow = false;
+    for (Value *Op : I->operands()) {
+      Type *const VT = Op->getType();
+      if (!Config.getExtendedFPType(VT))
+        continue; // Not an FT value.
+      if (Map.hasShadow(Op))
+        continue; // Shadow is already available.
+      MissingShadow = true;
+      DfsStack.push_back(cast<Instruction>(Op));
+    }
+    if (MissingShadow)
+      continue; // Process operands and come back to this instruction later.
+
+    // All operands have shadows. Create a shadow for the current value.
+    Value *Shadow = createShadowValueWithOperandsAvailable(*I, TLI, Map);
+    Map.setShadow(I, Shadow);
+    DfsStack.pop_back();
+  }
+}
+
+// A floating-point store needs its value and type written to shadow memory.
+void NumericalStabilitySanitizer::propagateFTStore(
+    StoreInst &Store, Type *const VT, Type *const ExtendedVT,
+    const ValueToShadowMap &Map) {
+  Value *StoredValue = Store.getValueOperand();
+  IRBuilder<> Builder(&Store);
+  Builder.SetCurrentDebugLocation(Store.getDebugLoc());
+  const auto Extents = getMemoryExtentsOrDie(VT);
+  Value *ShadowPtr = Builder.CreateCall(
+      NsanGetShadowPtrForStore[Extents.ValueType],
+      {Store.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
+
+  Value *StoredShadow = Map.getShadow(StoredValue);
+  if (!Store.getParent()->getParent()->hasOptNone()) {
+    // Only check stores when optimizing, because non-optimized code generates
+    // too many stores to the stack, creating false positives.
+    if (ClCheckStores) {
+      StoredShadow = emitCheck(StoredValue, StoredShadow, Builder,
+                               CheckLoc::makeStore(Store.getPointerOperand()));
+      ++NumInstrumentedFTStores;
+    }
+  }
+
+  Builder.CreateAlignedStore(StoredShadow, ShadowPtr, Align(1),
+                             Store.isVolatile());
+}
+
+// A non-FT store needs to invalidate shadow memory. Exceptions are:
+// - memory transfers of floating-point data through other pointer types (llvm
+//   optimization passes transform `*(float*)a = *(float*)b` into
+//   `*(i32*)a = *(i32*)b`). These have the same semantics as memcpy.
+// - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
+//   ints. Note that this is not really necessary because if the value is
+//   unknown the framework will re-extend it on load anyway. It just felt
+//   easier to debug tests with vectors of FTs.
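+// For illustration, in
+//   %v = load i32, ptr %src
+//   store i32 %v, ptr %dst
+// the store is handled like a 4-byte memcpy: the shadow type and shadow value
+// read at %src are written back at %dst instead of marking %dst as unknown.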
+void NumericalStabilitySanitizer::propagateNonFTStore(
+    StoreInst &Store, Type *const VT, const ValueToShadowMap &Map) {
+  Value *PtrOp = Store.getPointerOperand();
+  IRBuilder<> Builder(getNextInstructionOrDie(Store));
+  Builder.SetCurrentDebugLocation(Store.getDebugLoc());
+  Value *Dst = PtrOp;
+  const DataLayout &DL =
+      Store.getParent()->getParent()->getParent()->getDataLayout();
+  TypeSize SlotSize = DL.getTypeStoreSize(VT);
+  assert(!SlotSize.isScalable() && "unsupported");
+  const auto LoadSizeBytes = SlotSize.getFixedValue();
+  Value *ValueSize = Builder.Insert(Constant::getIntegerValue(
+      IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes)));
+
+  ++NumInstrumentedNonFTStores;
+  Value *StoredValue = Store.getValueOperand();
+  if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
+    // FIXME: Handle the case when the value is from a phi.
+    // This is a memory transfer with memcpy semantics. Copy the type and
+    // value from the source. Note that we cannot use __nsan_copy_values()
+    // here, because that will not work when there is a write to memory in
+    // between the load and the store, e.g. in the case of a swap.
+    Type *ShadowTypeIntTy = Type::getIntNTy(*Context, 8 * LoadSizeBytes);
+    Type *ShadowValueIntTy =
+        Type::getIntNTy(*Context, 8 * kShadowScale * LoadSizeBytes);
+    IRBuilder<> LoadBuilder(getNextInstructionOrDie(*Load));
+    LoadBuilder.SetCurrentDebugLocation(Load->getDebugLoc());
+    Value *LoadSrc = Load->getPointerOperand();
+    // Read the shadow type and value at load time. The type has the same size
+    // as the FT value, the value has twice its size.
+    // FIXME: cache them to avoid re-creating them when a load is used by
+    // several stores. Maybe create them like the FT shadows when a load is
+    // encountered.
+    Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
+        ShadowTypeIntTy,
+        LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}), Align(1),
+        /*isVolatile=*/false);
+    Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
+        ShadowValueIntTy,
+        LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}), Align(1),
+        /*isVolatile=*/false);
+
+    // Write back the shadow type and value at store time.
+    Builder.CreateAlignedStore(
+        RawShadowType, Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
+        Align(1),
+        /*isVolatile=*/false);
+    Builder.CreateAlignedStore(RawShadowValue,
+                               Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
+                               Align(1),
+                               /*isVolatile=*/false);
+
+    ++NumInstrumentedNonFTMemcpyStores;
+    return;
+  }
+  if (Constant *C; ClPropagateNonFTConstStoresAsFT /* off by default */ &&
+                   (C = dyn_cast<Constant>(StoredValue))) {
+    // This might be an FP constant stored as an int. Bitcast and store if it
+    // has the appropriate size.
+    Type *BitcastTy = nullptr; // The FT type to bitcast to.
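+    // For illustration, a float constant stored through an integer type, e.g.
+    //   store i32 1065353216, ptr %p   ; 0x3F800000, i.e. 1.0f
+    // is re-interpreted as a float store below (only when
+    // ClPropagateNonFTConstStoresAsFT is enabled, which is off by default).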
+    if (ConstantInt *CInt = dyn_cast<ConstantInt>(C)) {
+      switch (CInt->getType()->getScalarSizeInBits()) {
+      case 32:
+        BitcastTy = Type::getFloatTy(*Context);
+        break;
+      case 64:
+        BitcastTy = Type::getDoubleTy(*Context);
+        break;
+      case 80:
+        BitcastTy = Type::getX86_FP80Ty(*Context);
+        break;
+      default:
+        break;
+      }
+    } else if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C)) {
+      const int NumElements =
+          cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
+      switch (CDV->getType()->getScalarSizeInBits()) {
+      case 32:
+        BitcastTy =
+            VectorType::get(Type::getFloatTy(*Context), NumElements, false);
+        break;
+      case 64:
+        BitcastTy =
+            VectorType::get(Type::getDoubleTy(*Context), NumElements, false);
+        break;
+      case 80:
+        BitcastTy =
+            VectorType::get(Type::getX86_FP80Ty(*Context), NumElements, false);
+        break;
+      default:
+        break;
+      }
+    }
+    if (BitcastTy) {
+      const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
+      Value *ShadowPtr = Builder.CreateCall(
+          NsanGetShadowPtrForStore[Extents.ValueType],
+          {PtrOp, ConstantInt::get(IntptrTy, Extents.NumElts)});
+      // Bitcast the integer value to the appropriate FT type and extend to
+      // 2FT.
+      Type *ExtVT = Config.getExtendedFPType(BitcastTy);
+      Value *Shadow = Builder.CreateCast(
+          Instruction::FPExt, Builder.CreateBitCast(C, BitcastTy), ExtVT);
+      Builder.CreateAlignedStore(Shadow, ShadowPtr, Align(1),
+                                 Store.isVolatile());
+      return;
+    }
+  }
+  // All other stores just reset the shadow value to unknown.
+  Builder.CreateCall(NsanSetValueUnknown, {Dst, ValueSize});
+}
+
+void NumericalStabilitySanitizer::propagateShadowValues(
+    Instruction &Inst, const TargetLibraryInfo &TLI,
+    const ValueToShadowMap &Map) {
+  if (StoreInst *Store = dyn_cast<StoreInst>(&Inst)) {
+    Value *StoredValue = Store->getValueOperand();
+    Type *const VT = StoredValue->getType();
+    Type *const ExtendedVT = Config.getExtendedFPType(VT);
+    if (ExtendedVT == nullptr)
+      return propagateNonFTStore(*Store, VT, Map);
+    return propagateFTStore(*Store, VT, ExtendedVT, Map);
+  }
+
+  if (FCmpInst *FCmp = dyn_cast<FCmpInst>(&Inst)) {
+    emitFCmpCheck(*FCmp, Map);
+    return;
+  }
+
+  if (CallBase *CB = dyn_cast<CallBase>(&Inst)) {
+    maybeAddSuffixForNsanInterface(CB);
+    if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+      maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
+      instrumentMemIntrinsic(MI);
+      return;
+    }
+    populateShadowStack(*CB, TLI, Map);
+    return;
+  }
+
+  if (ReturnInst *RetInst = dyn_cast<ReturnInst>(&Inst)) {
+    if (!ClCheckRet)
+      return;
+
+    Value *RV = RetInst->getReturnValue();
+    if (RV == nullptr)
+      return; // This is a `ret void`.
+    Type *VT = RV->getType();
+    Type *ExtendedVT = Config.getExtendedFPType(VT);
+    if (ExtendedVT == nullptr)
+      return; // Not an FT ret.
+    Value *RVShadow = Map.getShadow(RV);
+    IRBuilder<> Builder(&Inst);
+    Builder.SetCurrentDebugLocation(RetInst->getDebugLoc());
+
+    RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
+    ++NumInstrumentedFTRets;
+    // Store tag.
+    Value *FnAddr =
+        Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
+    Builder.CreateStore(FnAddr, NsanShadowRetTag);
+    // Store value.
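+    // (The caller side in handleCallBase only trusts this slot when the tag
+    // stored above matches the function it actually called.)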
+    Value *ShadowRetValPtr =
+        Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
+    Builder.CreateStore(RVShadow, ShadowRetValPtr);
+    return;
+  }
+
+  if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
+    Value *V = Insert->getOperand(1);
+    Type *const VT = V->getType();
+    Type *const ExtendedVT = Config.getExtendedFPType(VT);
+    if (ExtendedVT == nullptr)
+      return;
+    IRBuilder<> Builder(Insert);
+    Builder.SetCurrentDebugLocation(Insert->getDebugLoc());
+    emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
+    return;
+  }
+}
+
+// Moves fast math flags from the function to individual instructions, and
+// removes the attribute from the function.
+// FIXME: Make this controllable with a flag.
+static void moveFastMathFlags(Function &F,
+                              std::vector<Instruction *> &Instructions) {
+  FastMathFlags FMF;
+#define MOVE_FLAG(attr, setter)                                                \
+  if (F.getFnAttribute(attr).getValueAsString() == "true") {                   \
+    F.removeFnAttr(attr);                                                      \
+    FMF.set##setter();                                                         \
+  }
+  MOVE_FLAG("unsafe-fp-math", Fast)
+  MOVE_FLAG("no-infs-fp-math", NoInfs)
+  MOVE_FLAG("no-nans-fp-math", NoNaNs)
+  MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
+#undef MOVE_FLAG
+
+  for (Instruction *I : Instructions)
+    if (isa<FPMathOperator>(I))
+      I->setFastMathFlags(FMF);
+}
+
+bool NumericalStabilitySanitizer::sanitizeFunction(
+    Function &F, const TargetLibraryInfo &TLI) {
+  if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability))
+    return false;
+
+  // This is required to prevent instrumenting the call to __nsan_init from
+  // within the module constructor.
+  if (F.getName() == kNsanModuleCtorName)
+    return false;
+  if (!Config.initialize(&F.getParent()->getContext()))
+    return false;
+  initialize(*F.getParent());
+  SmallVector<Instruction *> AllLoadsAndStores;
+  SmallVector<Instruction *> LocalLoadsAndStores;
+
+  // The instrumentation maintains:
+  // - for each IR value `v` of floating-point (or vector floating-point) type
+  //   FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
+  //   double for float and f128 for double).
+  // - A shadow memory, which stores `s(v)` for any `v` that has been stored,
+  //   along with a shadow memory tag, which stores whether the value in the
+  //   corresponding shadow memory is valid. Note that this might be
+  //   incorrect if a non-instrumented function stores to memory, or if
+  //   memory is stored to through a char pointer.
+  // - A shadow stack, which holds `s(v)` for any floating-point argument `v`
+  //   of a call to an instrumented function. This allows
+  //   instrumented functions to retrieve the shadow values for their
+  //   arguments.
+  //   Because instrumented functions can be called from non-instrumented
+  //   functions, the stack needs to include a tag so that the instrumented
+  //   function knows whether shadow values are available for its
+  //   parameters (i.e. whether it was called by an instrumented function).
+  //   When shadow arguments are not available, they have to be recreated by
+  //   extending the non-shadow arguments to the shadow precision.
+  //   Non-instrumented functions do not modify (or even know about) the
+  //   shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
+  //   stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
+  //   for the function (we use the address of the function). Both variables
+  //   are thread local.
+  //   Example:
+  //     calls                             shadow stack tag      shadow stack
+  //     =======================================================================
+  //     non_instrumented_1()              0                     0
+  //             |
+  //             v
+  //     instrumented_2(float a)           0                     0
+  //             |
+  //             v
+  //     instrumented_3(float b, double c) &instrumented_3       s(b),s(c)
+  //             |
+  //             v
+  //     instrumented_4(float d)           &instrumented_4       s(d)
+  //             |
+  //             v
+  //     non_instrumented_5(float e)       &non_instrumented_5   s(e)
+  //             |
+  //             v
+  //     instrumented_6(float f)           &non_instrumented_5   s(e)
+  //
+  //   On entry, an instrumented function (e.g. instrumented_2 above) checks
+  //   whether the tag corresponds to its own function ptr.
+  //   Note that functions reset the tag to 0 after reading shadow parameters.
+  //   This ensures that the function does not erroneously read invalid data if
+  //   called twice in the same stack, once from an instrumented function and
+  //   once from an uninstrumented one. For example, in the following call
+  //   chain, resetting the tag in (A) ensures that (B) does not reuse the same
+  //   shadow arguments (which would be incorrect).
+  //     instrumented_1(float a)
+  //             |
+  //             v
+  //     instrumented_2(float b) (A)
+  //             |
+  //             v
+  //     non_instrumented_3()
+  //             |
+  //             v
+  //     instrumented_2(float b) (B)
+  //
+  // - A shadow return slot. Any function that returns a floating-point value
+  //   places a shadow return value in __nsan_shadow_ret_val. Again, because
+  //   we might be calling non-instrumented functions, this value is guarded
+  //   by the __nsan_shadow_ret_tag marker, which indicates which instrumented
+  //   function placed the value in __nsan_shadow_ret_val, so that the caller
+  //   can check that this corresponds to the callee. Both variables are
+  //   thread local.
+  //
+  //   For example, in the following call chain, the instrumentation in
+  //   `instrumented_1` rejects the shadow return value coming from
+  //   `instrumented_3` because it is not tagged as expected (`&instrumented_3`
+  //   instead of `&non_instrumented_2`):
+  //
+  //     instrumented_1()
+  //             |
+  //             v
+  //     float non_instrumented_2()
+  //             |
+  //             v
+  //     float instrumented_3()
+  //
+  // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
+  // their overload on the shadow type.
+
+  // Collect all instructions before processing, as creating shadow values
+  // creates new instructions inside the function.
+  std::vector<Instruction *> OriginalInstructions;
+  for (auto &BB : F) {
+    for (auto &Inst : BB) {
+      OriginalInstructions.emplace_back(&Inst);
+    }
+  }
+
+  moveFastMathFlags(F, OriginalInstructions);
+  ValueToShadowMap ValueToShadow(&Config);
+
+  // In the first pass, we create shadow values for all FT function arguments
+  // and all phis. This ensures that the DFS of the next pass does not have
+  // any loops.
+  std::vector<PHINode *> OriginalPhis;
+  createShadowArguments(F, TLI, ValueToShadow);
+  for (Instruction *I : OriginalInstructions) {
+    if (PHINode *Phi = dyn_cast<PHINode>(I)) {
+      if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
+        OriginalPhis.push_back(Phi);
+        ValueToShadow.setShadow(Phi, Shadow);
+      }
+    }
+  }
+
+  // Create shadow values for all instructions creating FT values.
+  for (Instruction *I : OriginalInstructions) {
+    maybeCreateShadowValue(*I, TLI, ValueToShadow);
+  }
+
+  // Propagate shadow values across stores, calls and rets.
+  for (Instruction *I : OriginalInstructions) {
+    propagateShadowValues(*I, TLI, ValueToShadow);
+  }
+
+  // The last pass populates shadow phis with shadow values.
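+  // (The shadow phis were created empty by maybeCreateShadowPhi; only now are
+  // the shadows of all their incoming values guaranteed to exist.)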
+  for (PHINode *Phi : OriginalPhis) {
+    PHINode *ShadowPhi = dyn_cast<PHINode>(ValueToShadow.getShadow(Phi));
+    for (int I = 0, E = Phi->getNumOperands(); I < E; ++I) {
+      Value *V = Phi->getOperand(I);
+      Value *Shadow = ValueToShadow.getShadow(V);
+      BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
+      // For some instructions (e.g. invoke), we create the shadow in a
+      // separate block, different from the block where the original value is
+      // created. In that case, the shadow phi might need to refer to this
+      // block instead of the original block.
+      // Note that this can only happen for instructions as constant shadows
+      // are always created in the same block.
+      ShadowPhi->addIncoming(Shadow, IncomingBB);
+    }
+  }
+
+  return !ValueToShadow.empty();
+}
+
+// Instrument the memory intrinsics so that they properly modify the shadow
+// memory.
+bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  IRBuilder<> Builder(MI);
+  if (MemSetInst *M = dyn_cast<MemSetInst>(MI)) {
+    Builder.SetCurrentDebugLocation(M->getDebugLoc());
+    Builder.CreateCall(
+        NsanSetValueUnknown,
+        {// Address
+         M->getArgOperand(0),
+         // Size
+         Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+  } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(MI)) {
+    Builder.SetCurrentDebugLocation(M->getDebugLoc());
+    Builder.CreateCall(
+        NsanCopyValues,
+        {// Destination
+         M->getArgOperand(0),
+         // Source
+         M->getArgOperand(1),
+         // Size
+         Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+  }
+  return false;
+}
+
+void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
+  Function *Fn = CI->getCalledFunction();
+  if (Fn == nullptr)
+    return;
+
+  if (!Fn->getName().starts_with("__nsan_"))
+    return;
+
+  if (Fn->getName() == "__nsan_dump_shadow_mem") {
+    assert(CI->arg_size() == 4 &&
+           "invalid prototype for __nsan_dump_shadow_mem");
+    // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
+    // configuration:
+    // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
+    // | shadow_type_id_for_float
+    const uint64_t shadow_value_type_ids =
+        (static_cast<uint64_t>(Config.byValueType(kLongDouble).getNsanTypeId())
+         << 16) |
+        (static_cast<uint64_t>(Config.byValueType(kDouble).getNsanTypeId())
+         << 8) |
+        static_cast<uint64_t>(Config.byValueType(kFloat).getNsanTypeId());
+    CI->setArgOperand(3, ConstantInt::get(IntptrTy, shadow_value_type_ids));
+  }
+}
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 278111883459b3..20a5fa38cc9c56 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -954,6 +954,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       case Attribute::ShadowCallStack:
       case Attribute::SanitizeAddress:
       case Attribute::SanitizeMemory:
+      case Attribute::SanitizeNumericalStability:
       case Attribute::SanitizeThread:
       case Attribute::SanitizeHWAddress:
       case Attribute::SanitizeMemTag:
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 483024a250da02..4289bdfa36f442 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1558,7 +1558,7 @@ exit:
   ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8>
   call void @f.nobuiltin() builtin
-  ; CHECK: call void @f.nobuiltin() #51
+  ; CHECK: call void @f.nobuiltin() #52
   call fastcc noalias ptr @f.noalias() noinline
   ; CHECK: call fastcc noalias ptr @f.noalias() #12
@@ -1982,6 +1982,8 @@ declare void @f.nosanitize_bounds() nosanitize_bounds
 declare
void @f.allockind() allockind("alloc,uninitialized") ; CHECK: declare void @f.allockind() #50 +declare void @f.sanitize_numericalstability() sanitize_numericalstability +; CHECK: declare void @f.sanitize_numericalstability() #51 ; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) @@ -2104,7 +2106,8 @@ define float @nofpclass_callsites(float %arg) { ; CHECK: attributes #48 = { allocsize(1,0) } ; CHECK: attributes #49 = { nosanitize_bounds } ; CHECK: attributes #50 = { allockind("alloc,uninitialized") } -; CHECK: attributes #51 = { builtin } +; CHECK: attributes #51 = { sanitize_numericalstability } +; CHECK: attributes #52 = { builtin } ;; Metadata diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll new file mode 100644 index 00000000000000..be268a4417e63f --- /dev/null +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll @@ -0,0 +1,917 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -nsan-truncate-fcmp-eq=false -S %s | FileCheck %s --check-prefixes=CHECK,DQQ +; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dlq -nsan-truncate-fcmp-eq=false -S %s | FileCheck %s --check-prefixes=CHECK,DLQ + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests with simple control flow. + +@float_const = private unnamed_addr constant float 0.5 +@x86_fp80_const = private unnamed_addr constant x86_fp80 0xK3FC9E69594BEC44DE000 +@double_const = private unnamed_addr constant double 0.5 + + +define float @return_param_float(float %a) sanitize_numericalstability { +; CHECK-LABEL: @return_param_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @return_param_float to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_float_d(float [[A]], double [[TMP4]], i32 1, i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[A]] to double +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], double [[TMP7]], double [[TMP4]] +; CHECK-NEXT: store i64 ptrtoint (ptr @return_param_float to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP8]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[A]] +; +entry: + ret float %a +} + +; Note that the shadow fadd should not have a `fast` flag. 
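+; (The shadow fadd is created without copying the instruction's fast-math
+; flags - see createShadowValueWithOperandsAvailable - so the shadow
+; computation stays strict.)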
+define float @param_add_return_float(float %a) sanitize_numericalstability { +; CHECK-LABEL: @param_add_return_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_float to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_float to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[B]] +; +entry: + %b = fadd fast float %a, 1.0 + ret float %b +} + +define x86_fp80 @param_add_return_x86_fp80(x86_fp80 %a) sanitize_numericalstability { +; CHECK-LABEL: @param_add_return_x86_fp80( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_x86_fp80 to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], fp128 [[TMP2]], fp128 [[TMP3]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[B:%.*]] = fadd x86_fp80 [[A]], 0xK3FC9E69594BEC44DE000 +; CHECK-NEXT: [[TMP5:%.*]] = fadd fp128 [[TMP4]], 0xLC0000000000000003FC9CD2B297D889B +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[B]], fp128 [[TMP5]], i32 1, i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = fpext x86_fp80 [[B]] to fp128 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], fp128 [[TMP8]], fp128 [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_x86_fp80 to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store fp128 [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 16 +; CHECK-NEXT: ret x86_fp80 [[B]] +; +entry: + %b = fadd x86_fp80 %a, 0xK3FC9E69594BEC44DE000 + ret x86_fp80 %b +} + +define double @param_add_return_double(double %a) sanitize_numericalstability { +; DQQ-LABEL: @param_add_return_double( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_double to i64) +; DQQ-NEXT: [[TMP2:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1 +; DQQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to fp128 +; DQQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], fp128 [[TMP2]], fp128 [[TMP3]] +; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[B:%.*]] = fadd double [[A]], 1.000000e+00 +; DQQ-NEXT: [[TMP5:%.*]] = fadd fp128 [[TMP4]], 0xL00000000000000003FFF000000000000 +; DQQ-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_double_q(double [[B]], fp128 [[TMP5]], i32 1, 
i64 0) +; DQQ-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; DQQ-NEXT: [[TMP8:%.*]] = fpext double [[B]] to fp128 +; DQQ-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], fp128 [[TMP8]], fp128 [[TMP5]] +; DQQ-NEXT: store i64 ptrtoint (ptr @param_add_return_double to i64), ptr @__nsan_shadow_ret_tag, align 8 +; DQQ-NEXT: store fp128 [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 16 +; DQQ-NEXT: ret double [[B]] +; +; DLQ-LABEL: @param_add_return_double( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_double to i64) +; DLQ-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr @__nsan_shadow_args_ptr, align 1 +; DLQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to x86_fp80 +; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], x86_fp80 [[TMP2]], x86_fp80 [[TMP3]] +; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[B:%.*]] = fadd double [[A]], 1.000000e+00 +; DLQ-NEXT: [[TMP5:%.*]] = fadd x86_fp80 [[TMP4]], 0xK3FFF8000000000000000 +; DLQ-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_double_l(double [[B]], x86_fp80 [[TMP5]], i32 1, i64 0) +; DLQ-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; DLQ-NEXT: [[TMP8:%.*]] = fpext double [[B]] to x86_fp80 +; DLQ-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], x86_fp80 [[TMP8]], x86_fp80 [[TMP5]] +; DLQ-NEXT: store i64 ptrtoint (ptr @param_add_return_double to i64), ptr @__nsan_shadow_ret_tag, align 8 +; DLQ-NEXT: store x86_fp80 [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 16 +; DLQ-NEXT: ret double [[B]] +; +entry: + %b = fadd double %a, 1.0 + ret double %b +} + +define <2 x float> @return_param_add_return_float_vector(<2 x float> %a) sanitize_numericalstability { +; CHECK-LABEL: @return_param_add_return_float_vector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @return_param_add_return_float_vector to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[A:%.*]] to <2 x double> +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[B:%.*]] = fadd <2 x float> [[A]], +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[B]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP6]], double [[TMP7]], i32 1, i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[B]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP9]], double [[TMP10]], i32 1, i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[B]] to <2 x double> +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (ptr @return_param_add_return_float_vector to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store <2 x double> [[TMP15]], ptr @__nsan_shadow_ret_ptr, align 16 +; CHECK-NEXT: ret <2 x float> [[B]] +; +entry: + %b = fadd <2 x float> %a, + ret <2 x float> %b 
+} + +; TODO: This is ignored for now. +define [2 x float] @return_param_float_array([2 x float] %a) sanitize_numericalstability { +; CHECK-LABEL: @return_param_float_array( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [2 x float] [[A:%.*]] +; +entry: + ret [2 x float] %a +} + +define void @constantload_add_store_float(ptr %dst) sanitize_numericalstability { +; CHECK-LABEL: @constantload_add_store_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load float, ptr @float_const, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP1]], i32 4, i64 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP6]], double [[TMP1]] +; CHECK-NEXT: store double [[TMP7]], ptr [[TMP2]], align 1 +; CHECK-NEXT: store float [[C]], ptr [[DST]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load float, ptr @float_const + %c = fadd float %b, 1.0 + store float %c, ptr %dst, align 1 + ret void +} + +define void @constantload_add_store_x86_fp80(ptr %dst) sanitize_numericalstability { +; CHECK-LABEL: @constantload_add_store_x86_fp80( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load x86_fp80, ptr @x86_fp80_const, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = fpext x86_fp80 [[B]] to fp128 +; CHECK-NEXT: [[C:%.*]] = fadd x86_fp80 [[B]], 0xK3FC9E69594BEC44DE000 +; CHECK-NEXT: [[TMP1:%.*]] = fadd fp128 [[TMP0]], 0xLC0000000000000003FC9CD2B297D889B +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_longdouble_store(ptr [[DST:%.*]], i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[C]], fp128 [[TMP1]], i32 4, i64 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = fpext x86_fp80 [[C]] to fp128 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], fp128 [[TMP6]], fp128 [[TMP1]] +; CHECK-NEXT: store fp128 [[TMP7]], ptr [[TMP2]], align 1 +; CHECK-NEXT: store x86_fp80 [[C]], ptr [[DST]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load x86_fp80, ptr @x86_fp80_const + %c = fadd x86_fp80 %b, 0xK3FC9E69594BEC44DE000 + store x86_fp80 %c, ptr %dst, align 1 + ret void +} + +define void @constantload_add_store_double(ptr %dst) sanitize_numericalstability { +; DQQ-LABEL: @constantload_add_store_double( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[B:%.*]] = load double, ptr @double_const, align 8 +; DQQ-NEXT: [[TMP0:%.*]] = fpext double [[B]] to fp128 +; DQQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; DQQ-NEXT: [[TMP1:%.*]] = fadd fp128 [[TMP0]], 0xL00000000000000003FFF000000000000 +; DQQ-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[DST:%.*]], i64 1) +; DQQ-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64 +; DQQ-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_double_q(double [[C]], fp128 [[TMP1]], i32 4, i64 [[TMP3]]) +; DQQ-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1 +; DQQ-NEXT: [[TMP6:%.*]] = fpext double [[C]] to fp128 +; DQQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], fp128 [[TMP6]], fp128 [[TMP1]] +; DQQ-NEXT: store fp128 
[[TMP7]], ptr [[TMP2]], align 1 +; DQQ-NEXT: store double [[C]], ptr [[DST]], align 1 +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @constantload_add_store_double( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[B:%.*]] = load double, ptr @double_const, align 8 +; DLQ-NEXT: [[TMP0:%.*]] = fpext double [[B]] to x86_fp80 +; DLQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; DLQ-NEXT: [[TMP1:%.*]] = fadd x86_fp80 [[TMP0]], 0xK3FFF8000000000000000 +; DLQ-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[DST:%.*]], i64 1) +; DLQ-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64 +; DLQ-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_double_l(double [[C]], x86_fp80 [[TMP1]], i32 4, i64 [[TMP3]]) +; DLQ-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1 +; DLQ-NEXT: [[TMP6:%.*]] = fpext double [[C]] to x86_fp80 +; DLQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], x86_fp80 [[TMP6]], x86_fp80 [[TMP1]] +; DLQ-NEXT: store x86_fp80 [[TMP7]], ptr [[TMP2]], align 1 +; DLQ-NEXT: store double [[C]], ptr [[DST]], align 1 +; DLQ-NEXT: ret void +; +entry: + %b = load double, ptr @double_const + %c = fadd double %b, 1.0 + store double %c, ptr %dst, align 1 + ret void +} + +define void @load_add_store_float(ptr %a) sanitize_numericalstability { +; CHECK-LABEL: @load_add_store_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load float, ptr [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[A]], i64 1) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 1 +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00 +; CHECK-NEXT: [[TMP8:%.*]] = fadd double [[TMP7]], 1.000000e+00 +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[A]], i64 1) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP8]], i32 4, i64 [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], double [[TMP13]], double [[TMP8]] +; CHECK-NEXT: store double [[TMP14]], ptr [[TMP9]], align 1 +; CHECK-NEXT: store float [[C]], ptr [[A]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load float, ptr %a, align 1 + %c = fadd float %b, 1.0 + store float %c, ptr %a, align 1 + ret void +} + +define void @load_add_store_x86_fp80(ptr %a) sanitize_numericalstability { +; CHECK-LABEL: @load_add_store_x86_fp80( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load x86_fp80, ptr [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_longdouble_load(ptr [[A]], i64 1) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = load fp128, ptr [[TMP0]], align 1 +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = fpext x86_fp80 [[B]] to fp128 +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = phi fp128 [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-NEXT: [[C:%.*]] = 
fadd x86_fp80 [[B]], 0xK3FC9E69594BEC44DE000 +; CHECK-NEXT: [[TMP8:%.*]] = fadd fp128 [[TMP7]], 0xLC0000000000000003FC9CD2B297D889B +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_longdouble_store(ptr [[A]], i64 1) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[C]], fp128 [[TMP8]], i32 4, i64 [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = fpext x86_fp80 [[C]] to fp128 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], fp128 [[TMP13]], fp128 [[TMP8]] +; CHECK-NEXT: store fp128 [[TMP14]], ptr [[TMP9]], align 1 +; CHECK-NEXT: store x86_fp80 [[C]], ptr [[A]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load x86_fp80, ptr %a, align 1 + %c = fadd x86_fp80 %b, 0xK3FC9E69594BEC44DE000 + store x86_fp80 %c, ptr %a, align 1 + ret void +} + +define void @load_add_store_double(ptr %a) sanitize_numericalstability { +; DQQ-LABEL: @load_add_store_double( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[B:%.*]] = load double, ptr [[A:%.*]], align 1 +; DQQ-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_load(ptr [[A]], i64 1) +; DQQ-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +; DQQ-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] +; DQQ: 2: +; DQQ-NEXT: [[TMP3:%.*]] = load fp128, ptr [[TMP0]], align 1 +; DQQ-NEXT: br label [[TMP6:%.*]] +; DQQ: 4: +; DQQ-NEXT: [[TMP5:%.*]] = fpext double [[B]] to fp128 +; DQQ-NEXT: br label [[TMP6]] +; DQQ: 6: +; DQQ-NEXT: [[TMP7:%.*]] = phi fp128 [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] +; DQQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; DQQ-NEXT: [[TMP8:%.*]] = fadd fp128 [[TMP7]], 0xL00000000000000003FFF000000000000 +; DQQ-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[A]], i64 1) +; DQQ-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; DQQ-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_double_q(double [[C]], fp128 [[TMP8]], i32 4, i64 [[TMP10]]) +; DQQ-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 +; DQQ-NEXT: [[TMP13:%.*]] = fpext double [[C]] to fp128 +; DQQ-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], fp128 [[TMP13]], fp128 [[TMP8]] +; DQQ-NEXT: store fp128 [[TMP14]], ptr [[TMP9]], align 1 +; DQQ-NEXT: store double [[C]], ptr [[A]], align 1 +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @load_add_store_double( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[B:%.*]] = load double, ptr [[A:%.*]], align 1 +; DLQ-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_load(ptr [[A]], i64 1) +; DLQ-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +; DLQ-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] +; DLQ: 2: +; DLQ-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[TMP0]], align 1 +; DLQ-NEXT: br label [[TMP6:%.*]] +; DLQ: 4: +; DLQ-NEXT: [[TMP5:%.*]] = fpext double [[B]] to x86_fp80 +; DLQ-NEXT: br label [[TMP6]] +; DLQ: 6: +; DLQ-NEXT: [[TMP7:%.*]] = phi x86_fp80 [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] +; DLQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; DLQ-NEXT: [[TMP8:%.*]] = fadd x86_fp80 [[TMP7]], 0xK3FFF8000000000000000 +; DLQ-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[A]], i64 1) +; DLQ-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; DLQ-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_double_l(double [[C]], x86_fp80 [[TMP8]], i32 4, i64 [[TMP10]]) +; DLQ-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 +; DLQ-NEXT: [[TMP13:%.*]] = fpext double [[C]] to x86_fp80 +; DLQ-NEXT: 
[[TMP14:%.*]] = select i1 [[TMP12]], x86_fp80 [[TMP13]], x86_fp80 [[TMP8]] +; DLQ-NEXT: store x86_fp80 [[TMP14]], ptr [[TMP9]], align 1 +; DLQ-NEXT: store double [[C]], ptr [[A]], align 1 +; DLQ-NEXT: ret void +; +entry: + %b = load double, ptr %a, align 1 + %c = fadd double %b, 1.0 + store double %c, ptr %a, align 1 + ret void +} + +define void @load_add_store_vector(<2 x float>* %a) sanitize_numericalstability { +; CHECK-LABEL: @load_add_store_vector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load <2 x float>, ptr [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[A]], i64 2) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[TMP0]], align 1 +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = fpext <2 x float> [[B]] to <2 x double> +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x double> [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-NEXT: [[C:%.*]] = fadd <2 x float> [[B]], +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[A]], i64 2) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[C]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP10]], double [[TMP11]], i32 4, i64 [[TMP12]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[C]], i64 1 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP14]], double [[TMP15]], i32 4, i64 [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = fpext <2 x float> [[C]] to <2 x double> +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], <2 x double> [[TMP20]], <2 x double> [[TMP8]] +; CHECK-NEXT: store <2 x double> [[TMP21]], ptr [[TMP9]], align 1 +; CHECK-NEXT: store <2 x float> [[C]], ptr [[A]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load <2 x float>, ptr %a, align 1 + %c = fadd <2 x float> %b, + store <2 x float> %c, ptr %a, align 1 + ret void +} + +declare float @returns_float() + +define void @call_fn_returning_float(ptr %dst) sanitize_numericalstability { +; CHECK-LABEL: @call_fn_returning_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = call float @returns_float() +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @returns_float to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP5]], i32 
4, i64 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], double [[TMP10]], double [[TMP5]] +; CHECK-NEXT: store double [[TMP11]], ptr [[TMP6]], align 1 +; CHECK-NEXT: store float [[C]], ptr [[DST]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = call float @returns_float() + %c = fadd float %b, 1.0 + store float %c, ptr %dst, align 1 + ret void +} + +define float @return_fn_returning_float(ptr %dst) sanitize_numericalstability { +; CHECK-LABEL: @return_fn_returning_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = call float @returns_float() +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @returns_float to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP4]], i32 1, i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], double [[TMP7]], double [[TMP4]] +; CHECK-NEXT: store i64 ptrtoint (ptr @return_fn_returning_float to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP8]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[B]] +; +entry: + %b = call float @returns_float() + ret float %b +} + +declare void @takes_floats(float %a, i8 %b, double %c, x86_fp80 %d) + +define void @call_fn_taking_float() sanitize_numericalstability { +; DQQ-LABEL: @call_fn_taking_float( +; DQQ-NEXT: entry: +; DQQ-NEXT: store ptr @takes_floats, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: store double 1.000000e+00, ptr @__nsan_shadow_args_ptr, align 1 +; DQQ-NEXT: store fp128 0xL00000000000000004000800000000000, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 8), align 1 +; DQQ-NEXT: store fp128 0xLC0000000000000003FC9CD2B297D889B, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 24), align 1 +; DQQ-NEXT: call void @takes_floats(float 1.000000e+00, i8 2, double 3.000000e+00, x86_fp80 0xK3FC9E69594BEC44DE000) +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @call_fn_taking_float( +; DLQ-NEXT: entry: +; DLQ-NEXT: store ptr @takes_floats, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: store double 1.000000e+00, ptr @__nsan_shadow_args_ptr, align 1 +; DLQ-NEXT: store x86_fp80 0xK4000C000000000000000, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 8), align 1 +; DLQ-NEXT: store fp128 0xLC0000000000000003FC9CD2B297D889B, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 18), align 1 +; DLQ-NEXT: call void @takes_floats(float 1.000000e+00, i8 2, double 3.000000e+00, x86_fp80 0xK3FC9E69594BEC44DE000) +; DLQ-NEXT: ret void +; +entry: + call void @takes_floats(float 1.0, i8 2, double 3.0, x86_fp80 0xK3FC9E69594BEC44DE000) + ret void +} + +declare float @llvm.sin.f32(float) readnone + +define float @call_sin_intrinsic() sanitize_numericalstability { +; CHECK-LABEL: @call_sin_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call float @llvm.sin.f32(float 1.000000e+00) +; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.sin.f64(double 1.000000e+00) +; 
CHECK-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP0]], i32 1, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP0]] +; CHECK-NEXT: store i64 ptrtoint (ptr @call_sin_intrinsic to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP4]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @llvm.sin.f32(float 1.0) + ret float %r +} + +declare float @sinf(float) + +define float @call_sinf_libfunc() sanitize_numericalstability { +; CHECK-LABEL: @call_sinf_libfunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call float @sinf(float 1.000000e+00) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.sin.f64(double 1.000000e+00) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP0]], i32 1, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP0]] +; CHECK-NEXT: store i64 ptrtoint (ptr @call_sinf_libfunc to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP4]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @sinf(float 1.0) + ret float %r +} + +declare double @sin(double) + +; FIXME: nsan uses `sin(double)` for fp128. +define double @call_sin_libfunc() sanitize_numericalstability { +; DQQ-LABEL: @call_sin_libfunc( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[R:%.*]] = call double @sin(double 1.000000e+00) #[[ATTR4]] +; DQQ-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.sin.f80(x86_fp80 0xK3FFF8000000000000000) +; DQQ-NEXT: [[TMP1:%.*]] = fpext x86_fp80 [[TMP0]] to fp128 +; DQQ-NEXT: [[TMP2:%.*]] = call i32 @__nsan_internal_check_double_q(double [[R]], fp128 [[TMP1]], i32 1, i64 0) +; DQQ-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 1 +; DQQ-NEXT: [[TMP4:%.*]] = fpext double [[R]] to fp128 +; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], fp128 [[TMP4]], fp128 [[TMP1]] +; DQQ-NEXT: store i64 ptrtoint (ptr @call_sin_libfunc to i64), ptr @__nsan_shadow_ret_tag, align 8 +; DQQ-NEXT: store fp128 [[TMP5]], ptr @__nsan_shadow_ret_ptr, align 16 +; DQQ-NEXT: ret double [[R]] +; +; DLQ-LABEL: @call_sin_libfunc( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[R:%.*]] = call double @sin(double 1.000000e+00) #[[ATTR4]] +; DLQ-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.sin.f80(x86_fp80 0xK3FFF8000000000000000) +; DLQ-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_double_l(double [[R]], x86_fp80 [[TMP0]], i32 1, i64 0) +; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; DLQ-NEXT: [[TMP3:%.*]] = fpext double [[R]] to x86_fp80 +; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], x86_fp80 [[TMP3]], x86_fp80 [[TMP0]] +; DLQ-NEXT: store i64 ptrtoint (ptr @call_sin_libfunc to i64), ptr @__nsan_shadow_ret_tag, align 8 +; DLQ-NEXT: store x86_fp80 [[TMP4]], ptr @__nsan_shadow_ret_ptr, align 16 +; DLQ-NEXT: ret double [[R]] +; +entry: + %r = call double @sin(double 1.0) + ret double %r +} + +declare double @frexp(double, i32*) + +define double @call_frexp_libfunc_nointrinsic(double %0, i32* nocapture %1) sanitize_numericalstability { +; DQQ-LABEL: @call_frexp_libfunc_nointrinsic( +; DQQ-NEXT: [[TMP3:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], ptrtoint (ptr 
@call_frexp_libfunc_nointrinsic to i64) +; DQQ-NEXT: [[TMP5:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1 +; DQQ-NEXT: [[TMP6:%.*]] = fpext double [[TMP0:%.*]] to fp128 +; DQQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP4]], fp128 [[TMP5]], fp128 [[TMP6]] +; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_double_q(double [[TMP0]], fp128 [[TMP7]], i32 2, i64 0) +; DQQ-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +; DQQ-NEXT: [[TMP10:%.*]] = fpext double [[TMP0]] to fp128 +; DQQ-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], fp128 [[TMP10]], fp128 [[TMP7]] +; DQQ-NEXT: [[TMP12:%.*]] = tail call double @frexp(double [[TMP0]], ptr [[TMP1:%.*]]) +; DQQ-NEXT: [[TMP13:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; DQQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], ptrtoint (ptr @frexp to i64) +; DQQ-NEXT: [[TMP15:%.*]] = load fp128, ptr @__nsan_shadow_ret_ptr, align 16 +; DQQ-NEXT: [[TMP16:%.*]] = fpext double [[TMP12]] to fp128 +; DQQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP14]], fp128 [[TMP15]], fp128 [[TMP16]] +; DQQ-NEXT: [[TMP18:%.*]] = call i32 @__nsan_internal_check_double_q(double [[TMP12]], fp128 [[TMP17]], i32 1, i64 0) +; DQQ-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1 +; DQQ-NEXT: [[TMP20:%.*]] = fpext double [[TMP12]] to fp128 +; DQQ-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], fp128 [[TMP20]], fp128 [[TMP17]] +; DQQ-NEXT: store i64 ptrtoint (ptr @call_frexp_libfunc_nointrinsic to i64), ptr @__nsan_shadow_ret_tag, align 8 +; DQQ-NEXT: store fp128 [[TMP21]], ptr @__nsan_shadow_ret_ptr, align 16 +; DQQ-NEXT: ret double [[TMP12]] +; +; DLQ-LABEL: @call_frexp_libfunc_nointrinsic( +; DLQ-NEXT: [[TMP3:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], ptrtoint (ptr @call_frexp_libfunc_nointrinsic to i64) +; DLQ-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr @__nsan_shadow_args_ptr, align 1 +; DLQ-NEXT: [[TMP6:%.*]] = fpext double [[TMP0:%.*]] to x86_fp80 +; DLQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP4]], x86_fp80 [[TMP5]], x86_fp80 [[TMP6]] +; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_double_l(double [[TMP0]], x86_fp80 [[TMP7]], i32 2, i64 0) +; DLQ-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +; DLQ-NEXT: [[TMP10:%.*]] = fpext double [[TMP0]] to x86_fp80 +; DLQ-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], x86_fp80 [[TMP10]], x86_fp80 [[TMP7]] +; DLQ-NEXT: [[TMP12:%.*]] = tail call double @frexp(double [[TMP0]], ptr [[TMP1:%.*]]) +; DLQ-NEXT: [[TMP13:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; DLQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], ptrtoint (ptr @frexp to i64) +; DLQ-NEXT: [[TMP15:%.*]] = load x86_fp80, ptr @__nsan_shadow_ret_ptr, align 16 +; DLQ-NEXT: [[TMP16:%.*]] = fpext double [[TMP12]] to x86_fp80 +; DLQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP14]], x86_fp80 [[TMP15]], x86_fp80 [[TMP16]] +; DLQ-NEXT: [[TMP18:%.*]] = call i32 @__nsan_internal_check_double_l(double [[TMP12]], x86_fp80 [[TMP17]], i32 1, i64 0) +; DLQ-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1 +; DLQ-NEXT: [[TMP20:%.*]] = fpext double [[TMP12]] to x86_fp80 +; DLQ-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], x86_fp80 [[TMP20]], x86_fp80 [[TMP17]] +; DLQ-NEXT: store i64 ptrtoint (ptr @call_frexp_libfunc_nointrinsic to i64), ptr @__nsan_shadow_ret_tag, align 8 +; DLQ-NEXT: store x86_fp80 [[TMP21]], ptr @__nsan_shadow_ret_ptr, align 16 +; DLQ-NEXT: ret double [[TMP12]] +; + %3 = tail call double @frexp(double %0, 
i32* %1) + ret double %3 +} + +define float @call_fn_taking_float_by_fn_ptr(float (float)* nocapture %fn_ptr) sanitize_numericalstability { +; CHECK-LABEL: @call_fn_taking_float_by_fn_ptr( +; CHECK-NEXT: entry: +; CHECK-NEXT: store ptr [[FN_PTR:%.*]], ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: store double 1.000000e+00, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[R:%.*]] = call float [[FN_PTR]](float 1.000000e+00) +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FN_PTR]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP5]], i32 1, i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (ptr @call_fn_taking_float_by_fn_ptr to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float %fn_ptr(float 1.0) + ret float %r +} + +define void @store_float(ptr %dst) sanitize_numericalstability { +; CHECK-LABEL: @store_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1) +; CHECK-NEXT: store double 4.200000e+01, ptr [[TMP0]], align 1 +; CHECK-NEXT: store float 4.200000e+01, ptr [[DST]], align 1 +; CHECK-NEXT: ret void +; +entry: + store float 42.0, ptr %dst, align 1 + ret void +} + +define i1 @inline_asm(double %0) sanitize_numericalstability { +; DQQ-LABEL: @inline_asm( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @inline_asm to i64) +; DQQ-NEXT: [[TMP3:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1 +; DQQ-NEXT: [[TMP4:%.*]] = fpext double [[TMP0:%.*]] to fp128 +; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], fp128 [[TMP3]], fp128 [[TMP4]] +; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP6:%.*]] = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double [[TMP0]]) +; DQQ-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 +; DQQ-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 0 +; DQQ-NEXT: ret i1 [[TMP8]] +; +; DLQ-LABEL: @inline_asm( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @inline_asm to i64) +; DLQ-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr @__nsan_shadow_args_ptr, align 1 +; DLQ-NEXT: [[TMP4:%.*]] = fpext double [[TMP0:%.*]] to x86_fp80 +; DLQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], x86_fp80 [[TMP3]], x86_fp80 [[TMP4]] +; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP6:%.*]] = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double [[TMP0]]) +; DLQ-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 +; DLQ-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 0 +; DLQ-NEXT: ret i1 [[TMP8]] +; +entry: + %1 = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double %0) + %2 = trunc i32 %1 to 
i8 + %3 = icmp slt i8 %2, 0 + ret i1 %3 +} + +define void @vector_extract(<2 x float> %0) sanitize_numericalstability { +; CHECK-LABEL: @vector_extract( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @vector_extract to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %1 = extractelement <2 x float> %0, i32 1 + ret void +} + +define void @vector_insert(<2 x float> %0) sanitize_numericalstability { +; CHECK-LABEL: @vector_insert( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @vector_insert to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP0]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP5]], double 1.000000e+00, i32 1 +; CHECK-NEXT: ret void +; +entry: + %1 = insertelement <2 x float> %0, float 1.0, i32 1 + ret void +} + + +define void @vector_shuffle(<2 x float> %0) sanitize_numericalstability { +; CHECK-LABEL: @vector_shuffle( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @vector_shuffle to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> , <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> , <2 x i32> +; CHECK-NEXT: ret void +; +entry: + %1 = shufflevector <2 x float> %0, <2 x float> , <2 x i32> + ret void +} + +define void @aggregate_extract({i32, {float, i1}} %0) sanitize_numericalstability { +; CHECK-LABEL: @aggregate_extract( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, { float, i1 } } [[TMP0:%.*]], 1, 0 +; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[TMP1]] to double +; CHECK-NEXT: ret void +; +entry: + %1 = extractvalue {i32, {float, i1}} %0, 1, 0 + ret void +} + +define void @aggregate_insert({i32, {float, i1}} %0, float %1) sanitize_numericalstability { +; CHECK-LABEL: @aggregate_insert( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], ptrtoint (ptr @aggregate_insert to i64) +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP5:%.*]] = fpext 
float [[TMP1:%.*]] to double +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], double [[TMP4]], double [[TMP5]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP1]], double [[TMP6]], i32 5, i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[TMP1]] to double +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { i32, { float, i1 } } [[TMP0:%.*]], float [[TMP1]], 1, 0 +; CHECK-NEXT: ret void +; +entry: + %2 = insertvalue {i32, {float, i1}} %0, float %1, 1, 0 + ret void +} + +define void @aggregate_insert_avoid_const_check({i32, {float, i1}} %0) sanitize_numericalstability { +; CHECK-LABEL: @aggregate_insert_avoid_const_check( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, { float, i1 } } [[TMP0:%.*]], float 1.000000e+00, 1, 0 +; CHECK-NEXT: ret void +; +entry: + %1 = insertvalue {i32, {float, i1}} %0, float 1.0, 1, 0 + ret void +} + + +declare float @fabsf(float) + +define float @sub_fabs(float %a, float %b) sanitize_numericalstability { +; CHECK-LABEL: @sub_fabs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @sub_fabs to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 8), align 1 +; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[B:%.*]] to double +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], double [[TMP5]], double [[TMP6]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[S:%.*]] = fsub float [[A]], [[B]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub double [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__nsan_internal_check_float_d(float [[S]], double [[TMP8]], i32 2, i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = fpext float [[S]] to double +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], double [[TMP11]], double [[TMP8]] +; CHECK-NEXT: [[R:%.*]] = call float @fabsf(float [[S]]) #[[ATTR4]] +; CHECK-NEXT: [[TMP13:%.*]] = call double @llvm.fabs.f64(double [[TMP8]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP13]], i32 1, i64 0) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], double [[TMP16]], double [[TMP13]] +; CHECK-NEXT: store i64 ptrtoint (ptr @sub_fabs to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP17]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %s = fsub float %a, %b + %r = call float @fabsf(float %s) + ret float %r +} + +; Note that the `unsafe-fp-math` from the function attributes should be moved to +; individual instructions, with the shadow instructions NOT getting the attribute. 
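+; In the checks below, the application instruction is expected to come out as
+; `fadd fast float` (the fast-math flags derived from the "unsafe-fp-math"="true"
+; entry in attribute set #0), while the shadow `fadd double` carries no
+; fast-math flags, so the extended-precision computation is not relaxed.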
+define float @param_add_return_float_unsafe_fp_math(float %a) #0 { +; CHECK-LABEL: @param_add_return_float_unsafe_fp_math( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[B]] +; +entry: + %b = fadd float %a, 1.0 + ret float %b +} + + +define void @truncate(<2 x double> %0) sanitize_numericalstability { +; DQQ-LABEL: @truncate( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @truncate to i64) +; DQQ-NEXT: [[TMP3:%.*]] = load <2 x fp128>, ptr @__nsan_shadow_args_ptr, align 1 +; DQQ-NEXT: [[TMP4:%.*]] = fpext <2 x double> [[TMP0:%.*]] to <2 x fp128> +; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x fp128> [[TMP3]], <2 x fp128> [[TMP4]] +; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP6:%.*]] = fptrunc <2 x double> [[TMP0]] to <2 x float> +; DQQ-NEXT: [[TMP7:%.*]] = fptrunc <2 x fp128> [[TMP5]] to <2 x double> +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @truncate( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @truncate to i64) +; DLQ-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr @__nsan_shadow_args_ptr, align 1 +; DLQ-NEXT: [[TMP4:%.*]] = fpext <2 x double> [[TMP0:%.*]] to <2 x x86_fp80> +; DLQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x x86_fp80> [[TMP3]], <2 x x86_fp80> [[TMP4]] +; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP6:%.*]] = fptrunc <2 x double> [[TMP0]] to <2 x float> +; DLQ-NEXT: [[TMP7:%.*]] = fptrunc <2 x x86_fp80> [[TMP5]] to <2 x double> +; DLQ-NEXT: ret void +; +entry: + %1 = fptrunc <2 x double> %0 to <2 x float> + ret void +} + +define void @unaryop(float %a) sanitize_numericalstability { +; CHECK-LABEL: @unaryop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @unaryop to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[C:%.*]] = fneg float [[A]] +; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] +; CHECK-NEXT: ret void 
+; +entry: + %c = fneg float %a + ret void +} + + +attributes #0 = { nounwind readonly uwtable sanitize_numericalstability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll new file mode 100644 index 00000000000000..cb4e1ee13dbcd1 --- /dev/null +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=nsan -nsan-shadow-type-mapping=dqq -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests with more involved control flow to check lazy construction of the +; shadow values. + +define float @forward_use() sanitize_numericalstability { +; CHECK-LABEL: @forward_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP2:%.*]], 2.000000e+00 +; CHECK-NEXT: br label [[BLOCK1]] +; CHECK: block1: +; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[B]] = fadd float [[A]], 1.000000e+00 +; CHECK-NEXT: [[TMP2]] = fadd double [[TMP1]], 1.000000e+00 +; CHECK-NEXT: br label [[LOOP]] +; + +entry: + br label %block1 + +loop: + %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available. 
+ br label %block1 + +block1: + %a = phi float [ %d, %loop], [ 1.0, %entry ] + %b = fadd float %a, 1.0 + br label %loop +} + +define float @forward_use_with_load(float* %p) sanitize_numericalstability { +; CHECK-LABEL: @forward_use_with_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP10:%.*]], 2.000000e+00 +; CHECK-NEXT: br label [[BLOCK1]] +; CHECK: block1: +; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP2]], null +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP6:%.*]], label [[TMP4:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP2]], align 1 +; CHECK-NEXT: br label [[TMP8:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[L]] to double +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[TMP5]], [[TMP4]] ], [ [[TMP7]], [[TMP6]] ] +; CHECK-NEXT: [[B]] = fadd float [[L]], 1.000000e+00 +; CHECK-NEXT: [[TMP10]] = fadd double [[TMP9]], 1.000000e+00 +; CHECK-NEXT: br label [[LOOP]] +; + +entry: + br label %block1 + +loop: + %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available. + br label %block1 + +block1: + %a = phi float [ %d, %loop], [ 1.0, %entry ] + %l = load float, float* %p ; the load creates a new block + %b = fadd float %l, 1.0 ; this requires shadow(%l). + br label %loop +} + +define float @forward_use_with_two_uses() sanitize_numericalstability { +; CHECK-LABEL: @forward_use_with_two_uses( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP4:%.*]], 2.000000e+00 +; CHECK-NEXT: br label [[BLOCK1]] +; CHECK: block1: +; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[T1:%.*]] = fadd float [[A]], 1.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP1]], 1.000000e+00 +; CHECK-NEXT: [[T2:%.*]] = fadd float [[T1]], 3.000000e+00 +; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP2]], 3.000000e+00 +; CHECK-NEXT: [[B]] = fadd float [[T1]], [[T2]] +; CHECK-NEXT: [[TMP4]] = fadd double [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[LOOP]] +; + +entry: + br label %block1 + +loop: + %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available. + br label %block1 + +block1: + %a = phi float [ %d, %loop], [ 1.0, %entry ] + %t1 = fadd float %a, 1.0 + %t2 = fadd float %t1, 3.0 ; this requires shadow(%t1) + %b = fadd float %t1, %t2 ; this requires shadow(%t2) and shadow(%t1). 
+ br label %loop +} + diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll new file mode 100644 index 00000000000000..714929ccab05ac --- /dev/null +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -S %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests for invoke instructions that require special handling of the phis. + +declare float @may_throw() + +declare void @personalityFn() + +define float @invoke1() sanitize_numericalstability personality void ()* @personalityFn { +; CHECK-LABEL: @invoke1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw() +; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @may_throw to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: br label [[CONTINUE:%.*]] +; CHECK: continue: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: land: +; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[C]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP5]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]] +; CHECK-NEXT: store i64 ptrtoint (ptr @invoke1 to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP10]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[R]] +; + +entry: + %c = invoke float @may_throw() to label %continue unwind label %land + +continue: + br label %exit + +land: + %res = landingpad { ptr, i32 } cleanup + %lv = uitofp i32 1 to float + br label %exit + +exit: + %r = phi float [ %lv, %land], [ %c, %continue ] + ret float %r +} + +define float @invoke2() sanitize_numericalstability personality void ()* @personalityFn { +; CHECK-LABEL: @invoke2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw() +; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @may_throw to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: br label [[CONTINUE:%.*]] +; CHECK: continue: +; CHECK-NEXT: [[CV:%.*]] = fadd float [[C]], 2.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP5]], 2.000000e+00 +; CHECK-NEXT: 
br label [[EXIT:%.*]] +; CHECK: land: +; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[CV]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP6]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP7]], i32 1, i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], double [[TMP10]], double [[TMP7]] +; CHECK-NEXT: store i64 ptrtoint (ptr @invoke2 to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP11]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[R]] +; + +entry: + %c = invoke float @may_throw() to label %continue unwind label %land + +continue: + %cv = fadd float %c, 2.0 + br label %exit + +land: + %res = landingpad { ptr, i32 } cleanup + %lv = uitofp i32 1 to float + br label %exit + +exit: + %r = phi float [ %lv, %land], [ %cv, %continue ] + ret float %r +} + +define float @invoke3() sanitize_numericalstability personality void ()* @personalityFn { +; CHECK-LABEL: @invoke3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw() +; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]] +; CHECK: land: +; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @may_throw to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[C]], [[TMP0]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP5]], [[TMP0]] ] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]] +; CHECK-NEXT: store i64 ptrtoint (ptr @invoke3 to i64), ptr @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP10]], ptr @__nsan_shadow_ret_ptr, align 8 +; CHECK-NEXT: ret float [[R]] +; + +entry: + %c = invoke float @may_throw() to label %exit unwind label %land + +land: + %res = landingpad { ptr, i32 } cleanup + %lv = uitofp i32 1 to float + br label %exit + +exit: + %r = phi float [ %lv, %land], [ %c, %entry ] + ret float %r +} diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll new file mode 100644 index 00000000000000..fc55f4f5c5334d --- /dev/null +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll @@ -0,0 +1,405 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -S %s | FileCheck %s +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests with memory manipulation (memcpy, llvm.memcpy, ...). + + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) + +define void @call_memcpy_intrinsic(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b) sanitize_numericalstability { +; CHECK-LABEL: @call_memcpy_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__nsan_copy_values(ptr [[A:%.*]], ptr [[B:%.*]], i64 16) +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], ptr nonnull align 8 dereferenceable(16) [[B]], i64 16, i1 false) +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 16, i1 false) + ret void +} + +declare dso_local i8* @memcpy(i8*, i8*, i64) local_unnamed_addr + +define void @call_memcpy(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b) sanitize_numericalstability { +; CHECK-LABEL: @call_memcpy( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call ptr @memcpy(ptr nonnull align 8 dereferenceable(16) [[A:%.*]], ptr nonnull align 8 dereferenceable(16) [[B:%.*]], i64 16) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret void +; +entry: + tail call i8* @memcpy(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 16) + ret void +} + + +define void @transfer_float(float* %dst, float* %src) sanitize_numericalstability { +; CHECK-LABEL: @transfer_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = load float, ptr [[SRC:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[SRC]], i64 1) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 1 +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[T]] to double +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @__nsan_internal_check_float_d(float [[T]], double [[TMP7]], i32 4, i64 [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = fpext float [[T]] to double +; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], double [[TMP12]], double [[TMP7]] +; CHECK-NEXT: store double [[TMP13]], ptr [[TMP8]], align 1 +; CHECK-NEXT: store float [[T]], ptr [[DST]], align 1 +; CHECK-NEXT: ret void +; +entry: + %t = load float, ptr %src + store float %t, ptr %dst, align 1 + ret void +} + +define void @transfer_non_float(i32* %dst, i32* %src) sanitize_numericalstability { +; CHECK-LABEL: @transfer_non_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[SRC:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[SRC]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[SRC]]) +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 1 +; CHECK-NEXT: store 
i32 [[T]], ptr [[DST:%.*]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[DST]]) +; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[DST]]) +; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 1 +; CHECK-NEXT: ret void +; +entry: + %t = load i32, ptr %src + store i32 %t, ptr %dst, align 1 + ret void +} + +define void @transfer_array([2 x float]* %a) sanitize_numericalstability { +; CHECK-LABEL: @transfer_array( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load [2 x float], ptr [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[A]]) +; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP2]], align 1 +; CHECK-NEXT: store [2 x float] [[B]], ptr [[A]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[A]]) +; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[A]]) +; CHECK-NEXT: store i128 [[TMP3]], ptr [[TMP5]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load [2 x float], ptr %a, align 1 + store [2 x float] %b, ptr %a, align 1 + ret void +} + +define void @swap_untyped1(i64* nonnull align 8 %p, i64* nonnull align 8 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_untyped1( +; CHECK-NEXT: [[QV:%.*]] = load i64, ptr [[Q:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]]) +; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[PV:%.*]] = load i64, ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]]) +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]]) +; CHECK-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 1 +; CHECK-NEXT: store i64 [[PV]], ptr [[Q]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]]) +; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]]) +; CHECK-NEXT: store i128 [[TMP8]], ptr [[TMP10]], align 1 +; CHECK-NEXT: store i64 [[QV]], ptr [[P]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]]) +; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]]) +; CHECK-NEXT: store i128 [[TMP4]], ptr [[TMP12]], align 1 +; CHECK-NEXT: ret void +; + %qv = load i64, ptr %q + %pv = load i64, ptr %p + store i64 %pv, ptr %q, align 8 + store i64 %qv, ptr %p, align 8 + ret void +} + +; Same as swap_untyped1, but the load/stores are in the opposite order. 
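+; As in swap_untyped1, the i64 values are opaque to the instrumentation: the
+; checks below expect the raw shadow to be copied alongside the application
+; data (an i64 of shadow type tags via @__nsan_internal_get_raw_shadow_type_ptr
+; and an i128 of shadow payload via @__nsan_internal_get_raw_shadow_ptr for each
+; 8-byte value), so any floating-point shadow already stored at %p/%q is
+; preserved across the swap.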
+define void @swap_untyped2(i64* nonnull align 8 %p, i64* nonnull align 8 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_untyped2( +; CHECK-NEXT: [[PV:%.*]] = load i64, ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]]) +; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[QV:%.*]] = load i64, ptr [[Q:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]]) +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]]) +; CHECK-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 1 +; CHECK-NEXT: store i64 [[PV]], ptr [[Q]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]]) +; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]]) +; CHECK-NEXT: store i128 [[TMP4]], ptr [[TMP10]], align 1 +; CHECK-NEXT: store i64 [[QV]], ptr [[P]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]]) +; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]]) +; CHECK-NEXT: store i128 [[TMP8]], ptr [[TMP12]], align 1 +; CHECK-NEXT: ret void +; + %pv = load i64, ptr %p + %qv = load i64, ptr %q + store i64 %pv, ptr %q, align 8 + store i64 %qv, ptr %p, align 8 + ret void +} + +define void @swap_ft1(float* nonnull align 8 %p, float* nonnull align 8 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_ft1( +; CHECK-NEXT: [[QV:%.*]] = load float, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 1 +; CHECK-NEXT: br label [[TMP7:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[QV]] to double +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ] +; CHECK-NEXT: [[PV:%.*]] = load float, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 1) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]] +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 1 +; CHECK-NEXT: br label [[TMP15:%.*]] +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = fpext float [[PV]] to double +; CHECK-NEXT: br label [[TMP15]] +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = phi double [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ] +; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 1) +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[Q]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @__nsan_internal_check_float_d(float [[PV]], double [[TMP16]], i32 4, i64 [[TMP18]]) +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = fpext float [[PV]] to double +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP20]], double [[TMP21]], double 
[[TMP16]] +; CHECK-NEXT: store double [[TMP22]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store float [[PV]], ptr [[Q]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 1) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[QV]], double [[TMP8]], i32 4, i64 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = fpext float [[QV]] to double +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP26]], double [[TMP27]], double [[TMP8]] +; CHECK-NEXT: store double [[TMP28]], ptr [[TMP23]], align 1 +; CHECK-NEXT: store float [[QV]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + %qv = load float, ptr %q + %pv = load float, ptr %p + store float %pv, ptr %q, align 8 + store float %qv, ptr %p, align 8 + ret void +} + +; Same as swap_ft1, but the load/stores are in the opposite order. +define void @swap_ft2(float* nonnull align 8 %p, float* nonnull align 8 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_ft2( +; CHECK-NEXT: [[PV:%.*]] = load float, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 1 +; CHECK-NEXT: br label [[TMP7:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[PV]] to double +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ] +; CHECK-NEXT: [[QV:%.*]] = load float, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 1) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]] +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 1 +; CHECK-NEXT: br label [[TMP15:%.*]] +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = fpext float [[QV]] to double +; CHECK-NEXT: br label [[TMP15]] +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = phi double [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ] +; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 1) +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[Q]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @__nsan_internal_check_float_d(float [[PV]], double [[TMP8]], i32 4, i64 [[TMP18]]) +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = fpext float [[PV]] to double +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP20]], double [[TMP21]], double [[TMP8]] +; CHECK-NEXT: store double [[TMP22]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store float [[PV]], ptr [[Q]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 1) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[QV]], double [[TMP16]], i32 4, i64 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = fpext float [[QV]] to double +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP26]], double [[TMP27]], double [[TMP16]] +; CHECK-NEXT: store double [[TMP28]], ptr [[TMP23]], align 1 +; CHECK-NEXT: store float [[QV]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + %pv = load 
float, ptr %p + %qv = load float, ptr %q + store float %pv, ptr %q, align 8 + store float %qv, ptr %p, align 8 + ret void +} + +define void @swap_vectorft1(<2 x float>* nonnull align 16 %p, <2 x float>* nonnull align 16 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_vectorft1( +; CHECK-NEXT: [[QV:%.*]] = load <2 x float>, ptr [[Q:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 2) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[TMP1]], align 1 +; CHECK-NEXT: br label [[TMP7:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = fpext <2 x float> [[QV]] to <2 x double> +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ] +; CHECK-NEXT: [[PV:%.*]] = load <2 x float>, ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 2) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]] +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, ptr [[TMP9]], align 1 +; CHECK-NEXT: br label [[TMP15:%.*]] +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[PV]] to <2 x double> +; CHECK-NEXT: br label [[TMP15]] +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ] +; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 2) +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[PV]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP16]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[Q]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP18]], double [[TMP19]], i32 4, i64 [[TMP20]]) +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[PV]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[TMP16]], i64 1 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[Q]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP22]], double [[TMP23]], i32 4, i64 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP21]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP26]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = fpext <2 x float> [[PV]] to <2 x double> +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], <2 x double> [[TMP28]], <2 x double> [[TMP16]] +; CHECK-NEXT: store <2 x double> [[TMP29]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store <2 x float> [[PV]], ptr [[Q]], align 16 +; CHECK-NEXT: [[TMP30:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 2) +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[QV]], i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP31]], double [[TMP32]], i32 4, i64 [[TMP33]]) +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[QV]], i64 1 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP35]], double [[TMP36]], i32 4, i64 [[TMP37]]) +; CHECK-NEXT: [[TMP39:%.*]] = or i32 
+
+; Same as swap_vectorft1, but the load/stores are in the opposite order.
+define void @swap_vectorft2(<2 x float>* nonnull align 16 %p, <2 x float>* nonnull align 16 %q) sanitize_numericalstability {
+; CHECK-LABEL: @swap_vectorft2(
+; CHECK-NEXT: [[PV:%.*]] = load <2 x float>, ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 2)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[TMP1]], align 1
+; CHECK-NEXT: br label [[TMP7:%.*]]
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP7]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ]
+; CHECK-NEXT: [[QV:%.*]] = load <2 x float>, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 2)
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null
+; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]]
+; CHECK: 11:
+; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, ptr [[TMP9]], align 1
+; CHECK-NEXT: br label [[TMP15:%.*]]
+; CHECK: 13:
+; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP15]]
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 2)
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[PV]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP21:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP18]], double [[TMP19]], i32 4, i64 [[TMP20]])
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[PV]], i64 1
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[TMP8]], i64 1
+; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP22]], double [[TMP23]], i32 4, i64 [[TMP24]])
+; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP21]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP26]], 1
+; CHECK-NEXT: [[TMP28:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], <2 x double> [[TMP28]], <2 x double> [[TMP16]]
+; CHECK-NEXT: store <2 x double> [[TMP29]], ptr [[TMP17]], align 1
+; CHECK-NEXT: store <2 x float> [[PV]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP30:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 2)
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[QV]], i64 0
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP16]], i64 0
+; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP34:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP31]], double [[TMP32]], i32 4, i64 [[TMP33]])
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[QV]], i64 1
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[TMP16]], i64 1
+; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP35]], double [[TMP36]], i32 4, i64 [[TMP37]])
+; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP34]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 1
+; CHECK-NEXT: [[TMP41:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP40]], <2 x double> [[TMP41]], <2 x double> [[TMP16]]
+; CHECK-NEXT: store <2 x double> [[TMP42]], ptr [[TMP30]], align 1
+; CHECK-NEXT: store <2 x float> [[QV]], ptr [[P]], align 16
+; CHECK-NEXT: ret void
+;
+ %pv = load <2 x float>, ptr %p
+ %qv = load <2 x float>, ptr %q
+ store <2 x float> %pv, ptr %q, align 16
+ store <2 x float> %qv, ptr %p, align 16
+ ret void
+}
+
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll
new file mode 100644
index 00000000000000..0df502bf36fa0e
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -nsan-propagate-non-ft-const-stores-as-ft -S %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @store_non_float(ptr %dst) sanitize_numericalstability {
+; CHECK-LABEL: @store_non_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST]], i64 1)
+; CHECK-NEXT: store double 0x36F5000000000000, ptr [[TMP0]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ store i32 42, ptr %dst, align 1
+ ret void
+}
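+
+; The shadow store above writes 0x36F5000000000000, which is the i32 constant
+; 42 reinterpreted as a float (the denormal 42 * 2^-149) and extended to
+; double, matching the -nsan-propagate-non-ft-const-stores-as-ft option in the
+; RUN line.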
+
+attributes #0 = { nounwind readonly uwtable sanitize_numericalstability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/scalable_vector.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/scalable_vector.ll
new file mode 100644
index 00000000000000..4c4919cde3355d
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/scalable_vector.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -S %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @add_scalable_vector(<vscale x 2 x float> %a) sanitize_numericalstability {
+; CHECK-LABEL: @add_scalable_vector(
+; CHECK-NEXT: [[ADD:%.*]] = fadd <vscale x 2 x float> [[A:%.*]], [[A]]
+; CHECK-NEXT: ret void
+;
+ %add = fadd <vscale x 2 x float> %a, %a
+ ret void
+}
+
+attributes #0 = { nounwind readonly uwtable sanitize_numericalstability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+
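+; As the checks above show, the scalable-vector fadd is left untouched: no
+; shadow computation is emitted for it by the pass.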