diff options
Diffstat (limited to 'lib/Transforms/Instrumentation')
| -rw-r--r-- | lib/Transforms/Instrumentation/AddressSanitizer.cpp | 353 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/BoundsChecking.cpp | 264 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/CFGMST.h | 74 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 131 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/GCOVProfiling.cpp | 31 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 327 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 472 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/InstrProfiling.cpp | 4 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/Instrumentation.cpp | 3 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/MemorySanitizer.cpp | 938 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 308 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 42 | ||||
| -rw-r--r-- | lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 260 | 
14 files changed, 2023 insertions, 1185 deletions
| diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f8d255273b2a..8328d4031941 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1,4 +1,4 @@ -//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===// +//===- AddressSanitizer.cpp - memory error detector -----------------------===//  //  //                     The LLVM Compiler Infrastructure  // @@ -9,59 +9,81 @@  //  // This file is a part of AddressSanitizer, an address sanity checker.  // Details of the algorithm: -//  http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm +//  https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm  //  //===----------------------------------------------------------------------===//  #include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h"  #include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h"  #include "llvm/ADT/Triple.h"  #include "llvm/ADT/Twine.h"  #include "llvm/Analysis/MemoryBuiltins.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/MachO.h"  #include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CallSite.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h"  #include "llvm/IR/DIBuilder.h"  #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h"  #include 
"llvm/IR/IRBuilder.h"  #include "llvm/IR/InlineAsm.h"  #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h"  #include "llvm/IR/Module.h"  #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h"  #include "llvm/MC/MCSectionMachO.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h" -#include "llvm/Support/DataTypes.h"  #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h"  #include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/SwapByteOrder.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Transforms/Scalar.h"  #include "llvm/Transforms/Utils/ASanStackFrameLayout.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/ModuleUtils.h"  #include "llvm/Transforms/Utils/PromoteMemToReg.h"  #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint>  #include <iomanip>  #include <limits> +#include <memory>  #include <sstream>  #include <string> -#include <system_error> +#include <tuple>  using namespace llvm; @@ -70,21 +92,25 @@ using namespace llvm;  static const uint64_t kDefaultShadowScale = 3;  static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;  static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; -static const uint64_t kDynamicShadowSentinel = ~(uint64_t)0; +static const uint64_t kDynamicShadowSentinel = +    std::numeric_limits<uint64_t>::max();  static const uint64_t kIOSShadowOffset32 = 1ULL << 30;  static const uint64_t kIOSSimShadowOffset32 = 1ULL 
<< 30;  static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64; -static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000;  // < 2G. +static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF;  // < 2G. +static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;  static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000; -static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41; +static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44;  static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;  static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;  static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;  static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;  static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;  static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; +static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;  static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;  static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; +  // The shadow memory space is dynamically allocated.  static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel; @@ -111,8 +137,8 @@ static const char *const kAsanUnregisterElfGlobalsName =  static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";  static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";  static const char *const kAsanInitName = "__asan_init"; -static const char *const kAsanVersionCheckName = -    "__asan_version_mismatch_check_v8"; +static const char *const kAsanVersionCheckNamePrefix = +    "__asan_version_mismatch_check_v";  static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";  static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";  static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; @@ -148,9 +174,11 @@ static const size_t kNumberOfAccessSizes = 5;  static const unsigned kAllocaRzSize = 32;  // Command-line flags. 
+  static cl::opt<bool> ClEnableKasan(      "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),      cl::Hidden, cl::init(false)); +  static cl::opt<bool> ClRecover(      "asan-recover",      cl::desc("Enable recovery mode (continue-after-error)."), @@ -160,22 +188,38 @@ static cl::opt<bool> ClRecover(  static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",                                         cl::desc("instrument read instructions"),                                         cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClInstrumentWrites(      "asan-instrument-writes", cl::desc("instrument write instructions"),      cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClInstrumentAtomics(      "asan-instrument-atomics",      cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,      cl::init(true)); +  static cl::opt<bool> ClAlwaysSlowPath(      "asan-always-slow-path",      cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,      cl::init(false)); +  static cl::opt<bool> ClForceDynamicShadow(      "asan-force-dynamic-shadow",      cl::desc("Load shadow address into a local variable for each function"),      cl::Hidden, cl::init(false)); +static cl::opt<bool> +    ClWithIfunc("asan-with-ifunc", +                cl::desc("Access dynamic shadow through an ifunc global on " +                         "platforms that support this"), +                cl::Hidden, cl::init(true)); + +static cl::opt<bool> ClWithIfuncSuppressRemat( +    "asan-with-ifunc-suppress-remat", +    cl::desc("Suppress rematerialization of dynamic shadow address by passing " +             "it through inline asm in prologue."), +    cl::Hidden, cl::init(true)); +  // This flag limits the number of instructions to be instrumented  // in any given BB. 
Normally, this should be set to unlimited (INT_MAX),  // but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary @@ -184,6 +228,7 @@ static cl::opt<int> ClMaxInsnsToInstrumentPerBB(      "asan-max-ins-per-bb", cl::init(10000),      cl::desc("maximal number of instructions to instrument in any given BB"),      cl::Hidden); +  // This flag may need to be replaced with -f[no]asan-stack.  static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),                               cl::Hidden, cl::init(true)); @@ -192,32 +237,40 @@ static cl::opt<uint32_t> ClMaxInlinePoisoningSize(      cl::desc(          "Inline shadow poisoning for blocks up to the given size in bytes."),      cl::Hidden, cl::init(64)); +  static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",                                        cl::desc("Check stack-use-after-return"),                                        cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClRedzoneByvalArgs("asan-redzone-byval-args",                                          cl::desc("Create redzones for byval "                                                   "arguments (extra copy "                                                   "required)"), cl::Hidden,                                          cl::init(true)); +  static cl::opt<bool> ClUseAfterScope("asan-use-after-scope",                                       cl::desc("Check stack-use-after-scope"),                                       cl::Hidden, cl::init(false)); +  // This flag may need to be replaced with -f[no]asan-globals.  
static cl::opt<bool> ClGlobals("asan-globals",                                 cl::desc("Handle global objects"), cl::Hidden,                                 cl::init(true)); +  static cl::opt<bool> ClInitializers("asan-initialization-order",                                      cl::desc("Handle C++ initializer order"),                                      cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClInvalidPointerPairs(      "asan-detect-invalid-pointer-pair",      cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,      cl::init(false)); +  static cl::opt<unsigned> ClRealignStack(      "asan-realign-stack",      cl::desc("Realign stack to the value of this flag (power of two)"),      cl::Hidden, cl::init(32)); +  static cl::opt<int> ClInstrumentationWithCallsThreshold(      "asan-instrumentation-with-call-threshold",      cl::desc( @@ -225,14 +278,17 @@ static cl::opt<int> ClInstrumentationWithCallsThreshold(          "this number of memory accesses, use callbacks instead of "          "inline checks (-1 means never use callbacks)."),      cl::Hidden, cl::init(7000)); +  static cl::opt<std::string> ClMemoryAccessCallbackPrefix(      "asan-memory-access-callback-prefix",      cl::desc("Prefix for memory access callbacks"), cl::Hidden,      cl::init("__asan_")); +  static cl::opt<bool>      ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas",                                 cl::desc("instrument dynamic allocas"),                                 cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClSkipPromotableAllocas(      "asan-skip-promotable-allocas",      cl::desc("Do not instrument promotable allocas"), cl::Hidden, @@ -241,9 +297,11 @@ static cl::opt<bool> ClSkipPromotableAllocas(  // These flags allow to change the shadow mapping.  
// The shadow mapping looks like  //    Shadow = (Mem >> scale) + offset +  static cl::opt<int> ClMappingScale("asan-mapping-scale",                                     cl::desc("scale of asan shadow mapping"),                                     cl::Hidden, cl::init(0)); +  static cl::opt<unsigned long long> ClMappingOffset(      "asan-mapping-offset",      cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), cl::Hidden, @@ -251,14 +309,18 @@ static cl::opt<unsigned long long> ClMappingOffset(  // Optimization flags. Not user visible, used mostly for testing  // and benchmarking the tool. +  static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),                             cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClOptSameTemp(      "asan-opt-same-temp", cl::desc("Instrument the same temp just once"),      cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClOptGlobals("asan-opt-globals",                                    cl::desc("Don't instrument scalar globals"),                                    cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClOptStack(      "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"),      cl::Hidden, cl::init(false)); @@ -293,14 +355,19 @@ static cl::opt<bool>                   cl::Hidden, cl::init(true));  // Debug flags. 
+  static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,                              cl::init(0)); +  static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),                                   cl::Hidden, cl::init(0)); +  static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden,                                          cl::desc("Debug func")); +  static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),                                 cl::Hidden, cl::init(-1)); +  static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),                                 cl::Hidden, cl::init(-1)); @@ -312,13 +379,14 @@ STATISTIC(NumOptimizedAccessesToStackVar,            "Number of optimized accesses to stack vars");  namespace { +  /// Frontend-provided metadata for source location.  struct LocationMetadata {    StringRef Filename; -  int LineNo; -  int ColumnNo; +  int LineNo = 0; +  int ColumnNo = 0; -  LocationMetadata() : Filename(), LineNo(0), ColumnNo(0) {} +  LocationMetadata() = default;    bool empty() const { return Filename.empty(); } @@ -335,16 +403,17 @@ struct LocationMetadata {  /// Frontend-provided metadata for global variables.  class GlobalsMetadata { - public: +public:    struct Entry { -    Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {}      LocationMetadata SourceLoc;      StringRef Name; -    bool IsDynInit; -    bool IsBlacklisted; +    bool IsDynInit = false; +    bool IsBlacklisted = false; + +    Entry() = default;    }; -  GlobalsMetadata() : inited_(false) {} +  GlobalsMetadata() = default;    void reset() {      inited_ = false; @@ -384,46 +453,57 @@ class GlobalsMetadata {      return (Pos != Entries.end()) ? 
Pos->second : Entry();    } - private: -  bool inited_; +private: +  bool inited_ = false;    DenseMap<GlobalVariable *, Entry> Entries;  };  /// This struct defines the shadow mapping using the rule:  ///   shadow = (mem >> Scale) ADD-or-OR Offset. +/// If InGlobal is true, then +///   extern char __asan_shadow[]; +///   shadow = (mem >> Scale) + &__asan_shadow  struct ShadowMapping {    int Scale;    uint64_t Offset;    bool OrShadowOffset; +  bool InGlobal;  }; +} // end anonymous namespace +  static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,                                        bool IsKasan) {    bool IsAndroid = TargetTriple.isAndroid();    bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();    bool IsFreeBSD = TargetTriple.isOSFreeBSD(); +  bool IsNetBSD = TargetTriple.isOSNetBSD();    bool IsPS4CPU = TargetTriple.isPS4CPU();    bool IsLinux = TargetTriple.isOSLinux(); -  bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 || -                 TargetTriple.getArch() == llvm::Triple::ppc64le; -  bool IsSystemZ = TargetTriple.getArch() == llvm::Triple::systemz; -  bool IsX86 = TargetTriple.getArch() == llvm::Triple::x86; -  bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; -  bool IsMIPS32 = TargetTriple.getArch() == llvm::Triple::mips || -                  TargetTriple.getArch() == llvm::Triple::mipsel; -  bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || -                  TargetTriple.getArch() == llvm::Triple::mips64el; -  bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64; +  bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 || +                 TargetTriple.getArch() == Triple::ppc64le; +  bool IsSystemZ = TargetTriple.getArch() == Triple::systemz; +  bool IsX86 = TargetTriple.getArch() == Triple::x86; +  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; +  bool IsMIPS32 = TargetTriple.getArch() == Triple::mips || +                  
TargetTriple.getArch() == Triple::mipsel; +  bool IsMIPS64 = TargetTriple.getArch() == Triple::mips64 || +                  TargetTriple.getArch() == Triple::mips64el; +  bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); +  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64;    bool IsWindows = TargetTriple.isOSWindows();    bool IsFuchsia = TargetTriple.isOSFuchsia();    ShadowMapping Mapping; +  Mapping.Scale = kDefaultShadowScale; +  if (ClMappingScale.getNumOccurrences() > 0) { +    Mapping.Scale = ClMappingScale; +  } +    if (LongSize == 32) { -    // Android is always PIE, which means that the beginning of the address -    // space is always available.      if (IsAndroid) -      Mapping.Offset = 0; +      Mapping.Offset = kDynamicShadowSentinel;      else if (IsMIPS32)        Mapping.Offset = kMIPS32_ShadowOffset32;      else if (IsFreeBSD) @@ -446,13 +526,16 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,        Mapping.Offset = kSystemZ_ShadowOffset64;      else if (IsFreeBSD)        Mapping.Offset = kFreeBSD_ShadowOffset64; +    else if (IsNetBSD) +      Mapping.Offset = kNetBSD_ShadowOffset64;      else if (IsPS4CPU)        Mapping.Offset = kPS4CPU_ShadowOffset64;      else if (IsLinux && IsX86_64) {        if (IsKasan)          Mapping.Offset = kLinuxKasan_ShadowOffset64;        else -        Mapping.Offset = kSmallX86_64ShadowOffset; +        Mapping.Offset = (kSmallX86_64ShadowOffsetBase & +                          (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale));      } else if (IsWindows && IsX86_64) {        Mapping.Offset = kWindowsShadowOffset64;      } else if (IsMIPS64) @@ -472,11 +555,6 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,      Mapping.Offset = kDynamicShadowSentinel;    } -  Mapping.Scale = kDefaultShadowScale; -  if (ClMappingScale.getNumOccurrences() > 0) { -    Mapping.Scale = ClMappingScale; -  } -    if 
(ClMappingOffset.getNumOccurrences() > 0) {      Mapping.Offset = ClMappingOffset;    } @@ -489,6 +567,9 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,    Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU &&                             !(Mapping.Offset & (Mapping.Offset - 1)) &&                             Mapping.Offset != kDynamicShadowSentinel; +  bool IsAndroidWithIfuncSupport = +      IsAndroid && !TargetTriple.isAndroidVersionLT(21); +  Mapping.InGlobal = ClWithIfunc && IsAndroidWithIfuncSupport && IsArmOrThumb;    return Mapping;  } @@ -499,23 +580,30 @@ static size_t RedzoneSizeForScale(int MappingScale) {    return std::max(32U, 1U << MappingScale);  } +namespace { +  /// AddressSanitizer: instrument the code in module to find memory bugs.  struct AddressSanitizer : public FunctionPass { +  // Pass identification, replacement for typeid +  static char ID; +    explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false,                              bool UseAfterScope = false)        : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan),          Recover(Recover || ClRecover), -        UseAfterScope(UseAfterScope || ClUseAfterScope), -        LocalDynamicShadow(nullptr) { +        UseAfterScope(UseAfterScope || ClUseAfterScope) {      initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());    } +    StringRef getPassName() const override {      return "AddressSanitizerFunctionPass";    } +    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<DominatorTreeWrapperPass>();      AU.addRequired<TargetLibraryInfoWrapperPass>();    } +    uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {      uint64_t ArraySize = 1;      if (AI.isArrayAllocation()) { @@ -528,6 +616,7 @@ struct AddressSanitizer : public FunctionPass {          AI.getModule()->getDataLayout().getTypeAllocSize(Ty);      return SizeInBytes * ArraySize;    } +    /// Check 
if we want (and can) handle this alloca.    bool isInterestingAlloca(const AllocaInst &AI); @@ -538,6 +627,7 @@ struct AddressSanitizer : public FunctionPass {    Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,                                     uint64_t *TypeSize, unsigned *Alignment,                                     Value **MaybeMask = nullptr); +    void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,                       bool UseCalls, const DataLayout &DL);    void instrumentPointerComparisonOrSubtraction(Instruction *I); @@ -562,11 +652,12 @@ struct AddressSanitizer : public FunctionPass {    void markEscapedLocalAllocas(Function &F);    bool doInitialization(Module &M) override;    bool doFinalization(Module &M) override; -  static char ID;  // Pass identification, replacement for typeid    DominatorTree &getDominatorTree() const { return *DT; } - private: +private: +  friend struct FunctionStackPoisoner; +    void initializeCallbacks(Module &M);    bool LooksLikeCodeInBug11395(Instruction *I); @@ -577,11 +668,13 @@ struct AddressSanitizer : public FunctionPass {    /// Helper to cleanup per-function state.    struct FunctionStateRAII {      AddressSanitizer *Pass; +      FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) {        assert(Pass->ProcessedAllocas.empty() &&               "last pass forgot to clear cache");        assert(!Pass->LocalDynamicShadow);      } +      ~FunctionStateRAII() {        Pass->LocalDynamicShadow = nullptr;        Pass->ProcessedAllocas.clear(); @@ -599,23 +692,28 @@ struct AddressSanitizer : public FunctionPass {    DominatorTree *DT;    Function *AsanHandleNoReturnFunc;    Function *AsanPtrCmpFunction, *AsanPtrSubFunction; -  // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize). +  Constant *AsanShadowGlobal; + +  // These arrays is indexed by AccessIsWrite, Experiment and log2(AccessSize).    
Function *AsanErrorCallback[2][2][kNumberOfAccessSizes];    Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes]; -  // This array is indexed by AccessIsWrite and Experiment. + +  // These arrays is indexed by AccessIsWrite and Experiment.    Function *AsanErrorCallbackSized[2][2];    Function *AsanMemoryAccessCallbackSized[2][2]; +    Function *AsanMemmove, *AsanMemcpy, *AsanMemset;    InlineAsm *EmptyAsm; -  Value *LocalDynamicShadow; +  Value *LocalDynamicShadow = nullptr;    GlobalsMetadata GlobalsMD;    DenseMap<const AllocaInst *, bool> ProcessedAllocas; - -  friend struct FunctionStackPoisoner;  };  class AddressSanitizerModule : public ModulePass {  public: +  // Pass identification, replacement for typeid +  static char ID; +    explicit AddressSanitizerModule(bool CompileKernel = false,                                    bool Recover = false,                                    bool UseGlobalsGC = true) @@ -630,8 +728,8 @@ public:          // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to          // do globals-gc.          UseCtorComdat(UseGlobalsGC && ClWithComdat) {} +    bool runOnModule(Module &M) override; -  static char ID; // Pass identification, replacement for typeid    StringRef getPassName() const override { return "AddressSanitizerModule"; }  private: @@ -667,6 +765,7 @@ private:    size_t MinRedzoneSizeForGlobal() const {      return RedzoneSizeForScale(Mapping.Scale);    } +  int GetAsanVersion(const Module &M) const;    GlobalsMetadata GlobalsMD;    bool CompileKernel; @@ -735,7 +834,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {    IntrinsicInst *LocalEscapeCall = nullptr;    // Maps Value to an AllocaInst from which the Value is originated. 
-  typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy; +  using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;    AllocaForValueMapTy AllocaForValue;    bool HasNonEmptyInlineAsm = false; @@ -756,7 +855,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {    bool runOnFunction() {      if (!ClStack) return false; -    if (ClRedzoneByvalArgs && Mapping.Offset != kDynamicShadowSentinel) +    if (ClRedzoneByvalArgs)        copyArgsPassedByValToAllocas();      // Collect alloca, ret, lifetime instructions etc. @@ -899,8 +998,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {    void visitCallSite(CallSite CS) {      Instruction *I = CS.getInstruction();      if (CallInst *CI = dyn_cast<CallInst>(I)) { -      HasNonEmptyInlineAsm |= -          CI->isInlineAsm() && !CI->isIdenticalTo(EmptyInlineAsm.get()); +      HasNonEmptyInlineAsm |= CI->isInlineAsm() && +                              !CI->isIdenticalTo(EmptyInlineAsm.get()) && +                              I != ASan.LocalDynamicShadow;        HasReturnsTwiceCall |= CI->canReturnTwice();      }    } @@ -938,9 +1038,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {                       Instruction *ThenTerm, Value *ValueIfFalse);  }; -} // anonymous namespace +} // end anonymous namespace  char AddressSanitizer::ID = 0; +  INITIALIZE_PASS_BEGIN(      AddressSanitizer, "asan",      "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, @@ -951,6 +1052,7 @@ INITIALIZE_PASS_END(      AddressSanitizer, "asan",      "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,      false) +  FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,                                                         bool Recover,                                                         bool UseAfterScope) { @@ -959,11 +1061,13 @@ FunctionPass 
*llvm::createAddressSanitizerFunctionPass(bool CompileKernel,  }  char AddressSanitizerModule::ID = 0; +  INITIALIZE_PASS(      AddressSanitizerModule, "asan-module",      "AddressSanitizer: detects use-after-free and out-of-bounds bugs."      "ModulePass",      false, false) +  ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,                                                     bool Recover,                                                     bool UseGlobalsGC) { @@ -1544,7 +1648,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {      // Callbacks put into the CRT initializer/terminator sections      // should not be instrumented. -    // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 +    // See https://github.com/google/sanitizers/issues/305      // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx      if (Section.startswith(".CRT")) {        DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n"); @@ -1567,7 +1671,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {          DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");          return false;        } -      // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 +      // See https://github.com/google/sanitizers/issues/32        // Constant CFString instances are compiled in the following way:        //  -- the string buffer is emitted into        //     __TEXT,__cstring,cstring_literals @@ -1683,9 +1787,14 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(        C = M.getOrInsertComdat(G->getName());      } -    // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. -    if (TargetTriple.isOSBinFormatCOFF()) +    // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. Also upgrade private +    // linkage to internal linkage so that a symbol table entry is emitted. This +    // is necessary in order to create the comdat group. 
+    if (TargetTriple.isOSBinFormatCOFF()) {        C->setSelectionKind(Comdat::NoDuplicates); +      if (G->hasPrivateLinkage()) +        G->setLinkage(GlobalValue::InternalLinkage); +    }      G->setComdat(C);    } @@ -1871,6 +1980,8 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(    auto AllGlobals = new GlobalVariable(        M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,        ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), ""); +  if (Mapping.Scale > 3) +    AllGlobals->setAlignment(1ULL << Mapping.Scale);    IRB.CreateCall(AsanRegisterGlobals,                   {IRB.CreatePointerCast(AllGlobals, IntptrTy), @@ -2070,6 +2181,16 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool    return true;  } +int AddressSanitizerModule::GetAsanVersion(const Module &M) const { +  int LongSize = M.getDataLayout().getPointerSizeInBits(); +  bool isAndroid = Triple(M.getTargetTriple()).isAndroid(); +  int Version = 8; +  // 32-bit Android is one version ahead because of the switch to dynamic +  // shadow. +  Version += (LongSize == 32 && isAndroid); +  return Version; +} +  bool AddressSanitizerModule::runOnModule(Module &M) {    C = &(M.getContext());    int LongSize = M.getDataLayout().getPointerSizeInBits(); @@ -2083,9 +2204,11 @@ bool AddressSanitizerModule::runOnModule(Module &M) {    // Create a module constructor. A destructor is created lazily because not all    // platforms, and not all modules need it. 
+  std::string VersionCheckName = +      kAsanVersionCheckNamePrefix + std::to_string(GetAsanVersion(M));    std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(        M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, -      /*InitArgs=*/{}, kAsanVersionCheckName); +      /*InitArgs=*/{}, VersionCheckName);    bool CtorComdat = true;    bool Changed = false; @@ -2134,31 +2257,31 @@ void AddressSanitizer::initializeCallbacks(Module &M) {          Args2.push_back(ExpType);          Args1.push_back(ExpType);        } -	    AsanErrorCallbackSized[AccessIsWrite][Exp] = -	        checkSanitizerInterfaceFunction(M.getOrInsertFunction( -	            kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + -	                EndingStr, -	            FunctionType::get(IRB.getVoidTy(), Args2, false))); - -	    AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = -	        checkSanitizerInterfaceFunction(M.getOrInsertFunction( -	            ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr, -	            FunctionType::get(IRB.getVoidTy(), Args2, false))); - -	    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; -	         AccessSizeIndex++) { -	      const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex); -	      AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = -	          checkSanitizerInterfaceFunction(M.getOrInsertFunction( -	              kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr, -	              FunctionType::get(IRB.getVoidTy(), Args1, false))); - -	      AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = -	          checkSanitizerInterfaceFunction(M.getOrInsertFunction( -	              ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr, -	              FunctionType::get(IRB.getVoidTy(), Args1, false))); -	    } -	  } +      AsanErrorCallbackSized[AccessIsWrite][Exp] = +          checkSanitizerInterfaceFunction(M.getOrInsertFunction( +       
       kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + +                  EndingStr, +              FunctionType::get(IRB.getVoidTy(), Args2, false))); + +      AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = +          checkSanitizerInterfaceFunction(M.getOrInsertFunction( +              ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr, +              FunctionType::get(IRB.getVoidTy(), Args2, false))); + +      for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; +           AccessSizeIndex++) { +        const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex); +        AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = +            checkSanitizerInterfaceFunction(M.getOrInsertFunction( +                kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr, +                FunctionType::get(IRB.getVoidTy(), Args1, false))); + +        AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = +            checkSanitizerInterfaceFunction(M.getOrInsertFunction( +                ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr, +                FunctionType::get(IRB.getVoidTy(), Args1, false))); +      } +    }    }    const std::string MemIntrinCallbackPrefix = @@ -2184,6 +2307,9 @@ void AddressSanitizer::initializeCallbacks(Module &M) {    EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),                              StringRef(""), StringRef(""),                              /*hasSideEffects=*/true); +  if (Mapping.InGlobal) +    AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow", +                                           ArrayType::get(IRB.getInt8Ty(), 0));  }  // virtual @@ -2229,9 +2355,25 @@ void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {      return;    IRBuilder<> IRB(&F.front().front()); -  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( -      kAsanShadowMemoryDynamicAddress, IntptrTy); -  
LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress); +  if (Mapping.InGlobal) { +    if (ClWithIfuncSuppressRemat) { +      // An empty inline asm with input reg == output reg. +      // An opaque pointer-to-int cast, basically. +      InlineAsm *Asm = InlineAsm::get( +          FunctionType::get(IntptrTy, {AsanShadowGlobal->getType()}, false), +          StringRef(""), StringRef("=r,0"), +          /*hasSideEffects=*/false); +      LocalDynamicShadow = +          IRB.CreateCall(Asm, {AsanShadowGlobal}, ".asan.shadow"); +    } else { +      LocalDynamicShadow = +          IRB.CreatePointerCast(AsanShadowGlobal, IntptrTy, ".asan.shadow"); +    } +  } else { +    Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( +        kAsanShadowMemoryDynamicAddress, IntptrTy); +    LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress); +  }  }  void AddressSanitizer::markEscapedLocalAllocas(Function &F) { @@ -2378,7 +2520,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {    bool ChangedStack = FSP.runOnFunction();    // We must unpoison the stack before every NoReturn call (throw, _exit, etc). -  // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37 +  // See e.g. 
https://github.com/google/sanitizers/issues/37    for (auto CI : NoReturnCalls) {      IRBuilder<> IRB(CI);      IRB.CreateCall(AsanHandleNoReturnFunc, {}); @@ -2546,8 +2688,13 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) {  }  void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { -  BasicBlock &FirstBB = *F.begin(); -  IRBuilder<> IRB(&FirstBB, FirstBB.getFirstInsertionPt()); +  Instruction *CopyInsertPoint = &F.front().front(); +  if (CopyInsertPoint == ASan.LocalDynamicShadow) { +    // Insert after the dynamic shadow location is determined +    CopyInsertPoint = CopyInsertPoint->getNextNode(); +    assert(CopyInsertPoint); +  } +  IRBuilder<> IRB(CopyInsertPoint);    const DataLayout &DL = F.getParent()->getDataLayout();    for (Argument &Arg : F.args()) {      if (Arg.hasByValAttr()) { @@ -2674,9 +2821,10 @@ void FunctionStackPoisoner::processStaticAllocas() {    // Minimal header size (left redzone) is 4 pointers,    // i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms. -  size_t MinHeaderSize = ASan.LongSize / 2; +  size_t Granularity = 1ULL << Mapping.Scale; +  size_t MinHeaderSize = std::max((size_t)ASan.LongSize / 2, Granularity);    const ASanStackFrameLayout &L = -      ComputeASanStackFrameLayout(SVD, 1ULL << Mapping.Scale, MinHeaderSize); +      ComputeASanStackFrameLayout(SVD, Granularity, MinHeaderSize);    // Build AllocaToSVDMap for ASanStackVariableDescription lookup.    DenseMap<const AllocaInst *, ASanStackVariableDescription *> AllocaToSVDMap; @@ -2721,8 +2869,12 @@ void FunctionStackPoisoner::processStaticAllocas() {    Value *FakeStack;    Value *LocalStackBase; +  Value *LocalStackBaseAlloca; +  bool Deref;    if (DoStackMalloc) { +    LocalStackBaseAlloca = +        IRB.CreateAlloca(IntptrTy, nullptr, "asan_local_stack_base");      // void *FakeStack = __asan_option_detect_stack_use_after_return      //     ? 
__asan_stack_malloc_N(LocalStackSize)      //     : nullptr; @@ -2753,24 +2905,31 @@ void FunctionStackPoisoner::processStaticAllocas() {      IRBIf.SetCurrentDebugLocation(EntryDebugLocation);      Value *AllocaValue =          DoDynamicAlloca ? createAllocaForLayout(IRBIf, L, true) : StaticAlloca; +      IRB.SetInsertPoint(InsBefore);      IRB.SetCurrentDebugLocation(EntryDebugLocation);      LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack); +    IRB.SetCurrentDebugLocation(EntryDebugLocation); +    IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca); +    Deref = true;    } else {      // void *FakeStack = nullptr;      // void *LocalStackBase = alloca(LocalStackSize);      FakeStack = ConstantInt::get(IntptrTy, 0);      LocalStackBase =          DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca; +    LocalStackBaseAlloca = LocalStackBase; +    Deref = false;    }    // Replace Alloca instructions with base+offset.    for (const auto &Desc : SVD) {      AllocaInst *AI = Desc.AI; +    replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, Deref, +                               Desc.Offset, DIExpression::NoDeref);      Value *NewAllocaPtr = IRB.CreateIntToPtr(          IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),          AI->getType()); -    replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref);      AI->replaceAllUsesWith(NewAllocaPtr);    } diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index a193efe902cf..be9a22a8681b 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -6,25 +6,33 @@  // License. See LICENSE.TXT for details.  
//  //===----------------------------------------------------------------------===// -// -// This file implements a pass that instruments the code to perform run-time -// bounds checking on loads, stores, and other memory intrinsics. -// -//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Instrumentation/BoundsChecking.h"  #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h"  #include "llvm/Analysis/MemoryBuiltins.h"  #include "llvm/Analysis/TargetFolder.h"  #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h"  #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Value.h"  #include "llvm/Pass.h" +#include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Instrumentation.h" +#include <cstdint> +#include <vector> +  using namespace llvm;  #define DEBUG_TYPE "bounds-checking" @@ -36,102 +44,30 @@ STATISTIC(ChecksAdded, "Bounds checks added");  STATISTIC(ChecksSkipped, "Bounds checks skipped");  STATISTIC(ChecksUnable, "Bounds checks unable to add"); -typedef IRBuilder<TargetFolder> BuilderTy; - -namespace { -  struct BoundsChecking : public FunctionPass { -    static char ID; - -    BoundsChecking() : FunctionPass(ID) { -      initializeBoundsCheckingPass(*PassRegistry::getPassRegistry()); -    } - -    bool runOnFunction(Function &F) override; - -    void getAnalysisUsage(AnalysisUsage &AU) const override { -      AU.addRequired<TargetLibraryInfoWrapperPass>(); -    } - -  private: -    const TargetLibraryInfo *TLI; -    ObjectSizeOffsetEvaluator *ObjSizeEval; -    
BuilderTy *Builder; -    Instruction *Inst; -    BasicBlock *TrapBB; - -    BasicBlock *getTrapBB(); -    void emitBranchToTrap(Value *Cmp = nullptr); -    bool instrument(Value *Ptr, Value *Val, const DataLayout &DL); - }; -} - -char BoundsChecking::ID = 0; -INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking", -                false, false) - - -/// getTrapBB - create a basic block that traps. All overflowing conditions -/// branch to this block. There's only one trap block per function. -BasicBlock *BoundsChecking::getTrapBB() { -  if (TrapBB && SingleTrapBB) -    return TrapBB; - -  Function *Fn = Inst->getParent()->getParent(); -  IRBuilder<>::InsertPointGuard Guard(*Builder); -  TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); -  Builder->SetInsertPoint(TrapBB); - -  llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); -  CallInst *TrapCall = Builder->CreateCall(F, {}); -  TrapCall->setDoesNotReturn(); -  TrapCall->setDoesNotThrow(); -  TrapCall->setDebugLoc(Inst->getDebugLoc()); -  Builder->CreateUnreachable(); - -  return TrapBB; -} - - -/// emitBranchToTrap - emit a branch instruction to a trap block. -/// If Cmp is non-null, perform a jump only if its value evaluates to true. 
-void BoundsChecking::emitBranchToTrap(Value *Cmp) { -  // check if the comparison is always false -  ConstantInt *C = dyn_cast_or_null<ConstantInt>(Cmp); -  if (C) { -    ++ChecksSkipped; -    if (!C->getZExtValue()) -      return; -    else -      Cmp = nullptr; // unconditional branch -  } -  ++ChecksAdded; - -  BasicBlock::iterator Inst = Builder->GetInsertPoint(); -  BasicBlock *OldBB = Inst->getParent(); -  BasicBlock *Cont = OldBB->splitBasicBlock(Inst); -  OldBB->getTerminator()->eraseFromParent(); - -  if (Cmp) -    BranchInst::Create(getTrapBB(), Cont, Cmp, OldBB); -  else -    BranchInst::Create(getTrapBB(), OldBB); -} - +using BuilderTy = IRBuilder<TargetFolder>; -/// instrument - adds run-time bounds checks to memory accessing instructions. -/// Ptr is the pointer that will be read/written, and InstVal is either the -/// result from the load or the value being stored. It is used to determine the -/// size of memory block that is touched. +/// Adds run-time bounds checks to memory accessing instructions. +/// +/// \p Ptr is the pointer that will be read/written, and \p InstVal is either +/// the result from the load or the value being stored. It is used to determine +/// the size of memory block that is touched. +/// +/// \p GetTrapBB is a callable that returns the trap BB to use on failure. +///  /// Returns true if any change was made to the IR, false otherwise. 
-bool BoundsChecking::instrument(Value *Ptr, Value *InstVal, -                                const DataLayout &DL) { +template <typename GetTrapBBT> +static bool instrumentMemAccess(Value *Ptr, Value *InstVal, +                                const DataLayout &DL, TargetLibraryInfo &TLI, +                                ObjectSizeOffsetEvaluator &ObjSizeEval, +                                BuilderTy &IRB, +                                GetTrapBBT GetTrapBB) {    uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());    DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)                << " bytes\n"); -  SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Ptr); +  SizeOffsetEvalType SizeOffset = ObjSizeEval.compute(Ptr); -  if (!ObjSizeEval->bothKnown(SizeOffset)) { +  if (!ObjSizeEval.bothKnown(SizeOffset)) {      ++ChecksUnable;      return false;    } @@ -150,56 +86,101 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal,    //    // optimization: if Size >= 0 (signed), skip 1st check    // FIXME: add NSW/NUW here?  -- we dont care if the subtraction overflows -  Value *ObjSize = Builder->CreateSub(Size, Offset); -  Value *Cmp2 = Builder->CreateICmpULT(Size, Offset); -  Value *Cmp3 = Builder->CreateICmpULT(ObjSize, NeededSizeVal); -  Value *Or = Builder->CreateOr(Cmp2, Cmp3); +  Value *ObjSize = IRB.CreateSub(Size, Offset); +  Value *Cmp2 = IRB.CreateICmpULT(Size, Offset); +  Value *Cmp3 = IRB.CreateICmpULT(ObjSize, NeededSizeVal); +  Value *Or = IRB.CreateOr(Cmp2, Cmp3);    if (!SizeCI || SizeCI->getValue().slt(0)) { -    Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); -    Or = Builder->CreateOr(Cmp1, Or); +    Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); +    Or = IRB.CreateOr(Cmp1, Or); +  } + +  // check if the comparison is always false +  ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or); +  if (C) { +    ++ChecksSkipped; +    // If zero, the check can never fail: nothing to do. 
+    if (!C->getZExtValue()) +      return true; +  } +  ++ChecksAdded; + +  BasicBlock::iterator SplitI = IRB.GetInsertPoint(); +  BasicBlock *OldBB = SplitI->getParent(); +  BasicBlock *Cont = OldBB->splitBasicBlock(SplitI); +  OldBB->getTerminator()->eraseFromParent(); + +  if (C) { +    // If we have a non-zero constant, the check always fails: unconditionally branch. +    // FIXME: We should really handle this differently to bypass splitting +    // the block. +    BranchInst::Create(GetTrapBB(IRB), OldBB); +    return true;    } -  emitBranchToTrap(Or); +  // Create the conditional branch. +  BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);    return true;  } -bool BoundsChecking::runOnFunction(Function &F) { +static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI) {    const DataLayout &DL = F.getParent()->getDataLayout(); -  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - -  TrapBB = nullptr; -  BuilderTy TheBuilder(F.getContext(), TargetFolder(DL)); -  Builder = &TheBuilder; -  ObjectSizeOffsetEvaluator TheObjSizeEval(DL, TLI, F.getContext(), +  ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(),                                             /*RoundToAlign=*/true); -  ObjSizeEval = &TheObjSizeEval;    // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory    // touching instructions -  std::vector<Instruction*> WorkList; -  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { -    Instruction *I = &*i; +  std::vector<Instruction *> WorkList; +  for (Instruction &I : instructions(F)) {      if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||          isa<AtomicRMWInst>(I)) -        WorkList.push_back(I); +        WorkList.push_back(&I);    } -  bool MadeChange = false; -  for (Instruction *i : WorkList) { -    Inst = i; +  // Create a trapping basic block on demand using a callback. 
Depending on +  // flags, this will either create a single block for the entire function or +  // will create a fresh block every time it is called. +  BasicBlock *TrapBB = nullptr; +  auto GetTrapBB = [&TrapBB](BuilderTy &IRB) { +    if (TrapBB && SingleTrapBB) +      return TrapBB; + +    Function *Fn = IRB.GetInsertBlock()->getParent(); +    // FIXME: This debug location doesn't make a lot of sense in the +    // `SingleTrapBB` case. +    auto DebugLoc = IRB.getCurrentDebugLocation(); +    IRBuilder<>::InsertPointGuard Guard(IRB); +    TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); +    IRB.SetInsertPoint(TrapBB); + +    auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); +    CallInst *TrapCall = IRB.CreateCall(F, {}); +    TrapCall->setDoesNotReturn(); +    TrapCall->setDoesNotThrow(); +    TrapCall->setDebugLoc(DebugLoc); +    IRB.CreateUnreachable(); + +    return TrapBB; +  }; -    Builder->SetInsertPoint(Inst); +  bool MadeChange = false; +  for (Instruction *Inst : WorkList) { +    BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL));      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { -      MadeChange |= instrument(LI->getPointerOperand(), LI, DL); +      MadeChange |= instrumentMemAccess(LI->getPointerOperand(), LI, DL, TLI, +                                        ObjSizeEval, IRB, GetTrapBB);      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {        MadeChange |= -          instrument(SI->getPointerOperand(), SI->getValueOperand(), DL); +          instrumentMemAccess(SI->getPointerOperand(), SI->getValueOperand(), +                              DL, TLI, ObjSizeEval, IRB, GetTrapBB);      } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {        MadeChange |= -          instrument(AI->getPointerOperand(), AI->getCompareOperand(), DL); +          instrumentMemAccess(AI->getPointerOperand(), AI->getCompareOperand(), +                              DL, TLI, 
ObjSizeEval, IRB, GetTrapBB);      } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {        MadeChange |= -          instrument(AI->getPointerOperand(), AI->getValOperand(), DL); +          instrumentMemAccess(AI->getPointerOperand(), AI->getValOperand(), DL, +                              TLI, ObjSizeEval, IRB, GetTrapBB);      } else {        llvm_unreachable("unknown Instruction type");      } @@ -207,6 +188,41 @@ bool BoundsChecking::runOnFunction(Function &F) {    return MadeChange;  } -FunctionPass *llvm::createBoundsCheckingPass() { -  return new BoundsChecking(); +PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) { +  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + +  if (!addBoundsChecking(F, TLI)) +    return PreservedAnalyses::all(); + +  return PreservedAnalyses::none(); +} + +namespace { +struct BoundsCheckingLegacyPass : public FunctionPass { +  static char ID; + +  BoundsCheckingLegacyPass() : FunctionPass(ID) { +    initializeBoundsCheckingLegacyPassPass(*PassRegistry::getPassRegistry()); +  } + +  bool runOnFunction(Function &F) override { +    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); +    return addBoundsChecking(F, TLI); +  } + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<TargetLibraryInfoWrapperPass>(); +  } +}; +} // namespace + +char BoundsCheckingLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(BoundsCheckingLegacyPass, "bounds-checking", +                      "Run-time bounds checking", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(BoundsCheckingLegacyPass, "bounds-checking", +                    "Run-time bounds checking", false, false) + +FunctionPass *llvm::createBoundsCheckingLegacyPass() { +  return new BoundsCheckingLegacyPass();  } diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index 16e2e6b4e730..075e5672cff8 100644 --- 
a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -46,6 +46,10 @@ public:    // This map records the auxiliary information for each BB.    DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos; +  // Whether the function has an exit block with no successors. +  // (For a function with an infinite loop, this block may be absent) +  bool ExitBlockFound = false; +    // Find the root group of the G and compress the path from G to the root.    BBInfo *findAndCompressGroup(BBInfo *G) {      if (G->Group != G) @@ -95,14 +99,20 @@ public:    void buildEdges() {      DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); -    const BasicBlock *BB = &(F.getEntryBlock()); +    const BasicBlock *Entry = &(F.getEntryBlock());      uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); +    Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, +        *ExitOutgoing = nullptr, *ExitIncoming = nullptr; +    uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; +      // Add a fake edge to the entry. -    addEdge(nullptr, BB, EntryWeight); +    EntryIncoming = &addEdge(nullptr, Entry, EntryWeight); +    DEBUG(dbgs() << "  Edge: from fake node to " << Entry->getName() +                     << " w = " << EntryWeight << "\n");      // Special handling for single BB functions. 
-    if (succ_empty(BB)) { -      addEdge(BB, nullptr, EntryWeight); +    if (succ_empty(Entry)) { +      addEdge(Entry, nullptr, EntryWeight);        return;      } @@ -126,16 +136,62 @@ public:            }            if (BPI != nullptr)              Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); -          addEdge(&*BB, TargetBB, Weight).IsCritical = Critical; +          auto *E = &addEdge(&*BB, TargetBB, Weight); +          E->IsCritical = Critical;            DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to "                         << TargetBB->getName() << "  w=" << Weight << "\n"); + +          // Keep track of entry/exit edges: +          if (&*BB == Entry) { +            if (Weight > MaxEntryOutWeight) { +              MaxEntryOutWeight = Weight; +              EntryOutgoing = E; +            } +          } + +          auto *TargetTI = TargetBB->getTerminator(); +          if (TargetTI && !TargetTI->getNumSuccessors()) { +            if (Weight > MaxExitInWeight) { +              MaxExitInWeight = Weight; +              ExitIncoming = E; +            } +          }          }        } else { -        addEdge(&*BB, nullptr, BBWeight); -        DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to exit" +        ExitBlockFound = true; +        Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight); +        if (BBWeight > MaxExitOutWeight) { +          MaxExitOutWeight = BBWeight; +          ExitOutgoing = ExitO; +        } +        DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to fake exit"                       << " w = " << BBWeight << "\n");        }      } + +    // Entry/exit edge adjustment heuristic: +    // prefer instrumenting the entry edge over the exit edge +    // if possible. Those exit edges may never have a chance to be +    // executed (for instance, when the program is an event handling loop) +    // before the profile is asynchronously dumped. 
+    // +    // If EntryIncoming and ExitOutgoing have similar weights, make sure +    // ExitOutgoing is selected as the min-edge. Similarly, if EntryOutgoing +    // and ExitIncoming have similar weights, make sure ExitIncoming becomes +    // the min-edge. +    uint64_t EntryInWeight = EntryWeight; + +    if (EntryInWeight >= MaxExitOutWeight && +        EntryInWeight * 2 < MaxExitOutWeight * 3) { +      EntryIncoming->Weight = MaxExitOutWeight; +      ExitOutgoing->Weight = EntryInWeight + 1; +    } + +    if (MaxEntryOutWeight >= MaxExitInWeight && +        MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { +      EntryOutgoing->Weight = MaxExitInWeight; +      ExitIncoming->Weight = MaxEntryOutWeight + 1; +    }    }    // Sort CFG edges based on its weight. @@ -167,6 +223,10 @@ public:      for (auto &Ei : AllEdges) {        if (Ei->Removed)          continue; +      // If we detect infinite loops, force +      // instrumenting the entry edge: +      if (!ExitBlockFound && Ei->SrcBB == nullptr) +        continue;        if (unionGroups(Ei->SrcBB, Ei->DestBB))          Ei->InMST = true;      } diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index f2806e278e6e..66fdcb3ccc49 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMInstrumentation    SanitizerCoverage.cpp    ThreadSanitizer.cpp    EfficiencySanitizer.cpp +  HWAddressSanitizer.cpp    ADDITIONAL_HEADER_DIRS    ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index ddc975cbed1a..09bcbb282653 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1,4 +1,4 @@ -//===-- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===// +//===- DataFlowSanitizer.cpp - 
dynamic data flow analysis -----------------===//  //  //                     The LLVM Compiler Infrastructure  // @@ -6,6 +6,7 @@  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===// +//  /// \file  /// This file is a part of DataFlowSanitizer, a generalised dynamic data flow  /// analysis. @@ -43,32 +44,63 @@  ///  /// For more information, please refer to the design document:  /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html +// +//===----------------------------------------------------------------------===//  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/DenseSet.h"  #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h"  #include "llvm/ADT/Triple.h"  #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InlineAsm.h"  #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h"  #include "llvm/IR/Type.h" +#include "llvm/IR/User.h"  #include "llvm/IR/Value.h"  #include "llvm/Pass.h" +#include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h"  #include 
"llvm/Support/SpecialCaseList.h"  #include "llvm/Transforms/Instrumentation.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h"  #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint>  #include <iterator> +#include <memory>  #include <set> +#include <string>  #include <utility> +#include <vector>  using namespace llvm; @@ -129,10 +161,7 @@ static cl::opt<bool> ClDebugNonzeroLabels(               "load or return with a nonzero label"),      cl::Hidden); - -namespace { - -StringRef GetGlobalTypeString(const GlobalValue &G) { +static StringRef GetGlobalTypeString(const GlobalValue &G) {    // Types of GlobalVariables are always pointer types.    Type *GType = G.getValueType();    // For now we support blacklisting struct types only. @@ -143,11 +172,13 @@ StringRef GetGlobalTypeString(const GlobalValue &G) {    return "<unknown type>";  } +namespace { +  class DFSanABIList {    std::unique_ptr<SpecialCaseList> SCL;   public: -  DFSanABIList() {} +  DFSanABIList() = default;    void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); } @@ -155,7 +186,7 @@ class DFSanABIList {    /// given category.    bool isIn(const Function &F, StringRef Category) const {      return isIn(*F.getParent(), Category) || -           SCL->inSection("fun", F.getName(), Category); +           SCL->inSection("dataflow", "fun", F.getName(), Category);    }    /// Returns whether this global alias is listed in the given category. 
@@ -167,15 +198,16 @@ class DFSanABIList {        return true;      if (isa<FunctionType>(GA.getValueType())) -      return SCL->inSection("fun", GA.getName(), Category); +      return SCL->inSection("dataflow", "fun", GA.getName(), Category); -    return SCL->inSection("global", GA.getName(), Category) || -           SCL->inSection("type", GetGlobalTypeString(GA), Category); +    return SCL->inSection("dataflow", "global", GA.getName(), Category) || +           SCL->inSection("dataflow", "type", GetGlobalTypeString(GA), +                          Category);    }    /// Returns whether this module is listed in the given category.    bool isIn(const Module &M, StringRef Category) const { -    return SCL->inSection("src", M.getModuleIdentifier(), Category); +    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);    }  }; @@ -255,7 +287,7 @@ class DataFlowSanitizer : public ModulePass {    DFSanABIList ABIList;    DenseMap<Value *, Function *> UnwrappedFnMap;    AttrBuilder ReadOnlyNoneAttrs; -  bool DFSanRuntimeShadowMask; +  bool DFSanRuntimeShadowMask = false;    Value *getShadowAddress(Value *Addr, Instruction *Pos);    bool isInstrumented(const Function *F); @@ -271,11 +303,13 @@ class DataFlowSanitizer : public ModulePass {                                   FunctionType *NewFT);    Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); - public: +public: +  static char ID; +    DataFlowSanitizer(        const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),        void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr); -  static char ID; +    bool doInitialization(Module &M) override;    bool runOnModule(Module &M) override;  }; @@ -286,12 +320,12 @@ struct DFSanFunction {    DominatorTree DT;    DataFlowSanitizer::InstrumentedABI IA;    bool IsNativeABI; -  Value *ArgTLSPtr; -  Value *RetvalTLSPtr; -  AllocaInst *LabelReturnAlloca; +  Value *ArgTLSPtr = nullptr; +  Value *RetvalTLSPtr 
= nullptr; +  AllocaInst *LabelReturnAlloca = nullptr;    DenseMap<Value *, Value *> ValShadowMap;    DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap; -  std::vector<std::pair<PHINode *, PHINode *> > PHIFixups; +  std::vector<std::pair<PHINode *, PHINode *>> PHIFixups;    DenseSet<Instruction *> SkipInsts;    std::vector<Value *> NonZeroChecks;    bool AvoidNewBlocks; @@ -305,14 +339,13 @@ struct DFSanFunction {    DenseMap<Value *, std::set<Value *>> ShadowElements;    DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) -      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), -        IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr), -        LabelReturnAlloca(nullptr) { +      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {      DT.recalculate(*F);      // FIXME: Need to track down the register allocator issue which causes poor      // performance in pathological cases with large numbers of basic blocks.      AvoidNewBlocks = F->size() > 1000;    } +    Value *getArgTLSPtr();    Value *getArgTLS(unsigned Index, Instruction *Pos);    Value *getRetvalTLS(); @@ -327,8 +360,9 @@ struct DFSanFunction {  };  class DFSanVisitor : public InstVisitor<DFSanVisitor> { - public: +public:    DFSanFunction &DFSF; +    DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}    const DataLayout &getDataLayout() const { @@ -336,7 +370,6 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> {    }    void visitOperandShadowInst(Instruction &I); -    void visitBinaryOperator(BinaryOperator &BO);    void visitCastInst(CastInst &CI);    void visitCmpInst(CmpInst &CI); @@ -357,9 +390,10 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> {    void visitMemTransferInst(MemTransferInst &I);  }; -} +} // end anonymous namespace  char DataFlowSanitizer::ID; +  INITIALIZE_PASS(DataFlowSanitizer, "dfsan",                  "DataFlowSanitizer: dynamic data flow analysis.", false, false) @@ -373,8 +407,7 @@ 
llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,  DataFlowSanitizer::DataFlowSanitizer(      const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),      void *(*getRetValTLS)()) -    : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), -      DFSanRuntimeShadowMask(false) { +    : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {    std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));    AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),                           ClABIListFiles.end()); @@ -382,7 +415,7 @@ DataFlowSanitizer::DataFlowSanitizer(  }  FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { -  llvm::SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end()); +  SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());    ArgTypes.append(T->getNumParams(), ShadowTy);    if (T->isVarArg())      ArgTypes.push_back(ShadowPtrTy); @@ -394,7 +427,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {  FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {    assert(!T->isVarArg()); -  llvm::SmallVector<Type *, 4> ArgTypes; +  SmallVector<Type *, 4> ArgTypes;    ArgTypes.push_back(T->getPointerTo());    ArgTypes.append(T->param_begin(), T->param_end());    ArgTypes.append(T->getNumParams(), ShadowTy); @@ -405,7 +438,7 @@ FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {  }  FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { -  llvm::SmallVector<Type *, 4> ArgTypes; +  SmallVector<Type *, 4> ArgTypes;    for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end();         i != e; ++i) {      FunctionType *FT; @@ -428,12 +461,12 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {  }  bool DataFlowSanitizer::doInitialization(Module &M) { -  llvm::Triple 
TargetTriple(M.getTargetTriple()); -  bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; -  bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || -                  TargetTriple.getArch() == llvm::Triple::mips64el; -  bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64 || -                   TargetTriple.getArch() == llvm::Triple::aarch64_be; +  Triple TargetTriple(M.getTargetTriple()); +  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; +  bool IsMIPS64 = TargetTriple.getArch() == Triple::mips64 || +                  TargetTriple.getArch() == Triple::mips64el; +  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 || +                   TargetTriple.getArch() == Triple::aarch64_be;    const DataLayout &DL = M.getDataLayout(); @@ -654,7 +687,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {                                                    DFSanVarargWrapperFnTy);    std::vector<Function *> FnsToInstrument; -  llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI; +  SmallPtrSet<Function *, 2> FnsWithNativeABI;    for (Function &i : M) {      if (!i.isIntrinsic() &&          &i != DFSanUnionFn && @@ -797,11 +830,11 @@ bool DataFlowSanitizer::runOnModule(Module &M) {      // DFSanVisitor may create new basic blocks, which confuses df_iterator.      // Build a copy of the list before iterating over it. -    llvm::SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock())); +    SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock()));      for (BasicBlock *i : BBList) {        Instruction *Inst = &i->front(); -      while (1) { +      while (true) {          // DFSanVisitor may split the current basic block, changing the current          // instruction's next pointer and moving the next instruction to the          // tail block from which we should continue. @@ -821,7 +854,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {      // until we have visited every block.  
Therefore, the code that handles phi      // nodes adds them to the PHIFixups list so that they can be properly      // handled here. -    for (std::vector<std::pair<PHINode *, PHINode *> >::iterator +    for (std::vector<std::pair<PHINode *, PHINode *>>::iterator               i = DFSF.PHIFixups.begin(),               e = DFSF.PHIFixups.end();           i != e; ++i) { @@ -1045,8 +1078,7 @@ void DFSanVisitor::visitOperandShadowInst(Instruction &I) {  Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,                                   Instruction *Pos) {    if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { -    llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i = -        AllocaShadowMap.find(AI); +    const auto i = AllocaShadowMap.find(AI);      if (i != AllocaShadowMap.end()) {        IRBuilder<> IRB(Pos);        return IRB.CreateLoad(i->second); @@ -1187,8 +1219,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {  void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,                                  Value *Shadow, Instruction *Pos) {    if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { -    llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i = -        AllocaShadowMap.find(AI); +    const auto i = AllocaShadowMap.find(AI);      if (i != AllocaShadowMap.end()) {        IRBuilder<> IRB(Pos);        IRB.CreateStore(Shadow, i->second); @@ -1409,24 +1440,21 @@ void DFSanVisitor::visitCallSite(CallSite CS) {    if (i != DFSF.DFS.UnwrappedFnMap.end()) {      Function *F = i->second;      switch (DFSF.DFS.getWrapperKind(F)) { -    case DataFlowSanitizer::WK_Warning: { +    case DataFlowSanitizer::WK_Warning:        CS.setCalledFunction(F);        IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,                       IRB.CreateGlobalStringPtr(F->getName()));        DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);        return; -    } -    case DataFlowSanitizer::WK_Discard: { +    case 
DataFlowSanitizer::WK_Discard:        CS.setCalledFunction(F);        DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);        return; -    } -    case DataFlowSanitizer::WK_Functional: { +    case DataFlowSanitizer::WK_Functional:        CS.setCalledFunction(F);        visitOperandShadowInst(*CS.getInstruction());        return; -    } -    case DataFlowSanitizer::WK_Custom: { +    case DataFlowSanitizer::WK_Custom:        // Don't try to handle invokes of custom functions, it's too complicated.        // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_        // wrapper. @@ -1526,7 +1554,6 @@ void DFSanVisitor::visitCallSite(CallSite CS) {        }        break;      } -    }    }    FunctionType *FT = cast<FunctionType>( diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 56d0f5e983ca..67ca8172b0d5 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -21,6 +21,7 @@  #include "llvm/ADT/StringExtras.h"  #include "llvm/ADT/StringMap.h"  #include "llvm/ADT/UniqueVector.h" +#include "llvm/Analysis/EHPersonalities.h"  #include "llvm/IR/DebugInfo.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/IRBuilder.h" @@ -502,6 +503,23 @@ static bool functionHasLines(Function &F) {    return false;  } +static bool isUsingFuncletBasedEH(Function &F) { +  if (!F.hasPersonalityFn()) return false; + +  EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); +  return isFuncletEHPersonality(Personality); +} + +static bool shouldKeepInEntry(BasicBlock::iterator It) { +	if (isa<AllocaInst>(*It)) return true; +	if (isa<DbgInfoIntrinsic>(*It)) return true; +	if (auto *II = dyn_cast<IntrinsicInst>(It)) { +		if (II->getIntrinsicID() == llvm::Intrinsic::localescape) return true; +	} + +	return false; +} +  void GCOVProfiler::emitProfileNotes() {    NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");    
if (!CU_Nodes) return; @@ -519,6 +537,12 @@ void GCOVProfiler::emitProfileNotes() {      std::error_code EC;      raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, sys::fs::F_None); +    if (EC) { +      Ctx->emitError(Twine("failed to open coverage notes file for writing: ") + +                     EC.message()); +      continue; +    } +      std::string EdgeDestinations;      unsigned FunctionIdent = 0; @@ -526,12 +550,14 @@ void GCOVProfiler::emitProfileNotes() {        DISubprogram *SP = F.getSubprogram();        if (!SP) continue;        if (!functionHasLines(F)) continue; +      // TODO: Functions using funclet-based EH are currently not supported. +      if (isUsingFuncletBasedEH(F)) continue;        // gcov expects every function to start with an entry block that has a        // single successor, so split the entry block to make sure of that.        BasicBlock &EntryBlock = F.getEntryBlock();        BasicBlock::iterator It = EntryBlock.begin(); -      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) +      while (shouldKeepInEntry(It))          ++It;        EntryBlock.splitBasicBlock(It); @@ -603,7 +629,10 @@ bool GCOVProfiler::emitProfileArcs() {        DISubprogram *SP = F.getSubprogram();        if (!SP) continue;        if (!functionHasLines(F)) continue; +      // TODO: Functions using funclet-based EH are currently not supported. 
+      if (isUsingFuncletBasedEH(F)) continue;        if (!Result) Result = true; +        unsigned Edges = 0;        for (auto &BB : F) {          TerminatorInst *TI = BB.getTerminator(); diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp new file mode 100644 index 000000000000..2a25423e04bd --- /dev/null +++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -0,0 +1,327 @@ +//===- HWAddressSanitizer.cpp - address sanity checker ----------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file is a part of HWAddressSanitizer, an address sanity checker +/// based on tagged addressing. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/Function.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include 
"llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "hwasan" + +static const char *const kHwasanModuleCtorName = "hwasan.module_ctor"; +static const char *const kHwasanInitName = "__hwasan_init"; + +// Accesses sizes are powers of two: 1, 2, 4, 8, 16. +static const size_t kNumberOfAccessSizes = 5; + +static const size_t kShadowScale = 4; +static const unsigned kPointerTagShift = 56; + +static cl::opt<std::string> ClMemoryAccessCallbackPrefix( +    "hwasan-memory-access-callback-prefix", +    cl::desc("Prefix for memory access callbacks"), cl::Hidden, +    cl::init("__hwasan_")); + +static cl::opt<bool> +    ClInstrumentWithCalls("hwasan-instrument-with-calls", +                cl::desc("instrument reads and writes with callbacks"), +                cl::Hidden, cl::init(false)); + +static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads", +                                       cl::desc("instrument read instructions"), +                                       cl::Hidden, cl::init(true)); + +static cl::opt<bool> ClInstrumentWrites( +    "hwasan-instrument-writes", cl::desc("instrument write instructions"), +    cl::Hidden, cl::init(true)); + +static cl::opt<bool> ClInstrumentAtomics( +    "hwasan-instrument-atomics", +    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, +    cl::init(true)); + +namespace { + +/// \brief An instrumentation pass implementing detection of addressability bugs +/// using tagged pointers. +class HWAddressSanitizer : public FunctionPass { +public: +  // Pass identification, replacement for typeid. 
+  static char ID; + +  HWAddressSanitizer() : FunctionPass(ID) {} + +  StringRef getPassName() const override { return "HWAddressSanitizer"; } + +  bool runOnFunction(Function &F) override; +  bool doInitialization(Module &M) override; + +  void initializeCallbacks(Module &M); +  void instrumentMemAccessInline(Value *PtrLong, bool IsWrite, +                                 unsigned AccessSizeIndex, +                                 Instruction *InsertBefore); +  bool instrumentMemAccess(Instruction *I); +  Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, +                                   uint64_t *TypeSize, unsigned *Alignment, +                                   Value **MaybeMask); + +private: +  LLVMContext *C; +  Type *IntptrTy; + +  Function *HwasanCtorFunction; + +  Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes]; +  Function *HwasanMemoryAccessCallbackSized[2]; +}; + +} // end anonymous namespace + +char HWAddressSanitizer::ID = 0; + +INITIALIZE_PASS_BEGIN( +    HWAddressSanitizer, "hwasan", +    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, false) +INITIALIZE_PASS_END( +    HWAddressSanitizer, "hwasan", +    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, false) + +FunctionPass *llvm::createHWAddressSanitizerPass() { +  return new HWAddressSanitizer(); +} + +/// \brief Module-level initialization. +/// +/// inserts a call to __hwasan_init to the module's constructor list. 
+bool HWAddressSanitizer::doInitialization(Module &M) { +  DEBUG(dbgs() << "Init " << M.getName() << "\n"); +  auto &DL = M.getDataLayout(); + +  Triple TargetTriple(M.getTargetTriple()); + +  C = &(M.getContext()); +  IRBuilder<> IRB(*C); +  IntptrTy = IRB.getIntPtrTy(DL); + +  std::tie(HwasanCtorFunction, std::ignore) = +      createSanitizerCtorAndInitFunctions(M, kHwasanModuleCtorName, +                                          kHwasanInitName, +                                          /*InitArgTypes=*/{}, +                                          /*InitArgs=*/{}); +  appendToGlobalCtors(M, HwasanCtorFunction, 0); +  return true; +} + +void HWAddressSanitizer::initializeCallbacks(Module &M) { +  IRBuilder<> IRB(*C); +  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { +    const std::string TypeStr = AccessIsWrite ? "store" : "load"; + +    HwasanMemoryAccessCallbackSized[AccessIsWrite] = +        checkSanitizerInterfaceFunction(M.getOrInsertFunction( +            ClMemoryAccessCallbackPrefix + TypeStr, +            FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false))); + +    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; +         AccessSizeIndex++) { +      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] = +          checkSanitizerInterfaceFunction(M.getOrInsertFunction( +              ClMemoryAccessCallbackPrefix + TypeStr + +                  itostr(1ULL << AccessSizeIndex), +              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false))); +    } +  } +} + +Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I, +                                                   bool *IsWrite, +                                                   uint64_t *TypeSize, +                                                   unsigned *Alignment, +                                                   Value **MaybeMask) { +  // Skip memory accesses inserted by another instrumentation. 
+  if (I->getMetadata("nosanitize")) return nullptr; + +  Value *PtrOperand = nullptr; +  const DataLayout &DL = I->getModule()->getDataLayout(); +  if (LoadInst *LI = dyn_cast<LoadInst>(I)) { +    if (!ClInstrumentReads) return nullptr; +    *IsWrite = false; +    *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); +    *Alignment = LI->getAlignment(); +    PtrOperand = LI->getPointerOperand(); +  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { +    if (!ClInstrumentWrites) return nullptr; +    *IsWrite = true; +    *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); +    *Alignment = SI->getAlignment(); +    PtrOperand = SI->getPointerOperand(); +  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { +    if (!ClInstrumentAtomics) return nullptr; +    *IsWrite = true; +    *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); +    *Alignment = 0; +    PtrOperand = RMW->getPointerOperand(); +  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { +    if (!ClInstrumentAtomics) return nullptr; +    *IsWrite = true; +    *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); +    *Alignment = 0; +    PtrOperand = XCHG->getPointerOperand(); +  } + +  if (PtrOperand) { +    // Do not instrument accesses from different address spaces; we cannot deal +    // with them. +    Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType()); +    if (PtrTy->getPointerAddressSpace() != 0) +      return nullptr; + +    // Ignore swifterror addresses. +    // swifterror memory addresses are mem2reg promoted by instruction +    // selection. As such they cannot have regular uses like an instrumentation +    // function and it makes no sense to track them as memory. 
+    if (PtrOperand->isSwiftError()) +      return nullptr; +  } + +  return PtrOperand; +} + +static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { +  size_t Res = countTrailingZeros(TypeSize / 8); +  assert(Res < kNumberOfAccessSizes); +  return Res; +} + +void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite, +                                                   unsigned AccessSizeIndex, +                                                   Instruction *InsertBefore) { +  IRBuilder<> IRB(InsertBefore); +  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift), IRB.getInt8Ty()); +  Value *AddrLong = +      IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(), +                                              ~(0xFFULL << kPointerTagShift))); +  Value *ShadowLong = IRB.CreateLShr(AddrLong, kShadowScale); +  Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, IRB.getInt8PtrTy())); +  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag); + +  TerminatorInst *CheckTerm = +      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false, +                                MDBuilder(*C).createBranchWeights(1, 100000)); + +  IRB.SetInsertPoint(CheckTerm); +  // The signal handler will find the data address in x0. 
+  InlineAsm *Asm = InlineAsm::get( +      FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false), +      "hlt #" + itostr(0x100 + IsWrite * 0x10 + AccessSizeIndex), "{x0}", +      /*hasSideEffects=*/true); +  IRB.CreateCall(Asm, PtrLong); +} + +bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) { +  DEBUG(dbgs() << "Instrumenting: " << *I << "\n"); +  bool IsWrite = false; +  unsigned Alignment = 0; +  uint64_t TypeSize = 0; +  Value *MaybeMask = nullptr; +  Value *Addr = +      isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask); + +  if (!Addr) +    return false; + +  if (MaybeMask) +    return false; //FIXME + +  IRBuilder<> IRB(I); +  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); +  if (isPowerOf2_64(TypeSize) && +      (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) && +      (Alignment >= (1UL << kShadowScale) || Alignment == 0 || +       Alignment >= TypeSize / 8)) { +    size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); +    if (ClInstrumentWithCalls) { +      IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex], +                     AddrLong); +    } else { +      instrumentMemAccessInline(AddrLong, IsWrite, AccessSizeIndex, I); +    } +  } else { +    IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite], +                   {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)}); +  } + +  return true; +} + +bool HWAddressSanitizer::runOnFunction(Function &F) { +  if (&F == HwasanCtorFunction) +    return false; + +  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress)) +    return false; + +  DEBUG(dbgs() << "Function: " << F.getName() << "\n"); + +  initializeCallbacks(*F.getParent()); + +  bool Changed = false; +  SmallVector<Instruction*, 16> ToInstrument; +  for (auto &BB : F) { +    for (auto &Inst : BB) { +      Value *MaybeMask = nullptr; +      bool IsWrite; +      unsigned Alignment; +      uint64_t TypeSize; +      Value *Addr = isInterestingMemoryAccess(&Inst, 
&IsWrite, &TypeSize, +                                              &Alignment, &MaybeMask); +      if (Addr || isa<MemIntrinsic>(Inst)) +        ToInstrument.push_back(&Inst); +    } +  } + +  for (auto Inst : ToInstrument) +    Changed |= instrumentMemAccess(Inst); + +  return Changed; +} diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 4089d81ea3e1..49b8a67a6c14 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -1,4 +1,4 @@ -//===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===// +//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//  //  //                      The LLVM Compiler Infrastructure  // @@ -14,13 +14,15 @@  //===----------------------------------------------------------------------===//  #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/GlobalsModRef.h"  #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"  #include "llvm/Analysis/IndirectCallSiteVisitor.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/Attributes.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CallSite.h"  #include "llvm/IR/DerivedTypes.h" @@ -34,20 +36,23 @@  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/PassManager.h"  #include "llvm/IR/Type.h" +#include "llvm/IR/Value.h"  #include "llvm/Pass.h" -#include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h"  #include "llvm/ProfileData/InstrProf.h"  #include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h"  #include "llvm/Support/Debug.h" -#include 
"llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Instrumentation.h"  #include "llvm/Transforms/PGOInstrumentation.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h"  #include <cassert>  #include <cstdint> +#include <memory> +#include <string> +#include <utility>  #include <vector>  using namespace llvm; @@ -110,6 +115,7 @@ static cl::opt<bool>                   cl::desc("Dump IR after transformation happens"));  namespace { +  class PGOIndirectCallPromotionLegacyPass : public ModulePass {  public:    static char ID; @@ -120,6 +126,10 @@ public:          *PassRegistry::getPassRegistry());    } +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<ProfileSummaryInfoWrapperPass>(); +  } +    StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }  private: @@ -133,13 +143,20 @@ private:    // the promoted direct call.    
bool SamplePGO;  }; +  } // end anonymous namespace  char PGOIndirectCallPromotionLegacyPass::ID = 0; -INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", -                "Use PGO instrumentation profile to promote indirect calls to " -                "direct calls.", -                false, false) + +INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", +                      "Use PGO instrumentation profile to promote indirect " +                      "calls to direct calls.", +                      false, false) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", +                    "Use PGO instrumentation profile to promote indirect " +                    "calls to direct calls.", +                    false, false)  ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,                                                             bool SamplePGO) { @@ -147,6 +164,7 @@ ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,  }  namespace { +  // The class for main data structure to promote indirect calls to conditional  // direct calls.  class ICallPromotionFunc { @@ -160,14 +178,13 @@ private:    bool SamplePGO; -  // Test if we can legally promote this direct-call of Target. -  bool isPromotionLegal(Instruction *Inst, uint64_t Target, Function *&F, -                        const char **Reason = nullptr); +  OptimizationRemarkEmitter &ORE;    // A struct that records the direct target and it's call count.    
struct PromotionCandidate {      Function *TargetFunction;      uint64_t Count; +      PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}    }; @@ -186,72 +203,17 @@ private:                          const std::vector<PromotionCandidate> &Candidates,                          uint64_t &TotalCount); -  // Noncopyable -  ICallPromotionFunc(const ICallPromotionFunc &other) = delete; -  ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete; -  public:    ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, -                     bool SamplePGO) -      : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO) {} +                     bool SamplePGO, OptimizationRemarkEmitter &ORE) +      : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} +  ICallPromotionFunc(const ICallPromotionFunc &) = delete; +  ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete; -  bool processFunction(); +  bool processFunction(ProfileSummaryInfo *PSI);  }; -} // end anonymous namespace -bool llvm::isLegalToPromote(Instruction *Inst, Function *F, -                            const char **Reason) { -  // Check the return type. 
-  Type *CallRetType = Inst->getType(); -  if (!CallRetType->isVoidTy()) { -    Type *FuncRetType = F->getReturnType(); -    if (FuncRetType != CallRetType && -        !CastInst::isBitCastable(FuncRetType, CallRetType)) { -      if (Reason) -        *Reason = "Return type mismatch"; -      return false; -    } -  } - -  // Check if the arguments are compatible with the parameters -  FunctionType *DirectCalleeType = F->getFunctionType(); -  unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); -  CallSite CS(Inst); -  unsigned ArgNum = CS.arg_size(); - -  if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) { -    if (Reason) -      *Reason = "The number of arguments mismatch"; -    return false; -  } - -  for (unsigned I = 0; I < ParamNum; ++I) { -    Type *PTy = DirectCalleeType->getFunctionParamType(I); -    Type *ATy = CS.getArgument(I)->getType(); -    if (PTy == ATy) -      continue; -    if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) { -      if (Reason) -        *Reason = "Argument type mismatch"; -      return false; -    } -  } - -  DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to " -               << F->getName() << "\n"); -  return true; -} - -bool ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target, -                                          Function *&TargetFunction, -                                          const char **Reason) { -  TargetFunction = Symtab->getFunction(Target); -  if (TargetFunction == nullptr) { -    *Reason = "Cannot find the target"; -    return false; -  } -  return isLegalToPromote(Inst, TargetFunction, Reason); -} +} // end anonymous namespace  // Indirect-call promotion heuristic. The direct targets are sorted based on  // the count. Stop at the first target that is not promoted. 
@@ -279,51 +241,63 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(      if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {        DEBUG(dbgs() << " Not promote: User options.\n"); +      ORE.emit([&]() { +        return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) +               << " Not promote: User options"; +      });        break;      }      if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {        DEBUG(dbgs() << " Not promote: User option.\n"); +      ORE.emit([&]() { +        return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) +               << " Not promote: User options"; +      });        break;      }      if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {        DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); +      ORE.emit([&]() { +        return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst) +               << " Not promote: Cutoff reached"; +      }); +      break; +    } + +    Function *TargetFunction = Symtab->getFunction(Target); +    if (TargetFunction == nullptr) { +      DEBUG(dbgs() << " Not promote: Cannot find the target\n"); +      ORE.emit([&]() { +        return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst) +               << "Cannot promote indirect call: target not found"; +      });        break;      } -    Function *TargetFunction = nullptr; +      const char *Reason = nullptr; -    if (!isPromotionLegal(Inst, Target, TargetFunction, &Reason)) { -      StringRef TargetFuncName = Symtab->getFuncName(Target); -      DEBUG(dbgs() << " Not promote: " << Reason << "\n"); -      emitOptimizationRemarkMissed( -          F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(), -          Twine("Cannot promote indirect call to ") + -              (TargetFuncName.empty() ? 
Twine(Target) : Twine(TargetFuncName)) + -              Twine(" with count of ") + Twine(Count) + ": " + Reason); +    if (!isLegalToPromote(CallSite(Inst), TargetFunction, &Reason)) { +      using namespace ore; + +      ORE.emit([&]() { +        return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst) +               << "Cannot promote indirect call to " +               << NV("TargetFunction", TargetFunction) << " with count of " +               << NV("Count", Count) << ": " << Reason; +      });        break;      } +      Ret.push_back(PromotionCandidate(TargetFunction, Count));      TotalCount -= Count;    }    return Ret;  } -// Create a diamond structure for If_Then_Else. Also update the profile -// count. Do the fix-up for the invoke instruction. -static void createIfThenElse(Instruction *Inst, Function *DirectCallee, -                             uint64_t Count, uint64_t TotalCount, -                             BasicBlock **DirectCallBB, -                             BasicBlock **IndirectCallBB, -                             BasicBlock **MergeBB) { -  CallSite CS(Inst); -  Value *OrigCallee = CS.getCalledValue(); - -  IRBuilder<> BBBuilder(Inst); -  LLVMContext &Ctx = Inst->getContext(); -  Value *BCI1 = -      BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), ""); -  Value *BCI2 = -      BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), ""); -  Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, ""); +Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst, +                                            Function *DirectCallee, +                                            uint64_t Count, uint64_t TotalCount, +                                            bool AttachProfToDirectCall, +                                            OptimizationRemarkEmitter *ORE) {    uint64_t ElseCount = TotalCount - Count;    uint64_t MaxCount = (Count >= ElseCount ? 
Count : ElseCount); @@ -331,261 +305,26 @@ static void createIfThenElse(Instruction *Inst, Function *DirectCallee,    MDBuilder MDB(Inst->getContext());    MDNode *BranchWeights = MDB.createBranchWeights(        scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); -  TerminatorInst *ThenTerm, *ElseTerm; -  SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm, -                                BranchWeights); -  *DirectCallBB = ThenTerm->getParent(); -  (*DirectCallBB)->setName("if.true.direct_targ"); -  *IndirectCallBB = ElseTerm->getParent(); -  (*IndirectCallBB)->setName("if.false.orig_indirect"); -  *MergeBB = Inst->getParent(); -  (*MergeBB)->setName("if.end.icp"); - -  // Special handing of Invoke instructions. -  InvokeInst *II = dyn_cast<InvokeInst>(Inst); -  if (!II) -    return; - -  // We don't need branch instructions for invoke. -  ThenTerm->eraseFromParent(); -  ElseTerm->eraseFromParent(); - -  // Add jump from Merge BB to the NormalDest. This is needed for the newly -  // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB. -  BranchInst::Create(II->getNormalDest(), *MergeBB); -} - -// Find the PHI in BB that have the CallResult as the operand. -static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) { -  BasicBlock *From = Inst->getParent(); -  for (auto &I : *BB) { -    PHINode *PHI = dyn_cast<PHINode>(&I); -    if (!PHI) -      continue; -    int IX = PHI->getBasicBlockIndex(From); -    if (IX == -1) -      continue; -    Value *V = PHI->getIncomingValue(IX); -    if (dyn_cast<Instruction>(V) == Inst) -      return true; -  } -  return false; -} - -// This method fixes up PHI nodes in BB where BB is the UnwindDest of an -// invoke instruction. In BB, there may be PHIs with incoming block being -// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke -// instructions to its own BB, OrigBB is no longer the predecessor block of BB. 
-// Instead two new predecessors are added: IndirectCallBB and DirectCallBB, -// so the PHI node's incoming BBs need to be fixed up accordingly. -static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB, -                                  BasicBlock *OrigBB, -                                  BasicBlock *IndirectCallBB, -                                  BasicBlock *DirectCallBB) { -  for (auto &I : *BB) { -    PHINode *PHI = dyn_cast<PHINode>(&I); -    if (!PHI) -      continue; -    int IX = PHI->getBasicBlockIndex(OrigBB); -    if (IX == -1) -      continue; -    Value *V = PHI->getIncomingValue(IX); -    PHI->addIncoming(V, IndirectCallBB); -    PHI->setIncomingBlock(IX, DirectCallBB); -  } -} - -// This method fixes up PHI nodes in BB where BB is the NormalDest of an -// invoke instruction. In BB, there may be PHIs with incoming block being -// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke -// instructions to its own BB, a new incoming edge will be added to the original -// NormalDstBB from the IndirectCallBB. -static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB, -                                      BasicBlock *OrigBB, -                                      BasicBlock *IndirectCallBB, -                                      Instruction *NewInst) { -  for (auto &I : *BB) { -    PHINode *PHI = dyn_cast<PHINode>(&I); -    if (!PHI) -      continue; -    int IX = PHI->getBasicBlockIndex(OrigBB); -    if (IX == -1) -      continue; -    Value *V = PHI->getIncomingValue(IX); -    if (dyn_cast<Instruction>(V) == Inst) { -      PHI->setIncomingBlock(IX, IndirectCallBB); -      PHI->addIncoming(NewInst, OrigBB); -      continue; -    } -    PHI->addIncoming(V, IndirectCallBB); -  } -} - -// Add a bitcast instruction to the direct-call return value if needed. 
-static Instruction *insertCallRetCast(const Instruction *Inst, -                                      Instruction *DirectCallInst, -                                      Function *DirectCallee) { -  if (Inst->getType()->isVoidTy()) -    return DirectCallInst; - -  Type *CallRetType = Inst->getType(); -  Type *FuncRetType = DirectCallee->getReturnType(); -  if (FuncRetType == CallRetType) -    return DirectCallInst; - -  BasicBlock *InsertionBB; -  if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst)) -    InsertionBB = CI->getParent(); -  else -    InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest(); - -  return (new BitCastInst(DirectCallInst, CallRetType, "", -                          InsertionBB->getTerminator())); -} - -// Create a DirectCall instruction in the DirectCallBB. -// Parameter Inst is the indirect-call (invoke) instruction. -// DirectCallee is the decl of the direct-call (invoke) target. -// DirecallBB is the BB that the direct-call (invoke) instruction is inserted. -// MergeBB is the bottom BB of the if-then-else-diamond after the -// transformation. For invoke instruction, the edges from DirectCallBB and -// IndirectCallBB to MergeBB are removed before this call (during -// createIfThenElse). -static Instruction *createDirectCallInst(const Instruction *Inst, -                                         Function *DirectCallee, -                                         BasicBlock *DirectCallBB, -                                         BasicBlock *MergeBB) { -  Instruction *NewInst = Inst->clone(); -  if (CallInst *CI = dyn_cast<CallInst>(NewInst)) { -    CI->setCalledFunction(DirectCallee); -    CI->mutateFunctionType(DirectCallee->getFunctionType()); -  } else { -    // Must be an invoke instruction. Direct invoke's normal destination is -    // fixed up to MergeBB. MergeBB is the place where return cast is inserted. 
-    // Also since IndirectCallBB does not have an edge to MergeBB, there is no -    // need to insert new PHIs into MergeBB. -    InvokeInst *II = dyn_cast<InvokeInst>(NewInst); -    assert(II); -    II->setCalledFunction(DirectCallee); -    II->mutateFunctionType(DirectCallee->getFunctionType()); -    II->setNormalDest(MergeBB); -  } - -  DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(), -                                     NewInst); - -  // Clear the value profile data. -  NewInst->setMetadata(LLVMContext::MD_prof, nullptr); -  CallSite NewCS(NewInst); -  FunctionType *DirectCalleeType = DirectCallee->getFunctionType(); -  unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); -  for (unsigned I = 0; I < ParamNum; ++I) { -    Type *ATy = NewCS.getArgument(I)->getType(); -    Type *PTy = DirectCalleeType->getParamType(I); -    if (ATy != PTy) { -      BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst); -      NewCS.setArgument(I, BI); -    } -  } - -  return insertCallRetCast(Inst, NewInst, DirectCallee); -} - -// Create a PHI to unify the return values of calls. 
-static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult, -                             Function *DirectCallee) { -  if (Inst->getType()->isVoidTy()) -    return; - -  BasicBlock *RetValBB = CallResult->getParent(); - -  BasicBlock *PHIBB; -  if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult)) -    RetValBB = II->getNormalDest(); - -  PHIBB = RetValBB->getSingleSuccessor(); -  if (getCallRetPHINode(PHIBB, Inst)) -    return; - -  PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0); -  PHIBB->getInstList().push_front(CallRetPHI); -  Inst->replaceAllUsesWith(CallRetPHI); -  CallRetPHI->addIncoming(Inst, Inst->getParent()); -  CallRetPHI->addIncoming(CallResult, RetValBB); -} - -// This function does the actual indirect-call promotion transformation: -// For an indirect-call like: -//     Ret = (*Foo)(Args); -// It transforms to: -//     if (Foo == DirectCallee) -//        Ret1 = DirectCallee(Args); -//     else -//        Ret2 = (*Foo)(Args); -//     Ret = phi(Ret1, Ret2); -// It adds type casts for the args do not match the parameters and the return -// value. Branch weights metadata also updated. -// If \p AttachProfToDirectCall is true, a prof metadata is attached to the -// new direct call to contain \p Count. This is used by SamplePGO inliner to -// check callsite hotness. -// Returns the promoted direct call instruction. -Instruction *llvm::promoteIndirectCall(Instruction *Inst, -                                       Function *DirectCallee, uint64_t Count, -                                       uint64_t TotalCount, -                                       bool AttachProfToDirectCall) { -  assert(DirectCallee != nullptr); -  BasicBlock *BB = Inst->getParent(); -  // Just to suppress the non-debug build warning. 
-  (void)BB; -  DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); -  DEBUG(dbgs() << *BB << "\n"); - -  BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB; -  createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB, -                   &IndirectCallBB, &MergeBB);    Instruction *NewInst = -      createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB); +      promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights);    if (AttachProfToDirectCall) {      SmallVector<uint32_t, 1> Weights;      Weights.push_back(Count);      MDBuilder MDB(NewInst->getContext()); -    dyn_cast<Instruction>(NewInst->stripPointerCasts()) -        ->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); -  } - -  // Move Inst from MergeBB to IndirectCallBB. -  Inst->removeFromParent(); -  IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(), -                                       Inst); - -  if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) { -    // At this point, the original indirect invoke instruction has the original -    // UnwindDest and NormalDest. For the direct invoke instruction, the -    // NormalDest points to MergeBB, and MergeBB jumps to the original -    // NormalDest. MergeBB might have a new bitcast instruction for the return -    // value. The PHIs are with the original NormalDest. Since we now have two -    // incoming edges to NormalDest and UnwindDest, we have to do some fixups. -    // -    // UnwindDest will not use the return value. So pass nullptr here. -    fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB, -                          DirectCallBB); -    // We don't need to update the operand from NormalDest for DirectCallBB. -    // Pass nullptr here. 
-    fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB, -                              IndirectCallBB, NewInst); +    NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));    } -  insertCallRetPHI(Inst, NewInst, DirectCallee); +  using namespace ore; -  DEBUG(dbgs() << "\n== Basic Blocks After ==\n"); -  DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n"); - -  emitOptimizationRemark( -      BB->getContext(), "pgo-icall-prom", *BB->getParent(), Inst->getDebugLoc(), -      Twine("Promote indirect call to ") + DirectCallee->getName() + -          " with count " + Twine(Count) + " out of " + Twine(TotalCount)); +  if (ORE) +    ORE->emit([&]() { +      return OptimizationRemark(DEBUG_TYPE, "Promoted", Inst) +             << "Promote indirect call to " << NV("DirectCallee", DirectCallee) +             << " with count " << NV("Count", Count) << " out of " +             << NV("TotalCount", TotalCount); +    });    return NewInst;  } @@ -597,7 +336,8 @@ uint32_t ICallPromotionFunc::tryToPromote(    for (auto &C : Candidates) {      uint64_t Count = C.Count; -    promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO); +    pgo::promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, +                             SamplePGO, &ORE);      assert(TotalCount >= Count);      TotalCount -= Count;      NumOfPGOICallPromotion++; @@ -608,7 +348,7 @@ uint32_t ICallPromotionFunc::tryToPromote(  // Traverse all the indirect-call callsite and get the value profile  // annotation to perform indirect-call promotion. 
-bool ICallPromotionFunc::processFunction() { +bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {    bool Changed = false;    ICallPromotionAnalysis ICallAnalysis;    for (auto &I : findIndirectCallSites(F)) { @@ -616,7 +356,8 @@ bool ICallPromotionFunc::processFunction() {      uint64_t TotalCount;      auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(          I, NumVals, TotalCount, NumCandidates); -    if (!NumCandidates) +    if (!NumCandidates || +        (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))        continue;      auto PromotionCandidates = getPromotionCandidatesForCallSite(          I, ICallProfDataRef, TotalCount, NumCandidates); @@ -638,7 +379,9 @@ bool ICallPromotionFunc::processFunction() {  }  // A wrapper function that does the actual work. -static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) { +static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, +                                 bool InLTO, bool SamplePGO, +                                 ModuleAnalysisManager *AM = nullptr) {    if (DisableICP)      return false;    InstrProfSymtab Symtab; @@ -654,8 +397,20 @@ static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) {        continue;      if (F.hasFnAttribute(Attribute::OptimizeNone))        continue; -    ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO); -    bool FuncChanged = ICallPromotion.processFunction(); + +    std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; +    OptimizationRemarkEmitter *ORE; +    if (AM) { +      auto &FAM = +          AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); +      ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); +    } else { +      OwnedORE = llvm::make_unique<OptimizationRemarkEmitter>(&F); +      ORE = OwnedORE.get(); +    } + +    ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE); +    bool FuncChanged = 
ICallPromotion.processFunction(PSI);      if (ICPDUMPAFTER && FuncChanged) {        DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));        DEBUG(dbgs() << "\n"); @@ -670,15 +425,20 @@ static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) {  }  bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) { +  ProfileSummaryInfo *PSI = +      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); +    // Command-line option has the priority for InLTO. -  return promoteIndirectCalls(M, InLTO | ICPLTOMode, +  return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,                                SamplePGO | ICPSamplePGOMode);  }  PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,                                                  ModuleAnalysisManager &AM) { -  if (!promoteIndirectCalls(M, InLTO | ICPLTOMode, -                            SamplePGO | ICPSamplePGOMode)) +  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); + +  if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, +                            SamplePGO | ICPSamplePGOMode, &AM))      return PreservedAnalyses::all();    return PreservedAnalyses::none(); diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index db8fa8977947..9b70f95480e4 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -43,7 +43,6 @@  #include "llvm/Support/Error.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/LoopSimplify.h"  #include "llvm/Transforms/Utils/ModuleUtils.h"  #include "llvm/Transforms/Utils/SSAUpdater.h"  #include <algorithm> @@ -245,6 +244,9 @@ public:    }    bool run(int64_t *NumPromoted) { +    // Skip 'infinite' loops: +    if (ExitBlocks.size() == 0) +      return false;      unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);      if (MaxProm == 0)      
  return false; diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 7bb62d2c8455..8e9eea96ced7 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -58,7 +58,7 @@ BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,  void llvm::initializeInstrumentation(PassRegistry &Registry) {    initializeAddressSanitizerPass(Registry);    initializeAddressSanitizerModulePass(Registry); -  initializeBoundsCheckingPass(Registry); +  initializeBoundsCheckingLegacyPassPass(Registry);    initializeGCOVProfilerLegacyPassPass(Registry);    initializePGOInstrumentationGenLegacyPassPass(Registry);    initializePGOInstrumentationUseLegacyPassPass(Registry); @@ -66,6 +66,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {    initializePGOMemOPSizeOptLegacyPassPass(Registry);    initializeInstrProfilingLegacyPassPass(Registry);    initializeMemorySanitizerPass(Registry); +  initializeHWAddressSanitizerPass(Registry);    initializeThreadSanitizerPass(Registry);    initializeSanitizerCoverageModulePass(Registry);    initializeDataFlowSanitizerPass(Registry); diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b7c6271869cd..b3c39b5b1665 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1,4 +1,4 @@ -//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===// +//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//  //  //                     The LLVM Compiler Infrastructure  // @@ -6,6 +6,7 @@  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===// +//  /// \file  /// This file is a part of MemorySanitizer, a detector of uninitialized  /// reads. 
@@ -88,32 +89,64 @@  /// implementation ignores the load aspect of CAS/RMW, always returning a clean  /// value. It implements the store part as a simple atomic store by storing a  /// clean shadow. - +//  //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/SmallString.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h"  #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InlineAsm.h"  #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/Module.h"  #include "llvm/IR/Type.h" +#include "llvm/IR/Value.h"  #include "llvm/IR/ValueMap.h" +#include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Instrumentation.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <algorithm> +#include 
<cassert> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <tuple>  using namespace llvm; @@ -137,18 +170,23 @@ static const size_t kNumberOfAccessSizes = 4;  static cl::opt<int> ClTrackOrigins("msan-track-origins",         cl::desc("Track origins (allocation sites) of poisoned memory"),         cl::Hidden, cl::init(0)); +  static cl::opt<bool> ClKeepGoing("msan-keep-going",         cl::desc("keep going after reporting a UMR"),         cl::Hidden, cl::init(false)); +  static cl::opt<bool> ClPoisonStack("msan-poison-stack",         cl::desc("poison uninitialized stack variables"),         cl::Hidden, cl::init(true)); +  static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",         cl::desc("poison uninitialized stack variables with a call"),         cl::Hidden, cl::init(false)); +  static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",         cl::desc("poison uninitialized stack variables with the given pattern"),         cl::Hidden, cl::init(0xff)); +  static cl::opt<bool> ClPoisonUndef("msan-poison-undef",         cl::desc("poison undef temps"),         cl::Hidden, cl::init(true)); @@ -217,6 +255,8 @@ struct PlatformMemoryMapParams {    const MemoryMapParams *bits64;  }; +} // end anonymous namespace +  // i386 Linux  static const MemoryMapParams Linux_I386_MemoryMapParams = {    0x000080000000,  // AndMask @@ -250,7 +290,7 @@ static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {  // ppc64 Linux  static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = { -  0x200000000000,  // AndMask +  0xE00000000000,  // AndMask    0x100000000000,  // XorMask    0x080000000000,  // ShadowBase    0x1C0000000000,  // OriginBase @@ -280,6 +320,14 @@ static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {    0x380000000000,  // OriginBase  }; +// x86_64 NetBSD +static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = { +  0,               // AndMask +  0x500000000000,  // XorMask +  
0,               // ShadowBase +  0x100000000000,  // OriginBase +}; +  static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {    &Linux_I386_MemoryMapParams,    &Linux_X86_64_MemoryMapParams, @@ -305,27 +353,44 @@ static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {    &FreeBSD_X86_64_MemoryMapParams,  }; +static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = { +  nullptr, +  &NetBSD_X86_64_MemoryMapParams, +}; + +namespace { +  /// \brief An instrumentation pass implementing detection of uninitialized  /// reads.  ///  /// MemorySanitizer: instrument the code in module to find  /// uninitialized reads.  class MemorySanitizer : public FunctionPass { - public: +public: +  // Pass identification, replacement for typeid. +  static char ID;  +    MemorySanitizer(int TrackOrigins = 0, bool Recover = false)        : FunctionPass(ID),          TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), -        Recover(Recover || ClKeepGoing), -        WarningFn(nullptr) {} +        Recover(Recover || ClKeepGoing) {} +    StringRef getPassName() const override { return "MemorySanitizer"; } +    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<TargetLibraryInfoWrapperPass>();    } +    bool runOnFunction(Function &F) override;    bool doInitialization(Module &M) override; -  static char ID;  // Pass identification, replacement for typeid. - private: +private: +  friend struct MemorySanitizerVisitor; +  friend struct VarArgAMD64Helper; +  friend struct VarArgMIPS64Helper; +  friend struct VarArgAArch64Helper; +  friend struct VarArgPowerPC64Helper; +    void initializeCallbacks(Module &M);    /// \brief Track origins (allocation points) of uninitialized values. @@ -335,26 +400,34 @@ class MemorySanitizer : public FunctionPass {    LLVMContext *C;    Type *IntptrTy;    Type *OriginTy; +    /// \brief Thread-local shadow storage for function parameters.    
GlobalVariable *ParamTLS; +    /// \brief Thread-local origin storage for function parameters.    GlobalVariable *ParamOriginTLS; +    /// \brief Thread-local shadow storage for function return value.    GlobalVariable *RetvalTLS; +    /// \brief Thread-local origin storage for function return value.    GlobalVariable *RetvalOriginTLS; +    /// \brief Thread-local shadow storage for in-register va_arg function    /// parameters (x86_64-specific).    GlobalVariable *VAArgTLS; +    /// \brief Thread-local shadow storage for va_arg overflow area    /// (x86_64-specific).    GlobalVariable *VAArgOverflowSizeTLS; +    /// \brief Thread-local space used to pass origin value to the UMR reporting    /// function.    GlobalVariable *OriginTLS;    /// \brief The run-time callback to print a warning. -  Value *WarningFn; +  Value *WarningFn = nullptr; +    // These arrays are indexed by log2(AccessSize).    Value *MaybeWarningFn[kNumberOfAccessSizes];    Value *MaybeStoreOriginFn[kNumberOfAccessSizes]; @@ -362,11 +435,14 @@ class MemorySanitizer : public FunctionPass {    /// \brief Run-time helper that generates a new origin value for a stack    /// allocation.    Value *MsanSetAllocaOrigin4Fn; +    /// \brief Run-time helper that poisons stack on function entry.    Value *MsanPoisonStackFn; +    /// \brief Run-time helper that records a store (or any event) of an    /// uninitialized value and returns an updated origin id encoding this info.    Value *MsanChainOriginFn; +    /// \brief MSan runtime replacements for memmove, memcpy and memset.    Value *MemmoveFn, *MemcpyFn, *MemsetFn; @@ -374,21 +450,20 @@ class MemorySanitizer : public FunctionPass {    const MemoryMapParams *MapParams;    MDNode *ColdCallWeights; +    /// \brief Branch weights for origin store.    MDNode *OriginStoreWeights; +    /// \brief An empty volatile inline asm that prevents callback merge.    
InlineAsm *EmptyAsm; -  Function *MsanCtorFunction; -  friend struct MemorySanitizerVisitor; -  friend struct VarArgAMD64Helper; -  friend struct VarArgMIPS64Helper; -  friend struct VarArgAArch64Helper; -  friend struct VarArgPowerPC64Helper; +  Function *MsanCtorFunction;  }; -} // anonymous namespace + +} // end anonymous namespace  char MemorySanitizer::ID = 0; +  INITIALIZE_PASS_BEGIN(      MemorySanitizer, "msan",      "MemorySanitizer: detects uninitialized reads.", false, false) @@ -515,6 +590,15 @@ bool MemorySanitizer::doInitialization(Module &M) {            report_fatal_error("unsupported architecture");        }        break; +    case Triple::NetBSD: +      switch (TargetTriple.getArch()) { +        case Triple::x86_64: +          MapParams = NetBSD_X86_MemoryMapParams.bits64; +          break; +        default: +          report_fatal_error("unsupported architecture"); +      } +      break;      case Triple::Linux:        switch (TargetTriple.getArch()) {          case Triple::x86_64: @@ -586,6 +670,8 @@ namespace {  /// the function, and should avoid creating new basic blocks. A new  /// instance of this class is created for each instrumented function.  struct VarArgHelper { +  virtual ~VarArgHelper() = default; +    /// \brief Visit a CallSite.    virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0; @@ -600,21 +686,22 @@ struct VarArgHelper {    /// This method is called after visiting all interesting (see above)    /// instructions in a function.    
virtual void finalizeInstrumentation() = 0; - -  virtual ~VarArgHelper() {}  };  struct MemorySanitizerVisitor; -VarArgHelper* -CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, -                   MemorySanitizerVisitor &Visitor); +} // end anonymous namespace -unsigned TypeSizeToSizeIndex(unsigned TypeSize) { +static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, +                                        MemorySanitizerVisitor &Visitor); + +static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {    if (TypeSize <= 8) return 0;    return Log2_32_Ceil((TypeSize + 7) / 8);  } +namespace { +  /// This class does all the work for a given function. Store and Load  /// instructions store and load corresponding shadow and origin  /// values. Most instructions propagate shadow from arguments to their @@ -641,8 +728,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      Value *Shadow;      Value *Origin;      Instruction *OrigIns; +      ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I) -      : Shadow(S), Origin(O), OrigIns(I) { } +      : Shadow(S), Origin(O), OrigIns(I) {}    };    SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;    SmallVector<StoreInst *, 16> StoreList; @@ -711,21 +799,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    }    void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin, -                   unsigned Alignment, bool AsCall) { +                   Value *OriginPtr, unsigned Alignment, bool AsCall) {      const DataLayout &DL = F.getParent()->getDataLayout();      unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);      unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());      if (Shadow->getType()->isAggregateType()) { -      paintOrigin(IRB, updateOrigin(Origin, IRB), -                  getOriginPtr(Addr, IRB, Alignment), StoreSize, +      paintOrigin(IRB, 
updateOrigin(Origin, IRB), OriginPtr, StoreSize,                    OriginAlignment);      } else {        Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);        Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);        if (ConstantShadow) {          if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) -          paintOrigin(IRB, updateOrigin(Origin, IRB), -                      getOriginPtr(Addr, IRB, Alignment), StoreSize, +          paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,                        OriginAlignment);          return;        } @@ -746,8 +832,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {          Instruction *CheckTerm = SplitBlockAndInsertIfThen(              Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);          IRBuilder<> IRBNew(CheckTerm); -        paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), -                    getOriginPtr(Addr, IRBNew, Alignment), StoreSize, +        paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,                      OriginAlignment);        }      } @@ -759,22 +844,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {        Value *Val = SI->getValueOperand();        Value *Addr = SI->getPointerOperand();        Value *Shadow = SI->isAtomic() ? 
getCleanShadow(Val) : getShadow(Val); -      Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); - -      StoreInst *NewSI = -          IRB.CreateAlignedStore(Shadow, ShadowPtr, SI->getAlignment()); +      Value *ShadowPtr, *OriginPtr; +      Type *ShadowTy = Shadow->getType(); +      unsigned Alignment = SI->getAlignment(); +      unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); +      std::tie(ShadowPtr, OriginPtr) = +          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment); + +      StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);        DEBUG(dbgs() << "  STORE: " << *NewSI << "\n"); -      (void)NewSI;        if (ClCheckAccessAddress) -        insertShadowCheck(Addr, SI); +        insertShadowCheck(Addr, NewSI);        if (SI->isAtomic())          SI->setOrdering(addReleaseOrdering(SI->getOrdering()));        if (MS.TrackOrigins && !SI->isAtomic()) -        storeOrigin(IRB, Addr, Shadow, getOrigin(Val), SI->getAlignment(), -                    InstrumentWithCalls); +        storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr, +                    OriginAlignment, InstrumentWithCalls);      }    } @@ -856,7 +944,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      for (BasicBlock *BB : depth_first(&F.getEntryBlock()))        visit(*BB); -      // Finalize PHI nodes.      for (PHINode *PN : ShadowPHINodes) {        PHINode *PNS = cast<PHINode>(getShadow(PN)); @@ -954,39 +1041,50 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      return OffsetLong;    } -  /// \brief Compute the shadow address that corresponds to a given application -  /// address. +  /// \brief Compute the shadow and origin addresses corresponding to a given +  /// application address.    
///    /// Shadow = ShadowBase + Offset -  Value *getShadowPtr(Value *Addr, Type *ShadowTy, -                      IRBuilder<> &IRB) { -    Value *ShadowLong = getShadowPtrOffset(Addr, IRB); +  /// Origin = (OriginBase + Offset) & ~3ULL +  std::pair<Value *, Value *> getShadowOriginPtrUserspace( +      Value *Addr, IRBuilder<> &IRB, Type *ShadowTy, unsigned Alignment, +      Instruction **FirstInsn) { +    Value *ShadowOffset = getShadowPtrOffset(Addr, IRB); +    Value *ShadowLong = ShadowOffset;      uint64_t ShadowBase = MS.MapParams->ShadowBase; -    if (ShadowBase != 0) +    *FirstInsn = dyn_cast<Instruction>(ShadowLong); +    if (ShadowBase != 0) {        ShadowLong =          IRB.CreateAdd(ShadowLong,                        ConstantInt::get(MS.IntptrTy, ShadowBase)); -    return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0)); +    } +    Value *ShadowPtr = +        IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0)); +    Value *OriginPtr = nullptr; +    if (MS.TrackOrigins) { +      Value *OriginLong = ShadowOffset; +      uint64_t OriginBase = MS.MapParams->OriginBase; +      if (OriginBase != 0) +        OriginLong = IRB.CreateAdd(OriginLong, +                                   ConstantInt::get(MS.IntptrTy, OriginBase)); +      if (Alignment < kMinOriginAlignment) { +        uint64_t Mask = kMinOriginAlignment - 1; +        OriginLong = +            IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask)); +      } +      OriginPtr = +          IRB.CreateIntToPtr(OriginLong, PointerType::get(IRB.getInt32Ty(), 0)); +    } +    return std::make_pair(ShadowPtr, OriginPtr);    } -  /// \brief Compute the origin address that corresponds to a given application -  /// address. 
-  /// -  /// OriginAddr = (OriginBase + Offset) & ~3ULL -  Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB, unsigned Alignment) { -    Value *OriginLong = getShadowPtrOffset(Addr, IRB); -    uint64_t OriginBase = MS.MapParams->OriginBase; -    if (OriginBase != 0) -      OriginLong = -        IRB.CreateAdd(OriginLong, -                      ConstantInt::get(MS.IntptrTy, OriginBase)); -    if (Alignment < kMinOriginAlignment) { -      uint64_t Mask = kMinOriginAlignment - 1; -      OriginLong = IRB.CreateAnd(OriginLong, -                                 ConstantInt::get(MS.IntptrTy, ~Mask)); -    } -    return IRB.CreateIntToPtr(OriginLong, -                              PointerType::get(IRB.getInt32Ty(), 0)); +  std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB, +                                                 Type *ShadowTy, +                                                 unsigned Alignment) { +    Instruction *FirstInsn = nullptr; +    std::pair<Value *, Value *> ret = +        getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment, &FirstInsn); +    return ret;    }    /// \brief Compute the shadow address for a given function argument. @@ -1012,9 +1110,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    /// \brief Compute the shadow address for a retval.    Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) { -    Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy); -    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0), -                              "_msret"); +    return IRB.CreatePointerCast(MS.RetvalTLS, +                                 PointerType::get(getShadowTy(A), 0), +                                 "_msret");    }    /// \brief Compute the origin address for a retval. 
@@ -1091,6 +1189,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    Value *getShadow(Value *V) {      if (!PropagateShadow) return getCleanShadow(V);      if (Instruction *I = dyn_cast<Instruction>(V)) { +      if (I->getMetadata("nosanitize")) +        return getCleanShadow(V);        // For instructions the shadow is already stored in the map.        Value *Shadow = ShadowMap[V];        if (!Shadow) { @@ -1136,16 +1236,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {                Type *EltType = A->getType()->getPointerElementType();                ArgAlign = DL.getABITypeAlignment(EltType);              } +            Value *CpShadowPtr = +                getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign) +                    .first;              if (Overflow) {                // ParamTLS overflow.                EntryIRB.CreateMemSet( -                  getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), -                  Constant::getNullValue(EntryIRB.getInt8Ty()), Size, ArgAlign); +                  CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()), +                  Size, ArgAlign);              } else {                unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment); -              Value *Cpy = EntryIRB.CreateMemCpy( -                  getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size, -                  CopyAlign); +              Value *Cpy = +                  EntryIRB.CreateMemCpy(CpShadowPtr, Base, Size, CopyAlign);                DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");                (void)Cpy;              } @@ -1190,6 +1292,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      if (isa<Constant>(V)) return getCleanOrigin();      assert((isa<Instruction>(V) || isa<Argument>(V)) &&             "Unexpected value type in getOrigin()"); +    if (Instruction *I = dyn_cast<Instruction>(V)) { +      if 
(I->getMetadata("nosanitize")) +        return getCleanOrigin(); +    }      Value *Origin = OriginMap[V];      assert(Origin && "Missing origin");      return Origin; @@ -1270,6 +1376,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    }    // ------------------- Visitors. +  using InstVisitor<MemorySanitizerVisitor>::visit; +  void visit(Instruction &I) { +    if (!I.getMetadata("nosanitize")) +      InstVisitor<MemorySanitizerVisitor>::visit(I); +  }    /// \brief Instrument LoadInst    /// @@ -1277,13 +1388,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    /// Optionally, checks that the load address is fully defined.    void visitLoadInst(LoadInst &I) {      assert(I.getType()->isSized() && "Load type must have size"); +    assert(!I.getMetadata("nosanitize"));      IRBuilder<> IRB(I.getNextNode());      Type *ShadowTy = getShadowTy(&I);      Value *Addr = I.getPointerOperand(); -    if (PropagateShadow && !I.getMetadata("nosanitize")) { -      Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); -      setShadow(&I, -                IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); +    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = I.getAlignment(); +    if (PropagateShadow) { +      std::tie(ShadowPtr, OriginPtr) = +          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment); +      setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));      } else {        setShadow(&I, getCleanShadow(&I));      } @@ -1296,10 +1410,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      if (MS.TrackOrigins) {        if (PropagateShadow) { -        unsigned Alignment = I.getAlignment();          unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); -        setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB, Alignment), -                                            OriginAlignment)); +        setOrigin(&I, 
IRB.CreateAlignedLoad(OriginPtr, OriginAlignment));        } else {          setOrigin(&I, getCleanOrigin());        } @@ -1319,7 +1431,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      IRBuilder<> IRB(&I);      Value *Addr = I.getOperand(0); -    Value *ShadowPtr = getShadowPtr(Addr, I.getType(), IRB); +    Value *ShadowPtr = +        getShadowOriginPtr(Addr, IRB, I.getType(), /*Alignment*/ 1).first;      if (ClCheckAccessAddress)        insertShadowCheck(Addr, &I); @@ -1489,14 +1602,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    /// arguments are initialized.    template <bool CombineShadow>    class Combiner { -    Value *Shadow; -    Value *Origin; +    Value *Shadow = nullptr; +    Value *Origin = nullptr;      IRBuilder<> &IRB;      MemorySanitizerVisitor *MSV;    public: -    Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) : -      Shadow(nullptr), Origin(nullptr), IRB(IRB), MSV(MSV) {} +    Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) +        : IRB(IRB), MSV(MSV) {}      /// \brief Add a pair of shadow and origin values to the mix.      Combiner &Add(Value *OpShadow, Value *OpOrigin) { @@ -1550,8 +1663,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      }    }; -  typedef Combiner<true> ShadowAndOriginCombiner; -  typedef Combiner<false> OriginCombiner; +  using ShadowAndOriginCombiner = Combiner<true>; +  using OriginCombiner = Combiner<false>;    /// \brief Propagate origin for arbitrary operation.    void setOriginForNaryOp(Instruction &I) { @@ -1940,18 +2053,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      IRBuilder<> IRB(&I);      Value* Addr = I.getArgOperand(0);      Value *Shadow = getShadow(&I, 1); -    Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); +    Value *ShadowPtr, *OriginPtr;      // We don't know the pointer alignment (could be unaligned SSE store!).      
// Have to assume to worst case. +    std::tie(ShadowPtr, OriginPtr) = +        getShadowOriginPtr(Addr, IRB, Shadow->getType(), /*Alignment*/ 1);      IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);      if (ClCheckAccessAddress)        insertShadowCheck(Addr, &I);      // FIXME: factor out common code from materializeStores -    if (MS.TrackOrigins) -      IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB, 1)); +    if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);      return true;    } @@ -1964,11 +2078,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      Value *Addr = I.getArgOperand(0);      Type *ShadowTy = getShadowTy(&I); +    Value *ShadowPtr, *OriginPtr;      if (PropagateShadow) { -      Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);        // We don't know the pointer alignment (could be unaligned SSE load!).        // Have to assume to worst case. -      setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld")); +      unsigned Alignment = 1; +      std::tie(ShadowPtr, OriginPtr) = +          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment); +      setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));      } else {        setShadow(&I, getCleanShadow(&I));      } @@ -1978,7 +2095,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      if (MS.TrackOrigins) {        if (PropagateShadow) -        setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB, 1))); +        setOrigin(&I, IRB.CreateLoad(OriginPtr));        else          setOrigin(&I, getCleanOrigin());      } @@ -2204,28 +2321,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    // intrinsic.    
Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {      switch (id) { -      case llvm::Intrinsic::x86_sse2_packsswb_128: -      case llvm::Intrinsic::x86_sse2_packuswb_128: -        return llvm::Intrinsic::x86_sse2_packsswb_128; +      case Intrinsic::x86_sse2_packsswb_128: +      case Intrinsic::x86_sse2_packuswb_128: +        return Intrinsic::x86_sse2_packsswb_128; -      case llvm::Intrinsic::x86_sse2_packssdw_128: -      case llvm::Intrinsic::x86_sse41_packusdw: -        return llvm::Intrinsic::x86_sse2_packssdw_128; +      case Intrinsic::x86_sse2_packssdw_128: +      case Intrinsic::x86_sse41_packusdw: +        return Intrinsic::x86_sse2_packssdw_128; -      case llvm::Intrinsic::x86_avx2_packsswb: -      case llvm::Intrinsic::x86_avx2_packuswb: -        return llvm::Intrinsic::x86_avx2_packsswb; +      case Intrinsic::x86_avx2_packsswb: +      case Intrinsic::x86_avx2_packuswb: +        return Intrinsic::x86_avx2_packsswb; -      case llvm::Intrinsic::x86_avx2_packssdw: -      case llvm::Intrinsic::x86_avx2_packusdw: -        return llvm::Intrinsic::x86_avx2_packssdw; +      case Intrinsic::x86_avx2_packssdw: +      case Intrinsic::x86_avx2_packusdw: +        return Intrinsic::x86_avx2_packssdw; -      case llvm::Intrinsic::x86_mmx_packsswb: -      case llvm::Intrinsic::x86_mmx_packuswb: -        return llvm::Intrinsic::x86_mmx_packsswb; +      case Intrinsic::x86_mmx_packsswb: +      case Intrinsic::x86_mmx_packuswb: +        return Intrinsic::x86_mmx_packsswb; -      case llvm::Intrinsic::x86_mmx_packssdw: -        return llvm::Intrinsic::x86_mmx_packssdw; +      case Intrinsic::x86_mmx_packssdw: +        return Intrinsic::x86_mmx_packssdw;        default:          llvm_unreachable("unexpected intrinsic id");      } @@ -2255,9 +2372,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {        S2 = IRB.CreateBitCast(S2, T);      }      Value *S1_ext = IRB.CreateSExt( -        IRB.CreateICmpNE(S1, 
llvm::Constant::getNullValue(T)), T); +        IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);      Value *S2_ext = IRB.CreateSExt( -        IRB.CreateICmpNE(S2, llvm::Constant::getNullValue(T)), T); +        IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);      if (isX86_MMX) {        Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);        S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy); @@ -2336,7 +2453,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      IRBuilder<> IRB(&I);      Value* Addr = I.getArgOperand(0);      Type *Ty = IRB.getInt32Ty(); -    Value *ShadowPtr = getShadowPtr(Addr, Ty, IRB); +    Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, Ty, /*Alignment*/ 1).first;      IRB.CreateStore(getCleanShadow(Ty),                      IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo())); @@ -2352,227 +2469,228 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      Value *Addr = I.getArgOperand(0);      Type *Ty = IRB.getInt32Ty();      unsigned Alignment = 1; +    Value *ShadowPtr, *OriginPtr; +    std::tie(ShadowPtr, OriginPtr) = +        getShadowOriginPtr(Addr, IRB, Ty, Alignment);      if (ClCheckAccessAddress)        insertShadowCheck(Addr, &I); -    Value *Shadow = IRB.CreateAlignedLoad(getShadowPtr(Addr, Ty, IRB), -                                          Alignment, "_ldmxcsr"); -    Value *Origin = MS.TrackOrigins -                        ? IRB.CreateLoad(getOriginPtr(Addr, IRB, Alignment)) -                        : getCleanOrigin(); +    Value *Shadow = IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_ldmxcsr"); +    Value *Origin = +        MS.TrackOrigins ? 
IRB.CreateLoad(OriginPtr) : getCleanOrigin();      insertShadowCheck(Shadow, Origin, &I);    }    void visitIntrinsicInst(IntrinsicInst &I) {      switch (I.getIntrinsicID()) { -    case llvm::Intrinsic::bswap: +    case Intrinsic::bswap:        handleBswap(I);        break; -    case llvm::Intrinsic::x86_sse_stmxcsr: +    case Intrinsic::x86_sse_stmxcsr:        handleStmxcsr(I);        break; -    case llvm::Intrinsic::x86_sse_ldmxcsr: +    case Intrinsic::x86_sse_ldmxcsr:        handleLdmxcsr(I);        break; -    case llvm::Intrinsic::x86_avx512_vcvtsd2usi64: -    case llvm::Intrinsic::x86_avx512_vcvtsd2usi32: -    case llvm::Intrinsic::x86_avx512_vcvtss2usi64: -    case llvm::Intrinsic::x86_avx512_vcvtss2usi32: -    case llvm::Intrinsic::x86_avx512_cvttss2usi64: -    case llvm::Intrinsic::x86_avx512_cvttss2usi: -    case llvm::Intrinsic::x86_avx512_cvttsd2usi64: -    case llvm::Intrinsic::x86_avx512_cvttsd2usi: -    case llvm::Intrinsic::x86_avx512_cvtusi2sd: -    case llvm::Intrinsic::x86_avx512_cvtusi2ss: -    case llvm::Intrinsic::x86_avx512_cvtusi642sd: -    case llvm::Intrinsic::x86_avx512_cvtusi642ss: -    case llvm::Intrinsic::x86_sse2_cvtsd2si64: -    case llvm::Intrinsic::x86_sse2_cvtsd2si: -    case llvm::Intrinsic::x86_sse2_cvtsd2ss: -    case llvm::Intrinsic::x86_sse2_cvtsi2sd: -    case llvm::Intrinsic::x86_sse2_cvtsi642sd: -    case llvm::Intrinsic::x86_sse2_cvtss2sd: -    case llvm::Intrinsic::x86_sse2_cvttsd2si64: -    case llvm::Intrinsic::x86_sse2_cvttsd2si: -    case llvm::Intrinsic::x86_sse_cvtsi2ss: -    case llvm::Intrinsic::x86_sse_cvtsi642ss: -    case llvm::Intrinsic::x86_sse_cvtss2si64: -    case llvm::Intrinsic::x86_sse_cvtss2si: -    case llvm::Intrinsic::x86_sse_cvttss2si64: -    case llvm::Intrinsic::x86_sse_cvttss2si: +    case Intrinsic::x86_avx512_vcvtsd2usi64: +    case Intrinsic::x86_avx512_vcvtsd2usi32: +    case Intrinsic::x86_avx512_vcvtss2usi64: +    case Intrinsic::x86_avx512_vcvtss2usi32: +    case 
Intrinsic::x86_avx512_cvttss2usi64: +    case Intrinsic::x86_avx512_cvttss2usi: +    case Intrinsic::x86_avx512_cvttsd2usi64: +    case Intrinsic::x86_avx512_cvttsd2usi: +    case Intrinsic::x86_avx512_cvtusi2sd: +    case Intrinsic::x86_avx512_cvtusi2ss: +    case Intrinsic::x86_avx512_cvtusi642sd: +    case Intrinsic::x86_avx512_cvtusi642ss: +    case Intrinsic::x86_sse2_cvtsd2si64: +    case Intrinsic::x86_sse2_cvtsd2si: +    case Intrinsic::x86_sse2_cvtsd2ss: +    case Intrinsic::x86_sse2_cvtsi2sd: +    case Intrinsic::x86_sse2_cvtsi642sd: +    case Intrinsic::x86_sse2_cvtss2sd: +    case Intrinsic::x86_sse2_cvttsd2si64: +    case Intrinsic::x86_sse2_cvttsd2si: +    case Intrinsic::x86_sse_cvtsi2ss: +    case Intrinsic::x86_sse_cvtsi642ss: +    case Intrinsic::x86_sse_cvtss2si64: +    case Intrinsic::x86_sse_cvtss2si: +    case Intrinsic::x86_sse_cvttss2si64: +    case Intrinsic::x86_sse_cvttss2si:        handleVectorConvertIntrinsic(I, 1);        break; -    case llvm::Intrinsic::x86_sse_cvtps2pi: -    case llvm::Intrinsic::x86_sse_cvttps2pi: +    case Intrinsic::x86_sse_cvtps2pi: +    case Intrinsic::x86_sse_cvttps2pi:        handleVectorConvertIntrinsic(I, 2);        break; -    case llvm::Intrinsic::x86_avx512_psll_w_512: -    case llvm::Intrinsic::x86_avx512_psll_d_512: -    case llvm::Intrinsic::x86_avx512_psll_q_512: -    case llvm::Intrinsic::x86_avx512_pslli_w_512: -    case llvm::Intrinsic::x86_avx512_pslli_d_512: -    case llvm::Intrinsic::x86_avx512_pslli_q_512: -    case llvm::Intrinsic::x86_avx512_psrl_w_512: -    case llvm::Intrinsic::x86_avx512_psrl_d_512: -    case llvm::Intrinsic::x86_avx512_psrl_q_512: -    case llvm::Intrinsic::x86_avx512_psra_w_512: -    case llvm::Intrinsic::x86_avx512_psra_d_512: -    case llvm::Intrinsic::x86_avx512_psra_q_512: -    case llvm::Intrinsic::x86_avx512_psrli_w_512: -    case llvm::Intrinsic::x86_avx512_psrli_d_512: -    case llvm::Intrinsic::x86_avx512_psrli_q_512: -    case 
llvm::Intrinsic::x86_avx512_psrai_w_512: -    case llvm::Intrinsic::x86_avx512_psrai_d_512: -    case llvm::Intrinsic::x86_avx512_psrai_q_512: -    case llvm::Intrinsic::x86_avx512_psra_q_256: -    case llvm::Intrinsic::x86_avx512_psra_q_128: -    case llvm::Intrinsic::x86_avx512_psrai_q_256: -    case llvm::Intrinsic::x86_avx512_psrai_q_128: -    case llvm::Intrinsic::x86_avx2_psll_w: -    case llvm::Intrinsic::x86_avx2_psll_d: -    case llvm::Intrinsic::x86_avx2_psll_q: -    case llvm::Intrinsic::x86_avx2_pslli_w: -    case llvm::Intrinsic::x86_avx2_pslli_d: -    case llvm::Intrinsic::x86_avx2_pslli_q: -    case llvm::Intrinsic::x86_avx2_psrl_w: -    case llvm::Intrinsic::x86_avx2_psrl_d: -    case llvm::Intrinsic::x86_avx2_psrl_q: -    case llvm::Intrinsic::x86_avx2_psra_w: -    case llvm::Intrinsic::x86_avx2_psra_d: -    case llvm::Intrinsic::x86_avx2_psrli_w: -    case llvm::Intrinsic::x86_avx2_psrli_d: -    case llvm::Intrinsic::x86_avx2_psrli_q: -    case llvm::Intrinsic::x86_avx2_psrai_w: -    case llvm::Intrinsic::x86_avx2_psrai_d: -    case llvm::Intrinsic::x86_sse2_psll_w: -    case llvm::Intrinsic::x86_sse2_psll_d: -    case llvm::Intrinsic::x86_sse2_psll_q: -    case llvm::Intrinsic::x86_sse2_pslli_w: -    case llvm::Intrinsic::x86_sse2_pslli_d: -    case llvm::Intrinsic::x86_sse2_pslli_q: -    case llvm::Intrinsic::x86_sse2_psrl_w: -    case llvm::Intrinsic::x86_sse2_psrl_d: -    case llvm::Intrinsic::x86_sse2_psrl_q: -    case llvm::Intrinsic::x86_sse2_psra_w: -    case llvm::Intrinsic::x86_sse2_psra_d: -    case llvm::Intrinsic::x86_sse2_psrli_w: -    case llvm::Intrinsic::x86_sse2_psrli_d: -    case llvm::Intrinsic::x86_sse2_psrli_q: -    case llvm::Intrinsic::x86_sse2_psrai_w: -    case llvm::Intrinsic::x86_sse2_psrai_d: -    case llvm::Intrinsic::x86_mmx_psll_w: -    case llvm::Intrinsic::x86_mmx_psll_d: -    case llvm::Intrinsic::x86_mmx_psll_q: -    case llvm::Intrinsic::x86_mmx_pslli_w: -    case llvm::Intrinsic::x86_mmx_pslli_d: -    case 
llvm::Intrinsic::x86_mmx_pslli_q: -    case llvm::Intrinsic::x86_mmx_psrl_w: -    case llvm::Intrinsic::x86_mmx_psrl_d: -    case llvm::Intrinsic::x86_mmx_psrl_q: -    case llvm::Intrinsic::x86_mmx_psra_w: -    case llvm::Intrinsic::x86_mmx_psra_d: -    case llvm::Intrinsic::x86_mmx_psrli_w: -    case llvm::Intrinsic::x86_mmx_psrli_d: -    case llvm::Intrinsic::x86_mmx_psrli_q: -    case llvm::Intrinsic::x86_mmx_psrai_w: -    case llvm::Intrinsic::x86_mmx_psrai_d: +    case Intrinsic::x86_avx512_psll_w_512: +    case Intrinsic::x86_avx512_psll_d_512: +    case Intrinsic::x86_avx512_psll_q_512: +    case Intrinsic::x86_avx512_pslli_w_512: +    case Intrinsic::x86_avx512_pslli_d_512: +    case Intrinsic::x86_avx512_pslli_q_512: +    case Intrinsic::x86_avx512_psrl_w_512: +    case Intrinsic::x86_avx512_psrl_d_512: +    case Intrinsic::x86_avx512_psrl_q_512: +    case Intrinsic::x86_avx512_psra_w_512: +    case Intrinsic::x86_avx512_psra_d_512: +    case Intrinsic::x86_avx512_psra_q_512: +    case Intrinsic::x86_avx512_psrli_w_512: +    case Intrinsic::x86_avx512_psrli_d_512: +    case Intrinsic::x86_avx512_psrli_q_512: +    case Intrinsic::x86_avx512_psrai_w_512: +    case Intrinsic::x86_avx512_psrai_d_512: +    case Intrinsic::x86_avx512_psrai_q_512: +    case Intrinsic::x86_avx512_psra_q_256: +    case Intrinsic::x86_avx512_psra_q_128: +    case Intrinsic::x86_avx512_psrai_q_256: +    case Intrinsic::x86_avx512_psrai_q_128: +    case Intrinsic::x86_avx2_psll_w: +    case Intrinsic::x86_avx2_psll_d: +    case Intrinsic::x86_avx2_psll_q: +    case Intrinsic::x86_avx2_pslli_w: +    case Intrinsic::x86_avx2_pslli_d: +    case Intrinsic::x86_avx2_pslli_q: +    case Intrinsic::x86_avx2_psrl_w: +    case Intrinsic::x86_avx2_psrl_d: +    case Intrinsic::x86_avx2_psrl_q: +    case Intrinsic::x86_avx2_psra_w: +    case Intrinsic::x86_avx2_psra_d: +    case Intrinsic::x86_avx2_psrli_w: +    case Intrinsic::x86_avx2_psrli_d: +    case Intrinsic::x86_avx2_psrli_q: +    case 
Intrinsic::x86_avx2_psrai_w: +    case Intrinsic::x86_avx2_psrai_d: +    case Intrinsic::x86_sse2_psll_w: +    case Intrinsic::x86_sse2_psll_d: +    case Intrinsic::x86_sse2_psll_q: +    case Intrinsic::x86_sse2_pslli_w: +    case Intrinsic::x86_sse2_pslli_d: +    case Intrinsic::x86_sse2_pslli_q: +    case Intrinsic::x86_sse2_psrl_w: +    case Intrinsic::x86_sse2_psrl_d: +    case Intrinsic::x86_sse2_psrl_q: +    case Intrinsic::x86_sse2_psra_w: +    case Intrinsic::x86_sse2_psra_d: +    case Intrinsic::x86_sse2_psrli_w: +    case Intrinsic::x86_sse2_psrli_d: +    case Intrinsic::x86_sse2_psrli_q: +    case Intrinsic::x86_sse2_psrai_w: +    case Intrinsic::x86_sse2_psrai_d: +    case Intrinsic::x86_mmx_psll_w: +    case Intrinsic::x86_mmx_psll_d: +    case Intrinsic::x86_mmx_psll_q: +    case Intrinsic::x86_mmx_pslli_w: +    case Intrinsic::x86_mmx_pslli_d: +    case Intrinsic::x86_mmx_pslli_q: +    case Intrinsic::x86_mmx_psrl_w: +    case Intrinsic::x86_mmx_psrl_d: +    case Intrinsic::x86_mmx_psrl_q: +    case Intrinsic::x86_mmx_psra_w: +    case Intrinsic::x86_mmx_psra_d: +    case Intrinsic::x86_mmx_psrli_w: +    case Intrinsic::x86_mmx_psrli_d: +    case Intrinsic::x86_mmx_psrli_q: +    case Intrinsic::x86_mmx_psrai_w: +    case Intrinsic::x86_mmx_psrai_d:        handleVectorShiftIntrinsic(I, /* Variable */ false);        break; -    case llvm::Intrinsic::x86_avx2_psllv_d: -    case llvm::Intrinsic::x86_avx2_psllv_d_256: -    case llvm::Intrinsic::x86_avx512_psllv_d_512: -    case llvm::Intrinsic::x86_avx2_psllv_q: -    case llvm::Intrinsic::x86_avx2_psllv_q_256: -    case llvm::Intrinsic::x86_avx512_psllv_q_512: -    case llvm::Intrinsic::x86_avx2_psrlv_d: -    case llvm::Intrinsic::x86_avx2_psrlv_d_256: -    case llvm::Intrinsic::x86_avx512_psrlv_d_512: -    case llvm::Intrinsic::x86_avx2_psrlv_q: -    case llvm::Intrinsic::x86_avx2_psrlv_q_256: -    case llvm::Intrinsic::x86_avx512_psrlv_q_512: -    case llvm::Intrinsic::x86_avx2_psrav_d: -    case 
llvm::Intrinsic::x86_avx2_psrav_d_256: -    case llvm::Intrinsic::x86_avx512_psrav_d_512: -    case llvm::Intrinsic::x86_avx512_psrav_q_128: -    case llvm::Intrinsic::x86_avx512_psrav_q_256: -    case llvm::Intrinsic::x86_avx512_psrav_q_512: +    case Intrinsic::x86_avx2_psllv_d: +    case Intrinsic::x86_avx2_psllv_d_256: +    case Intrinsic::x86_avx512_psllv_d_512: +    case Intrinsic::x86_avx2_psllv_q: +    case Intrinsic::x86_avx2_psllv_q_256: +    case Intrinsic::x86_avx512_psllv_q_512: +    case Intrinsic::x86_avx2_psrlv_d: +    case Intrinsic::x86_avx2_psrlv_d_256: +    case Intrinsic::x86_avx512_psrlv_d_512: +    case Intrinsic::x86_avx2_psrlv_q: +    case Intrinsic::x86_avx2_psrlv_q_256: +    case Intrinsic::x86_avx512_psrlv_q_512: +    case Intrinsic::x86_avx2_psrav_d: +    case Intrinsic::x86_avx2_psrav_d_256: +    case Intrinsic::x86_avx512_psrav_d_512: +    case Intrinsic::x86_avx512_psrav_q_128: +    case Intrinsic::x86_avx512_psrav_q_256: +    case Intrinsic::x86_avx512_psrav_q_512:        handleVectorShiftIntrinsic(I, /* Variable */ true);        break; -    case llvm::Intrinsic::x86_sse2_packsswb_128: -    case llvm::Intrinsic::x86_sse2_packssdw_128: -    case llvm::Intrinsic::x86_sse2_packuswb_128: -    case llvm::Intrinsic::x86_sse41_packusdw: -    case llvm::Intrinsic::x86_avx2_packsswb: -    case llvm::Intrinsic::x86_avx2_packssdw: -    case llvm::Intrinsic::x86_avx2_packuswb: -    case llvm::Intrinsic::x86_avx2_packusdw: +    case Intrinsic::x86_sse2_packsswb_128: +    case Intrinsic::x86_sse2_packssdw_128: +    case Intrinsic::x86_sse2_packuswb_128: +    case Intrinsic::x86_sse41_packusdw: +    case Intrinsic::x86_avx2_packsswb: +    case Intrinsic::x86_avx2_packssdw: +    case Intrinsic::x86_avx2_packuswb: +    case Intrinsic::x86_avx2_packusdw:        handleVectorPackIntrinsic(I);        break; -    case llvm::Intrinsic::x86_mmx_packsswb: -    case llvm::Intrinsic::x86_mmx_packuswb: +    case Intrinsic::x86_mmx_packsswb: +    case 
Intrinsic::x86_mmx_packuswb:        handleVectorPackIntrinsic(I, 16);        break; -    case llvm::Intrinsic::x86_mmx_packssdw: +    case Intrinsic::x86_mmx_packssdw:        handleVectorPackIntrinsic(I, 32);        break; -    case llvm::Intrinsic::x86_mmx_psad_bw: -    case llvm::Intrinsic::x86_sse2_psad_bw: -    case llvm::Intrinsic::x86_avx2_psad_bw: +    case Intrinsic::x86_mmx_psad_bw: +    case Intrinsic::x86_sse2_psad_bw: +    case Intrinsic::x86_avx2_psad_bw:        handleVectorSadIntrinsic(I);        break; -    case llvm::Intrinsic::x86_sse2_pmadd_wd: -    case llvm::Intrinsic::x86_avx2_pmadd_wd: -    case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw_128: -    case llvm::Intrinsic::x86_avx2_pmadd_ub_sw: +    case Intrinsic::x86_sse2_pmadd_wd: +    case Intrinsic::x86_avx2_pmadd_wd: +    case Intrinsic::x86_ssse3_pmadd_ub_sw_128: +    case Intrinsic::x86_avx2_pmadd_ub_sw:        handleVectorPmaddIntrinsic(I);        break; -    case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw: +    case Intrinsic::x86_ssse3_pmadd_ub_sw:        handleVectorPmaddIntrinsic(I, 8);        break; -    case llvm::Intrinsic::x86_mmx_pmadd_wd: +    case Intrinsic::x86_mmx_pmadd_wd:        handleVectorPmaddIntrinsic(I, 16);        break; -    case llvm::Intrinsic::x86_sse_cmp_ss: -    case llvm::Intrinsic::x86_sse2_cmp_sd: -    case llvm::Intrinsic::x86_sse_comieq_ss: -    case llvm::Intrinsic::x86_sse_comilt_ss: -    case llvm::Intrinsic::x86_sse_comile_ss: -    case llvm::Intrinsic::x86_sse_comigt_ss: -    case llvm::Intrinsic::x86_sse_comige_ss: -    case llvm::Intrinsic::x86_sse_comineq_ss: -    case llvm::Intrinsic::x86_sse_ucomieq_ss: -    case llvm::Intrinsic::x86_sse_ucomilt_ss: -    case llvm::Intrinsic::x86_sse_ucomile_ss: -    case llvm::Intrinsic::x86_sse_ucomigt_ss: -    case llvm::Intrinsic::x86_sse_ucomige_ss: -    case llvm::Intrinsic::x86_sse_ucomineq_ss: -    case llvm::Intrinsic::x86_sse2_comieq_sd: -    case llvm::Intrinsic::x86_sse2_comilt_sd: -    case 
llvm::Intrinsic::x86_sse2_comile_sd: -    case llvm::Intrinsic::x86_sse2_comigt_sd: -    case llvm::Intrinsic::x86_sse2_comige_sd: -    case llvm::Intrinsic::x86_sse2_comineq_sd: -    case llvm::Intrinsic::x86_sse2_ucomieq_sd: -    case llvm::Intrinsic::x86_sse2_ucomilt_sd: -    case llvm::Intrinsic::x86_sse2_ucomile_sd: -    case llvm::Intrinsic::x86_sse2_ucomigt_sd: -    case llvm::Intrinsic::x86_sse2_ucomige_sd: -    case llvm::Intrinsic::x86_sse2_ucomineq_sd: +    case Intrinsic::x86_sse_cmp_ss: +    case Intrinsic::x86_sse2_cmp_sd: +    case Intrinsic::x86_sse_comieq_ss: +    case Intrinsic::x86_sse_comilt_ss: +    case Intrinsic::x86_sse_comile_ss: +    case Intrinsic::x86_sse_comigt_ss: +    case Intrinsic::x86_sse_comige_ss: +    case Intrinsic::x86_sse_comineq_ss: +    case Intrinsic::x86_sse_ucomieq_ss: +    case Intrinsic::x86_sse_ucomilt_ss: +    case Intrinsic::x86_sse_ucomile_ss: +    case Intrinsic::x86_sse_ucomigt_ss: +    case Intrinsic::x86_sse_ucomige_ss: +    case Intrinsic::x86_sse_ucomineq_ss: +    case Intrinsic::x86_sse2_comieq_sd: +    case Intrinsic::x86_sse2_comilt_sd: +    case Intrinsic::x86_sse2_comile_sd: +    case Intrinsic::x86_sse2_comigt_sd: +    case Intrinsic::x86_sse2_comige_sd: +    case Intrinsic::x86_sse2_comineq_sd: +    case Intrinsic::x86_sse2_ucomieq_sd: +    case Intrinsic::x86_sse2_ucomilt_sd: +    case Intrinsic::x86_sse2_ucomile_sd: +    case Intrinsic::x86_sse2_ucomigt_sd: +    case Intrinsic::x86_sse2_ucomige_sd: +    case Intrinsic::x86_sse2_ucomineq_sd:        handleVectorCompareScalarIntrinsic(I);        break; -    case llvm::Intrinsic::x86_sse_cmp_ps: -    case llvm::Intrinsic::x86_sse2_cmp_pd: +    case Intrinsic::x86_sse_cmp_ps: +    case Intrinsic::x86_sse2_cmp_pd:        // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function        // generates reasonably looking IR that fails in the backend with "Do not        // know how to split the result of this operator!". 
@@ -2588,6 +2706,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    void visitCallSite(CallSite CS) {      Instruction &I = *CS.getInstruction(); +    assert(!I.getMetadata("nosanitize"));      assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");      if (CS.isCall()) {        CallInst *Call = cast<CallInst>(&I); @@ -2646,9 +2765,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {          if (ArgOffset + Size > kParamTLSSize) break;          unsigned ParamAlignment = CS.getParamAlignment(i);          unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); -        Store = IRB.CreateMemCpy(ArgShadowBase, -                                 getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), -                                 Size, Alignment); +        Value *AShadowPtr = +            getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment).first; + +        Store = IRB.CreateMemCpy(ArgShadowBase, AShadowPtr, Size, Alignment);        } else {          Size = DL.getTypeAllocSize(A->getType());          if (ArgOffset + Size > kParamTLSSize) break; @@ -2695,6 +2815,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {          setOrigin(&I, getCleanOrigin());          return;        } +      // FIXME: NextInsn is likely in a basic block that has not been visited yet. +      // Anything inserted there will be instrumented by MSan later!        
NextInsn = NormalDest->getFirstInsertionPt();        assert(NextInsn != NormalDest->end() &&               "Could not find insertion point for retval shadow load"); @@ -2766,7 +2888,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {        IRB.CreateCall(MS.MsanPoisonStackFn,                       {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});      } else { -      Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB); +      Value *ShadowBase = +          getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), I.getAlignment()).first; +        Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);        IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());      } @@ -2845,7 +2969,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {    void visitLandingPadInst(LandingPadInst &I) {      // Do nothing. -    // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1 +    // See https://github.com/google/sanitizers/issues/504      setShadow(&I, getCleanShadow(&I));      setOrigin(&I, getCleanOrigin());    } @@ -2938,18 +3062,16 @@ struct VarArgAMD64Helper : public VarArgHelper {    Function &F;    MemorySanitizer &MS;    MemorySanitizerVisitor &MSV; -  Value *VAArgTLSCopy; -  Value *VAArgOverflowSize; +  Value *VAArgTLSCopy = nullptr; +  Value *VAArgOverflowSize = nullptr;    SmallVector<CallInst*, 16> VAStartInstrumentationList; -  VarArgAMD64Helper(Function &F, MemorySanitizer &MS, -                    MemorySanitizerVisitor &MSV) -    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), -      VAArgOverflowSize(nullptr) {} -    enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; +  VarArgAMD64Helper(Function &F, MemorySanitizer &MS, +                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} +    ArgKind classifyArgument(Value* arg) {      // A very rough approximation of X86_64 argument classification rules.      
Type *T = arg->getType(); @@ -2990,38 +3112,44 @@ struct VarArgAMD64Helper : public VarArgHelper {          assert(A->getType()->isPointerTy());          Type *RealTy = A->getType()->getPointerElementType();          uint64_t ArgSize = DL.getTypeAllocSize(RealTy); -        Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); +        Value *ShadowBase = +            getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);          OverflowOffset += alignTo(ArgSize, 8); -        IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), -                         ArgSize, kShadowTLSAlignment); +        Value *ShadowPtr, *OriginPtr; +        std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( +            A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment); + +        IRB.CreateMemCpy(ShadowBase, ShadowPtr, ArgSize, kShadowTLSAlignment);        } else {          ArgKind AK = classifyArgument(A);          if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)            AK = AK_Memory;          if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)            AK = AK_Memory; -        Value *Base; +        Value *ShadowBase;          switch (AK) {            case AK_GeneralPurpose: -            Base = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset); +            ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);              GpOffset += 8;              break;            case AK_FloatingPoint: -            Base = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset); +            ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);              FpOffset += 16;              break;            case AK_Memory:              if (IsFixed)                continue;              uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); -            Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); +            ShadowBase = +                getShadowPtrForVAArgument(A->getType(), IRB, 
OverflowOffset);              OverflowOffset += alignTo(ArgSize, 8);          }          // Take fixed arguments into account for GpOffset and FpOffset,          // but don't actually store shadows for them.          if (IsFixed)            continue; -        IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); +        IRB.CreateAlignedStore(MSV.getShadow(A), ShadowBase, +                               kShadowTLSAlignment);        }      }      Constant *OverflowSize = @@ -3038,31 +3166,32 @@ struct VarArgAMD64Helper : public VarArgHelper {                                "_msarg");    } -  void visitVAStartInst(VAStartInst &I) override { -    if (F.getCallingConv() == CallingConv::Win64) -      return; +  void unpoisonVAListTagForInst(IntrinsicInst &I) {      IRBuilder<> IRB(&I); -    VAStartInstrumentationList.push_back(&I);      Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); +    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = 8; +    std::tie(ShadowPtr, OriginPtr) = +        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);      // Unpoison the whole __va_list_tag.      // FIXME: magic ABI constants.      IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */24, /* alignment */8, false); +                     /* size */ 24, Alignment, false); +    // We shouldn't need to zero out the origins, as they're only checked for +    // nonzero shadow.    } -  void visitVACopyInst(VACopyInst &I) override { +  void visitVAStartInst(VAStartInst &I) override {      if (F.getCallingConv() == CallingConv::Win64)        return; -    IRBuilder<> IRB(&I); -    Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); +    VAStartInstrumentationList.push_back(&I); +    unpoisonVAListTagForInst(I); +  } -    // Unpoison the whole __va_list_tag. 
-    // FIXME: magic ABI constants. -    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */24, /* alignment */8, false); +  void visitVACopyInst(VACopyInst &I) override { +    if (F.getCallingConv() == CallingConv::Win64) return; +    unpoisonVAListTagForInst(I);    }    void finalizeInstrumentation() override { @@ -3087,28 +3216,31 @@ struct VarArgAMD64Helper : public VarArgHelper {        IRBuilder<> IRB(OrigInst->getNextNode());        Value *VAListTag = OrigInst->getArgOperand(0); -      Value *RegSaveAreaPtrPtr = -        IRB.CreateIntToPtr( +      Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(            IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),                          ConstantInt::get(MS.IntptrTy, 16)),            Type::getInt64PtrTy(*MS.C));        Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); -      Value *RegSaveAreaShadowPtr = -        MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); -      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, -                       AMD64FpEndOffset, 16); - -      Value *OverflowArgAreaPtrPtr = -        IRB.CreateIntToPtr( +      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; +      unsigned Alignment = 16; +      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = +          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), +                                 Alignment); +      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, AMD64FpEndOffset, +                       Alignment); +      Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(            IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),                          ConstantInt::get(MS.IntptrTy, 8)),            Type::getInt64PtrTy(*MS.C));        Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); -      Value *OverflowArgAreaShadowPtr = -        MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); +      Value 
*OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr; +      std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) = +          MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(), +                                 Alignment);        Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,                                               AMD64FpEndOffset); -      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); +      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, +                       Alignment);      }    }  }; @@ -3118,15 +3250,13 @@ struct VarArgMIPS64Helper : public VarArgHelper {    Function &F;    MemorySanitizer &MS;    MemorySanitizerVisitor &MSV; -  Value *VAArgTLSCopy; -  Value *VAArgSize; +  Value *VAArgTLSCopy = nullptr; +  Value *VAArgSize = nullptr;    SmallVector<CallInst*, 16> VAStartInstrumentationList;    VarArgMIPS64Helper(Function &F, MemorySanitizer &MS, -                    MemorySanitizerVisitor &MSV) -    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), -      VAArgSize(nullptr) {} +                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}    void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {      unsigned VAArgOffset = 0; @@ -3134,11 +3264,11 @@ struct VarArgMIPS64Helper : public VarArgHelper {      for (CallSite::arg_iterator ArgIt = CS.arg_begin() +           CS.getFunctionType()->getNumParams(), End = CS.arg_end();           ArgIt != End; ++ArgIt) { -      llvm::Triple TargetTriple(F.getParent()->getTargetTriple()); +      Triple TargetTriple(F.getParent()->getTargetTriple());        Value *A = *ArgIt;        Value *Base;        uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); -      if (TargetTriple.getArch() == llvm::Triple::mips64) { +      if (TargetTriple.getArch() == Triple::mips64) {          // Adjusting the shadow for argument with size < 8 to match the placement          // of bits in big endian system          if 
(ArgSize < 8) @@ -3169,19 +3299,24 @@ struct VarArgMIPS64Helper : public VarArgHelper {      IRBuilder<> IRB(&I);      VAStartInstrumentationList.push_back(&I);      Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); +    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = 8; +    std::tie(ShadowPtr, OriginPtr) = +        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);      IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */8, /* alignment */8, false); +                     /* size */ 8, Alignment, false);    }    void visitVACopyInst(VACopyInst &I) override {      IRBuilder<> IRB(&I); +    VAStartInstrumentationList.push_back(&I);      Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); -    // Unpoison the whole __va_list_tag. -    // FIXME: magic ABI constants. +    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = 8; +    std::tie(ShadowPtr, OriginPtr) = +        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);      IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */8, /* alignment */8, false); +                     /* size */ 8, Alignment, false);    }    void finalizeInstrumentation() override { @@ -3209,14 +3344,16 @@ struct VarArgMIPS64Helper : public VarArgHelper {          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),                          Type::getInt64PtrTy(*MS.C));        Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); -      Value *RegSaveAreaShadowPtr = -      MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); -      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8); +      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; +      unsigned Alignment = 8; +      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = +          
MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), +                                 Alignment); +      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, Alignment);      }    }  }; -  /// \brief AArch64-specific implementation of VarArgHelper.  struct VarArgAArch64Helper : public VarArgHelper {    static const unsigned kAArch64GrArgSize = 64; @@ -3233,18 +3370,16 @@ struct VarArgAArch64Helper : public VarArgHelper {    Function &F;    MemorySanitizer &MS;    MemorySanitizerVisitor &MSV; -  Value *VAArgTLSCopy; -  Value *VAArgOverflowSize; +  Value *VAArgTLSCopy = nullptr; +  Value *VAArgOverflowSize = nullptr;    SmallVector<CallInst*, 16> VAStartInstrumentationList; -  VarArgAArch64Helper(Function &F, MemorySanitizer &MS, -                    MemorySanitizerVisitor &MSV) -    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), -      VAArgOverflowSize(nullptr) {} -    enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; +  VarArgAArch64Helper(Function &F, MemorySanitizer &MS, +                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} +    ArgKind classifyArgument(Value* arg) {      Type *T = arg->getType();      if (T->isFPOrFPVectorTy()) @@ -3324,21 +3459,24 @@ struct VarArgAArch64Helper : public VarArgHelper {      IRBuilder<> IRB(&I);      VAStartInstrumentationList.push_back(&I);      Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); -    // Unpoison the whole __va_list_tag. -    // FIXME: magic ABI constants (size of va_list). 
+    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = 8; +    std::tie(ShadowPtr, OriginPtr) = +        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);      IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */32, /* alignment */8, false); +                     /* size */ 32, Alignment, false);    }    void visitVACopyInst(VACopyInst &I) override {      IRBuilder<> IRB(&I); +    VAStartInstrumentationList.push_back(&I);      Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); -    // Unpoison the whole __va_list_tag. -    // FIXME: magic ABI constants (size of va_list). +    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = 8; +    std::tie(ShadowPtr, OriginPtr) = +        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);      IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */32, /* alignment */8, false); +                     /* size */ 32, Alignment, false);    }    // Retrieve a va_list field of 'void*' size. 
@@ -3424,7 +3562,9 @@ struct VarArgAArch64Helper : public VarArgHelper {          IRB.CreateAdd(GrArgSize, GrOffSaveArea);        Value *GrRegSaveAreaShadowPtr = -        MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB); +          MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(), +                                 /*Alignment*/ 8) +              .first;        Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,                                                GrRegSaveAreaShadowPtrOff); @@ -3437,7 +3577,9 @@ struct VarArgAArch64Helper : public VarArgHelper {            IRB.CreateAdd(VrArgSize, VrOffSaveArea);        Value *VrRegSaveAreaShadowPtr = -        MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB); +          MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(), +                                 /*Alignment*/ 8) +              .first;        Value *VrSrcPtr = IRB.CreateInBoundsGEP(          IRB.getInt8Ty(), @@ -3450,7 +3592,9 @@ struct VarArgAArch64Helper : public VarArgHelper {        // And finally for remaining arguments.        
Value *StackSaveAreaShadowPtr = -        MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB); +          MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(), +                                 /*Alignment*/ 16) +              .first;        Value *StackSrcPtr =          IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, @@ -3467,15 +3611,13 @@ struct VarArgPowerPC64Helper : public VarArgHelper {    Function &F;    MemorySanitizer &MS;    MemorySanitizerVisitor &MSV; -  Value *VAArgTLSCopy; -  Value *VAArgSize; +  Value *VAArgTLSCopy = nullptr; +  Value *VAArgSize = nullptr;    SmallVector<CallInst*, 16> VAStartInstrumentationList;    VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS, -                    MemorySanitizerVisitor &MSV) -    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), -      VAArgSize(nullptr) {} +                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}    void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {      // For PowerPC, we need to deal with alignment of stack arguments - @@ -3485,12 +3627,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper {      // compute current offset from stack pointer (which is always properly      // aligned), and offset for the first vararg, then subtract them.      unsigned VAArgBase; -    llvm::Triple TargetTriple(F.getParent()->getTargetTriple()); +    Triple TargetTriple(F.getParent()->getTargetTriple());      // Parameter save area starts at 48 bytes from frame pointer for ABIv1,      // and 32 bytes for ABIv2.  This is usually determined by target      // endianness, but in theory could be overriden by function attribute.      // For simplicity, we ignore it here (it'd only matter for QPX vectors). 
-    if (TargetTriple.getArch() == llvm::Triple::ppc64) +    if (TargetTriple.getArch() == Triple::ppc64)        VAArgBase = 48;      else        VAArgBase = 32; @@ -3513,8 +3655,11 @@ struct VarArgPowerPC64Helper : public VarArgHelper {          if (!IsFixed) {            Value *Base = getShadowPtrForVAArgument(RealTy, IRB,                                                    VAArgOffset - VAArgBase); -          IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), -                           ArgSize, kShadowTLSAlignment); +          Value *AShadowPtr, *AOriginPtr; +          std::tie(AShadowPtr, AOriginPtr) = MSV.getShadowOriginPtr( +              A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment); + +          IRB.CreateMemCpy(Base, AShadowPtr, ArgSize, kShadowTLSAlignment);          }          VAArgOffset += alignTo(ArgSize, 8);        } else { @@ -3572,19 +3717,25 @@ struct VarArgPowerPC64Helper : public VarArgHelper {      IRBuilder<> IRB(&I);      VAStartInstrumentationList.push_back(&I);      Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); +    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = 8; +    std::tie(ShadowPtr, OriginPtr) = +        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);      IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */8, /* alignment */8, false); +                     /* size */ 8, Alignment, false);    }    void visitVACopyInst(VACopyInst &I) override {      IRBuilder<> IRB(&I);      Value *VAListTag = I.getArgOperand(0); -    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); +    Value *ShadowPtr, *OriginPtr; +    unsigned Alignment = 8; +    std::tie(ShadowPtr, OriginPtr) = +        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);      // Unpoison the whole __va_list_tag.      // FIXME: magic ABI constants.      
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -                     /* size */8, /* alignment */8, false); +                     /* size */ 8, Alignment, false);    }    void finalizeInstrumentation() override { @@ -3612,9 +3763,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper {          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),                          Type::getInt64PtrTy(*MS.C));        Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); -      Value *RegSaveAreaShadowPtr = -      MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); -      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8); +      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; +      unsigned Alignment = 8; +      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = +          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), +                                 Alignment); +      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, Alignment);      }    }  }; @@ -3633,27 +3787,27 @@ struct VarArgNoOpHelper : public VarArgHelper {    void finalizeInstrumentation() override {}  }; -VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, -                                 MemorySanitizerVisitor &Visitor) { +} // end anonymous namespace + +static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, +                                        MemorySanitizerVisitor &Visitor) {    // VarArg handling is only implemented on AMD64. False positives are possible    // on other platforms. 
-  llvm::Triple TargetTriple(Func.getParent()->getTargetTriple()); -  if (TargetTriple.getArch() == llvm::Triple::x86_64) +  Triple TargetTriple(Func.getParent()->getTargetTriple()); +  if (TargetTriple.getArch() == Triple::x86_64)      return new VarArgAMD64Helper(Func, Msan, Visitor); -  else if (TargetTriple.getArch() == llvm::Triple::mips64 || -           TargetTriple.getArch() == llvm::Triple::mips64el) +  else if (TargetTriple.getArch() == Triple::mips64 || +           TargetTriple.getArch() == Triple::mips64el)      return new VarArgMIPS64Helper(Func, Msan, Visitor); -  else if (TargetTriple.getArch() == llvm::Triple::aarch64) +  else if (TargetTriple.getArch() == Triple::aarch64)      return new VarArgAArch64Helper(Func, Msan, Visitor); -  else if (TargetTriple.getArch() == llvm::Triple::ppc64 || -           TargetTriple.getArch() == llvm::Triple::ppc64le) +  else if (TargetTriple.getArch() == Triple::ppc64 || +           TargetTriple.getArch() == Triple::ppc64le)      return new VarArgPowerPC64Helper(Func, Msan, Visitor);    else      return new VarArgNoOpHelper(Func, Msan, Visitor);  } -} // anonymous namespace -  bool MemorySanitizer::runOnFunction(Function &F) {    if (&F == MsanCtorFunction)      return false; diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 8e4bfc0b91bc..cb4b3a9c2545 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1,4 +1,4 @@ -//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===// +//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//  //  //                      The LLVM Compiler Infrastructure  // @@ -50,36 +50,69 @@  #include "llvm/Transforms/PGOInstrumentation.h"  #include "CFGMST.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SmallVector.h"  
#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h"  #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h"  #include "llvm/Analysis/BlockFrequencyInfo.h"  #include "llvm/Analysis/BranchProbabilityInfo.h"  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/IndirectCallSiteVisitor.h"  #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h"  #include "llvm/IR/CallSite.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h"  #include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h"  #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h"  #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h"  #include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProf.h"  #include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/ProfileData/ProfileCommon.h"  #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h"  #include "llvm/Support/DOTGraphTraits.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/GraphWriter.h"  #include "llvm/Support/JamCRC.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Instrumentation.h"  #include 
"llvm/Transforms/Utils/BasicBlockUtils.h"  #include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <numeric>  #include <string>  #include <unordered_map>  #include <utility> @@ -166,15 +199,18 @@ static cl::opt<bool>                     cl::desc("Use this option to turn on/off SELECT "                              "instruction instrumentation. ")); -// Command line option to turn on CFG dot dump of raw profile counts -static cl::opt<bool> -    PGOViewRawCounts("pgo-view-raw-counts", cl::init(false), cl::Hidden, -                     cl::desc("A boolean option to show CFG dag " -                              "with raw profile counts from " -                              "profile data. See also option " -                              "-pgo-view-counts. To limit graph " -                              "display to only one function, use " -                              "filtering option -view-bfi-func-name.")); +// Command line option to turn on CFG dot or text dump of raw profile counts +static cl::opt<PGOViewCountsType> PGOViewRawCounts( +    "pgo-view-raw-counts", cl::Hidden, +    cl::desc("A boolean option to show CFG dag or text " +             "with raw profile counts from " +             "profile data. See also option " +             "-pgo-view-counts. To limit graph " +             "display to only one function, use " +             "filtering option -view-bfi-func-name."), +    cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), +               clEnumValN(PGOVCT_Graph, "graph", "show a graph."), +               clEnumValN(PGOVCT_Text, "text", "show in text.")));  // Command line option to enable/disable memop intrinsic call.size profiling.  static cl::opt<bool> @@ -192,17 +228,15 @@ static cl::opt<bool>  // Command line option to turn on CFG dot dump after profile annotation.  
// Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts -extern cl::opt<bool> PGOViewCounts; +extern cl::opt<PGOViewCountsType> PGOViewCounts;  // Command line option to specify the name of the function for CFG dump  // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=  extern cl::opt<std::string> ViewBlockFreqFuncName; -namespace { -  // Return a string describing the branch condition that can be  // used in static branch probability heuristics: -std::string getBranchCondString(Instruction *TI) { +static std::string getBranchCondString(Instruction *TI) {    BranchInst *BI = dyn_cast<BranchInst>(TI);    if (!BI || !BI->isConditional())      return std::string(); @@ -233,6 +267,8 @@ std::string getBranchCondString(Instruction *TI) {    return result;  } +namespace { +  /// The select instruction visitor plays three roles specified  /// by the mode. In \c VM_counting mode, it simply counts the number of  /// select instructions. In \c VM_instrument mode, it inserts code to count @@ -259,6 +295,7 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {      Mode = VM_counting;      visit(Func);    } +    // Visit the IR stream and instrument all select instructions. \p    // Ind is a pointer to the counter index variable; \p TotalNC    // is the total number of counters; \p FNV is the pointer to the @@ -283,8 +320,10 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {    void instrumentOneSelectInst(SelectInst &SI);    void annotateOneSelectInst(SelectInst &SI); +    // Visit \p SI instruction and perform tasks according to visit mode.    void visitSelectInst(SelectInst &SI); +    // Return the number of select instructions. This needs be called after    // countSelects().    unsigned getNumOfSelectInsts() const { return NSIs; } @@ -328,8 +367,10 @@ struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> {    // Visit the IR stream and annotate all mem intrinsic call instructions.    
void instrumentOneMemIntrinsic(MemIntrinsic &MI); +    // Visit \p MI instruction and perform tasks according to visit mode.    void visitMemIntrinsic(MemIntrinsic &SI); +    unsigned getNumOfMemIntrinsics() const { return NMemIs; }  }; @@ -371,6 +412,7 @@ private:    std::string ProfileFileName;    bool runOnModule(Module &M) override; +    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<BlockFrequencyInfoWrapperPass>();    } @@ -379,6 +421,7 @@ private:  } // end anonymous namespace  char PGOInstrumentationGenLegacyPass::ID = 0; +  INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",                        "PGO instrumentation.", false, false)  INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) @@ -391,6 +434,7 @@ ModulePass *llvm::createPGOInstrumentationGenLegacyPass() {  }  char PGOInstrumentationUseLegacyPass::ID = 0; +  INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",                        "Read PGO instrumentation profile.", false, false)  INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) @@ -403,6 +447,7 @@ ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) {  }  namespace { +  /// \brief An MST based instrumentation for PGO  ///  /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO @@ -413,12 +458,13 @@ struct PGOEdge {    const BasicBlock *SrcBB;    const BasicBlock *DestBB;    uint64_t Weight; -  bool InMST; -  bool Removed; -  bool IsCritical; -  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) -      : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false), -        IsCritical(false) {} +  bool InMST = false; +  bool Removed = false; +  bool IsCritical = false; + +  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) +      : SrcBB(Src), DestBB(Dest), Weight(W) {} +    // Return the information string of an edge.    
const std::string infoString() const {      return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + @@ -430,9 +476,9 @@ struct PGOEdge {  struct BBInfo {    BBInfo *Group;    uint32_t Index; -  uint32_t Rank; +  uint32_t Rank = 0; -  BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {} +  BBInfo(unsigned IX) : Group(this), Index(IX) {}    // Return the information string of this object.    const std::string infoString() const { @@ -444,19 +490,22 @@ struct BBInfo {  template <class Edge, class BBInfo> class FuncPGOInstrumentation {  private:    Function &F; -  void computeCFGHash(); -  void renameComdatFunction(); +    // A map that stores the Comdat group in function F.    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; +  void computeCFGHash(); +  void renameComdatFunction(); +  public:    std::vector<std::vector<Instruction *>> ValueSites;    SelectInstVisitor SIVisitor;    MemIntrinsicVisitor MIVisitor;    std::string FuncName;    GlobalVariable *FuncNameVar; +    // CFG hash value for this function. -  uint64_t FunctionHash; +  uint64_t FunctionHash = 0;    // The Minimum Spanning Tree of function CFG.    CFGMST<Edge, BBInfo> MST; @@ -483,8 +532,7 @@ public:        bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,        BlockFrequencyInfo *BFI = nullptr)        : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), -        SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) { - +        SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) {      // This should be done before CFG hash computation.      
SIVisitor.countSelects(Func);      MIVisitor.countMemIntrinsics(Func); @@ -495,7 +543,7 @@ public:      FuncName = getPGOFuncName(F);      computeCFGHash(); -    if (ComdatMembers.size()) +    if (!ComdatMembers.empty())        renameComdatFunction();      DEBUG(dumpInfo("after CFGMST")); @@ -523,6 +571,8 @@ public:    }  }; +} // end anonymous namespace +  // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index  // value of each BB in the CFG. The higher 32 bits record the number of edges.  template <class Edge, class BBInfo> @@ -545,6 +595,12 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {    FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |                   (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |                   (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); +  DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" +               << " CRC = " << JC.getCRC() +               << ", Selects = " << SIVisitor.getNumOfSelectInsts() +               << ", Edges = " << MST.AllEdges.size() +               << ", ICSites = " << ValueSites[IPVK_IndirectCallTarget].size() +               << ", Hash = " << FunctionHash << "\n";);  }  // Check if we can safely rename this Comdat function. @@ -660,6 +716,9 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {  static void instrumentOneFunc(      Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,      std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { +  // Split indirectbr critical edges here before computing the MST rather than +  // later in getInstrBB() to avoid invalidating it. 
+  SplitIndirectBrCriticalEdges(F, BPI, BFI);    FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,                                                     BFI);    unsigned NumCounters = FuncInfo.getNumCounters(); @@ -676,7 +735,7 @@ static void instrumentOneFunc(             "Cannot get the Instrumentation point");      Builder.CreateCall(          Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), -        {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), +        {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),           Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),           Builder.getInt32(I++)});    } @@ -700,7 +759,7 @@ static void instrumentOneFunc(             "Cannot get the Instrumentation point");      Builder.CreateCall(          Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), -        {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), +        {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),           Builder.getInt64(FuncInfo.FunctionHash),           Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),           Builder.getInt32(IPVK_IndirectCallTarget), @@ -713,12 +772,15 @@ static void instrumentOneFunc(        F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash);  } +namespace { +  // This class represents a CFG edge in profile use compilation.  
struct PGOUseEdge : public PGOEdge { -  bool CountValid; -  uint64_t CountValue; -  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) -      : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {} +  bool CountValid = false; +  uint64_t CountValue = 0; + +  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) +      : PGOEdge(Src, Dest, W) {}    // Set edge count value    void setEdgeCount(uint64_t Value) { @@ -735,22 +797,21 @@ struct PGOUseEdge : public PGOEdge {    }  }; -typedef SmallVector<PGOUseEdge *, 2> DirectEdges; +using DirectEdges = SmallVector<PGOUseEdge *, 2>;  // This class stores the auxiliary information for each BB.  struct UseBBInfo : public BBInfo { -  uint64_t CountValue; +  uint64_t CountValue = 0;    bool CountValid; -  int32_t UnknownCountInEdge; -  int32_t UnknownCountOutEdge; +  int32_t UnknownCountInEdge = 0; +  int32_t UnknownCountOutEdge = 0;    DirectEdges InEdges;    DirectEdges OutEdges; -  UseBBInfo(unsigned IX) -      : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0), -        UnknownCountOutEdge(0) {} + +  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} +    UseBBInfo(unsigned IX, uint64_t C) -      : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0), -        UnknownCountOutEdge(0) {} +      : BBInfo(IX), CountValue(C), CountValid(true) {}    // Set the profile count value for this BB.    void setBBInfoCount(uint64_t Value) { @@ -766,6 +827,8 @@ struct UseBBInfo : public BBInfo {    }  }; +} // end anonymous namespace +  // Sum up the count values for all the edges.  
static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {    uint64_t Total = 0; @@ -777,14 +840,17 @@ static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {    return Total;  } +namespace { +  class PGOUseFunc {  public:    PGOUseFunc(Function &Func, Module *Modu,               std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,               BranchProbabilityInfo *BPI = nullptr, -             BlockFrequencyInfo *BFI = nullptr) -      : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), -        CountPosition(0), ProfileCountSize(0), FreqAttr(FFA_Normal) {} +             BlockFrequencyInfo *BFIin = nullptr) +      : F(Func), M(Modu), BFI(BFIin), +        FuncInfo(Func, ComdatMembers, false, BPI, BFIin), +        FreqAttr(FFA_Normal) {}    // Read counts for the instrumented BB from profile.    bool readCounters(IndexedInstrProfReader *PGOReader); @@ -801,6 +867,9 @@ public:    // Annotate the value profile call sites for one value kind.    void annotateValueSites(uint32_t Kind); +  // Annotate the irreducible loop header weights. +  void annotateIrrLoopHeaderWeights(); +    // The hotness of the function from the profile count.    enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; @@ -809,6 +878,7 @@ public:    // Return the function hash.    uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } +    // Return the profile record for this function;    InstrProfRecord &getProfileRecord() { return ProfileRecord; } @@ -824,9 +894,15 @@ public:    Function &getFunc() const { return F; } +  void dumpInfo(std::string Str = "") const { +    FuncInfo.dumpInfo(Str); +  } +  private:    Function &F;    Module *M; +  BlockFrequencyInfo *BFI; +    // This member stores the shared information with class PGOGenFunc.    FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; @@ -835,10 +911,10 @@ private:    uint64_t ProgramMaxCount;    // Position of counter that remains to be read. 
-  uint32_t CountPosition; +  uint32_t CountPosition = 0;    // Total size of the profile count for this function. -  uint32_t ProfileCountSize; +  uint32_t ProfileCountSize = 0;    // ProfileRecord for this function.    InstrProfRecord ProfileRecord; @@ -873,11 +949,12 @@ private:    }  }; +} // end anonymous namespace +  // Visit all the edges and assign the count value for the instrumented  // edges and the BB.  void PGOUseFunc::setInstrumentedCounts(      const std::vector<uint64_t> &CountFromProfile) { -    assert(FuncInfo.getNumCounters() == CountFromProfile.size());    // Use a worklist as we will update the vector during the iteration.    std::vector<PGOUseEdge *> WorkList; @@ -1087,7 +1164,8 @@ void PGOUseFunc::setBranchWeights() {      TerminatorInst *TI = BB.getTerminator();      if (TI->getNumSuccessors() < 2)        continue; -    if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) +    if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || +          isa<IndirectBrInst>(TI)))        continue;      if (getBBInfo(&BB).CountValue == 0)        continue; @@ -1113,6 +1191,29 @@ void PGOUseFunc::setBranchWeights() {    }  } +static bool isIndirectBrTarget(BasicBlock *BB) { +  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { +    if (isa<IndirectBrInst>((*PI)->getTerminator())) +      return true; +  } +  return false; +} + +void PGOUseFunc::annotateIrrLoopHeaderWeights() { +  DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); +  // Find irr loop headers +  for (auto &BB : F) { +    // As a heuristic also annotate indrectbr targets as they have a high chance +    // to become an irreducible loop header after the indirectbr tail +    // duplication. 
+    if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { +      TerminatorInst *TI = BB.getTerminator(); +      const UseBBInfo &BBCountInfo = getBBInfo(&BB); +      setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); +    } +  } +} +  void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {    Module *M = F.getParent();    IRBuilder<> Builder(&SI); @@ -1121,7 +1222,7 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {    auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);    Builder.CreateCall(        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), -      {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), +      {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),         Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),         Builder.getInt32(*CurCtrIdx), Step});    ++(*CurCtrIdx); @@ -1176,7 +1277,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {    assert(!dyn_cast<ConstantInt>(Length));    Builder.CreateCall(        Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), -      {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), +      {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),         Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty),         Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});    ++CurCtrId; @@ -1242,7 +1343,6 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {      ValueSiteIndex++;    }  } -} // end anonymous namespace  // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime  // aware this is an ir_level profile so it can set the version flag. 
@@ -1312,7 +1412,6 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {  PreservedAnalyses PGOInstrumentationGen::run(Module &M,                                               ModuleAnalysisManager &AM) { -    auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();    auto LookupBPI = [&FAM](Function &F) {      return &FAM.getResult<BranchProbabilityAnalysis>(F); @@ -1367,33 +1466,48 @@ static bool annotateAllFunctions(        continue;      auto *BPI = LookupBPI(F);      auto *BFI = LookupBFI(F); +    // Split indirectbr critical edges here before computing the MST rather than +    // later in getInstrBB() to avoid invalidating it. +    SplitIndirectBrCriticalEdges(F, BPI, BFI);      PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);      if (!Func.readCounters(PGOReader.get()))        continue;      Func.populateCounters();      Func.setBranchWeights();      Func.annotateValueSites(); +    Func.annotateIrrLoopHeaderWeights();      PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();      if (FreqAttr == PGOUseFunc::FFA_Cold)        ColdFunctions.push_back(&F);      else if (FreqAttr == PGOUseFunc::FFA_Hot)        HotFunctions.push_back(&F); -    if (PGOViewCounts && (ViewBlockFreqFuncName.empty() || -                          F.getName().equals(ViewBlockFreqFuncName))) { +    if (PGOViewCounts != PGOVCT_None && +        (ViewBlockFreqFuncName.empty() || +         F.getName().equals(ViewBlockFreqFuncName))) {        LoopInfo LI{DominatorTree(F)};        std::unique_ptr<BranchProbabilityInfo> NewBPI =            llvm::make_unique<BranchProbabilityInfo>(F, LI);        std::unique_ptr<BlockFrequencyInfo> NewBFI =            llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); - -      NewBFI->view(); +      if (PGOViewCounts == PGOVCT_Graph) +        NewBFI->view(); +      else if (PGOViewCounts == PGOVCT_Text) { +        dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; +        NewBFI->print(dbgs()); +      
}      } -    if (PGOViewRawCounts && (ViewBlockFreqFuncName.empty() || -                             F.getName().equals(ViewBlockFreqFuncName))) { -      if (ViewBlockFreqFuncName.empty()) -        WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); -      else -        ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); +    if (PGOViewRawCounts != PGOVCT_None && +        (ViewBlockFreqFuncName.empty() || +         F.getName().equals(ViewBlockFreqFuncName))) { +      if (PGOViewRawCounts == PGOVCT_Graph) +        if (ViewBlockFreqFuncName.empty()) +          WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); +        else +          ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); +      else if (PGOViewRawCounts == PGOVCT_Text) { +        dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; +        Func.dumpInfo(); +      }      }    }    M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); @@ -1402,12 +1516,12 @@ static bool annotateAllFunctions(    // can affect the BranchProbabilityInfo of any callers, resulting in an    // inconsistent MST between prof-gen and prof-use.    
for (auto &F : HotFunctions) { -    F->addFnAttr(llvm::Attribute::InlineHint); +    F->addFnAttr(Attribute::InlineHint);      DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()                   << "\n");    }    for (auto &F : ColdFunctions) { -    F->addFnAttr(llvm::Attribute::Cold); +    F->addFnAttr(Attribute::Cold);      DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n");    }    return true; @@ -1451,9 +1565,19 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {    return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI);  } -namespace llvm { -void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts, -                     uint64_t MaxCount) { +static std::string getSimpleNodeName(const BasicBlock *Node) { +  if (!Node->getName().empty()) +    return Node->getName(); + +  std::string SimpleNodeName; +  raw_string_ostream OS(SimpleNodeName); +  Node->printAsOperand(OS, false); +  return OS.str(); +} + +void llvm::setProfMetadata(Module *M, Instruction *TI, +                           ArrayRef<uint64_t> EdgeCounts, +                           uint64_t MaxCount) {    MDBuilder MDB(M->getContext());    assert(MaxCount > 0 && "Bad max count");    uint64_t Scale = calculateCountScale(MaxCount); @@ -1464,7 +1588,7 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,    DEBUG(dbgs() << "Weight is: ";          for (const auto &W : Weights) { dbgs() << W << " "; }          dbgs() << "\n";); -  TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); +  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));    if (EmitBranchProbability) {      std::string BrCondStr = getBranchCondString(TI);      if (BrCondStr.empty()) @@ -1483,43 +1607,46 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,      OS << " (total count : " << TotalCount << ")";      OS.flush();      Function *F = 
TI->getParent()->getParent(); -    emitOptimizationRemarkAnalysis( -        F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(), -        Twine(BrCondStr) + -            " is true with probability : " + Twine(BranchProbStr)); +    OptimizationRemarkEmitter ORE(F); +    ORE.emit([&]() { +      return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) +             << BrCondStr << " is true with probability : " << BranchProbStr; +    });    }  } +namespace llvm { + +void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { +  MDBuilder MDB(M->getContext()); +  TI->setMetadata(llvm::LLVMContext::MD_irr_loop, +                  MDB.createIrrLoopHeaderWeight(Count)); +} +  template <> struct GraphTraits<PGOUseFunc *> { -  typedef const BasicBlock *NodeRef; -  typedef succ_const_iterator ChildIteratorType; -  typedef pointer_iterator<Function::const_iterator> nodes_iterator; +  using NodeRef = const BasicBlock *; +  using ChildIteratorType = succ_const_iterator; +  using nodes_iterator = pointer_iterator<Function::const_iterator>;    static NodeRef getEntryNode(const PGOUseFunc *G) {      return &G->getFunc().front();    } +    static ChildIteratorType child_begin(const NodeRef N) {      return succ_begin(N);    } +    static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } +    static nodes_iterator nodes_begin(const PGOUseFunc *G) {      return nodes_iterator(G->getFunc().begin());    } +    static nodes_iterator nodes_end(const PGOUseFunc *G) {      return nodes_iterator(G->getFunc().end());    }  }; -static std::string getSimpleNodeName(const BasicBlock *Node) { -  if (!Node->getName().empty()) -    return Node->getName(); - -  std::string SimpleNodeName; -  raw_string_ostream OS(SimpleNodeName); -  Node->printAsOperand(OS, false); -  return OS.str(); -} -  template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {    explicit DOTGraphTraits(bool isSimple = false)        : DefaultDOTGraphTraits(isSimple) 
{} @@ -1559,4 +1686,5 @@ template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {      return Result;    }  }; -} // namespace llvm + +} // end namespace llvm diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 0bc9ddfbe4d3..95eb3680403a 100644 --- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -21,16 +21,16 @@  #include "llvm/ADT/Twine.h"  #include "llvm/Analysis/BlockFrequencyInfo.h"  #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CallSite.h"  #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h"  #include "llvm/IR/InstrTypes.h"  #include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/PassManager.h"  #include "llvm/IR/Type.h" @@ -110,6 +110,7 @@ private:    bool runOnFunction(Function &F) override;    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<BlockFrequencyInfoWrapperPass>(); +    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();      AU.addPreserved<GlobalsAAWrapperPass>();    }  }; @@ -131,8 +132,9 @@ FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {  namespace {  class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {  public: -  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) -      : Func(Func), BFI(BFI), Changed(false) { +  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, +               OptimizationRemarkEmitter &ORE) +      : Func(Func), BFI(BFI), ORE(ORE), Changed(false) {      ValueDataArray =          llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);      // Get the MemOPSize range information from option MemOPSizeRange, @@ 
-166,6 +168,7 @@ public:  private:    Function &Func;    BlockFrequencyInfo &BFI; +  OptimizationRemarkEmitter &ORE;    bool Changed;    std::vector<MemIntrinsic *> WorkList;    // Start of the previse range. @@ -358,12 +361,15 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {    DEBUG(dbgs() << "\n\n== Basic Block After==\n");    for (uint64_t SizeId : SizeIds) { -    ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId);      BasicBlock *CaseBB = BasicBlock::Create(          Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);      Instruction *NewInst = MI->clone();      // Fix the argument. -    dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId); +    MemIntrinsic * MemI = dyn_cast<MemIntrinsic>(NewInst); +    IntegerType *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType()); +    assert(SizeType && "Expected integer type size argument."); +    ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId); +    MemI->setLength(CaseSizeId);      CaseBB->getInstList().push_back(NewInst);      IRBuilder<> IRBCase(CaseBB);      IRBCase.CreateBr(MergeBB); @@ -376,23 +382,27 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {    DEBUG(dbgs() << *DefaultBB << "\n");    DEBUG(dbgs() << *MergeBB << "\n"); -  emitOptimizationRemark(Func.getContext(), "memop-opt", Func, -                         MI->getDebugLoc(), -                         Twine("optimize ") + getMIName(MI) + " with count " + -                             Twine(SumForOpt) + " out of " + Twine(TotalCount) + -                             " for " + Twine(Version) + " versions"); +  ORE.emit([&]() { +    using namespace ore; +    return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI) +             << "optimized " << NV("Intrinsic", StringRef(getMIName(MI))) +             << " with count " << NV("Count", SumForOpt) << " out of " +             << NV("Total", TotalCount) << " for " << NV("Versions", Version) +             << " versions"; +  });    return true;  }  } // 
namespace -static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { +static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, +                                OptimizationRemarkEmitter &ORE) {    if (DisableMemOPOPT)      return false;    if (F.hasFnAttribute(Attribute::OptimizeForSize))      return false; -  MemOPSizeOpt MemOPSizeOpt(F, BFI); +  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE);    MemOPSizeOpt.perform();    return MemOPSizeOpt.isChanged();  } @@ -400,7 +410,8 @@ static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) {  bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {    BlockFrequencyInfo &BFI =        getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); -  return PGOMemOPSizeOptImpl(F, BFI); +  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); +  return PGOMemOPSizeOptImpl(F, BFI, ORE);  }  namespace llvm { @@ -409,7 +420,8 @@ char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;  PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,                                         FunctionAnalysisManager &FAM) {    auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); -  bool Changed = PGOMemOPSizeOptImpl(F, BFI); +  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); +  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE);    if (!Changed)      return PreservedAnalyses::all();    auto PA = PreservedAnalyses(); diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 06fe07598374..d950e2e730f2 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -17,12 +17,16 @@  #include "llvm/Analysis/PostDominators.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DebugInfo.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h" +#include 
"llvm/IR/GlobalVariable.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/Module.h" @@ -46,6 +50,14 @@ static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1";  static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2";  static const char *const SanCovTraceCmp4 = "__sanitizer_cov_trace_cmp4";  static const char *const SanCovTraceCmp8 = "__sanitizer_cov_trace_cmp8"; +static const char *const SanCovTraceConstCmp1 = +    "__sanitizer_cov_trace_const_cmp1"; +static const char *const SanCovTraceConstCmp2 = +    "__sanitizer_cov_trace_const_cmp2"; +static const char *const SanCovTraceConstCmp4 = +    "__sanitizer_cov_trace_const_cmp4"; +static const char *const SanCovTraceConstCmp8 = +    "__sanitizer_cov_trace_const_cmp8";  static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";  static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";  static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep"; @@ -57,11 +69,15 @@ static const char *const SanCovTracePCGuardName =      "__sanitizer_cov_trace_pc_guard";  static const char *const SanCovTracePCGuardInitName =      "__sanitizer_cov_trace_pc_guard_init"; -static const char *const SanCov8bitCountersInitName =  +static const char *const SanCov8bitCountersInitName =      "__sanitizer_cov_8bit_counters_init"; +static const char *const SanCovPCsInitName = "__sanitizer_cov_pcs_init";  static const char *const SanCovGuardsSectionName = "sancov_guards";  static const char *const SanCovCountersSectionName = "sancov_cntrs"; +static const char *const SanCovPCsSectionName = "sancov_pcs"; + +static const char *const SanCovLowestStackName = "__sancov_lowest_stack";  static cl::opt<int> ClCoverageLevel(      "sanitizer-coverage-level", @@ -77,9 +93,19 @@ static cl::opt<bool> 
ClTracePCGuard("sanitizer-coverage-trace-pc-guard",                                      cl::desc("pc tracing with a guard"),                                      cl::Hidden, cl::init(false)); -static cl::opt<bool> ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", -                                    cl::desc("increments 8-bit counter for every edge"), -                                    cl::Hidden, cl::init(false)); +// If true, we create a global variable that contains PCs of all instrumented +// BBs, put this global into a named section, and pass this section's bounds +// to __sanitizer_cov_pcs_init. +// This way the coverage instrumentation does not need to acquire the PCs +// at run-time. Works with trace-pc-guard and inline-8bit-counters. +static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table", +                                     cl::desc("create a static PC table"), +                                     cl::Hidden, cl::init(false)); + +static cl::opt<bool> +    ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", +                         cl::desc("increments 8-bit counter for every edge"), +                         cl::Hidden, cl::init(false));  static cl::opt<bool>      ClCMPTracing("sanitizer-coverage-trace-compares", @@ -99,6 +125,10 @@ static cl::opt<bool>                    cl::desc("Reduce the number of instrumented blocks"),                    cl::Hidden, cl::init(true)); +static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth", +                                  cl::desc("max stack depth tracing"), +                                  cl::Hidden, cl::init(false)); +  namespace {  SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { @@ -135,9 +165,12 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {    Options.TracePC |= ClTracePC;    Options.TracePCGuard |= ClTracePCGuard;    Options.Inline8bitCounters |= ClInline8bitCounters; -  if (!Options.TracePCGuard && 
!Options.TracePC && !Options.Inline8bitCounters) -    Options.TracePCGuard = true; // TracePCGuard is default. +  Options.PCTable |= ClCreatePCTable;    Options.NoPrune |= !ClPruneBlocks; +  Options.StackDepth |= ClStackDepth; +  if (!Options.TracePCGuard && !Options.TracePC && +      !Options.Inline8bitCounters && !Options.StackDepth) +    Options.TracePCGuard = true; // TracePCGuard is default.    return Options;  } @@ -168,14 +201,19 @@ private:                           ArrayRef<GetElementPtrInst *> GepTraceTargets);    void InjectTraceForSwitch(Function &F,                              ArrayRef<Instruction *> SwitchTraceTargets); -  bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); +  bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks, +                      bool IsLeafFunc = true);    GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,                                                      Function &F, Type *Ty,                                                      const char *Section); -  void CreateFunctionLocalArrays(size_t NumGuards, Function &F); -  void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx); -  void CreateInitCallForSection(Module &M, const char *InitFunctionName, -                                Type *Ty, const std::string &Section); +  GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks); +  void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks); +  void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, +                             bool IsLeafFunc = true); +  Function *CreateInitCallsForSections(Module &M, const char *InitFunctionName, +                                       Type *Ty, const char *Section); +  std::pair<GlobalVariable *, GlobalVariable *> +  CreateSecStartEnd(Module &M, const char *Section, Type *Ty);    void SetNoSanitizeMetadata(Instruction *I) {      
I->setMetadata(I->getModule()->getMDKindID("nosanitize"), @@ -188,12 +226,14 @@ private:    Function *SanCovTracePCIndir;    Function *SanCovTracePC, *SanCovTracePCGuard;    Function *SanCovTraceCmpFunction[4]; +  Function *SanCovTraceConstCmpFunction[4];    Function *SanCovTraceDivFunction[2];    Function *SanCovTraceGepFunction;    Function *SanCovTraceSwitchFunction; +  GlobalVariable *SanCovLowestStack;    InlineAsm *EmptyAsm;    Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, -      *Int8Ty, *Int8PtrTy; +      *Int16Ty, *Int8Ty, *Int8PtrTy;    Module *CurModule;    Triple TargetTriple;    LLVMContext *C; @@ -201,17 +241,17 @@ private:    GlobalVariable *FunctionGuardArray;  // for trace-pc-guard.    GlobalVariable *Function8bitCounterArray;  // for inline-8bit-counters. +  GlobalVariable *FunctionPCsArray;  // for pc-table. +  SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed;    SanitizerCoverageOptions Options;  };  } // namespace -void SanitizerCoverageModule::CreateInitCallForSection( -    Module &M, const char *InitFunctionName, Type *Ty, -    const std::string &Section) { -  IRBuilder<> IRB(M.getContext()); -  Function *CtorFunc; +std::pair<GlobalVariable *, GlobalVariable *> +SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section, +                                           Type *Ty) {    GlobalVariable *SecStart =        new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,                           getSectionStart(Section)); @@ -221,6 +261,18 @@ void SanitizerCoverageModule::CreateInitCallForSection(                           nullptr, getSectionEnd(Section));    SecEnd->setVisibility(GlobalValue::HiddenVisibility); +  return std::make_pair(SecStart, SecEnd); +} + + +Function *SanitizerCoverageModule::CreateInitCallsForSections( +    Module &M, const char *InitFunctionName, Type *Ty, +    const char *Section) { +  IRBuilder<> IRB(M.getContext()); +  auto SecStartEnd = 
CreateSecStartEnd(M, Section, Ty); +  auto SecStart = SecStartEnd.first; +  auto SecEnd = SecStartEnd.second; +  Function *CtorFunc;    std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(        M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty},        {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)}); @@ -232,6 +284,7 @@ void SanitizerCoverageModule::CreateInitCallForSection(    } else {      appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);    } +  return CtorFunc;  }  bool SanitizerCoverageModule::runOnModule(Module &M) { @@ -243,6 +296,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {    TargetTriple = Triple(M.getTargetTriple());    FunctionGuardArray = nullptr;    Function8bitCounterArray = nullptr; +  FunctionPCsArray = nullptr;    IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());    IntptrPtrTy = PointerType::getUnqual(IntptrTy);    Type *VoidTy = Type::getVoidTy(*C); @@ -252,6 +306,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {    Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());    Int64Ty = IRB.getInt64Ty();    Int32Ty = IRB.getInt32Ty(); +  Int16Ty = IRB.getInt16Ty();    Int8Ty = IRB.getInt8Ty();    SanCovTracePCIndir = checkSanitizerInterfaceFunction( @@ -269,6 +324,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {        checkSanitizerInterfaceFunction(M.getOrInsertFunction(            SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty)); +  SanCovTraceConstCmpFunction[0] = +      checkSanitizerInterfaceFunction(M.getOrInsertFunction( +          SanCovTraceConstCmp1, VoidTy, Int8Ty, Int8Ty)); +  SanCovTraceConstCmpFunction[1] = +      checkSanitizerInterfaceFunction(M.getOrInsertFunction( +          SanCovTraceConstCmp2, VoidTy, Int16Ty, Int16Ty)); +  SanCovTraceConstCmpFunction[2] = +      checkSanitizerInterfaceFunction(M.getOrInsertFunction( +          SanCovTraceConstCmp4, VoidTy, Int32Ty, Int32Ty)); +  SanCovTraceConstCmpFunction[3] = +      
checkSanitizerInterfaceFunction(M.getOrInsertFunction( +          SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty)); +    SanCovTraceDivFunction[0] =        checkSanitizerInterfaceFunction(M.getOrInsertFunction(            SanCovTraceDiv4, VoidTy, IRB.getInt32Ty())); @@ -281,12 +349,23 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {    SanCovTraceSwitchFunction =        checkSanitizerInterfaceFunction(M.getOrInsertFunction(            SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy)); + +  Constant *SanCovLowestStackConstant = +      M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy); +  SanCovLowestStack = cast<GlobalVariable>(SanCovLowestStackConstant); +  SanCovLowestStack->setThreadLocalMode( +      GlobalValue::ThreadLocalMode::InitialExecTLSModel); +  if (Options.StackDepth && !SanCovLowestStack->isDeclaration()) +    SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy)); +    // Make sure smaller parameters are zero-extended to i64 as required by the    // x86_64 ABI.    
if (TargetTriple.getArch() == Triple::x86_64) {      for (int i = 0; i < 3; i++) {        SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt);        SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt); +      SanCovTraceConstCmpFunction[i]->addParamAttr(0, Attribute::ZExt); +      SanCovTraceConstCmpFunction[i]->addParamAttr(1, Attribute::ZExt);      }      SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt);    } @@ -305,13 +384,27 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {    for (auto &F : M)      runOnFunction(F); +  Function *Ctor = nullptr; +    if (FunctionGuardArray) -    CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy, -                             SanCovGuardsSectionName); +    Ctor = CreateInitCallsForSections(M, SanCovTracePCGuardInitName, Int32PtrTy, +                                      SanCovGuardsSectionName);    if (Function8bitCounterArray) -    CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy, -                             SanCovCountersSectionName); - +    Ctor = CreateInitCallsForSections(M, SanCov8bitCountersInitName, Int8PtrTy, +                                      SanCovCountersSectionName); +  if (Ctor && Options.PCTable) { +    auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy); +    Function *InitFunction = declareSanitizerInitFunction( +        M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy}); +    IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator()); +    IRBCtor.CreateCall(InitFunction, +                       {IRB.CreatePointerCast(SecStartEnd.first, IntptrPtrTy), +                        IRB.CreatePointerCast(SecStartEnd.second, IntptrPtrTy)}); +  } +  // We don't reference these arrays directly in any of our runtime functions, +  // so we need to prevent them from being dead stripped. 
+  if (TargetTriple.isOSBinFormatMachO()) +    appendToUsed(M, GlobalsToAppendToUsed);    return true;  } @@ -362,6 +455,10 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,    if (Options.NoPrune || &F.getEntryBlock() == BB)      return true; +  if (Options.CoverageType == SanitizerCoverageOptions::SCK_Function && +      &F.getEntryBlock() != BB) +    return false; +    // Do not instrument full dominators, or full post-dominators with multiple    // predecessors.    return !isFullDominator(BB, DT) @@ -375,6 +472,9 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {      return false; // Should not instrument sanitizer init functions.    if (F.getName().startswith("__sanitizer_"))      return false;  // Don't instrument __sanitizer_* callbacks. +  // Don't touch available_externally functions, their actual body is elewhere. +  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) +    return false;    // Don't instrument MSVC CRT configuration helpers. They may run before normal    // initialization.    
if (F.getName() == "__local_stdio_printf_options" || @@ -399,6 +499,7 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {        &getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();    const PostDominatorTree *PDT =        &getAnalysis<PostDominatorTreeWrapperPass>(F).getPostDomTree(); +  bool IsLeafFunc = true;    for (auto &BB : F) {      if (shouldInstrumentBlock(F, &BB, DT, PDT, Options)) @@ -423,10 +524,14 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {        if (Options.TraceGep)          if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst))            GepTraceTargets.push_back(GEP); -   } +      if (Options.StackDepth) +        if (isa<InvokeInst>(Inst) || +            (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))) +          IsLeafFunc = false; +    }    } -  InjectCoverage(F, BlocksToInstrument); +  InjectCoverage(F, BlocksToInstrument, IsLeafFunc);    InjectCoverageForIndirectCalls(F, IndirCalls);    InjectTraceForCmp(F, CmpTraceTargets);    InjectTraceForSwitch(F, SwitchTraceTargets); @@ -444,35 +549,65 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(    if (auto Comdat = F.getComdat())      Array->setComdat(Comdat);    Array->setSection(getSectionName(Section)); +  Array->setAlignment(Ty->isPointerTy() ? 
DL->getPointerSize() +                                        : Ty->getPrimitiveSizeInBits() / 8);    return Array;  } -void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards, -                                                       Function &F) { -  if (Options.TracePCGuard) + +GlobalVariable * +SanitizerCoverageModule::CreatePCArray(Function &F, +                                       ArrayRef<BasicBlock *> AllBlocks) { +  size_t N = AllBlocks.size(); +  assert(N); +  SmallVector<Constant *, 32> PCs; +  IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt()); +  for (size_t i = 0; i < N; i++) { +    if (&F.getEntryBlock() == AllBlocks[i]) { +      PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy)); +      PCs.push_back((Constant *)IRB.CreateIntToPtr( +          ConstantInt::get(IntptrTy, 1), IntptrPtrTy)); +    } else { +      PCs.push_back((Constant *)IRB.CreatePointerCast( +          BlockAddress::get(AllBlocks[i]), IntptrPtrTy)); +      PCs.push_back((Constant *)IRB.CreateIntToPtr( +          ConstantInt::get(IntptrTy, 0), IntptrPtrTy)); +    } +  } +  auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy, +                                                    SanCovPCsSectionName); +  PCArray->setInitializer( +      ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs)); +  PCArray->setConstant(true); + +  return PCArray; +} + +void SanitizerCoverageModule::CreateFunctionLocalArrays( +    Function &F, ArrayRef<BasicBlock *> AllBlocks) { +  if (Options.TracePCGuard) {      FunctionGuardArray = CreateFunctionLocalArrayInSection( -        NumGuards, F, Int32Ty, SanCovGuardsSectionName); -  if (Options.Inline8bitCounters) +        AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName); +    GlobalsToAppendToUsed.push_back(FunctionGuardArray); +  } +  if (Options.Inline8bitCounters) {      Function8bitCounterArray = CreateFunctionLocalArrayInSection( -        NumGuards, F, Int8Ty, SanCovCountersSectionName); 
+        AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName); +    GlobalsToAppendToUsed.push_back(Function8bitCounterArray); +  } +  if (Options.PCTable) { +    FunctionPCsArray = CreatePCArray(F, AllBlocks); +    GlobalsToAppendToUsed.push_back(FunctionPCsArray); +  }  }  bool SanitizerCoverageModule::InjectCoverage(Function &F, -                                             ArrayRef<BasicBlock *> AllBlocks) { +                                             ArrayRef<BasicBlock *> AllBlocks, +                                             bool IsLeafFunc) {    if (AllBlocks.empty()) return false; -  switch (Options.CoverageType) { -  case SanitizerCoverageOptions::SCK_None: -    return false; -  case SanitizerCoverageOptions::SCK_Function: -    CreateFunctionLocalArrays(1, F); -    InjectCoverageAtBlock(F, F.getEntryBlock(), 0); -    return true; -  default: { -    CreateFunctionLocalArrays(AllBlocks.size(), F); -    for (size_t i = 0, N = AllBlocks.size(); i < N; i++) -      InjectCoverageAtBlock(F, *AllBlocks[i], i); -    return true; -  } -  } +  CreateFunctionLocalArrays(F, AllBlocks); +  for (size_t i = 0, N = AllBlocks.size(); i < N; i++) +    InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc); +  return true;  }  // On every indirect call we call a run-time function @@ -585,16 +720,28 @@ void SanitizerCoverageModule::InjectTraceForCmp(                          TypeSize == 64 ? 3 : -1;        if (CallbackIdx < 0) continue;        // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1); +      auto CallbackFunc = SanCovTraceCmpFunction[CallbackIdx]; +      bool FirstIsConst = isa<ConstantInt>(A0); +      bool SecondIsConst = isa<ConstantInt>(A1); +      // If both are const, then we don't need such a comparison. +      if (FirstIsConst && SecondIsConst) continue; +      // If only one is const, then make it the first callback argument. 
+      if (FirstIsConst || SecondIsConst) { +        CallbackFunc = SanCovTraceConstCmpFunction[CallbackIdx]; +        if (SecondIsConst) +          std::swap(A0, A1); +      } +        auto Ty = Type::getIntNTy(*C, TypeSize); -      IRB.CreateCall( -          SanCovTraceCmpFunction[CallbackIdx], -          {IRB.CreateIntCast(A0, Ty, true), IRB.CreateIntCast(A1, Ty, true)}); +      IRB.CreateCall(CallbackFunc, {IRB.CreateIntCast(A0, Ty, true), +              IRB.CreateIntCast(A1, Ty, true)});      }    }  }  void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, -                                                    size_t Idx) { +                                                    size_t Idx, +                                                    bool IsLeafFunc) {    BasicBlock::iterator IP = BB.getFirstInsertionPt();    bool IsEntryBB = &BB == &F.getEntryBlock();    DebugLoc EntryLoc; @@ -633,6 +780,21 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,      SetNoSanitizeMetadata(Load);      SetNoSanitizeMetadata(Store);    } +  if (Options.StackDepth && IsEntryBB && !IsLeafFunc) { +    // Check stack depth.  If it's the deepest so far, record it. +    Function *GetFrameAddr = +        Intrinsic::getDeclaration(F.getParent(), Intrinsic::frameaddress); +    auto FrameAddrPtr = +        IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)}); +    auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy); +    auto LowestStack = IRB.CreateLoad(SanCovLowestStack); +    auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack); +    auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false); +    IRBuilder<> ThenIRB(ThenTerm); +    auto Store = ThenIRB.CreateStore(FrameAddrInt, SanCovLowestStack); +    SetNoSanitizeMetadata(LowestStack); +    SetNoSanitizeMetadata(Store); +  }  }  std::string | 
