author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000
commit | 01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch) |
tree | 4def12e759965de927d963ac65840d663ef9d1ea /lib/Transforms/Instrumentation |
parent | f0f4822ed4b66e3579e92a89f368f8fb860e218e (diff) |
Diffstat (limited to 'lib/Transforms/Instrumentation')
-rw-r--r-- | lib/Transforms/Instrumentation/AddressSanitizer.cpp | 384
-rw-r--r-- | lib/Transforms/Instrumentation/BoundsChecking.cpp | 7
-rw-r--r-- | lib/Transforms/Instrumentation/CFGMST.h | 1
-rw-r--r-- | lib/Transforms/Instrumentation/CMakeLists.txt | 3
-rw-r--r-- | lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 34
-rw-r--r-- | lib/Transforms/Instrumentation/EfficiencySanitizer.cpp | 901
-rw-r--r-- | lib/Transforms/Instrumentation/GCOVProfiling.cpp | 286
-rw-r--r-- | lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 661
-rw-r--r-- | lib/Transforms/Instrumentation/InstrProfiling.cpp | 407
-rw-r--r-- | lib/Transforms/Instrumentation/Instrumentation.cpp | 11
-rw-r--r-- | lib/Transforms/Instrumentation/Makefile | 15
-rw-r--r-- | lib/Transforms/Instrumentation/MemorySanitizer.cpp | 405
-rw-r--r-- | lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 409
-rw-r--r-- | lib/Transforms/Instrumentation/SafeStack.cpp | 760
-rw-r--r-- | lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 253
-rw-r--r-- | lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 119
16 files changed, 3232 insertions, 1424 deletions
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index a9df5e5898ae3..43d1b377f858a 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -13,14 +13,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -48,6 +45,7 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/SwapByteOrder.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -65,17 +63,23 @@ using namespace llvm;
 
 static const uint64_t kDefaultShadowScale = 3;
 static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
-static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
 static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
+static const uint64_t kIOSShadowOffset64 = 0x120200000;
+static const uint64_t kIOSSimShadowOffset32 = 1ULL << 30;
+static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64;
 static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000;  // < 2G.
 static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
 static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
+static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;
 static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
 static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
 static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
 static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
 static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
 static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
+// TODO(wwchrome): Experimental for asan Win64, may change.
+static const uint64_t kWindowsShadowOffset64 = 0x1ULL << 45;  // 32TB.
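Editor's note: most of the 64-bit offsets above are single set bits (1ULL << n). That matters later in this patch, where getShadowMapping() only sets Mapping.OrShadowOffset when the offset passes !(Offset & (Offset - 1)), i.e., is a power of two, so the shadow base can be merged with a bitwise OR instead of an ADD on x86. A minimal standalone sketch of that test (illustrative C++, not LLVM code; the zero-offset corner case is glossed over):

```cpp
#include <cstdint>

// A power of two has exactly one set bit, so clearing the lowest set bit
// with x & (x - 1) yields zero. This mirrors the check the pass uses for
// Mapping.OrShadowOffset.
bool offsetAllowsOr(uint64_t Offset) {
  return (Offset & (Offset - 1)) == 0;
}
// When this holds and the shifted address has no bits overlapping the
// offset, (Mem >> Scale) | Offset equals (Mem >> Scale) + Offset.
```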
 static const size_t kMinStackMallocSize = 1 << 6;   // 64B
 static const size_t kMaxStackMallocSize = 1 << 16;  // 64K
@@ -89,11 +93,15 @@ static const char *const kAsanReportErrorTemplate = "__asan_report_";
 static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
 static const char *const kAsanUnregisterGlobalsName =
     "__asan_unregister_globals";
+static const char *const kAsanRegisterImageGlobalsName =
+    "__asan_register_image_globals";
+static const char *const kAsanUnregisterImageGlobalsName =
+    "__asan_unregister_image_globals";
 static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
 static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
 static const char *const kAsanInitName = "__asan_init";
 static const char *const kAsanVersionCheckName =
-    "__asan_version_mismatch_check_v6";
+    "__asan_version_mismatch_check_v8";
 static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
 static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
 static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
@@ -101,13 +109,16 @@ static const int kMaxAsanStackMallocSizeClass = 10;
 static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
 static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
 static const char *const kAsanGenPrefix = "__asan_gen_";
+static const char *const kODRGenPrefix = "__odr_asan_gen_";
 static const char *const kSanCovGenPrefix = "__sancov_gen_";
 static const char *const kAsanPoisonStackMemoryName =
     "__asan_poison_stack_memory";
 static const char *const kAsanUnpoisonStackMemoryName =
     "__asan_unpoison_stack_memory";
+static const char *const kAsanGlobalsRegisteredFlagName =
+    "__asan_globals_registered";
 
-static const char *const kAsanOptionDetectUAR =
+static const char *const kAsanOptionDetectUseAfterReturn =
     "__asan_option_detect_stack_use_after_return";
 
 static const char *const kAsanAllocaPoison = "__asan_alloca_poison";
@@ -154,8 +165,11 @@ static cl::opt<int> ClMaxInsnsToInstrumentPerBB(
 static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),
                              cl::Hidden, cl::init(true));
 static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
-                                      cl::desc("Check return-after-free"),
+                                      cl::desc("Check stack-use-after-return"),
                                       cl::Hidden, cl::init(true));
+static cl::opt<bool> ClUseAfterScope("asan-use-after-scope",
+                                     cl::desc("Check stack-use-after-scope"),
+                                     cl::Hidden, cl::init(false));
 // This flag may need to be replaced with -f[no]asan-globals.
 static cl::opt<bool> ClGlobals("asan-globals",
                                cl::desc("Handle global objects"), cl::Hidden,
@@ -192,10 +206,14 @@ static cl::opt<bool> ClSkipPromotableAllocas(
 
 // These flags allow changing the shadow mapping.
 // The shadow mapping looks like
-// Shadow = (Mem >> scale) + (1 << offset_log)
+// Shadow = (Mem >> scale) + offset
 static cl::opt<int> ClMappingScale("asan-mapping-scale",
                                    cl::desc("scale of asan shadow mapping"),
                                    cl::Hidden, cl::init(0));
+static cl::opt<unsigned long long> ClMappingOffset(
+    "asan-mapping-offset",
+    cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), cl::Hidden,
+    cl::init(0));
 
 // Optimization flags. Not user visible, used mostly for testing
 // and benchmarking the tool.
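To make the mapping formula above concrete, here is a hedged source-level sketch of the address arithmetic the pass emits as IR (plain C++ for illustration only; shadowByteFor is not an LLVM function, and the constants are the x86-64 defaults from the previous hunk):

```cpp
#include <cstdint>

static const uint64_t kScale = 3;               // kDefaultShadowScale
static const uint64_t kOffset = 0x7FFF8000ULL;  // kSmallX86_64ShadowOffset

// Shadow = (Mem >> scale) + offset: with scale 3, each shadow byte
// describes the state of one 8-byte granule of application memory.
uint64_t shadowByteFor(uint64_t Mem) {
  return (Mem >> kScale) + kOffset;
}
```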
@@ -211,11 +229,6 @@ static cl::opt<bool> ClOptStack(
     "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"),
     cl::Hidden, cl::init(false));
 
-static cl::opt<bool> ClCheckLifetime(
-    "asan-check-lifetime",
-    cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), cl::Hidden,
-    cl::init(false));
-
 static cl::opt<bool> ClDynamicAllocaStack(
     "asan-stack-dynamic-alloca",
     cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden,
@@ -226,6 +239,19 @@ static cl::opt<uint32_t> ClForceExperiment(
     cl::desc("Force optimization experiment (for testing)"), cl::Hidden,
     cl::init(0));
 
+static cl::opt<bool>
+    ClUsePrivateAliasForGlobals("asan-use-private-alias",
+                                cl::desc("Use private aliases for global"
+                                         " variables"),
+                                cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+    ClUseMachOGlobalsSection("asan-globals-live-support",
+                             cl::desc("Use linker features to support dead "
+                                      "code stripping of globals "
+                                      "(Mach-O only)"),
+                             cl::Hidden, cl::init(false));
+
 // Debug flags.
 static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
                             cl::init(0));
@@ -334,11 +360,13 @@ struct ShadowMapping {
 static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
                                       bool IsKasan) {
   bool IsAndroid = TargetTriple.isAndroid();
-  bool IsIOS = TargetTriple.isiOS();
+  bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
   bool IsFreeBSD = TargetTriple.isOSFreeBSD();
   bool IsLinux = TargetTriple.isOSLinux();
   bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
                  TargetTriple.getArch() == llvm::Triple::ppc64le;
+  bool IsSystemZ = TargetTriple.getArch() == llvm::Triple::systemz;
+  bool IsX86 = TargetTriple.getArch() == llvm::Triple::x86;
   bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
   bool IsMIPS32 = TargetTriple.getArch() == llvm::Triple::mips ||
                   TargetTriple.getArch() == llvm::Triple::mipsel;
@@ -359,7 +387,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
     else if (IsFreeBSD)
       Mapping.Offset = kFreeBSD_ShadowOffset32;
     else if (IsIOS)
-      Mapping.Offset = kIOSShadowOffset32;
+      // If we're targeting iOS and x86, the binary is built for iOS simulator.
+      Mapping.Offset = IsX86 ? kIOSSimShadowOffset32 : kIOSShadowOffset32;
     else if (IsWindows)
       Mapping.Offset = kWindowsShadowOffset32;
     else
@@ -367,6 +396,8 @@
   } else {  // LongSize == 64
     if (IsPPC64)
       Mapping.Offset = kPPC64_ShadowOffset64;
+    else if (IsSystemZ)
+      Mapping.Offset = kSystemZ_ShadowOffset64;
    else if (IsFreeBSD)
       Mapping.Offset = kFreeBSD_ShadowOffset64;
     else if (IsLinux && IsX86_64) {
@@ -374,8 +405,13 @@
         Mapping.Offset = kLinuxKasan_ShadowOffset64;
       else
         Mapping.Offset = kSmallX86_64ShadowOffset;
+    } else if (IsWindows && IsX86_64) {
+      Mapping.Offset = kWindowsShadowOffset64;
     } else if (IsMIPS64)
       Mapping.Offset = kMIPS64_ShadowOffset64;
+    else if (IsIOS)
+      // If we're targeting iOS and x86, the binary is built for iOS simulator.
+      Mapping.Offset = IsX86_64 ?
+          kIOSSimShadowOffset64 : kIOSShadowOffset64;
     else if (IsAArch64)
       Mapping.Offset = kAArch64_ShadowOffset64;
     else
@@ -383,14 +419,20 @@
   }
 
   Mapping.Scale = kDefaultShadowScale;
-  if (ClMappingScale) {
+  if (ClMappingScale.getNumOccurrences() > 0) {
     Mapping.Scale = ClMappingScale;
   }
 
+  if (ClMappingOffset.getNumOccurrences() > 0) {
+    Mapping.Offset = ClMappingOffset;
+  }
+
   // OR-ing the shadow offset is more efficient (at least on x86) if the offset
   // is a power of two, but on ppc64 we have to use add since the shadow
-  // offset is not necessarily 1/8-th of the address space.
-  Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64
+  // offset is not necessarily 1/8-th of the address space. On SystemZ,
+  // we could OR the constant in a single instruction, but it's more
+  // efficient to load it once and use indexed addressing.
+  Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ &&
                            !(Mapping.Offset & (Mapping.Offset - 1));
 
   return Mapping;
@@ -404,9 +446,11 @@ static size_t RedzoneSizeForScale(int MappingScale) {
 
 /// AddressSanitizer: instrument the code in module to find memory bugs.
 struct AddressSanitizer : public FunctionPass {
-  explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false)
+  explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false,
+                            bool UseAfterScope = false)
       : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan),
-        Recover(Recover || ClRecover) {
+        Recover(Recover || ClRecover),
+        UseAfterScope(UseAfterScope || ClUseAfterScope) {
     initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
   }
   const char *getPassName() const override {
@@ -417,19 +461,20 @@ struct AddressSanitizer : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
   }
   uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
+    uint64_t ArraySize = 1;
+    if (AI->isArrayAllocation()) {
+      ConstantInt *CI = dyn_cast<ConstantInt>(AI->getArraySize());
+      assert(CI && "non-constant array size");
+      ArraySize = CI->getZExtValue();
+    }
     Type *Ty = AI->getAllocatedType();
     uint64_t SizeInBytes =
         AI->getModule()->getDataLayout().getTypeAllocSize(Ty);
-    return SizeInBytes;
+    return SizeInBytes * ArraySize;
   }
   /// Check if we want (and can) handle this alloca.
   bool isInterestingAlloca(AllocaInst &AI);
 
-  // Check if we have dynamic alloca.
-  bool isDynamicAlloca(AllocaInst &AI) const {
-    return AI.isArrayAllocation() || !AI.isStaticAlloca();
-  }
-
   /// If it is an interesting memory access, return the PointerOperand
   /// and set IsWrite/Alignment. Otherwise return nullptr.
   Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
@@ -483,6 +528,7 @@ struct AddressSanitizer : public FunctionPass {
   int LongSize;
   bool CompileKernel;
   bool Recover;
+  bool UseAfterScope;
   Type *IntptrTy;
   ShadowMapping Mapping;
   DominatorTree *DT;
@@ -519,6 +565,7 @@ class AddressSanitizerModule : public ModulePass {
   bool InstrumentGlobals(IRBuilder<> &IRB, Module &M);
   bool ShouldInstrumentGlobal(GlobalVariable *G);
+  bool ShouldUseMachOGlobalsSection() const;
   void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
   void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
   size_t MinRedzoneSizeForGlobal() const {
@@ -536,6 +583,8 @@ class AddressSanitizerModule : public ModulePass {
   Function *AsanUnpoisonGlobals;
   Function *AsanRegisterGlobals;
   Function *AsanUnregisterGlobals;
+  Function *AsanRegisterImageGlobals;
+  Function *AsanUnregisterImageGlobals;
 };
 
 // Stack poisoning does not play well with exception handling.
@@ -680,7 +729,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
     }
     StackAlignment = std::max(StackAlignment, AI.getAlignment());
-    if (ASan.isDynamicAlloca(AI))
+    if (!AI.isStaticAlloca())
       DynamicAllocaVec.push_back(&AI);
     else
       AllocaVec.push_back(&AI);
@@ -692,7 +741,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
     Intrinsic::ID ID = II.getIntrinsicID();
     if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II);
     if (ID == Intrinsic::localescape) LocalEscapeCall = &II;
-    if (!ClCheckLifetime) return;
+    if (!ASan.UseAfterScope)
+      return;
     if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
       return;
     // Found lifetime intrinsic, add ASan instrumentation if necessary.
@@ -707,7 +757,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
       return;
     // Find alloca instruction that corresponds to llvm.lifetime argument.
     AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
-    if (!AI) return;
+    if (!AI || !ASan.isInterestingAlloca(*AI))
+      return;
     bool DoPoison = (ID == Intrinsic::lifetime_end);
     AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
     AllocaPoisonCallVec.push_back(APC);
@@ -760,9 +811,10 @@ INITIALIZE_PASS_END(
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
     false)
 FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
-                                                       bool Recover) {
+                                                       bool Recover,
+                                                       bool UseAfterScope) {
   assert(!CompileKernel || Recover);
-  return new AddressSanitizer(CompileKernel, Recover);
+  return new AddressSanitizer(CompileKernel, Recover, UseAfterScope);
 }
 
 char AddressSanitizerModule::ID = 0;
@@ -792,7 +844,7 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
   GlobalVariable *GV =
       new GlobalVariable(M, StrConst->getType(), true,
                          GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix);
-  if (AllowMerging) GV->setUnnamedAddr(true);
+  if (AllowMerging) GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
   GV->setAlignment(1);  // Strings may not be merged w/o setting align 1.
   return GV;
 }
 
@@ -809,13 +861,23 @@ static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
   auto GV = new GlobalVariable(M, LocStruct->getType(), true,
                                GlobalValue::PrivateLinkage, LocStruct,
                                kAsanGenPrefix);
-  GV->setUnnamedAddr(true);
+  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
   return GV;
 }
 
-static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
-  return G->getName().find(kAsanGenPrefix) == 0 ||
-         G->getName().find(kSanCovGenPrefix) == 0;
+/// \brief Check if \p G has been created by a trusted compiler pass.
+static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
+  // Do not instrument asan globals.
+  if (G->getName().startswith(kAsanGenPrefix) ||
+      G->getName().startswith(kSanCovGenPrefix) ||
+      G->getName().startswith(kODRGenPrefix))
+    return true;
+
+  // Do not instrument gcov counter arrays.
+  if (G->getName() == "__llvm_gcov_ctr")
+    return true;
+
+  return false;
 }
 
 Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
@@ -858,7 +920,7 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
   bool IsInteresting =
       (AI.getAllocatedType()->isSized() &&
        // alloca() may be called with 0 size, ignore it.
-       getAllocaSizeInBytes(&AI) > 0 &&
+       ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(&AI) > 0) &&
       // We are only interested in allocas not promotable to registers.
       // Promotable allocas are common under -O0.
       (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
@@ -907,6 +969,14 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
     PtrOperand = XCHG->getPointerOperand();
   }
 
+  // Do not instrument accesses from different address spaces; we cannot deal
+  // with them.
+  if (PtrOperand) {
+    Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
+    if (PtrTy->getPointerAddressSpace() != 0)
+      return nullptr;
+  }
+
   // Treat memory accesses to promotable allocas as non-interesting since they
   // will not cause memory violations. This greatly speeds up the instrumented
   // executable at -O0.
@@ -948,9 +1018,9 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
   IRBuilder<> IRB(I);
   Function *F = isa<ICmpInst>(I) ?
AsanPtrCmpFunction : AsanPtrSubFunction; Value *Param[2] = {I->getOperand(0), I->getOperand(1)}; - for (int i = 0; i < 2; i++) { - if (Param[i]->getType()->isPointerTy()) - Param[i] = IRB.CreatePointerCast(Param[i], IntptrTy); + for (Value *&i : Param) { + if (i->getType()->isPointerTy()) + i = IRB.CreatePointerCast(i, IntptrTy); } IRB.CreateCall(F, Param); } @@ -1048,7 +1118,7 @@ Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize) { - size_t Granularity = 1 << Mapping.Scale; + size_t Granularity = static_cast<size_t>(1) << Mapping.Scale; // Addr & (Granularity - 1) Value *LastAccessedByte = IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); @@ -1091,7 +1161,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - size_t Granularity = 1 << Mapping.Scale; + size_t Granularity = 1ULL << Mapping.Scale; TerminatorInst *CrashTerm = nullptr; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { @@ -1184,13 +1254,13 @@ void AddressSanitizerModule::createInitializerPoisonCalls( } bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { - Type *Ty = cast<PointerType>(G->getType())->getElementType(); + Type *Ty = G->getValueType(); DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); if (GlobalsMD.get(G).IsBlacklisted) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; - if (GlobalWasGeneratedByAsan(G)) return false; // Our own global. + if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals. // Touch only those globals that will not be defined in other modules. // Don't handle ODR linkage types and COMDATs since other modules may be built // without ASan. @@ -1207,12 +1277,12 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false; if (G->hasSection()) { - StringRef Section(G->getSection()); + StringRef Section = G->getSection(); // Globals from llvm.metadata aren't emitted, do not instrument them. if (Section == "llvm.metadata") return false; // Do not instrument globals from special LLVM sections. - if (Section.find("__llvm") != StringRef::npos) return false; + if (Section.find("__llvm") != StringRef::npos || Section.find("__LLVM") != StringRef::npos) return false; // Do not instrument function pointers to initialization and termination // routines: dynamic linker will not properly handle redzones. @@ -1271,8 +1341,29 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { return true; } +// On Mach-O platforms, we emit global metadata in a separate section of the +// binary in order to allow the linker to properly dead strip. This is only +// supported on recent versions of ld64. 
+bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const { + if (!ClUseMachOGlobalsSection) + return false; + + if (!TargetTriple.isOSBinFormatMachO()) + return false; + + if (TargetTriple.isMacOSX() && !TargetTriple.isMacOSXVersionLT(10, 11)) + return true; + if (TargetTriple.isiOS() /* or tvOS */ && !TargetTriple.isOSVersionLT(9)) + return true; + if (TargetTriple.isWatchOS() && !TargetTriple.isOSVersionLT(2)) + return true; + + return false; +} + void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); + // Declare our poisoning and unpoisoning functions. AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); @@ -1280,6 +1371,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr)); AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); + // Declare functions that register/unregister globals. AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); @@ -1288,6 +1380,18 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); + + // Declare the functions that find globals in a shared object and then invoke + // the (un)register function on them. + AsanRegisterImageGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanRegisterImageGlobalsName, + IRB.getVoidTy(), IntptrTy, nullptr)); + AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage); + + AsanUnregisterImageGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterImageGlobalsName, + IRB.getVoidTy(), IntptrTy, nullptr)); + AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); } // This function replaces all global variables with new variables that have @@ -1313,10 +1417,11 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // const char *module_name; // size_t has_dynamic_init; // void *source_location; + // size_t odr_indicator; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, IntptrTy, nullptr); + IntptrTy, IntptrTy, IntptrTy, nullptr); SmallVector<Constant *, 16> Initializers(n); bool HasDynamicallyInitializedGlobals = false; @@ -1332,14 +1437,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { GlobalVariable *G = GlobalsToChange[i]; auto MD = GlobalsMD.get(G); + StringRef NameForGlobal = G->getName(); // Create string holding the global name (use global name from metadata // if it's available, otherwise just write the name of global variable). GlobalVariable *Name = createPrivateGlobalForString( - M, MD.Name.empty() ? G->getName() : MD.Name, + M, MD.Name.empty() ? 
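For orientation, the eight pointer-sized fields of GlobalStructTy built above line up with a descriptor that the ASan runtime reads back. A rough C++ mirror of that layout (field names follow compiler-rt's __asan_global as best I recall; treat the exact names and the struct name as assumptions, not the library's API):

```cpp
#include <cstdint>

// Illustrative, runtime-side view of the per-global metadata emitted above;
// one such record per instrumented global is handed to
// __asan_register_globals (or located via __asan_register_image_globals).
struct AsanGlobalDescriptor {
  uintptr_t beg;                // address of the instrumented global
  uintptr_t size;               // original size of the global
  uintptr_t size_with_redzone;  // size including the trailing redzone
  uintptr_t name;               // pointer to the global's name string
  uintptr_t module_name;        // pointer to the module name string
  uintptr_t has_dynamic_init;   // non-zero for dynamically initialized globals
  uintptr_t source_location;    // pointer to source-location data, or 0
  uintptr_t odr_indicator;      // address of the ODR indicator, or 0 (new here)
};
```

The added odr_indicator field is why this commit also bumps the version-check symbol to __asan_version_mismatch_check_v8.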
NameForGlobal : MD.Name, /*AllowMerging*/ true); - PointerType *PtrTy = cast<PointerType>(G->getType()); - Type *Ty = PtrTy->getElementType(); + Type *Ty = G->getValueType(); uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); uint64_t MinRZ = MinRedzoneSizeForGlobal(); // MinRZ <= RZ <= kMaxGlobalRedzone @@ -1384,41 +1489,125 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { SourceLoc = ConstantInt::get(IntptrTy, 0); } + Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy()); + GlobalValue *InstrumentedGlobal = NewGlobal; + + bool CanUsePrivateAliases = TargetTriple.isOSBinFormatELF(); + if (CanUsePrivateAliases && ClUsePrivateAliasForGlobals) { + // Create local alias for NewGlobal to avoid crash on ODR between + // instrumented and non-instrumented libraries. + auto *GA = GlobalAlias::create(GlobalValue::InternalLinkage, + NameForGlobal + M.getName(), NewGlobal); + + // With local aliases, we need to provide another externally visible + // symbol __odr_asan_XXX to detect ODR violation. + auto *ODRIndicatorSym = + new GlobalVariable(M, IRB.getInt8Ty(), false, Linkage, + Constant::getNullValue(IRB.getInt8Ty()), + kODRGenPrefix + NameForGlobal, nullptr, + NewGlobal->getThreadLocalMode()); + + // Set meaningful attributes for indicator symbol. + ODRIndicatorSym->setVisibility(NewGlobal->getVisibility()); + ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass()); + ODRIndicatorSym->setAlignment(1); + ODRIndicator = ODRIndicatorSym; + InstrumentedGlobal = GA; + } + Initializers[i] = ConstantStruct::get( - GlobalStructTy, ConstantExpr::getPointerCast(NewGlobal, IntptrTy), + GlobalStructTy, + ConstantExpr::getPointerCast(InstrumentedGlobal, IntptrTy), ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(ModuleName, IntptrTy), - ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, nullptr); + ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, + ConstantExpr::getPointerCast(ODRIndicator, IntptrTy), nullptr); if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } - ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n); - GlobalVariable *AllGlobals = new GlobalVariable( - M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage, - ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); + + GlobalVariable *AllGlobals = nullptr; + GlobalVariable *RegisteredFlag = nullptr; + + // On recent Mach-O platforms, we emit the global metadata in a way that + // allows the linker to properly strip dead globals. + if (ShouldUseMachOGlobalsSection()) { + // RegisteredFlag serves two purposes. First, we can pass it to dladdr() + // to look up the loaded image that contains it. Second, we can store in it + // whether registration has already occurred, to prevent duplicate + // registration. + // + // Common linkage allows us to coalesce needles defined in each object + // file so that there's only one per shared library. + RegisteredFlag = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::CommonLinkage, + ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + + // We also emit a structure which binds the liveness of the global + // variable to the metadata struct. 
+    StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr);
+
+    for (size_t i = 0; i < n; i++) {
+      GlobalVariable *Metadata = new GlobalVariable(
+          M, GlobalStructTy, false, GlobalVariable::InternalLinkage,
+          Initializers[i], "");
+      Metadata->setSection("__DATA,__asan_globals,regular");
+      Metadata->setAlignment(1); // don't leave padding in between
+
+      auto LivenessBinder = ConstantStruct::get(LivenessTy,
+          Initializers[i]->getAggregateElement(0u),
+          ConstantExpr::getPointerCast(Metadata, IntptrTy),
+          nullptr);
+      GlobalVariable *Liveness = new GlobalVariable(
+          M, LivenessTy, false, GlobalVariable::InternalLinkage,
+          LivenessBinder, "");
+      Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
+    }
+  } else {
+    // On all other platforms, we just emit an array of global metadata
+    // structures.
+    ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
+    AllGlobals = new GlobalVariable(
+        M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
+        ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+  }
 
   // Create calls for poisoning before initializers run and unpoisoning after.
   if (HasDynamicallyInitializedGlobals)
     createInitializerPoisonCalls(M, ModuleName);
-  IRB.CreateCall(AsanRegisterGlobals,
-                 {IRB.CreatePointerCast(AllGlobals, IntptrTy),
-                  ConstantInt::get(IntptrTy, n)});
 
-  // We also need to unregister globals at the end, e.g. when a shared library
+  // Create a call to register the globals with the runtime.
+  if (ShouldUseMachOGlobalsSection()) {
+    IRB.CreateCall(AsanRegisterImageGlobals,
+                   {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+  } else {
+    IRB.CreateCall(AsanRegisterGlobals,
+                   {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+                    ConstantInt::get(IntptrTy, n)});
+  }
+
+  // We also need to unregister globals at the end, e.g., when a shared library
   // gets closed.
   Function *AsanDtorFunction =
       Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
                        GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
   BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
   IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
-  IRB_Dtor.CreateCall(AsanUnregisterGlobals,
-                      {IRB.CreatePointerCast(AllGlobals, IntptrTy),
-                       ConstantInt::get(IntptrTy, n)});
+
+  if (ShouldUseMachOGlobalsSection()) {
+    IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
+                        {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+  } else {
+    IRB_Dtor.CreateCall(AsanUnregisterGlobals,
+                        {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+                         ConstantInt::get(IntptrTy, n)});
+  }
+
   appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
 
   DEBUG(dbgs() << M);
@@ -1467,7 +1656,7 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
           IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
        AccessSizeIndex++) {
-    const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
+    const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
     AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
             kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
@@ -1608,6 +1797,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   bool IsWrite;
   unsigned Alignment;
   uint64_t TypeSize;
+  const TargetLibraryInfo *TLI =
+      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
 
   // Fill the set of memory operations to instrument.
for (auto &BB : F) { @@ -1636,6 +1827,8 @@ bool AddressSanitizer::runOnFunction(Function &F) { TempsToInstrument.clear(); if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction()); } + if (CallInst *CI = dyn_cast<CallInst>(&Inst)) + maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); continue; } ToInstrument.push_back(&Inst); @@ -1648,8 +1841,6 @@ bool AddressSanitizer::runOnFunction(Function &F) { CompileKernel || (ClInstrumentationWithCallsThreshold >= 0 && ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold); - const TargetLibraryInfo *TLI = - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); const DataLayout &DL = F.getParent()->getDataLayout(); ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), /*RoundToAlign=*/true); @@ -1713,12 +1904,15 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } - AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(), - IntptrTy, IntptrTy, nullptr)); - AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), - IntptrTy, IntptrTy, nullptr)); + if (ASan.UseAfterScope) { + AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(), + IntptrTy, IntptrTy, nullptr)); + AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), + IntptrTy, IntptrTy, nullptr)); + } + AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanAllocasUnpoisonFunc = @@ -1825,13 +2019,21 @@ void FunctionStackPoisoner::poisonStack() { assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0); // Insert poison calls for lifetime intrinsics for alloca. - bool HavePoisonedAllocas = false; + bool HavePoisonedStaticAllocas = false; for (const auto &APC : AllocaPoisonCallVec) { assert(APC.InsBefore); assert(APC.AI); + assert(ASan.isInterestingAlloca(*APC.AI)); + bool IsDynamicAlloca = !(*APC.AI).isStaticAlloca(); + if (!ClInstrumentAllocas && IsDynamicAlloca) + continue; + IRBuilder<> IRB(APC.InsBefore); poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison); - HavePoisonedAllocas |= APC.DoPoison; + // Dynamic allocas will be unpoisoned unconditionally below in + // unpoisonDynamicAllocas. + // Flag that we need unpoison static allocas. + HavePoisonedStaticAllocas |= (APC.DoPoison && !IsDynamicAlloca); } if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) { @@ -1846,7 +2048,7 @@ void FunctionStackPoisoner::poisonStack() { int StackMallocIdx = -1; DebugLoc EntryDebugLocation; - if (auto SP = getDISubprogram(&F)) + if (auto SP = F.getSubprogram()) EntryDebugLocation = DebugLoc::get(SP->getScopeLine(), 0, SP); Instruction *InsBefore = AllocaVec[0]; @@ -1878,7 +2080,7 @@ void FunctionStackPoisoner::poisonStack() { // i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms. 
size_t MinHeaderSize = ASan.LongSize / 2; ASanStackFrameLayout L; - ComputeASanStackFrameLayout(SVD, 1UL << Mapping.Scale, MinHeaderSize, &L); + ComputeASanStackFrameLayout(SVD, 1ULL << Mapping.Scale, MinHeaderSize, &L); DEBUG(dbgs() << L.DescriptionString << " --- " << L.FrameSize << "\n"); uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel && @@ -1904,13 +2106,13 @@ void FunctionStackPoisoner::poisonStack() { // ? __asan_stack_malloc_N(LocalStackSize) // : nullptr; // void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize); - Constant *OptionDetectUAR = F.getParent()->getOrInsertGlobal( - kAsanOptionDetectUAR, IRB.getInt32Ty()); - Value *UARIsEnabled = - IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUAR), + Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal( + kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty()); + Value *UseAfterReturnIsEnabled = + IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUseAfterReturn), Constant::getNullValue(IRB.getInt32Ty())); Instruction *Term = - SplitBlockAndInsertIfThen(UARIsEnabled, InsBefore, false); + SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false); IRBuilder<> IRBIf(Term); IRBIf.SetCurrentDebugLocation(EntryDebugLocation); StackMallocIdx = StackMallocSizeClass(LocalStackSize); @@ -1920,7 +2122,7 @@ void FunctionStackPoisoner::poisonStack() { ConstantInt::get(IntptrTy, LocalStackSize)); IRB.SetInsertPoint(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); - FakeStack = createPHI(IRB, UARIsEnabled, FakeStackValue, Term, + FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, ConstantInt::get(IntptrTy, 0)); Value *NoFakeStack = @@ -1977,6 +2179,16 @@ void FunctionStackPoisoner::poisonStack() { Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); poisonRedZones(L.ShadowBytes, IRB, ShadowBase, true); + auto UnpoisonStack = [&](IRBuilder<> &IRB) { + if (HavePoisonedStaticAllocas) { + // If we poisoned some allocas in llvm.lifetime analysis, + // unpoison whole stack frame now. + poisonAlloca(LocalStackBase, LocalStackSize, IRB, false); + } else { + poisonRedZones(L.ShadowBytes, IRB, ShadowBase, false); + } + }; + // (Un)poison the stack before all ret instructions. for (auto Ret : RetVec) { IRBuilder<> IRBRet(Ret); @@ -2021,13 +2233,9 @@ void FunctionStackPoisoner::poisonStack() { } IRBuilder<> IRBElse(ElseTerm); - poisonRedZones(L.ShadowBytes, IRBElse, ShadowBase, false); - } else if (HavePoisonedAllocas) { - // If we poisoned some allocas in llvm.lifetime analysis, - // unpoison whole stack frame now. 
- poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false); + UnpoisonStack(IRBElse); } else { - poisonRedZones(L.ShadowBytes, IRBRet, ShadowBase, false); + UnpoisonStack(IRBRet); } } diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index fd3dfd9af0331..d4c8369fa9d3b 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -36,7 +36,7 @@ STATISTIC(ChecksAdded, "Bounds checks added"); STATISTIC(ChecksSkipped, "Bounds checks skipped"); STATISTIC(ChecksUnable, "Bounds checks unable to add"); -typedef IRBuilder<true, TargetFolder> BuilderTy; +typedef IRBuilder<TargetFolder> BuilderTy; namespace { struct BoundsChecking : public FunctionPass { @@ -185,9 +185,8 @@ bool BoundsChecking::runOnFunction(Function &F) { } bool MadeChange = false; - for (std::vector<Instruction*>::iterator i = WorkList.begin(), - e = WorkList.end(); i != e; ++i) { - Inst = *i; + for (Instruction *i : WorkList) { + Inst = i; Builder->SetInsertPoint(Inst); if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index c47fdbf689969..3cd7351cad626 100644 --- a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -21,7 +21,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <string> #include <utility> #include <vector> diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index cae1e5af7ac70..57a569b3791ee 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -4,12 +4,13 @@ add_llvm_library(LLVMInstrumentation DataFlowSanitizer.cpp GCOVProfiling.cpp MemorySanitizer.cpp + IndirectCallPromotion.cpp Instrumentation.cpp InstrProfiling.cpp PGOInstrumentation.cpp - SafeStack.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp + EfficiencySanitizer.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index d459fc50d1361..b34d5b8c45a71 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -134,7 +134,7 @@ namespace { StringRef GetGlobalTypeString(const GlobalValue &G) { // Types of GlobalVariables are always pointer types. - Type *GType = G.getType()->getElementType(); + Type *GType = G.getValueType(); // For now we support blacklisting struct types only. 
     if (StructType *SGType = dyn_cast<StructType>(GType)) {
       if (!SGType->isLiteral())
@@ -166,7 +166,7 @@ class DFSanABIList {
     if (isIn(*GA.getParent(), Category))
       return true;
 
-    if (isa<FunctionType>(GA.getType()->getElementType()))
+    if (isa<FunctionType>(GA.getValueType()))
       return SCL->inSection("fun", GA.getName(), Category);
 
     return SCL->inSection("global", GA.getName(), Category) ||
@@ -791,25 +791,20 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
     }
   }
 
-  for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
-                                         e = FnsToInstrument.end();
-       i != e; ++i) {
-    if (!*i || (*i)->isDeclaration())
+  for (Function *i : FnsToInstrument) {
+    if (!i || i->isDeclaration())
       continue;
 
-    removeUnreachableBlocks(**i);
+    removeUnreachableBlocks(*i);
 
-    DFSanFunction DFSF(*this, *i, FnsWithNativeABI.count(*i));
+    DFSanFunction DFSF(*this, i, FnsWithNativeABI.count(i));
 
     // DFSanVisitor may create new basic blocks, which confuses df_iterator.
     // Build a copy of the list before iterating over it.
-    llvm::SmallVector<BasicBlock *, 4> BBList(
-        depth_first(&(*i)->getEntryBlock()));
+    llvm::SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock()));
 
-    for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
-                                                      e = BBList.end();
-         i != e; ++i) {
-      Instruction *Inst = &(*i)->front();
+    for (BasicBlock *i : BBList) {
+      Instruction *Inst = &i->front();
       while (1) {
         // DFSanVisitor may split the current basic block, changing the current
         // instruction's next pointer and moving the next instruction to the
@@ -1066,11 +1061,10 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
   SmallVector<Value *, 2> Objs;
   GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
   bool AllConstants = true;
-  for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end();
-       i != e; ++i) {
-    if (isa<Function>(*i) || isa<BlockAddress>(*i))
+  for (Value *Obj : Objs) {
+    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
       continue;
-    if (isa<GlobalVariable>(*i) && cast<GlobalVariable>(*i)->isConstant())
+    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
       continue;
 
     AllConstants = false;
@@ -1412,10 +1406,6 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
   if (F == DFSF.DFS.DFSanVarargWrapperFn)
     return;
 
-  assert(!(cast<FunctionType>(
-      CS.getCalledValue()->getType()->getPointerElementType())->isVarArg() &&
-      dyn_cast<InvokeInst>(CS.getInstruction())));
-
   IRBuilder<> IRB(CS.getInstruction());
 
   DenseMap<Value *, Function *>::iterator i =
diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
new file mode 100644
index 0000000000000..fb80f87369f99
--- /dev/null
+++ b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -0,0 +1,901 @@
+//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners
+// that detect multiple performance issues via separate sub-tools.
+//
+// The instrumentation phase is straightforward:
+//   - Take action on every memory access: either inlined instrumentation,
+//     or inserted calls to our run-time library.
+//   - Optimizations may apply to avoid instrumenting some of the accesses.
+//   - Turn mem{set,cpy,move} intrinsics into library calls.
+// The rest is handled by the run-time library.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "esan"
+
+// The tool type must be just one of these ClTool* options, as the tools
+// cannot be combined due to shadow memory constraints.
+static cl::opt<bool>
+    ClToolCacheFrag("esan-cache-frag", cl::init(false),
+                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
+static cl::opt<bool>
+    ClToolWorkingSet("esan-working-set", cl::init(false),
+                     cl::desc("Measure the working set size"), cl::Hidden);
+// Each new tool will get its own opt flag here.
+// These are converted to EfficiencySanitizerOptions for use
+// in the code.
+
+static cl::opt<bool> ClInstrumentLoadsAndStores(
+    "esan-instrument-loads-and-stores", cl::init(true),
+    cl::desc("Instrument loads and stores"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
+    "esan-instrument-memintrinsics", cl::init(true),
+    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
+static cl::opt<bool> ClInstrumentFastpath(
+    "esan-instrument-fastpath", cl::init(true),
+    cl::desc("Instrument fastpath"), cl::Hidden);
+static cl::opt<bool> ClAuxFieldInfo(
+    "esan-aux-field-info", cl::init(true),
+    cl::desc("Generate binary with auxiliary struct field information"),
+    cl::Hidden);
+
+// Experiments show that the performance difference can be 2x or more,
+// and accuracy loss is typically negligible, so we turn this on by default.
+static cl::opt<bool> ClAssumeIntraCacheLine(
+    "esan-assume-intra-cache-line", cl::init(true),
+    cl::desc("Assume each memory access touches just one cache line, for "
+             "better performance but with a potential loss of accuracy."),
+    cl::Hidden);
+
+STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
+STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
+STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
+STATISTIC(NumAccessesWithIrregularSize,
+          "Number of accesses with a size outside our targeted callout sizes");
+STATISTIC(NumIgnoredStructs, "Number of ignored structs");
+STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
+STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
+STATISTIC(NumAssumedIntraCacheLine,
+          "Number of accesses assumed to be intra-cache-line");
+
+static const uint64_t EsanCtorAndDtorPriority = 0;
+static const char *const EsanModuleCtorName = "esan.module_ctor";
+static const char *const EsanModuleDtorName = "esan.module_dtor";
+static const char *const EsanInitName = "__esan_init";
+static const char *const EsanExitName = "__esan_exit";
+
+// We need to specify the tool to the runtime earlier than
+// the ctor is called in some cases, so we set a global variable.
+static const char *const EsanWhichToolName = "__esan_which_tool"; + +// We must keep these Shadow* constants consistent with the esan runtime. +// FIXME: Try to place these shadow constants, the names of the __esan_* +// interface functions, and the ToolType enum into a header shared between +// llvm and compiler-rt. +static const uint64_t ShadowMask = 0x00000fffffffffffull; +static const uint64_t ShadowOffs[3] = { // Indexed by scale + 0x0000130000000000ull, + 0x0000220000000000ull, + 0x0000440000000000ull, +}; +// This array is indexed by the ToolType enum. +static const int ShadowScale[] = { + 0, // ESAN_None. + 2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2. + 6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6. +}; + +// MaxStructCounterNameSize is a soft size limit to avoid insanely long +// names for those extremely large structs. +static const unsigned MaxStructCounterNameSize = 512; + +namespace { + +static EfficiencySanitizerOptions +OverrideOptionsFromCL(EfficiencySanitizerOptions Options) { + if (ClToolCacheFrag) + Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; + else if (ClToolWorkingSet) + Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; + + // Direct opt invocation with no params will have the default ESAN_None. + // We run the default tool in that case. + if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None) + Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; + + return Options; +} + +// Create a constant for Str so that we can pass it to the run-time lib. +static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str, + bool AllowMerging) { + Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); + // We use private linkage for module-local strings. If they can be merged + // with another one, we set the unnamed_addr attribute. + GlobalVariable *GV = + new GlobalVariable(M, StrConst->getType(), true, + GlobalValue::PrivateLinkage, StrConst, ""); + if (AllowMerging) + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + return GV; +} + +/// EfficiencySanitizer: instrument each module to find performance issues. 
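The ShadowMask/ShadowOffs/ShadowScale constants above imply a simple translation from application address to shadow address. A hedged sketch of the arithmetic, in the general form Shadow = ((App & Mask) + Offs) >> Scale that these constants suggest (how the runtime selects Offs for each scale is an assumption here, so treat this as illustrative only):

```cpp
#include <cstdint>

// Illustrative esan app-to-shadow translation: mask the application address
// into the supported range, add a per-scale offset, then shift by the
// tool's scale (2 for cache-frag's 4B:1B mapping, 6 for working-set's 64B:1B).
uint64_t esanAppToShadow(uint64_t App, unsigned Scale, uint64_t Offs) {
  const uint64_t ShadowMask = 0x00000fffffffffffULL;
  return ((App & ShadowMask) + Offs) >> Scale;
}
```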
+class EfficiencySanitizer : public ModulePass { +public: + EfficiencySanitizer( + const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions()) + : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {} + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnModule(Module &M) override; + static char ID; + +private: + bool initOnModule(Module &M); + void initializeCallbacks(Module &M); + bool shouldIgnoreStructType(StructType *StructTy); + void createStructCounterName( + StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr); + void createCacheFragAuxGV( + Module &M, const DataLayout &DL, StructType *StructTy, + GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size); + GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL, + Constant *UnitName); + Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL); + void createDestructor(Module &M, Constant *ToolInfoArg); + bool runOnFunction(Function &F, Module &M); + bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); + bool instrumentMemIntrinsic(MemIntrinsic *MI); + bool instrumentGetElementPtr(Instruction *I, Module &M); + bool insertCounterUpdate(Instruction *I, StructType *StructTy, + unsigned CounterIdx); + unsigned getFieldCounterIdx(StructType *StructTy) { + return 0; + } + unsigned getArrayCounterIdx(StructType *StructTy) { + return StructTy->getNumElements(); + } + unsigned getStructCounterSize(StructType *StructTy) { + // The struct counter array includes: + // - one counter for each struct field, + // - one counter for the struct access within an array. + return (StructTy->getNumElements()/*field*/ + 1/*array*/); + } + bool shouldIgnoreMemoryAccess(Instruction *I); + int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL); + Value *appToShadow(Value *Shadow, IRBuilder<> &IRB); + bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore, + Value *Addr, unsigned Alignment); + // Each tool has its own fastpath routine: + bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL, + Value *Addr, unsigned Alignment); + bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL, + Value *Addr, unsigned Alignment); + + EfficiencySanitizerOptions Options; + LLVMContext *Ctx; + Type *IntptrTy; + // Our slowpath involves callouts to the runtime library. + // Access sizes are powers of two: 1, 2, 4, 8, 16. + static const size_t NumberOfAccessSizes = 5; + Function *EsanAlignedLoad[NumberOfAccessSizes]; + Function *EsanAlignedStore[NumberOfAccessSizes]; + Function *EsanUnalignedLoad[NumberOfAccessSizes]; + Function *EsanUnalignedStore[NumberOfAccessSizes]; + // For irregular sizes of any alignment: + Function *EsanUnalignedLoadN, *EsanUnalignedStoreN; + Function *MemmoveFn, *MemcpyFn, *MemsetFn; + Function *EsanCtorFunction; + Function *EsanDtorFunction; + // Remember the counter variable for each struct type to avoid + // recomputing the variable name later during instrumentation. 
+ std::map<Type *, GlobalVariable *> StructTyMap; +}; +} // namespace + +char EfficiencySanitizer::ID = 0; +INITIALIZE_PASS_BEGIN( + EfficiencySanitizer, "esan", + "EfficiencySanitizer: finds performance issues.", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END( + EfficiencySanitizer, "esan", + "EfficiencySanitizer: finds performance issues.", false, false) + +const char *EfficiencySanitizer::getPassName() const { + return "EfficiencySanitizer"; +} + +void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} + +ModulePass * +llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) { + return new EfficiencySanitizer(Options); +} + +void EfficiencySanitizer::initializeCallbacks(Module &M) { + IRBuilder<> IRB(M.getContext()); + // Initialize the callbacks. + for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) { + const unsigned ByteSize = 1U << Idx; + std::string ByteSizeStr = utostr(ByteSize); + // We'll inline the most common (i.e., aligned and frequent sizes) + // load + store instrumentation: these callouts are for the slowpath. + SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr); + EsanAlignedLoad[Idx] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr); + EsanAlignedStore[Idx] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr); + EsanUnalignedLoad[Idx] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr); + EsanUnalignedStore[Idx] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + } + EsanUnalignedLoadN = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + EsanUnalignedStoreN = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemmoveFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemcpyFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemsetFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt32Ty(), IntptrTy, nullptr)); +} + +bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) { + if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */) + return true; + return false; +} + +void EfficiencySanitizer::createStructCounterName( + StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) { + // Append NumFields and field type ids to avoid struct conflicts + // with the same name but different fields. 
+ if (StructTy->hasName()) + NameStr += StructTy->getName(); + else + NameStr += "struct.anon"; + // We allow the actual size of the StructCounterName to be larger than + // MaxStructCounterNameSize and append #NumFields and at least one + // field type id. + // Append #NumFields. + NameStr += "#"; + Twine(StructTy->getNumElements()).toVector(NameStr); + // Append struct field type ids in the reverse order. + for (int i = StructTy->getNumElements() - 1; i >= 0; --i) { + NameStr += "#"; + Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr); + if (NameStr.size() >= MaxStructCounterNameSize) + break; + } + if (StructTy->isLiteral()) { + // End with # for literal struct. + NameStr += "#"; + } +} + +// Create global variables with auxiliary information (e.g., struct field size, +// offset, and type name) for better user report. +void EfficiencySanitizer::createCacheFragAuxGV( + Module &M, const DataLayout &DL, StructType *StructTy, + GlobalVariable *&TypeName, GlobalVariable *&Offset, + GlobalVariable *&Size) { + auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx); + auto *Int32Ty = Type::getInt32Ty(*Ctx); + // FieldTypeName. + auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements()); + TypeName = new GlobalVariable(M, TypeNameArrayTy, true, + GlobalVariable::InternalLinkage, nullptr); + SmallVector<Constant *, 16> TypeNameVec; + // FieldOffset. + auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements()); + Offset = new GlobalVariable(M, OffsetArrayTy, true, + GlobalVariable::InternalLinkage, nullptr); + SmallVector<Constant *, 16> OffsetVec; + // FieldSize + auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements()); + Size = new GlobalVariable(M, SizeArrayTy, true, + GlobalVariable::InternalLinkage, nullptr); + SmallVector<Constant *, 16> SizeVec; + for (unsigned i = 0; i < StructTy->getNumElements(); ++i) { + Type *Ty = StructTy->getElementType(i); + std::string Str; + raw_string_ostream StrOS(Str); + Ty->print(StrOS); + TypeNameVec.push_back( + ConstantExpr::getPointerCast( + createPrivateGlobalForString(M, StrOS.str(), true), + Int8PtrTy)); + OffsetVec.push_back( + ConstantInt::get(Int32Ty, + DL.getStructLayout(StructTy)->getElementOffset(i))); + SizeVec.push_back(ConstantInt::get(Int32Ty, + DL.getTypeAllocSize(Ty))); + } + TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec)); + Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec)); + Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec)); +} + +// Create the global variable for the cache-fragmentation tool. +GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( + Module &M, const DataLayout &DL, Constant *UnitName) { + assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag); + + auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx); + auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo(); + auto *Int32Ty = Type::getInt32Ty(*Ctx); + auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx); + auto *Int64Ty = Type::getInt64Ty(*Ctx); + auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + // This structure should be kept consistent with the StructInfo struct + // in the runtime library. + // struct StructInfo { + // const char *StructName; + // u32 Size; + // u32 NumFields; + // u32 *FieldOffset; // auxiliary struct field info. + // u32 *FieldSize; // auxiliary struct field info. + // const char **FieldTypeName; // auxiliary struct field info. 
+ // u64 *FieldCounters; + // u64 *ArrayCounter; + // }; + auto *StructInfoTy = + StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy, + Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr); + auto *StructInfoPtrTy = StructInfoTy->getPointerTo(); + // This structure should be kept consistent with the CacheFragInfo struct + // in the runtime library. + // struct CacheFragInfo { + // const char *UnitName; + // u32 NumStructs; + // StructInfo *Structs; + // }; + auto *CacheFragInfoTy = + StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr); + + std::vector<StructType *> Vec = M.getIdentifiedStructTypes(); + unsigned NumStructs = 0; + SmallVector<Constant *, 16> Initializers; + + for (auto &StructTy : Vec) { + if (shouldIgnoreStructType(StructTy)) { + ++NumIgnoredStructs; + continue; + } + ++NumStructs; + + // StructName. + SmallString<MaxStructCounterNameSize> CounterNameStr; + createStructCounterName(StructTy, CounterNameStr); + GlobalVariable *StructCounterName = createPrivateGlobalForString( + M, CounterNameStr, /*AllowMerging*/true); + + // Counters. + // We create the counter array with StructCounterName and weak linkage + // so that the structs with the same name and layout from different + // compilation units will be merged into one. + auto *CounterArrayTy = ArrayType::get(Int64Ty, + getStructCounterSize(StructTy)); + GlobalVariable *Counters = + new GlobalVariable(M, CounterArrayTy, false, + GlobalVariable::WeakAnyLinkage, + ConstantAggregateZero::get(CounterArrayTy), + CounterNameStr); + + // Remember the counter variable for each struct type. + StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters)); + + // We pass the field type name array, offset array, and size array to + // the runtime for better reporting. + GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr; + if (ClAuxFieldInfo) + createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size); + + Constant *FieldCounterIdx[2]; + FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0); + FieldCounterIdx[1] = ConstantInt::get(Int32Ty, + getFieldCounterIdx(StructTy)); + Constant *ArrayCounterIdx[2]; + ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0); + ArrayCounterIdx[1] = ConstantInt::get(Int32Ty, + getArrayCounterIdx(StructTy)); + Initializers.push_back( + ConstantStruct::get( + StructInfoTy, + ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy), + ConstantInt::get(Int32Ty, + DL.getStructLayout(StructTy)->getSizeInBytes()), + ConstantInt::get(Int32Ty, StructTy->getNumElements()), + Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) : + ConstantExpr::getPointerCast(Offset, Int32PtrTy), + Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) : + ConstantExpr::getPointerCast(Size, Int32PtrTy), + TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) : + ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy), + ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, + FieldCounterIdx), + ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, + ArrayCounterIdx), + nullptr)); + } + // Structs. 
+ Constant *StructInfo; + if (NumStructs == 0) { + StructInfo = ConstantPointerNull::get(StructInfoPtrTy); + } else { + auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs); + StructInfo = ConstantExpr::getPointerCast( + new GlobalVariable(M, StructInfoArrayTy, false, + GlobalVariable::InternalLinkage, + ConstantArray::get(StructInfoArrayTy, Initializers)), + StructInfoPtrTy); + } + + auto *CacheFragInfoGV = new GlobalVariable( + M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage, + ConstantStruct::get(CacheFragInfoTy, + UnitName, + ConstantInt::get(Int32Ty, NumStructs), + StructInfo, + nullptr)); + return CacheFragInfoGV; +} + +// Create the tool-specific argument passed to EsanInit and EsanExit. +Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M, + const DataLayout &DL) { + // This structure contains tool-specific information about each compilation + // unit (module) and is passed to the runtime library. + GlobalVariable *ToolInfoGV = nullptr; + + auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx); + // Compilation unit name. + auto *UnitName = ConstantExpr::getPointerCast( + createPrivateGlobalForString(M, M.getModuleIdentifier(), true), + Int8PtrTy); + + // Create the tool-specific variable. + if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) + ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName); + + if (ToolInfoGV != nullptr) + return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy); + + // Create the null pointer if no tool-specific variable created. + return ConstantPointerNull::get(Int8PtrTy); +} + +void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) { + PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx); + EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx), + false), + GlobalValue::InternalLinkage, + EsanModuleDtorName, &M); + ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction)); + IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator()); + Function *EsanExit = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(), + Int8PtrTy, nullptr)); + EsanExit->setLinkage(Function::ExternalLinkage); + IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg}); + appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority); +} + +bool EfficiencySanitizer::initOnModule(Module &M) { + Ctx = &M.getContext(); + const DataLayout &DL = M.getDataLayout(); + IRBuilder<> IRB(M.getContext()); + IntegerType *OrdTy = IRB.getInt32Ty(); + PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx); + IntptrTy = DL.getIntPtrType(M.getContext()); + // Create the variable passed to EsanInit and EsanExit. + Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL); + // Constructor + // We specify the tool type both in the EsanWhichToolName global + // and as an arg to the init routine as a sanity check. 
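+  // (Editorial note, hypothetical scenario: if one translation unit were
+  // built for the cache-frag tool and another for the working-set tool, the
+  // runtime could detect the mismatch at startup via these two channels.)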
+ std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( + M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy}, + /*InitArgs=*/{ + ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)), + ToolInfoArg}); + appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority); + + createDestructor(M, ToolInfoArg); + + new GlobalVariable(M, OrdTy, true, + GlobalValue::WeakAnyLinkage, + ConstantInt::get(OrdTy, + static_cast<int>(Options.ToolType)), + EsanWhichToolName); + + return true; +} + +Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) { + // Shadow = ((App & Mask) + Offs) >> Scale + Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask)); + uint64_t Offs; + int Scale = ShadowScale[Options.ToolType]; + if (Scale <= 2) + Offs = ShadowOffs[Scale]; + else + Offs = ShadowOffs[0] << Scale; + Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs)); + if (Scale > 0) + Shadow = IRB.CreateLShr(Shadow, Scale); + return Shadow; +} + +bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) { + if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) { + // We'd like to know about cache fragmentation in vtable accesses and + // constant data references, so we do not currently ignore anything. + return false; + } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) { + // TODO: the instrumentation disturbs the data layout on the stack, so we + // may want to add an option to ignore stack references (if we can + // distinguish them) to reduce overhead. + } + // TODO(bruening): future tools will be returning true for some cases. + return false; +} + +bool EfficiencySanitizer::runOnModule(Module &M) { + bool Res = initOnModule(M); + initializeCallbacks(M); + for (auto &F : M) { + Res |= runOnFunction(F, M); + } + return Res; +} + +bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) { + // This is required to prevent instrumenting the call to __esan_init from + // within the module constructor. 
+ if (&F == EsanCtorFunction) + return false; + SmallVector<Instruction *, 8> LoadsAndStores; + SmallVector<Instruction *, 8> MemIntrinCalls; + SmallVector<Instruction *, 8> GetElementPtrs; + bool Res = false; + const DataLayout &DL = M.getDataLayout(); + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + for (auto &BB : F) { + for (auto &Inst : BB) { + if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) || + isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) && + !shouldIgnoreMemoryAccess(&Inst)) + LoadsAndStores.push_back(&Inst); + else if (isa<MemIntrinsic>(Inst)) + MemIntrinCalls.push_back(&Inst); + else if (isa<GetElementPtrInst>(Inst)) + GetElementPtrs.push_back(&Inst); + else if (CallInst *CI = dyn_cast<CallInst>(&Inst)) + maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); + } + } + + if (ClInstrumentLoadsAndStores) { + for (auto Inst : LoadsAndStores) { + Res |= instrumentLoadOrStore(Inst, DL); + } + } + + if (ClInstrumentMemIntrinsics) { + for (auto Inst : MemIntrinCalls) { + Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); + } + } + + if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) { + for (auto Inst : GetElementPtrs) { + Res |= instrumentGetElementPtr(Inst, M); + } + } + + return Res; +} + +bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I, + const DataLayout &DL) { + IRBuilder<> IRB(I); + bool IsStore; + Value *Addr; + unsigned Alignment; + if (LoadInst *Load = dyn_cast<LoadInst>(I)) { + IsStore = false; + Alignment = Load->getAlignment(); + Addr = Load->getPointerOperand(); + } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) { + IsStore = true; + Alignment = Store->getAlignment(); + Addr = Store->getPointerOperand(); + } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { + IsStore = true; + Alignment = 0; + Addr = RMW->getPointerOperand(); + } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) { + IsStore = true; + Alignment = 0; + Addr = Xchg->getPointerOperand(); + } else + llvm_unreachable("Unsupported mem access type"); + + Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); + const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8; + Value *OnAccessFunc = nullptr; + + // Convert 0 to the default alignment. + if (Alignment == 0) + Alignment = DL.getPrefTypeAlignment(OrigTy); + + if (IsStore) + NumInstrumentedStores++; + else + NumInstrumentedLoads++; + int Idx = getMemoryAccessFuncIndex(Addr, DL); + if (Idx < 0) { + OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN; + IRB.CreateCall(OnAccessFunc, + {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + ConstantInt::get(IntptrTy, TypeSizeBytes)}); + } else { + if (ClInstrumentFastpath && + instrumentFastpath(I, DL, IsStore, Addr, Alignment)) { + NumFastpaths++; + return true; + } + if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0) + OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx]; + else + OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx]; + IRB.CreateCall(OnAccessFunc, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); + } + return true; +} + +// It's simplest to replace the memset/memmove/memcpy intrinsics with +// calls that the runtime library intercepts. +// Our pass is late enough that calls should not turn back into intrinsics. 
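+// (Editorial sketch of the effect, with argument details elided: an intrinsic
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, ...)
+// becomes a plain library call
+//   %r = call i8* @memcpy(i8* %dst, i8* %src, i64 16)
+// which the runtime's interceptor can then observe.)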
+bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  IRBuilder<> IRB(MI);
+  bool Res = false;
+  if (isa<MemSetInst>(MI)) {
+    IRB.CreateCall(
+        MemsetFn,
+        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
+         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
+    MI->eraseFromParent();
+    Res = true;
+  } else if (isa<MemTransferInst>(MI)) {
+    IRB.CreateCall(
+        isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
+        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
+    MI->eraseFromParent();
+    Res = true;
+  } else
+    llvm_unreachable("Unsupported mem intrinsic type");
+  return Res;
+}
+
+bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
+  GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
+  bool Res = false;
+  if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
+    ++NumIgnoredGEPs;
+    return false;
+  }
+  Type *SourceTy = GepInst->getSourceElementType();
+  StructType *StructTy;
+  ConstantInt *Idx;
+  // Check if the GEP calculates an address from a struct array.
+  if (isa<StructType>(SourceTy)) {
+    StructTy = cast<StructType>(SourceTy);
+    Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
+    if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
+        !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
+      Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
+  }
+  // Iterate through all indices (except the first and the last) within each
+  // GEP instruction, looking for possible nested struct field address
+  // calculations.
+  for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
+    SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
+                                   GepInst->idx_begin() + i);
+    Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
+    unsigned CounterIdx = 0;
+    if (isa<ArrayType>(Ty)) {
+      ArrayType *ArrayTy = cast<ArrayType>(Ty);
+      StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
+      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
+        continue;
+      // The last counter is for struct array accesses.
+      CounterIdx = getArrayCounterIdx(StructTy);
+    } else if (isa<StructType>(Ty)) {
+      StructTy = cast<StructType>(Ty);
+      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
+        continue;
+      // Get the StructTy's subfield index.
+      Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
+      assert(Idx->getSExtValue() >= 0 &&
+             Idx->getSExtValue() < StructTy->getNumElements());
+      CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
+    }
+    Res |= insertCounterUpdate(I, StructTy, CounterIdx);
+  }
+  if (Res)
+    ++NumInstrumentedGEPs;
+  else
+    ++NumIgnoredGEPs;
+  return Res;
+}
+
+bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
+                                              StructType *StructTy,
+                                              unsigned CounterIdx) {
+  GlobalVariable *CounterArray = StructTyMap[StructTy];
+  if (CounterArray == nullptr)
+    return false;
+  IRBuilder<> IRB(I);
+  Constant *Indices[2];
+  // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
+  // http://llvm.org/docs/GetElementPtr.html.
+  // The first index of the GEP instruction steps through the first operand,
+  // i.e., the array itself.
+  Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
+  // The second index is the index within the array.
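+  // (Editorial example with hypothetical values: for a 4-slot counter array
+  // and CounterIdx == 2, the constant GEP built below is
+  //   getelementptr [4 x i64], [4 x i64]* @Counters, i32 0, i32 2
+  // i.e. the address of Counters[2].)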
+ Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx); + Constant *Counter = + ConstantExpr::getGetElementPtr( + ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)), + CounterArray, Indices); + Value *Load = IRB.CreateLoad(Counter); + IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)), + Counter); + return true; +} + +int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr, + const DataLayout &DL) { + Type *OrigPtrTy = Addr->getType(); + Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); + assert(OrigTy->isSized()); + // The size is always a multiple of 8. + uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8; + if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 && + TypeSizeBytes != 8 && TypeSizeBytes != 16) { + // Irregular sizes do not have per-size call targets. + NumAccessesWithIrregularSize++; + return -1; + } + size_t Idx = countTrailingZeros(TypeSizeBytes); + assert(Idx < NumberOfAccessSizes); + return Idx; +} + +bool EfficiencySanitizer::instrumentFastpath(Instruction *I, + const DataLayout &DL, bool IsStore, + Value *Addr, unsigned Alignment) { + if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) { + return instrumentFastpathCacheFrag(I, DL, Addr, Alignment); + } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) { + return instrumentFastpathWorkingSet(I, DL, Addr, Alignment); + } + return false; +} + +bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I, + const DataLayout &DL, + Value *Addr, + unsigned Alignment) { + // Do nothing. + return true; // Return true to avoid slowpath instrumentation. +} + +bool EfficiencySanitizer::instrumentFastpathWorkingSet( + Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) { + assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this + IRBuilder<> IRB(I); + Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); + const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); + // Bail to the slowpath if the access might touch multiple cache lines. + // An access aligned to its size is guaranteed to be intra-cache-line. + // getMemoryAccessFuncIndex has already ruled out a size larger than 16 + // and thus larger than a cache line for platforms this tool targets + // (and our shadow memory setup assumes 64-byte cache lines). + assert(TypeSize <= 128); + if (!(TypeSize == 8 || + (Alignment % (TypeSize / 8)) == 0)) { + if (ClAssumeIntraCacheLine) + ++NumAssumedIntraCacheLine; + else + return false; + } + + // We inline instrumentation to set the corresponding shadow bits for + // each cache line touched by the application. Here we handle a single + // load or store where we've already ruled out the possibility that it + // might touch more than one cache line and thus we simply update the + // shadow memory for a single cache line. + // Our shadow memory model is fine with races when manipulating shadow values. + // We generate the following code: + // + // const char BitMask = 0x81; + // char *ShadowAddr = appToShadow(AppAddr); + // if ((*ShadowAddr & BitMask) != BitMask) + // *ShadowAddr |= Bitmask; + // + Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy); + Value *ShadowPtr = appToShadow(AddrPtr, IRB); + Type *ShadowTy = IntegerType::get(*Ctx, 8U); + Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); + // The bottom bit is used for the current sampling period's working set. + // The top bit is used for the total working set. 
We set both on each + // memory access, if they are not already set. + Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B + + Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); + // The AND and CMP will be turned into a TEST instruction by the compiler. + Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask); + TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false); + // FIXME: do I need to call SetCurrentDebugLocation? + IRB.SetInsertPoint(CmpTerm); + // We use OR to set the shadow bits to avoid corrupting the middle 6 bits, + // which are used by the runtime library. + Value *NewVal = IRB.CreateOr(OldValue, ValueMask); + IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); + IRB.SetInsertPoint(I); + + return true; +} diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index ffde7f8d9bae0..b4070b6027681 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" @@ -35,6 +34,8 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> #include <memory> @@ -68,86 +69,93 @@ GCOVOptions GCOVOptions::getDefault() { } namespace { - class GCOVFunction; +class GCOVFunction; + +class GCOVProfiler { +public: + GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} + GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) { + assert((Options.EmitNotes || Options.EmitData) && + "GCOVProfiler asked to do nothing?"); + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; + } + bool runOnModule(Module &M); + +private: + // Create the .gcno files for the Module based on DebugInfo. + void emitProfileNotes(); + + // Modify the program to track transitions along edges and call into the + // profiling runtime to emit .gcda files when run. + bool emitProfileArcs(); + + // Get pointers to the functions in the runtime library. + Constant *getStartFileFunc(); + Constant *getIncrementIndirectCounterFunc(); + Constant *getEmitFunctionFunc(); + Constant *getEmitArcsFunc(); + Constant *getSummaryInfoFunc(); + Constant *getEndFileFunc(); + + // Create or retrieve an i32 state value that is used to represent the + // pred block number for certain non-trivial edges. + GlobalVariable *getEdgeStateValue(); + + // Produce a table of pointers to counters, by predecessor and successor + // block number. + GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter, + const UniqueVector<BasicBlock *> &Preds, + const UniqueVector<BasicBlock *> &Succs); + + // Add the function to write out all our counters to the global destructor + // list. 
+ Function * + insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); + Function *insertFlush(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); + void insertIndirectCounterIncrement(); + + std::string mangleName(const DICompileUnit *CU, const char *NewStem); - class GCOVProfiler : public ModulePass { - public: - static char ID; - GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} - GCOVProfiler(const GCOVOptions &Opts) : ModulePass(ID), Options(Opts) { - assert((Options.EmitNotes || Options.EmitData) && - "GCOVProfiler asked to do nothing?"); - ReversedVersion[0] = Options.Version[3]; - ReversedVersion[1] = Options.Version[2]; - ReversedVersion[2] = Options.Version[1]; - ReversedVersion[3] = Options.Version[0]; - ReversedVersion[4] = '\0'; - initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); - } - const char *getPassName() const override { - return "GCOV Profiler"; - } + GCOVOptions Options; - private: - bool runOnModule(Module &M) override; - - // Create the .gcno files for the Module based on DebugInfo. - void emitProfileNotes(); - - // Modify the program to track transitions along edges and call into the - // profiling runtime to emit .gcda files when run. - bool emitProfileArcs(); - - // Get pointers to the functions in the runtime library. - Constant *getStartFileFunc(); - Constant *getIncrementIndirectCounterFunc(); - Constant *getEmitFunctionFunc(); - Constant *getEmitArcsFunc(); - Constant *getSummaryInfoFunc(); - Constant *getDeleteWriteoutFunctionListFunc(); - Constant *getDeleteFlushFunctionListFunc(); - Constant *getEndFileFunc(); - - // Create or retrieve an i32 state value that is used to represent the - // pred block number for certain non-trivial edges. - GlobalVariable *getEdgeStateValue(); - - // Produce a table of pointers to counters, by predecessor and successor - // block number. - GlobalVariable *buildEdgeLookupTable(Function *F, - GlobalVariable *Counter, - const UniqueVector<BasicBlock *>&Preds, - const UniqueVector<BasicBlock*>&Succs); - - // Add the function to write out all our counters to the global destructor - // list. - Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, - MDNode*> >); - Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); - void insertIndirectCounterIncrement(); - - std::string mangleName(const DICompileUnit *CU, const char *NewStem); - - GCOVOptions Options; - - // Reversed, NUL-terminated copy of Options.Version. - char ReversedVersion[5]; - // Checksum, produced by hash of EdgeDestinations - SmallVector<uint32_t, 4> FileChecksums; - - Module *M; - LLVMContext *Ctx; - SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs; - DenseMap<DISubprogram *, Function *> FnMap; - }; + // Reversed, NUL-terminated copy of Options.Version. 
+ char ReversedVersion[5]; + // Checksum, produced by hash of EdgeDestinations + SmallVector<uint32_t, 4> FileChecksums; + + Module *M; + LLVMContext *Ctx; + SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs; +}; + +class GCOVProfilerLegacyPass : public ModulePass { +public: + static char ID; + GCOVProfilerLegacyPass() + : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {} + GCOVProfilerLegacyPass(const GCOVOptions &Opts) + : ModulePass(ID), Profiler(Opts) { + initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { return "GCOV Profiler"; } + + bool runOnModule(Module &M) override { return Profiler.runOnModule(M); } + +private: + GCOVProfiler Profiler; +}; } -char GCOVProfiler::ID = 0; -INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", +char GCOVProfilerLegacyPass::ID = 0; +INITIALIZE_PASS(GCOVProfilerLegacyPass, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { - return new GCOVProfiler(Options); + return new GCOVProfilerLegacyPass(Options); } static StringRef getFunctionName(const DISubprogram *SP) { @@ -257,10 +265,9 @@ namespace { void writeOut() { uint32_t Len = 3; SmallVector<StringMapEntry<GCOVLines *> *, 32> SortedLinesByFile; - for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), - E = LinesByFile.end(); I != E; ++I) { - Len += I->second->length(); - SortedLinesByFile.push_back(&*I); + for (auto &I : LinesByFile) { + Len += I.second->length(); + SortedLinesByFile.push_back(&I); } writeBytes(LinesTag, 4); @@ -272,10 +279,8 @@ namespace { StringMapEntry<GCOVLines *> *RHS) { return LHS->getKey() < RHS->getKey(); }); - for (SmallVectorImpl<StringMapEntry<GCOVLines *> *>::iterator - I = SortedLinesByFile.begin(), E = SortedLinesByFile.end(); - I != E; ++I) - (*I)->getValue()->writeOut(); + for (auto &I : SortedLinesByFile) + I->getValue()->writeOut(); write(0); write(0); } @@ -450,28 +455,32 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - FnMap.clear(); - for (Function &F : M) { - if (DISubprogram *SP = F.getSubprogram()) - FnMap[SP] = &F; - } - if (Options.EmitNotes) emitProfileNotes(); if (Options.EmitData) return emitProfileArcs(); return false; } -static bool functionHasLines(Function *F) { +PreservedAnalyses GCOVProfilerPass::run(Module &M, + AnalysisManager<Module> &AM) { + + GCOVProfiler Profiler(GCOVOpts); + + if (!Profiler.runOnModule(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +static bool functionHasLines(Function &F) { // Check whether this function actually has any source lines. Not only // do these waste space, they also can crash gcov. - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); - I != IE; ++I) { + for (auto &BB : F) { + for (auto &I : BB) { // Debug intrinsic locations correspond to the location of the // declaration, not necessarily any statements or expressions. 
- if (isa<DbgInfoIntrinsic>(I)) continue; + if (isa<DbgInfoIntrinsic>(&I)) continue; - const DebugLoc &Loc = I->getDebugLoc(); + const DebugLoc &Loc = I.getDebugLoc(); if (!Loc) continue; @@ -504,27 +513,27 @@ void GCOVProfiler::emitProfileNotes() { std::string EdgeDestinations; unsigned FunctionIdent = 0; - for (auto *SP : CU->getSubprograms()) { - Function *F = FnMap[SP]; - if (!F) continue; + for (auto &F : M->functions()) { + DISubprogram *SP = F.getSubprogram(); + if (!SP) continue; if (!functionHasLines(F)) continue; // gcov expects every function to start with an entry block that has a // single successor, so split the entry block to make sure of that. - BasicBlock &EntryBlock = F->getEntryBlock(); + BasicBlock &EntryBlock = F.getEntryBlock(); BasicBlock::iterator It = EntryBlock.begin(); while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; EntryBlock.splitBasicBlock(It); - Funcs.push_back(make_unique<GCOVFunction>(SP, F, &out, FunctionIdent++, + Funcs.push_back(make_unique<GCOVFunction>(SP, &F, &out, FunctionIdent++, Options.UseCfgChecksum, Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(&*BB); - TerminatorInst *TI = BB->getTerminator(); + for (auto &BB : F) { + GCOVBlock &Block = Func.getBlock(&BB); + TerminatorInst *TI = BB.getTerminator(); if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { Block.addEdge(Func.getBlock(TI->getSuccessor(i))); @@ -534,13 +543,12 @@ void GCOVProfiler::emitProfileNotes() { } uint32_t Line = 0; - for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); - I != IE; ++I) { + for (auto &I : BB) { // Debug intrinsic locations correspond to the location of the // declaration, not necessarily any statements or expressions. - if (isa<DbgInfoIntrinsic>(I)) continue; + if (isa<DbgInfoIntrinsic>(&I)) continue; - const DebugLoc &Loc = I->getDebugLoc(); + const DebugLoc &Loc = I.getDebugLoc(); if (!Loc) continue; @@ -581,16 +589,15 @@ bool GCOVProfiler::emitProfileArcs() { bool Result = false; bool InsertIndCounterIncrCode = false; for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i)); SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; - for (auto *SP : CU->getSubprograms()) { - Function *F = FnMap[SP]; - if (!F) continue; + for (auto &F : M->functions()) { + DISubprogram *SP = F.getSubprogram(); + if (!SP) continue; if (!functionHasLines(F)) continue; if (!Result) Result = true; unsigned Edges = 0; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); + for (auto &BB : F) { + TerminatorInst *TI = BB.getTerminator(); if (isa<ReturnInst>(TI)) ++Edges; else @@ -610,12 +617,12 @@ bool GCOVProfiler::emitProfileArcs() { UniqueVector<BasicBlock *> ComplexEdgeSuccs; unsigned Edge = 0; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); + for (auto &BB : F) { + TerminatorInst *TI = BB.getTerminator(); int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); if (Successors) { if (Successors == 1) { - IRBuilder<> Builder(&*BB->getFirstInsertionPt()); + IRBuilder<> Builder(&*BB.getFirstInsertionPt()); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); @@ -626,16 +633,13 @@ bool GCOVProfiler::emitProfileArcs() { Value *Sel = Builder.CreateSelect(BI->getCondition(), Builder.getInt64(Edge), Builder.getInt64(Edge + 1)); - SmallVector<Value *, 2> Idx; - Idx.push_back(Builder.getInt64(0)); - Idx.push_back(Sel); - Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(), - Counters, Idx); + Value *Counter = Builder.CreateInBoundsGEP( + Counters->getValueType(), Counters, {Builder.getInt64(0), Sel}); Value *Count = Builder.CreateLoad(Counter); Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else { - ComplexEdgePreds.insert(&*BB); + ComplexEdgePreds.insert(&BB); for (int i = 0; i != Successors; ++i) ComplexEdgeSuccs.insert(TI->getSuccessor(i)); } @@ -646,7 +650,7 @@ bool GCOVProfiler::emitProfileArcs() { if (!ComplexEdgePreds.empty()) { GlobalVariable *EdgeTable = - buildEdgeLookupTable(F, Counters, + buildEdgeLookupTable(&F, Counters, ComplexEdgePreds, ComplexEdgeSuccs); GlobalVariable *EdgeState = getEdgeStateValue(); @@ -679,7 +683,7 @@ bool GCOVProfiler::emitProfileArcs() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, "__llvm_gcov_init", M); - F->setUnnamedAddr(true); + F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); F->setLinkage(GlobalValue::InternalLinkage); F->addFnAttr(Attribute::NoInline); if (Options.NoRedZone) @@ -732,8 +736,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( EdgeTable[i] = NullValue; unsigned Edge = 0; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); + for (BasicBlock &BB : *F) { + TerminatorInst *TI = BB.getTerminator(); int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { for (int i = 0; i != Successors; ++i) { @@ -742,7 +746,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) + - (Preds.idFor(&*BB) - 1)] = cast<Constant>(Counter); + (Preds.idFor(&BB) - 1)] = cast<Constant>(Counter); } } Edge += Successors; @@ -754,7 +758,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( ConstantArray::get(EdgeTableTy, makeArrayRef(&EdgeTable[0],TableSize)), "__llvm_gcda_edge_table"); - EdgeTableGV->setUnnamedAddr(true); + EdgeTableGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); return EdgeTableGV; } @@ -805,16 +809,6 @@ Constant *GCOVProfiler::getSummaryInfoFunc() { return M->getOrInsertFunction("llvm_gcda_summary_info", FTy); } -Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy); -} - -Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy); -} - Constant *GCOVProfiler::getEndFileFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); @@ -828,7 +822,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { ConstantInt::get(Type::getInt32Ty(*Ctx), 0xffffffff), "__llvm_gcov_global_state_pred"); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); } return GV; } @@ -840,7 +834,7 @@ Function *GCOVProfiler::insertCounterWriteout( if (!WriteoutF) WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage, "__llvm_gcov_writeout", M); - WriteoutF->setUnnamedAddr(true); + WriteoutF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); WriteoutF->addFnAttr(Attribute::NoInline); if (Options.NoRedZone) WriteoutF->addFnAttr(Attribute::NoRedZone); @@ -884,7 +878,7 @@ Function *GCOVProfiler::insertCounterWriteout( GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = - cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); + cast<ArrayType>(GV->getValueType())->getNumElements(); Builder.CreateCall(EmitArcs, {Builder.getInt32(Arcs), Builder.CreateConstGEP2_64(GV, 0, 0)}); } @@ -900,7 +894,7 @@ Function *GCOVProfiler::insertCounterWriteout( void GCOVProfiler::insertIndirectCounterIncrement() { Function *Fn = cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc()); - Fn->setUnnamedAddr(true); + Fn->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attribute::NoInline); if (Options.NoRedZone) @@ -957,7 +951,7 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { "__llvm_gcov_flush", M); else FlushF->setLinkage(GlobalValue::InternalLinkage); - FlushF->setUnnamedAddr(true); + FlushF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); FlushF->addFnAttr(Attribute::NoInline); if (Options.NoRedZone) FlushF->addFnAttr(Attribute::NoRedZone); @@ -972,11 +966,9 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { Builder.CreateCall(WriteoutF, {}); // Zero out the counters. 
-  for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
-         I = CountersBySP.begin(), E = CountersBySP.end();
-       I != E; ++I) {
-    GlobalVariable *GV = I->first;
-    Constant *Null = Constant::getNullValue(GV->getType()->getElementType());
+  for (const auto &I : CountersBySP) {
+    GlobalVariable *GV = I.first;
+    Constant *Null = Constant::getNullValue(GV->getValueType());
     Builder.CreateStore(Null, GV);
   }
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
new file mode 100644
index 0000000000000..202b94b19c4ca
--- /dev/null
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -0,0 +1,661 @@
+//===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the transformation that promotes indirect calls to
+// conditional direct calls when the indirect-call value profile metadata is
+// available.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
+#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-icall-prom"
+
+STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
+STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+
+// Command line option to disable indirect-call promotion with the default as
+// false. This is for debugging purposes.
+static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
+                                cl::desc("Disable indirect call promotion"));
+
+// Set the cutoff value for the promotion. If the value is non-zero, we stop
+// the transformation once the total number of promotions equals the cutoff
+// value.
+// For debug use only.
+static cl::opt<unsigned>
+    ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+              cl::desc("Max number of promotions for this compilation"));
+
+// If ICPCSSkip is non-zero, the first ICPCSSkip callsites will be skipped.
+// For debug use only.
+static cl::opt<unsigned>
+    ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+              cl::desc("Skip callsites up to this number for this compilation"));
+
+// Set if the pass is called in LTO optimization. The difference for LTO mode
+// is that the pass won't prefix the source module name to the internal linkage
+// symbols.
+static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
+                                cl::desc("Run indirect-call promotion in LTO "
+                                         "mode"));
+
+// If the option is set to true, only call instructions will be considered for
+// transformation -- invoke instructions will be ignored.
+static cl::opt<bool>
+    ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
+                cl::desc("Run indirect-call promotion for call instructions "
+                         "only"));
+
+// If the option is set to true, only invoke instructions will be considered
+// for transformation -- call instructions will be ignored.
+static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
+                                   cl::Hidden,
+                                   cl::desc("Run indirect-call promotion for "
+                                            "invoke instruction only"));
+
+// Dump the function-level IR if the transformation happened in this
+// function. For debug use only.
+static cl::opt<bool>
+    ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
+                 cl::desc("Dump IR after transformation happens"));
+
+namespace {
+class PGOIndirectCallPromotionLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  PGOIndirectCallPromotionLegacyPass(bool InLTO = false)
+      : ModulePass(ID), InLTO(InLTO) {
+    initializePGOIndirectCallPromotionLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOIndirectCallPromotion";
+  }
+
+private:
+  bool runOnModule(Module &M) override;
+
+  // Set if this pass is called in LTO. We need special handling of the
+  // PGOFuncName for static variables due to LTO's internalization.
+  bool InLTO;
+};
+} // end anonymous namespace
+
+char PGOIndirectCallPromotionLegacyPass::ID = 0;
+INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
+                "Use PGO instrumentation profile to promote indirect calls to "
+                "direct calls.",
+                false, false)
+
+ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO) {
+  return new PGOIndirectCallPromotionLegacyPass(InLTO);
+}
+
+namespace {
+// The class holding the main data structures used to promote indirect calls
+// to conditional direct calls.
+class ICallPromotionFunc {
+private:
+  Function &F;
+  Module *M;
+
+  // Symtab that maps indirect call profile values to function names and
+  // definitions.
+  InstrProfSymtab *Symtab;
+
+  enum TargetStatus {
+    OK,                   // Should be able to promote.
+    NotAvailableInModule, // Cannot find the target in current module.
+    ReturnTypeMismatch,   // Return type mismatch between target and
+                          // indirect-call.
+    NumArgsMismatch,      // Number of arguments does not match.
+    ArgTypeMismatch       // Type mismatch in the arguments (cannot bitcast).
+  };
+
+  // Test if we can legally promote this indirect call to a direct call to
+  // Target.
+  TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target,
+                                Function *&F);
+
+  // A struct that records the direct target and its call count.
+  struct PromotionCandidate {
+    Function *TargetFunction;
+    uint64_t Count;
+    PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
+  };
+
+  // Check if the indirect-call call site should be promoted. Return the number
+  // of promotions. Inst is the candidate indirect call, ValueDataRef
+  // contains the array of value profile data for profiled targets,
+  // TotalCount is the total profiled count of call executions, and
+  // NumCandidates is the number of candidate entries in ValueDataRef.
+  std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
+      Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
+      uint64_t TotalCount, uint32_t NumCandidates);
+
+  // Main function that transforms Inst (either an indirect-call instruction
+  // or an invoke instruction) to a conditional call to F. This is like:
+  //     if (Inst.CalledValue == F)
+  //        F(...);
+  //     else
+  //        Inst(...);
+  //     end
+  // TotalCount is the profiled number of times the instruction executes.
+  // Count is the profiled number of times F is the target function.
+  // These two values are used to update the branch weights.
+  void promote(Instruction *Inst, Function *F, uint64_t Count,
+               uint64_t TotalCount);
+
+  // Promote a list of targets for one indirect-call callsite. Return
+  // the number of promotions.
+  uint32_t tryToPromote(Instruction *Inst,
+                        const std::vector<PromotionCandidate> &Candidates,
+                        uint64_t &TotalCount);
+
+  static const char *StatusToString(const TargetStatus S) {
+    switch (S) {
+    case OK:
+      return "OK to promote";
+    case NotAvailableInModule:
+      return "Cannot find the target";
+    case ReturnTypeMismatch:
+      return "Return type mismatch";
+    case NumArgsMismatch:
+      return "The number of arguments mismatch";
+    case ArgTypeMismatch:
+      return "Argument Type mismatch";
+    }
+    llvm_unreachable("Should not reach here");
+  }
+
+  // Noncopyable
+  ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
+  ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
+
+public:
+  ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
+      : F(Func), M(Modu), Symtab(Symtab) {
+  }
+  bool processFunction();
+};
+} // end anonymous namespace
+
+ICallPromotionFunc::TargetStatus
+ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
+                                     Function *&TargetFunction) {
+  Function *DirectCallee = Symtab->getFunction(Target);
+  if (DirectCallee == nullptr)
+    return NotAvailableInModule;
+  // Check the return type.
+  Type *CallRetType = Inst->getType();
+  if (!CallRetType->isVoidTy()) {
+    Type *FuncRetType = DirectCallee->getReturnType();
+    if (FuncRetType != CallRetType &&
+        !CastInst::isBitCastable(FuncRetType, CallRetType))
+      return ReturnTypeMismatch;
+  }
+
+  // Check if the arguments are compatible with the parameters.
+  FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
+  unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
+  CallSite CS(Inst);
+  unsigned ArgNum = CS.arg_size();
+
+  if (ParamNum != ArgNum && !DirectCalleeType->isVarArg())
+    return NumArgsMismatch;
+
+  for (unsigned I = 0; I < ParamNum; ++I) {
+    Type *PTy = DirectCalleeType->getFunctionParamType(I);
+    Type *ATy = CS.getArgument(I)->getType();
+    if (PTy == ATy)
+      continue;
+    if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy))
+      return ArgTypeMismatch;
+  }
+
+  DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
+               << Symtab->getFuncName(Target) << "\n");
+  TargetFunction = DirectCallee;
+  return OK;
+}
+
+// Indirect-call promotion heuristic. The direct targets are sorted based on
+// the count. Stop at the first target that is not promoted.
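+// (Editorial example with hypothetical counts: given candidates A:900, B:90,
+// and C:10 out of TotalCount 1000, A and B may be promoted in order; if B is
+// rejected -- cutoff, user option, or legality -- C is never examined.)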
+std::vector<ICallPromotionFunc::PromotionCandidate>
+ICallPromotionFunc::getPromotionCandidatesForCallSite(
+    Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
+    uint64_t TotalCount, uint32_t NumCandidates) {
+  std::vector<PromotionCandidate> Ret;
+
+  DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
+               << " Num_targets: " << ValueDataRef.size()
+               << " Num_candidates: " << NumCandidates << "\n");
+  NumOfPGOICallsites++;
+  if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
+    DEBUG(dbgs() << " Skip: User options.\n");
+    return Ret;
+  }
+
+  for (uint32_t I = 0; I < NumCandidates; I++) {
+    uint64_t Count = ValueDataRef[I].Count;
+    assert(Count <= TotalCount);
+    uint64_t Target = ValueDataRef[I].Value;
+    DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
+                 << " Target_func: " << Target << "\n");
+
+    if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
+      DEBUG(dbgs() << " Not promote: User options.\n");
+      break;
+    }
+    if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
+      DEBUG(dbgs() << " Not promote: User option.\n");
+      break;
+    }
+    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+      DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
+      break;
+    }
+    Function *TargetFunction = nullptr;
+    TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction);
+    if (Status != OK) {
+      StringRef TargetFuncName = Symtab->getFuncName(Target);
+      const char *Reason = StatusToString(Status);
+      DEBUG(dbgs() << " Not promote: " << Reason << "\n");
+      emitOptimizationRemarkMissed(
+          F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
+          Twine("Cannot promote indirect call to ") +
+              (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) +
+              Twine(" with count of ") + Twine(Count) + ": " + Reason);
+      break;
+    }
+    Ret.push_back(PromotionCandidate(TargetFunction, Count));
+    TotalCount -= Count;
+  }
+  return Ret;
+}
+
+// Create an if-then-else diamond structure, update the profile count, and
+// do the fix-up for invoke instructions.
+static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
+                             uint64_t Count, uint64_t TotalCount,
+                             BasicBlock **DirectCallBB,
+                             BasicBlock **IndirectCallBB,
+                             BasicBlock **MergeBB) {
+  CallSite CS(Inst);
+  Value *OrigCallee = CS.getCalledValue();
+
+  IRBuilder<> BBBuilder(Inst);
+  LLVMContext &Ctx = Inst->getContext();
+  Value *BCI1 =
+      BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
+  Value *BCI2 =
+      BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
+  Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
+
+  uint64_t ElseCount = TotalCount - Count;
+  uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
+  uint64_t Scale = calculateCountScale(MaxCount);
+  MDBuilder MDB(Inst->getContext());
+  MDNode *BranchWeights = MDB.createBranchWeights(
+      scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
+  TerminatorInst *ThenTerm, *ElseTerm;
+  SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
+                                BranchWeights);
+  *DirectCallBB = ThenTerm->getParent();
+  (*DirectCallBB)->setName("if.true.direct_targ");
+  *IndirectCallBB = ElseTerm->getParent();
+  (*IndirectCallBB)->setName("if.false.orig_indirect");
+  *MergeBB = Inst->getParent();
+  (*MergeBB)->setName("if.end.icp");
+
+  // Special handling of Invoke instructions.
+  InvokeInst *II = dyn_cast<InvokeInst>(Inst);
+  if (!II)
+    return;
+
+  // We don't need branch instructions for invoke.
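+  // (Editorial note: SplitBlockAndInsertIfThenElse terminated both new blocks
+  // with unconditional branches to MergeBB; for invokes, each block will
+  // instead be terminated by the invoke instruction itself, so those
+  // placeholder branches are deleted below.)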
+  ThenTerm->eraseFromParent();
+  ElseTerm->eraseFromParent();
+
+  // Add a jump from the Merge BB to the NormalDest. This is needed for the
+  // newly created direct invoke instruction -- as its NormalDest will be
+  // fixed up to MergeBB.
+  BranchInst::Create(II->getNormalDest(), *MergeBB);
+}
+
+// Find the PHI in BB that has the CallResult as an operand.
+static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
+  BasicBlock *From = Inst->getParent();
+  for (auto &I : *BB) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    if (!PHI)
+      continue;
+    int IX = PHI->getBasicBlockIndex(From);
+    if (IX == -1)
+      continue;
+    Value *V = PHI->getIncomingValue(IX);
+    if (dyn_cast<Instruction>(V) == Inst)
+      return true;
+  }
+  return false;
+}
+
+// This method fixes up PHI nodes in BB where BB is the UnwindDest of an
+// invoke instruction. In BB, there may be PHIs with incoming block being
+// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
+// instruction to its own BB, OrigBB is no longer the predecessor block of BB.
+// Instead, two new predecessors are added: IndirectCallBB and DirectCallBB,
+// so the PHI node's incoming BBs need to be fixed up accordingly.
+static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
+                                  BasicBlock *OrigBB,
+                                  BasicBlock *IndirectCallBB,
+                                  BasicBlock *DirectCallBB) {
+  for (auto &I : *BB) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    if (!PHI)
+      continue;
+    int IX = PHI->getBasicBlockIndex(OrigBB);
+    if (IX == -1)
+      continue;
+    Value *V = PHI->getIncomingValue(IX);
+    PHI->addIncoming(V, IndirectCallBB);
+    PHI->setIncomingBlock(IX, DirectCallBB);
+  }
+}
+
+// This method fixes up PHI nodes in BB where BB is the NormalDest of an
+// invoke instruction. In BB, there may be PHIs with incoming block being
+// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
+// instruction to its own BB, a new incoming edge will be added to the original
+// NormalDest from the IndirectCallBB.
+static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
+                                      BasicBlock *OrigBB,
+                                      BasicBlock *IndirectCallBB,
+                                      Instruction *NewInst) {
+  for (auto &I : *BB) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    if (!PHI)
+      continue;
+    int IX = PHI->getBasicBlockIndex(OrigBB);
+    if (IX == -1)
+      continue;
+    Value *V = PHI->getIncomingValue(IX);
+    if (dyn_cast<Instruction>(V) == Inst) {
+      PHI->setIncomingBlock(IX, IndirectCallBB);
+      PHI->addIncoming(NewInst, OrigBB);
+      continue;
+    }
+    PHI->addIncoming(V, IndirectCallBB);
+  }
+}
+
+// Add a bitcast instruction to the direct-call return value if needed.
+static Instruction *insertCallRetCast(const Instruction *Inst,
+                                      Instruction *DirectCallInst,
+                                      Function *DirectCallee) {
+  if (Inst->getType()->isVoidTy())
+    return DirectCallInst;
+
+  Type *CallRetType = Inst->getType();
+  Type *FuncRetType = DirectCallee->getReturnType();
+  if (FuncRetType == CallRetType)
+    return DirectCallInst;
+
+  BasicBlock *InsertionBB;
+  if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
+    InsertionBB = CI->getParent();
+  else
+    InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest();
+
+  return (new BitCastInst(DirectCallInst, CallRetType, "",
+                          InsertionBB->getTerminator()));
+}
+
+// Create a DirectCall instruction in the DirectCallBB.
+// Parameter Inst is the indirect-call (invoke) instruction.
+// DirectCallee is the decl of the direct-call (invoke) target.
+// DirectCallBB is the BB into which the direct-call (invoke) instruction is
+// inserted.
+// MergeBB is the bottom BB of the if-then-else-diamond after the
+// transformation. For invoke instructions, the edges from DirectCallBB and
+// IndirectCallBB to MergeBB are removed before this call (during
+// createIfThenElse).
+static Instruction *createDirectCallInst(const Instruction *Inst,
+                                         Function *DirectCallee,
+                                         BasicBlock *DirectCallBB,
+                                         BasicBlock *MergeBB) {
+  Instruction *NewInst = Inst->clone();
+  if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
+    CI->setCalledFunction(DirectCallee);
+    CI->mutateFunctionType(DirectCallee->getFunctionType());
+  } else {
+    // Must be an invoke instruction. The direct invoke's normal destination is
+    // fixed up to MergeBB. MergeBB is the place where the return cast is
+    // inserted. Also, since IndirectCallBB does not have an edge to MergeBB,
+    // there is no need to insert new PHIs into MergeBB.
+    InvokeInst *II = dyn_cast<InvokeInst>(NewInst);
+    assert(II);
+    II->setCalledFunction(DirectCallee);
+    II->mutateFunctionType(DirectCallee->getFunctionType());
+    II->setNormalDest(MergeBB);
+  }
+
+  DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
+                                     NewInst);
+
+  // Clear the value profile data.
+  NewInst->setMetadata(LLVMContext::MD_prof, 0);
+  CallSite NewCS(NewInst);
+  FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
+  unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
+  for (unsigned I = 0; I < ParamNum; ++I) {
+    Type *ATy = NewCS.getArgument(I)->getType();
+    Type *PTy = DirectCalleeType->getParamType(I);
+    if (ATy != PTy) {
+      BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
+      NewCS.setArgument(I, BI);
+    }
+  }
+
+  return insertCallRetCast(Inst, NewInst, DirectCallee);
+}
+
+// Create a PHI to unify the return values of calls.
+static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
+                             Function *DirectCallee) {
+  if (Inst->getType()->isVoidTy())
+    return;
+
+  BasicBlock *RetValBB = CallResult->getParent();
+
+  BasicBlock *PHIBB;
+  if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
+    RetValBB = II->getNormalDest();
+
+  PHIBB = RetValBB->getSingleSuccessor();
+  if (getCallRetPHINode(PHIBB, Inst))
+    return;
+
+  PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
+  PHIBB->getInstList().push_front(CallRetPHI);
+  Inst->replaceAllUsesWith(CallRetPHI);
+  CallRetPHI->addIncoming(Inst, Inst->getParent());
+  CallRetPHI->addIncoming(CallResult, RetValBB);
+}
+
+// This function does the actual indirect-call promotion transformation:
+// For an indirect-call like:
+//     Ret = (*Foo)(Args);
+// It transforms to:
+//     if (Foo == DirectCallee)
+//        Ret1 = DirectCallee(Args);
+//     else
+//        Ret2 = (*Foo)(Args);
+//     Ret = phi(Ret1, Ret2);
+// It adds type casts for the args that do not match the parameters and for
+// the return value. Branch weight metadata is also updated.
+void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
+                                 uint64_t Count, uint64_t TotalCount) {
+  assert(DirectCallee != nullptr);
+  BasicBlock *BB = Inst->getParent();
+  // Just to suppress the non-debug build warning.
+  (void)BB;
+  DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
+  DEBUG(dbgs() << *BB << "\n");
+
+  BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
+  createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
+                   &IndirectCallBB, &MergeBB);
+
+  Instruction *NewInst =
+      createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
+
+  // Move Inst from MergeBB to IndirectCallBB.
+  Inst->removeFromParent();
+  IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
+                                       Inst);
+
+  if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
+    // At this point, the original indirect invoke instruction has the original
+    // UnwindDest and NormalDest. For the direct invoke instruction, the
+    // NormalDest points to MergeBB, and MergeBB jumps to the original
+    // NormalDest. MergeBB might have a new bitcast instruction for the return
+    // value. The PHIs are in the original NormalDest. Since we now have two
+    // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
+    //
+    // UnwindDest will not use the return value. So pass nullptr here.
+    fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
+                          DirectCallBB);
+    // We don't need to update the operand from NormalDest for DirectCallBB.
+    // Pass nullptr here.
+    fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
+                              IndirectCallBB, NewInst);
+  }
+
+  insertCallRetPHI(Inst, NewInst, DirectCallee);
+
+  DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
+  DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
+
+  emitOptimizationRemark(
+      F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
+      Twine("Promote indirect call to ") + DirectCallee->getName() +
+          " with count " + Twine(Count) + " out of " + Twine(TotalCount));
+}
+
+// Promote indirect-call to conditional direct-call for one callsite.
+uint32_t ICallPromotionFunc::tryToPromote(
+    Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
+    uint64_t &TotalCount) {
+  uint32_t NumPromoted = 0;
+
+  for (auto &C : Candidates) {
+    uint64_t Count = C.Count;
+    promote(Inst, C.TargetFunction, Count, TotalCount);
+    assert(TotalCount >= Count);
+    TotalCount -= Count;
+    NumOfPGOICallPromotion++;
+    NumPromoted++;
+  }
+  return NumPromoted;
+}
+
+// Traverse all the indirect-call callsites and use the value profile
+// annotations to perform indirect-call promotion.
+bool ICallPromotionFunc::processFunction() {
+  bool Changed = false;
+  ICallPromotionAnalysis ICallAnalysis;
+  for (auto &I : findIndirectCallSites(F)) {
+    uint32_t NumVals, NumCandidates;
+    uint64_t TotalCount;
+    auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
+        I, NumVals, TotalCount, NumCandidates);
+    if (!NumCandidates)
+      continue;
+    auto PromotionCandidates = getPromotionCandidatesForCallSite(
+        I, ICallProfDataRef, TotalCount, NumCandidates);
+    uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
+    if (NumPromoted == 0)
+      continue;
+
+    Changed = true;
+    // Adjust the MD.prof metadata. First delete the old one.
+    I->setMetadata(LLVMContext::MD_prof, 0);
+    // If all targets were promoted, we don't need the MD.prof metadata.
+    if (TotalCount == 0 || NumPromoted == NumVals)
+      continue;
+    // Otherwise, update the metadata with the remaining un-promoted records.
+    annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
+                      IPVK_IndirectCallTarget, NumCandidates);
+  }
+  return Changed;
+}
+
+// A wrapper function that does the actual work.
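+// (Editorial note: both the legacy pass and the new pass manager entry point
+// defined below funnel into this helper, so the transformation logic is
+// shared between the two pass managers.)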
+// A wrapper function that does the actual work.
+static bool promoteIndirectCalls(Module &M, bool InLTO) {
+  if (DisableICP)
+    return false;
+  InstrProfSymtab Symtab;
+  Symtab.create(M, InLTO);
+  bool Changed = false;
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    if (F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+    ICallPromotionFunc ICallPromotion(F, &M, &Symtab);
+    bool FuncChanged = ICallPromotion.processFunction();
+    if (ICPDUMPAFTER && FuncChanged) {
+      DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
+      DEBUG(dbgs() << "\n");
+    }
+    Changed |= FuncChanged;
+    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+      DEBUG(dbgs() << " Stop: Cutoff reached.\n");
+      break;
+    }
+  }
+  return Changed;
+}
+
+bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
+  // The command-line option takes priority for the InLTO mode.
+  return promoteIndirectCalls(M, InLTO | ICPLTOMode);
+}
+
+PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, AnalysisManager<Module> &AM) {
+  if (!promoteIndirectCalls(M, InLTO | ICPLTOMode))
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 28483e7e9b692..b11c6be696f3e 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -13,12 +13,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/InstrProfiling.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/ProfileData/InstrProf.h"
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 using namespace llvm;
@@ -27,121 +27,112 @@ using namespace llvm;
 
 namespace {
 
-class InstrProfiling : public ModulePass {
+cl::opt<bool> DoNameCompression("enable-name-compression",
+                                cl::desc("Enable name string compression"),
+                                cl::init(true));
+
+cl::opt<bool> ValueProfileStaticAlloc(
+    "vp-static-alloc",
+    cl::desc("Do static counter allocation for value profiler"),
+    cl::init(true));
+cl::opt<double> NumCountersPerValueSite(
+    "vp-counters-per-site",
+    cl::desc("The average number of profile counters allocated "
+             "per value profiling site."),
+    // This is set to a very small value because in real programs, only
+    // a very small percentage of value sites have non-zero targets, e.g., 1/30.
+    // For those sites with a non-zero profile, the average number of targets
+    // is usually smaller than 2.
+ cl::init(1.0)); + +class InstrProfilingLegacyPass : public ModulePass { + InstrProfiling InstrProf; + public: static char ID; - - InstrProfiling() : ModulePass(ID) {} - - InstrProfiling(const InstrProfOptions &Options) - : ModulePass(ID), Options(Options) {} - + InstrProfilingLegacyPass() : ModulePass(ID), InstrProf() {} + InstrProfilingLegacyPass(const InstrProfOptions &Options) + : ModulePass(ID), InstrProf(Options) {} const char *getPassName() const override { return "Frontend instrumentation-based coverage lowering"; } - bool runOnModule(Module &M) override; + bool runOnModule(Module &M) override { return InstrProf.run(M); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); } +}; -private: - InstrProfOptions Options; - Module *M; - typedef struct PerFunctionProfileData { - uint32_t NumValueSites[IPVK_Last+1]; - GlobalVariable* RegionCounters; - GlobalVariable* DataVar; - PerFunctionProfileData() : RegionCounters(nullptr), DataVar(nullptr) { - memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last+1)); - } - } PerFunctionProfileData; - DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap; - std::vector<Value *> UsedVars; - - bool isMachO() const { - return Triple(M->getTargetTriple()).isOSBinFormatMachO(); - } - - /// Get the section name for the counter variables. - StringRef getCountersSection() const { - return getInstrProfCountersSectionName(isMachO()); - } - - /// Get the section name for the name variables. - StringRef getNameSection() const { - return getInstrProfNameSectionName(isMachO()); - } - - /// Get the section name for the profile data variables. - StringRef getDataSection() const { - return getInstrProfDataSectionName(isMachO()); - } - - /// Get the section name for the coverage mapping data. - StringRef getCoverageSection() const { - return getInstrProfCoverageSectionName(isMachO()); - } - - /// Count the number of instrumented value sites for the function. - void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); - - /// Replace instrprof_value_profile with a call to runtime library. - void lowerValueProfileInst(InstrProfValueProfileInst *Ins); - - /// Replace instrprof_increment with an increment of the appropriate value. - void lowerIncrement(InstrProfIncrementInst *Inc); +} // anonymous namespace - /// Force emitting of name vars for unused functions. - void lowerCoverageData(GlobalVariable *CoverageNamesVar); +PreservedAnalyses InstrProfiling::run(Module &M, AnalysisManager<Module> &AM) { + if (!run(M)) + return PreservedAnalyses::all(); - /// Get the region counters for an increment, creating them if necessary. - /// - /// If the counter array doesn't yet exist, the profile data variables - /// referring to them will also be created. - GlobalVariable *getOrCreateRegionCounters(InstrProfIncrementInst *Inc); + return PreservedAnalyses::none(); +} - /// Emit runtime registration functions for each profile data variable. - void emitRegistration(); +char InstrProfilingLegacyPass::ID = 0; +INITIALIZE_PASS(InstrProfilingLegacyPass, "instrprof", + "Frontend instrumentation-based coverage lowering.", false, + false) - /// Emit the necessary plumbing to pull in the runtime initialization. - void emitRuntimeHook(); +ModulePass * +llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) { + return new InstrProfilingLegacyPass(Options); +} - /// Add uses of our data variables and runtime hook. 
- void emitUses(); +bool InstrProfiling::isMachO() const { + return Triple(M->getTargetTriple()).isOSBinFormatMachO(); +} - /// Create a static initializer for our data, on platforms that need it, - /// and for any profile output file that was specified. - void emitInitialization(); -}; +/// Get the section name for the counter variables. +StringRef InstrProfiling::getCountersSection() const { + return getInstrProfCountersSectionName(isMachO()); +} -} // anonymous namespace +/// Get the section name for the name variables. +StringRef InstrProfiling::getNameSection() const { + return getInstrProfNameSectionName(isMachO()); +} -char InstrProfiling::ID = 0; -INITIALIZE_PASS(InstrProfiling, "instrprof", - "Frontend instrumentation-based coverage lowering.", false, - false) +/// Get the section name for the profile data variables. +StringRef InstrProfiling::getDataSection() const { + return getInstrProfDataSectionName(isMachO()); +} -ModulePass *llvm::createInstrProfilingPass(const InstrProfOptions &Options) { - return new InstrProfiling(Options); +/// Get the section name for the coverage mapping data. +StringRef InstrProfiling::getCoverageSection() const { + return getInstrProfCoverageSectionName(isMachO()); } -bool InstrProfiling::runOnModule(Module &M) { +bool InstrProfiling::run(Module &M) { bool MadeChange = false; this->M = &M; + NamesVar = nullptr; + NamesSize = 0; ProfileDataMap.clear(); UsedVars.clear(); // We did not know how many value sites there would be inside // the instrumented function. This is counting the number of instrumented // target value sites to enter it as field in the profile data variable. - for (Function &F : M) + for (Function &F : M) { + InstrProfIncrementInst *FirstProfIncInst = nullptr; for (BasicBlock &BB : F) - for (auto I = BB.begin(), E = BB.end(); I != E;) - if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I++)) + for (auto I = BB.begin(), E = BB.end(); I != E; I++) + if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I)) computeNumValueSiteCounts(Ind); + else if (FirstProfIncInst == nullptr) + FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I); + + // Value profiling intrinsic lowering requires per-function profile data + // variable to be created first. 
+    if (FirstProfIncInst != nullptr)
+      static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
+  }
 
   for (Function &F : M)
     for (BasicBlock &BB : F)
@@ -157,7 +148,7 @@ bool InstrProfiling::runOnModule(Module &M) {
   }
 
   if (GlobalVariable *CoverageNamesVar =
-          M.getNamedGlobal(getCoverageNamesVarName())) {
+          M.getNamedGlobal(getCoverageUnusedNamesVarName())) {
     lowerCoverageData(CoverageNamesVar);
     MadeChange = true;
   }
@@ -165,6 +156,8 @@
   if (!MadeChange)
     return false;
 
+  emitVNodes();
+  emitNameData();
   emitRegistration();
   emitRuntimeHook();
   emitUses();
@@ -204,7 +197,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
   GlobalVariable *Name = Ind->getName();
   auto It = ProfileDataMap.find(Name);
   assert(It != ProfileDataMap.end() && It->second.DataVar &&
-         "value profiling detected in function with no counter incerement");
+         "value profiling detected in function with no counter increment");
 
   GlobalVariable *DataVar = It->second.DataVar;
   uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
@@ -213,9 +206,9 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
     Index += It->second.NumValueSites[Kind];
 
   IRBuilder<> Builder(Ind);
-  Value* Args[3] = {Ind->getTargetValue(),
-                    Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
-                    Builder.getInt32(Index)};
+  Value *Args[3] = {Ind->getTargetValue(),
+                    Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+                    Builder.getInt32(Index)};
   Ind->replaceAllUsesWith(
       Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args));
   Ind->eraseFromParent();
@@ -243,9 +236,8 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
     assert(isa<GlobalVariable>(V) && "Missing reference to function name");
     GlobalVariable *Name = cast<GlobalVariable>(V);
 
-    // Move the name variable to the right section.
-    Name->setSection(getNameSection());
-    Name->setAlignment(1);
+    Name->setLinkage(GlobalValue::PrivateLinkage);
+    ReferencedNames.push_back(Name);
   }
 }
 
@@ -261,22 +253,77 @@ static inline bool shouldRecordFunctionAddr(Function *F) {
   if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
       !F->hasAvailableExternallyLinkage())
     return true;
+  // Prohibit function address recording if the function is both internal and
+  // COMDAT. This avoids the profile data variable referencing internal symbols
+  // in COMDAT.
+  if (F->hasLocalLinkage() && F->hasComdat())
+    return false;
   // Check uses of this function for other than direct calls or invokes to it.
-  return F->hasAddressTaken();
+  // Inline virtual functions have linkonce_odr linkage. When a key method
+  // exists, the vtable will only be emitted in the TU where the key method
+  // is defined. In a TU where the vtable is not available, the function won't
+  // be 'addresstaken'. If its address is not recorded here, the profile data
+  // with the missing address may be picked by the linker, leading to missing
+  // indirect-call target info.
+  return F->hasAddressTaken() || F->hasLinkOnceLinkage();
+}
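The linkonce/key-method scenario described in the comment above can be hard to picture. Here is a deliberately small, self-contained C++ illustration; the class and file names are hypothetical:

    // keymethod.cpp - hypothetical single-TU illustration.
    struct Base {
      virtual void key();        // key method; only its defining TU emits a vtable
      virtual void inlined() {}  // in-class definition -> linkonce_odr linkage
    };

    // This TU never defines Base::key(), so no vtable (and thus no address of
    // Base::inlined) is emitted here, yet this TU still emits profile data for
    // Base::inlined. Recording the address for all linkonce functions keeps
    // indirect-call target resolution working no matter which TU's profile
    // data survives the link.
    int main() { Base *B = nullptr; (void)B; return 0; }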
+static inline bool needsComdatForCounter(Function &F, Module &M) {
+  if (F.hasComdat())
+    return true;
+
+  Triple TT(M.getTargetTriple());
+  if (!TT.isOSBinFormatELF())
+    return false;
+
+  // See createPGOFuncNameVar for more details. To avoid link errors, profile
+  // counters for functions with available_externally linkage need to be
+  // changed to linkonce linkage. On ELF-based systems, this causes weak
+  // symbols to be created. Without using comdat, duplicate entries won't be
+  // removed by the linker, leading to increased data segment size and raw
+  // profile size. Even worse, since the referenced counter from the profile
+  // per-function data object will be resolved to the common strong definition,
+  // the profile counts for available_externally functions will end up being
+  // duplicated in the raw profile data. This can result in a distorted
+  // profile, as the counts of those duplicates will be accumulated by the
+  // profile merger.
+  GlobalValue::LinkageTypes Linkage = F.getLinkage();
+  if (Linkage != GlobalValue::ExternalWeakLinkage &&
+      Linkage != GlobalValue::AvailableExternallyLinkage)
+    return false;
+
+  return true;
 }
 
-static inline Comdat *getOrCreateProfileComdat(Module &M,
+static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
                                                InstrProfIncrementInst *Inc) {
+  if (!needsComdatForCounter(F, M))
+    return nullptr;
+
   // COFF format requires a COMDAT section to have a key symbol with the same
-  // name. The linker targeting COFF also requires that the COMDAT section
+  // name. The linker targeting COFF also requires that the COMDAT
   // a section is associated to must precede the associating section. For this
-  // reason, we must choose the name var's name as the name of the comdat.
+  // reason, we must choose the counter var's name as the name of the comdat.
   StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
-                                ? getInstrProfNameVarPrefix()
+                                ? getInstrProfCountersVarPrefix()
                                 : getInstrProfComdatPrefix());
   return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
 }
 
+static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
+  // Don't do this for Darwin. compiler-rt uses linker magic.
+  if (Triple(M.getTargetTriple()).isOSDarwin())
+    return false;
+
+  // Use linker script magic to get data/cnts/name start/end.
+  if (Triple(M.getTargetTriple()).isOSLinux() ||
+      Triple(M.getTargetTriple()).isOSFreeBSD() ||
+      Triple(M.getTargetTriple()).isPS4CPU())
+    return false;
+
+  return true;
+}
+
 GlobalVariable *
 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
   GlobalVariable *NamePtr = Inc->getName();
@@ -294,11 +341,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
   // linking.
   Function *Fn = Inc->getParent()->getParent();
   Comdat *ProfileVarsComdat = nullptr;
-  if (Fn->hasComdat())
-    ProfileVarsComdat = getOrCreateProfileComdat(*M, Inc);
-  NamePtr->setSection(getNameSection());
-  NamePtr->setAlignment(1);
-  NamePtr->setComdat(ProfileVarsComdat);
+  ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
 
   uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
   LLVMContext &Ctx = M->getContext();
@@ -314,27 +357,51 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
   CounterPtr->setAlignment(8);
   CounterPtr->setComdat(ProfileVarsComdat);
 
-  // Create data variable.
   auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  // Statically allocate the array of pointers to value profile nodes for
+  // the current function.
+ Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy); + if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) { + + uint64_t NS = 0; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + NS += PD.NumValueSites[Kind]; + if (NS) { + ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS); + + auto *ValuesVar = + new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(), + Constant::getNullValue(ValuesTy), + getVarName(Inc, getInstrProfValuesVarPrefix())); + ValuesVar->setVisibility(NamePtr->getVisibility()); + ValuesVar->setSection(getInstrProfValuesSectionName(isMachO())); + ValuesVar->setAlignment(8); + ValuesVar->setComdat(ProfileVarsComdat); + ValuesPtrExpr = + ConstantExpr::getBitCast(ValuesVar, llvm::Type::getInt8PtrTy(Ctx)); + } + } + + // Create data variable. auto *Int16Ty = Type::getInt16Ty(Ctx); - auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last+1); + auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1); Type *DataTypes[] = { - #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, - #include "llvm/ProfileData/InstrProfData.inc" +#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, +#include "llvm/ProfileData/InstrProfData.inc" }; auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes)); - Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) ? - ConstantExpr::getBitCast(Fn, Int8PtrTy) : - ConstantPointerNull::get(Int8PtrTy); + Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) + ? ConstantExpr::getBitCast(Fn, Int8PtrTy) + : ConstantPointerNull::get(Int8PtrTy); - Constant *Int16ArrayVals[IPVK_Last+1]; + Constant *Int16ArrayVals[IPVK_Last + 1]; for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]); Constant *DataVals[] = { - #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, - #include "llvm/ProfileData/InstrProfData.inc" +#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, +#include "llvm/ProfileData/InstrProfData.inc" }; auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(), ConstantStruct::get(DataTy, DataVals), @@ -350,28 +417,99 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { // Mark the data variable as used so that it isn't stripped out. UsedVars.push_back(Data); + // Now that the linkage set by the FE has been passed to the data and counter + // variables, reset Name variable's linkage and visibility to private so that + // it can be removed later by the compiler. + NamePtr->setLinkage(GlobalValue::PrivateLinkage); + // Collect the referenced names to be used by emitNameData. + ReferencedNames.push_back(NamePtr); return CounterPtr; } -void InstrProfiling::emitRegistration() { - // Don't do this for Darwin. compiler-rt uses linker magic. - if (Triple(M->getTargetTriple()).isOSDarwin()) +void InstrProfiling::emitVNodes() { + if (!ValueProfileStaticAlloc) return; - // Use linker script magic to get data/cnts/name start/end. - if (Triple(M->getTargetTriple()).isOSLinux() || - Triple(M->getTargetTriple()).isOSFreeBSD()) + // For now only support this on platforms that do + // not require runtime registration to discover + // named section start/end. 
+ if (needsRuntimeRegistrationOfSectionRange(*M)) + return; + + size_t TotalNS = 0; + for (auto &PD : ProfileDataMap) { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + TotalNS += PD.second.NumValueSites[Kind]; + } + + if (!TotalNS) + return; + + uint64_t NumCounters = TotalNS * NumCountersPerValueSite; +// Heuristic for small programs with very few total value sites. +// The default value of vp-counters-per-site is chosen based on +// the observation that large apps usually have a low percentage +// of value sites that actually have any profile data, and thus +// the average number of counters per site is low. For small +// apps with very few sites, this may not be true. Bump up the +// number of counters in this case. +#define INSTR_PROF_MIN_VAL_COUNTS 10 + if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS) + NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2); + + auto &Ctx = M->getContext(); + Type *VNodeTypes[] = { +#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType, +#include "llvm/ProfileData/InstrProfData.inc" + }; + auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes)); + + ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters); + auto *VNodesVar = new GlobalVariable( + *M, VNodesTy, false, llvm::GlobalValue::PrivateLinkage, + Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); + VNodesVar->setSection(getInstrProfVNodesSectionName(isMachO())); + UsedVars.push_back(VNodesVar); +} + +void InstrProfiling::emitNameData() { + std::string UncompressedData; + + if (ReferencedNames.empty()) + return; + + std::string CompressedNameStr; + if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr, + DoNameCompression)) { + llvm::report_fatal_error(toString(std::move(E)), false); + } + + auto &Ctx = M->getContext(); + auto *NamesVal = llvm::ConstantDataArray::getString( + Ctx, StringRef(CompressedNameStr), false); + NamesVar = new llvm::GlobalVariable(*M, NamesVal->getType(), true, + llvm::GlobalValue::PrivateLinkage, + NamesVal, getInstrProfNamesVarName()); + NamesSize = CompressedNameStr.size(); + NamesVar->setSection(getNameSection()); + UsedVars.push_back(NamesVar); +} + +void InstrProfiling::emitRegistration() { + if (!needsRuntimeRegistrationOfSectionRange(*M)) return; // Construct the function. 
auto *VoidTy = Type::getVoidTy(M->getContext()); auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext()); + auto *Int64Ty = Type::getInt64Ty(M->getContext()); auto *RegisterFTy = FunctionType::get(VoidTy, false); auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage, getInstrProfRegFuncsName(), M); - RegisterF->setUnnamedAddr(true); - if (Options.NoRedZone) RegisterF->addFnAttr(Attribute::NoRedZone); + RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + if (Options.NoRedZone) + RegisterF->addFnAttr(Attribute::NoRedZone); auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false); auto *RuntimeRegisterF = @@ -380,7 +518,20 @@ void InstrProfiling::emitRegistration() { IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); for (Value *Data : UsedVars) - IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); + if (Data != NamesVar) + IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); + + if (NamesVar) { + Type *ParamTypes[] = {VoidPtrTy, Int64Ty}; + auto *NamesRegisterTy = + FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false); + auto *NamesRegisterF = + Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage, + getInstrProfNamesRegFuncName(), M); + IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy), + IRB.getInt64(NamesSize)}); + } + IRB.CreateRetVoid(); } @@ -392,7 +543,8 @@ void InstrProfiling::emitRuntimeHook() { return; // If the module's provided its own runtime, we don't need to do anything. - if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) return; + if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) + return; // Declare an external variable that will pull in the runtime initialization. auto *Int32Ty = Type::getInt32Ty(M->getContext()); @@ -405,8 +557,11 @@ void InstrProfiling::emitRuntimeHook() { GlobalValue::LinkOnceODRLinkage, getInstrProfRuntimeHookVarUseFuncName(), M); User->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) User->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) + User->addFnAttr(Attribute::NoRedZone); User->setVisibility(GlobalValue::HiddenVisibility); + if (Triple(M->getTargetTriple()).supportsCOMDAT()) + User->setComdat(M->getOrInsertComdat(User->getName())); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User)); auto *Load = IRB.CreateLoad(Var); @@ -448,16 +603,18 @@ void InstrProfiling::emitInitialization() { std::string InstrProfileOutput = Options.InstrProfileOutput; Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName()); - if (!RegisterF && InstrProfileOutput.empty()) return; + if (!RegisterF && InstrProfileOutput.empty()) + return; // Create the initialization function. auto *VoidTy = Type::getVoidTy(M->getContext()); auto *F = Function::Create(FunctionType::get(VoidTy, false), GlobalValue::InternalLinkage, getInstrProfInitFuncName(), M); - F->setUnnamedAddr(true); + F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); F->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) F->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); // Add the basic block and the necessary calls. 
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F)); diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index a05a5fa09f9ad..2963d08752c46 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -59,15 +59,16 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeAddressSanitizerPass(Registry); initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingPass(Registry); - initializeGCOVProfilerPass(Registry); - initializePGOInstrumentationGenPass(Registry); - initializePGOInstrumentationUsePass(Registry); - initializeInstrProfilingPass(Registry); + initializeGCOVProfilerLegacyPassPass(Registry); + initializePGOInstrumentationGenLegacyPassPass(Registry); + initializePGOInstrumentationUseLegacyPassPass(Registry); + initializePGOIndirectCallPromotionLegacyPassPass(Registry); + initializeInstrProfilingLegacyPassPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); initializeSanitizerCoverageModulePass(Registry); initializeDataFlowSanitizerPass(Registry); - initializeSafeStackPass(Registry); + initializeEfficiencySanitizerPass(Registry); } /// LLVMInitializeInstrumentation - C binding for diff --git a/lib/Transforms/Instrumentation/Makefile b/lib/Transforms/Instrumentation/Makefile deleted file mode 100644 index 6cbc7a9cd88af..0000000000000 --- a/lib/Transforms/Instrumentation/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/Instrumentation/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMInstrumentation -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 34aaa7f27d6ef..970f9ab86e827 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -91,7 +91,6 @@ //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -109,9 +108,9 @@ #include "llvm/IR/Type.h" #include "llvm/IR/ValueMap.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -191,6 +190,12 @@ static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow", cl::desc("Insert checks for constant shadow values"), cl::Hidden, cl::init(false)); +// This is off by default because of a bug in gold: +// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 +static cl::opt<bool> ClWithComdat("msan-with-comdat", + cl::desc("Place MSan constructors in comdat sections"), + cl::Hidden, cl::init(false)); + static const char *const kMsanModuleCtorName = "msan.module_ctor"; static const char *const kMsanInitName = "__msan_init"; @@ -312,6 +317,9 @@ class MemorySanitizer : public FunctionPass { TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), WarningFn(nullptr) {} const char *getPassName() const override { return "MemorySanitizer"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + } bool runOnFunction(Function &F) override; bool doInitialization(Module &M) override; static char ID; // Pass identification, replacement for typeid. 
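The MemorySanitizer hunk above adds a getAnalysisUsage override that requests TargetLibraryInfo. For readers less familiar with legacy pass plumbing, here is a minimal sketch of that declare-then-consume pattern; MyPass is a hypothetical name, and the fetch mirrors the getTLI() call visible later in this diff:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    // Hypothetical pass showing the pattern MemorySanitizer uses above:
    // declare the dependency in getAnalysisUsage, then fetch it at run time.
    struct MyPass : public FunctionPass {
      static char ID;
      MyPass() : FunctionPass(ID) {}
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<TargetLibraryInfoWrapperPass>(); // schedule TLI first
      }
      bool runOnFunction(Function &F) override {
        const TargetLibraryInfo &TLI =
            getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
        (void)TLI; // e.g. consult TLI before treating a call as a libc builtin
        return false;
      }
    };
    char MyPass::ID = 0;
    } // anonymous namespace

Declaring the dependency is what lets the pass manager guarantee the analysis result exists before runOnFunction is entered.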
@@ -374,13 +382,18 @@ class MemorySanitizer : public FunctionPass { friend struct VarArgAMD64Helper; friend struct VarArgMIPS64Helper; friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; }; } // anonymous namespace char MemorySanitizer::ID = 0; -INITIALIZE_PASS(MemorySanitizer, "msan", - "MemorySanitizer: detects uninitialized reads.", - false, false) +INITIALIZE_PASS_BEGIN( + MemorySanitizer, "msan", + "MemorySanitizer: detects uninitialized reads.", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END( + MemorySanitizer, "msan", + "MemorySanitizer: detects uninitialized reads.", false, false) FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins) { return new MemorySanitizer(TrackOrigins); @@ -540,8 +553,14 @@ bool MemorySanitizer::doInitialization(Module &M) { createSanitizerCtorAndInitFunctions(M, kMsanModuleCtorName, kMsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}); + if (ClWithComdat) { + Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); + MsanCtorFunction->setComdat(MsanCtorComdat); + appendToGlobalCtors(M, MsanCtorFunction, 0, MsanCtorFunction); + } else { + appendToGlobalCtors(M, MsanCtorFunction, 0); + } - appendToGlobalCtors(M, MsanCtorFunction, 0); if (TrackOrigins) new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, @@ -591,7 +610,7 @@ CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, unsigned TypeSizeToSizeIndex(unsigned TypeSize) { if (TypeSize <= 8) return 0; - return Log2_32_Ceil(TypeSize / 8); + return Log2_32_Ceil((TypeSize + 7) / 8); } /// This class does all the work for a given function. Store and Load @@ -606,6 +625,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes; ValueMap<Value*, Value*> ShadowMap, OriginMap; std::unique_ptr<VarArgHelper> VAHelper; + const TargetLibraryInfo *TLI; // The following flags disable parts of MSan instrumentation based on // blacklist contents and command-line options. @@ -623,7 +643,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { : Shadow(S), Origin(O), OrigIns(I) { } }; SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList; - SmallVector<Instruction*, 16> StoreList; + SmallVector<StoreInst *, 16> StoreList; MemorySanitizerVisitor(Function &F, MemorySanitizer &MS) : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { @@ -635,6 +655,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // FIXME: Consider using SpecialCaseList to specify a list of functions that // must always return fully initialized values. For now, we hardcode "main". CheckReturnValue = SanitizeFunction && (F.getName() == "main"); + TLI = &MS.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); DEBUG(if (!InsertChecks) dbgs() << "MemorySanitizer is not inserting checks into '" @@ -731,26 +752,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void materializeStores(bool InstrumentWithCalls) { - for (auto Inst : StoreList) { - StoreInst &SI = *dyn_cast<StoreInst>(Inst); - - IRBuilder<> IRB(&SI); - Value *Val = SI.getValueOperand(); - Value *Addr = SI.getPointerOperand(); - Value *Shadow = SI.isAtomic() ? getCleanShadow(Val) : getShadow(Val); + for (StoreInst *SI : StoreList) { + IRBuilder<> IRB(SI); + Value *Val = SI->getValueOperand(); + Value *Addr = SI->getPointerOperand(); + Value *Shadow = SI->isAtomic() ? 
getCleanShadow(Val) : getShadow(Val); Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); StoreInst *NewSI = - IRB.CreateAlignedStore(Shadow, ShadowPtr, SI.getAlignment()); + IRB.CreateAlignedStore(Shadow, ShadowPtr, SI->getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; - if (ClCheckAccessAddress) insertShadowCheck(Addr, &SI); + if (ClCheckAccessAddress) + insertShadowCheck(Addr, SI); - if (SI.isAtomic()) SI.setOrdering(addReleaseOrdering(SI.getOrdering())); + if (SI->isAtomic()) + SI->setOrdering(addReleaseOrdering(SI->getOrdering())); - if (MS.TrackOrigins && !SI.isAtomic()) - storeOrigin(IRB, Addr, Shadow, getOrigin(Val), SI.getAlignment(), + if (MS.TrackOrigins && !SI->isAtomic()) + storeOrigin(IRB, Addr, Shadow, getOrigin(Val), SI->getAlignment(), InstrumentWithCalls); } } @@ -1142,7 +1163,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOrigin(A, getCleanOrigin()); } } - ArgOffset += RoundUpToAlignment(Size, kShadowTLSAlignment); + ArgOffset += alignTo(Size, kShadowTLSAlignment); } assert(*ShadowPtr && "Could not find shadow for an argument"); return *ShadowPtr; @@ -1210,34 +1231,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { AtomicOrdering addReleaseOrdering(AtomicOrdering a) { switch (a) { - case NotAtomic: - return NotAtomic; - case Unordered: - case Monotonic: - case Release: - return Release; - case Acquire: - case AcquireRelease: - return AcquireRelease; - case SequentiallyConsistent: - return SequentiallyConsistent; + case AtomicOrdering::NotAtomic: + return AtomicOrdering::NotAtomic; + case AtomicOrdering::Unordered: + case AtomicOrdering::Monotonic: + case AtomicOrdering::Release: + return AtomicOrdering::Release; + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + return AtomicOrdering::AcquireRelease; + case AtomicOrdering::SequentiallyConsistent: + return AtomicOrdering::SequentiallyConsistent; } llvm_unreachable("Unknown ordering"); } AtomicOrdering addAcquireOrdering(AtomicOrdering a) { switch (a) { - case NotAtomic: - return NotAtomic; - case Unordered: - case Monotonic: - case Acquire: - return Acquire; - case Release: - case AcquireRelease: - return AcquireRelease; - case SequentiallyConsistent: - return SequentiallyConsistent; + case AtomicOrdering::NotAtomic: + return AtomicOrdering::NotAtomic; + case AtomicOrdering::Unordered: + case AtomicOrdering::Monotonic: + case AtomicOrdering::Acquire: + return AtomicOrdering::Acquire; + case AtomicOrdering::Release: + case AtomicOrdering::AcquireRelease: + return AtomicOrdering::AcquireRelease; + case AtomicOrdering::SequentiallyConsistent: + return AtomicOrdering::SequentiallyConsistent; } llvm_unreachable("Unknown ordering"); } @@ -1603,7 +1624,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { for (unsigned Idx = 0; Idx < NumElements; ++Idx) { if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) { - APInt V = Elt->getValue(); + const APInt &V = Elt->getValue(); APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); Elements.push_back(ConstantInt::get(EltTy, V2)); } else { @@ -1613,7 +1634,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { ShadowMul = ConstantVector::get(Elements); } else { if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) { - APInt V = Elt->getValue(); + const APInt &V = Elt->getValue(); APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); ShadowMul = 
ConstantInt::get(Ty, V2); } else { @@ -2123,6 +2144,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return CreateShadowCast(IRB, S2, T, /* Signed */ true); } + // Given a vector, extract its first element, and return all + // zeroes if it is zero, and all ones otherwise. + Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) { + Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0); + Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1)); + return CreateShadowCast(IRB, S2, T, /* Signed */ true); + } + Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) { Type *T = S->getType(); assert(T->isVectorTy()); @@ -2270,15 +2299,39 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } + // \brief Instrument compare-packed intrinsic. + // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or + // all-ones shadow. + void handleVectorComparePackedIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Type *ResTy = getShadowTy(&I); + Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + Value *S = IRB.CreateSExt( + IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy); + setShadow(&I, S); + setOriginForNaryOp(I); + } + + // \brief Instrument compare-scalar intrinsic. + // This handles both cmp* intrinsics which return the result in the first + // element of a vector, and comi* which return the result as i32. + void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: handleBswap(I); break; - case llvm::Intrinsic::x86_avx512_cvtsd2usi64: - case llvm::Intrinsic::x86_avx512_cvtsd2usi: - case llvm::Intrinsic::x86_avx512_cvtss2usi64: - case llvm::Intrinsic::x86_avx512_cvtss2usi: + case llvm::Intrinsic::x86_avx512_vcvtsd2usi64: + case llvm::Intrinsic::x86_avx512_vcvtsd2usi32: + case llvm::Intrinsic::x86_avx512_vcvtss2usi64: + case llvm::Intrinsic::x86_avx512_vcvtss2usi32: case llvm::Intrinsic::x86_avx512_cvttss2usi64: case llvm::Intrinsic::x86_avx512_cvttss2usi: case llvm::Intrinsic::x86_avx512_cvttsd2usi64: @@ -2303,8 +2356,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case llvm::Intrinsic::x86_sse_cvttss2si: handleVectorConvertIntrinsic(I, 1); break; - case llvm::Intrinsic::x86_sse2_cvtdq2pd: - case llvm::Intrinsic::x86_sse2_cvtps2pd: case llvm::Intrinsic::x86_sse_cvtps2pi: case llvm::Intrinsic::x86_sse_cvttps2pi: handleVectorConvertIntrinsic(I, 2); @@ -2413,6 +2464,43 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { handleVectorPmaddIntrinsic(I, 16); break; + case llvm::Intrinsic::x86_sse_cmp_ss: + case llvm::Intrinsic::x86_sse2_cmp_sd: + case llvm::Intrinsic::x86_sse_comieq_ss: + case llvm::Intrinsic::x86_sse_comilt_ss: + case llvm::Intrinsic::x86_sse_comile_ss: + case llvm::Intrinsic::x86_sse_comigt_ss: + case llvm::Intrinsic::x86_sse_comige_ss: + case llvm::Intrinsic::x86_sse_comineq_ss: + case llvm::Intrinsic::x86_sse_ucomieq_ss: + case llvm::Intrinsic::x86_sse_ucomilt_ss: + case llvm::Intrinsic::x86_sse_ucomile_ss: + case llvm::Intrinsic::x86_sse_ucomigt_ss: + case llvm::Intrinsic::x86_sse_ucomige_ss: + case llvm::Intrinsic::x86_sse_ucomineq_ss: + case llvm::Intrinsic::x86_sse2_comieq_sd: + case 
llvm::Intrinsic::x86_sse2_comilt_sd: + case llvm::Intrinsic::x86_sse2_comile_sd: + case llvm::Intrinsic::x86_sse2_comigt_sd: + case llvm::Intrinsic::x86_sse2_comige_sd: + case llvm::Intrinsic::x86_sse2_comineq_sd: + case llvm::Intrinsic::x86_sse2_ucomieq_sd: + case llvm::Intrinsic::x86_sse2_ucomilt_sd: + case llvm::Intrinsic::x86_sse2_ucomile_sd: + case llvm::Intrinsic::x86_sse2_ucomigt_sd: + case llvm::Intrinsic::x86_sse2_ucomige_sd: + case llvm::Intrinsic::x86_sse2_ucomineq_sd: + handleVectorCompareScalarIntrinsic(I); + break; + + case llvm::Intrinsic::x86_sse_cmp_ps: + case llvm::Intrinsic::x86_sse2_cmp_pd: + // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function + // generates reasonably looking IR that fails in the backend with "Do not + // know how to split the result of this operator!". + handleVectorComparePackedIntrinsic(I); + break; + default: if (!handleUnknownIntrinsic(I)) visitInstruction(I); @@ -2450,6 +2538,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { AttributeSet::FunctionIndex, B)); } + + maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI); } IRBuilder<> IRB(&I); @@ -2498,7 +2588,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { (void)Store; assert(Size != 0 && Store != nullptr); DEBUG(dbgs() << " Param:" << *Store << "\n"); - ArgOffset += RoundUpToAlignment(Size, 8); + ArgOffset += alignTo(Size, 8); } DEBUG(dbgs() << " done with call args\n"); @@ -2811,14 +2901,19 @@ struct VarArgAMD64Helper : public VarArgHelper { ArgIt != End; ++ArgIt) { Value *A = *ArgIt; unsigned ArgNo = CS.getArgumentNo(ArgIt); + bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal); if (IsByVal) { // ByVal arguments always go to the overflow area. + // Fixed arguments passed through the overflow area will be stepped + // over by va_start, so don't count them towards the offset. + if (IsFixed) + continue; assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); - OverflowOffset += RoundUpToAlignment(ArgSize, 8); + OverflowOffset += alignTo(ArgSize, 8); IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), ArgSize, kShadowTLSAlignment); } else { @@ -2838,10 +2933,16 @@ struct VarArgAMD64Helper : public VarArgHelper { FpOffset += 16; break; case AK_Memory: + if (IsFixed) + continue; uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); - OverflowOffset += RoundUpToAlignment(ArgSize, 8); + OverflowOffset += alignTo(ArgSize, 8); } + // Take fixed arguments into account for GpOffset and FpOffset, + // but don't actually store shadows for them. 
+ if (IsFixed) + continue; IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } } @@ -2952,20 +3053,22 @@ struct VarArgMIPS64Helper : public VarArgHelper { void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { unsigned VAArgOffset = 0; const DataLayout &DL = F.getParent()->getDataLayout(); - for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end(); + for (CallSite::arg_iterator ArgIt = CS.arg_begin() + + CS.getFunctionType()->getNumParams(), End = CS.arg_end(); ArgIt != End; ++ArgIt) { + llvm::Triple TargetTriple(F.getParent()->getTargetTriple()); Value *A = *ArgIt; Value *Base; uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); -#if defined(__MIPSEB__) || defined(MIPSEB) - // Adjusting the shadow for argument with size < 8 to match the placement - // of bits in big endian system - if (ArgSize < 8) - VAArgOffset += (8 - ArgSize); -#endif + if (TargetTriple.getArch() == llvm::Triple::mips64) { + // Adjusting the shadow for argument with size < 8 to match the placement + // of bits in big endian system + if (ArgSize < 8) + VAArgOffset += (8 - ArgSize); + } Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset); VAArgOffset += ArgSize; - VAArgOffset = RoundUpToAlignment(VAArgOffset, 8); + VAArgOffset = alignTo(VAArgOffset, 8); IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } @@ -3038,13 +3141,13 @@ struct VarArgMIPS64Helper : public VarArgHelper { /// \brief AArch64-specific implementation of VarArgHelper. struct VarArgAArch64Helper : public VarArgHelper { - static const unsigned kAArch64GrArgSize = 56; + static const unsigned kAArch64GrArgSize = 64; static const unsigned kAArch64VrArgSize = 128; static const unsigned AArch64GrBegOffset = 0; static const unsigned AArch64GrEndOffset = kAArch64GrArgSize; // Make VR space aligned to 16 bytes. - static const unsigned AArch64VrBegOffset = AArch64GrEndOffset + 8; + static const unsigned AArch64VrBegOffset = AArch64GrEndOffset; static const unsigned AArch64VrEndOffset = AArch64VrBegOffset + kAArch64VrArgSize; static const unsigned AArch64VAEndOffset = AArch64VrEndOffset; @@ -3089,9 +3192,11 @@ struct VarArgAArch64Helper : public VarArgHelper { unsigned OverflowOffset = AArch64VAEndOffset; const DataLayout &DL = F.getParent()->getDataLayout(); - for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end(); + for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); ArgIt != End; ++ArgIt) { Value *A = *ArgIt; + unsigned ArgNo = CS.getArgumentNo(ArgIt); + bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); ArgKind AK = classifyArgument(A); if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset) AK = AK_Memory; @@ -3108,11 +3213,19 @@ struct VarArgAArch64Helper : public VarArgHelper { VrOffset += 16; break; case AK_Memory: + // Don't count fixed arguments in the overflow area - va_start will + // skip right over them. + if (IsFixed) + continue; uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); - OverflowOffset += RoundUpToAlignment(ArgSize, 8); + OverflowOffset += alignTo(ArgSize, 8); break; } + // Count Gp/Vr fixed arguments to their respective offsets, but don't + // bother to actually store a shadow. 
+      if (IsFixed)
+        continue;
       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
     }
     Constant *OverflowSize =
@@ -3271,6 +3384,163 @@
   }
 };
 
+/// \brief PowerPC64-specific implementation of VarArgHelper.
+struct VarArgPowerPC64Helper : public VarArgHelper {
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy;
+  Value *VAArgSize;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
+                        MemorySanitizerVisitor &MSV)
+    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+      VAArgSize(nullptr) {}
+
+  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+    // For PowerPC, we need to deal with alignment of stack arguments -
+    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
+    // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
+    // and QPX vectors are aligned to 32 bytes. For that reason, we
+    // compute the current offset from the stack pointer (which is always
+    // properly aligned), and the offset for the first vararg, then
+    // subtract them.
+    unsigned VAArgBase;
+    llvm::Triple TargetTriple(F.getParent()->getTargetTriple());
+    // The parameter save area starts at 48 bytes from the frame pointer for
+    // ABIv1, and at 32 bytes for ABIv2. This is usually determined by the
+    // target endianness, but in theory could be overridden by a function
+    // attribute. For simplicity, we ignore that here (it'd only matter for
+    // QPX vectors).
+    if (TargetTriple.getArch() == llvm::Triple::ppc64)
+      VAArgBase = 48;
+    else
+      VAArgBase = 32;
+    unsigned VAArgOffset = VAArgBase;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned ArgNo = CS.getArgumentNo(ArgIt);
+      bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
+      bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal);
+      if (IsByVal) {
+        assert(A->getType()->isPointerTy());
+        Type *RealTy = A->getType()->getPointerElementType();
+        uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
+        uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1);
+        if (ArgAlign < 8)
+          ArgAlign = 8;
+        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+        if (!IsFixed) {
+          Value *Base = getShadowPtrForVAArgument(RealTy, IRB,
+                                                  VAArgOffset - VAArgBase);
+          IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
+                           ArgSize, kShadowTLSAlignment);
+        }
+        VAArgOffset += alignTo(ArgSize, 8);
+      } else {
+        Value *Base;
+        uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+        uint64_t ArgAlign = 8;
+        if (A->getType()->isArrayTy()) {
+          // Arrays are aligned to element size, except for long double
+          // arrays, which are aligned to 8 bytes.
+          Type *ElementTy = A->getType()->getArrayElementType();
+          if (!ElementTy->isPPC_FP128Ty())
+            ArgAlign = DL.getTypeAllocSize(ElementTy);
+        } else if (A->getType()->isVectorTy()) {
+          // Vectors are naturally aligned.
+          ArgAlign = DL.getTypeAllocSize(A->getType());
+        }
+        if (ArgAlign < 8)
+          ArgAlign = 8;
+        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+        if (DL.isBigEndian()) {
+          // Adjust the shadow for arguments with size < 8 to match the
+          // placement of bits in a big-endian system.
+          if (ArgSize < 8)
+            VAArgOffset += (8 - ArgSize);
+        }
+        if (!IsFixed) {
+          Base = getShadowPtrForVAArgument(A->getType(), IRB,
+                                           VAArgOffset - VAArgBase);
+          IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+        }
+        VAArgOffset += ArgSize;
+        VAArgOffset = alignTo(VAArgOffset, 8);
+      }
+      if (IsFixed)
+        VAArgBase = VAArgOffset;
+    }
+
+    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
+                                                VAArgOffset - VAArgBase);
+    // Here we use VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
+    // new class member, i.e., it holds the total size of all VarArgs.
+    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// \brief Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, /* alignment */ 8, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants.
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, /* alignment */ 8, false);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+    VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+                                    VAArgSize);
+
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+      Value *RegSaveAreaPtrPtr =
+          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                             Type::getInt64PtrTy(*MS.C));
+      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+      Value *RegSaveAreaShadowPtr =
+          MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8);
+    }
+  }
+};
+
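To make the PowerPC64 offset bookkeeping above concrete, the standalone program below replays the same arithmetic for a hypothetical ppc64 (ABIv1, big-endian) function callee(int fixed, ...) invoked as callee(1, 2.0, 3). The callee and its argument values are invented; only the alignment rules come from the code above:

    #include <cstdint>
    #include <cstdio>

    // alignTo as used by the helper: round X up to a multiple of A.
    static uint64_t alignTo(uint64_t X, uint64_t A) { return (X + A - 1) / A * A; }

    int main() {
      uint64_t Base = 48, Off = 48;           // ABIv1 parameter save area offset
      // 'fixed' (4-byte int): aligned to 8, big-endian bump of +4, but no shadow
      // is stored for a named parameter; it only advances the offsets.
      Off = alignTo(Off, 8) + (8 - 4) + 4;    // -> 56
      Off = alignTo(Off, 8);                  // -> 56
      Base = Off;                             // fixed arg: VAArgBase catches up
      // 2.0 (double vararg): 8-byte slot, shadow stored at Off - Base.
      uint64_t DblShadow = alignTo(Off, 8) - Base;           // == 0
      Off = alignTo(Off, 8) + 8;                             // -> 64
      // 3 (4-byte int vararg): big-endian bump again, shadow at Off - Base.
      uint64_t IntShadow = alignTo(Off, 8) + (8 - 4) - Base; // == 12
      std::printf("double shadow at %llu, int shadow at %llu\n",
                  (unsigned long long)DblShadow, (unsigned long long)IntShadow);
      return 0;
    }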
 /// \brief A no-op implementation of VarArgHelper.
 struct VarArgNoOpHelper : public VarArgHelper {
   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -3297,6 +3567,9 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
     return new VarArgMIPS64Helper(Func, Msan, Visitor);
   else if (TargetTriple.getArch() == llvm::Triple::aarch64)
     return new VarArgAArch64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == llvm::Triple::ppc64 ||
+           TargetTriple.getArch() == llvm::Triple::ppc64le)
+    return new VarArgPowerPC64Helper(Func, Msan, Visitor);
   else
     return new VarArgNoOpHelper(Func, Msan, Visitor);
 }
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 4b59b93b325f4..f54d8ad481462 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -25,9 +25,12 @@
 //
 // This file contains two passes:
 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
-// count profile, and
+// count profile, and generates the instrumentation for indirect call
+// profiling.
 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
-// annotates the branch weights.
+// annotates the branch weights. It also reads the indirect call value
+// profiling records and annotates the indirect call instructions.
+//
 // To get the precise counter information, these two passes need to be invoked
 // at the same compilation point (so they see the same IR). For pass
 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
@@ -45,14 +48,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/PGOInstrumentation.h"
 #include "CFGMST.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
@@ -62,10 +67,13 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/JamCRC.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
 #include <string>
 #include <utility>
 #include <vector>
@@ -81,6 +89,7 @@
 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
 
 // Command line option to specify the file to read profile from. This is
 // mainly used for testing.
@@ -90,13 +99,37 @@ static cl::opt<std::string>
     cl::desc("Specify the path of profile data file. This is "
              "mainly for test purposes."));
 
+// Command line option to disable value profiling. The default is false:
+// i.e. value profiling is enabled by default. This is for debugging purposes.
+static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), + cl::Hidden, + cl::desc("Disable Value Profiling")); + +// Command line option to set the maximum number of VP annotations to write to +// the metadata for a single indirect call callsite. +static cl::opt<unsigned> MaxNumAnnotations( + "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of annotations for a single indirect " + "call callsite")); + +// Command line option to enable/disable the warning about missing profile +// information. +static cl::opt<bool> NoPGOWarnMissing("no-pgo-warn-missing", cl::init(false), + cl::Hidden); + +// Command line option to enable/disable the warning about a hash mismatch in +// the profile data. +static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), + cl::Hidden); + namespace { -class PGOInstrumentationGen : public ModulePass { +class PGOInstrumentationGenLegacyPass : public ModulePass { public: static char ID; - PGOInstrumentationGen() : ModulePass(ID) { - initializePGOInstrumentationGenPass(*PassRegistry::getPassRegistry()); + PGOInstrumentationGenLegacyPass() : ModulePass(ID) { + initializePGOInstrumentationGenLegacyPassPass( + *PassRegistry::getPassRegistry()); } const char *getPassName() const override { @@ -111,16 +144,17 @@ private: } }; -class PGOInstrumentationUse : public ModulePass { +class PGOInstrumentationUseLegacyPass : public ModulePass { public: static char ID; // Provide the profile filename as the parameter. - PGOInstrumentationUse(std::string Filename = "") - : ModulePass(ID), ProfileFileName(Filename) { + PGOInstrumentationUseLegacyPass(std::string Filename = "") + : ModulePass(ID), ProfileFileName(std::move(Filename)) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; - initializePGOInstrumentationUsePass(*PassRegistry::getPassRegistry()); + initializePGOInstrumentationUseLegacyPassPass( + *PassRegistry::getPassRegistry()); } const char *getPassName() const override { @@ -129,37 +163,36 @@ public: private: std::string ProfileFileName; - std::unique_ptr<IndexedInstrProfReader> PGOReader; - bool runOnModule(Module &M) override; + bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<BlockFrequencyInfoWrapperPass>(); } }; } // end anonymous namespace -char PGOInstrumentationGen::ID = 0; -INITIALIZE_PASS_BEGIN(PGOInstrumentationGen, "pgo-instr-gen", +char PGOInstrumentationGenLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) -INITIALIZE_PASS_END(PGOInstrumentationGen, "pgo-instr-gen", +INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) -ModulePass *llvm::createPGOInstrumentationGenPass() { - return new PGOInstrumentationGen(); +ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { + return new PGOInstrumentationGenLegacyPass(); } -char PGOInstrumentationUse::ID = 0; -INITIALIZE_PASS_BEGIN(PGOInstrumentationUse, "pgo-instr-use", +char PGOInstrumentationUseLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) -INITIALIZE_PASS_END(PGOInstrumentationUse, 
"pgo-instr-use", +INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) -ModulePass *llvm::createPGOInstrumentationUsePass(StringRef Filename) { - return new PGOInstrumentationUse(Filename.str()); +ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { + return new PGOInstrumentationUseLegacyPass(Filename.str()); } namespace { @@ -225,7 +258,7 @@ public: // Dump edges and BB information. void dumpInfo(std::string Str = "") const { MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + - Twine(FunctionHash) + "\t" + Str); + Twine(FunctionHash) + "\t" + Str); } FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false, @@ -247,7 +280,7 @@ public: if (CreateGlobalVar) FuncNameVar = createPGOFuncNameVar(F, FuncName); - }; + } }; // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index @@ -305,7 +338,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { return InstrBB; } -// Visit all edge and instrument the edges not in MST. +// Visit all edge and instrument the edges not in MST, and do value profiling. // Critical edges will be split. static void instrumentOneFunc(Function &F, Module *M, BranchProbabilityInfo *BPI, @@ -318,6 +351,7 @@ static void instrumentOneFunc(Function &F, Module *M, } uint32_t I = 0; + Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); for (auto &E : FuncInfo.MST.AllEdges) { BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); if (!InstrBB) @@ -326,13 +360,34 @@ static void instrumentOneFunc(Function &F, Module *M, IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); assert(Builder.GetInsertPoint() != InstrBB->end() && "Cannot get the Instrumentation point"); - Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), Builder.getInt32(I++)}); } + + if (DisableValueProfiling) + return; + + unsigned NumIndirectCallSites = 0; + for (auto &I : findIndirectCallSites(F)) { + CallSite CS(I); + Value *Callee = CS.getCalledValue(); + DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " + << NumIndirectCallSites << "\n"); + IRBuilder<> Builder(I); + assert(Builder.GetInsertPoint() != I->getParent()->end() && + "Cannot get the Instrumentation point"); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), + {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), + Builder.getInt64(FuncInfo.FunctionHash), + Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), + Builder.getInt32(llvm::InstrProfValueKind::IPVK_IndirectCallTarget), + Builder.getInt32(NumIndirectCallSites++)}); + } + NumOfPGOICall += NumIndirectCallSites; } // This class represents a CFG edge in profile use compilation. 
@@ -352,7 +407,8 @@ struct PGOUseEdge : public PGOEdge { const std::string infoString() const { if (!CountValid) return PGOEdge::infoString(); - return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)).str(); + return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) + .str(); } }; @@ -399,6 +455,33 @@ static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { } class PGOUseFunc { +public: + PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr) + : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI), + FreqAttr(FFA_Normal) {} + + // Read counts for the instrumented BB from profile. + bool readCounters(IndexedInstrProfReader *PGOReader); + + // Populate the counts for all BBs. + void populateCounters(); + + // Set the branch weights based on the count values. + void setBranchWeights(); + + // Annotate the indirect call sites. + void annotateIndirectCallSites(); + + // The hotness of the function from the profile count. + enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; + + // Return the function hotness from the profile. + FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } + + // Return the profile record for this function. + InstrProfRecord &getProfileRecord() { return ProfileRecord; } + private: Function &F; Module *M; @@ -414,6 +497,12 @@ private: // compilation. uint64_t ProgramMaxCount; + // ProfileRecord for this function. + InstrProfRecord ProfileRecord; + + // Function hotness info derived from profile. + FuncFreqAttr FreqAttr; + // Find the Instrumented BB and set the value. void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); @@ -427,7 +516,7 @@ private: // Set the hot/cold inline hints based on the count values. // FIXME: This function should be removed once the functionality in // the inliner is implemented. - void applyFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { + void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { if (ProgramMaxCount == 0) return; // Threshold of the hot functions. @@ -435,24 +524,10 @@ private: // Threshold of the cold functions. const BranchProbability ColdFunctionThreshold(2, 10000); if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) - F.addFnAttr(llvm::Attribute::InlineHint); + FreqAttr = FFA_Hot; else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) - F.addFnAttr(llvm::Attribute::Cold); + FreqAttr = FFA_Cold; } - -public: - PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI) {} - - // Read counts for the instrumented BB from profile. - bool readCounters(IndexedInstrProfReader *PGOReader); - - // Populate the counts for all BBs. - void populateCounters(); - - // Set the branch weights based on the count values. - void setBranchWeights(); }; // Visit all the edges and assign the count value for the instrumented @@ -511,21 +586,32 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { // Return true if the profile are successfully read, and false on errors.
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) { auto &Ctx = M->getContext(); - ErrorOr<InstrProfRecord> Result = + Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); - if (std::error_code EC = Result.getError()) { - if (EC == instrprof_error::unknown_function) - NumOfPGOMissing++; - else if (EC == instrprof_error::hash_mismatch || - EC == llvm::instrprof_error::malformed) - NumOfPGOMismatch++; - - std::string Msg = EC.message() + std::string(" ") + F.getName().str(); - Ctx.diagnose( - DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); + if (Error E = Result.takeError()) { + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + auto Err = IPE.get(); + bool SkipWarning = false; + if (Err == instrprof_error::unknown_function) { + NumOfPGOMissing++; + SkipWarning = NoPGOWarnMissing; + } else if (Err == instrprof_error::hash_mismatch || + Err == instrprof_error::malformed) { + NumOfPGOMismatch++; + SkipWarning = NoPGOWarnMismatch; + } + + if (SkipWarning) + return; + + std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); + Ctx.diagnose( + DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); + }); return false; } - std::vector<uint64_t> &CountFromProfile = Result.get().Counts; + ProfileRecord = std::move(Result.get()); + std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; NumOfPGOFunc++; DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); @@ -605,16 +691,17 @@ void PGOUseFunc::populateCounters() { } DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); +#ifndef NDEBUG // Assert every BB has a valid counter. + for (auto &BB : F) + assert(getBBInfo(&BB).CountValid && "BB count is not valid"); +#endif uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; + F.setEntryCount(FuncEntryCount); uint64_t FuncMaxCount = FuncEntryCount; - for (auto &BB : F) { - assert(getBBInfo(&BB).CountValid && "BB count is not valid"); - uint64_t Count = getBBInfo(&BB).CountValue; - if (Count > FuncMaxCount) - FuncMaxCount = Count; - } - applyFunctionAttributes(FuncEntryCount, FuncMaxCount); + for (auto &BB : F) + FuncMaxCount = std::max(FuncMaxCount, getBBInfo(&BB).CountValue); + markFunctionAttributes(FuncEntryCount, FuncMaxCount); DEBUG(FuncInfo.dumpInfo("after reading profile.")); } @@ -642,7 +729,7 @@ void PGOUseFunc::setBranchWeights() { const PGOUseEdge *E = BBCountInfo.OutEdges[s]; const BasicBlock *SrcBB = E->SrcBB; const BasicBlock *DestBB = E->DestBB; - if (DestBB == 0) + if (DestBB == nullptr) continue; unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); uint64_t EdgeCount = E->CountValue; @@ -663,56 +750,204 @@ void PGOUseFunc::setBranchWeights() { dbgs() << "\n";); } } + +// Traverse all the indirect callsites and annotate the instructions. +void PGOUseFunc::annotateIndirectCallSites() { + if (DisableValueProfiling) + return; + + // Create the PGOFuncName meta data. 
+ createPGOFuncNameMetadata(F, FuncInfo.FuncName); + + unsigned IndirectCallSiteIndex = 0; + auto IndirectCallSites = findIndirectCallSites(F); + unsigned NumValueSites = + ProfileRecord.getNumValueSites(IPVK_IndirectCallTarget); + if (NumValueSites != IndirectCallSites.size()) { + std::string Msg = + std::string("Inconsistent number of indirect call sites: ") + + F.getName().str(); + auto &Ctx = M->getContext(); + Ctx.diagnose( + DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); + return; + } + + for (auto &I : IndirectCallSites) { + DEBUG(dbgs() << "Read one indirect call instrumentation: Index=" + << IndirectCallSiteIndex << " out of " << NumValueSites + << "\n"); + annotateValueSite(*M, *I, ProfileRecord, IPVK_IndirectCallTarget, + IndirectCallSiteIndex, MaxNumAnnotations); + IndirectCallSiteIndex++; + } +} } // end anonymous namespace -bool PGOInstrumentationGen::runOnModule(Module &M) { +// Create a COMDAT variable IR_LEVEL_PROF_VARNAME to make the runtime +// aware this is an ir_level profile so it can set the version flag. +static void createIRLevelProfileFlagVariable(Module &M) { + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + auto IRLevelVersionVariable = new GlobalVariable( + M, IntTy64, true, GlobalVariable::ExternalLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), + INSTR_PROF_QUOTE(IR_LEVEL_PROF_VERSION_VAR)); + IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (!TT.supportsCOMDAT()) + IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); + else + IRLevelVersionVariable->setComdat(M.getOrInsertComdat( + StringRef(INSTR_PROF_QUOTE(IR_LEVEL_PROF_VERSION_VAR)))); +} + +static bool InstrumentAllFunctions( + Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, + function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { + createIRLevelProfileFlagVariable(M); for (auto &F : M) { if (F.isDeclaration()) continue; - BranchProbabilityInfo *BPI = - &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI()); - BlockFrequencyInfo *BFI = - &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI()); + auto *BPI = LookupBPI(F); + auto *BFI = LookupBFI(F); instrumentOneFunc(F, &M, BPI, BFI); } return true; } -static void setPGOCountOnFunc(PGOUseFunc &Func, - IndexedInstrProfReader *PGOReader) { - if (Func.readCounters(PGOReader)) { - Func.populateCounters(); - Func.setBranchWeights(); - } +bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + auto LookupBPI = [this](Function &F) { + return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); + }; + auto LookupBFI = [this](Function &F) { + return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); + }; + return InstrumentAllFunctions(M, LookupBPI, LookupBFI); } -bool PGOInstrumentationUse::runOnModule(Module &M) { +PreservedAnalyses PGOInstrumentationGen::run(Module &M, + AnalysisManager<Module> &AM) { + + auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto LookupBPI = [&FAM](Function &F) { + return &FAM.getResult<BranchProbabilityAnalysis>(F); + }; + + auto LookupBFI = [&FAM](Function &F) { + return &FAM.getResult<BlockFrequencyAnalysis>(F); + }; + + if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +static bool annotateAllFunctions( + Module &M, 
StringRef ProfileFileName, + function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, + function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName); - if (std::error_code EC = ReaderOrErr.getError()) { - Ctx.diagnose( - DiagnosticInfoPGOProfile(ProfileFileName.data(), EC.message())); + if (Error E = ReaderOrErr.takeError()) { + handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { + Ctx.diagnose( + DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); + }); return false; } - PGOReader = std::move(ReaderOrErr.get()); + std::unique_ptr<IndexedInstrProfReader> PGOReader = + std::move(ReaderOrErr.get()); if (!PGOReader) { Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), - "Cannot get PGOReader")); + StringRef("Cannot get PGOReader"))); + return false; + } + // TODO: might need to change the warning once the clang option is finalized. + if (!PGOReader->isIRLevelProfile()) { + Ctx.diagnose(DiagnosticInfoPGOProfile( + ProfileFileName.data(), "Not an IR level instrumentation profile")); return false; } + std::vector<Function *> HotFunctions; + std::vector<Function *> ColdFunctions; for (auto &F : M) { if (F.isDeclaration()) continue; - BranchProbabilityInfo *BPI = - &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI()); - BlockFrequencyInfo *BFI = - &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI()); + auto *BPI = LookupBPI(F); + auto *BFI = LookupBFI(F); PGOUseFunc Func(F, &M, BPI, BFI); - setPGOCountOnFunc(Func, PGOReader.get()); + if (!Func.readCounters(PGOReader.get())) + continue; + Func.populateCounters(); + Func.setBranchWeights(); + Func.annotateIndirectCallSites(); + PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); + if (FreqAttr == PGOUseFunc::FFA_Cold) + ColdFunctions.push_back(&F); + else if (FreqAttr == PGOUseFunc::FFA_Hot) + HotFunctions.push_back(&F); + } + M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); + // Set function hotness attribute from the profile. + // We have to apply these attributes at the end because their presence + // can affect the BranchProbabilityInfo of any callers, resulting in an + // inconsistent MST between prof-gen and prof-use. 
+ for (auto &F : HotFunctions) { + F->addFnAttr(llvm::Attribute::InlineHint); + DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() + << "\n"); + } + for (auto &F : ColdFunctions) { + F->addFnAttr(llvm::Attribute::Cold); + DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); } + return true; } + +PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename) + : ProfileFileName(std::move(Filename)) { + if (!PGOTestProfileFile.empty()) + ProfileFileName = PGOTestProfileFile; +} + +PreservedAnalyses PGOInstrumentationUse::run(Module &M, + AnalysisManager<Module> &AM) { + + auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto LookupBPI = [&FAM](Function &F) { + return &FAM.getResult<BranchProbabilityAnalysis>(F); + }; + + auto LookupBFI = [&FAM](Function &F) { + return &FAM.getResult<BlockFrequencyAnalysis>(F); + }; + + if (!annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + auto LookupBPI = [this](Function &F) { + return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); + }; + auto LookupBFI = [this](Function &F) { + return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); + }; + + return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI); +} diff --git a/lib/Transforms/Instrumentation/SafeStack.cpp b/lib/Transforms/Instrumentation/SafeStack.cpp deleted file mode 100644 index abed465f102d2..0000000000000 --- a/lib/Transforms/Instrumentation/SafeStack.cpp +++ /dev/null @@ -1,760 +0,0 @@ -//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass splits the stack into the safe stack (kept as-is for LLVM backend) -// and the unsafe stack (explicitly allocated and managed through the runtime -// support library). 
-// -// http://clang.llvm.org/docs/SafeStack.html -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Instrumentation.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_os_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" - -using namespace llvm; - -#define DEBUG_TYPE "safestack" - -enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP }; - -static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage", - cl::Hidden, cl::init(ThreadLocalUSP), - cl::desc("Type of storage for the unsafe stack pointer"), - cl::values(clEnumValN(ThreadLocalUSP, "thread-local", - "Thread-local storage"), - clEnumValN(SingleThreadUSP, "single-thread", - "Non-thread-local storage"), - clEnumValEnd)); - -namespace llvm { - -STATISTIC(NumFunctions, "Total number of functions"); -STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack"); -STATISTIC(NumUnsafeStackRestorePointsFunctions, - "Number of functions that use setjmp or exceptions"); - -STATISTIC(NumAllocas, "Total number of allocas"); -STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas"); -STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas"); -STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments"); -STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads"); - -} // namespace llvm - -namespace { - -/// Rewrite an SCEV expression for a memory access address to an expression that -/// represents offset from the given alloca. -/// -/// The implementation simply replaces all mentions of the alloca with zero. -class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> { - const Value *AllocaPtr; - -public: - AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr) - : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {} - - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (Expr->getValue() == AllocaPtr) - return SE.getZero(Expr->getType()); - return Expr; - } -}; - -/// The SafeStack pass splits the stack of each function into the safe -/// stack, which is only accessed through memory safe dereferences (as -/// determined statically), and the unsafe stack, which contains all -/// local variables that are accessed in ways that we can't prove to -/// be safe. -class SafeStack : public FunctionPass { - const TargetMachine *TM; - const TargetLoweringBase *TL; - const DataLayout *DL; - ScalarEvolution *SE; - - Type *StackPtrTy; - Type *IntPtrTy; - Type *Int32Ty; - Type *Int8Ty; - - Value *UnsafeStackPtr = nullptr; - - /// Unsafe stack alignment. Each stack frame must ensure that the stack is - /// aligned to this value. 
We need to re-align the unsafe stack if the - /// alignment of any object on the stack exceeds this value. - /// - /// 16 seems like a reasonable upper bound on the alignment of objects that we - /// might expect to appear on the stack on most common targets. - enum { StackAlignment = 16 }; - - /// \brief Build a value representing a pointer to the unsafe stack pointer. - Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F); - - /// \brief Find all static allocas, dynamic allocas, return instructions and - /// stack restore points (exception unwind blocks and setjmp calls) in the - /// given function and append them to the respective vectors. - void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas, - SmallVectorImpl<AllocaInst *> &DynamicAllocas, - SmallVectorImpl<Argument *> &ByValArguments, - SmallVectorImpl<ReturnInst *> &Returns, - SmallVectorImpl<Instruction *> &StackRestorePoints); - - /// \brief Calculate the allocation size of a given alloca. Returns 0 if the - /// size can not be statically determined. - uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI); - - /// \brief Allocate space for all static allocas in \p StaticAllocas, - /// replace allocas with pointers into the unsafe stack and generate code to - /// restore the stack pointer before all return instructions in \p Returns. - /// - /// \returns A pointer to the top of the unsafe stack after all unsafe static - /// allocas are allocated. - Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F, - ArrayRef<AllocaInst *> StaticAllocas, - ArrayRef<Argument *> ByValArguments, - ArrayRef<ReturnInst *> Returns); - - /// \brief Generate code to restore the stack after all stack restore points - /// in \p StackRestorePoints. - /// - /// \returns A local variable in which to maintain the dynamic top of the - /// unsafe stack if needed. - AllocaInst * - createStackRestorePoints(IRBuilder<> &IRB, Function &F, - ArrayRef<Instruction *> StackRestorePoints, - Value *StaticTop, bool NeedDynamicTop); - - /// \brief Replace all allocas in \p DynamicAllocas with code to allocate - /// space dynamically on the unsafe stack and store the dynamic unsafe stack - /// top to \p DynamicTop if non-null. - void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr, - AllocaInst *DynamicTop, - ArrayRef<AllocaInst *> DynamicAllocas); - - bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize); - - bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U, - const Value *AllocaPtr, uint64_t AllocaSize); - bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr, - uint64_t AllocaSize); - -public: - static char ID; // Pass identification, replacement for typeid. 
- SafeStack(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) { - initializeSafeStackPass(*PassRegistry::getPassRegistry()); - } - SafeStack() : SafeStack(nullptr) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<ScalarEvolutionWrapperPass>(); - } - - bool doInitialization(Module &M) override { - DL = &M.getDataLayout(); - - StackPtrTy = Type::getInt8PtrTy(M.getContext()); - IntPtrTy = DL->getIntPtrType(M.getContext()); - Int32Ty = Type::getInt32Ty(M.getContext()); - Int8Ty = Type::getInt8Ty(M.getContext()); - - return false; - } - - bool runOnFunction(Function &F) override; -}; // class SafeStack - -uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { - uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType()); - if (AI->isArrayAllocation()) { - auto C = dyn_cast<ConstantInt>(AI->getArraySize()); - if (!C) - return 0; - Size *= C->getZExtValue(); - } - return Size; -} - -bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, - const Value *AllocaPtr, uint64_t AllocaSize) { - AllocaOffsetRewriter Rewriter(*SE, AllocaPtr); - const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr)); - - uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType()); - ConstantRange AccessStartRange = SE->getUnsignedRange(Expr); - ConstantRange SizeRange = - ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize)); - ConstantRange AccessRange = AccessStartRange.add(SizeRange); - ConstantRange AllocaRange = - ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize)); - bool Safe = AllocaRange.contains(AccessRange); - - DEBUG(dbgs() << "[SafeStack] " - << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ") - << *AllocaPtr << "\n" - << " Access " << *Addr << "\n" - << " SCEV " << *Expr - << " U: " << SE->getUnsignedRange(Expr) - << ", S: " << SE->getSignedRange(Expr) << "\n" - << " Range " << AccessRange << "\n" - << " AllocaRange " << AllocaRange << "\n" - << " " << (Safe ? "safe" : "unsafe") << "\n"); - - return Safe; -} - -bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U, - const Value *AllocaPtr, - uint64_t AllocaSize) { - // All MemIntrinsics have destination address in Arg0 and size in Arg2. - if (MI->getRawDest() != U) return true; - const auto *Len = dyn_cast<ConstantInt>(MI->getLength()); - // Non-constant size => unsafe. FIXME: try SCEV getRange. - if (!Len) return false; - return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize); -} - -/// Check whether a given allocation must be put on the safe -/// stack or not. The function analyzes all uses of AI and checks whether it is -/// only accessed in a memory safe way (as decided statically). -bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { - // Go through all uses of this alloca and check whether all accesses to the - // allocated object are statically known to be memory safe and, hence, the - // object can be placed on the safe stack. - SmallPtrSet<const Value *, 16> Visited; - SmallVector<const Value *, 8> WorkList; - WorkList.push_back(AllocaPtr); - - // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc. 
- while (!WorkList.empty()) { - const Value *V = WorkList.pop_back_val(); - for (const Use &UI : V->uses()) { - auto I = cast<const Instruction>(UI.getUser()); - assert(V == UI.get()); - - switch (I->getOpcode()) { - case Instruction::Load: { - if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr, - AllocaSize)) - return false; - break; - } - case Instruction::VAArg: - // "va-arg" from a pointer is safe. - break; - case Instruction::Store: { - if (V == I->getOperand(0)) { - // Stored the pointer - conservatively assume it may be unsafe. - DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr - << "\n store of address: " << *I << "\n"); - return false; - } - - if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()), - AllocaPtr, AllocaSize)) - return false; - break; - } - case Instruction::Ret: { - // Information leak. - return false; - } - - case Instruction::Call: - case Instruction::Invoke: { - ImmutableCallSite CS(I); - - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) - continue; - } - - if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { - if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) { - DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr - << "\n unsafe memintrinsic: " << *I - << "\n"); - return false; - } - continue; - } - - // LLVM 'nocapture' attribute is only set for arguments whose address - // is not stored, passed around, or used in any other non-trivial way. - // We assume that passing a pointer to an object as a 'nocapture - // readnone' argument is safe. - // FIXME: a more precise solution would require an interprocedural - // analysis here, which would look at all uses of an argument inside - // the function being called. - ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) - if (A->get() == V) - if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || - CS.doesNotAccessMemory()))) { - DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr - << "\n unsafe call: " << *I << "\n"); - return false; - } - continue; - } - - default: - if (Visited.insert(I).second) - WorkList.push_back(cast<const Instruction>(I)); - } - } - } - - // All uses of the alloca are safe, we can place it on the safe stack. - return true; -} - -Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) { - // Check if there is a target-specific location for the unsafe stack pointer. - if (TL) - if (Value *V = TL->getSafeStackPointerLocation(IRB)) - return V; - - // Otherwise, assume the target links with compiler-rt, which provides a - // thread-local variable with a magic name. - Module &M = *F.getParent(); - const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr"; - auto UnsafeStackPtr = - dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar)); - - bool UseTLS = USPStorage == ThreadLocalUSP; - - if (!UnsafeStackPtr) { - auto TLSModel = UseTLS ? - GlobalValue::InitialExecTLSModel : - GlobalValue::NotThreadLocal; - // The global variable is not defined yet, define it ourselves. - // We use the initial-exec TLS model because we do not support the - // variable living anywhere other than in the main executable. 
- UnsafeStackPtr = new GlobalVariable( - M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, - UnsafeStackPtrVar, nullptr, TLSModel); - } else { - // The variable exists, check its type and attributes. - if (UnsafeStackPtr->getValueType() != StackPtrTy) - report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type"); - if (UseTLS != UnsafeStackPtr->isThreadLocal()) - report_fatal_error(Twine(UnsafeStackPtrVar) + " must " + - (UseTLS ? "" : "not ") + "be thread-local"); - } - return UnsafeStackPtr; -} - -void SafeStack::findInsts(Function &F, - SmallVectorImpl<AllocaInst *> &StaticAllocas, - SmallVectorImpl<AllocaInst *> &DynamicAllocas, - SmallVectorImpl<Argument *> &ByValArguments, - SmallVectorImpl<ReturnInst *> &Returns, - SmallVectorImpl<Instruction *> &StackRestorePoints) { - for (Instruction &I : instructions(&F)) { - if (auto AI = dyn_cast<AllocaInst>(&I)) { - ++NumAllocas; - - uint64_t Size = getStaticAllocaAllocationSize(AI); - if (IsSafeStackAlloca(AI, Size)) - continue; - - if (AI->isStaticAlloca()) { - ++NumUnsafeStaticAllocas; - StaticAllocas.push_back(AI); - } else { - ++NumUnsafeDynamicAllocas; - DynamicAllocas.push_back(AI); - } - } else if (auto RI = dyn_cast<ReturnInst>(&I)) { - Returns.push_back(RI); - } else if (auto CI = dyn_cast<CallInst>(&I)) { - // setjmps require stack restore. - if (CI->getCalledFunction() && CI->canReturnTwice()) - StackRestorePoints.push_back(CI); - } else if (auto LP = dyn_cast<LandingPadInst>(&I)) { - // Exception landing pads require stack restore. - StackRestorePoints.push_back(LP); - } else if (auto II = dyn_cast<IntrinsicInst>(&I)) { - if (II->getIntrinsicID() == Intrinsic::gcroot) - llvm::report_fatal_error( - "gcroot intrinsic not compatible with safestack attribute"); - } - } - for (Argument &Arg : F.args()) { - if (!Arg.hasByValAttr()) - continue; - uint64_t Size = - DL->getTypeStoreSize(Arg.getType()->getPointerElementType()); - if (IsSafeStackAlloca(&Arg, Size)) - continue; - - ++NumUnsafeByValArguments; - ByValArguments.push_back(&Arg); - } -} - -AllocaInst * -SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, - ArrayRef<Instruction *> StackRestorePoints, - Value *StaticTop, bool NeedDynamicTop) { - if (StackRestorePoints.empty()) - return nullptr; - - // We need the current value of the shadow stack pointer to restore - // after longjmp or exception catching. - - // FIXME: On some platforms this could be handled by the longjmp/exception - // runtime itself. - - AllocaInst *DynamicTop = nullptr; - if (NeedDynamicTop) - // If we also have dynamic alloca's, the stack pointer value changes - // throughout the function. For now we store it in an alloca. - DynamicTop = IRB.CreateAlloca(StackPtrTy, /*ArraySize=*/nullptr, - "unsafe_stack_dynamic_ptr"); - - if (!StaticTop) - // We need the original unsafe stack pointer value, even if there are - // no unsafe static allocas. - StaticTop = IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr"); - - if (NeedDynamicTop) - IRB.CreateStore(StaticTop, DynamicTop); - - // Restore current stack pointer after longjmp/exception catch. - for (Instruction *I : StackRestorePoints) { - ++NumUnsafeStackRestorePoints; - - IRB.SetInsertPoint(I->getNextNode()); - Value *CurrentTop = DynamicTop ? 
IRB.CreateLoad(DynamicTop) : StaticTop; - IRB.CreateStore(CurrentTop, UnsafeStackPtr); - } - - return DynamicTop; -} - -Value *SafeStack::moveStaticAllocasToUnsafeStack( - IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas, - ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns) { - if (StaticAllocas.empty() && ByValArguments.empty()) - return nullptr; - - DIBuilder DIB(*F.getParent()); - - // We explicitly compute and set the unsafe stack layout for all unsafe - // static alloca instructions. We save the unsafe "base pointer" in the - // prologue into a local variable and restore it in the epilogue. - - // Load the current stack pointer (we'll also use it as a base pointer). - // FIXME: use a dedicated register for it ? - Instruction *BasePointer = - IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr"); - assert(BasePointer->getType() == StackPtrTy); - - for (ReturnInst *RI : Returns) { - IRB.SetInsertPoint(RI); - IRB.CreateStore(BasePointer, UnsafeStackPtr); - } - - // Compute maximum alignment among static objects on the unsafe stack. - unsigned MaxAlignment = 0; - for (Argument *Arg : ByValArguments) { - Type *Ty = Arg->getType()->getPointerElementType(); - unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), - Arg->getParamAlignment()); - if (Align > MaxAlignment) - MaxAlignment = Align; - } - for (AllocaInst *AI : StaticAllocas) { - Type *Ty = AI->getAllocatedType(); - unsigned Align = - std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()); - if (Align > MaxAlignment) - MaxAlignment = Align; - } - - if (MaxAlignment > StackAlignment) { - // Re-align the base pointer according to the max requested alignment. - assert(isPowerOf2_32(MaxAlignment)); - IRB.SetInsertPoint(BasePointer->getNextNode()); - BasePointer = cast<Instruction>(IRB.CreateIntToPtr( - IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy), - ConstantInt::get(IntPtrTy, ~uint64_t(MaxAlignment - 1))), - StackPtrTy)); - } - - int64_t StaticOffset = 0; // Current stack top. - IRB.SetInsertPoint(BasePointer->getNextNode()); - - for (Argument *Arg : ByValArguments) { - Type *Ty = Arg->getType()->getPointerElementType(); - - uint64_t Size = DL->getTypeStoreSize(Ty); - if (Size == 0) - Size = 1; // Don't create zero-sized stack objects. - - // Ensure the object is properly aligned. - unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), - Arg->getParamAlignment()); - - // Add alignment. - // NOTE: we ensure that BasePointer itself is aligned to >= Align. - StaticOffset += Size; - StaticOffset = RoundUpToAlignment(StaticOffset, Align); - - Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8* - ConstantInt::get(Int32Ty, -StaticOffset)); - Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(), - Arg->getName() + ".unsafe-byval"); - - // Replace alloc with the new location. - replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, - /*Deref=*/true, -StaticOffset); - Arg->replaceAllUsesWith(NewArg); - IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); - IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); - } - - // Allocate space for every unsafe static AllocaInst on the unsafe stack. - for (AllocaInst *AI : StaticAllocas) { - IRB.SetInsertPoint(AI); - - Type *Ty = AI->getAllocatedType(); - uint64_t Size = getStaticAllocaAllocationSize(AI); - if (Size == 0) - Size = 1; // Don't create zero-sized stack objects. - - // Ensure the object is properly aligned. 
- unsigned Align = - std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()); - - // Add alignment. - // NOTE: we ensure that BasePointer itself is aligned to >= Align. - StaticOffset += Size; - StaticOffset = RoundUpToAlignment(StaticOffset, Align); - - Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8* - ConstantInt::get(Int32Ty, -StaticOffset)); - Value *NewAI = IRB.CreateBitCast(Off, AI->getType(), AI->getName()); - if (AI->hasName() && isa<Instruction>(NewAI)) - cast<Instruction>(NewAI)->takeName(AI); - - // Replace alloc with the new location. - replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -StaticOffset); - AI->replaceAllUsesWith(NewAI); - AI->eraseFromParent(); - } - - // Re-align BasePointer so that our callees would see it aligned as - // expected. - // FIXME: no need to update BasePointer in leaf functions. - StaticOffset = RoundUpToAlignment(StaticOffset, StackAlignment); - - // Update shadow stack pointer in the function epilogue. - IRB.SetInsertPoint(BasePointer->getNextNode()); - - Value *StaticTop = - IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -StaticOffset), - "unsafe_stack_static_top"); - IRB.CreateStore(StaticTop, UnsafeStackPtr); - return StaticTop; -} - -void SafeStack::moveDynamicAllocasToUnsafeStack( - Function &F, Value *UnsafeStackPtr, AllocaInst *DynamicTop, - ArrayRef<AllocaInst *> DynamicAllocas) { - DIBuilder DIB(*F.getParent()); - - for (AllocaInst *AI : DynamicAllocas) { - IRBuilder<> IRB(AI); - - // Compute the new SP value (after AI). - Value *ArraySize = AI->getArraySize(); - if (ArraySize->getType() != IntPtrTy) - ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false); - - Type *Ty = AI->getAllocatedType(); - uint64_t TySize = DL->getTypeAllocSize(Ty); - Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize)); - - Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy); - SP = IRB.CreateSub(SP, Size); - - // Align the SP value to satisfy the AllocaInst, type and stack alignments. - unsigned Align = std::max( - std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()), - (unsigned)StackAlignment); - - assert(isPowerOf2_32(Align)); - Value *NewTop = IRB.CreateIntToPtr( - IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))), - StackPtrTy); - - // Save the stack pointer. - IRB.CreateStore(NewTop, UnsafeStackPtr); - if (DynamicTop) - IRB.CreateStore(NewTop, DynamicTop); - - Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType()); - if (AI->hasName() && isa<Instruction>(NewAI)) - NewAI->takeName(AI); - - replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true); - AI->replaceAllUsesWith(NewAI); - AI->eraseFromParent(); - } - - if (!DynamicAllocas.empty()) { - // Now go through the instructions again, replacing stacksave/stackrestore. 
- for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) { - Instruction *I = &*(It++); - auto II = dyn_cast<IntrinsicInst>(I); - if (!II) - continue; - - if (II->getIntrinsicID() == Intrinsic::stacksave) { - IRBuilder<> IRB(II); - Instruction *LI = IRB.CreateLoad(UnsafeStackPtr); - LI->takeName(II); - II->replaceAllUsesWith(LI); - II->eraseFromParent(); - } else if (II->getIntrinsicID() == Intrinsic::stackrestore) { - IRBuilder<> IRB(II); - Instruction *SI = IRB.CreateStore(II->getArgOperand(0), UnsafeStackPtr); - SI->takeName(II); - assert(II->use_empty()); - II->eraseFromParent(); - } - } - } -} - -bool SafeStack::runOnFunction(Function &F) { - DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); - - if (!F.hasFnAttribute(Attribute::SafeStack)) { - DEBUG(dbgs() << "[SafeStack] safestack is not requested" - " for this function\n"); - return false; - } - - if (F.isDeclaration()) { - DEBUG(dbgs() << "[SafeStack] function definition" - " is not available\n"); - return false; - } - - TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr; - SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - - { - // Make sure the regular stack protector won't run on this function - // (safestack attribute takes precedence). - AttrBuilder B; - B.addAttribute(Attribute::StackProtect) - .addAttribute(Attribute::StackProtectReq) - .addAttribute(Attribute::StackProtectStrong); - F.removeAttributes( - AttributeSet::FunctionIndex, - AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex, B)); - } - - ++NumFunctions; - - SmallVector<AllocaInst *, 16> StaticAllocas; - SmallVector<AllocaInst *, 4> DynamicAllocas; - SmallVector<Argument *, 4> ByValArguments; - SmallVector<ReturnInst *, 4> Returns; - - // Collect all points where stack gets unwound and needs to be restored - // This is only necessary because the runtime (setjmp and unwind code) is - // not aware of the unsafe stack and won't unwind/restore it prorerly. - // To work around this problem without changing the runtime, we insert - // instrumentation to restore the unsafe stack pointer when necessary. - SmallVector<Instruction *, 4> StackRestorePoints; - - // Find all static and dynamic alloca instructions that must be moved to the - // unsafe stack, all return instructions and stack restore points. - findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns, - StackRestorePoints); - - if (StaticAllocas.empty() && DynamicAllocas.empty() && - ByValArguments.empty() && StackRestorePoints.empty()) - return false; // Nothing to do in this function. - - if (!StaticAllocas.empty() || !DynamicAllocas.empty() || - !ByValArguments.empty()) - ++NumUnsafeStackFunctions; // This function has the unsafe stack. - - if (!StackRestorePoints.empty()) - ++NumUnsafeStackRestorePointsFunctions; - - IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); - UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); - - // The top of the unsafe stack after all unsafe static allocas are allocated. - Value *StaticTop = moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, - ByValArguments, Returns); - - // Safe stack object that stores the current unsafe stack top. It is updated - // as unsafe dynamic (non-constant-sized) allocas are allocated and freed. - // This is only needed if we need to restore stack pointer after longjmp - // or exceptions, and we have dynamic allocations. - // FIXME: a better alternative might be to store the unsafe stack pointer - // before setjmp / invoke instructions. 
- AllocaInst *DynamicTop = createStackRestorePoints( - IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); - - // Handle dynamic allocas. - moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop, - DynamicAllocas); - - DEBUG(dbgs() << "[SafeStack] safestack applied\n"); - return true; -} - -} // anonymous namespace - -char SafeStack::ID = 0; -INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack", - "Safe Stack instrumentation pass", false, false) -INITIALIZE_TM_PASS_END(SafeStack, "safe-stack", - "Safe Stack instrumentation pass", false, false) - -FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) { - return new SafeStack(TM); -} diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 09de7a2cda2b4..7d404473655de 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -28,13 +28,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" @@ -45,6 +47,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -53,22 +56,28 @@ using namespace llvm; #define DEBUG_TYPE "sancov" -static const char *const kSanCovModuleInitName = "__sanitizer_cov_module_init"; -static const char *const kSanCovName = "__sanitizer_cov"; -static const char *const kSanCovWithCheckName = "__sanitizer_cov_with_check"; -static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16"; -static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter"; -static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block"; -static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp"; -static const char *const kSanCovTraceSwitch = "__sanitizer_cov_trace_switch"; -static const char *const kSanCovModuleCtorName = "sancov.module_ctor"; -static const uint64_t kSanCtorAndDtorPriority = 2; - -static cl::opt<int> ClCoverageLevel("sanitizer-coverage-level", - cl::desc("Sanitizer Coverage. 
0: none, 1: entry block, 2: all blocks, " - "3: all blocks and critical edges, " - "4: above plus indirect calls"), - cl::Hidden, cl::init(0)); +static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init"; +static const char *const SanCovName = "__sanitizer_cov"; +static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check"; +static const char *const SanCovIndirCallName = "__sanitizer_cov_indir_call16"; +static const char *const SanCovTracePCIndirName = + "__sanitizer_cov_trace_pc_indir"; +static const char *const SanCovTraceEnterName = + "__sanitizer_cov_trace_func_enter"; +static const char *const SanCovTraceBBName = + "__sanitizer_cov_trace_basic_block"; +static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; +static const char *const SanCovTraceCmpName = "__sanitizer_cov_trace_cmp"; +static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch"; +static const char *const SanCovModuleCtorName = "sancov.module_ctor"; +static const uint64_t SanCtorAndDtorPriority = 2; + +static cl::opt<int> ClCoverageLevel( + "sanitizer-coverage-level", + cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, " + "3: all blocks and critical edges, " + "4: above plus indirect calls"), + cl::Hidden, cl::init(0)); static cl::opt<unsigned> ClCoverageBlockThreshold( "sanitizer-coverage-block-threshold", @@ -82,12 +91,21 @@ static cl::opt<bool> "callbacks at every basic block"), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc", + cl::desc("Experimental pc tracing"), + cl::Hidden, cl::init(false)); + static cl::opt<bool> ClExperimentalCMPTracing("sanitizer-coverage-experimental-trace-compares", cl::desc("Experimental tracing of CMP and similar " "instructions"), cl::Hidden, cl::init(false)); +static cl::opt<bool> + ClPruneBlocks("sanitizer-coverage-prune-blocks", + cl::desc("Reduce the number of instrumented blocks"), + cl::Hidden, cl::init(true)); + // Experimental 8-bit counters used as an additional search heuristic during // coverage-guided fuzzing. 
// The counters are not thread-friendly: @@ -131,22 +149,28 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { Options.TraceBB |= ClExperimentalTracing; Options.TraceCmp |= ClExperimentalCMPTracing; Options.Use8bitCounters |= ClUse8bitCounters; + Options.TracePC |= ClExperimentalTracePC; return Options; } class SanitizerCoverageModule : public ModulePass { - public: +public: SanitizerCoverageModule( const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) - : ModulePass(ID), Options(OverrideFromCL(Options)) {} + : ModulePass(ID), Options(OverrideFromCL(Options)) { + initializeSanitizerCoverageModulePass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M) override; bool runOnFunction(Function &F); - static char ID; // Pass identification, replacement for typeid - const char *getPassName() const override { - return "SanitizerCoverageModule"; + static char ID; // Pass identification, replacement for typeid + const char *getPassName() const override { return "SanitizerCoverageModule"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTreeWrapperPass>(); } - private: +private: void InjectCoverageForIndirectCalls(Function &F, ArrayRef<Instruction *> IndirCalls); void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets); @@ -162,8 +186,8 @@ class SanitizerCoverageModule : public ModulePass { } Function *SanCovFunction; Function *SanCovWithCheckFunction; - Function *SanCovIndirCallFunction; - Function *SanCovTraceEnter, *SanCovTraceBB; + Function *SanCovIndirCallFunction, *SanCovTracePCIndir; + Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC; Function *SanCovTraceCmpFunction; Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; @@ -178,7 +202,7 @@ class SanitizerCoverageModule : public ModulePass { SanitizerCoverageOptions Options; }; -} // namespace +} // namespace bool SanitizerCoverageModule::runOnModule(Module &M) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) @@ -195,28 +219,32 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { Int64Ty = IRB.getInt64Ty(); SanCovFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr)); + M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy, nullptr)); SanCovWithCheckFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); + M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); + SanCovTracePCIndir = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy, nullptr)); SanCovIndirCallFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); + SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); SanCovTraceCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + SanCovTraceCmpName, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); SanCovTraceSwitchFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kSanCovTraceSwitch, VoidTy, Int64Ty, Int64PtrTy, nullptr)); + SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy, nullptr)); // We insert an empty inline asm after cov callbacks to avoid callback merge. 
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); + SanCovTracePC = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(SanCovTracePCName, VoidTy, nullptr)); SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); + M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy, nullptr)); SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); + M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy, nullptr)); // At this point we create a dummy array of guards because we don't // know how many elements we will need. @@ -243,7 +271,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov"); - // Replace the dummy array with the real one. GuardArray->replaceAllUsesWith( IRB.CreatePointerCast(RealGuardArray, Int32PtrTy)); @@ -252,13 +279,12 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { GlobalVariable *RealEightBitCounterArray; if (Options.Use8bitCounters) { // Make sure the array is 16-aligned. - static const int kCounterAlignment = 16; - Type *Int8ArrayNTy = - ArrayType::get(Int8Ty, RoundUpToAlignment(N, kCounterAlignment)); + static const int CounterAlignment = 16; + Type *Int8ArrayNTy = ArrayType::get(Int8Ty, alignTo(N, CounterAlignment)); RealEightBitCounterArray = new GlobalVariable( M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter"); - RealEightBitCounterArray->setAlignment(kCounterAlignment); + RealEightBitCounterArray->setAlignment(CounterAlignment); EightBitCounterArray->replaceAllUsesWith( IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)); EightBitCounterArray->eraseFromParent(); @@ -271,26 +297,64 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { new GlobalVariable(M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage, ModNameStrConst); - Function *CtorFunc; - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, kSanCovModuleCtorName, kSanCovModuleInitName, - {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, - {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, N), - Options.Use8bitCounters - ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) - : Constant::getNullValue(Int8PtrTy), - IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); + if (!Options.TracePC) { + Function *CtorFunc; + std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( + M, SanCovModuleCtorName, SanCovModuleInitName, + {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, + {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), + ConstantInt::get(IntptrTy, N), + Options.Use8bitCounters + ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) + : Constant::getNullValue(Int8PtrTy), + IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); + + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); + } + + return true; +} + +// True if block has successors and it dominates all of them. 
+static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { + if (succ_begin(BB) == succ_end(BB)) + return false; + + for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) { + if (!DT->dominates(BB, SUCC)) + return false; + } + + return true; +} - appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority); +// True if block has predecessors and it postdominates all of them. +static bool isFullPostDominator(const BasicBlock *BB, + const PostDominatorTree *PDT) { + if (pred_begin(BB) == pred_end(BB)) + return false; + + for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) { + if (!PDT->dominates(BB, PRED)) + return false; + } return true; } +static bool shouldInstrumentBlock(const Function& F, const BasicBlock *BB, const DominatorTree *DT, + const PostDominatorTree *PDT) { + if (!ClPruneBlocks || &F.getEntryBlock() == BB) + return true; + + return !(isFullDominator(BB, DT) || isFullPostDominator(BB, PDT)); +} + bool SanitizerCoverageModule::runOnFunction(Function &F) { - if (F.empty()) return false; + if (F.empty()) + return false; if (F.getName().find(".module_ctor") != std::string::npos) - return false; // Should not instrument sanitizer init functions. + return false; // Should not instrument sanitizer init functions. // Don't instrument functions using SEH for now. Splitting basic blocks like // we do for coverage breaks WinEHPrepare. // FIXME: Remove this when SEH no longer uses landingpad pattern matching. @@ -299,12 +363,19 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { return false; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F); - SmallVector<Instruction*, 8> IndirCalls; - SmallVector<BasicBlock*, 16> AllBlocks; - SmallVector<Instruction*, 8> CmpTraceTargets; - SmallVector<Instruction*, 8> SwitchTraceTargets; + SmallVector<Instruction *, 8> IndirCalls; + SmallVector<BasicBlock *, 16> BlocksToInstrument; + SmallVector<Instruction *, 8> CmpTraceTargets; + SmallVector<Instruction *, 8> SwitchTraceTargets; + + const DominatorTree *DT = + &getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); + const PostDominatorTree *PDT = + &getAnalysis<PostDominatorTreeWrapperPass>(F).getPostDomTree(); + for (auto &BB : F) { - AllBlocks.push_back(&BB); + if (shouldInstrumentBlock(F, &BB, DT, PDT)) + BlocksToInstrument.push_back(&BB); for (auto &Inst : BB) { if (Options.IndirectCalls) { CallSite CS(&Inst); @@ -319,7 +390,8 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { } } } - InjectCoverage(F, AllBlocks); + + InjectCoverage(F, BlocksToInstrument); InjectCoverageForIndirectCalls(F, IndirCalls); InjectTraceForCmp(F, CmpTraceTargets); InjectTraceForSwitch(F, SwitchTraceTargets); @@ -346,28 +418,34 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, // On every indirect call we call a run-time function // __sanitizer_cov_indir_call* with two parameters: // - callee address, -// - global cache array that contains kCacheSize pointers (zero-initialized). +// - global cache array that contains CacheSize pointers (zero-initialized). // The cache is used to speed up recording the caller-callee pairs. // The address of the caller is passed implicitly via caller PC. -// kCacheSize is encoded in the name of the run-time function. +// CacheSize is encoded in the name of the run-time function. 
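The per-call-site cache scheme referenced in the comment above is unchanged by the kCacheSize-to-CacheSize rename: each indirect call site gets a 16-slot, zero-initialized cache array whose address is passed to the runtime together with the callee. As a rough, hypothetical sketch of the runtime side (compiler-rt's real implementation differs; GET_CALLER_PC and ReportCallerCalleePair are invented stand-ins used only to clarify the caching idea), the cache lets the hot path return without reporting anything:

#include <cstdint>

// Hypothetical hooks standing in for compiler-rt internals.
extern "C" uintptr_t GET_CALLER_PC();
void ReportCallerCalleePair(uintptr_t Caller, uintptr_t Callee);

extern "C" void __sanitizer_cov_indir_call16(uintptr_t Callee,
                                             uintptr_t *CalleeCache) {
  // Fast path: this callee was already recorded for this call site.
  for (int I = 0; I < 16; I++)
    if (CalleeCache[I] == Callee)
      return;
  // Slow path: remember the callee in one slot and report the pair;
  // the caller's address is recovered implicitly from the return PC.
  CalleeCache[Callee % 16] = Callee;
  ReportCallerCalleePair(GET_CALLER_PC(), Callee);
}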
void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { - if (IndirCalls.empty()) return; - const int kCacheSize = 16; - const int kCacheAlignment = 64; // Align for better performance. - Type *Ty = ArrayType::get(IntptrTy, kCacheSize); + if (IndirCalls.empty()) + return; + const int CacheSize = 16; + const int CacheAlignment = 64; // Align for better performance. + Type *Ty = ArrayType::get(IntptrTy, CacheSize); for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); - if (isa<InlineAsm>(Callee)) continue; + if (isa<InlineAsm>(Callee)) + continue; GlobalVariable *CalleeCache = new GlobalVariable( *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); - CalleeCache->setAlignment(kCacheAlignment); - IRB.CreateCall(SanCovIndirCallFunction, - {IRB.CreatePointerCast(Callee, IntptrTy), - IRB.CreatePointerCast(CalleeCache, IntptrTy)}); + CalleeCache->setAlignment(CacheAlignment); + if (Options.TracePC) + IRB.CreateCall(SanCovTracePCIndir, + IRB.CreatePointerCast(Callee, IntptrTy)); + else + IRB.CreateCall(SanCovIndirCallFunction, + {IRB.CreatePointerCast(Callee, IntptrTy), + IRB.CreatePointerCast(CalleeCache, IntptrTy)}); } } @@ -376,7 +454,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( // {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... }) void SanitizerCoverageModule::InjectTraceForSwitch( - Function &F, ArrayRef<Instruction *> SwitchTraceTargets) { + Function &, ArrayRef<Instruction *> SwitchTraceTargets) { for (auto I : SwitchTraceTargets) { if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { IRBuilder<> IRB(I); @@ -391,7 +469,7 @@ void SanitizerCoverageModule::InjectTraceForSwitch( if (Cond->getType()->getScalarSizeInBits() < Int64Ty->getScalarSizeInBits()) Cond = IRB.CreateIntCast(Cond, Int64Ty, false); - for (auto It: SI->cases()) { + for (auto It : SI->cases()) { Constant *C = It.getCaseValue(); if (C->getType()->getScalarSizeInBits() < Int64Ty->getScalarSizeInBits()) @@ -409,15 +487,15 @@ void SanitizerCoverageModule::InjectTraceForSwitch( } } - void SanitizerCoverageModule::InjectTraceForCmp( - Function &F, ArrayRef<Instruction *> CmpTraceTargets) { + Function &, ArrayRef<Instruction *> CmpTraceTargets) { for (auto I : CmpTraceTargets) { if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) { IRBuilder<> IRB(ICMP); Value *A0 = ICMP->getOperand(0); Value *A1 = ICMP->getOperand(1); - if (!A0->getType()->isIntegerTy()) continue; + if (!A0->getType()->isIntegerTy()) + continue; uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType()); // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1); IRB.CreateCall( @@ -430,8 +508,8 @@ void SanitizerCoverageModule::InjectTraceForCmp( } void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata( - I->getModule()->getMDKindID("nosanitize"), MDNode::get(*C, None)); + I->setMetadata(I->getModule()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); } void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, @@ -448,7 +526,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { - if (auto SP = getDISubprogram(&F)) + if (auto SP = F.getSubprogram()) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); // Keep static allocas and llvm.localescape calls in the entry block. 
Even // if we aren't splitting the block, it's nice for allocas to be before @@ -465,16 +543,20 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (Options.TraceBB) { + if (Options.TracePC) { + IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. + IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. + } else if (Options.TraceBB) { IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); } else if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); - Load->setAtomic(Monotonic); + Load->setAtomic(AtomicOrdering::Monotonic); Load->setAlignment(4); SetNoSanitizeMetadata(Load); - Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); + Value *Cmp = + IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); @@ -499,9 +581,16 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } char SanitizerCoverageModule::ID = 0; -INITIALIZE_PASS(SanitizerCoverageModule, "sancov", - "SanitizerCoverage: TODO." - "ModulePass", false, false) +INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov", + "SanitizerCoverage: TODO." + "ModulePass", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_END(SanitizerCoverageModule, "sancov", + "SanitizerCoverage: TODO." + "ModulePass", + false, false) ModulePass *llvm::createSanitizerCoverageModulePass( const SanitizerCoverageOptions &Options) { return new SanitizerCoverageModule(Options); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 9331e1d2b3fdd..dcb62d3ed1b51 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -36,11 +37,13 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -81,6 +84,7 @@ namespace { struct ThreadSanitizer : public FunctionPass { ThreadSanitizer() : FunctionPass(ID) {} const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; bool doInitialization(Module &M) override; static char ID; // Pass identification, replacement for typeid. 
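// Why ThreadSanitizer now declares a TargetLibraryInfo dependency: the
// registration below adds INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
// and getAnalysisUsage requires the wrapper pass, so that runOnFunction can
// hand TLI to maybeMarkSanitizerLibraryCallNoBuiltin when it walks call sites.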
@@ -121,7 +125,13 @@ struct ThreadSanitizer : public FunctionPass { } // namespace char ThreadSanitizer::ID = 0; -INITIALIZE_PASS(ThreadSanitizer, "tsan", +INITIALIZE_PASS_BEGIN( + ThreadSanitizer, "tsan", + "ThreadSanitizer: detects data races.", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END( + ThreadSanitizer, "tsan", "ThreadSanitizer: detects data races.", false, false) @@ -129,6 +139,10 @@ const char *ThreadSanitizer::getPassName() const { return "ThreadSanitizer"; } +void ThreadSanitizer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} + FunctionPass *llvm::createThreadSanitizerPass() { return new ThreadSanitizer(); } @@ -243,6 +257,37 @@ static bool isVtableAccess(Instruction *I) { return false; } +// Do not instrument known races/"benign races" that come from compiler +// instrumentation. The user has no way of suppressing them. +static bool shouldInstrumentReadWriteFromAddress(Value *Addr) { + // Peel off GEPs and BitCasts. + Addr = Addr->stripInBoundsOffsets(); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { + if (GV->hasSection()) { + StringRef SectionName = GV->getSection(); + // Check if the global is in the PGO counters section. + if (SectionName.endswith(getInstrProfCountersSectionName( + /*AddSegment=*/false))) + return false; + } + + // Check if the global is in a GCOV counter array. + if (GV->getName().startswith("__llvm_gcov_ctr")) + return false; + } + + // Do not instrument accesses from different address spaces; we cannot deal + // with them. + if (Addr) { + Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0) + return false; + } + + return true; +} + bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { // If this is a GEP, just analyze its pointer operand. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) @@ -281,14 +326,17 @@ void ThreadSanitizer::chooseInstructionsToInstrument( const DataLayout &DL) { SmallSet<Value*, 8> WriteTargets; // Iterate from the end. - for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), - E = Local.rend(); It != E; ++It) { - Instruction *I = *It; + for (Instruction *I : reverse(Local)) { if (StoreInst *Store = dyn_cast<StoreInst>(I)) { - WriteTargets.insert(Store->getPointerOperand()); + Value *Addr = Store->getPointerOperand(); + if (!shouldInstrumentReadWriteFromAddress(Addr)) + continue; + WriteTargets.insert(Addr); } else { LoadInst *Load = cast<LoadInst>(I); Value *Addr = Load->getPointerOperand(); + if (!shouldInstrumentReadWriteFromAddress(Addr)) + continue; if (WriteTargets.count(Addr)) { // We will write to this temp, so no reason to analyze the read. NumOmittedReadsBeforeWrite++; @@ -344,6 +392,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { bool HasCalls = false; bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); const DataLayout &DL = F.getParent()->getDataLayout(); + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); // Traverse all instructions, collect loads/stores/returns, check for calls.
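// shouldInstrumentReadWriteFromAddress (above) skips, for example, PGO
// counter bumps, which race by design; an illustrative shape, assuming the
// default "__llvm_prf_cnts" section name and a hypothetical counter global:
//
//   @__profc_f = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts"
//   %c = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_f, i64 0, i64 0)
//   %d = add i64 %c, 1
//   store i64 %d, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_f, i64 0, i64 0)
//
// In the loop below, maybeMarkSanitizerLibraryCallNoBuiltin uses TLI to tag
// recognized libcall sites as nobuiltin, presumably so that later passes
// cannot fold them back into builtins that would bypass the tsan interceptors.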
for (auto &BB : F) { @@ -355,6 +405,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { else if (isa<ReturnInst>(Inst)) RetVec.push_back(&Inst); else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) { + if (CallInst *CI = dyn_cast<CallInst>(&Inst)) + maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); if (isa<MemIntrinsic>(Inst)) MemIntrinCalls.push_back(&Inst); HasCalls = true; @@ -456,14 +508,16 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { uint32_t v = 0; switch (ord) { - case NotAtomic: llvm_unreachable("unexpected atomic ordering!"); - case Unordered: // Fall-through. - case Monotonic: v = 0; break; - // case Consume: v = 1; break; // Not specified yet. - case Acquire: v = 2; break; - case Release: v = 3; break; - case AcquireRelease: v = 4; break; - case SequentiallyConsistent: v = 5; break; + case AtomicOrdering::NotAtomic: + llvm_unreachable("unexpected atomic ordering!"); + case AtomicOrdering::Unordered: // Fall-through. + case AtomicOrdering::Monotonic: v = 0; break; + // Not specified yet: + // case AtomicOrdering::Consume: v = 1; break; + case AtomicOrdering::Acquire: v = 2; break; + case AtomicOrdering::Release: v = 3; break; + case AtomicOrdering::AcquireRelease: v = 4; break; + case AtomicOrdering::SequentiallyConsistent: v = 5; break; } return IRB->getInt32(v); } @@ -496,6 +550,11 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { return false; } +static Value *createIntOrPtrToIntCast(Value *V, Type* Ty, IRBuilder<> &IRB) { + return isa<PointerType>(V->getType()) ? + IRB.CreatePtrToInt(V, Ty) : IRB.CreateIntCast(V, Ty, false); +} + // Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x // standards. For background see C++11 standard. A slightly older, publicly // available draft of the standard (not entirely up-to-date, but close enough @@ -517,9 +576,16 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), createOrdering(&IRB, LI->getOrdering())}; - CallInst *C = CallInst::Create(TsanAtomicLoad[Idx], Args); - ReplaceInstWithInst(I, C); - + Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); + if (Ty == OrigTy) { + Instruction *C = CallInst::Create(TsanAtomicLoad[Idx], Args); + ReplaceInstWithInst(I, C); + } else { + // We are loading a pointer, so we need to cast the return value. 
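// For example (an illustrative sketch, assuming a 64-bit target):
//   %p = load atomic i8*, i8** %a acquire, align 8
// becomes roughly:
//   %a64 = bitcast i8** %a to i64*
//   %v   = call i64 @__tsan_atomic64_load(i64* %a64, i32 2)  ; 2 == acquire
//   %p   = inttoptr i64 %v to i8*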
+ Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args); + Instruction *Cast = CastInst::Create(Instruction::IntToPtr, C, OrigTy); + ReplaceInstWithInst(I, Cast); + } } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { Value *Addr = SI->getPointerOperand(); int Idx = getMemoryAccessFuncIndex(Addr, DL); @@ -530,7 +596,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), - IRB.CreateIntCast(SI->getValueOperand(), Ty, false), + createIntOrPtrToIntCast(SI->getValueOperand(), Ty, IRB), createOrdering(&IRB, SI->getOrdering())}; CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args); ReplaceInstWithInst(I, C); @@ -560,15 +626,26 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); + Value *CmpOperand = + createIntOrPtrToIntCast(CASI->getCompareOperand(), Ty, IRB); + Value *NewOperand = + createIntOrPtrToIntCast(CASI->getNewValOperand(), Ty, IRB); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), - IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false), - IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false), + CmpOperand, + NewOperand, createOrdering(&IRB, CASI->getSuccessOrdering()), createOrdering(&IRB, CASI->getFailureOrdering())}; CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args); - Value *Success = IRB.CreateICmpEQ(C, CASI->getCompareOperand()); + Value *Success = IRB.CreateICmpEQ(C, CmpOperand); + Value *OldVal = C; + Type *OrigOldValTy = CASI->getNewValOperand()->getType(); + if (Ty != OrigOldValTy) { + // The value is a pointer, so we need to cast the return value. + OldVal = IRB.CreateIntToPtr(C, OrigOldValTy); + } - Value *Res = IRB.CreateInsertValue(UndefValue::get(CASI->getType()), C, 0); + Value *Res = + IRB.CreateInsertValue(UndefValue::get(CASI->getType()), OldVal, 0); Res = IRB.CreateInsertValue(Res, Success, 1); I->replaceAllUsesWith(Res);
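// The net effect for a pointer-typed cmpxchg, as an illustrative sketch
// (assuming a 64-bit target; ordering encodings follow createOrdering above,
// so 5 == seq_cst):
//
//   %c   = ptrtoint i8* %expected to i64
//   %n   = ptrtoint i8* %desired to i64
//   %old = call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 %c,
//                                                         i64 %n, i32 5, i32 5)
//   %ok  = icmp eq i64 %old, %c        ; success bit
//   %op  = inttoptr i64 %old to i8*    ; loaded value, cast back to a pointer
//   ; the { i8*, i1 } cmpxchg result is rebuilt from %op and %ok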