Diffstat (limited to 'llvm/lib/Transforms/Instrumentation')
21 files changed, 2387 insertions, 1205 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 79c119489a655..ee09a4d9db7e1 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -10,6 +10,8 @@ // Details of the algorithm: // https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm // +// FIXME: This sanitizer does not yet handle scalable vectors +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" @@ -30,7 +32,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -70,6 +71,7 @@ #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -213,6 +215,11 @@ static cl::opt<bool> ClInstrumentAtomics( cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); +static cl::opt<bool> + ClInstrumentByval("asan-instrument-byval", + cl::desc("instrument byval call arguments"), cl::Hidden, + cl::init(true)); + static cl::opt<bool> ClAlwaysSlowPath( "asan-always-slow-path", cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, @@ -532,7 +539,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, return Mapping; } -static size_t RedzoneSizeForScale(int MappingScale) { +static uint64_t getRedzoneSizeForScale(int MappingScale) { // Redzone used for stack and globals is at least 32 bytes. // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. return std::max(32U, 1U << MappingScale); @@ -584,11 +591,10 @@ struct AddressSanitizer { AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false) - : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) { - this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; - this->CompileKernel = - ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel; - + : CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan + : CompileKernel), + Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover), + UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) { C = &(M.getContext()); LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -613,16 +619,13 @@ struct AddressSanitizer { /// Check if we want (and can) handle this alloca. bool isInterestingAlloca(const AllocaInst &AI); - /// If it is an interesting memory access, return the PointerOperand - /// and set IsWrite/Alignment. Otherwise return nullptr. - /// MaybeMask is an output parameter for the mask Value, if we're looking at a - /// masked load/store. 
- Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, - uint64_t *TypeSize, unsigned *Alignment, - Value **MaybeMask = nullptr); + bool ignoreAccess(Value *Ptr); + void getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting); - void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, - bool UseCalls, const DataLayout &DL); + void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, + InterestingMemoryOperand &O, bool UseCalls, + const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, @@ -639,9 +642,10 @@ struct AddressSanitizer { Value *SizeArgument, uint32_t Exp); void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); + bool suppressInstrumentationSiteForDebug(int &Instrumented); bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI); bool maybeInsertAsanInitAtFunctionEntry(Function &F); - void maybeInsertDynamicShadowAtFunctionEntry(Function &F); + bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); private: @@ -691,7 +695,6 @@ private: FunctionCallee AsanMemoryAccessCallbackSized[2][2]; FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset; - InlineAsm *EmptyAsm; Value *LocalDynamicShadow = nullptr; const GlobalsMetadata &GlobalsMD; DenseMap<const AllocaInst *, bool> ProcessedAllocas; @@ -739,7 +742,11 @@ public: ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true, bool UseOdrIndicator = false) - : GlobalsMD(*GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), + : GlobalsMD(*GlobalsMD), + CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan + : CompileKernel), + Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover), + UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel), // Enable aliases as they should have no downside with ODR indicators. UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias), UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator), @@ -750,11 +757,7 @@ public: // argument is designed as workaround. Therefore, disable both // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to // do globals-gc. - UseCtorComdat(UseGlobalsGC && ClWithComdat) { - this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; - this->CompileKernel = - ClEnableKasan.getNumOccurrences() > 0 ? 
ClEnableKasan : CompileKernel; - + UseCtorComdat(UseGlobalsGC && ClWithComdat && !this->CompileKernel) { C = &(M.getContext()); int LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -787,16 +790,18 @@ private: StringRef OriginalName); void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix); - IRBuilder<> CreateAsanModuleDtor(Module &M); + Instruction *CreateAsanModuleDtor(Module &M); - bool ShouldInstrumentGlobal(GlobalVariable *G); + bool canInstrumentAliasedGlobal(const GlobalAlias &GA) const; + bool shouldInstrumentGlobal(GlobalVariable *G) const; bool ShouldUseMachOGlobalsSection() const; StringRef getGlobalMetadataSection() const; void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName); void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); - size_t MinRedzoneSizeForGlobal() const { - return RedzoneSizeForScale(Mapping.Scale); + uint64_t getMinRedzoneSizeForGlobal() const { + return getRedzoneSizeForScale(Mapping.Scale); } + uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const; int GetAsanVersion(const Module &M) const; const GlobalsMetadata &GlobalsMD; @@ -907,16 +912,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>; AllocaForValueMapTy AllocaForValue; - bool HasNonEmptyInlineAsm = false; + bool HasInlineAsm = false; bool HasReturnsTwiceCall = false; - std::unique_ptr<CallInst> EmptyInlineAsm; FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), - StackAlignment(1 << Mapping.Scale), - EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {} + StackAlignment(1 << Mapping.Scale) {} bool runOnFunction() { if (!ClStack) return false; @@ -1076,12 +1079,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { DynamicAllocaPoisonCallVec.push_back(APC); } - void visitCallSite(CallSite CS) { - Instruction *I = CS.getInstruction(); - if (CallInst *CI = dyn_cast<CallInst>(I)) { - HasNonEmptyInlineAsm |= CI->isInlineAsm() && - !CI->isIdenticalTo(EmptyInlineAsm.get()) && - I != ASan.LocalDynamicShadow; + void visitCallBase(CallBase &CB) { + if (CallInst *CI = dyn_cast<CallInst>(&CB)) { + HasInlineAsm |= CI->isInlineAsm() && &CB != ASan.LocalDynamicShadow; HasReturnsTwiceCall |= CI->canReturnTwice(); } } @@ -1147,9 +1147,9 @@ GlobalsMetadata::GlobalsMetadata(Module &M) { E.Name = Name->getString(); ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3)); E.IsDynInit |= IsDynInit->isOne(); - ConstantInt *IsBlacklisted = + ConstantInt *IsExcluded = mdconst::extract<ConstantInt>(MDN->getOperand(4)); - E.IsBlacklisted |= IsBlacklisted->isOne(); + E.IsExcluded |= IsExcluded->isOne(); } } @@ -1168,9 +1168,8 @@ AddressSanitizerPass::AddressSanitizerPass(bool CompileKernel, bool Recover, PreservedAnalyses AddressSanitizerPass::run(Function &F, AnalysisManager<Function> &AM) { auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); - auto &MAM = MAMProxy.getManager(); Module &M = *F.getParent(); - if (auto *R = MAM.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) { + if (auto *R = MAMProxy.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) { const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F); AddressSanitizer Sanitizer(M, R, CompileKernel, 
Recover, UseAfterScope); if (Sanitizer.instrumentFunction(F, TLI)) @@ -1341,98 +1340,90 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { return IsInteresting; } -Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, - bool *IsWrite, - uint64_t *TypeSize, - unsigned *Alignment, - Value **MaybeMask) { +bool AddressSanitizer::ignoreAccess(Value *Ptr) { + // Do not instrument acesses from different address spaces; we cannot deal + // with them. + Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0) + return true; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. + if (Ptr->isSwiftError()) + return true; + + // Treat memory accesses to promotable allocas as non-interesting since they + // will not cause memory violations. This greatly speeds up the instrumented + // executable at -O0. + if (auto AI = dyn_cast_or_null<AllocaInst>(Ptr)) + if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI)) + return true; + + return false; +} + +void AddressSanitizer::getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) { // Skip memory accesses inserted by another instrumentation. - if (I->hasMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) + return; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) - return nullptr; + return; - Value *PtrOperand = nullptr; - const DataLayout &DL = I->getModule()->getDataLayout(); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (!ClInstrumentReads) return nullptr; - *IsWrite = false; - *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); - *Alignment = LI->getAlignment(); - PtrOperand = LI->getPointerOperand(); + if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand())) + return; + Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, + LI->getType(), LI->getAlign()); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!ClInstrumentWrites) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); - *Alignment = SI->getAlignment(); - PtrOperand = SI->getPointerOperand(); + if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand())) + return; + Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, + SI->getValueOperand()->getType(), SI->getAlign()); } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); - *Alignment = 0; - PtrOperand = RMW->getPointerOperand(); + if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand())) + return; + Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, + RMW->getValOperand()->getType(), None); } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); - *Alignment = 0; - PtrOperand = XCHG->getPointerOperand(); + if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand())) + return; + Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, + XCHG->getCompareOperand()->getType(), None); } else if (auto CI = 
dyn_cast<CallInst>(I)) { - auto *F = dyn_cast<Function>(CI->getCalledValue()); + auto *F = CI->getCalledFunction(); if (F && (F->getName().startswith("llvm.masked.load.") || F->getName().startswith("llvm.masked.store."))) { - unsigned OpOffset = 0; - if (F->getName().startswith("llvm.masked.store.")) { - if (!ClInstrumentWrites) - return nullptr; - // Masked store has an initial operand for the value. - OpOffset = 1; - *IsWrite = true; - } else { - if (!ClInstrumentReads) - return nullptr; - *IsWrite = false; - } - - auto BasePtr = CI->getOperand(0 + OpOffset); + bool IsWrite = F->getName().startswith("llvm.masked.store."); + // Masked store has an initial operand for the value. + unsigned OpOffset = IsWrite ? 1 : 0; + if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads) + return; + + auto BasePtr = CI->getOperand(OpOffset); + if (ignoreAccess(BasePtr)) + return; auto Ty = cast<PointerType>(BasePtr->getType())->getElementType(); - *TypeSize = DL.getTypeStoreSizeInBits(Ty); - if (auto AlignmentConstant = - dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset))) - *Alignment = (unsigned)AlignmentConstant->getZExtValue(); - else - *Alignment = 1; // No alignment guarantees. We probably got Undef - if (MaybeMask) - *MaybeMask = CI->getOperand(2 + OpOffset); - PtrOperand = BasePtr; + MaybeAlign Alignment = Align(1); + // Otherwise no alignment guarantees. We probably got Undef. + if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset))) + Alignment = Op->getMaybeAlignValue(); + Value *Mask = CI->getOperand(2 + OpOffset); + Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask); + } else { + for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) { + if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || + ignoreAccess(CI->getArgOperand(ArgNo))) + continue; + Type *Ty = CI->getParamByValType(ArgNo); + Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); + } } } - - if (PtrOperand) { - // Do not instrument acesses from different address spaces; we cannot deal - // with them. - Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0) - return nullptr; - - // Ignore swifterror addresses. - // swifterror memory addresses are mem2reg promoted by instruction - // selection. As such they cannot have regular uses like an instrumentation - // function and it makes no sense to track them as memory. - if (PtrOperand->isSwiftError()) - return nullptr; - } - - // Treat memory accesses to promotable allocas as non-interesting since they - // will not cause memory violations. This greatly speeds up the instrumented - // executable at -O0. - if (ClSkipPromotableAllocas) - if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand)) - return isInterestingAlloca(*AI) ? AI : nullptr; - - return PtrOperand; } static bool isPointerOperand(Value *V) { @@ -1491,7 +1482,7 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction( static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, Instruction *InsertBefore, Value *Addr, - unsigned Alignment, unsigned Granularity, + MaybeAlign Alignment, unsigned Granularity, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { @@ -1499,7 +1490,7 @@ static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, // if the data is properly aligned. 
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) && - (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) + (!Alignment || *Alignment >= Granularity || *Alignment >= TypeSize / 8)) return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite, nullptr, UseCalls, Exp); Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize, @@ -1509,13 +1500,14 @@ static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask, Instruction *I, - Value *Addr, unsigned Alignment, + Value *Addr, MaybeAlign Alignment, unsigned Granularity, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { - auto *VTy = cast<PointerType>(Addr->getType())->getElementType(); + auto *VTy = cast<FixedVectorType>( + cast<PointerType>(Addr->getType())->getElementType()); uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); - unsigned Num = VTy->getVectorNumElements(); + unsigned Num = VTy->getNumElements(); auto Zero = ConstantInt::get(IntptrTy, 0); for (unsigned Idx = 0; Idx < Num; ++Idx) { Value *InstrumentedAddress = nullptr; @@ -1546,15 +1538,9 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, } void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, - Instruction *I, bool UseCalls, + InterestingMemoryOperand &O, bool UseCalls, const DataLayout &DL) { - bool IsWrite = false; - unsigned Alignment = 0; - uint64_t TypeSize = 0; - Value *MaybeMask = nullptr; - Value *Addr = - isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask); - assert(Addr); + Value *Addr = O.getPtr(); // Optimization experiments. // The experiments can be used to evaluate potential optimizations that remove @@ -1574,7 +1560,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, // dynamically initialized global is always valid. GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL)); if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) && - isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) { NumOptimizedAccessesToGlobalVar++; return; } @@ -1583,25 +1569,26 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, if (ClOpt && ClOptStack) { // A direct inbounds access to a stack variable is always valid. 
if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) && - isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) { NumOptimizedAccessesToStackVar++; return; } } - if (IsWrite) + if (O.IsWrite) NumInstrumentedWrites++; else NumInstrumentedReads++; unsigned Granularity = 1 << Mapping.Scale; - if (MaybeMask) { - instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr, - Alignment, Granularity, TypeSize, IsWrite, - nullptr, UseCalls, Exp); + if (O.MaybeMask) { + instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(), + Addr, O.Alignment, Granularity, O.TypeSize, + O.IsWrite, nullptr, UseCalls, Exp); } else { - doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize, - IsWrite, nullptr, UseCalls, Exp); + doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment, + Granularity, O.TypeSize, O.IsWrite, nullptr, UseCalls, + Exp); } } @@ -1629,10 +1616,7 @@ Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, {Addr, ExpVal}); } - // We don't do Call->setDoesNotReturn() because the BB already has - // UnreachableInst at the end. - // This EmptyAsm is required to avoid callback merge. - IRB.CreateCall(EmptyAsm, {}); + Call->setCannotMerge(); return Call; } @@ -1800,13 +1784,29 @@ void ModuleAddressSanitizer::createInitializerPoisonCalls( } } -bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { +bool ModuleAddressSanitizer::canInstrumentAliasedGlobal( + const GlobalAlias &GA) const { + // In case this function should be expanded to include rules that do not just + // apply when CompileKernel is true, either guard all existing rules with an + // 'if (CompileKernel) { ... }' or be absolutely sure that all these rules + // should also apply to user space. + assert(CompileKernel && "Only expecting to be called when compiling kernel"); + + // When compiling the kernel, globals that are aliased by symbols prefixed + // by "__" are special and cannot be padded with a redzone. + if (GA.getName().startswith("__")) + return false; + + return true; +} + +bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { Type *Ty = G->getValueType(); LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); // FIXME: Metadata should be attched directly to the global directly instead // of being added to llvm.asan.globals. - if (GlobalsMD.get(G).IsBlacklisted) return false; + if (GlobalsMD.get(G).IsExcluded) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; // Only instrument globals of default address spaces @@ -1817,7 +1817,7 @@ bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { // - Need to poison all copies, not just the main thread's one. if (G->isThreadLocal()) return false; // For now, just ignore this Global if the alignment is large. - if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false; + if (G->getAlignment() > getMinRedzoneSizeForGlobal()) return false; // For non-COFF targets, only instrument globals known to be defined by this // TU. @@ -1847,6 +1847,12 @@ bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { } if (G->hasSection()) { + // The kernel uses explicit sections for mostly special global variables + // that we should not instrument. E.g. the kernel may rely on their layout + // without redzones, or remove them at link time ("discard.*"), etc. 
+ if (CompileKernel) + return false; + StringRef Section = G->getSection(); // Globals from llvm.metadata aren't emitted, do not instrument them. @@ -1913,6 +1919,13 @@ bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { } } + if (CompileKernel) { + // Globals that prefixed by "__" are special and cannot be padded with a + // redzone. + if (G->getName().startswith("__")) + return false; + } + return true; } @@ -1993,7 +2006,7 @@ void ModuleAddressSanitizer::SetComdatForGlobalMetadata( } if (!InternalSuffix.empty() && G->hasLocalLinkage()) { - std::string Name = G->getName(); + std::string Name = std::string(G->getName()); Name += InternalSuffix; C = M.getOrInsertComdat(Name); } else { @@ -2030,13 +2043,13 @@ ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer, return Metadata; } -IRBuilder<> ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) { +Instruction *ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) { AsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*C), false), GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); - return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); + return ReturnInst::Create(*C, AsanDtorBB); } void ModuleAddressSanitizer::InstrumentGlobalsCOFF( @@ -2045,11 +2058,15 @@ void ModuleAddressSanitizer::InstrumentGlobalsCOFF( assert(ExtendedGlobals.size() == MetadataInitializers.size()); auto &DL = M.getDataLayout(); + SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); for (size_t i = 0; i < ExtendedGlobals.size(); i++) { Constant *Initializer = MetadataInitializers[i]; GlobalVariable *G = ExtendedGlobals[i]; GlobalVariable *Metadata = CreateMetadataGlobal(M, Initializer, G->getName()); + MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); + Metadata->setMetadata(LLVMContext::MD_associated, MD); + MetadataGlobals[i] = Metadata; // The MSVC linker always inserts padding when linking incrementally. We // cope with that by aligning each struct to its size, which must be a power @@ -2061,6 +2078,11 @@ void ModuleAddressSanitizer::InstrumentGlobalsCOFF( SetComdatForGlobalMetadata(G, Metadata, ""); } + + // Update llvm.compiler.used, adding the new metadata globals. This is + // needed so that during LTO these variables stay alive. + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); } void ModuleAddressSanitizer::InstrumentGlobalsELF( @@ -2081,10 +2103,23 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF( SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); } + // This should never be called when there are no globals, by the logic that + // computes the UniqueModuleId string, which is "" when there are no globals. + // It's important that this path is only used when there are actually some + // globals, because that means that there will certainly be a live + // `asan_globals` input section at link time and thus `__start_asan_globals` + // and `__stop_asan_globals` symbols will definitely be defined at link time. + // This means there's no need for the references to them to be weak, which + // enables better code generation because ExternalWeakLinkage implies + // isInterposable() and thus requires GOT indirection for PIC. Since these + // are known-defined hidden/dso_local symbols, direct PIC accesses without + // dynamic relocation are always sufficient. 
+ assert(!MetadataGlobals.empty()); + assert(!UniqueModuleId.empty()); + // Update llvm.compiler.used, adding the new metadata globals. This is // needed so that during LTO these variables stay alive. - if (!MetadataGlobals.empty()) - appendToCompilerUsed(M, MetadataGlobals); + appendToCompilerUsed(M, MetadataGlobals); // RegisteredFlag serves two purposes. First, we can pass it to dladdr() // to look up the loaded image that contains it. Second, we can store in it @@ -2097,15 +2132,18 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF( ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); - // Create start and stop symbols. - GlobalVariable *StartELFMetadata = new GlobalVariable( - M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, - "__start_" + getGlobalMetadataSection()); - StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); - GlobalVariable *StopELFMetadata = new GlobalVariable( - M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, - "__stop_" + getGlobalMetadataSection()); - StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + // Create start and stop symbols. These are known to be defined by + // the linker, see comment above. + auto MakeStartStopGV = [&](const char *Prefix) { + GlobalVariable *StartStop = + new GlobalVariable(M, IntptrTy, false, GlobalVariable::ExternalLinkage, + nullptr, Prefix + getGlobalMetadataSection()); + StartStop->setVisibility(GlobalVariable::HiddenVisibility); + assert(StartStop->isImplicitDSOLocal()); + return StartStop; + }; + GlobalVariable *StartELFMetadata = MakeStartStopGV("__start_"); + GlobalVariable *StopELFMetadata = MakeStartStopGV("__stop_"); // Create a call to register the globals with the runtime. IRB.CreateCall(AsanRegisterElfGlobals, @@ -2115,7 +2153,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF( // We also need to unregister globals at the end, e.g., when a shared library // gets closed. - IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M)); IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), IRB.CreatePointerCast(StartELFMetadata, IntptrTy), @@ -2174,7 +2212,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsMachO( // We also need to unregister globals at the end, e.g., when a shared library // gets closed. - IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M)); IRB_Dtor.CreateCall(AsanUnregisterImageGlobals, {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); } @@ -2202,7 +2240,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray( // We also need to unregister globals at the end, e.g., when a shared library // gets closed. - IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M)); IRB_Dtor.CreateCall(AsanUnregisterGlobals, {IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, N)}); @@ -2217,10 +2255,22 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) { *CtorComdat = false; - SmallVector<GlobalVariable *, 16> GlobalsToChange; + // Build set of globals that are aliased by some GA, where + // canInstrumentAliasedGlobal(GA) returns false. 
+ SmallPtrSet<const GlobalVariable *, 16> AliasedGlobalExclusions; + if (CompileKernel) { + for (auto &GA : M.aliases()) { + if (const auto *GV = dyn_cast<GlobalVariable>(GA.getAliasee())) { + if (!canInstrumentAliasedGlobal(GA)) + AliasedGlobalExclusions.insert(GV); + } + } + } + SmallVector<GlobalVariable *, 16> GlobalsToChange; for (auto &G : M.globals()) { - if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G); + if (!AliasedGlobalExclusions.count(&G) && shouldInstrumentGlobal(&G)) + GlobalsToChange.push_back(&G); } size_t n = GlobalsToChange.size(); @@ -2255,7 +2305,6 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix); for (size_t i = 0; i < n; i++) { - static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; // FIXME: Metadata should be attched directly to the global directly instead @@ -2269,16 +2318,8 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, /*AllowMerging*/ true, kAsanGenPrefix); Type *Ty = G->getValueType(); - uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); - uint64_t MinRZ = MinRedzoneSizeForGlobal(); - // MinRZ <= RZ <= kMaxGlobalRedzone - // and trying to make RZ to be ~ 1/4 of SizeInBytes. - uint64_t RZ = std::max( - MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ)); - uint64_t RightRedzoneSize = RZ; - // Round up to MinRZ - if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); - assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); + const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); + const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); StructType *NewTy = StructType::get(Ty, RightRedZoneTy); @@ -2294,7 +2335,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setComdat(G->getComdat()); - NewGlobal->setAlignment(MaybeAlign(MinRZ)); + NewGlobal->setAlignment(MaybeAlign(getMinRedzoneSizeForGlobal())); // Don't fold globals with redzones. ODR violation detector and redzone // poisoning implicitly creates a dependence on the global's address, so it // is no longer valid for it to be marked unnamed_addr. @@ -2362,7 +2403,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, // Set meaningful attributes for indicator symbol. ODRIndicatorSym->setVisibility(NewGlobal->getVisibility()); ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass()); - ODRIndicatorSym->setAlignment(Align::None()); + ODRIndicatorSym->setAlignment(Align(1)); ODRIndicator = ODRIndicatorSym; } @@ -2416,6 +2457,23 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, return true; } +uint64_t +ModuleAddressSanitizer::getRedzoneSizeForGlobal(uint64_t SizeInBytes) const { + constexpr uint64_t kMaxRZ = 1 << 18; + const uint64_t MinRZ = getMinRedzoneSizeForGlobal(); + + // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes. + uint64_t RZ = + std::max(MinRZ, std::min(kMaxRZ, (SizeInBytes / MinRZ / 4) * MinRZ)); + + // Round up to multiple of MinRZ. 
+ if (SizeInBytes % MinRZ) + RZ += MinRZ - (SizeInBytes % MinRZ); + assert((RZ + SizeInBytes) % MinRZ == 0); + + return RZ; +} + int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const { int LongSize = M.getDataLayout().getPointerSizeInBits(); bool isAndroid = Triple(M.getTargetTriple()).isAndroid(); @@ -2429,20 +2487,23 @@ int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const { bool ModuleAddressSanitizer::instrumentModule(Module &M) { initializeCallbacks(M); - if (CompileKernel) - return false; - // Create a module constructor. A destructor is created lazily because not all // platforms, and not all modules need it. - std::string AsanVersion = std::to_string(GetAsanVersion(M)); - std::string VersionCheckName = - ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : ""; - std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( - M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, - /*InitArgs=*/{}, VersionCheckName); + if (CompileKernel) { + // The kernel always builds with its own runtime, and therefore does not + // need the init and version check calls. + AsanCtorFunction = createSanitizerCtor(M, kAsanModuleCtorName); + } else { + std::string AsanVersion = std::to_string(GetAsanVersion(M)); + std::string VersionCheckName = + ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : ""; + std::tie(AsanCtorFunction, std::ignore) = + createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, + kAsanInitName, /*InitArgTypes=*/{}, + /*InitArgs=*/{}, VersionCheckName); + } bool CtorComdat = true; - // TODO(glider): temporarily disabled globals instrumentation for KASan. if (ClGlobals) { IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); InstrumentGlobals(IRB, M, &CtorComdat); @@ -2529,10 +2590,6 @@ void AddressSanitizer::initializeCallbacks(Module &M) { M.getOrInsertFunction(kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy); AsanPtrSubFunction = M.getOrInsertFunction(kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy); - // We insert an empty inline asm after __asan_report* to avoid callback merge. - EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), - StringRef(""), StringRef(""), - /*hasSideEffects=*/true); if (Mapping.InGlobal) AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow", ArrayType::get(IRB.getInt8Ty(), 0)); @@ -2556,10 +2613,10 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { return false; } -void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { +bool AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { // Generate code only when dynamic addressing is needed. 
if (Mapping.Offset != kDynamicShadowSentinel) - return; + return false; IRBuilder<> IRB(&F.front().front()); if (Mapping.InGlobal) { @@ -2581,6 +2638,7 @@ void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { kAsanShadowMemoryDynamicAddress, IntptrTy); LocalDynamicShadow = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress); } + return true; } void AddressSanitizer::markEscapedLocalAllocas(Function &F) { @@ -2611,6 +2669,14 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) { } } +bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) { + bool ShouldInstrument = + ClDebugMin < 0 || ClDebugMax < 0 || + (Instrumented >= ClDebugMin && Instrumented <= ClDebugMax); + Instrumented++; + return !ShouldInstrument; +} + bool AddressSanitizer::instrumentFunction(Function &F, const TargetLibraryInfo *TLI) { if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; @@ -2634,7 +2700,7 @@ bool AddressSanitizer::instrumentFunction(Function &F, FunctionStateRAII CleanupObj(this); - maybeInsertDynamicShadowAtFunctionEntry(F); + FunctionModified |= maybeInsertDynamicShadowAtFunctionEntry(F); // We can't instrument allocas used with llvm.localescape. Only static allocas // can be passed to that intrinsic. @@ -2643,14 +2709,12 @@ bool AddressSanitizer::instrumentFunction(Function &F, // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallPtrSet<Value *, 16> TempsToInstrument; - SmallVector<Instruction *, 16> ToInstrument; + SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument; + SmallVector<MemIntrinsic *, 16> IntrinToInstrument; SmallVector<Instruction *, 8> NoReturnCalls; SmallVector<BasicBlock *, 16> AllBlocks; SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts; int NumAllocas = 0; - bool IsWrite; - unsigned Alignment; - uint64_t TypeSize; // Fill the set of memory operations to instrument. for (auto &BB : F) { @@ -2659,51 +2723,54 @@ bool AddressSanitizer::instrumentFunction(Function &F, int NumInsnsPerBB = 0; for (auto &Inst : BB) { if (LooksLikeCodeInBug11395(&Inst)) return false; - Value *MaybeMask = nullptr; - if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, - &Alignment, &MaybeMask)) { - if (ClOpt && ClOptSameTemp) { - // If we have a mask, skip instrumentation if we've already - // instrumented the full object. But don't add to TempsToInstrument - // because we might get another load/store with a different mask. - if (MaybeMask) { - if (TempsToInstrument.count(Addr)) - continue; // We've seen this (whole) temp in the current BB. - } else { - if (!TempsToInstrument.insert(Addr).second) - continue; // We've seen this temp in the current BB. + SmallVector<InterestingMemoryOperand, 1> InterestingOperands; + getInterestingMemoryOperands(&Inst, InterestingOperands); + + if (!InterestingOperands.empty()) { + for (auto &Operand : InterestingOperands) { + if (ClOpt && ClOptSameTemp) { + Value *Ptr = Operand.getPtr(); + // If we have a mask, skip instrumentation if we've already + // instrumented the full object. But don't add to TempsToInstrument + // because we might get another load/store with a different mask. + if (Operand.MaybeMask) { + if (TempsToInstrument.count(Ptr)) + continue; // We've seen this (whole) temp in the current BB. + } else { + if (!TempsToInstrument.insert(Ptr).second) + continue; // We've seen this temp in the current BB. 
+ } } + OperandsToInstrument.push_back(Operand); + NumInsnsPerBB++; } } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) && isInterestingPointerComparison(&Inst)) || ((ClInvalidPointerPairs || ClInvalidPointerSub) && isInterestingPointerSubtraction(&Inst))) { PointerComparisonsOrSubtracts.push_back(&Inst); - continue; - } else if (isa<MemIntrinsic>(Inst)) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) { // ok, take it. + IntrinToInstrument.push_back(MI); + NumInsnsPerBB++; } else { if (isa<AllocaInst>(Inst)) NumAllocas++; - CallSite CS(&Inst); - if (CS) { + if (auto *CB = dyn_cast<CallBase>(&Inst)) { // A call inside BB. TempsToInstrument.clear(); - if (CS.doesNotReturn() && !CS->hasMetadata("nosanitize")) - NoReturnCalls.push_back(CS.getInstruction()); + if (CB->doesNotReturn() && !CB->hasMetadata("nosanitize")) + NoReturnCalls.push_back(CB); } if (CallInst *CI = dyn_cast<CallInst>(&Inst)) maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); - continue; } - ToInstrument.push_back(&Inst); - NumInsnsPerBB++; if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; } } - bool UseCalls = - (ClInstrumentationWithCallsThreshold >= 0 && - ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold); + bool UseCalls = (ClInstrumentationWithCallsThreshold >= 0 && + OperandsToInstrument.size() + IntrinToInstrument.size() > + (unsigned)ClInstrumentationWithCallsThreshold); const DataLayout &DL = F.getParent()->getDataLayout(); ObjectSizeOpts ObjSizeOpts; ObjSizeOpts.RoundToAlign = true; @@ -2711,16 +2778,16 @@ bool AddressSanitizer::instrumentFunction(Function &F, // Instrument. int NumInstrumented = 0; - for (auto Inst : ToInstrument) { - if (ClDebugMin < 0 || ClDebugMax < 0 || - (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment)) - instrumentMop(ObjSizeVis, Inst, UseCalls, - F.getParent()->getDataLayout()); - else - instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); - } - NumInstrumented++; + for (auto &Operand : OperandsToInstrument) { + if (!suppressInstrumentationSiteForDebug(NumInstrumented)) + instrumentMop(ObjSizeVis, Operand, UseCalls, + F.getParent()->getDataLayout()); + FunctionModified = true; + } + for (auto Inst : IntrinToInstrument) { + if (!suppressInstrumentationSiteForDebug(NumInstrumented)) + instrumentMemIntrinsic(Inst); + FunctionModified = true; } FunctionStackPoisoner FSP(F, *this); @@ -2735,10 +2802,10 @@ bool AddressSanitizer::instrumentFunction(Function &F, for (auto Inst : PointerComparisonsOrSubtracts) { instrumentPointerComparisonOrSubtraction(Inst); - NumInstrumented++; + FunctionModified = true; } - if (NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty()) + if (ChangedStack || !NoReturnCalls.empty()) FunctionModified = true; LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " " @@ -2836,7 +2903,8 @@ void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask, Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)); Value *Poison = IRB.getIntN(StoreSizeInBytes * 8, Val); IRB.CreateAlignedStore( - Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()), 1); + Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()), + Align(1)); i += StoreSizeInBytes; } @@ -2900,7 +2968,7 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { const DataLayout &DL = F.getParent()->getDataLayout(); for (Argument &Arg : F.args()) { if (Arg.hasByValAttr()) { - Type *Ty = 
Arg.getType()->getPointerElementType(); + Type *Ty = Arg.getParamByValType(); const Align Alignment = DL.getValueOrABITypeAlignment(Arg.getParamAlign(), Ty); @@ -2943,7 +3011,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout( } assert((ClRealignStack & (ClRealignStack - 1)) == 0); size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); - Alloca->setAlignment(MaybeAlign(FrameAlignment)); + Alloca->setAlignment(Align(FrameAlignment)); return IRB.CreatePointerCast(Alloca, IntptrTy); } @@ -2982,6 +3050,59 @@ void FunctionStackPoisoner::processDynamicAllocas() { unpoisonDynamicAllocas(); } +/// Collect instructions in the entry block after \p InsBefore which initialize +/// permanent storage for a function argument. These instructions must remain in +/// the entry block so that uninitialized values do not appear in backtraces. An +/// added benefit is that this conserves spill slots. This does not move stores +/// before instrumented / "interesting" allocas. +static void findStoresToUninstrumentedArgAllocas( + AddressSanitizer &ASan, Instruction &InsBefore, + SmallVectorImpl<Instruction *> &InitInsts) { + Instruction *Start = InsBefore.getNextNonDebugInstruction(); + for (Instruction *It = Start; It; It = It->getNextNonDebugInstruction()) { + // Argument initialization looks like: + // 1) store <Argument>, <Alloca> OR + // 2) <CastArgument> = cast <Argument> to ... + // store <CastArgument> to <Alloca> + // Do not consider any other kind of instruction. + // + // Note: This covers all known cases, but may not be exhaustive. An + // alternative to pattern-matching stores is to DFS over all Argument uses: + // this might be more general, but is probably much more complicated. + if (isa<AllocaInst>(It) || isa<CastInst>(It)) + continue; + if (auto *Store = dyn_cast<StoreInst>(It)) { + // The store destination must be an alloca that isn't interesting for + // ASan to instrument. These are moved up before InsBefore, and they're + // not interesting because allocas for arguments can be mem2reg'd. + auto *Alloca = dyn_cast<AllocaInst>(Store->getPointerOperand()); + if (!Alloca || ASan.isInterestingAlloca(*Alloca)) + continue; + + Value *Val = Store->getValueOperand(); + bool IsDirectArgInit = isa<Argument>(Val); + bool IsArgInitViaCast = + isa<CastInst>(Val) && + isa<Argument>(cast<CastInst>(Val)->getOperand(0)) && + // Check that the cast appears directly before the store. Otherwise + // moving the cast before InsBefore may break the IR. + Val == It->getPrevNonDebugInstruction(); + bool IsArgInit = IsDirectArgInit || IsArgInitViaCast; + if (!IsArgInit) + continue; + + if (IsArgInitViaCast) + InitInsts.push_back(cast<Instruction>(Val)); + InitInsts.push_back(Store); + continue; + } + + // Do not reorder past unknown instructions: argument initialization should + // only involve casts and stores. + return; + } +} + void FunctionStackPoisoner::processStaticAllocas() { if (AllocaVec.empty()) { assert(StaticAllocaPoisonCallVec.empty()); @@ -3005,6 +3126,15 @@ void FunctionStackPoisoner::processStaticAllocas() { if (AI->getParent() == InsBeforeB) AI->moveBefore(InsBefore); + // Move stores of arguments into entry-block allocas as well. This prevents + // extra stack slots from being generated (to house the argument values until + // they can be stored into the allocas). This also prevents uninitialized + // values from being shown in backtraces. 
+ SmallVector<Instruction *, 8> ArgInitInsts; + findStoresToUninstrumentedArgAllocas(ASan, *InsBefore, ArgInitInsts); + for (Instruction *ArgInitInst : ArgInitInsts) + ArgInitInst->moveBefore(InsBefore); + // If we have a call to llvm.localescape, keep it in the entry block. if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore); @@ -3063,8 +3193,8 @@ void FunctionStackPoisoner::processStaticAllocas() { // 2) There is a returns_twice call (typically setjmp), which is // optimization-hostile, and doesn't play well with introduced indirect // register-relative calculation of local variable addresses. - DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall; - DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall; + DoDynamicAlloca &= !HasInlineAsm && !HasReturnsTwiceCall; + DoStackMalloc &= !HasInlineAsm && !HasReturnsTwiceCall; Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); @@ -3118,11 +3248,21 @@ void FunctionStackPoisoner::processStaticAllocas() { LocalStackBaseAlloca = LocalStackBase; } + // It shouldn't matter whether we pass an `alloca` or a `ptrtoint` as the + // dbg.declare address opereand, but passing a `ptrtoint` seems to confuse + // later passes and can result in dropped variable coverage in debug info. + Value *LocalStackBaseAllocaPtr = + isa<PtrToIntInst>(LocalStackBaseAlloca) + ? cast<PtrToIntInst>(LocalStackBaseAlloca)->getPointerOperand() + : LocalStackBaseAlloca; + assert(isa<AllocaInst>(LocalStackBaseAllocaPtr) && + "Variable descriptions relative to ASan stack base will be dropped"); + // Replace Alloca instructions with base+offset. for (const auto &Desc : SVD) { AllocaInst *AI = Desc.AI; - replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, DIExprFlags, - Desc.Offset); + replaceDbgDeclare(AI, LocalStackBaseAllocaPtr, DIB, DIExprFlags, + Desc.Offset); Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); @@ -3256,7 +3396,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { IRBuilder<> IRB(AI); - const unsigned Align = std::max(kAllocaRzSize, AI->getAlignment()); + const unsigned Alignment = std::max(kAllocaRzSize, AI->getAlignment()); const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1; Value *Zero = Constant::getNullValue(IntptrTy); @@ -3283,21 +3423,21 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { Value *Cond = IRB.CreateICmpNE(Misalign, AllocaRzSize); Value *PartialPadding = IRB.CreateSelect(Cond, Misalign, Zero); - // AdditionalChunkSize = Align + PartialPadding + kAllocaRzSize - // Align is added to locate left redzone, PartialPadding for possible + // AdditionalChunkSize = Alignment + PartialPadding + kAllocaRzSize + // Alignment is added to locate left redzone, PartialPadding for possible // partial redzone and kAllocaRzSize for right redzone respectively. Value *AdditionalChunkSize = IRB.CreateAdd( - ConstantInt::get(IntptrTy, Align + kAllocaRzSize), PartialPadding); + ConstantInt::get(IntptrTy, Alignment + kAllocaRzSize), PartialPadding); Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize); - // Insert new alloca with new NewSize and Align params. + // Insert new alloca with new NewSize and Alignment params. 
AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize); - NewAlloca->setAlignment(MaybeAlign(Align)); + NewAlloca->setAlignment(Align(Alignment)); - // NewAddress = Address + Align + // NewAddress = Address + Alignment Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), - ConstantInt::get(IntptrTy, Align)); + ConstantInt::get(IntptrTy, Alignment)); // Insert __asan_alloca_poison call for new created alloca. IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize}); diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index 9abb62ac788c7..efb11b68a1e3f 100644 --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -112,7 +112,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, /// /// \p GetTrapBB is a callable that returns the trap BB to use on failure. template <typename GetTrapBBT> -static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) { +static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) { // check if the comparison is always false ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or); if (C) { @@ -154,17 +154,22 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, Value *Or = nullptr; BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL)); if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI, - ObjSizeEval, IRB, SE); + if (!LI->isVolatile()) + Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI, + ObjSizeEval, IRB, SE); } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { - Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(), - DL, TLI, ObjSizeEval, IRB, SE); + if (!SI->isVolatile()) + Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(), + DL, TLI, ObjSizeEval, IRB, SE); } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { - Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(), - DL, TLI, ObjSizeEval, IRB, SE); + if (!AI->isVolatile()) + Or = + getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(), + DL, TLI, ObjSizeEval, IRB, SE); } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { - Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), DL, - TLI, ObjSizeEval, IRB, SE); + if (!AI->isVolatile()) + Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), + DL, TLI, ObjSizeEval, IRB, SE); } if (Or) TrapInfo.push_back(std::make_pair(&I, Or)); diff --git a/llvm/lib/Transforms/Instrumentation/CFGMST.h b/llvm/lib/Transforms/Instrumentation/CFGMST.h index 8bb6f47c4846f..9addb5d1ba938 100644 --- a/llvm/lib/Transforms/Instrumentation/CFGMST.h +++ b/llvm/lib/Transforms/Instrumentation/CFGMST.h @@ -20,6 +20,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -28,6 +29,11 @@ #define DEBUG_TYPE "cfgmst" +using namespace llvm; +static cl::opt<bool> PGOInstrumentEntry( + "pgo-instrument-entry", cl::init(false), cl::Hidden, + cl::desc("Force to instrument function entry basicblock.")); + namespace llvm { /// An union-find based Minimum Spanning Tree for CFG @@ -100,8 +106,11 @@ public: const BasicBlock 
*Entry = &(F.getEntryBlock()); uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); + // If we want to instrument the entry count, lower the weight to 0. + if (PGOInstrumentEntry) + EntryWeight = 0; Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, - *ExitOutgoing = nullptr, *ExitIncoming = nullptr; + *ExitOutgoing = nullptr, *ExitIncoming = nullptr; uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; // Add a fake edge to the entry. @@ -135,6 +144,8 @@ public: } if (BPI != nullptr) Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); + if (Weight == 0) + Weight++; auto *E = &addEdge(&*BB, TargetBB, Weight); E->IsCritical = Critical; LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to " @@ -278,6 +289,9 @@ public: buildEdges(); sortEdgesByWeight(); computeMinimumSpanningTree(); + if (PGOInstrumentEntry && (AllEdges.size() > 1)) + std::iter_swap(std::move(AllEdges.begin()), + std::move(AllEdges.begin() + AllEdges.size() - 1)); } }; diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp index 358abab3cceb7..0cc0d9b07387b 100644 --- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -10,12 +10,13 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Transforms/Instrumentation.h" @@ -23,10 +24,32 @@ using namespace llvm; -PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { +static bool +addModuleFlags(Module &M, + MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) { + if (Counts.empty()) + return false; + + LLVMContext &Context = M.getContext(); + MDBuilder MDB(Context); + std::vector<Metadata *> Nodes; + + for (auto E : Counts) { + Metadata *Vals[] = {ValueAsMetadata::get(E.first.first), + ValueAsMetadata::get(E.first.second), + MDB.createConstant(ConstantInt::get( + Type::getInt64Ty(Context), E.second))}; + Nodes.push_back(MDNode::get(Context, Vals)); + } + + M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes)); + return true; +} + +static bool runCGProfilePass( + Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) { MapVector<std::pair<Function *, Function *>, uint64_t> Counts; - FunctionAnalysisManager &FAM = - MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); InstrProfSymtab Symtab; auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F, Function *CalledF, uint64_t NewCount) { @@ -36,29 +59,32 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { Count = SaturatingAdd(Count, NewCount); }; // Ignore error here. Indirect calls are ignored if this fails. - (void)(bool)Symtab.create(M); + (void)(bool) Symtab.create(M); for (auto &F : M) { - if (F.isDeclaration()) + // Avoid extra cost of running passes for BFI when the function doesn't have + // entry count. Since LazyBlockFrequencyInfoPass only exists in LPM, check + // if using LazyBlockFrequencyInfoPass. + // TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM. 
+ if (F.isDeclaration() || (LazyBFI && !F.getEntryCount())) continue; - auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); + auto &BFI = GetBFI(F); if (BFI.getEntryFreq() == 0) continue; - TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F); + TargetTransformInfo &TTI = GetTTI(F); for (auto &BB : F) { Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB); if (!BBCount) continue; for (auto &I : BB) { - CallSite CS(&I); - if (!CS) + CallBase *CB = dyn_cast<CallBase>(&I); + if (!CB) continue; - if (CS.isIndirectCall()) { + if (CB->isIndirectCall()) { InstrProfValueData ValueData[8]; uint32_t ActualNumValueData; uint64_t TotalC; - if (!getValueProfDataFromInst(*CS.getInstruction(), - IPVK_IndirectCallTarget, 8, ValueData, - ActualNumValueData, TotalC)) + if (!getValueProfDataFromInst(*CB, IPVK_IndirectCallTarget, 8, + ValueData, ActualNumValueData, TotalC)) continue; for (const auto &VD : ArrayRef<InstrProfValueData>(ValueData, ActualNumValueData)) { @@ -66,33 +92,61 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { } continue; } - UpdateCounts(TTI, &F, CS.getCalledFunction(), *BBCount); + UpdateCounts(TTI, &F, CB->getCalledFunction(), *BBCount); } } } - addModuleFlags(M, Counts); - - return PreservedAnalyses::all(); + return addModuleFlags(M, Counts); } -void CGProfilePass::addModuleFlags( - Module &M, - MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const { - if (Counts.empty()) - return; +namespace { +struct CGProfileLegacyPass final : public ModulePass { + static char ID; + CGProfileLegacyPass() : ModulePass(ID) { + initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry()); + } - LLVMContext &Context = M.getContext(); - MDBuilder MDB(Context); - std::vector<Metadata *> Nodes; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<LazyBlockFrequencyInfoPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + } - for (auto E : Counts) { - Metadata *Vals[] = {ValueAsMetadata::get(E.first.first), - ValueAsMetadata::get(E.first.second), - MDB.createConstant(ConstantInt::get( - Type::getInt64Ty(Context), E.second))}; - Nodes.push_back(MDNode::get(Context, Vals)); + bool runOnModule(Module &M) override { + auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { + return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI(); + }; + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + }; + + return runCGProfilePass(M, GetBFI, GetTTI, true); } +}; - M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes)); +} // namespace + +char CGProfileLegacyPass::ID = 0; + +INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false, + false) + +ModulePass *llvm::createCGProfileLegacyPass() { + return new CGProfileLegacyPass(); +} + +PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { + FunctionAnalysisManager &FAM = + MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult<BlockFrequencyAnalysis>(F); + }; + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + runCGProfilePass(M, GetBFI, GetTTI, false); + + return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp 
index d35abb92dd086..a99c58b74fb1c 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -221,10 +221,8 @@ class CHRScope { "Must be siblings"); assert(getExitBlock() == Next->getEntryBlock() && "Must be adjacent"); - for (RegInfo &RI : Next->RegInfos) - RegInfos.push_back(RI); - for (CHRScope *Sub : Next->Subs) - Subs.push_back(Sub); + RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); + Subs.append(Next->Subs.begin(), Next->Subs.end()); } void addSub(CHRScope *SubIn) { @@ -246,37 +244,36 @@ class CHRScope { assert(Boundary && "Boundary null"); assert(RegInfos.begin()->R != Boundary && "Can't be split at beginning"); - auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), - [&Boundary](const RegInfo& RI) { - return Boundary == RI.R; - }); + auto BoundaryIt = llvm::find_if( + RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); if (BoundaryIt == RegInfos.end()) return nullptr; - SmallVector<RegInfo, 8> TailRegInfos; - SmallVector<CHRScope *, 8> TailSubs; - TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); - RegInfos.resize(BoundaryIt - RegInfos.begin()); + ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); DenseSet<Region *> TailRegionSet; - for (RegInfo &RI : TailRegInfos) + for (const RegInfo &RI : TailRegInfos) TailRegionSet.insert(RI.R); - for (auto It = Subs.begin(); It != Subs.end(); ) { - CHRScope *Sub = *It; - assert(Sub && "null Sub"); - Region *Parent = Sub->getParentRegion(); - if (TailRegionSet.count(Parent)) { - TailSubs.push_back(Sub); - It = Subs.erase(It); - } else { - assert(std::find_if(RegInfos.begin(), RegInfos.end(), - [&Parent](const RegInfo& RI) { - return Parent == RI.R; - }) != RegInfos.end() && - "Must be in head"); - ++It; - } - } + + auto TailIt = + std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { + assert(Sub && "null Sub"); + Region *Parent = Sub->getParentRegion(); + if (TailRegionSet.count(Parent)) + return false; + + assert(llvm::find_if(RegInfos, + [&Parent](const RegInfo &RI) { + return Parent == RI.R; + }) != RegInfos.end() && + "Must be in head"); + return true; + }); + ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); + assert(HoistStopMap.empty() && "MapHoistStops must be empty"); - return new CHRScope(TailRegInfos, TailSubs); + auto *Scope = new CHRScope(TailRegInfos, TailSubs); + RegInfos.erase(BoundaryIt, RegInfos.end()); + Subs.erase(TailIt, Subs.end()); + return Scope; } bool contains(Instruction *I) const { @@ -314,9 +311,9 @@ class CHRScope { HoistStopMapTy HoistStopMap; private: - CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, - SmallVector<CHRScope *, 8> &SubsIn) - : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} + CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) + : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), + Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} }; class CHR { @@ -340,8 +337,7 @@ class CHR { void findScopes(SmallVectorImpl<CHRScope *> &Output) { Region *R = RI.getTopLevelRegion(); - CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); - if (Scope) { + if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { Output.push_back(Scope); } } @@ -514,39 +510,36 @@ static bool isHoistable(Instruction *I, DominatorTree &DT) { // first-region entry block) or the (hoistable or unhoistable) base values that // are defined outside (including the first-region entry block) of the // 
scope. The returned set doesn't include constants. -static std::set<Value *> getBaseValues( - Value *V, DominatorTree &DT, - DenseMap<Value *, std::set<Value *>> &Visited) { - if (Visited.count(V)) { - return Visited[V]; +static const std::set<Value *> & +getBaseValues(Value *V, DominatorTree &DT, + DenseMap<Value *, std::set<Value *>> &Visited) { + auto It = Visited.find(V); + if (It != Visited.end()) { + return It->second; } std::set<Value *> Result; if (auto *I = dyn_cast<Instruction>(V)) { - // We don't stop at a block that's not in the Scope because we would miss some - // instructions that are based on the same base values if we stop there. + // We don't stop at a block that's not in the Scope because we would miss + // some instructions that are based on the same base values if we stop + // there. if (!isHoistable(I, DT)) { Result.insert(I); - Visited.insert(std::make_pair(V, Result)); - return Result; + return Visited.insert(std::make_pair(V, std::move(Result))).first->second; } // I is hoistable above the Scope. for (Value *Op : I->operands()) { - std::set<Value *> OpResult = getBaseValues(Op, DT, Visited); + const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); Result.insert(OpResult.begin(), OpResult.end()); } - Visited.insert(std::make_pair(V, Result)); - return Result; + return Visited.insert(std::make_pair(V, std::move(Result))).first->second; } if (isa<Argument>(V)) { Result.insert(V); - Visited.insert(std::make_pair(V, Result)); - return Result; } // We don't include others like constants because those won't lead to any // chance of folding of conditions (eg two bit checks merged into one check) // after CHR. - Visited.insert(std::make_pair(V, Result)); - return Result; // empty + return Visited.insert(std::make_pair(V, std::move(Result))).first->second; } // Return true if V is already hoisted or can be hoisted (along with its @@ -560,8 +553,9 @@ checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, DenseMap<Instruction *, bool> &Visited) { assert(InsertPoint && "Null InsertPoint"); if (auto *I = dyn_cast<Instruction>(V)) { - if (Visited.count(I)) { - return Visited[I]; + auto It = Visited.find(I); + if (It != Visited.end()) { + return It->second; } assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); @@ -1094,11 +1088,11 @@ static bool shouldSplit(Instruction *InsertPoint, std::set<Value *> PrevBases, Bases; DenseMap<Value *, std::set<Value *>> Visited; for (Value *V : PrevConditionValues) { - std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); + const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); PrevBases.insert(BaseValues.begin(), BaseValues.end()); } for (Value *V : ConditionValues) { - std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); + const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); Bases.insert(BaseValues.begin(), BaseValues.end()); } CHR_DEBUG( @@ -1111,10 +1105,9 @@ static bool shouldSplit(Instruction *InsertPoint, dbgs() << *V << ", "; } dbgs() << "\n"); - std::set<Value *> Intersection; - std::set_intersection(PrevBases.begin(), PrevBases.end(), - Bases.begin(), Bases.end(), - std::inserter(Intersection, Intersection.begin())); + std::vector<Value *> Intersection; + std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), + Bases.end(), std::back_inserter(Intersection)); if (Intersection.empty()) { // Empty intersection, split. CHR_DEBUG(dbgs() << "Split. 
Intersection empty\n"); @@ -1439,7 +1432,7 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { setCHRRegions(Sub, OutermostScope); } -bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { +static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); } @@ -1578,26 +1571,24 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, static void insertTrivialPHIs(CHRScope *Scope, BasicBlock *EntryBlock, BasicBlock *ExitBlock, DenseSet<PHINode *> &TrivialPHIs) { - DenseSet<BasicBlock *> BlocksInScopeSet; - SmallVector<BasicBlock *, 8> BlocksInScopeVec; + SmallSetVector<BasicBlock *, 8> BlocksInScope; for (RegInfo &RI : Scope->RegInfos) { for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the // sub-Scopes. - BlocksInScopeSet.insert(BB); - BlocksInScopeVec.push_back(BB); + BlocksInScope.insert(BB); } } - CHR_DEBUG( - dbgs() << "Inserting redudant phis\n"; - for (BasicBlock *BB : BlocksInScopeVec) { - dbgs() << "BlockInScope " << BB->getName() << "\n"; - }); - for (BasicBlock *BB : BlocksInScopeVec) { + CHR_DEBUG({ + dbgs() << "Inserting redundant phis\n"; + for (BasicBlock *BB : BlocksInScope) + dbgs() << "BlockInScope " << BB->getName() << "\n"; + }); + for (BasicBlock *BB : BlocksInScope) { for (Instruction &I : *BB) { SmallVector<Instruction *, 8> Users; for (User *U : I.users()) { if (auto *UI = dyn_cast<Instruction>(U)) { - if (BlocksInScopeSet.count(UI->getParent()) == 0 && + if (BlocksInScope.count(UI->getParent()) == 0 && // Unless there's already a phi for I at the exit block. !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { CHR_DEBUG(dbgs() << "V " << I << "\n"); @@ -1874,9 +1865,10 @@ void CHR::fixupBranchesAndSelects(CHRScope *Scope, << " branches or selects"; }); MergedBR->setCondition(MergedCondition); - SmallVector<uint32_t, 2> Weights; - Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); - Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); + uint32_t Weights[] = { + static_cast<uint32_t>(CHRBranchBias.scale(1000)), + static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), + }; MDBuilder MDB(F.getContext()); MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] @@ -2101,8 +2093,7 @@ PreservedAnalyses ControlHeightReductionPass::run( auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); - auto &MAM = MAMProxy.getManager(); - auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); auto &RI = FAM.getResult<RegionInfoAnalysis>(F); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index cf9a6a321c7a3..2846319007318 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -59,7 +59,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ 
-162,10 +161,25 @@ static cl::opt<bool> ClDebugNonzeroLabels( "load or return with a nonzero label"), cl::Hidden); +// Experimental feature that inserts callbacks for certain data events. +// Currently callbacks are only inserted for loads, stores, memory transfers +// (i.e. memcpy and memmove), and comparisons. +// +// If this flag is set to true, the user must provide definitions for the +// following callback functions: +// void __dfsan_load_callback(dfsan_label Label); +// void __dfsan_store_callback(dfsan_label Label); +// void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len); +// void __dfsan_cmp_callback(dfsan_label CombinedLabel); +static cl::opt<bool> ClEventCallbacks( + "dfsan-event-callbacks", + cl::desc("Insert calls to __dfsan_*_callback functions on data events."), + cl::Hidden, cl::init(false)); + static StringRef GetGlobalTypeString(const GlobalValue &G) { // Types of GlobalVariables are always pointer types. Type *GType = G.getValueType(); - // For now we support blacklisting struct types only. + // For now we support excluding struct types only. if (StructType *SGType = dyn_cast<StructType>(GType)) { if (!SGType->isLiteral()) return SGType->getName(); @@ -282,9 +296,7 @@ class DataFlowSanitizer : public ModulePass { friend struct DFSanFunction; friend class DFSanVisitor; - enum { - ShadowWidth = 16 - }; + enum { ShadowWidthBits = 16, ShadowWidthBytes = ShadowWidthBits / 8 }; /// Which ABI should be used for instrumented functions? enum InstrumentedABI { @@ -345,6 +357,8 @@ class DataFlowSanitizer : public ModulePass { FunctionType *DFSanSetLabelFnTy; FunctionType *DFSanNonzeroLabelFnTy; FunctionType *DFSanVarargWrapperFnTy; + FunctionType *DFSanLoadStoreCmpCallbackFnTy; + FunctionType *DFSanMemTransferCallbackFnTy; FunctionCallee DFSanUnionFn; FunctionCallee DFSanCheckedUnionFn; FunctionCallee DFSanUnionLoadFn; @@ -352,6 +366,10 @@ class DataFlowSanitizer : public ModulePass { FunctionCallee DFSanSetLabelFn; FunctionCallee DFSanNonzeroLabelFn; FunctionCallee DFSanVarargWrapperFn; + FunctionCallee DFSanLoadCallbackFn; + FunctionCallee DFSanStoreCallbackFn; + FunctionCallee DFSanMemTransferCallbackFn; + FunctionCallee DFSanCmpCallbackFn; MDNode *ColdCallWeights; DFSanABIList ABIList; DenseMap<Value *, Function *> UnwrappedFnMap; @@ -371,6 +389,8 @@ class DataFlowSanitizer : public ModulePass { GlobalValue::LinkageTypes NewFLink, FunctionType *NewFT); Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); + void initializeCallbackFunctions(Module &M); + void initializeRuntimeFunctions(Module &M); public: static char ID; @@ -424,7 +444,7 @@ struct DFSanFunction { Value *combineOperandShadows(Instruction *Inst); Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align, Instruction *Pos); - void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow, + void storeShadow(Value *Addr, uint64_t Size, Align Alignment, Value *Shadow, Instruction *Pos); }; @@ -438,7 +458,10 @@ public: return DFSF.F->getParent()->getDataLayout(); } - void visitOperandShadowInst(Instruction &I); + // Combines shadow values for all of I's operands. Returns the combined shadow + // value. 
+ Value *visitOperandShadowInst(Instruction &I); + void visitUnaryOperator(UnaryOperator &UO); void visitBinaryOperator(BinaryOperator &BO); void visitCastInst(CastInst &CI); @@ -447,7 +470,7 @@ public: void visitLoadInst(LoadInst &LI); void visitStoreInst(StoreInst &SI); void visitReturnInst(ReturnInst &RI); - void visitCallSite(CallSite CS); + void visitCallBase(CallBase &CB); void visitPHINode(PHINode &PN); void visitExtractElementInst(ExtractElementInst &I); void visitInsertElementInst(InsertElementInst &I); @@ -553,11 +576,11 @@ bool DataFlowSanitizer::doInitialization(Module &M) { Mod = &M; Ctx = &M.getContext(); - ShadowTy = IntegerType::get(*Ctx, ShadowWidth); + ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits); ShadowPtrTy = PointerType::getUnqual(ShadowTy); IntptrTy = DL.getIntPtrType(*Ctx); ZeroShadow = ConstantInt::getSigned(ShadowTy, 0); - ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8); + ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidthBytes); if (IsX86_64) ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); else if (IsMIPS64) @@ -583,6 +606,12 @@ bool DataFlowSanitizer::doInitialization(Module &M) { Type::getVoidTy(*Ctx), None, /*isVarArg=*/false); DFSanVarargWrapperFnTy = FunctionType::get( Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + DFSanLoadStoreCmpCallbackFnTy = + FunctionType::get(Type::getVoidTy(*Ctx), ShadowTy, /*isVarArg=*/false); + Type *DFSanMemTransferCallbackArgs[2] = {ShadowPtrTy, IntptrTy}; + DFSanMemTransferCallbackFnTy = + FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs, + /*isVarArg=*/false); if (GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); @@ -628,7 +657,7 @@ DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { } void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { - std::string GVName = GV->getName(), Prefix = "dfs$"; + std::string GVName = std::string(GV->getName()), Prefix = "dfs$"; GV->setName(Prefix + GVName); // Try to change the name of the function in module inline asm. 
We only do @@ -713,25 +742,8 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, return cast<Constant>(C.getCallee()); } -bool DataFlowSanitizer::runOnModule(Module &M) { - if (ABIList.isIn(M, "skip")) - return false; - - if (!GetArgTLSPtr) { - Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); - ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); - if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) - G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); - } - if (!GetRetvalTLSPtr) { - RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy); - if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) - G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); - } - - ExternalShadowMask = - Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); - +// Initialize DataFlowSanitizer runtime functions and declare them in the module +void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { { AttributeList AL; AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, @@ -745,7 +757,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL); } - { AttributeList AL; AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, @@ -782,6 +793,50 @@ bool DataFlowSanitizer::runOnModule(Module &M) { Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper", DFSanVarargWrapperFnTy); +} + +// Initializes event callback functions and declare them in the module +void DataFlowSanitizer::initializeCallbackFunctions(Module &M) { + DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback", + DFSanLoadStoreCmpCallbackFnTy); + DFSanStoreCallbackFn = Mod->getOrInsertFunction( + "__dfsan_store_callback", DFSanLoadStoreCmpCallbackFnTy); + DFSanMemTransferCallbackFn = Mod->getOrInsertFunction( + "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy); + DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback", + DFSanLoadStoreCmpCallbackFnTy); +} + +bool DataFlowSanitizer::runOnModule(Module &M) { + if (ABIList.isIn(M, "skip")) + return false; + + const unsigned InitialGlobalSize = M.global_size(); + const unsigned InitialModuleSize = M.size(); + + bool Changed = false; + + if (!GetArgTLSPtr) { + Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); + ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); + if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) { + Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel; + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + } + if (!GetRetvalTLSPtr) { + RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy); + if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) { + Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel; + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + } + + ExternalShadowMask = + Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); + + initializeCallbackFunctions(M); + initializeRuntimeFunctions(M); std::vector<Function *> FnsToInstrument; SmallPtrSet<Function *, 2> FnsWithNativeABI; @@ -793,7 +848,11 @@ bool DataFlowSanitizer::runOnModule(Module &M) { &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() && &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() && &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() && - &i != 
DFSanVarargWrapperFn.getCallee()->stripPointerCasts()) + &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts() && + &i != DFSanLoadCallbackFn.getCallee()->stripPointerCasts() && + &i != DFSanStoreCallbackFn.getCallee()->stripPointerCasts() && + &i != DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts() && + &i != DFSanCmpCallbackFn.getCallee()->stripPointerCasts()) FnsToInstrument.push_back(&i); } @@ -994,7 +1053,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } } - return false; + return Changed || !FnsToInstrument.empty() || + M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize; } Value *DFSanFunction::getArgTLSPtr() { @@ -1177,9 +1237,10 @@ Value *DFSanFunction::combineOperandShadows(Instruction *Inst) { return Shadow; } -void DFSanVisitor::visitOperandShadowInst(Instruction &I) { +Value *DFSanVisitor::visitOperandShadowInst(Instruction &I) { Value *CombinedShadow = DFSF.combineOperandShadows(&I); DFSF.setShadow(&I, CombinedShadow); + return CombinedShadow; } // Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where @@ -1194,7 +1255,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, } } - uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8; + const llvm::Align ShadowAlign(Align * DFS.ShadowWidthBytes); SmallVector<const Value *, 2> Objs; GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout()); bool AllConstants = true; @@ -1216,7 +1277,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, return DFS.ZeroShadow; case 1: { LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos); - LI->setAlignment(MaybeAlign(ShadowAlign)); + LI->setAlignment(ShadowAlign); return LI; } case 2: { @@ -1228,7 +1289,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos); } } - if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) { + if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidthBits) == 0) { // Fast path for the common case where each byte has identical shadow: load // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any // shadow is non-equal. @@ -1240,15 +1301,15 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); // Compare each of the shadows stored in the loaded 64 bits to each other, - // by computing (WideShadow rotl ShadowWidth) == WideShadow. + // by computing (WideShadow rotl ShadowWidthBits) == WideShadow. 
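As a minimal standalone illustration of the lane-equality check described in the comment above (plain C++ rather than the IRBuilder calls that follow; not part of the patch): with four 16-bit shadows packed into one 64-bit load, the word equals its own 16-bit rotation exactly when all four shadows are identical.

#include <cstdint>

// Sketch of the (WideShadow rotl ShadowWidthBits) == WideShadow test. If
// rotating by 16 bits leaves the word unchanged, each 16-bit lane equals its
// neighbour, so all four lanes carry the same shadow value.
static bool allShadowLanesEqual(uint64_t WideShadow) {
  uint64_t Rot = (WideShadow << 16) | (WideShadow >> 48);
  return WideShadow == Rot;
}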
IRBuilder<> IRB(Pos); Value *WideAddr = IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx)); Value *WideShadow = IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign); Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy); - Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth); - Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth); + Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidthBits); + Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidthBits); Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow); Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow); @@ -1271,8 +1332,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, ReplaceInstWithInst(Head->getTerminator(), LastBr); DT.addNewBlock(FallbackBB, Head); - for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size; - Ofs += 64 / DFS.ShadowWidth) { + for (uint64_t Ofs = 64 / DFS.ShadowWidthBits; Ofs != Size; + Ofs += 64 / DFS.ShadowWidthBits) { BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F); DT.addNewBlock(NextBB, LastBr->getParent()); IRBuilder<> NextIRB(NextBB); @@ -1308,16 +1369,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { return; } - uint64_t Align; - if (ClPreserveAlignment) { - Align = LI.getAlignment(); - if (Align == 0) - Align = DL.getABITypeAlignment(LI.getType()); - } else { - Align = 1; - } - IRBuilder<> IRB(&LI); - Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI); + Align Alignment = ClPreserveAlignment ? LI.getAlign() : Align(1); + Value *Shadow = + DFSF.loadShadow(LI.getPointerOperand(), Size, Alignment.value(), &LI); if (ClCombinePointerLabelsOnLoad) { Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI); @@ -1326,9 +1380,13 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { DFSF.NonZeroChecks.push_back(Shadow); DFSF.setShadow(&LI, Shadow); + if (ClEventCallbacks) { + IRBuilder<> IRB(&LI); + IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, Shadow); + } } -void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, +void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, Align Alignment, Value *Shadow, Instruction *Pos) { if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { const auto i = AllocaShadowMap.find(AI); @@ -1339,11 +1397,12 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, } } - uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8; + const Align ShadowAlign(Alignment.value() * DFS.ShadowWidthBytes); IRBuilder<> IRB(Pos); Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); if (Shadow == DFS.ZeroShadow) { - IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth); + IntegerType *ShadowTy = + IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits); Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0); Value *ExtShadowAddr = IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy)); @@ -1351,10 +1410,10 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, return; } - const unsigned ShadowVecSize = 128 / DFS.ShadowWidth; + const unsigned ShadowVecSize = 128 / DFS.ShadowWidthBits; uint64_t Offset = 0; if (Size >= ShadowVecSize) { - VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize); + auto *ShadowVecTy = FixedVectorType::get(DFS.ShadowTy, ShadowVecSize); Value *ShadowVec = UndefValue::get(ShadowVecTy); for (unsigned i = 0; i != ShadowVecSize; ++i) { ShadowVec = IRB.CreateInsertElement( @@ -1386,21 
+1445,18 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) { if (Size == 0) return; - uint64_t Align; - if (ClPreserveAlignment) { - Align = SI.getAlignment(); - if (Align == 0) - Align = DL.getABITypeAlignment(SI.getValueOperand()->getType()); - } else { - Align = 1; - } + const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1); Value* Shadow = DFSF.getShadow(SI.getValueOperand()); if (ClCombinePointerLabelsOnStore) { Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand()); Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI); } - DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI); + DFSF.storeShadow(SI.getPointerOperand(), Size, Alignment, Shadow, &SI); + if (ClEventCallbacks) { + IRBuilder<> IRB(&SI); + IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, Shadow); + } } void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) { @@ -1413,7 +1469,13 @@ void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) { void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); } -void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); } +void DFSanVisitor::visitCmpInst(CmpInst &CI) { + Value *CombinedShadow = visitOperandShadowInst(CI); + if (ClEventCallbacks) { + IRBuilder<> IRB(&CI); + IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow); + } +} void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { visitOperandShadowInst(GEPI); @@ -1493,23 +1555,27 @@ void DFSanVisitor::visitMemSetInst(MemSetInst &I) { void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { IRBuilder<> IRB(&I); - Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); + Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I); - Value *LenShadow = IRB.CreateMul( - I.getLength(), - ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8)); + Value *LenShadow = + IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(), + DFSF.DFS.ShadowWidthBytes)); Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); - DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr); + Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr); SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); auto *MTI = cast<MemTransferInst>( - IRB.CreateCall(I.getFunctionType(), I.getCalledValue(), + IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()})); if (ClPreserveAlignment) { - MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8)); - MTI->setSourceAlignment(I.getSourceAlignment() * (DFSF.DFS.ShadowWidth / 8)); + MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes); + MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes); } else { - MTI->setDestAlignment(DFSF.DFS.ShadowWidth / 8); - MTI->setSourceAlignment(DFSF.DFS.ShadowWidth / 8); + MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes)); + MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes)); + } + if (ClEventCallbacks) { + IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn, + {RawDestShadow, I.getLength()}); } } @@ -1536,10 +1602,10 @@ void DFSanVisitor::visitReturnInst(ReturnInst &RI) { } } -void DFSanVisitor::visitCallSite(CallSite CS) { - Function *F = CS.getCalledFunction(); - if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) { - visitOperandShadowInst(*CS.getInstruction()); +void DFSanVisitor::visitCallBase(CallBase &CB) { + Function *F = CB.getCalledFunction(); + if ((F && 
F->isIntrinsic()) || CB.isInlineAsm()) { + visitOperandShadowInst(CB); return; } @@ -1548,32 +1614,32 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts()) return; - IRBuilder<> IRB(CS.getInstruction()); + IRBuilder<> IRB(&CB); DenseMap<Value *, Function *>::iterator i = - DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue()); + DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand()); if (i != DFSF.DFS.UnwrappedFnMap.end()) { Function *F = i->second; switch (DFSF.DFS.getWrapperKind(F)) { case DataFlowSanitizer::WK_Warning: - CS.setCalledFunction(F); + CB.setCalledFunction(F); IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, IRB.CreateGlobalStringPtr(F->getName())); - DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + DFSF.setShadow(&CB, DFSF.DFS.ZeroShadow); return; case DataFlowSanitizer::WK_Discard: - CS.setCalledFunction(F); - DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + CB.setCalledFunction(F); + DFSF.setShadow(&CB, DFSF.DFS.ZeroShadow); return; case DataFlowSanitizer::WK_Functional: - CS.setCalledFunction(F); - visitOperandShadowInst(*CS.getInstruction()); + CB.setCalledFunction(F); + visitOperandShadowInst(CB); return; case DataFlowSanitizer::WK_Custom: // Don't try to handle invokes of custom functions, it's too complicated. // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_ // wrapper. - if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + if (CallInst *CI = dyn_cast<CallInst>(&CB)) { FunctionType *FT = F->getFunctionType(); TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT); std::string CustomFName = "__dfsw_"; @@ -1592,7 +1658,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { std::vector<Value *> Args; - CallSite::arg_iterator i = CS.arg_begin(); + auto i = CB.arg_begin(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) { Type *T = (*i)->getType(); FunctionType *ParamFT; @@ -1612,19 +1678,19 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } } - i = CS.arg_begin(); + i = CB.arg_begin(); const unsigned ShadowArgStart = Args.size(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back(DFSF.getShadow(*i)); if (FT->isVarArg()) { auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, - CS.arg_size() - FT->getNumParams()); + CB.arg_size() - FT->getNumParams()); auto *LabelVAAlloca = new AllocaInst( LabelVATy, getDataLayout().getAllocaAddrSpace(), "labelva", &DFSF.F->getEntryBlock().front()); - for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { + for (unsigned n = 0; i != CB.arg_end(); ++i, ++n) { auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr); } @@ -1642,7 +1708,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { Args.push_back(DFSF.LabelReturnAlloca); } - for (i = CS.arg_begin() + FT->getNumParams(); i != CS.arg_end(); ++i) + for (i = CB.arg_begin() + FT->getNumParams(); i != CB.arg_end(); ++i) Args.push_back(*i); CallInst *CustomCI = IRB.CreateCall(CustomF, Args); @@ -1673,18 +1739,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } } - FunctionType *FT = cast<FunctionType>( - CS.getCalledValue()->getType()->getPointerElementType()); + FunctionType *FT = CB.getFunctionType(); if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) { - IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)), - DFSF.getArgTLS(i, CS.getInstruction())); + IRB.CreateStore(DFSF.getShadow(CB.getArgOperand(i)), + 
DFSF.getArgTLS(i, &CB)); } } Instruction *Next = nullptr; - if (!CS.getType()->isVoidTy()) { - if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { + if (!CB.getType()->isVoidTy()) { + if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { if (II->getNormalDest()->getSinglePredecessor()) { Next = &II->getNormalDest()->front(); } else { @@ -1693,15 +1758,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) { Next = &NewBB->front(); } } else { - assert(CS->getIterator() != CS->getParent()->end()); - Next = CS->getNextNode(); + assert(CB.getIterator() != CB.getParent()->end()); + Next = CB.getNextNode(); } if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { IRBuilder<> NextIRB(Next); LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS()); DFSF.SkipInsts.insert(LI); - DFSF.setShadow(CS.getInstruction(), LI); + DFSF.setShadow(&CB, LI); DFSF.NonZeroChecks.push_back(LI); } } @@ -1711,25 +1776,25 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) { FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT); Value *Func = - IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT)); + IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT)); std::vector<Value *> Args; - CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + auto i = CB.arg_begin(), E = CB.arg_end(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back(*i); - i = CS.arg_begin(); + i = CB.arg_begin(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back(DFSF.getShadow(*i)); if (FT->isVarArg()) { - unsigned VarArgSize = CS.arg_size() - FT->getNumParams(); + unsigned VarArgSize = CB.arg_size() - FT->getNumParams(); ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(), "", &DFSF.F->getEntryBlock().front()); Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); - for (unsigned n = 0; i != e; ++i, ++n) { + for (unsigned n = 0; i != E; ++i, ++n) { IRB.CreateStore( DFSF.getShadow(*i), IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n)); @@ -1737,32 +1802,30 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } } - CallSite NewCS; - if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { - NewCS = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(), + CallBase *NewCB; + if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { + NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(), II->getUnwindDest(), Args); } else { - NewCS = IRB.CreateCall(NewFT, Func, Args); + NewCB = IRB.CreateCall(NewFT, Func, Args); } - NewCS.setCallingConv(CS.getCallingConv()); - NewCS.setAttributes(CS.getAttributes().removeAttributes( + NewCB->setCallingConv(CB.getCallingConv()); + NewCB->setAttributes(CB.getAttributes().removeAttributes( *DFSF.DFS.Ctx, AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType()))); + AttributeFuncs::typeIncompatible(NewCB->getType()))); if (Next) { - ExtractValueInst *ExVal = - ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next); + ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next); DFSF.SkipInsts.insert(ExVal); - ExtractValueInst *ExShadow = - ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next); + ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next); DFSF.SkipInsts.insert(ExShadow); DFSF.setShadow(ExVal, ExShadow); 
DFSF.NonZeroChecks.push_back(ExShadow); - CS.getInstruction()->replaceAllUsesWith(ExVal); + CB.replaceAllUsesWith(ExVal); } - CS.getInstruction()->eraseFromParent(); + CB.eraseFromParent(); } } diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index bf3e4ed3e31f2..d8a965a90127b 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -45,24 +45,34 @@ #include <memory> #include <string> #include <utility> + using namespace llvm; +namespace endian = llvm::support::endian; #define DEBUG_TYPE "insert-gcov-profiling" -static cl::opt<std::string> -DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, - cl::ValueRequired); -static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body", - cl::init(false), cl::Hidden); +enum : uint32_t { + GCOV_TAG_FUNCTION = 0x01000000, + GCOV_TAG_BLOCKS = 0x01410000, + GCOV_TAG_ARCS = 0x01430000, + GCOV_TAG_LINES = 0x01450000, +}; + +static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version", + cl::init("408*"), cl::Hidden, + cl::ValueRequired); + +// Returns the number of words which will be used to represent this string. +static unsigned wordsOfString(StringRef s) { + // Length + NUL-terminated string + 0~3 padding NULs. + return (s.size() / 4) + 2; +} GCOVOptions GCOVOptions::getDefault() { GCOVOptions Options; Options.EmitNotes = true; Options.EmitData = true; - Options.UseCfgChecksum = false; Options.NoRedZone = false; - Options.FunctionNamesInData = true; - Options.ExitBlockBeforeBody = DefaultExitBlockBeforeBody; if (DefaultGCOVVersion.size() != 4) { llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + @@ -78,19 +88,23 @@ class GCOVFunction; class GCOVProfiler { public: GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} - GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) { - assert((Options.EmitNotes || Options.EmitData) && - "GCOVProfiler asked to do nothing?"); - ReversedVersion[0] = Options.Version[3]; - ReversedVersion[1] = Options.Version[2]; - ReversedVersion[2] = Options.Version[1]; - ReversedVersion[3] = Options.Version[0]; - ReversedVersion[4] = '\0'; - } + GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {} bool runOnModule(Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI); + void write(uint32_t i) { + char Bytes[4]; + endian::write32(Bytes, i, Endian); + os->write(Bytes, 4); + } + void writeString(StringRef s) { + write(wordsOfString(s) - 1); + os->write(s.data(), s.size()); + os->write_zeros(4 - s.size() % 4); + } + void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); } + private: // Create the .gcno files for the Module based on DebugInfo. void emitProfileNotes(); @@ -115,17 +129,18 @@ private: // list. Function * insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); - Function *insertFlush(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); + Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); + Function *insertFlush(Function *ResetF); - void AddFlushBeforeForkAndExec(); + bool AddFlushBeforeForkAndExec(); enum class GCovFileType { GCNO, GCDA }; std::string mangleName(const DICompileUnit *CU, GCovFileType FileType); GCOVOptions Options; + support::endianness Endian; + raw_ostream *os; - // Reversed, NUL-terminated copy of Options.Version. 
- char ReversedVersion[5]; // Checksum, produced by hash of EdgeDestinations SmallVector<uint32_t, 4> FileChecksums; @@ -200,48 +215,15 @@ static SmallString<128> getFilename(const DISubprogram *SP) { namespace { class GCOVRecord { - protected: - static const char *const LinesTag; - static const char *const FunctionTag; - static const char *const BlockTag; - static const char *const EdgeTag; - - GCOVRecord() = default; - - void writeBytes(const char *Bytes, int Size) { - os->write(Bytes, Size); - } - - void write(uint32_t i) { - writeBytes(reinterpret_cast<char*>(&i), 4); - } - - // Returns the length measured in 4-byte blocks that will be used to - // represent this string in a GCOV file - static unsigned lengthOfGCOVString(StringRef s) { - // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs - // padding out to the next 4-byte word. The length is measured in 4-byte - // words including padding, not bytes of actual string. - return (s.size() / 4) + 1; - } - - void writeGCOVString(StringRef s) { - uint32_t Len = lengthOfGCOVString(s); - write(Len); - writeBytes(s.data(), s.size()); + protected: + GCOVProfiler *P; - // Write 1 to 4 bytes of NUL padding. - assert((unsigned)(4 - (s.size() % 4)) > 0); - assert((unsigned)(4 - (s.size() % 4)) <= 4); - writeBytes("\0\0\0\0", 4 - (s.size() % 4)); - } + GCOVRecord(GCOVProfiler *P) : P(P) {} - raw_ostream *os; + void write(uint32_t i) { P->write(i); } + void writeString(StringRef s) { P->writeString(s); } + void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); } }; - const char *const GCOVRecord::LinesTag = "\0\0\x45\x01"; - const char *const GCOVRecord::FunctionTag = "\0\0\0\1"; - const char *const GCOVRecord::BlockTag = "\0\0\x41\x01"; - const char *const GCOVRecord::EdgeTag = "\0\0\x43\x01"; class GCOVFunction; class GCOVBlock; @@ -257,23 +239,20 @@ namespace { } uint32_t length() const { - // Here 2 = 1 for string length + 1 for '0' id#. - return lengthOfGCOVString(Filename) + 2 + Lines.size(); + return 1 + wordsOfString(Filename) + Lines.size(); } void writeOut() { write(0); - writeGCOVString(Filename); + writeString(Filename); for (int i = 0, e = Lines.size(); i != e; ++i) write(Lines[i]); } - GCOVLines(StringRef F, raw_ostream *os) - : Filename(F) { - this->os = os; - } + GCOVLines(GCOVProfiler *P, StringRef F) + : GCOVRecord(P), Filename(std::string(F)) {} - private: + private: std::string Filename; SmallVector<uint32_t, 32> Lines; }; @@ -285,7 +264,7 @@ namespace { class GCOVBlock : public GCOVRecord { public: GCOVLines &getFile(StringRef Filename) { - return LinesByFile.try_emplace(Filename, Filename, os).first->second; + return LinesByFile.try_emplace(Filename, P, Filename).first->second; } void addEdge(GCOVBlock &Successor) { @@ -300,7 +279,7 @@ namespace { SortedLinesByFile.push_back(&I); } - writeBytes(LinesTag, 4); + write(GCOV_TAG_LINES); write(Len); write(Number); @@ -325,10 +304,8 @@ namespace { private: friend class GCOVFunction; - GCOVBlock(uint32_t Number, raw_ostream *os) - : Number(Number) { - this->os = os; - } + GCOVBlock(GCOVProfiler *P, uint32_t Number) + : GCOVRecord(P), Number(Number) {} uint32_t Number; StringMap<GCOVLines> LinesByFile; @@ -339,21 +316,19 @@ namespace { // set of blocks and a map of edges between blocks. This is the only GCOV // object users can construct, the blocks and lines will be rooted here. 
class GCOVFunction : public GCOVRecord { - public: - GCOVFunction(const DISubprogram *SP, Function *F, raw_ostream *os, - uint32_t Ident, bool UseCfgChecksum, bool ExitBlockBeforeBody) - : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0), - ReturnBlock(1, os) { - this->os = os; - + public: + GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP, + unsigned EndLine, uint32_t Ident, int Version) + : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident), + Version(Version), ReturnBlock(P, 1) { LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); - + bool ExitBlockBeforeBody = Version >= 48; uint32_t i = 0; for (auto &BB : *F) { // Skip index 1 if it's assigned to the ReturnBlock. if (i == 1 && ExitBlockBeforeBody) ++i; - Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os))); + Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++))); } if (!ExitBlockBeforeBody) ReturnBlock.Number = i; @@ -389,42 +364,56 @@ namespace { return FuncChecksum; } - void setCfgChecksum(uint32_t Checksum) { - CfgChecksum = Checksum; - } - - void writeOut() { - writeBytes(FunctionTag, 4); + void writeOut(uint32_t CfgChecksum) { + write(GCOV_TAG_FUNCTION); SmallString<128> Filename = getFilename(SP); - uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + - 1 + lengthOfGCOVString(Filename) + 1; - if (UseCfgChecksum) - ++BlockLen; + uint32_t BlockLen = + 2 + (Version >= 47) + wordsOfString(getFunctionName(SP)); + if (Version < 80) + BlockLen += wordsOfString(Filename) + 1; + else + BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90); + write(BlockLen); write(Ident); write(FuncChecksum); - if (UseCfgChecksum) + if (Version >= 47) write(CfgChecksum); - writeGCOVString(getFunctionName(SP)); - writeGCOVString(Filename); - write(SP->getLine()); + writeString(getFunctionName(SP)); + if (Version < 80) { + writeString(Filename); + write(SP->getLine()); + } else { + write(SP->isArtificial()); // artificial + writeString(Filename); + write(SP->getLine()); // start_line + write(0); // start_column + // EndLine is the last line with !dbg. It is not the } line as in GCC, + // but good enough. + write(EndLine); + if (Version >= 90) + write(0); // end_column + } // Emit count of blocks. - writeBytes(BlockTag, 4); - write(Blocks.size() + 1); - for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { - write(0); // No flags on our blocks. + write(GCOV_TAG_BLOCKS); + if (Version < 80) { + write(Blocks.size() + 1); + for (int i = Blocks.size() + 1; i; --i) + write(0); + } else { + write(1); + write(Blocks.size() + 1); } - LLVM_DEBUG(dbgs() << Blocks.size() << " blocks.\n"); + LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n"); // Emit edges between blocks. 
- if (Blocks.empty()) return; Function *F = Blocks.begin()->first->getParent(); for (BasicBlock &I : *F) { GCOVBlock &Block = getBlock(&I); if (Block.OutEdges.empty()) continue; - writeBytes(EdgeTag, 4); + write(GCOV_TAG_ARCS); write(Block.OutEdges.size() * 2 + 1); write(Block.Number); for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { @@ -440,12 +429,12 @@ namespace { getBlock(&I).writeOut(); } - private: - const DISubprogram *SP; + private: + const DISubprogram *SP; + unsigned EndLine; uint32_t Ident; uint32_t FuncChecksum; - bool UseCfgChecksum; - uint32_t CfgChecksum; + int Version; DenseMap<BasicBlock *, GCOVBlock> Blocks; GCOVBlock ReturnBlock; }; @@ -473,11 +462,9 @@ std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) { bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename, std::vector<Regex> &Regexes) { - for (Regex &Re : Regexes) { - if (Re.match(Filename)) { + for (Regex &Re : Regexes) + if (Re.match(Filename)) return true; - } - } return false; } @@ -537,7 +524,8 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, MDString *DataFile = dyn_cast<MDString>(N->getOperand(1)); if (!NotesFile || !DataFile) continue; - return Notes ? NotesFile->getString() : DataFile->getString(); + return std::string(Notes ? NotesFile->getString() + : DataFile->getString()); } MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0)); @@ -546,7 +534,7 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, SmallString<128> Filename = GCovFile->getString(); sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda"); - return Filename.str(); + return std::string(Filename.str()); } } @@ -554,9 +542,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda"); StringRef FName = sys::path::filename(Filename); SmallString<128> CurPath; - if (sys::fs::current_path(CurPath)) return FName; + if (sys::fs::current_path(CurPath)) + return std::string(FName); sys::path::append(CurPath, FName); - return CurPath.str(); + return std::string(CurPath.str()); } bool GCOVProfiler::runOnModule( @@ -565,14 +554,15 @@ bool GCOVProfiler::runOnModule( this->GetTLI = std::move(GetTLI); Ctx = &M.getContext(); - AddFlushBeforeForkAndExec(); + bool Modified = AddFlushBeforeForkAndExec(); FilterRe = createRegexesFromString(Options.Filter); ExcludeRe = createRegexesFromString(Options.Exclude); if (Options.EmitNotes) emitProfileNotes(); - if (Options.EmitData) return emitProfileArcs(); - return false; + if (Options.EmitData) + Modified |= emitProfileArcs(); + return Modified; } PreservedAnalyses GCOVProfilerPass::run(Module &M, @@ -590,9 +580,10 @@ PreservedAnalyses GCOVProfilerPass::run(Module &M, return PreservedAnalyses::none(); } -static bool functionHasLines(Function &F) { +static bool functionHasLines(const Function &F, unsigned &EndLine) { // Check whether this function actually has any source lines. Not only // do these waste space, they also can crash gcov. + EndLine = 0; for (auto &BB : F) { for (auto &I : BB) { // Debug intrinsic locations correspond to the location of the @@ -605,6 +596,7 @@ static bool functionHasLines(Function &F) { // Artificial lines such as calls to the global constructors. 
if (Loc.getLine() == 0) continue; + EndLine = std::max(EndLine, Loc.getLine()); return true; } @@ -629,43 +621,95 @@ static bool shouldKeepInEntry(BasicBlock::iterator It) { return false; } -void GCOVProfiler::AddFlushBeforeForkAndExec() { - SmallVector<Instruction *, 2> ForkAndExecs; +bool GCOVProfiler::AddFlushBeforeForkAndExec() { + SmallVector<CallInst *, 2> Forks; + SmallVector<CallInst *, 2> Execs; for (auto &F : M->functions()) { auto *TLI = &GetTLI(F); for (auto &I : instructions(F)) { if (CallInst *CI = dyn_cast<CallInst>(&I)) { if (Function *Callee = CI->getCalledFunction()) { LibFunc LF; - if (TLI->getLibFunc(*Callee, LF) && - (LF == LibFunc_fork || LF == LibFunc_execl || - LF == LibFunc_execle || LF == LibFunc_execlp || - LF == LibFunc_execv || LF == LibFunc_execvp || - LF == LibFunc_execve || LF == LibFunc_execvpe || - LF == LibFunc_execvP)) { - ForkAndExecs.push_back(&I); + if (TLI->getLibFunc(*Callee, LF)) { + if (LF == LibFunc_fork) { +#if !defined(_WIN32) + Forks.push_back(CI); +#endif + } else if (LF == LibFunc_execl || LF == LibFunc_execle || + LF == LibFunc_execlp || LF == LibFunc_execv || + LF == LibFunc_execvp || LF == LibFunc_execve || + LF == LibFunc_execvpe || LF == LibFunc_execvP) { + Execs.push_back(CI); + } } } } } } - // We need to split the block after the fork/exec call - // because else the counters for the lines after will be - // the same as before the call. - for (auto I : ForkAndExecs) { - IRBuilder<> Builder(I); + for (auto F : Forks) { + IRBuilder<> Builder(F); + BasicBlock *Parent = F->getParent(); + auto NextInst = ++F->getIterator(); + + // We've a fork so just reset the counters in the child process + FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false); + FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy); + F->setCalledFunction(GCOVFork); + + // We split just after the fork to have a counter for the lines after + // Anyway there's a bug: + // void foo() { fork(); } + // void bar() { foo(); blah(); } + // then "blah();" will be called 2 times but showed as 1 + // because "blah()" belongs to the same block as "foo();" + Parent->splitBasicBlock(NextInst); + + // back() is a br instruction with a debug location + // equals to the one from NextAfterFork + // So to avoid to have two debug locs on two blocks just change it + DebugLoc Loc = F->getDebugLoc(); + Parent->back().setDebugLoc(Loc); + } + + for (auto E : Execs) { + IRBuilder<> Builder(E); + BasicBlock *Parent = E->getParent(); + auto NextInst = ++E->getIterator(); + + // Since the process is replaced by a new one we need to write out gcdas + // No need to reset the counters since they'll be lost after the exec** FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false); - FunctionCallee GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy); - Builder.CreateCall(GCOVFlush); - I->getParent()->splitBasicBlock(I); + FunctionCallee WriteoutF = + M->getOrInsertFunction("llvm_writeout_files", FTy); + Builder.CreateCall(WriteoutF); + + DebugLoc Loc = E->getDebugLoc(); + Builder.SetInsertPoint(&*NextInst); + // If the exec** fails we must reset the counters since they've been + // dumped + FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy); + Builder.CreateCall(ResetF)->setDebugLoc(Loc); + Parent->splitBasicBlock(NextInst); + Parent->back().setDebugLoc(Loc); } + + return !Forks.empty() || !Execs.empty(); } void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; + 
int Version; + { + uint8_t c3 = Options.Version[0]; + uint8_t c2 = Options.Version[1]; + uint8_t c1 = Options.Version[2]; + Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0' + : (c3 - '0') * 10 + c1 - '0'; + } + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { // Each compile unit gets its own .gcno file. This means that whether we run // this pass over the original .o's as they're produced, or run it after @@ -688,11 +732,14 @@ void GCOVProfiler::emitProfileNotes() { std::string EdgeDestinations; + Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little + : support::endianness::big; unsigned FunctionIdent = 0; for (auto &F : M->functions()) { DISubprogram *SP = F.getSubprogram(); + unsigned EndLine; if (!SP) continue; - if (!functionHasLines(F) || !isFunctionInstrumented(F)) + if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F)) continue; // TODO: Functions using scope-based EH are currently not supported. if (isUsingScopeBasedEH(F)) continue; @@ -705,9 +752,8 @@ void GCOVProfiler::emitProfileNotes() { ++It; EntryBlock.splitBasicBlock(It); - Funcs.push_back(std::make_unique<GCOVFunction>(SP, &F, &out, FunctionIdent++, - Options.UseCfgChecksum, - Options.ExitBlockBeforeBody)); + Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine, + FunctionIdent++, Version)); GCOVFunction &Func = *Funcs.back(); // Add the function line number to the lines of the entry block @@ -756,17 +802,29 @@ void GCOVProfiler::emitProfileNotes() { EdgeDestinations += Func.getEdgeDestinations(); } - FileChecksums.push_back(hash_value(EdgeDestinations)); - out.write("oncg", 4); - out.write(ReversedVersion, 4); - out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4); - - for (auto &Func : Funcs) { - Func->setCfgChecksum(FileChecksums.back()); - Func->writeOut(); + char Tmp[4]; + os = &out; + auto Stamp = static_cast<uint32_t>(hash_value(EdgeDestinations)); + FileChecksums.push_back(Stamp); + if (Endian == support::endianness::big) { + out.write("gcno", 4); + out.write(Options.Version, 4); + } else { + out.write("oncg", 4); + std::reverse_copy(Options.Version, Options.Version + 4, Tmp); + out.write(Tmp, 4); } + write(Stamp); + if (Version >= 90) + writeString(""); // unuseful current_working_directory + if (Version >= 80) + write(0); // unuseful has_unexecuted_blocks - out.write("\0\0\0\0\0\0\0\0", 8); // EOF + for (auto &Func : Funcs) + Func->writeOut(Stamp); + + write(0); + write(0); out.close(); } } @@ -780,12 +838,12 @@ bool GCOVProfiler::emitProfileArcs() { SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; for (auto &F : M->functions()) { DISubprogram *SP = F.getSubprogram(); + unsigned EndLine; if (!SP) continue; - if (!functionHasLines(F) || !isFunctionInstrumented(F)) + if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F)) continue; // TODO: Functions using scope-based EH are currently not supported. 
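A small self-contained sketch of the version decoding used in emitProfileNotes above (illustrative only, not part of the patch): the four-character -default-gcov-version string is reduced to an integer, so the old default "402*" maps to 42, "407*" to 47, and the new default "408*" to 48, which is what the Version >= 47/48/80/90 checks in this file compare against.

// Hypothetical helper mirroring the decoding above; not part of the patch.
constexpr int decodeGCOVVersion(const char *V) {
  return V[0] >= 'A' ? (V[0] - 'A') * 100 + (V[1] - '0') * 10 + (V[2] - '0')
                     : (V[0] - '0') * 10 + (V[2] - '0');
}
static_assert(decodeGCOVVersion("402*") == 42, "gcov 4.2");
static_assert(decodeGCOVVersion("408*") == 48, "gcov 4.8");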
if (isUsingScopeBasedEH(F)) continue; - if (!Result) Result = true; DenseMap<std::pair<BasicBlock *, BasicBlock *>, unsigned> EdgeToCounter; unsigned Edges = 0; @@ -850,7 +908,8 @@ bool GCOVProfiler::emitProfileArcs() { } Function *WriteoutF = insertCounterWriteout(CountersBySP); - Function *FlushF = insertFlush(CountersBySP); + Function *ResetF = insertReset(CountersBySP); + Function *FlushF = insertFlush(ResetF); // Create a small bit of code that registers the "__llvm_gcov_writeout" to // be executed at exit and the "__llvm_gcov_flush" function to be executed @@ -868,19 +927,18 @@ bool GCOVProfiler::emitProfileArcs() { IRBuilder<> Builder(BB); FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Type *Params[] = { - PointerType::get(FTy, 0), - PointerType::get(FTy, 0) - }; + Type *Params[] = {PointerType::get(FTy, 0), PointerType::get(FTy, 0), + PointerType::get(FTy, 0)}; FTy = FunctionType::get(Builder.getVoidTy(), Params, false); - // Initialize the environment and register the local writeout and flush - // functions. + // Initialize the environment and register the local writeout, flush and + // reset functions. FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); - Builder.CreateCall(GCOVInit, {WriteoutF, FlushF}); + Builder.CreateCall(GCOVInit, {WriteoutF, FlushF, ResetF}); Builder.CreateRetVoid(); appendToGlobalCtors(*M, F, 0); + Result = true; } return Result; @@ -888,9 +946,9 @@ bool GCOVProfiler::emitProfileArcs() { FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { - Type::getInt8PtrTy(*Ctx), // const char *orig_filename - Type::getInt8PtrTy(*Ctx), // const char version[4] - Type::getInt32Ty(*Ctx), // uint32_t checksum + Type::getInt8PtrTy(*Ctx), // const char *orig_filename + Type::getInt32Ty(*Ctx), // uint32_t version + Type::getInt32Ty(*Ctx), // uint32_t checksum }; FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); AttributeList AL; @@ -903,18 +961,15 @@ FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) { FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { Type::getInt32Ty(*Ctx), // uint32_t ident - Type::getInt8PtrTy(*Ctx), // const char *function_name Type::getInt32Ty(*Ctx), // uint32_t func_checksum - Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum Type::getInt32Ty(*Ctx), // uint32_t cfg_checksum }; FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); AttributeList AL; if (auto AK = TLI->getExtAttrForI32Param(false)) { AL = AL.addParamAttribute(*Ctx, 0, AK); + AL = AL.addParamAttribute(*Ctx, 1, AK); AL = AL.addParamAttribute(*Ctx, 2, AK); - AL = AL.addParamAttribute(*Ctx, 3, AK); - AL = AL.addParamAttribute(*Ctx, 4, AK); } return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); } @@ -973,10 +1028,9 @@ Function *GCOVProfiler::insertCounterWriteout( // Collect the relevant data into a large constant data structure that we can // walk to write out everything. 
StructType *StartFileCallArgsTy = StructType::create( - {Builder.getInt8PtrTy(), Builder.getInt8PtrTy(), Builder.getInt32Ty()}); + {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()}); StructType *EmitFunctionCallArgsTy = StructType::create( - {Builder.getInt32Ty(), Builder.getInt8PtrTy(), Builder.getInt32Ty(), - Builder.getInt8Ty(), Builder.getInt32Ty()}); + {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()}); StructType *EmitArcsCallArgsTy = StructType::create( {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()}); StructType *FileInfoTy = @@ -999,23 +1053,19 @@ Function *GCOVProfiler::insertCounterWriteout( std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA); uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i]; auto *StartFileCallArgs = ConstantStruct::get( - StartFileCallArgsTy, {Builder.CreateGlobalStringPtr(FilenameGcda), - Builder.CreateGlobalStringPtr(ReversedVersion), - Builder.getInt32(CfgChecksum)}); + StartFileCallArgsTy, + {Builder.CreateGlobalStringPtr(FilenameGcda), + Builder.getInt32(endian::read32be(Options.Version)), + Builder.getInt32(CfgChecksum)}); SmallVector<Constant *, 8> EmitFunctionCallArgsArray; SmallVector<Constant *, 8> EmitArcsCallArgsArray; for (int j : llvm::seq<int>(0, CountersBySP.size())) { - auto *SP = cast_or_null<DISubprogram>(CountersBySP[j].second); uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum(); EmitFunctionCallArgsArray.push_back(ConstantStruct::get( EmitFunctionCallArgsTy, {Builder.getInt32(j), - Options.FunctionNamesInData - ? Builder.CreateGlobalStringPtr(getFunctionName(SP)) - : Constant::getNullValue(Builder.getInt8PtrTy()), Builder.getInt32(FuncChecksum), - Builder.getInt8(Options.UseCfgChecksum), Builder.getInt32(CfgChecksum)})); GlobalVariable *GV = CountersBySP[j].first; @@ -1144,19 +1194,12 @@ Function *GCOVProfiler::insertCounterWriteout( EmitFunctionCallArgsPtr, 1)), Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2), Builder.CreateStructGEP(EmitFunctionCallArgsTy, - EmitFunctionCallArgsPtr, 2)), - Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(3), - Builder.CreateStructGEP(EmitFunctionCallArgsTy, - EmitFunctionCallArgsPtr, 3)), - Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(4), - Builder.CreateStructGEP(EmitFunctionCallArgsTy, EmitFunctionCallArgsPtr, - 4))}); + 2))}); if (auto AK = TLI->getExtAttrForI32Param(false)) { EmitFunctionCall->addParamAttr(0, AK); + EmitFunctionCall->addParamAttr(1, AK); EmitFunctionCall->addParamAttr(2, AK); - EmitFunctionCall->addParamAttr(3, AK); - EmitFunctionCall->addParamAttr(4, AK); } auto *EmitArcsCallArgsPtr = Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV); @@ -1190,15 +1233,46 @@ Function *GCOVProfiler::insertCounterWriteout( return WriteoutF; } -Function *GCOVProfiler:: -insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { +Function *GCOVProfiler::insertReset( + ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *ResetF = M->getFunction("__llvm_gcov_reset"); + if (!ResetF) + ResetF = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_reset", M); + ResetF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + ResetF->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + ResetF->addFnAttr(Attribute::NoRedZone); + + BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF); + IRBuilder<> Builder(Entry); + + // Zero 
out the counters. + for (const auto &I : CountersBySP) { + GlobalVariable *GV = I.first; + Constant *Null = Constant::getNullValue(GV->getValueType()); + Builder.CreateStore(Null, GV); + } + + Type *RetTy = ResetF->getReturnType(); + if (RetTy->isVoidTy()) + Builder.CreateRetVoid(); + else if (RetTy->isIntegerTy()) + // Used if __llvm_gcov_reset was implicitly declared. + Builder.CreateRet(ConstantInt::get(RetTy, 0)); + else + report_fatal_error("invalid return type for __llvm_gcov_reset"); + + return ResetF; +} + +Function *GCOVProfiler::insertFlush(Function *ResetF) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, "__llvm_gcov_flush", M); - else - FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); FlushF->addFnAttr(Attribute::NoInline); if (Options.NoRedZone) @@ -1212,16 +1286,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { IRBuilder<> Builder(Entry); Builder.CreateCall(WriteoutF, {}); - - // Zero out the counters. - for (const auto &I : CountersBySP) { - GlobalVariable *GV = I.first; - Constant *Null = Constant::getNullValue(GV->getValueType()); - Builder.CreateStore(Null, GV); - } + Builder.CreateCall(ResetF, {}); Type *RetTy = FlushF->getReturnType(); - if (RetTy == Type::getVoidTy(*Ctx)) + if (RetTy->isVoidTy()) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) // Used if __llvm_gcov_flush was implicitly declared. diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 7e8f8e27a97bc..2e71d613714a5 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -96,6 +97,10 @@ static cl::opt<bool> ClInstrumentAtomics( cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval", + cl::desc("instrument byval arguments"), + cl::Hidden, cl::init(true)); + static cl::opt<bool> ClRecover( "hwasan-recover", cl::desc("Enable recovery mode (continue-after-error)."), @@ -119,7 +124,7 @@ static cl::opt<bool> ClGenerateTagsWithCalls( cl::init(false)); static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(false), cl::ZeroOrMore); static cl::opt<int> ClMatchAllTag( "hwasan-match-all-tag", @@ -211,10 +216,10 @@ public: unsigned AccessSizeIndex, Instruction *InsertBefore); void instrumentMemIntrinsic(MemIntrinsic *MI); - bool instrumentMemAccess(Instruction *I); - Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, - uint64_t *TypeSize, unsigned *Alignment, - Value **MaybeMask); + bool instrumentMemAccess(InterestingMemoryOperand &O); + bool ignoreAccess(Value *Ptr); + void getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting); bool isInterestingAlloca(const AllocaInst &AI); bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, 
Value *Tag, size_t Size); @@ -300,7 +305,10 @@ public: explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false, bool Recover = false) - : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) {} + : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) { + initializeHWAddressSanitizerLegacyPassPass( + *PassRegistry::getPassRegistry()); + } StringRef getPassName() const override { return "HWAddressSanitizer"; } @@ -500,62 +508,62 @@ Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) { } } -Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I, - bool *IsWrite, - uint64_t *TypeSize, - unsigned *Alignment, - Value **MaybeMask) { +bool HWAddressSanitizer::ignoreAccess(Value *Ptr) { + // Do not instrument acesses from different address spaces; we cannot deal + // with them. + Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0) + return true; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. + if (Ptr->isSwiftError()) + return true; + + return false; +} + +void HWAddressSanitizer::getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) { // Skip memory accesses inserted by another instrumentation. - if (I->hasMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) + return; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) - return nullptr; + return; - Value *PtrOperand = nullptr; - const DataLayout &DL = I->getModule()->getDataLayout(); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (!ClInstrumentReads) return nullptr; - *IsWrite = false; - *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); - *Alignment = LI->getAlignment(); - PtrOperand = LI->getPointerOperand(); + if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand())) + return; + Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, + LI->getType(), LI->getAlign()); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!ClInstrumentWrites) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); - *Alignment = SI->getAlignment(); - PtrOperand = SI->getPointerOperand(); + if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand())) + return; + Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, + SI->getValueOperand()->getType(), SI->getAlign()); } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); - *Alignment = 0; - PtrOperand = RMW->getPointerOperand(); + if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand())) + return; + Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, + RMW->getValOperand()->getType(), None); } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); - *Alignment = 0; - PtrOperand = XCHG->getPointerOperand(); - } - - if (PtrOperand) { - // Do not instrument accesses from different address spaces; we cannot deal - // with them. 
- Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0) - return nullptr; - - // Ignore swifterror addresses. - // swifterror memory addresses are mem2reg promoted by instruction - // selection. As such they cannot have regular uses like an instrumentation - // function and it makes no sense to track them as memory. - if (PtrOperand->isSwiftError()) - return nullptr; + if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand())) + return; + Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, + XCHG->getCompareOperand()->getType(), None); + } else if (auto CI = dyn_cast<CallInst>(I)) { + for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) { + if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || + ignoreAccess(CI->getArgOperand(ArgNo))) + continue; + Type *Ty = CI->getParamByValType(ArgNo); + Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); + } } - - return PtrOperand; } static unsigned getPointerOperandIndex(Instruction *I) { @@ -713,45 +721,32 @@ void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { MI->eraseFromParent(); } -bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) { - LLVM_DEBUG(dbgs() << "Instrumenting: " << *I << "\n"); - bool IsWrite = false; - unsigned Alignment = 0; - uint64_t TypeSize = 0; - Value *MaybeMask = nullptr; +bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { + Value *Addr = O.getPtr(); - if (ClInstrumentMemIntrinsics && isa<MemIntrinsic>(I)) { - instrumentMemIntrinsic(cast<MemIntrinsic>(I)); - return true; - } - - Value *Addr = - isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask); - - if (!Addr) - return false; + LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n"); - if (MaybeMask) + if (O.MaybeMask) return false; //FIXME - IRBuilder<> IRB(I); - if (isPowerOf2_64(TypeSize) && - (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) && - (Alignment >= (1UL << Mapping.Scale) || Alignment == 0 || - Alignment >= TypeSize / 8)) { - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); + IRBuilder<> IRB(O.getInsn()); + if (isPowerOf2_64(O.TypeSize) && + (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) && + (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) || + *O.Alignment >= O.TypeSize / 8)) { + size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize); if (ClInstrumentWithCalls) { - IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex], + IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex], IRB.CreatePointerCast(Addr, IntptrTy)); } else { - instrumentMemAccessInline(Addr, IsWrite, AccessSizeIndex, I); + instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn()); } } else { - IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite], + IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], {IRB.CreatePointerCast(Addr, IntptrTy), - ConstantInt::get(IntptrTy, TypeSize / 8)}); + ConstantInt::get(IntptrTy, O.TypeSize / 8)}); } - untagPointerOperand(I, Addr); + untagPointerOperand(O.getInsn(), Addr); return true; } @@ -789,7 +784,7 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, // llvm.memset right here into either a sequence of stores, or a call to // hwasan_tag_memory. 
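Back in instrumentMemAccess() above, the choice between the fixed-size (or inline) checks and the *_sized callback reduces to a small predicate on access size and alignment. A self-contained sketch of that decision, assuming the usual kNumberOfAccessSizes of 5 and a 16-byte shadow granule (Mapping.Scale of 4); the function names below are illustrative, not the pass's own:

#include <cstdint>
#include <cstdio>
#include <optional>

static bool isPowerOf2(uint64_t X) { return X && (X & (X - 1)) == 0; }

// Sizes of 1..16 bytes that are powers of two and sufficiently aligned get an
// indexed check; everything else falls back to the "sized" callback.
static bool useFixedSizeCheck(uint64_t TypeSizeInBits,
                              std::optional<uint64_t> Alignment,
                              unsigned NumberOfAccessSizes = 5,
                              unsigned MappingScale = 4) {
  uint64_t Bytes = TypeSizeInBits / 8;
  return isPowerOf2(TypeSizeInBits) &&
         Bytes <= (1ULL << (NumberOfAccessSizes - 1)) &&
         (!Alignment || *Alignment >= (1ULL << MappingScale) ||
          *Alignment >= Bytes);
}

// The access-size index is log2 of the byte size: 1 -> 0, 2 -> 1, ... 16 -> 4.
static unsigned accessSizeIndex(uint64_t TypeSizeInBits) {
  unsigned Idx = 0;
  for (uint64_t Bytes = TypeSizeInBits / 8; Bytes > 1; Bytes >>= 1)
    ++Idx;
  return Idx;
}

int main() {
  // An 8-byte load aligned to 8 qualifies and uses access-size index 3.
  std::printf("%d %u\n", useFixedSizeCheck(64, 8), accessSizeIndex(64)); // 1 3
  return 0;
}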
if (ShadowSize) - IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align::None()); + IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1)); if (Size != AlignedSize) { IRB.CreateStore( ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()), @@ -1089,7 +1084,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n"); - SmallVector<Instruction*, 16> ToInstrument; + SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument; + SmallVector<MemIntrinsic *, 16> IntrinToInstrument; SmallVector<AllocaInst*, 8> AllocasToInstrument; SmallVector<Instruction*, 8> RetVec; SmallVector<Instruction*, 8> LandingPadVec; @@ -1115,31 +1111,31 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { if (InstrumentLandingPads && isa<LandingPadInst>(Inst)) LandingPadVec.push_back(&Inst); - Value *MaybeMask = nullptr; - bool IsWrite; - unsigned Alignment; - uint64_t TypeSize; - Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, - &Alignment, &MaybeMask); - if (Addr || isa<MemIntrinsic>(Inst)) - ToInstrument.push_back(&Inst); + getInterestingMemoryOperands(&Inst, OperandsToInstrument); + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) + IntrinToInstrument.push_back(MI); } } initializeCallbacks(*F.getParent()); + bool Changed = false; + if (!LandingPadVec.empty()) - instrumentLandingPads(LandingPadVec); + Changed |= instrumentLandingPads(LandingPadVec); if (AllocasToInstrument.empty() && F.hasPersonalityFn() && F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) { // __hwasan_personality_thunk is a no-op for functions without an // instrumented stack, so we can drop it. F.setPersonalityFn(nullptr); + Changed = true; } - if (AllocasToInstrument.empty() && ToInstrument.empty()) - return false; + if (AllocasToInstrument.empty() && OperandsToInstrument.empty() && + IntrinToInstrument.empty()) + return Changed; assert(!LocalDynamicShadow); @@ -1149,14 +1145,11 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { /*WithFrameRecord*/ ClRecordStackHistory && !AllocasToInstrument.empty()); - bool Changed = false; if (!AllocasToInstrument.empty()) { Value *StackTag = ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB); - Changed |= instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec, - StackTag); + instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec, StackTag); } - // Pad and align each of the allocas that we instrumented to stop small // uninteresting allocas from hiding in instrumented alloca's padding and so // that we have enough space to store real tags for short granules. 
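The padding logic in the next hunk rounds every instrumented alloca up to the tag granule, so neighbouring objects cannot share a granule and short granules keep room for their real tags. A standalone sketch of the size computation, assuming the default 16-byte granule (Mapping.getObjectAlignment() with Scale 4); the helper name is mine:

#include <cstdint>
#include <cstdio>

// Round Size up to the next multiple of the HWASan object alignment.
static uint64_t alignToGranule(uint64_t Size, uint64_t Granule = 16) {
  return (Size + Granule - 1) / Granule * Granule;
}

int main() {
  for (uint64_t Size : {1, 16, 17, 40}) {
    uint64_t Aligned = alignToGranule(Size);
    // The pass appends (Aligned - Size) extra i8s to the alloca when they differ.
    std::printf("size %llu -> padded %llu (+%llu bytes)\n",
                (unsigned long long)Size, (unsigned long long)Aligned,
                (unsigned long long)(Aligned - Size));
  }
  return 0;
}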
@@ -1165,7 +1158,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { uint64_t Size = getAllocaSizeInBytes(*AI); uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); AI->setAlignment( - MaybeAlign(std::max(AI->getAlignment(), Mapping.getObjectAlignment()))); + Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment()))); if (Size != AlignedSize) { Type *AllocatedType = AI->getAllocatedType(); if (AI->isArrayAllocation()) { @@ -1178,7 +1171,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { auto *NewAI = new AllocaInst( TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI); NewAI->takeName(AI); - NewAI->setAlignment(MaybeAlign(AI->getAlignment())); + NewAI->setAlignment(AI->getAlign()); NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca()); NewAI->setSwiftError(AI->isSwiftError()); NewAI->copyMetadata(*AI); @@ -1216,13 +1209,18 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { } } - for (auto Inst : ToInstrument) - Changed |= instrumentMemAccess(Inst); + for (auto &Operand : OperandsToInstrument) + instrumentMemAccess(Operand); + + if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) { + for (auto Inst : IntrinToInstrument) + instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); + } LocalDynamicShadow = nullptr; StackBaseTag = nullptr; - return Changed; + return true; } void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) { @@ -1325,8 +1323,9 @@ void HWAddressSanitizer::instrumentGlobals() { // cases where two libraries mutually depend on each other. // // We only need one note per binary, so put everything for the note in a - // comdat. - Comdat *NoteComdat = M.getOrInsertComdat(kHwasanNoteName); + // comdat. This need to be a comdat with an .init_array section to prevent + // newer versions of lld from discarding the note. + Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName); Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0); auto Start = diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index d5787c8f62a11..bcd4e2e8e33cc 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -23,7 +23,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" @@ -194,12 +193,12 @@ private: // TotalCount is the total profiled count of call executions, and // NumCandidates is the number of candidate entries in ValueDataRef. std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( - Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef, + const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, uint64_t TotalCount, uint32_t NumCandidates); // Promote a list of targets for one indirect-call callsite. Return // the number of promotions. - uint32_t tryToPromote(Instruction *Inst, + uint32_t tryToPromote(CallBase &CB, const std::vector<PromotionCandidate> &Candidates, uint64_t &TotalCount); @@ -219,11 +218,11 @@ public: // the count. Stop at the first target that is not promoted. 
std::vector<ICallPromotionFunc::PromotionCandidate> ICallPromotionFunc::getPromotionCandidatesForCallSite( - Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef, + const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, uint64_t TotalCount, uint32_t NumCandidates) { std::vector<PromotionCandidate> Ret; - LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst + LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB << " Num_targets: " << ValueDataRef.size() << " Num_candidates: " << NumCandidates << "\n"); NumOfPGOICallsites++; @@ -239,18 +238,18 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count << " Target_func: " << Target << "\n"); - if (ICPInvokeOnly && isa<CallInst>(Inst)) { + if (ICPInvokeOnly && isa<CallInst>(CB)) { LLVM_DEBUG(dbgs() << " Not promote: User options.\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) << " Not promote: User options"; }); break; } - if (ICPCallOnly && isa<InvokeInst>(Inst)) { + if (ICPCallOnly && isa<InvokeInst>(CB)) { LLVM_DEBUG(dbgs() << " Not promote: User option.\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) << " Not promote: User options"; }); break; @@ -258,7 +257,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB) << " Not promote: Cutoff reached"; }); break; @@ -268,7 +267,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( if (TargetFunction == nullptr) { LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB) << "Cannot promote indirect call: target with md5sum " << ore::NV("target md5sum", Target) << " not found"; }); @@ -276,11 +275,11 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( } const char *Reason = nullptr; - if (!isLegalToPromote(CallSite(Inst), TargetFunction, &Reason)) { + if (!isLegalToPromote(CB, TargetFunction, &Reason)) { using namespace ore; ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB) << "Cannot promote indirect call to " << NV("TargetFunction", TargetFunction) << " with count of " << NV("Count", Count) << ": " << Reason; @@ -294,25 +293,24 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( return Ret; } -Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst, - Function *DirectCallee, - uint64_t Count, uint64_t TotalCount, - bool AttachProfToDirectCall, - OptimizationRemarkEmitter *ORE) { +CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, + uint64_t Count, uint64_t TotalCount, + bool AttachProfToDirectCall, + OptimizationRemarkEmitter *ORE) { uint64_t ElseCount = TotalCount - Count; uint64_t MaxCount = (Count >= ElseCount ? 
Count : ElseCount); uint64_t Scale = calculateCountScale(MaxCount); - MDBuilder MDB(Inst->getContext()); + MDBuilder MDB(CB.getContext()); MDNode *BranchWeights = MDB.createBranchWeights( scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); - Instruction *NewInst = - promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights); + CallBase &NewInst = + promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); if (AttachProfToDirectCall) { - MDBuilder MDB(NewInst->getContext()); - NewInst->setMetadata( + MDBuilder MDB(NewInst.getContext()); + NewInst.setMetadata( LLVMContext::MD_prof, MDB.createBranchWeights({static_cast<uint32_t>(Count)})); } @@ -321,7 +319,7 @@ Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst, if (ORE) ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Promoted", Inst) + return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB) << "Promote indirect call to " << NV("DirectCallee", DirectCallee) << " with count " << NV("Count", Count) << " out of " << NV("TotalCount", TotalCount); @@ -331,14 +329,14 @@ Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst, // Promote indirect-call to conditional direct-call for one callsite. uint32_t ICallPromotionFunc::tryToPromote( - Instruction *Inst, const std::vector<PromotionCandidate> &Candidates, + CallBase &CB, const std::vector<PromotionCandidate> &Candidates, uint64_t &TotalCount) { uint32_t NumPromoted = 0; for (auto &C : Candidates) { uint64_t Count = C.Count; - pgo::promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, - SamplePGO, &ORE); + pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO, + &ORE); assert(TotalCount >= Count); TotalCount -= Count; NumOfPGOICallPromotion++; @@ -352,28 +350,28 @@ uint32_t ICallPromotionFunc::tryToPromote( bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) { bool Changed = false; ICallPromotionAnalysis ICallAnalysis; - for (auto &I : findIndirectCalls(F)) { + for (auto *CB : findIndirectCalls(F)) { uint32_t NumVals, NumCandidates; uint64_t TotalCount; auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( - I, NumVals, TotalCount, NumCandidates); + CB, NumVals, TotalCount, NumCandidates); if (!NumCandidates || (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) continue; auto PromotionCandidates = getPromotionCandidatesForCallSite( - I, ICallProfDataRef, TotalCount, NumCandidates); - uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount); + *CB, ICallProfDataRef, TotalCount, NumCandidates); + uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount); if (NumPromoted == 0) continue; Changed = true; // Adjust the MD.prof metadata. First delete the old one. - I->setMetadata(LLVMContext::MD_prof, nullptr); + CB->setMetadata(LLVMContext::MD_prof, nullptr); // If all promoted, we don't need the MD.prof metadata. if (TotalCount == 0 || NumPromoted == NumVals) continue; // Otherwise we need update with the un-promoted records back. 
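The branch weights attached in promoteIndirectCall() above have to fit into 32 bits, so the 64-bit profile counts are first scaled down by a common factor derived from the larger of the two. A rough standalone sketch of that scaling; the real calculateCountScale()/scaleBranchCount() helpers live elsewhere in the tree, and the versions below are simplified stand-ins, not their exact implementations:

#include <cstdint>
#include <cstdio>
#include <limits>

// Pick a divisor so the largest count fits in a uint32_t branch weight.
static uint64_t countScale(uint64_t MaxCount) {
  return MaxCount < std::numeric_limits<uint32_t>::max()
             ? 1
             : MaxCount / std::numeric_limits<uint32_t>::max() + 1;
}

static uint32_t scaleCount(uint64_t Count, uint64_t Scale) {
  return static_cast<uint32_t>(Count / Scale);
}

int main() {
  uint64_t Count = 6'000'000'000ULL;       // hits of the promoted target
  uint64_t TotalCount = 10'000'000'000ULL; // all profiled executions of the call
  uint64_t ElseCount = TotalCount - Count; // remaining, still-indirect path
  uint64_t Scale = countScale(Count > ElseCount ? Count : ElseCount);
  // These two weights become the !prof branch_weights on the if-then-else.
  std::printf("scale %llu -> weights %u / %u\n", (unsigned long long)Scale,
              scaleCount(Count, Scale), scaleCount(ElseCount, Scale));
  return 0;
}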
- annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount, + annotateValueSite(*M, *CB, ICallProfDataRef.slice(NumPromoted), TotalCount, IPVK_IndirectCallTarget, NumCandidates); } return Changed; diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp index 518b8895e8363..853385fbf863b 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp @@ -11,7 +11,6 @@ #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 04c7e856b5d4a..7b03bbfcdfe4b 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -74,15 +74,16 @@ cl::opt<unsigned> MemOPSizeLarge( namespace { -cl::opt<bool> DoNameCompression("enable-name-compression", - cl::desc("Enable name string compression"), - cl::init(true)); - cl::opt<bool> DoHashBasedCounterSplit( "hash-based-counter-split", cl::desc("Rename counter variable of a comdat function based on cfg hash"), cl::init(true)); +cl::opt<bool> RuntimeCounterRelocation( + "runtime-counter-relocation", + cl::desc("Enable relocating counters at runtime."), + cl::init(false)); + cl::opt<bool> ValueProfileStaticAlloc( "vp-static-alloc", cl::desc("Do static counter allocation for value profiler"), @@ -109,6 +110,12 @@ cl::opt<bool> AtomicCounterUpdatePromoted( " for promoted counters only"), cl::init(false)); +cl::opt<bool> AtomicFirstCounter( + "atomic-first-counter", cl::ZeroOrMore, + cl::desc("Use atomic fetch add for first counter in a function (usually " + "the entry counter)"), + cl::init(false)); + // If the option is not specified, the default behavior about whether // counter promotion is done depends on how instrumentaiton lowering // pipeline is setup, i.e., the default value of true of this option @@ -151,7 +158,9 @@ public: InstrProfilingLegacyPass() : ModulePass(ID) {} InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false) - : ModulePass(ID), InstrProf(Options, IsCS) {} + : ModulePass(ID), InstrProf(Options, IsCS) { + initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry()); + } StringRef getPassName() const override { return "Frontend instrumentation-based coverage lowering"; @@ -242,9 +251,14 @@ public: : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), LI(LI), BFI(BFI) { + // Skip collection of ExitBlocks and InsertPts for loops that will not be + // able to have counters promoted. SmallVector<BasicBlock *, 8> LoopExitBlocks; SmallPtrSet<BasicBlock *, 8> BlockSet; + L.getExitBlocks(LoopExitBlocks); + if (!isPromotionPossible(&L, LoopExitBlocks)) + return; for (BasicBlock *ExitBlock : LoopExitBlocks) { if (BlockSet.insert(ExitBlock).second) { @@ -313,21 +327,31 @@ private: return true; } - // Returns the max number of Counter Promotions for LP. - unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { + // Check whether the loop satisfies the basic conditions needed to perform + // Counter Promotions. + bool isPromotionPossible(Loop *LP, + const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) { // We can't insert into a catchswitch. 
- SmallVector<BasicBlock *, 8> LoopExitBlocks; - LP->getExitBlocks(LoopExitBlocks); if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { return isa<CatchSwitchInst>(Exit->getTerminator()); })) - return 0; + return false; if (!LP->hasDedicatedExits()) - return 0; + return false; BasicBlock *PH = LP->getLoopPreheader(); if (!PH) + return false; + + return true; + } + + // Returns the max number of Counter Promotions for LP. + unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { + SmallVector<BasicBlock *, 8> LoopExitBlocks; + LP->getExitBlocks(LoopExitBlocks); + if (!isPromotionPossible(LP, LoopExitBlocks)) return 0; SmallVector<BasicBlock *, 8> ExitingBlocks; @@ -431,6 +455,13 @@ bool InstrProfiling::lowerIntrinsics(Function *F) { return true; } +bool InstrProfiling::isRuntimeCounterRelocationEnabled() const { + if (RuntimeCounterRelocation.getNumOccurrences() > 0) + return RuntimeCounterRelocation; + + return TT.isOSFuchsia(); +} + bool InstrProfiling::isCounterPromotionEnabled() const { if (DoCounterPromotion.getNumOccurrences() > 0) return DoCounterPromotion; @@ -611,11 +642,19 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { llvm::InstrProfValueKind::IPVK_MemOPSize); CallInst *Call = nullptr; auto *TLI = &GetTLI(*Ind->getFunction()); + + // To support value profiling calls within Windows exception handlers, funclet + // information contained within operand bundles needs to be copied over to + // the library call. This is required for the IR to be processed by the + // WinEHPrepare pass. + SmallVector<OperandBundleDef, 1> OpBundles; + Ind->getOperandBundlesAsDefs(OpBundles); if (!IsRange) { Value *Args[3] = {Ind->getTargetValue(), Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), Builder.getInt32(Index)}; - Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args); + Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args, + OpBundles); } else { Value *Args[6] = { Ind->getTargetValue(), @@ -624,8 +663,8 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Builder.getInt64(MemOPSizeRangeStart), Builder.getInt64(MemOPSizeRangeLast), Builder.getInt64(MemOPSizeLarge == 0 ? 
INT64_MIN : MemOPSizeLarge)}; - Call = - Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args); + Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), + Args, OpBundles); } if (auto AK = TLI->getExtAttrForI32Param(false)) Call->addParamAttr(2, AK); @@ -641,7 +680,30 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(), Counters, 0, Index); - if (Options.Atomic || AtomicCounterUpdateAll) { + if (isRuntimeCounterRelocationEnabled()) { + Type *Int64Ty = Type::getInt64Ty(M->getContext()); + Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext()); + Function *Fn = Inc->getParent()->getParent(); + Instruction &I = Fn->getEntryBlock().front(); + LoadInst *LI = dyn_cast<LoadInst>(&I); + if (!LI) { + IRBuilder<> Builder(&I); + Type *Int64Ty = Type::getInt64Ty(M->getContext()); + GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); + if (!Bias) { + Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, + Constant::getNullValue(Int64Ty), + getInstrProfCounterBiasVarName()); + Bias->setVisibility(GlobalVariable::HiddenVisibility); + } + LI = Builder.CreateLoad(Int64Ty, Bias); + } + auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); + Addr = Builder.CreateIntToPtr(Add, Int64PtrTy); + } + + if (Options.Atomic || AtomicCounterUpdateAll || + (Index == 0 && AtomicFirstCounter)) { Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), AtomicOrdering::Monotonic); } else { @@ -916,7 +978,7 @@ void InstrProfiling::emitNameData() { std::string CompressedNameStr; if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr, - DoNameCompression)) { + DoInstrProfNameCompression)) { report_fatal_error(toString(std::move(E)), false); } @@ -932,7 +994,7 @@ void InstrProfiling::emitNameData() { // On COFF, it's important to reduce the alignment down to 1 to prevent the // linker from inserting padding before the start of the names section or // between names entries. - NamesVar->setAlignment(Align::None()); + NamesVar->setAlignment(Align(1)); UsedVars.push_back(NamesVar); for (auto *NamePtr : ReferencedNames) @@ -979,9 +1041,9 @@ void InstrProfiling::emitRegistration() { } bool InstrProfiling::emitRuntimeHook() { - // We expect the linker to be invoked with -u<hook_var> flag for linux, - // for which case there is no need to emit the user function. - if (TT.isOSLinux()) + // We expect the linker to be invoked with -u<hook_var> flag for Linux or + // Fuchsia, in which case there is no need to emit the user function. + if (TT.isOSLinux() || TT.isOSFuchsia()) return false; // If the module's provided its own runtime, we don't need to do anything. diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index a6c2c9b464b63..ad238f1357c69 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -68,8 +68,8 @@ GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str, GlobalValue::PrivateLinkage, StrConst, NamePrefix); if (AllowMerging) GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - GV->setAlignment(Align::None()); // Strings may not be merged w/o setting - // alignment explicitly. + GV->setAlignment(Align(1)); // Strings may not be merged w/o setting + // alignment explicitly. 
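Circling back to the runtime counter relocation added to lowerIncrement() above: the transformed increment adds a runtime-provided bias to the counter's static address before updating through it. A conceptual plain-C++ sketch follows; the bias symbol name is an assumption on my part (the pass only refers to it through getInstrProfCounterBiasVarName()), and the definition here exists only so the sketch links and runs:

#include <cstdint>
#include <cstdio>

// Normally the profiling runtime sets this before any counter is bumped; it is
// defined locally so the sketch is self-contained. Assumed symbol name.
uint64_t __llvm_profile_counter_bias = 0;

// Conceptual shape of an increment after relocation: add the bias to the
// counter's link-time address, then update through the relocated pointer.
static void incrementCounter(uint64_t *CounterAddr, uint64_t Step) {
  auto *Relocated = reinterpret_cast<uint64_t *>(
      reinterpret_cast<uintptr_t>(CounterAddr) + __llvm_profile_counter_bias);
  *Relocated += Step;
}

int main() {
  uint64_t Counter = 0;
  incrementCounter(&Counter, 1); // bias is 0 here, so this hits Counter itself
  std::printf("%llu\n", (unsigned long long)Counter); // 1
  return 0;
}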
return GV; } @@ -78,7 +78,7 @@ Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T, if (auto Comdat = F.getComdat()) return Comdat; assert(F.hasName()); Module *M = F.getParent(); - std::string Name = F.getName(); + std::string Name = std::string(F.getName()); // Make a unique comdat name for internal linkage things on ELF. On COFF, the // name of the comdat group identifies the leader symbol of the comdat group. @@ -112,6 +112,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializePGOInstrumentationUseLegacyPassPass(Registry); initializePGOIndirectCallPromotionLegacyPassPass(Registry); initializePGOMemOPSizeOptLegacyPassPass(Registry); + initializeCGProfileLegacyPassPass(Registry); initializeInstrOrderFileLegacyPassPass(Registry); initializeInstrProfilingLegacyPassPass(Registry); initializeMemorySanitizerLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 80acab3075782..fcf7f470b3e10 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -62,7 +62,7 @@ /// /// Origins are meaningless for fully initialized values, so MemorySanitizer /// avoids storing origin to memory when a fully initialized value is stored. -/// This way it avoids needless overwritting origin of the 4-byte region on +/// This way it avoids needless overwriting origin of the 4-byte region on /// a short (i.e. 1 byte) clean store, and it is also good for performance. /// /// Atomic handling. @@ -137,6 +137,9 @@ /// /// KernelMemorySanitizer only supports X86_64 at the moment. /// +// +// FIXME: This sanitizer does not yet handle scalable vectors +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" @@ -153,7 +156,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -282,6 +284,11 @@ static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address", cl::desc("report accesses through a pointer which has poisoned shadow"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClEagerChecks( + "msan-eager-checks", + cl::desc("check arguments and return values at function call boundaries"), + cl::Hidden, cl::init(false)); + static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions", cl::desc("print out instructions with default strict semantics"), cl::Hidden, cl::init(false)); @@ -392,6 +399,14 @@ static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = { 0x1C0000000000, // OriginBase }; +// s390x Linux +static const MemoryMapParams Linux_S390X_MemoryMapParams = { + 0xC00000000000, // AndMask + 0, // XorMask (not used) + 0x080000000000, // ShadowBase + 0x1C0000000000, // OriginBase +}; + // aarch64 Linux static const MemoryMapParams Linux_AArch64_MemoryMapParams = { 0, // AndMask (not used) @@ -439,6 +454,11 @@ static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = { &Linux_PowerPC64_MemoryMapParams, }; +static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = { + nullptr, + &Linux_S390X_MemoryMapParams, +}; + static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { nullptr, &Linux_AArch64_MemoryMapParams, @@ -484,6 +504,7 @@ private: friend struct VarArgMIPS64Helper; friend struct 
VarArgAArch64Helper; friend struct VarArgPowerPC64Helper; + friend struct VarArgSystemZHelper; void initializeModule(Module &M); void initializeCallbacks(Module &M); @@ -530,10 +551,6 @@ private: /// (x86_64-specific). Value *VAArgOverflowSizeTLS; - /// Thread-local space used to pass origin value to the UMR reporting - /// function. - Value *OriginTLS; - /// Are the instrumentation callbacks set up? bool CallbacksInitialized = false; @@ -586,9 +603,6 @@ private: /// Branch weights for origin store. MDNode *OriginStoreWeights; - - /// An empty volatile inline asm that prevents callback merge. - InlineAsm *EmptyAsm; }; void insertModuleCtor(Module &M) { @@ -611,13 +625,15 @@ void insertModuleCtor(Module &M) { /// A legacy function pass for msan instrumentation. /// -/// Instruments functions to detect unitialized reads. +/// Instruments functions to detect uninitialized reads. struct MemorySanitizerLegacyPass : public FunctionPass { // Pass identification, replacement for typeid. static char ID; MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {}) - : FunctionPass(ID), Options(Options) {} + : FunctionPass(ID), Options(Options) { + initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); + } StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -700,10 +716,7 @@ void MemorySanitizer::createKernelApi(Module &M) { VAArgTLS = nullptr; VAArgOriginTLS = nullptr; VAArgOverflowSizeTLS = nullptr; - // OriginTLS is unused in the kernel. - OriginTLS = nullptr; - // __msan_warning() in the kernel takes an origin. WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(), IRB.getInt32Ty()); // Requests the per-task context state (kmsan_context_state*) from the @@ -758,12 +771,14 @@ static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) { /// Insert declarations for userspace-specific functions and globals. void MemorySanitizer::createUserspaceApi(Module &M) { IRBuilder<> IRB(*C); + // Create the callback. // FIXME: this function should have "Cold" calling conv, // which is not yet implemented. - StringRef WarningFnName = Recover ? "__msan_warning" - : "__msan_warning_noreturn"; - WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy()); + StringRef WarningFnName = Recover ? "__msan_warning_with_origin" + : "__msan_warning_with_origin_noreturn"; + WarningFn = + M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty()); // Create the global TLS variables. 
RetvalTLS = @@ -790,20 +805,30 @@ void MemorySanitizer::createUserspaceApi(Module &M) { VAArgOverflowSizeTLS = getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty()); - OriginTLS = getOrInsertGlobal(M, "__msan_origin_tls", IRB.getInt32Ty()); for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { unsigned AccessSize = 1 << AccessSizeIndex; std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize); + SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs; + MaybeWarningFnAttrs.push_back(std::make_pair( + AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt))); + MaybeWarningFnAttrs.push_back(std::make_pair( + AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt))); MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( - FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), - IRB.getInt32Ty()); + FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs), + IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty()); FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); + SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs; + MaybeStoreOriginFnAttrs.push_back(std::make_pair( + AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt))); + MaybeStoreOriginFnAttrs.push_back(std::make_pair( + AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt))); MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( - FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), - IRB.getInt8PtrTy(), IRB.getInt32Ty()); + FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs), + IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(), + IRB.getInt32Ty()); } MsanSetAllocaOrigin4Fn = M.getOrInsertFunction( @@ -834,10 +859,6 @@ void MemorySanitizer::initializeCallbacks(Module &M) { MemsetFn = M.getOrInsertFunction( "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy); - // We insert an empty inline asm after __msan_report* to avoid callback merge. - EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), - StringRef(""), StringRef(""), - /*hasSideEffects=*/true); MsanInstrumentAsmStoreFn = M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(), @@ -924,6 +945,9 @@ void MemorySanitizer::initializeModule(Module &M) { case Triple::ppc64le: MapParams = Linux_PowerPC_MemoryMapParams.bits64; break; + case Triple::systemz: + MapParams = Linux_S390_MemoryMapParams.bits64; + break; case Triple::aarch64: case Triple::aarch64_be: MapParams = Linux_ARM_MemoryMapParams.bits64; @@ -982,8 +1006,8 @@ namespace { struct VarArgHelper { virtual ~VarArgHelper() = default; - /// Visit a CallSite. - virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0; + /// Visit a CallBase. + virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0; /// Visit a va_start call. virtual void visitVAStartInst(VAStartInst &I) = 0; @@ -1028,12 +1052,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { BasicBlock *ActualFnStart; // The following flags disable parts of MSan instrumentation based on - // blacklist contents and command-line options. + // exclusion list contents and command-line options. 
bool InsertChecks; bool PropagateShadow; bool PoisonStack; bool PoisonUndef; - bool CheckReturnValue; struct ShadowOriginAndInsertPoint { Value *Shadow; @@ -1057,9 +1080,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { PropagateShadow = SanitizeFunction; PoisonStack = SanitizeFunction && ClPoisonStack; PoisonUndef = SanitizeFunction && ClPoisonUndef; - // FIXME: Consider using SpecialCaseList to specify a list of functions that - // must always return fully initialized values. For now, we hardcode "main". - CheckReturnValue = SanitizeFunction && (F.getName() == "main"); MS.initializeCallbacks(*F.getParent()); if (MS.CompileKernel) @@ -1090,7 +1110,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr, unsigned Size, Align Alignment) { const DataLayout &DL = F.getParent()->getDataLayout(); - const Align IntptrAlignment = Align(DL.getABITypeAlignment(MS.IntptrTy)); + const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy); unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); assert(IntptrAlignment >= kMinOriginAlignment); assert(IntptrSize >= kOriginSize); @@ -1104,7 +1124,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { for (unsigned i = 0; i < Size / IntptrSize; ++i) { Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) : IntptrOriginPtr; - IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment.value()); + IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment); Ofs += IntptrSize / kOriginSize; CurrentAlignment = IntptrAlignment; } @@ -1113,7 +1133,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) { Value *GEP = i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr; - IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment.value()); + IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment); CurrentAlignment = kMinOriginAlignment; } } @@ -1170,8 +1190,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true); - StoreInst *NewSI = - IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment.value()); + StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment); LLVM_DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; @@ -1188,15 +1207,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void insertWarningFn(IRBuilder<> &IRB, Value *Origin) { if (!Origin) Origin = (Value *)IRB.getInt32(0); - if (MS.CompileKernel) { - IRB.CreateCall(MS.WarningFn, Origin); - } else { - if (MS.TrackOrigins) { - IRB.CreateStore(Origin, MS.OriginTLS); - } - IRB.CreateCall(MS.WarningFn, {}); - } - IRB.CreateCall(MS.EmptyAsm, {}); + assert(Origin->getType()->isIntegerTy()); + IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge(); // FIXME: Insert UnreachableInst if !MS.Recover? // This may invalidate some of the following checks and needs to be done // at the very end. 
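For reference, the Linux_S390X_MemoryMapParams added earlier in this file plug into MSan's usual address-to-shadow computation: clear the bits in AndMask, XOR with XorMask, then add ShadowBase (and OriginBase, 4-byte aligned, for the origin). The sketch below is a simplification of what memToShadow()/getShadowOriginPtr() do in IR, using the new s390x constants for illustration only:

#include <cstdint>
#include <cstdio>

static const uint64_t AndMask    = 0xC00000000000ULL;
static const uint64_t XorMask    = 0;                  // not used on s390x
static const uint64_t ShadowBase = 0x080000000000ULL;
static const uint64_t OriginBase = 0x1C0000000000ULL;

// shadow = ((addr & ~AndMask) ^ XorMask) + ShadowBase
static uint64_t shadowFor(uint64_t Addr) {
  return ((Addr & ~AndMask) ^ XorMask) + ShadowBase;
}

// Origins share the offset but live above OriginBase, at 4-byte granularity.
static uint64_t originFor(uint64_t Addr) {
  return (((Addr & ~AndMask) ^ XorMask) + OriginBase) & ~3ULL;
}

int main() {
  uint64_t Addr = 0x3fffdeadbeefULL; // some application address
  std::printf("app %llx -> shadow %llx, origin %llx\n",
              (unsigned long long)Addr, (unsigned long long)shadowFor(Addr),
              (unsigned long long)originFor(Addr));
  return 0;
}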
@@ -1346,8 +1358,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { const DataLayout &DL = F.getParent()->getDataLayout(); if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) { uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType()); - return VectorType::get(IntegerType::get(*MS.C, EltSize), - VT->getNumElements()); + return FixedVectorType::get(IntegerType::get(*MS.C, EltSize), + cast<FixedVectorType>(VT)->getNumElements()); } if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) { return ArrayType::get(getShadowTy(AT->getElementType()), @@ -1368,7 +1380,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// Flatten a vector type. Type *getShadowTyNoVec(Type *ty) { if (VectorType *vt = dyn_cast<VectorType>(ty)) - return IntegerType::get(*MS.C, vt->getBitWidth()); + return IntegerType::get(*MS.C, + vt->getPrimitiveSizeInBits().getFixedSize()); return ty; } @@ -1606,20 +1619,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { LLVM_DEBUG(dbgs() << "Arg is not sized\n"); continue; } + + bool FArgByVal = FArg.hasByValAttr(); + bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef); + bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef; unsigned Size = FArg.hasByValAttr() - ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType()) + ? DL.getTypeAllocSize(FArg.getParamByValType()) : DL.getTypeAllocSize(FArg.getType()); + if (A == &FArg) { bool Overflow = ArgOffset + Size > kParamTLSSize; - Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); - if (FArg.hasByValAttr()) { + if (FArgEagerCheck) { + *ShadowPtr = getCleanShadow(V); + setOrigin(A, getCleanOrigin()); + continue; + } else if (FArgByVal) { + Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); // ByVal pointer itself has clean shadow. We copy the actual // argument shadow to the underlying memory. // Figure out maximal valid memcpy alignment. const Align ArgAlign = DL.getValueOrABITypeAlignment( - MaybeAlign(FArg.getParamAlignment()), - A->getType()->getPointerElementType()); + MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType()); Value *CpShadowPtr = getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign, /*isStore*/ true) @@ -1639,12 +1660,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } *ShadowPtr = getCleanShadow(V); } else { + // Shadow over TLS + Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); if (Overflow) { // ParamTLS overflow. 
*ShadowPtr = getCleanShadow(V); } else { - *ShadowPtr = EntryIRB.CreateAlignedLoad( - getShadowTy(&FArg), Base, kShadowTLSAlignment.value()); + *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base, + kShadowTLSAlignment); } } LLVM_DEBUG(dbgs() @@ -1657,7 +1680,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOrigin(A, getCleanOrigin()); } } - ArgOffset += alignTo(Size, kShadowTLSAlignment); + + if (!FArgEagerCheck) + ArgOffset += alignTo(Size, kShadowTLSAlignment); } assert(*ShadowPtr && "Could not find shadow for an argument"); return *ShadowPtr; @@ -1783,8 +1808,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (PropagateShadow) { std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false); - setShadow(&I, IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, - Alignment.value(), "_msld")); + setShadow(&I, + IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld")); } else { setShadow(&I, getCleanShadow(&I)); } @@ -1798,8 +1823,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (MS.TrackOrigins) { if (PropagateShadow) { const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment); - setOrigin(&I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, - OriginAlignment.value())); + setOrigin( + &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment)); } else { setOrigin(&I, getCleanOrigin()); } @@ -1821,7 +1846,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRBuilder<> IRB(&I); Value *Addr = I.getOperand(0); - Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align::None(), + Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align(1), /*isStore*/ true) .first; @@ -1868,10 +1893,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void visitShuffleVectorInst(ShuffleVectorInst &I) { - insertShadowCheck(I.getOperand(2), &I); IRBuilder<> IRB(&I); setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), - I.getOperand(2), "_msprop")); + I.getShuffleMask(), "_msprop")); setOriginForNaryOp(I); } @@ -2070,9 +2094,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) { assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) && "Vector of pointers is not a valid shadow type"); - return Ty->isVectorTy() ? - Ty->getVectorNumElements() * Ty->getScalarSizeInBits() : - Ty->getPrimitiveSizeInBits(); + return Ty->isVectorTy() ? 
cast<FixedVectorType>(Ty)->getNumElements() * + Ty->getScalarSizeInBits() + : Ty->getPrimitiveSizeInBits(); } /// Cast between two shadow types, extending or truncating as @@ -2088,7 +2112,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (dstTy->isIntegerTy() && srcTy->isIntegerTy()) return IRB.CreateIntCast(V, dstTy, Signed); if (dstTy->isVectorTy() && srcTy->isVectorTy() && - dstTy->getVectorNumElements() == srcTy->getVectorNumElements()) + cast<FixedVectorType>(dstTy)->getNumElements() == + cast<FixedVectorType>(srcTy)->getNumElements()) return IRB.CreateIntCast(V, dstTy, Signed); Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits)); Value *V2 = @@ -2132,9 +2157,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *OtherArg) { Constant *ShadowMul; Type *Ty = ConstArg->getType(); - if (Ty->isVectorTy()) { - unsigned NumElements = Ty->getVectorNumElements(); - Type *EltTy = Ty->getSequentialElementType(); + if (auto *VTy = dyn_cast<VectorType>(Ty)) { + unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements(); + Type *EltTy = VTy->getElementType(); SmallVector<Constant *, 16> Elements; for (unsigned Idx = 0; Idx < NumElements; ++Idx) { if (ConstantInt *Elt = @@ -2454,8 +2479,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // We don't know the pointer alignment (could be unaligned SSE store!). // Have to assume to worst case. std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr( - Addr, IRB, Shadow->getType(), Align::None(), /*isStore*/ true); - IRB.CreateAlignedStore(Shadow, ShadowPtr, 1); + Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true); + IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1)); if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); @@ -2478,11 +2503,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (PropagateShadow) { // We don't know the pointer alignment (could be unaligned SSE load!). // Have to assume to worst case. - const Align Alignment = Align::None(); + const Align Alignment = Align(1); std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false); - setShadow(&I, IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, - Alignment.value(), "_msld")); + setShadow(&I, + IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld")); } else { setShadow(&I, getCleanShadow(&I)); } @@ -2534,7 +2559,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// The main purpose of this code is to do something reasonable with all /// random intrinsics we might encounter, most importantly - SIMD intrinsics. /// We recognize several classes of intrinsics by their argument types and - /// ModRefBehaviour and apply special intrumentation when we are reasonably + /// ModRefBehaviour and apply special instrumentation when we are reasonably /// sure that we know what the intrinsic does. /// /// We special-case intrinsics where this approach fails. See llvm.bswap @@ -2595,7 +2620,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOrigin(&I, getOrigin(Op)); } - // Instrument vector convert instrinsic. + // Instrument vector convert intrinsic. 
// // This function instruments intrinsics like cvtsi2ss: // %Out = int_xxx_cvtyyy(%ConvertOp) @@ -2659,7 +2684,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { assert(CopyOp->getType() == I.getType()); assert(CopyOp->getType()->isVectorTy()); Value *ResultShadow = getShadow(CopyOp); - Type *EltTy = ResultShadow->getType()->getVectorElementType(); + Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType(); for (int i = 0; i < NumUsedElements; ++i) { ResultShadow = IRB.CreateInsertElement( ResultShadow, ConstantInt::getNullValue(EltTy), @@ -2698,7 +2723,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return IRB.CreateSExt(S2, T); } - // Instrument vector shift instrinsic. + // Instrument vector shift intrinsic. // // This function instruments intrinsics like int_x86_avx2_psll_w. // Intrinsic shifts %In by %ShiftSize bits. @@ -2716,7 +2741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { : Lower64ShadowExtend(IRB, S2, getShadowTy(&I)); Value *V1 = I.getOperand(0); Value *V2 = I.getOperand(1); - Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledValue(), + Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), {IRB.CreateBitCast(S1, V1->getType()), V2}); Shift = IRB.CreateBitCast(Shift, getShadowTy(&I)); setShadow(&I, IRB.CreateOr(Shift, S2Conv)); @@ -2728,8 +2753,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { const unsigned X86_MMXSizeInBits = 64; assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 && "Illegal MMX vector element size"); - return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits), - X86_MMXSizeInBits / EltSizeInBits); + return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits), + X86_MMXSizeInBits / EltSizeInBits); } // Returns a signed counterpart for an (un)signed-saturate-and-pack @@ -2763,7 +2788,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } } - // Instrument vector pack instrinsic. + // Instrument vector pack intrinsic. // // This function instruments intrinsics like x86_mmx_packsswb, that // packs elements of 2 input vectors into half as many bits with saturation. @@ -2806,7 +2831,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } - // Instrument sum-of-absolute-differencies intrinsic. + // Instrument sum-of-absolute-differences intrinsic. void handleVectorSadIntrinsic(IntrinsicInst &I) { const unsigned SignificantBitsPerResultElement = 16; bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); @@ -2864,13 +2889,56 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } + // Instrument generic vector reduction intrinsics + // by ORing together all their fields. + void handleVectorReduceIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOrReduce(getShadow(&I, 0)); + setShadow(&I, S); + setOrigin(&I, getOrigin(&I, 0)); + } + + // Instrument experimental.vector.reduce.or intrinsic. + // Valid (non-poisoned) set bits in the operand pull low the + // corresponding shadow bits. 
+ void handleVectorReduceOrIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *OperandShadow = getShadow(&I, 0); + Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0)); + Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow); + // Bit N is clean if any field's bit N is 1 and unpoison + Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison); + // Otherwise, it is clean if every field's bit N is unpoison + Value *OrShadow = IRB.CreateOrReduce(OperandShadow); + Value *S = IRB.CreateAnd(OutShadowMask, OrShadow); + + setShadow(&I, S); + setOrigin(&I, getOrigin(&I, 0)); + } + + // Instrument experimental.vector.reduce.or intrinsic. + // Valid (non-poisoned) unset bits in the operand pull down the + // corresponding shadow bits. + void handleVectorReduceAndIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *OperandShadow = getShadow(&I, 0); + Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow); + // Bit N is clean if any field's bit N is 0 and unpoison + Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison); + // Otherwise, it is clean if every field's bit N is unpoison + Value *OrShadow = IRB.CreateOrReduce(OperandShadow); + Value *S = IRB.CreateAnd(OutShadowMask, OrShadow); + + setShadow(&I, S); + setOrigin(&I, getOrigin(&I, 0)); + } + void handleStmxcsr(IntrinsicInst &I) { IRBuilder<> IRB(&I); Value* Addr = I.getArgOperand(0); Type *Ty = IRB.getInt32Ty(); Value *ShadowPtr = - getShadowOriginPtr(Addr, IRB, Ty, Align::None(), /*isStore*/ true) - .first; + getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first; IRB.CreateStore(getCleanShadow(Ty), IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo())); @@ -2885,7 +2953,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRBuilder<> IRB(&I); Value *Addr = I.getArgOperand(0); Type *Ty = IRB.getInt32Ty(); - const Align Alignment = Align::None(); + const Align Alignment = Align(1); Value *ShadowPtr, *OriginPtr; std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false); @@ -2893,8 +2961,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); - Value *Shadow = - IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment.value(), "_ldmxcsr"); + Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr"); Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr) : getCleanOrigin(); insertShadowCheck(Shadow, Origin, &I); @@ -2904,7 +2971,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRBuilder<> IRB(&I); Value *V = I.getArgOperand(0); Value *Addr = I.getArgOperand(1); - const MaybeAlign Alignment( + const Align Alignment( cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()); Value *Mask = I.getArgOperand(3); Value *Shadow = getShadow(V); @@ -2921,21 +2988,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { insertShadowCheck(Mask, &I); } - IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment ? 
Alignment->value() : 0, - Mask); + IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask); if (MS.TrackOrigins) { auto &DL = F.getParent()->getDataLayout(); paintOrigin(IRB, getOrigin(V), OriginPtr, DL.getTypeStoreSize(Shadow->getType()), - llvm::max(Alignment, kMinOriginAlignment)); + std::max(Alignment, kMinOriginAlignment)); } } bool handleMaskedLoad(IntrinsicInst &I) { IRBuilder<> IRB(&I); Value *Addr = I.getArgOperand(0); - const MaybeAlign Alignment( + const Align Alignment( cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); Value *Mask = I.getArgOperand(2); Value *PassThru = I.getArgOperand(3); @@ -2945,9 +3011,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (PropagateShadow) { std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false); - setShadow(&I, IRB.CreateMaskedLoad( - ShadowPtr, Alignment ? Alignment->value() : 0, Mask, - getShadow(PassThru), "_msmaskedld")); + setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Alignment, Mask, + getShadow(PassThru), "_msmaskedld")); } else { setShadow(&I, getCleanShadow(&I)); } @@ -2965,8 +3030,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Acc = IRB.CreateExtractElement( MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0)); - for (int i = 1, N = PassThru->getType()->getVectorNumElements(); i < N; - ++i) { + for (int i = 1, N = cast<FixedVectorType>(PassThru->getType()) + ->getNumElements(); + i < N; ++i) { Value *More = IRB.CreateExtractElement( MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i)); Acc = IRB.CreateOr(Acc, More); @@ -3005,6 +3071,68 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } + SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) { + SmallVector<int, 8> Mask; + for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) { + Mask.append(2, X); + } + return Mask; + } + + // Instrument pclmul intrinsics. + // These intrinsics operate either on odd or on even elements of the input + // vectors, depending on the constant in the 3rd argument, ignoring the rest. + // Replace the unused elements with copies of the used ones, ex: + // (0, 1, 2, 3) -> (0, 0, 2, 2) (even case) + // or + // (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case) + // and then apply the usual shadow combining logic. 
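To make the comment above concrete, here is getPclmulMask()'s loop rendered as a standalone C++ program that prints the two example masks; the real helper returns a SmallVector<int, 8> (using Mask.append(2, X)) that feeds the shuffle in handlePclmulIntrinsic() below.

```cpp
#include <cstdio>
#include <vector>

// Each selected lane index is emitted twice, so the lanes the intrinsic
// ignores are replaced with copies of the lanes it uses before the shadows
// of both operands are OR-combined.
std::vector<int> pclmulMask(unsigned Width, bool OddElements) {
  std::vector<int> Mask;
  for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
    Mask.push_back(X);
    Mask.push_back(X);
  }
  return Mask;
}

int main() {
  for (int V : pclmulMask(4, /*OddElements=*/false)) std::printf("%d ", V); // 0 0 2 2
  std::printf("\n");
  for (int V : pclmulMask(4, /*OddElements=*/true)) std::printf("%d ", V);  // 1 1 3 3
  std::printf("\n");
  return 0;
}
```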
+ void handlePclmulIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Type *ShadowTy = getShadowTy(&I); + unsigned Width = + cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements(); + assert(isa<ConstantInt>(I.getArgOperand(2)) && + "pclmul 3rd operand must be a constant"); + unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Value *Shuf0 = + IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy), + getPclmulMask(Width, Imm & 0x01)); + Value *Shuf1 = + IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy), + getPclmulMask(Width, Imm & 0x10)); + ShadowAndOriginCombiner SOC(this, IRB); + SOC.Add(Shuf0, getOrigin(&I, 0)); + SOC.Add(Shuf1, getOrigin(&I, 1)); + SOC.Done(&I); + } + + // Instrument _mm_*_sd intrinsics + void handleUnarySdIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *First = getShadow(&I, 0); + Value *Second = getShadow(&I, 1); + // High word of first operand, low word of second + Value *Shadow = + IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1})); + + setShadow(&I, Shadow); + setOriginForNaryOp(I); + } + + void handleBinarySdIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *First = getShadow(&I, 0); + Value *Second = getShadow(&I, 1); + Value *OrShadow = IRB.CreateOr(First, Second); + // High word of first operand, low word of both OR'd together + Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, + llvm::makeArrayRef<int>({2, 1})); + + setShadow(&I, Shadow); + setOriginForNaryOp(I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case Intrinsic::lifetime_start: @@ -3023,6 +3151,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case Intrinsic::masked_load: handleMaskedLoad(I); break; + case Intrinsic::experimental_vector_reduce_and: + handleVectorReduceAndIntrinsic(I); + break; + case Intrinsic::experimental_vector_reduce_or: + handleVectorReduceOrIntrinsic(I); + break; + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_mul: + handleVectorReduceIntrinsic(I); + break; case Intrinsic::x86_sse_stmxcsr: handleStmxcsr(I); break; @@ -3238,6 +3377,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { handleBmiIntrinsic(I); break; + case Intrinsic::x86_pclmulqdq: + case Intrinsic::x86_pclmulqdq_256: + case Intrinsic::x86_pclmulqdq_512: + handlePclmulIntrinsic(I); + break; + + case Intrinsic::x86_sse41_round_sd: + handleUnarySdIntrinsic(I); + break; + case Intrinsic::x86_sse2_max_sd: + case Intrinsic::x86_sse2_min_sd: + handleBinarySdIntrinsic(I); + break; + case Intrinsic::is_constant: // The result of llvm.is.constant() is always defined. setShadow(&I, getCleanShadow(&I)); @@ -3251,25 +3404,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } } - void visitCallSite(CallSite CS) { - Instruction &I = *CS.getInstruction(); - assert(!I.getMetadata("nosanitize")); - assert((CS.isCall() || CS.isInvoke() || CS.isCallBr()) && - "Unknown type of CallSite"); - if (CS.isCallBr() || (CS.isCall() && cast<CallInst>(&I)->isInlineAsm())) { + void visitCallBase(CallBase &CB) { + assert(!CB.getMetadata("nosanitize")); + if (CB.isInlineAsm()) { // For inline asm (either a call to asm function, or callbr instruction), // do the usual thing: check argument shadow and mark all outputs as // clean. 
Note that any side effects of the inline asm that are not // immediately visible in its constraints are not handled. if (ClHandleAsmConservative && MS.CompileKernel) - visitAsmInstruction(I); + visitAsmInstruction(CB); else - visitInstruction(I); + visitInstruction(CB); return; } - if (CS.isCall()) { - CallInst *Call = cast<CallInst>(&I); - assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere"); + if (auto *Call = dyn_cast<CallInst>(&CB)) { + assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere"); // We are going to insert code that relies on the fact that the callee // will become a non-readonly function after it is instrumented by us. To @@ -3288,16 +3437,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI); } - IRBuilder<> IRB(&I); + IRBuilder<> IRB(&CB); unsigned ArgOffset = 0; - LLVM_DEBUG(dbgs() << " CallSite: " << I << "\n"); - for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); - ArgIt != End; ++ArgIt) { + LLVM_DEBUG(dbgs() << " CallSite: " << CB << "\n"); + for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; + ++ArgIt) { Value *A = *ArgIt; - unsigned i = ArgIt - CS.arg_begin(); + unsigned i = ArgIt - CB.arg_begin(); if (!A->getType()->isSized()) { - LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n"); + LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n"); continue; } unsigned Size = 0; @@ -3311,12 +3460,23 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { << " Shadow: " << *ArgShadow << "\n"); bool ArgIsInitialized = false; const DataLayout &DL = F.getParent()->getDataLayout(); - if (CS.paramHasAttr(i, Attribute::ByVal)) { + + bool ByVal = CB.paramHasAttr(i, Attribute::ByVal); + bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef); + bool EagerCheck = ClEagerChecks && !ByVal && NoUndef; + + if (EagerCheck) { + insertShadowCheck(A, &CB); + continue; + } + if (ByVal) { + // ByVal requires some special handling as it's too big for a single + // load assert(A->getType()->isPointerTy() && "ByVal argument is not a pointer!"); - Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); + Size = DL.getTypeAllocSize(CB.getParamByValType(i)); if (ArgOffset + Size > kParamTLSSize) break; - const MaybeAlign ParamAlignment(CS.getParamAlignment(i)); + const MaybeAlign ParamAlignment(CB.getParamAlign(i)); MaybeAlign Alignment = llvm::None; if (ParamAlignment) Alignment = std::min(*ParamAlignment, kShadowTLSAlignment); @@ -3329,10 +3489,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Alignment, Size); // TODO(glider): need to copy origins. } else { + // Any other parameters mean we need bit-grained tracking of uninit data Size = DL.getTypeAllocSize(A->getType()); if (ArgOffset + Size > kParamTLSSize) break; Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, - kShadowTLSAlignment.value()); + kShadowTLSAlignment); Constant *Cst = dyn_cast<Constant>(ArgShadow); if (Cst && Cst->isNullValue()) ArgIsInitialized = true; } @@ -3346,32 +3507,41 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } LLVM_DEBUG(dbgs() << " done with call args\n"); - FunctionType *FT = CS.getFunctionType(); + FunctionType *FT = CB.getFunctionType(); if (FT->isVarArg()) { - VAHelper->visitCallSite(CS, IRB); + VAHelper->visitCallBase(CB, IRB); } // Now, get the shadow for the RetVal. 
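The new argument loop above makes a three-way choice per call argument. A compact model of that decision, using the flag names from this hunk (ClEagerChecks, NoUndef, ByVal); this is a reading of the diff written out for exposition, not the pass itself:

```cpp
// How a call argument's shadow is communicated to the callee, following the
// logic added to visitCallBase().
enum class ArgShadowStrategy {
  EagerCheck, // check the shadow at the call site, pass nothing via TLS
  ByValCopy,  // copy the pointee's shadow into the parameter TLS area
  TLSStore    // store the argument's shadow into the parameter TLS area
};

ArgShadowStrategy chooseStrategy(bool EagerChecksEnabled, bool IsByVal,
                                 bool IsNoUndef) {
  if (EagerChecksEnabled && !IsByVal && IsNoUndef)
    return ArgShadowStrategy::EagerCheck; // insertShadowCheck(A, &CB)
  if (IsByVal)
    return ArgShadowStrategy::ByValCopy;  // size from CB.getParamByValType(i)
  return ArgShadowStrategy::TLSStore;     // aligned store at kShadowTLSAlignment
}
```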
- if (!I.getType()->isSized()) return; + if (!CB.getType()->isSized()) + return; // Don't emit the epilogue for musttail call returns. - if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return; - IRBuilder<> IRBBefore(&I); + if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall()) + return; + + if (ClEagerChecks && CB.hasRetAttr(Attribute::NoUndef)) { + setShadow(&CB, getCleanShadow(&CB)); + setOrigin(&CB, getCleanOrigin()); + return; + } + + IRBuilder<> IRBBefore(&CB); // Until we have full dynamic coverage, make sure the retval shadow is 0. - Value *Base = getShadowPtrForRetval(&I, IRBBefore); - IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, - kShadowTLSAlignment.value()); + Value *Base = getShadowPtrForRetval(&CB, IRBBefore); + IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base, + kShadowTLSAlignment); BasicBlock::iterator NextInsn; - if (CS.isCall()) { - NextInsn = ++I.getIterator(); - assert(NextInsn != I.getParent()->end()); + if (isa<CallInst>(CB)) { + NextInsn = ++CB.getIterator(); + assert(NextInsn != CB.getParent()->end()); } else { - BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest(); + BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest(); if (!NormalDest->getSinglePredecessor()) { // FIXME: this case is tricky, so we are just conservative here. // Perhaps we need to split the edge between this BB and NormalDest, // but a naive attempt to use SplitEdge leads to a crash. - setShadow(&I, getCleanShadow(&I)); - setOrigin(&I, getCleanOrigin()); + setShadow(&CB, getCleanShadow(&CB)); + setOrigin(&CB, getCleanOrigin()); return; } // FIXME: NextInsn is likely in a basic block that has not been visited yet. @@ -3382,12 +3552,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } IRBuilder<> IRBAfter(&*NextInsn); Value *RetvalShadow = IRBAfter.CreateAlignedLoad( - getShadowTy(&I), getShadowPtrForRetval(&I, IRBAfter), - kShadowTLSAlignment.value(), "_msret"); - setShadow(&I, RetvalShadow); + getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter), + kShadowTLSAlignment, "_msret"); + setShadow(&CB, RetvalShadow); if (MS.TrackOrigins) - setOrigin(&I, IRBAfter.CreateLoad(MS.OriginTy, - getOriginPtrForRetval(IRBAfter))); + setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy, + getOriginPtrForRetval(IRBAfter))); } bool isAMustTailRetVal(Value *RetVal) { @@ -3407,14 +3577,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Don't emit the epilogue for musttail call returns. if (isAMustTailRetVal(RetVal)) return; Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); - if (CheckReturnValue) { + bool HasNoUndef = + F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef); + bool StoreShadow = !(ClEagerChecks && HasNoUndef); + // FIXME: Consider using SpecialCaseList to specify a list of functions that + // must always return fully initialized values. For now, we hardcode "main". 
+ bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main"); + + Value *Shadow = getShadow(RetVal); + bool StoreOrigin = true; + if (EagerCheck) { insertShadowCheck(RetVal, &I); - Value *Shadow = getCleanShadow(RetVal); - IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment.value()); - } else { - Value *Shadow = getShadow(RetVal); - IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment.value()); - if (MS.TrackOrigins) + Shadow = getCleanShadow(RetVal); + StoreOrigin = false; + } + + // The caller may still expect information passed over TLS if we pass our + // check + if (StoreShadow) { + IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); + if (MS.TrackOrigins && StoreOrigin) IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); } } @@ -3455,7 +3637,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } else { Value *ShadowBase, *OriginBase; std::tie(ShadowBase, OriginBase) = getShadowOriginPtr( - &I, IRB, IRB.getInt8Ty(), Align::None(), /*isStore*/ true); + &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true); Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0); IRB.CreateMemSet(ShadowBase, PoisonValue, Len, @@ -3697,7 +3879,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { const DataLayout &DL = F.getParent()->getDataLayout(); CallBase *CB = cast<CallBase>(&I); IRBuilder<> IRB(&I); - InlineAsm *IA = cast<InlineAsm>(CB->getCalledValue()); + InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand()); int OutputArgs = getNumOutputArgs(IA, CB); // The last operand of a CallInst is the function itself. int NumOperands = CB->getNumOperands() - 1; @@ -3738,7 +3920,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// AMD64-specific implementation of VarArgHelper. struct VarArgAMD64Helper : public VarArgHelper { // An unfortunate workaround for asymmetric lowering of va_arg stuff. - // See a comment in visitCallSite for more details. + // See a comment in visitCallBase for more details. static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 static const unsigned AMD64FpEndOffsetSSE = 176; // If SSE is disabled, fp_offset in va_list is zero. @@ -3790,17 +3972,17 @@ struct VarArgAMD64Helper : public VarArgHelper { // would have been to associate each live instance of va_list with a copy of // MSanParamTLS, and extract shadow on va_arg() call in the argument list // order. - void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { + void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { unsigned GpOffset = 0; unsigned FpOffset = AMD64GpEndOffset; unsigned OverflowOffset = AMD64FpEndOffset; const DataLayout &DL = F.getParent()->getDataLayout(); - for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); - ArgIt != End; ++ArgIt) { + for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; + ++ArgIt) { Value *A = *ArgIt; - unsigned ArgNo = CS.getArgumentNo(ArgIt); - bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); - bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal); + unsigned ArgNo = CB.getArgOperandNo(ArgIt); + bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); + bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal); if (IsByVal) { // ByVal arguments always go to the overflow area. 
// Fixed arguments passed through the overflow area will be stepped @@ -3808,7 +3990,7 @@ struct VarArgAMD64Helper : public VarArgHelper { if (IsFixed) continue; assert(A->getType()->isPointerTy()); - Type *RealTy = A->getType()->getPointerElementType(); + Type *RealTy = CB.getParamByValType(ArgNo); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); Value *ShadowBase = getShadowPtrForVAArgument( RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8)); @@ -3871,7 +4053,7 @@ struct VarArgAMD64Helper : public VarArgHelper { if (!ShadowBase) continue; Value *Shadow = MSV.getShadow(A); - IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment.value()); + IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment); if (MS.TrackOrigins) { Value *Origin = MSV.getOrigin(A); unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); @@ -4020,11 +4202,11 @@ struct VarArgMIPS64Helper : public VarArgHelper { VarArgMIPS64Helper(Function &F, MemorySanitizer &MS, MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} - void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { + void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { unsigned VAArgOffset = 0; const DataLayout &DL = F.getParent()->getDataLayout(); - for (CallSite::arg_iterator ArgIt = CS.arg_begin() + - CS.getFunctionType()->getNumParams(), End = CS.arg_end(); + for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(), + End = CB.arg_end(); ArgIt != End; ++ArgIt) { Triple TargetTriple(F.getParent()->getTargetTriple()); Value *A = *ArgIt; @@ -4041,8 +4223,7 @@ struct VarArgMIPS64Helper : public VarArgHelper { VAArgOffset = alignTo(VAArgOffset, 8); if (!Base) continue; - IRB.CreateAlignedStore(MSV.getShadow(A), Base, - kShadowTLSAlignment.value()); + IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); @@ -4170,17 +4351,17 @@ struct VarArgAArch64Helper : public VarArgHelper { // the remaining arguments. // Using constant offset within the va_arg TLS array allows fast copy // in the finalize instrumentation. - void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { + void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { unsigned GrOffset = AArch64GrBegOffset; unsigned VrOffset = AArch64VrBegOffset; unsigned OverflowOffset = AArch64VAEndOffset; const DataLayout &DL = F.getParent()->getDataLayout(); - for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); - ArgIt != End; ++ArgIt) { + for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; + ++ArgIt) { Value *A = *ArgIt; - unsigned ArgNo = CS.getArgumentNo(ArgIt); - bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); + unsigned ArgNo = CB.getArgOperandNo(ArgIt); + bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); ArgKind AK = classifyArgument(A); if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset) AK = AK_Memory; @@ -4213,8 +4394,7 @@ struct VarArgAArch64Helper : public VarArgHelper { continue; if (!Base) continue; - IRB.CreateAlignedStore(MSV.getShadow(A), Base, - kShadowTLSAlignment.value()); + IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } Constant *OverflowSize = ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset); @@ -4310,7 +4490,7 @@ struct VarArgAArch64Helper : public VarArgHelper { // for 128-bit FP/SIMD vn-v7). // We need then to propagate the shadow arguments on both regions // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'. 
- // The remaning arguments are saved on shadow for 'va::stack'. + // The remaining arguments are saved on shadow for 'va::stack'. // One caveat is it requires only to propagate the non-named arguments, // however on the call site instrumentation 'all' the arguments are // saved. So to copy the shadow values from the va_arg TLS array @@ -4400,7 +4580,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper { VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS, MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} - void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { + void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { // For PowerPC, we need to deal with alignment of stack arguments - // they are mostly aligned to 8 bytes, but vectors and i128 arrays // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes, @@ -4411,7 +4591,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper { Triple TargetTriple(F.getParent()->getTargetTriple()); // Parameter save area starts at 48 bytes from frame pointer for ABIv1, // and 32 bytes for ABIv2. This is usually determined by target - // endianness, but in theory could be overriden by function attribute. + // endianness, but in theory could be overridden by function attribute. // For simplicity, we ignore it here (it'd only matter for QPX vectors). if (TargetTriple.getArch() == Triple::ppc64) VAArgBase = 48; @@ -4419,19 +4599,19 @@ struct VarArgPowerPC64Helper : public VarArgHelper { VAArgBase = 32; unsigned VAArgOffset = VAArgBase; const DataLayout &DL = F.getParent()->getDataLayout(); - for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); - ArgIt != End; ++ArgIt) { + for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; + ++ArgIt) { Value *A = *ArgIt; - unsigned ArgNo = CS.getArgumentNo(ArgIt); - bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); - bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal); + unsigned ArgNo = CB.getArgOperandNo(ArgIt); + bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); + bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal); if (IsByVal) { assert(A->getType()->isPointerTy()); - Type *RealTy = A->getType()->getPointerElementType(); + Type *RealTy = CB.getParamByValType(ArgNo); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - uint64_t ArgAlign = CS.getParamAlignment(ArgNo); - if (ArgAlign < 8) - ArgAlign = 8; + MaybeAlign ArgAlign = CB.getParamAlign(ArgNo); + if (!ArgAlign || *ArgAlign < Align(8)) + ArgAlign = Align(8); VAArgOffset = alignTo(VAArgOffset, ArgAlign); if (!IsFixed) { Value *Base = getShadowPtrForVAArgument( @@ -4474,8 +4654,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper { Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset - VAArgBase, ArgSize); if (Base) - IRB.CreateAlignedStore(MSV.getShadow(A), Base, - kShadowTLSAlignment.value()); + IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } VAArgOffset += ArgSize; VAArgOffset = alignTo(VAArgOffset, 8); @@ -4566,12 +4745,324 @@ struct VarArgPowerPC64Helper : public VarArgHelper { } }; +/// SystemZ-specific implementation of VarArgHelper. 
+struct VarArgSystemZHelper : public VarArgHelper { + static const unsigned SystemZGpOffset = 16; + static const unsigned SystemZGpEndOffset = 56; + static const unsigned SystemZFpOffset = 128; + static const unsigned SystemZFpEndOffset = 160; + static const unsigned SystemZMaxVrArgs = 8; + static const unsigned SystemZRegSaveAreaSize = 160; + static const unsigned SystemZOverflowOffset = 160; + static const unsigned SystemZVAListTagSize = 32; + static const unsigned SystemZOverflowArgAreaPtrOffset = 16; + static const unsigned SystemZRegSaveAreaPtrOffset = 24; + + Function &F; + MemorySanitizer &MS; + MemorySanitizerVisitor &MSV; + Value *VAArgTLSCopy = nullptr; + Value *VAArgTLSOriginCopy = nullptr; + Value *VAArgOverflowSize = nullptr; + + SmallVector<CallInst *, 16> VAStartInstrumentationList; + + enum class ArgKind { + GeneralPurpose, + FloatingPoint, + Vector, + Memory, + Indirect, + }; + + enum class ShadowExtension { None, Zero, Sign }; + + VarArgSystemZHelper(Function &F, MemorySanitizer &MS, + MemorySanitizerVisitor &MSV) + : F(F), MS(MS), MSV(MSV) {} + + ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) { + // T is a SystemZABIInfo::classifyArgumentType() output, and there are + // only a few possibilities of what it can be. In particular, enums, single + // element structs and large types have already been taken care of. + + // Some i128 and fp128 arguments are converted to pointers only in the + // back end. + if (T->isIntegerTy(128) || T->isFP128Ty()) + return ArgKind::Indirect; + if (T->isFloatingPointTy()) + return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint; + if (T->isIntegerTy() || T->isPointerTy()) + return ArgKind::GeneralPurpose; + if (T->isVectorTy()) + return ArgKind::Vector; + return ArgKind::Memory; + } + + ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) { + // ABI says: "One of the simple integer types no more than 64 bits wide. + // ... If such an argument is shorter than 64 bits, replace it by a full + // 64-bit integer representing the same number, using sign or zero + // extension". Shadow for an integer argument has the same type as the + // argument itself, so it can be sign or zero extended as well. + bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt); + bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt); + if (ZExt) { + assert(!SExt); + return ShadowExtension::Zero; + } + if (SExt) { + assert(!ZExt); + return ShadowExtension::Sign; + } + return ShadowExtension::None; + } + + void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { + bool IsSoftFloatABI = CB.getCalledFunction() + ->getFnAttribute("use-soft-float") + .getValueAsString() == "true"; + unsigned GpOffset = SystemZGpOffset; + unsigned FpOffset = SystemZFpOffset; + unsigned VrIndex = 0; + unsigned OverflowOffset = SystemZOverflowOffset; + const DataLayout &DL = F.getParent()->getDataLayout(); + for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; + ++ArgIt) { + Value *A = *ArgIt; + unsigned ArgNo = CB.getArgOperandNo(ArgIt); + bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); + // SystemZABIInfo does not produce ByVal parameters. 
+ assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal)); + Type *T = A->getType(); + ArgKind AK = classifyArgument(T, IsSoftFloatABI); + if (AK == ArgKind::Indirect) { + T = PointerType::get(T, 0); + AK = ArgKind::GeneralPurpose; + } + if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset) + AK = ArgKind::Memory; + if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset) + AK = ArgKind::Memory; + if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed)) + AK = ArgKind::Memory; + Value *ShadowBase = nullptr; + Value *OriginBase = nullptr; + ShadowExtension SE = ShadowExtension::None; + switch (AK) { + case ArgKind::GeneralPurpose: { + // Always keep track of GpOffset, but store shadow only for varargs. + uint64_t ArgSize = 8; + if (GpOffset + ArgSize <= kParamTLSSize) { + if (!IsFixed) { + SE = getShadowExtension(CB, ArgNo); + uint64_t GapSize = 0; + if (SE == ShadowExtension::None) { + uint64_t ArgAllocSize = DL.getTypeAllocSize(T); + assert(ArgAllocSize <= ArgSize); + GapSize = ArgSize - ArgAllocSize; + } + ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize); + if (MS.TrackOrigins) + OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize); + } + GpOffset += ArgSize; + } else { + GpOffset = kParamTLSSize; + } + break; + } + case ArgKind::FloatingPoint: { + // Always keep track of FpOffset, but store shadow only for varargs. + uint64_t ArgSize = 8; + if (FpOffset + ArgSize <= kParamTLSSize) { + if (!IsFixed) { + // PoP says: "A short floating-point datum requires only the + // left-most 32 bit positions of a floating-point register". + // Therefore, in contrast to AK_GeneralPurpose and AK_Memory, + // don't extend shadow and don't mind the gap. + ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset); + if (MS.TrackOrigins) + OriginBase = getOriginPtrForVAArgument(IRB, FpOffset); + } + FpOffset += ArgSize; + } else { + FpOffset = kParamTLSSize; + } + break; + } + case ArgKind::Vector: { + // Keep track of VrIndex. No need to store shadow, since vector varargs + // go through AK_Memory. + assert(IsFixed); + VrIndex++; + break; + } + case ArgKind::Memory: { + // Keep track of OverflowOffset and store shadow only for varargs. + // Ignore fixed args, since we need to copy only the vararg portion of + // the overflow area shadow. + if (!IsFixed) { + uint64_t ArgAllocSize = DL.getTypeAllocSize(T); + uint64_t ArgSize = alignTo(ArgAllocSize, 8); + if (OverflowOffset + ArgSize <= kParamTLSSize) { + SE = getShadowExtension(CB, ArgNo); + uint64_t GapSize = + SE == ShadowExtension::None ? 
ArgSize - ArgAllocSize : 0; + ShadowBase = + getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize); + if (MS.TrackOrigins) + OriginBase = + getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize); + OverflowOffset += ArgSize; + } else { + OverflowOffset = kParamTLSSize; + } + } + break; + } + case ArgKind::Indirect: + llvm_unreachable("Indirect must be converted to GeneralPurpose"); + } + if (ShadowBase == nullptr) + continue; + Value *Shadow = MSV.getShadow(A); + if (SE != ShadowExtension::None) + Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(), + /*Signed*/ SE == ShadowExtension::Sign); + ShadowBase = IRB.CreateIntToPtr( + ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s"); + IRB.CreateStore(Shadow, ShadowBase); + if (MS.TrackOrigins) { + Value *Origin = MSV.getOrigin(A); + unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); + MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize, + kMinOriginAlignment); + } + } + Constant *OverflowSize = ConstantInt::get( + IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset); + IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS); + } + + Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) { + Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); + return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); + } + + Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) { + Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy); + Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); + return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), + "_msarg_va_o"); + } + + void unpoisonVAListTagForInst(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *VAListTag = I.getArgOperand(0); + Value *ShadowPtr, *OriginPtr; + const Align Alignment = Align(8); + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment, + /*isStore*/ true); + IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), + SystemZVAListTagSize, Alignment, false); + } + + void visitVAStartInst(VAStartInst &I) override { + VAStartInstrumentationList.push_back(&I); + unpoisonVAListTagForInst(I); + } + + void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); } + + void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) { + Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); + Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)), + PointerType::get(RegSaveAreaPtrTy, 0)); + Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); + Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; + const Align Alignment = Align(8); + std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = + MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment, + /*isStore*/ true); + // TODO(iii): copy only fragments filled by visitCallBase() + IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, + SystemZRegSaveAreaSize); + if (MS.TrackOrigins) + IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy, + Alignment, SystemZRegSaveAreaSize); + } + + void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) { + Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C); + Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, 
SystemZOverflowArgAreaPtrOffset)), + PointerType::get(OverflowArgAreaPtrTy, 0)); + Value *OverflowArgAreaPtr = + IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr); + Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr; + const Align Alignment = Align(8); + std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) = + MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(), + Alignment, /*isStore*/ true); + Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, + SystemZOverflowOffset); + IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment, + VAArgOverflowSize); + if (MS.TrackOrigins) { + SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy, + SystemZOverflowOffset); + IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment, + VAArgOverflowSize); + } + } + + void finalizeInstrumentation() override { + assert(!VAArgOverflowSize && !VAArgTLSCopy && + "finalizeInstrumentation called twice"); + if (!VAStartInstrumentationList.empty()) { + // If there is a va_start in this function, make a backup copy of + // va_arg_tls somewhere in the function entry block. + IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI()); + VAArgOverflowSize = + IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); + Value *CopySize = + IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset), + VAArgOverflowSize); + VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); + IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); + if (MS.TrackOrigins) { + VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); + IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS, + Align(8), CopySize); + } + } + + // Instrument va_start. + // Copy va_list shadow from the backup copy of the TLS contents. + for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size(); + VaStartNo < VaStartNum; VaStartNo++) { + CallInst *OrigInst = VAStartInstrumentationList[VaStartNo]; + IRBuilder<> IRB(OrigInst->getNextNode()); + Value *VAListTag = OrigInst->getArgOperand(0); + copyRegSaveArea(IRB, VAListTag); + copyOverflowArea(IRB, VAListTag); + } + } +}; + /// A no-op implementation of VarArgHelper. 
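One subtle piece of the SystemZ helper above is where the shadow of a narrow general-purpose vararg lands inside its 8-byte register-save slot: with a zeroext/signext attribute the shadow itself is widened to 64 bits (via CreateShadowCast), otherwise the narrow shadow is stored at an offset that skips the slot bytes the value does not occupy. A small model of that placement rule, with names following the diff; a sketch, not the helper:

```cpp
#include <cassert>
#include <cstdint>

enum class ShadowExtension { None, Zero, Sign };

// How the shadow of a < 64-bit integer vararg is placed in its 8-byte
// general-purpose slot.
struct Placement {
  uint64_t GapBytes;    // offset added to GpOffset before storing the shadow
  bool WidenShadowTo64; // whether the shadow is first zero/sign extended
};

Placement placeGpVarArgShadow(ShadowExtension SE, uint64_t ArgAllocSize) {
  assert(ArgAllocSize <= 8 && "a GP argument occupies one 8-byte slot");
  if (SE == ShadowExtension::None)
    return {8 - ArgAllocSize, false}; // narrow shadow, skip the unused bytes
  return {0, true};                   // widened shadow fills the whole slot
}
```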
struct VarArgNoOpHelper : public VarArgHelper { VarArgNoOpHelper(Function &F, MemorySanitizer &MS, MemorySanitizerVisitor &MSV) {} - void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {} + void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {} void visitVAStartInst(VAStartInst &I) override {} @@ -4596,6 +5087,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, else if (TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le) return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); else return new VarArgNoOpHelper(Func, Msan, Visitor); } diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index cc96bdd1d5163..dcfc28887a486 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -52,6 +52,7 @@ #include "ValueProfileCollector.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -63,13 +64,13 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -376,6 +377,7 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } }; @@ -404,6 +406,7 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } }; @@ -436,6 +439,7 @@ INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) @@ -467,7 +471,7 @@ INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, ModulePass * llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { - return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); + return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); } namespace { @@ -565,11 +569,11 @@ public: } FuncPGOInstrumentation( - Function &Func, + Function &Func, TargetLibraryInfo &TLI, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) - : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { // This should be done before CFG hash computation. 
SIVisitor.countSelects(Func); @@ -799,18 +803,50 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { return canInstrument(InstrBB); } +// When generating value profiling calls on Windows routines that make use of +// handler funclets for exception processing an operand bundle needs to attached +// to the called function. This routine will set \p OpBundles to contain the +// funclet information, if any is needed, that should be placed on the generated +// value profiling call for the value profile candidate call. +static void +populateEHOperandBundle(VPCandidateInfo &Cand, + DenseMap<BasicBlock *, ColorVector> &BlockColors, + SmallVectorImpl<OperandBundleDef> &OpBundles) { + auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); + if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { + // The instrumentation call should belong to the same funclet as a + // non-intrinsic call, so just copy the operand bundle, if any exists. + Optional<OperandBundleUse> ParentFunclet = + OrigCall->getOperandBundle(LLVMContext::OB_funclet); + if (ParentFunclet) + OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); + } else { + // Intrinsics or other instructions do not get funclet information from the + // front-end. Need to use the BlockColors that was computed by the routine + // colorEHFunclets to determine whether a funclet is needed. + if (!BlockColors.empty()) { + const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; + assert(CV.size() == 1 && "non-unique color for block!"); + Instruction *EHPad = CV.front()->getFirstNonPHI(); + if (EHPad->isEHPad()) + OpBundles.emplace_back("funclet", EHPad); + } + } +} + // Visit all edge and instrument the edges not in MST, and do value profiling. // Critical edges will be split. static void instrumentOneFunc( - Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, + Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, + BlockFrequencyInfo *BFI, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, - BFI, IsCS); + FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, TLI, ComdatMembers, true, + BPI, BFI, IsCS); std::vector<BasicBlock *> InstrumentBBs; FuncInfo.getInstrumentBBs(InstrumentBBs); unsigned NumCounters = @@ -839,6 +875,15 @@ static void instrumentOneFunc( NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); + // Intrinsic function calls do not have funclet operand bundles needed for + // Windows exception handling attached to them. However, if value profiling is + // inserted for one of these calls, then a funclet value will need to be set + // on the instrumentation call based on the funclet coloring. + DenseMap<BasicBlock *, ColorVector> BlockColors; + if (F.hasPersonalityFn() && + isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + BlockColors = colorEHFunclets(F); + // For each VP Kind, walk the VP candidates and instrument each one. 
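Condensed from populateEHOperandBundle() and its use above, the pattern is: a non-intrinsic call already carries its funclet bundle, so the bundle is simply copied; an intrinsic does not, so the enclosing funclet pad is recovered from the colorEHFunclets() coloring. The sketch below restates that logic as one self-contained helper; it only uses calls that appear in this patch, but it is an illustration rather than a drop-in replacement.

```cpp
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

// Collect the "funclet" operand bundle (if any) that an instrumentation call
// inserted next to AnnotatedInst must carry on Windows EH targets.
static void collectFuncletBundle(
    Instruction *AnnotatedInst,
    const DenseMap<BasicBlock *, ColorVector> &BlockColors,
    SmallVectorImpl<OperandBundleDef> &OpBundles) {
  auto *OrigCall = dyn_cast<CallBase>(AnnotatedInst);
  if (OrigCall && !isa<IntrinsicInst>(OrigCall)) {
    // The surrounding non-intrinsic call was given its bundle by the front end.
    if (Optional<OperandBundleUse> ParentFunclet =
            OrigCall->getOperandBundle(LLVMContext::OB_funclet))
      OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
    return;
  }
  // Intrinsics carry no funclet info; fall back to the coloring computed by
  // colorEHFunclets() in instrumentOneFunc().
  if (BlockColors.empty())
    return;
  const ColorVector &CV = BlockColors.find(AnnotatedInst->getParent())->second;
  assert(CV.size() == 1 && "non-unique color for block!");
  Instruction *EHPad = CV.front()->getFirstNonPHI();
  if (EHPad->isEHPad())
    OpBundles.emplace_back("funclet", EHPad);
}
```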
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { unsigned SiteIndex = 0; @@ -860,11 +905,14 @@ static void instrumentOneFunc( ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); assert(ToProfile && "value profiling Value is of unexpected type"); + SmallVector<OperandBundleDef, 1> OpBundles; + populateEHOperandBundle(Cand, BlockColors, OpBundles); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), Builder.getInt64(FuncInfo.FunctionHash), ToProfile, - Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}); + Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, + OpBundles); } } // IPVK_First <= Kind <= IPVK_Last } @@ -953,12 +1001,12 @@ namespace { class PGOUseFunc { public: - PGOUseFunc(Function &Func, Module *Modu, + PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, ProfileSummaryInfo *PSI, bool IsCS) : F(Func), M(Modu), BFI(BFIin), PSI(PSI), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS), FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. @@ -1295,7 +1343,7 @@ void PGOUseFunc::setBranchWeights() { if (TI->getNumSuccessors() < 2) continue; if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || - isa<IndirectBrInst>(TI))) + isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) continue; if (getBBInfo(&BB).CountValue == 0) @@ -1460,7 +1508,8 @@ static void collectComdatMembers( } static bool InstrumentAllFunctions( - Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, + Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, + function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. 
@@ -1472,9 +1521,10 @@ static bool InstrumentAllFunctions( for (auto &F : M) { if (F.isDeclaration()) continue; + auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); + instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); } return true; } @@ -1490,27 +1540,32 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { if (skipModule(M)) return false; + auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; auto LookupBPI = [this](Function &F) { return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); }; auto LookupBFI = [this](Function &F) { return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); + return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; auto LookupBPI = [&FAM](Function &F) { return &FAM.getResult<BranchProbabilityAnalysis>(F); }; - auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult<BlockFrequencyAnalysis>(F); }; - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) + if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1518,6 +1573,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, + function_ref<TargetLibraryInfo &(Function &)> LookupTLI, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS) { @@ -1557,6 +1613,7 @@ static bool annotateAllFunctions( M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), IsCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr); + PSI->refresh(); std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1565,12 +1622,13 @@ static bool annotateAllFunctions( for (auto &F : M) { if (F.isDeclaration()) continue; + auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. 
SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); + PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1651,10 +1709,12 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; auto LookupBPI = [&FAM](Function &F) { return &FAM.getResult<BranchProbabilityAnalysis>(F); }; - auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult<BlockFrequencyAnalysis>(F); }; @@ -1662,7 +1722,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI, PSI, IsCS)) + LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1672,6 +1732,9 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { if (skipModule(M)) return false; + auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; auto LookupBPI = [this](Function &F) { return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); }; @@ -1680,13 +1743,13 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { }; auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, - IsCS); + return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, + LookupBFI, PSI, IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { if (!Node->getName().empty()) - return Node->getName(); + return std::string(Node->getName()); std::string SimpleNodeName; raw_string_ostream OS(SimpleNodeName); @@ -1750,7 +1813,7 @@ void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { template <> struct GraphTraits<PGOUseFunc *> { using NodeRef = const BasicBlock *; - using ChildIteratorType = succ_const_iterator; + using ChildIteratorType = const_succ_iterator; using nodes_iterator = pointer_iterator<Function::const_iterator>; static NodeRef getEntryNode(const PGOUseFunc *G) { @@ -1777,7 +1840,7 @@ template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const PGOUseFunc *G) { - return G->getFunc().getName(); + return std::string(G->getFunc().getName()); } std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index d0afe2959b390..2b7b859891dcd 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -23,7 +23,6 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -38,7 +37,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" #include "llvm/ProfileData/InstrProf.h" #include 
"llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -97,6 +95,11 @@ extern cl::opt<std::string> MemOPSizeRange; // This option sets the value that groups large memop sizes extern cl::opt<unsigned> MemOPSizeLarge; +cl::opt<bool> + MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), + cl::Hidden, + cl::desc("Size-specialize memcmp and bcmp calls")); + namespace { class PGOMemOPSizeOptLegacyPass : public FunctionPass { public: @@ -115,6 +118,7 @@ private: AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } }; } // end anonymous namespace @@ -124,6 +128,7 @@ INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) @@ -133,11 +138,90 @@ FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { } namespace { + +static const char *getMIName(const MemIntrinsic *MI) { + switch (MI->getIntrinsicID()) { + case Intrinsic::memcpy: + return "memcpy"; + case Intrinsic::memmove: + return "memmove"; + case Intrinsic::memset: + return "memset"; + default: + return "unknown"; + } +} + +// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp). +struct MemOp { + Instruction *I; + MemOp(MemIntrinsic *MI) : I(MI) {} + MemOp(CallInst *CI) : I(CI) {} + MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); } + CallInst *asCI() { return cast<CallInst>(I); } + MemOp clone() { + if (auto MI = asMI()) + return MemOp(cast<MemIntrinsic>(MI->clone())); + return MemOp(cast<CallInst>(asCI()->clone())); + } + Value *getLength() { + if (auto MI = asMI()) + return MI->getLength(); + return asCI()->getArgOperand(2); + } + void setLength(Value *Length) { + if (auto MI = asMI()) + return MI->setLength(Length); + asCI()->setArgOperand(2, Length); + } + StringRef getFuncName() { + if (auto MI = asMI()) + return MI->getCalledFunction()->getName(); + return asCI()->getCalledFunction()->getName(); + } + bool isMemmove() { + if (auto MI = asMI()) + if (MI->getIntrinsicID() == Intrinsic::memmove) + return true; + return false; + } + bool isMemcmp(TargetLibraryInfo &TLI) { + LibFunc Func; + if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && + Func == LibFunc_memcmp) { + return true; + } + return false; + } + bool isBcmp(TargetLibraryInfo &TLI) { + LibFunc Func; + if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && + Func == LibFunc_bcmp) { + return true; + } + return false; + } + const char *getName(TargetLibraryInfo &TLI) { + if (auto MI = asMI()) + return getMIName(MI); + LibFunc Func; + if (TLI.getLibFunc(*asCI(), Func)) { + if (Func == LibFunc_memcmp) + return "memcmp"; + if (Func == LibFunc_bcmp) + return "bcmp"; + } + llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst"); + return nullptr; + } +}; + class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { public: MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE, DominatorTree *DT) - : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { + OptimizationRemarkEmitter &ORE, DominatorTree *DT, + TargetLibraryInfo &TLI) + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) { ValueDataArray = 
std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, @@ -149,13 +233,12 @@ public: WorkList.clear(); visit(Func); - for (auto &MI : WorkList) { + for (auto &MO : WorkList) { ++NumOfPGOMemOPAnnotate; - if (perform(MI)) { + if (perform(MO)) { Changed = true; ++NumOfPGOMemOPOpt; - LLVM_DEBUG(dbgs() << "MemOP call: " - << MI->getCalledFunction()->getName() + LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName() << "is Transformed.\n"); } } @@ -166,7 +249,16 @@ public: // Not perform on constant length calls. if (dyn_cast<ConstantInt>(Length)) return; - WorkList.push_back(&MI); + WorkList.push_back(MemOp(&MI)); + } + + void visitCallInst(CallInst &CI) { + LibFunc Func; + if (TLI.getLibFunc(CI, Func) && + (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && + !dyn_cast<ConstantInt>(CI.getArgOperand(2))) { + WorkList.push_back(MemOp(&CI)); + } } private: @@ -174,15 +266,16 @@ private: BlockFrequencyInfo &BFI; OptimizationRemarkEmitter &ORE; DominatorTree *DT; + TargetLibraryInfo &TLI; bool Changed; - std::vector<MemIntrinsic *> WorkList; + std::vector<MemOp> WorkList; // Start of the previse range. int64_t PreciseRangeStart; // Last value of the previse range. int64_t PreciseRangeLast; // The space to read the profile annotation. std::unique_ptr<InstrProfValueData[]> ValueDataArray; - bool perform(MemIntrinsic *MI); + bool perform(MemOp MO); // This kind shows which group the value falls in. For PreciseValue, we have // the profile count for that value. LargeGroup groups the values that are in @@ -198,19 +291,6 @@ private: } }; -static const char *getMIName(const MemIntrinsic *MI) { - switch (MI->getIntrinsicID()) { - case Intrinsic::memcpy: - return "memcpy"; - case Intrinsic::memmove: - return "memmove"; - case Intrinsic::memset: - return "memset"; - default: - return "unknown"; - } -} - static bool isProfitable(uint64_t Count, uint64_t TotalCount) { assert(Count <= TotalCount); if (Count < MemOPCountThreshold) @@ -229,21 +309,23 @@ static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, return ScaleCount / Denom; } -bool MemOPSizeOpt::perform(MemIntrinsic *MI) { - assert(MI); - if (MI->getIntrinsicID() == Intrinsic::memmove) +bool MemOPSizeOpt::perform(MemOp MO) { + assert(MO.I); + if (MO.isMemmove()) + return false; + if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI))) return false; uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; uint64_t TotalCount; - if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, + if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions, ValueDataArray.get(), NumVals, TotalCount)) return false; uint64_t ActualCount = TotalCount; uint64_t SavedTotalCount = TotalCount; if (MemOPScaleCount) { - auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); + auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent()); if (!BBEdgeCount) return false; ActualCount = *BBEdgeCount; @@ -335,13 +417,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { // } // merge_bb: - BasicBlock *BB = MI->getParent(); + BasicBlock *BB = MO.I->getParent(); LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); LLVM_DEBUG(dbgs() << *BB << "\n"); auto OrigBBFreq = BFI.getBlockFreq(BB); - BasicBlock *DefaultBB = SplitBlock(BB, MI, DT); - BasicBlock::iterator It(*MI); + BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT); + BasicBlock::iterator It(*MO.I); ++It; assert(It != DefaultBB->end()); BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT); @@ 
-353,15 +435,24 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); BB->getTerminator()->eraseFromParent(); - Value *SizeVar = MI->getLength(); + Value *SizeVar = MO.getLength(); SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); + Type *MemOpTy = MO.I->getType(); + PHINode *PHI = nullptr; + if (!MemOpTy->isVoidTy()) { + // Insert a phi for the return values at the merge block. + IRBuilder<> IRBM(MergeBB->getFirstNonPHI()); + PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge"); + MO.I->replaceAllUsesWith(PHI); + PHI->addIncoming(MO.I, DefaultBB); + } // Clear the value profile data. - MI->setMetadata(LLVMContext::MD_prof, nullptr); + MO.I->setMetadata(LLVMContext::MD_prof, nullptr); // If all promoted, we don't need the MD.prof metadata. if (SavedRemainCount > 0 || Version != NumVals) // Otherwise we need update with the un-promoted records back. - annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version), + annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version), SavedRemainCount, IPVK_MemOPSize, NumVals); LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); @@ -373,17 +464,18 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { for (uint64_t SizeId : SizeIds) { BasicBlock *CaseBB = BasicBlock::Create( Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); - Instruction *NewInst = MI->clone(); + MemOp NewMO = MO.clone(); // Fix the argument. - auto *MemI = cast<MemIntrinsic>(NewInst); - auto *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType()); + auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType()); assert(SizeType && "Expected integer type size argument."); ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId); - MemI->setLength(CaseSizeId); - CaseBB->getInstList().push_back(NewInst); + NewMO.setLength(CaseSizeId); + CaseBB->getInstList().push_back(NewMO.I); IRBuilder<> IRBCase(CaseBB); IRBCase.CreateBr(MergeBB); SI->addCase(CaseSizeId, CaseBB); + if (!MemOpTy->isVoidTy()) + PHI->addIncoming(NewMO.I, CaseBB); if (DT) { Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); Updates.push_back({DominatorTree::Insert, BB, CaseBB}); @@ -401,11 +493,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { ORE.emit([&]() { using namespace ore; - return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI) - << "optimized " << NV("Intrinsic", StringRef(getMIName(MI))) - << " with count " << NV("Count", SumForOpt) << " out of " - << NV("Total", TotalCount) << " for " << NV("Versions", Version) - << " versions"; + return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I) + << "optimized " << NV("Memop", MO.getName(TLI)) << " with count " + << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount) + << " for " << NV("Versions", Version) << " versions"; }); return true; @@ -414,13 +505,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, - DominatorTree *DT) { + DominatorTree *DT, TargetLibraryInfo &TLI) { if (DisableMemOPOPT) return false; if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT); + MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI); MemOPSizeOpt.perform(); return MemOPSizeOpt.isChanged(); } @@ -431,7 +522,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *DTWP = 
getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - return PGOMemOPSizeOptImpl(F, BFI, ORE, DT); + TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); } namespace llvm { @@ -442,7 +535,8 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT); + auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); if (!Changed) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp index 71ecfd9a26429..85e096112fca1 100644 --- a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp @@ -12,26 +12,24 @@ // LangRef. There are obvious parallels to the sanitizer tools, but this pass // is focused purely on the semantics of LLVM IR, not any particular source // language. If you're looking for something to see if your C/C++ contains -// UB, this is not it. -// +// UB, this is not it. +// // The rewritten semantics of each instruction will include the following -// components: +// components: // // 1) The original instruction, unmodified. // 2) A propagation rule which translates dynamic information about the poison // state of each input to whether the dynamic output of the instruction // produces poison. -// 3) A flag validation rule which validates any poison producing flags on the +// 3) A creation rule which validates any poison producing flags on the // instruction itself (e.g. checks for overflow on nsw). // 4) A check rule which traps (to a handler function) if this instruction must // execute undefined behavior given the poison state of it's inputs. // -// At the moment, the UB detection is done in a best effort manner; that is, -// the resulting code may produce a false negative result (not report UB when -// it actually exists according to the LangRef spec), but should never produce -// a false positive (report UB where it doesn't exist). The intention is to -// eventually support a "strict" mode which never dynamically reports a false -// negative at the cost of rejecting some valid inputs to translation. +// This is a must analysis based transform; that is, the resulting code may +// produce a false negative result (not report UB when actually exists +// according to the LangRef spec), but should never produce a false positive +// (report UB where it doesn't exist). // // Use cases for this pass include: // - Understanding (and testing!) the implications of the definition of poison @@ -40,7 +38,7 @@ // are well defined on the specific input used. // - Finding/confirming poison specific miscompiles by checking the poison // status of an input/IR pair is the same before and after an optimization -// transform. +// transform. // - Checking that a bugpoint reduction does not introduce UB which didn't // exist in the original program being reduced. // @@ -56,7 +54,7 @@ // moment, all arguments and return values are assumed not to be poison. // - Undef is not modeled. 
In particular, the optimizer's freedom to pick // concrete values for undef bits so as to maximize potential for producing -// poison is not modeled. +// poison is not modeled. // //===----------------------------------------------------------------------===// @@ -103,10 +101,10 @@ static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { return Accum; } -static void generatePoisonChecksForBinOp(Instruction &I, - SmallVector<Value*, 2> &Checks) { +static void generateCreationChecksForBinOp(Instruction &I, + SmallVectorImpl<Value*> &Checks) { assert(isa<BinaryOperator>(I)); - + IRBuilder<> B(&I); Value *LHS = I.getOperand(0); Value *RHS = I.getOperand(1); @@ -183,22 +181,28 @@ static void generatePoisonChecksForBinOp(Instruction &I, }; } -static Value* generatePoisonChecks(Instruction &I) { +/// Given an instruction which can produce poison on non-poison inputs +/// (i.e. canCreatePoison returns true), generate runtime checks to produce +/// boolean indicators of when poison would result. +static void generateCreationChecks(Instruction &I, + SmallVectorImpl<Value*> &Checks) { IRBuilder<> B(&I); - SmallVector<Value*, 2> Checks; if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) - generatePoisonChecksForBinOp(I, Checks); + generateCreationChecksForBinOp(I, Checks); - // Handle non-binops seperately + // Handle non-binops separately switch (I.getOpcode()) { default: + // Note there are a couple of missing cases here, once implemented, this + // should become an llvm_unreachable. break; case Instruction::ExtractElement: { Value *Vec = I.getOperand(0); - if (Vec->getType()->getVectorIsScalable()) + auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); + if (!VecVTy) break; Value *Idx = I.getOperand(1); - unsigned NumElts = Vec->getType()->getVectorNumElements(); + unsigned NumElts = VecVTy->getNumElements(); Value *Check = B.CreateICmp(ICmpInst::ICMP_UGE, Idx, ConstantInt::get(Idx->getType(), NumElts)); @@ -207,10 +211,11 @@ static Value* generatePoisonChecks(Instruction &I) { } case Instruction::InsertElement: { Value *Vec = I.getOperand(0); - if (Vec->getType()->getVectorIsScalable()) + auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); + if (!VecVTy) break; Value *Idx = I.getOperand(2); - unsigned NumElts = Vec->getType()->getVectorNumElements(); + unsigned NumElts = VecVTy->getNumElements(); Value *Check = B.CreateICmp(ICmpInst::ICMP_UGE, Idx, ConstantInt::get(Idx->getType(), NumElts)); @@ -218,7 +223,6 @@ static Value* generatePoisonChecks(Instruction &I) { break; } }; - return buildOrChain(B, Checks); } static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { @@ -262,24 +266,23 @@ static bool rewrite(Function &F) { for (BasicBlock &BB : F) for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { auto *OldPHI = cast<PHINode>(&*I); - auto *NewPHI = PHINode::Create(Int1Ty, - OldPHI->getNumIncomingValues()); + auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues()); for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) NewPHI->addIncoming(UndefValue::get(Int1Ty), OldPHI->getIncomingBlock(i)); NewPHI->insertBefore(OldPHI); ValToPoison[OldPHI] = NewPHI; } - + for (BasicBlock &BB : F) for (Instruction &I : BB) { if (isa<PHINode>(I)) continue; IRBuilder<> B(cast<Instruction>(&I)); - + // Note: There are many more sources of documented UB, but this pass only // attempts to find UB triggered by propagation of poison. 
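For context on the two queries the rewritten loop below keys on, here is a minimal sketch (not part of the patch; it only reuses the calls visible in this hunk) of how the renamed ValueTracking helpers are consulted:

    // Sketch only: assumes an llvm::Instruction &I, mirroring the calls below.
    #include "llvm/Analysis/ValueTracking.h"
    using namespace llvm;

    static bool needsPoisonTracking(Instruction &I) {
      // propagatesPoison: poison on any operand makes the result poison.
      // canCreatePoison: non-poison inputs can still yield poison, e.g. an nsw
      // overflow or an out-of-range extract/insertelement index, which is what
      // generateCreationChecks() above materializes as runtime checks.
      return propagatesPoison(&I) || canCreatePoison(&I);
    }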
- if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I))) + if (Value *Op = const_cast<Value*>(getGuaranteedNonPoisonOp(&I))) CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); if (LocalCheck) @@ -290,12 +293,12 @@ static bool rewrite(Function &F) { } SmallVector<Value*, 4> Checks; - if (propagatesFullPoison(&I)) + if (propagatesPoison(&I)) for (Value *V : I.operands()) Checks.push_back(getPoisonFor(ValToPoison, V)); - if (auto *Check = generatePoisonChecks(I)) - Checks.push_back(Check); + if (canCreatePoison(&I)) + generateCreationChecks(I, Checks); ValToPoison[&I] = buildOrChain(B, Checks); } @@ -328,7 +331,6 @@ PreservedAnalyses PoisonCheckingPass::run(Function &F, return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); } - /* Major TODO Items: - Control dependent poison UB - Strict mode - (i.e. must analyze every operand) @@ -338,10 +340,7 @@ PreservedAnalyses PoisonCheckingPass::run(Function &F, Instructions w/Unclear Semantics: - shufflevector - It would seem reasonable for an out of bounds mask element - to produce poison, but the LangRef does not state. - - and/or - It would seem reasonable for poison to propagate from both - arguments, but LangRef doesn't state and propagatesFullPoison doesn't - include these two. + to produce poison, but the LangRef does not state. - all binary ops w/vector operands - The likely interpretation would be that any element overflowing should produce poison for the entire result, but the LangRef does not state. diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index e6dc684c2e771..b6a9df57e4315 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -35,6 +34,8 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/SpecialCaseList.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -67,6 +68,8 @@ static const char *const SanCovModuleCtorTracePcGuardName = "sancov.module_ctor_trace_pc_guard"; static const char *const SanCovModuleCtor8bitCountersName = "sancov.module_ctor_8bit_counters"; +static const char *const SanCovModuleCtorBoolFlagName = + "sancov.module_ctor_bool_flag"; static const uint64_t SanCtorAndDtorPriority = 2; static const char *const SanCovTracePCGuardName = @@ -75,10 +78,13 @@ static const char *const SanCovTracePCGuardInitName = "__sanitizer_cov_trace_pc_guard_init"; static const char *const SanCov8bitCountersInitName = "__sanitizer_cov_8bit_counters_init"; +static const char *const SanCovBoolFlagInitName = + "__sanitizer_cov_bool_flag_init"; static const char *const SanCovPCsInitName = "__sanitizer_cov_pcs_init"; static const char *const SanCovGuardsSectionName = "sancov_guards"; static const char *const SanCovCountersSectionName = "sancov_cntrs"; +static const char *const SanCovBoolFlagSectionName = "sancov_bools"; static const char *const SanCovPCsSectionName = "sancov_pcs"; static const char *const SanCovLowestStackName = "__sancov_lowest_stack"; @@ -101,7 +107,8 @@ static cl::opt<bool> 
ClTracePCGuard("sanitizer-coverage-trace-pc-guard", // BBs, put this global into a named section, and pass this section's bounds // to __sanitizer_cov_pcs_init. // This way the coverage instrumentation does not need to acquire the PCs -// at run-time. Works with trace-pc-guard and inline-8bit-counters. +// at run-time. Works with trace-pc-guard, inline-8bit-counters, and +// inline-bool-flag. static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table", cl::desc("create a static PC table"), cl::Hidden, cl::init(false)); @@ -112,6 +119,11 @@ static cl::opt<bool> cl::Hidden, cl::init(false)); static cl::opt<bool> + ClInlineBoolFlag("sanitizer-coverage-inline-bool-flag", + cl::desc("sets a boolean flag for every edge"), cl::Hidden, + cl::init(false)); + +static cl::opt<bool> ClCMPTracing("sanitizer-coverage-trace-compares", cl::desc("Tracing of CMP and similar instructions"), cl::Hidden, cl::init(false)); @@ -169,11 +181,13 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { Options.TracePC |= ClTracePC; Options.TracePCGuard |= ClTracePCGuard; Options.Inline8bitCounters |= ClInline8bitCounters; + Options.InlineBoolFlag |= ClInlineBoolFlag; Options.PCTable |= ClCreatePCTable; Options.NoPrune |= !ClPruneBlocks; Options.StackDepth |= ClStackDepth; if (!Options.TracePCGuard && !Options.TracePC && - !Options.Inline8bitCounters && !Options.StackDepth) + !Options.Inline8bitCounters && !Options.StackDepth && + !Options.InlineBoolFlag) Options.TracePCGuard = true; // TracePCGuard is default. return Options; } @@ -185,8 +199,11 @@ using PostDomTreeCallback = class ModuleSanitizerCoverage { public: ModuleSanitizerCoverage( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) - : Options(OverrideFromCL(Options)) {} + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), + const SpecialCaseList *Allowlist = nullptr, + const SpecialCaseList *Blocklist = nullptr) + : Options(OverrideFromCL(Options)), Allowlist(Allowlist), + Blocklist(Blocklist) {} bool instrumentModule(Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback); @@ -233,9 +250,8 @@ private: FunctionCallee SanCovTraceGepFunction; FunctionCallee SanCovTraceSwitchFunction; GlobalVariable *SanCovLowestStack; - InlineAsm *EmptyAsm; Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, - *Int16Ty, *Int8Ty, *Int8PtrTy; + *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy; Module *CurModule; std::string CurModuleUniqueId; Triple TargetTriple; @@ -244,23 +260,38 @@ private: GlobalVariable *FunctionGuardArray; // for trace-pc-guard. GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters. + GlobalVariable *FunctionBoolArray; // for inline-bool-flag. GlobalVariable *FunctionPCsArray; // for pc-table. 
SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed; SmallVector<GlobalValue *, 20> GlobalsToAppendToCompilerUsed; SanitizerCoverageOptions Options; + + const SpecialCaseList *Allowlist; + const SpecialCaseList *Blocklist; }; class ModuleSanitizerCoverageLegacyPass : public ModulePass { public: ModuleSanitizerCoverageLegacyPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), + const std::vector<std::string> &AllowlistFiles = + std::vector<std::string>(), + const std::vector<std::string> &BlocklistFiles = + std::vector<std::string>()) : ModulePass(ID), Options(Options) { + if (AllowlistFiles.size() > 0) + Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, + *vfs::getRealFileSystem()); + if (BlocklistFiles.size() > 0) + Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, + *vfs::getRealFileSystem()); initializeModuleSanitizerCoverageLegacyPassPass( *PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { - ModuleSanitizerCoverage ModuleSancov(Options); + ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), + Blocklist.get()); auto DTCallback = [this](Function &F) -> const DominatorTree * { return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); }; @@ -281,13 +312,17 @@ public: private: SanitizerCoverageOptions Options; + + std::unique_ptr<SpecialCaseList> Allowlist; + std::unique_ptr<SpecialCaseList> Blocklist; }; } // namespace PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, ModuleAnalysisManager &MAM) { - ModuleSanitizerCoverage ModuleSancov(Options); + ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), + Blocklist.get()); auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); auto DTCallback = [&FAM](Function &F) -> const DominatorTree * { return &FAM.getResult<DominatorTreeAnalysis>(F); @@ -360,6 +395,12 @@ bool ModuleSanitizerCoverage::instrumentModule( Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; + if (Allowlist && + !Allowlist->inSection("coverage", "src", M.getSourceFileName())) + return false; + if (Blocklist && + Blocklist->inSection("coverage", "src", M.getSourceFileName())) + return false; C = &(M.getContext()); DL = &M.getDataLayout(); CurModule = &M; @@ -367,6 +408,7 @@ bool ModuleSanitizerCoverage::instrumentModule( TargetTriple = Triple(M.getTargetTriple()); FunctionGuardArray = nullptr; Function8bitCounterArray = nullptr; + FunctionBoolArray = nullptr; FunctionPCsArray = nullptr; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); IntptrPtrTy = PointerType::getUnqual(IntptrTy); @@ -375,10 +417,12 @@ bool ModuleSanitizerCoverage::instrumentModule( Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); + Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty()); Int64Ty = IRB.getInt64Ty(); Int32Ty = IRB.getInt32Ty(); Int16Ty = IRB.getInt16Ty(); Int8Ty = IRB.getInt8Ty(); + Int1Ty = IRB.getInt1Ty(); SanCovTracePCIndir = M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy); @@ -440,11 +484,6 @@ bool ModuleSanitizerCoverage::instrumentModule( if (Options.StackDepth && !SanCovLowestStack->isDeclaration()) SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy)); - // We insert an empty inline asm after cov callbacks to avoid callback merge. 
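The new Allowlist/Blocklist members are plain SpecialCaseList queries. A minimal sketch of the lookup they perform (hypothetical driver code, assuming a file in SpecialCaseList format with a [coverage] section):

    #include "llvm/Support/SpecialCaseList.h"
    #include "llvm/Support/VirtualFileSystem.h"
    using namespace llvm;

    // Sketch: build a list from one file and ask the same questions the pass asks.
    static bool coversFunction(StringRef ListPath, StringRef FuncName) {
      std::unique_ptr<SpecialCaseList> List = SpecialCaseList::createOrDie(
          {std::string(ListPath)}, *vfs::getRealFileSystem());
      // Mirrors the ("coverage", "fun", F.getName()) query in instrumentFunction
      // below; the module-level gate uses ("coverage", "src", SourceFileName).
      return List->inSection("coverage", "fun", FuncName);
    }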
- EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), - StringRef(""), StringRef(""), - /*hasSideEffects=*/true); - SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy); SanCovTracePCGuard = M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy); @@ -462,6 +501,11 @@ bool ModuleSanitizerCoverage::instrumentModule( Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName, SanCov8bitCountersInitName, Int8PtrTy, SanCovCountersSectionName); + if (FunctionBoolArray) { + Ctor = CreateInitCallsForSections(M, SanCovModuleCtorBoolFlagName, + SanCovBoolFlagInitName, Int1PtrTy, + SanCovBoolFlagSectionName); + } if (Ctor && Options.PCTable) { auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy); FunctionCallee InitFunction = declareSanitizerInitFunction( @@ -589,6 +633,10 @@ void ModuleSanitizerCoverage::instrumentFunction( if (F.hasPersonalityFn() && isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) return; + if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName())) + return; + if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName())) + return; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests()); SmallVector<Instruction *, 8> IndirCalls; @@ -607,8 +655,8 @@ void ModuleSanitizerCoverage::instrumentFunction( BlocksToInstrument.push_back(&BB); for (auto &Inst : BB) { if (Options.IndirectCalls) { - CallSite CS(&Inst); - if (CS && !CS.getCalledFunction()) + CallBase *CB = dyn_cast<CallBase>(&Inst); + if (CB && !CB->getCalledFunction()) IndirCalls.push_back(&Inst); } if (Options.TraceCmp) { @@ -653,9 +701,7 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection( GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId)) Array->setComdat(Comdat); Array->setSection(getSectionName(Section)); - Array->setAlignment(Align(Ty->isPointerTy() - ? 
DL->getPointerSize() - : Ty->getPrimitiveSizeInBits() / 8)); + Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize())); GlobalsToAppendToUsed.push_back(Array); GlobalsToAppendToCompilerUsed.push_back(Array); MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F)); @@ -701,6 +747,9 @@ void ModuleSanitizerCoverage::CreateFunctionLocalArrays( if (Options.Inline8bitCounters) Function8bitCounterArray = CreateFunctionLocalArrayInSection( AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName); + if (Options.InlineBoolFlag) + FunctionBoolArray = CreateFunctionLocalArrayInSection( + AllBlocks.size(), F, Int1Ty, SanCovBoolFlagSectionName); if (Options.PCTable) FunctionPCsArray = CreatePCArray(F, AllBlocks); @@ -727,11 +776,12 @@ void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters); + assert(Options.TracePC || Options.TracePCGuard || + Options.Inline8bitCounters || Options.InlineBoolFlag); for (auto I : IndirCalls) { IRBuilder<> IRB(I); - CallSite CS(I); - Value *Callee = CS.getCalledValue(); + CallBase &CB = cast<CallBase>(*I); + Value *Callee = CB.getCalledOperand(); if (isa<InlineAsm>(Callee)) continue; IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); @@ -865,16 +915,15 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRBuilder<> IRB(&*IP); IRB.SetCurrentDebugLocation(EntryLoc); if (Options.TracePC) { - IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. - IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. + IRB.CreateCall(SanCovTracePC) + ->setCannotMerge(); // gets the PC using GET_CALLER_PC. } if (Options.TracePCGuard) { auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), Int32PtrTy); - IRB.CreateCall(SanCovTracePCGuard, GuardPtr); - IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. + IRB.CreateCall(SanCovTracePCGuard, GuardPtr)->setCannotMerge(); } if (Options.Inline8bitCounters) { auto CounterPtr = IRB.CreateGEP( @@ -886,6 +935,18 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, SetNoSanitizeMetadata(Load); SetNoSanitizeMetadata(Store); } + if (Options.InlineBoolFlag) { + auto FlagPtr = IRB.CreateGEP( + FunctionBoolArray->getValueType(), FunctionBoolArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(Int1Ty, FlagPtr); + auto ThenTerm = + SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false); + IRBuilder<> ThenIRB(ThenTerm); + auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); + } if (Options.StackDepth && IsEntryBB && !IsLeafFunc) { // Check stack depth. If it's the deepest so far, record it. Module *M = F.getParent(); @@ -910,6 +971,8 @@ ModuleSanitizerCoverage::getSectionName(const std::string &Section) const { if (TargetTriple.isOSBinFormatCOFF()) { if (Section == SanCovCountersSectionName) return ".SCOV$CM"; + if (Section == SanCovBoolFlagSectionName) + return ".SCOV$BM"; if (Section == SanCovPCsSectionName) return ".SCOVP$M"; return ".SCOV$GM"; // For SanCovGuardsSectionName. 
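In C++-like terms, the inline-bool-flag instrumentation built in InjectCoverageAtBlock above amounts to the following per-edge probe (a sketch with hypothetical names; the real array is the per-function global in the sancov_bools section, whose [start, stop) bounds the generated sancov.module_ctor_bool_flag ctor hands to __sanitizer_cov_bool_flag_init, and the load/store carry nosanitize metadata):

    extern bool __sancov_bool_flags[];  // stands in for FunctionBoolArray

    static void edge_probe(unsigned Idx) {
      // load + CreateIsNull + SplitBlockAndInsertIfThen + store, as built above:
      if (!__sancov_bool_flags[Idx])
        __sancov_bool_flags[Idx] = true;  // set once; the runtime resets flags
    }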
@@ -943,6 +1006,9 @@ INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", "Pass for instrumenting coverage on functions", false, false) ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options) { - return new ModuleSanitizerCoverageLegacyPass(Options); + const SanitizerCoverageOptions &Options, + const std::vector<std::string> &AllowlistFiles, + const std::vector<std::string> &BlocklistFiles) { + return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles, + BlocklistFiles); } diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 9b7edad3444be..c911b37afac7e 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -68,6 +68,14 @@ static cl::opt<bool> ClInstrumentAtomics( static cl::opt<bool> ClInstrumentMemIntrinsics( "tsan-instrument-memintrinsics", cl::init(true), cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); +static cl::opt<bool> ClDistinguishVolatile( + "tsan-distinguish-volatile", cl::init(false), + cl::desc("Emit special instrumentation for accesses to volatiles"), + cl::Hidden); +static cl::opt<bool> ClInstrumentReadBeforeWrite( + "tsan-instrument-read-before-write", cl::init(false), + cl::desc("Do not eliminate read instrumentation for read-before-writes"), + cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -118,6 +126,10 @@ private: FunctionCallee TsanWrite[kNumberOfAccessSizes]; FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes]; FunctionCallee TsanUnalignedWrite[kNumberOfAccessSizes]; + FunctionCallee TsanVolatileRead[kNumberOfAccessSizes]; + FunctionCallee TsanVolatileWrite[kNumberOfAccessSizes]; + FunctionCallee TsanUnalignedVolatileRead[kNumberOfAccessSizes]; + FunctionCallee TsanUnalignedVolatileWrite[kNumberOfAccessSizes]; FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes]; FunctionCallee TsanAtomicStore[kNumberOfAccessSizes]; FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1] @@ -131,7 +143,9 @@ private: }; struct ThreadSanitizerLegacyPass : FunctionPass { - ThreadSanitizerLegacyPass() : FunctionPass(ID) {} + ThreadSanitizerLegacyPass() : FunctionPass(ID) { + initializeThreadSanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); + } StringRef getPassName() const override; void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; @@ -236,6 +250,24 @@ void ThreadSanitizer::initialize(Module &M) { TsanUnalignedWrite[i] = M.getOrInsertFunction( UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + SmallString<64> VolatileReadName("__tsan_volatile_read" + ByteSizeStr); + TsanVolatileRead[i] = M.getOrInsertFunction( + VolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + + SmallString<64> VolatileWriteName("__tsan_volatile_write" + ByteSizeStr); + TsanVolatileWrite[i] = M.getOrInsertFunction( + VolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + + SmallString<64> UnalignedVolatileReadName("__tsan_unaligned_volatile_read" + + ByteSizeStr); + TsanUnalignedVolatileRead[i] = M.getOrInsertFunction( + UnalignedVolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + + SmallString<64> UnalignedVolatileWriteName( + "__tsan_unaligned_volatile_write" + ByteSizeStr); + TsanUnalignedVolatileWrite[i] = M.getOrInsertFunction( + 
UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); @@ -246,28 +278,28 @@ void ThreadSanitizer::initialize(Module &M) { TsanAtomicStore[i] = M.getOrInsertFunction( AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy); - for (int op = AtomicRMWInst::FIRST_BINOP; - op <= AtomicRMWInst::LAST_BINOP; ++op) { - TsanAtomicRMW[op][i] = nullptr; + for (unsigned Op = AtomicRMWInst::FIRST_BINOP; + Op <= AtomicRMWInst::LAST_BINOP; ++Op) { + TsanAtomicRMW[Op][i] = nullptr; const char *NamePart = nullptr; - if (op == AtomicRMWInst::Xchg) + if (Op == AtomicRMWInst::Xchg) NamePart = "_exchange"; - else if (op == AtomicRMWInst::Add) + else if (Op == AtomicRMWInst::Add) NamePart = "_fetch_add"; - else if (op == AtomicRMWInst::Sub) + else if (Op == AtomicRMWInst::Sub) NamePart = "_fetch_sub"; - else if (op == AtomicRMWInst::And) + else if (Op == AtomicRMWInst::And) NamePart = "_fetch_and"; - else if (op == AtomicRMWInst::Or) + else if (Op == AtomicRMWInst::Or) NamePart = "_fetch_or"; - else if (op == AtomicRMWInst::Xor) + else if (Op == AtomicRMWInst::Xor) NamePart = "_fetch_xor"; - else if (op == AtomicRMWInst::Nand) + else if (Op == AtomicRMWInst::Nand) NamePart = "_fetch_nand"; else continue; SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); - TsanAtomicRMW[op][i] = + TsanAtomicRMW[Op][i] = M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy); } @@ -385,7 +417,7 @@ void ThreadSanitizer::chooseInstructionsToInstrument( Value *Addr = Load->getPointerOperand(); if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) continue; - if (WriteTargets.count(Addr)) { + if (!ClInstrumentReadBeforeWrite && WriteTargets.count(Addr)) { // We will write to this temp, so no reason to analyze the read. NumOmittedReadsBeforeWrite++; continue; @@ -441,6 +473,11 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, // the module constructor. if (F.getName() == kTsanModuleCtorName) return false; + // Naked functions can not have prologue/epilogue + // (__tsan_func_entry/__tsan_func_exit) generated, so don't instrument them at + // all. + if (F.hasFnAttribute(Attribute::Naked)) + return false; initialize(*F.getParent()); SmallVector<Instruction*, 8> AllLoadsAndStores; SmallVector<Instruction*, 8> LocalLoadsAndStores; @@ -560,13 +597,24 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, const unsigned Alignment = IsWrite ? cast<StoreInst>(I)->getAlignment() : cast<LoadInst>(I)->getAlignment(); + const bool IsVolatile = + ClDistinguishVolatile && (IsWrite ? cast<StoreInst>(I)->isVolatile() + : cast<LoadInst>(I)->isVolatile()); Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); FunctionCallee OnAccessFunc = nullptr; - if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) - OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; - else - OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx]; + if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) { + if (IsVolatile) + OnAccessFunc = IsWrite ? TsanVolatileWrite[Idx] : TsanVolatileRead[Idx]; + else + OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; + } else { + if (IsVolatile) + OnAccessFunc = IsWrite ? TsanUnalignedVolatileWrite[Idx] + : TsanUnalignedVolatileRead[Idx]; + else + OnAccessFunc = IsWrite ? 
TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx]; + } IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); if (IsWrite) NumInstrumentedWrites++; else NumInstrumentedReads++; diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp index 604726d4f40fc..cd4f636ff1320 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp +++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp @@ -38,7 +38,7 @@ using PluginChainFinal = PluginChain<VP_PLUGIN_LIST>; template <> class PluginChain<> { public: - PluginChain(Function &F) {} + PluginChain(Function &F, TargetLibraryInfo &TLI) {} void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {} }; @@ -48,7 +48,8 @@ class PluginChain<PluginT, Ts...> : public PluginChain<Ts...> { using Base = PluginChain<Ts...>; public: - PluginChain(Function &F) : PluginChain<Ts...>(F), Plugin(F) {} + PluginChain(Function &F, TargetLibraryInfo &TLI) + : PluginChain<Ts...>(F, TLI), Plugin(F, TLI) {} void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) { if (K == PluginT::Kind) @@ -65,8 +66,9 @@ public: using PluginChainFinal::PluginChainFinal; }; -ValueProfileCollector::ValueProfileCollector(Function &F) - : PImpl(new ValueProfileCollectorImpl(F)) {} +ValueProfileCollector::ValueProfileCollector(Function &F, + TargetLibraryInfo &TLI) + : PImpl(new ValueProfileCollectorImpl(F, TLI)) {} ValueProfileCollector::~ValueProfileCollector() = default; diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h index ff883c8d0c779..c3f549c2e7cc5 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h +++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h @@ -16,6 +16,7 @@ #ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H #define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -58,7 +59,7 @@ public: Instruction *AnnotatedInst; // Where metadata is attached. 
}; - ValueProfileCollector(Function &Fn); + ValueProfileCollector(Function &Fn, TargetLibraryInfo &TLI); ValueProfileCollector(ValueProfileCollector &&) = delete; ValueProfileCollector &operator=(ValueProfileCollector &&) = delete; diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc index 4cc4c6c848c39..8d0cf5843ebce 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -20,15 +20,19 @@ using namespace llvm; using CandidateInfo = ValueProfileCollector::CandidateInfo; +extern cl::opt<bool> MemOPOptMemcmpBcmp; + ///--------------------------- MemIntrinsicPlugin ------------------------------ class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> { Function &F; + TargetLibraryInfo &TLI; std::vector<CandidateInfo> *Candidates; public: static constexpr InstrProfValueKind Kind = IPVK_MemOPSize; - MemIntrinsicPlugin(Function &Fn) : F(Fn), Candidates(nullptr) {} + MemIntrinsicPlugin(Function &Fn, TargetLibraryInfo &TLI) + : F(Fn), TLI(TLI), Candidates(nullptr) {} void run(std::vector<CandidateInfo> &Cs) { Candidates = &Cs; @@ -45,6 +49,24 @@ public: Instruction *AnnotatedInst = &MI; Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); } + void visitCallInst(CallInst &CI) { + if (!MemOPOptMemcmpBcmp) + return; + auto *F = CI.getCalledFunction(); + if (!F) + return; + LibFunc Func; + if (TLI.getLibFunc(CI, Func) && + (Func == LibFunc_memcmp || Func == LibFunc_bcmp)) { + Value *Length = CI.getArgOperand(2); + // Not instrument constant length calls. + if (dyn_cast<ConstantInt>(Length)) + return; + Instruction *InsertPt = &CI; + Instruction *AnnotatedInst = &CI; + Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); + } + } }; ///------------------------ IndirectCallPromotionPlugin ------------------------ @@ -54,12 +76,12 @@ class IndirectCallPromotionPlugin { public: static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget; - IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {} + IndirectCallPromotionPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {} void run(std::vector<CandidateInfo> &Candidates) { - std::vector<Instruction *> Result = findIndirectCalls(F); + std::vector<CallBase *> Result = findIndirectCalls(F); for (Instruction *I : Result) { - Value *Callee = CallSite(I).getCalledValue(); + Value *Callee = cast<CallBase>(I)->getCalledOperand(); Instruction *InsertPt = I; Instruction *AnnotatedInst = I; Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst}); |
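Tying this back to the transform these candidates feed: once the size profile marks a few lengths as hot, MemOPSizeOpt::perform versions the call behind a switch and merges the return values through the new MemOP.RVMerge phi. In source-level terms the rewrite is roughly the following (a sketch, not literal pass output; the hot sizes 16 and 64 are assumed):

    #include <cstddef>
    #include <cstring>

    // Before: one variable-length call, annotated with size profile data.
    int cmp_before(const void *a, const void *b, size_t n) {
      return memcmp(a, b, n);
    }

    // After: each case calls a clone with a constant length, which later passes
    // can expand inline; the default keeps the original variable-length call,
    // and the returns play the role of the phi in the merge block.
    int cmp_after(const void *a, const void *b, size_t n) {
      switch (n) {
      case 16: return memcmp(a, b, 16);
      case 64: return memcmp(a, b, 64);
      default: return memcmp(a, b, n);
      }
    }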