Diffstat (limited to 'lib/Target/AArch64/AArch64StackTagging.cpp'):
 lib/Target/AArch64/AArch64StackTagging.cpp | 394 ++++++++++++++++++++++++++---
 1 file changed, 371 insertions(+), 23 deletions(-)
diff --git a/lib/Target/AArch64/AArch64StackTagging.cpp b/lib/Target/AArch64/AArch64StackTagging.cpp
index 6e99c48bf1d7..e6dbe01d3807 100644
--- a/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -55,9 +56,215 @@ using namespace llvm;
#define DEBUG_TYPE "stack-tagging"
-static constexpr unsigned kTagGranuleSize = 16;
+static cl::opt<bool> ClMergeInit(
+ "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ cl::desc("merge stack variable initializers with tagging when possible"));
+
+static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
+ cl::init(40), cl::Hidden);
+
+static const Align kTagGranuleSize = Align(16);
namespace {
+
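+// Collects the constant stores and memsets that initialize an alloca and
+// rewrites them as aarch64_stgp / aarch64_settag / aarch64_settag_zero calls
+// that initialize and tag the memory at the same time.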
+class InitializerBuilder {
+ uint64_t Size;
+ const DataLayout *DL;
+ Value *BasePtr;
+ Function *SetTagFn;
+ Function *SetTagZeroFn;
+ Function *StgpFn;
+
+ // List of initializers sorted by start offset.
+ struct Range {
+ uint64_t Start, End;
+ Instruction *Inst;
+ };
+ SmallVector<Range, 4> Ranges;
+ // 8-aligned offset => 8-byte initializer
+ // Missing keys are zero initialized.
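+  // e.g. a store of i32 0x01020304 at alloca offset 4 contributes
+  // Out[0] == 0x0102030400000000 (little-endian).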
+ std::map<uint64_t, Value *> Out;
+
+public:
+ InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
+ Function *SetTagFn, Function *SetTagZeroFn,
+ Function *StgpFn)
+ : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
+ SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}
+
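+  // Record an initializer covering [Start, End). Returns false if it overlaps
+  // a previously recorded range, in which case the caller gives up on merging.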
+ bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
+ auto I = std::lower_bound(
+ Ranges.begin(), Ranges.end(), Start,
+ [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
+ if (I != Ranges.end() && End > I->Start) {
+ // Overlap - bail.
+ return false;
+ }
+ Ranges.insert(I, {Start, End, Inst});
+ return true;
+ }
+
+ bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
+ int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
+ if (!addRange(Offset, Offset + StoreSize, SI))
+ return false;
+ IRBuilder<> IRB(SI);
+ applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
+ return true;
+ }
+
+ bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
+ uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if (!addRange(Offset, Offset + StoreSize, MSI))
+ return false;
+ IRBuilder<> IRB(MSI);
+ applyMemSet(IRB, Offset, Offset + StoreSize,
+ cast<ConstantInt>(MSI->getValue()));
+ return true;
+ }
+
+ void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
+ ConstantInt *V) {
+ // Out[] does not distinguish between zero and undef, and we already know
+ // that this memset does not overlap with any other initializer. Nothing to
+ // do for memset(0).
+ if (V->isZero())
+ return;
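+    // For every 8-byte word the memset touches, build a mask of 0x01 bytes
+    // restricted to [Start, End) and multiply it by the memset value, e.g.
+    // memset(0xAB) over [3, 6) contributes Out[0] |= 0x0000ABABAB000000.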
+ for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
+ uint64_t Cst = 0x0101010101010101UL;
+ int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
+ if (LowBits)
+ Cst = (Cst >> LowBits) << LowBits;
+ int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
+ if (HighBits)
+ Cst = (Cst << HighBits) >> HighBits;
+ ConstantInt *C =
+ ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());
+
+ Value *&CurrentV = Out[Offset];
+ if (!CurrentV) {
+ CurrentV = C;
+ } else {
+ CurrentV = IRB.CreateOr(CurrentV, C);
+ }
+ }
+ }
+
+ // Take a 64-bit slice of the value starting at the given offset (in bytes).
+ // Offset can be negative. Pad with zeroes on both sides when necessary.
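+  // e.g. Offset == 4 keeps bytes [4, 12) of V (lshr by 32, then zext/trunc),
+  // while Offset == -4 moves the first byte of V to byte 4 of the result
+  // (zext/trunc, then shl by 32).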
+ Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
+ if (Offset > 0) {
+ V = IRB.CreateLShr(V, Offset * 8);
+ V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+ } else if (Offset < 0) {
+ V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+ V = IRB.CreateShl(V, -Offset * 8);
+ } else {
+ V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+ }
+ return V;
+ }
+
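+  // Split a (possibly unaligned, possibly wider than 8 bytes) stored value
+  // into 8-byte-aligned words and OR each slice into Out[].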
+ void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
+ Value *StoredValue) {
+ StoredValue = flatten(IRB, StoredValue);
+ for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
+ Value *V = sliceValue(IRB, StoredValue, Offset - Start);
+ Value *&CurrentV = Out[Offset];
+ if (!CurrentV) {
+ CurrentV = V;
+ } else {
+ CurrentV = IRB.CreateOr(CurrentV, V);
+ }
+ }
+ }
+
+ void generate(IRBuilder<> &IRB) {
+ LLVM_DEBUG(dbgs() << "Combined initializer\n");
+ // No initializers => the entire allocation is undef.
+ if (Ranges.empty()) {
+ emitUndef(IRB, 0, Size);
+ return;
+ }
+
+ // Look through 8-byte initializer list 16 bytes at a time;
+    // If one of the two 8-byte halves is non-zero non-undef, emit STGP.
+ // Otherwise, emit zeroes up to next available item.
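+    // e.g. a 32-byte alloca whose only initializer covers [0, 8) becomes one
+    // STGP for [0, 16) followed by settag_zero for [16, 32).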
+ uint64_t LastOffset = 0;
+ for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
+ auto I1 = Out.find(Offset);
+ auto I2 = Out.find(Offset + 8);
+ if (I1 == Out.end() && I2 == Out.end())
+ continue;
+
+ if (Offset > LastOffset)
+ emitZeroes(IRB, LastOffset, Offset - LastOffset);
+
+ Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
+ : I1->second;
+ Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
+ : I2->second;
+ emitPair(IRB, Offset, Store1, Store2);
+ LastOffset = Offset + 16;
+ }
+
+ // memset(0) does not update Out[], therefore the tail can be either undef
+ // or zero.
+ if (LastOffset < Size)
+ emitZeroes(IRB, LastOffset, Size - LastOffset);
+
+ for (const auto &R : Ranges) {
+ R.Inst->eraseFromParent();
+ }
+ }
+
+ void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
+ LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
+ << ") zero\n");
+ Value *Ptr = BasePtr;
+ if (Offset)
+ Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+ IRB.CreateCall(SetTagZeroFn,
+ {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+ }
+
+ void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
+ LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
+ << ") undef\n");
+ Value *Ptr = BasePtr;
+ if (Offset)
+ Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+ IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+ }
+
+ void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
+ LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n");
+ LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n");
+ Value *Ptr = BasePtr;
+ if (Offset)
+ Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+ IRB.CreateCall(StgpFn, {Ptr, A, B});
+ }
+
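+  // Convert an arbitrary first-class value into an integer of the same store
+  // size so that sliceValue can carve it into 8-byte words.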
+ Value *flatten(IRBuilder<> &IRB, Value *V) {
+ if (V->getType()->isIntegerTy())
+ return V;
+ // vector of pointers -> vector of ints
+ if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
+ LLVMContext &Ctx = IRB.getContext();
+ Type *EltTy = VecTy->getElementType();
+ if (EltTy->isPointerTy()) {
+ uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
+ Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize),
+ VecTy->getNumElements());
+ V = IRB.CreatePointerCast(V, NewTy);
+ }
+ }
+ return IRB.CreateBitOrPointerCast(
+ V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
+ }
+};
+
class AArch64StackTagging : public FunctionPass {
struct AllocaInfo {
AllocaInst *AI;
@@ -67,10 +274,15 @@ class AArch64StackTagging : public FunctionPass {
int Tag; // -1 for non-tagged allocations
};
+ bool MergeInit;
+
public:
static char ID; // Pass ID, replacement for typeid
- AArch64StackTagging() : FunctionPass(ID) {
+ AArch64StackTagging(bool MergeInit = true)
+ : FunctionPass(ID),
+ MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit
+ : MergeInit) {
initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
}
@@ -81,6 +293,9 @@ public:
uint64_t Size);
void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);
+ Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
+ uint64_t Size, InitializerBuilder &IB);
+
Instruction *
insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
const DominatorTree *DT);
@@ -92,9 +307,12 @@ private:
Function *F;
Function *SetTagFunc;
const DataLayout *DL;
+ AAResults *AA;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ if (MergeInit)
+ AU.addRequired<AAResultsWrapperPass>();
}
};
@@ -107,8 +325,68 @@ INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
false, false)
-FunctionPass *llvm::createAArch64StackTaggingPass() {
- return new AArch64StackTagging();
+FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) {
+ return new AArch64StackTagging(MergeInit);
+}
+
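+// Scan forward from StartInst for stores and memsets that write a known value
+// at a constant offset into the Size bytes at StartPtr, and fold them into IB.
+// The scan stops at the first instruction that may otherwise access the
+// alloca, at the block terminator, or after ClScanLimit instructions. Returns
+// the new insertion point for the merged initializer.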
+Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
+ Value *StartPtr,
+ uint64_t Size,
+ InitializerBuilder &IB) {
+ MemoryLocation AllocaLoc{StartPtr, Size};
+ Instruction *LastInst = StartInst;
+ BasicBlock::iterator BI(StartInst);
+
+ unsigned Count = 0;
+ for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
+ if (!isa<DbgInfoIntrinsic>(*BI))
+ ++Count;
+
+ if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
+ continue;
+
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it, otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
+ // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
+
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ if (!NextStore->isSimple())
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ Optional<int64_t> Offset =
+ isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
+ if (!Offset)
+ break;
+
+ if (!IB.addStore(*Offset, NextStore, DL))
+ break;
+ LastInst = NextStore;
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ if (!isa<ConstantInt>(MSI->getValue()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
+ if (!Offset)
+ break;
+
+ if (!IB.addMemSet(*Offset, MSI))
+ break;
+ LastInst = MSI;
+ }
+ }
+ return LastInst;
}
bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
@@ -127,8 +405,23 @@ bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
Value *Ptr, uint64_t Size) {
+ auto SetTagZeroFunc =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
+ auto StgpFunc =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);
+
+ InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
+ bool LittleEndian =
+ Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
+ // Current implementation of initializer merging assumes little endianness.
+ if (MergeInit && !F->hasOptNone() && LittleEndian) {
+ LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
+ << ", size = " << Size << "\n");
+ InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
+ }
+
IRBuilder<> IRB(InsertBefore);
- IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+ IB.generate(IRB);
}
void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
@@ -166,7 +459,8 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
}
void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
- unsigned NewAlignment = std::max(Info.AI->getAlignment(), kTagGranuleSize);
+ const Align NewAlignment =
+ max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
Info.AI->setAlignment(NewAlignment);
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
@@ -179,7 +473,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
Info.AI->isArrayAllocation()
? ArrayType::get(
Info.AI->getAllocatedType(),
- dyn_cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
+ cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
: Info.AI->getAllocatedType();
Type *PaddingType =
ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
@@ -187,7 +481,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
auto *NewAI = new AllocaInst(
TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
NewAI->takeName(Info.AI);
- NewAI->setAlignment(Info.AI->getAlignment());
+ NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment()));
NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
NewAI->setSwiftError(Info.AI->isSwiftError());
NewAI->copyMetadata(*Info.AI);
@@ -198,6 +492,24 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
Info.AI = NewAI;
}
+// Helper function to check for post-dominance.
+static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
+ const IntrinsicInst *B) {
+ const BasicBlock *ABB = A->getParent();
+ const BasicBlock *BBB = B->getParent();
+
+ if (ABB != BBB)
+ return PDT->dominates(ABB, BBB);
+
+ for (const Instruction &I : *ABB) {
+ if (&I == B)
+ return true;
+ if (&I == A)
+ return false;
+ }
+ llvm_unreachable("Corrupt instruction list");
+}
+
// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
@@ -205,6 +517,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
F = &Fn;
DL = &Fn.getParent()->getDataLayout();
+ if (MergeInit)
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
SmallVector<Instruction *, 8> RetVec;
@@ -270,23 +584,31 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (NumInterestingAllocas == 0)
return true;
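+  // Build the dominator and post-dominator trees if they are not already
+  // available. For optnone functions the PostDominatorTree is skipped, and the
+  // DominatorTree is only built when more than one interesting alloca needs a
+  // common base tagged pointer.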
+ std::unique_ptr<DominatorTree> DeleteDT;
+ DominatorTree *DT = nullptr;
+ if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &P->getDomTree();
+
+ if (DT == nullptr && (NumInterestingAllocas > 1 ||
+ !F->hasFnAttribute(Attribute::OptimizeNone))) {
+ DeleteDT = std::make_unique<DominatorTree>(*F);
+ DT = DeleteDT.get();
+ }
+
+ std::unique_ptr<PostDominatorTree> DeletePDT;
+ PostDominatorTree *PDT = nullptr;
+ if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
+ PDT = &P->getPostDomTree();
+
+ if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
+ DeletePDT = std::make_unique<PostDominatorTree>(*F);
+ PDT = DeletePDT.get();
+ }
+
SetTagFunc =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
- // Compute DT only if the function has the attribute, there are more than 1
- // interesting allocas, and it is not available for free.
- Instruction *Base;
- if (NumInterestingAllocas > 1) {
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- if (DTWP) {
- Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree());
- } else {
- DominatorTree DT(*F);
- Base = insertBaseTaggedPointer(Allocas, &DT);
- }
- } else {
- Base = insertBaseTaggedPointer(Allocas, nullptr);
- }
+ Instruction *Base = insertBaseTaggedPointer(Allocas, DT);
for (auto &I : Allocas) {
const AllocaInfo &Info = I.second;
@@ -309,11 +631,37 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
Info.LifetimeEnd.size() == 1) {
IntrinsicInst *Start = Info.LifetimeStart[0];
+ IntrinsicInst *End = Info.LifetimeEnd[0];
uint64_t Size =
dyn_cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
Size = alignTo(Size, kTagGranuleSize);
tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
- untagAlloca(AI, Info.LifetimeEnd[0], Size);
+ // We need to ensure that if we tag some object, we certainly untag it
+ // before the function exits.
+ if (PDT != nullptr && postDominates(PDT, End, Start)) {
+ untagAlloca(AI, End, Size);
+ } else {
+ SmallVector<Instruction *, 8> ReachableRetVec;
+ unsigned NumCoveredExits = 0;
+ for (auto &RI : RetVec) {
+ if (!isPotentiallyReachable(Start, RI, nullptr, DT))
+ continue;
+ ReachableRetVec.push_back(RI);
+ if (DT != nullptr && DT->dominates(End, RI))
+ ++NumCoveredExits;
+ }
+ // If there's a mix of covered and non-covered exits, just put the untag
+ // on exits, so we avoid the redundancy of untagging twice.
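+        // ("Covered" means the exit is dominated by the lifetime.end call, so
+        // untagging at the lifetime.end is enough for that exit.)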
+ if (NumCoveredExits == ReachableRetVec.size()) {
+ untagAlloca(AI, End, Size);
+ } else {
+ for (auto &RI : ReachableRetVec)
+ untagAlloca(AI, RI, Size);
+ // We may have inserted untag outside of the lifetime interval.
+ // Remove the lifetime end call for this alloca.
+ End->eraseFromParent();
+ }
+ }
} else {
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());