aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMCodeGenPrepare.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMCodeGenPrepare.cpp')
-rw-r--r--lib/Target/ARM/ARMCodeGenPrepare.cpp205
1 files changed, 101 insertions, 104 deletions
diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp
index b631c2bc687b..2fc5f4aaab50 100644
--- a/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -1,9 +1,8 @@
//===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,15 +113,20 @@ class IRPromoter {
SmallPtrSet<Value*, 8> Promoted;
Module *M = nullptr;
LLVMContext &Ctx;
+ // The type we promote to: always i32
IntegerType *ExtTy = nullptr;
+ // The type of the value that the search began from, either i8 or i16.
+ // This defines the max range of the values that we allow in the promoted
+ // tree.
IntegerType *OrigTy = nullptr;
- SmallPtrSetImpl<Value*> *Visited;
+ SetVector<Value*> *Visited;
SmallPtrSetImpl<Value*> *Sources;
SmallPtrSetImpl<Instruction*> *Sinks;
SmallPtrSetImpl<Instruction*> *SafeToPromote;
+ SmallPtrSetImpl<Instruction*> *SafeWrap;
void ReplaceAllUsersOfWith(Value *From, Value *To);
- void PrepareConstants(void);
+ void PrepareWrappingAdds(void);
void ExtendSources(void);
void ConvertTruncs(void);
void PromoteTree(void);
@@ -135,10 +139,11 @@ public:
void Mutate(Type *OrigTy,
- SmallPtrSetImpl<Value*> &Visited,
+ SetVector<Value*> &Visited,
SmallPtrSetImpl<Value*> &Sources,
SmallPtrSetImpl<Instruction*> &Sinks,
- SmallPtrSetImpl<Instruction*> &SafeToPromote);
+ SmallPtrSetImpl<Instruction*> &SafeToPromote,
+ SmallPtrSetImpl<Instruction*> &SafeWrap);
};
class ARMCodeGenPrepare : public FunctionPass {
@@ -146,8 +151,9 @@ class ARMCodeGenPrepare : public FunctionPass {
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
SmallPtrSet<Instruction*, 8> SafeToPromote;
+ SmallPtrSet<Instruction*, 4> SafeWrap;
- bool isSafeOverflow(Instruction *I);
+ bool isSafeWrap(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
@@ -172,13 +178,17 @@ public:
}
-static bool generateSignBits(Value *V) {
+static bool GenerateSignBits(Value *V) {
+ if (auto *Arg = dyn_cast<Argument>(V))
+ return Arg->hasSExtAttr();
+
if (!isa<Instruction>(V))
return false;
unsigned Opc = cast<Instruction>(V)->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
- Opc == Instruction::SRem;
+ Opc == Instruction::SRem || Opc == Instruction::SExt ||
+ Opc == Instruction::SIToFP;
}
static bool EqualTypeSize(Value *V) {
@@ -271,19 +281,14 @@ static bool isSink(Value *V) {
return isa<CallInst>(V);
}
-/// Return whether the instruction can be promoted within any modifications to
-/// its operands or result.
-bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
- // FIXME Do we need NSW too?
- if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
- return true;
-
- // We can support a, potentially, overflowing instruction (I) if:
+/// Return whether this instruction can safely wrap.
+bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
+ // We can support a, potentially, wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
- // - The overflowing value (I) is decreasing, i.e would underflow - wrapping
+ // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
// around zero to become a larger number than before.
- // - The underflowing instruction (I) also uses a constant.
+ // - The wrapping instruction (I) also uses a constant.
//
// We can then use the two constants to calculate whether the result would
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
@@ -327,7 +332,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
//
// To demonstrate why we can't handle increasing values:
- //
+ //
// %add = add i8 %a, 2
// %cmp = icmp ult i8 %add, 127
//
@@ -385,6 +390,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
return false;
LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+ SafeWrap.insert(I);
return true;
}
@@ -408,13 +414,16 @@ static bool shouldPromote(Value *V) {
/// Return whether we can safely mutate V's type to ExtTy without having to be
/// concerned with zero extending or truncation.
static bool isPromotedResultSafe(Value *V) {
+ if (GenerateSignBits(V))
+ return false;
+
if (!isa<Instruction>(V))
return true;
- if (generateSignBits(V))
- return false;
+ if (!isa<OverflowingBinaryOperator>(V))
+ return true;
- return !isa<OverflowingBinaryOperator>(V);
+ return cast<Instruction>(V)->hasNoUnsignedWrap();
}
/// Return the intrinsic for the instruction that can perform the same
@@ -462,61 +471,34 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
InstsToRemove.insert(I);
}
-void IRPromoter::PrepareConstants() {
+void IRPromoter::PrepareWrappingAdds() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n");
IRBuilder<> Builder{Ctx};
- // First step is to prepare the instructions for mutation. Most constants
- // just need to be zero extended into their new type, but complications arise
- // because:
- // - For nuw binary operators, negative immediates would need sign extending;
- // however, instead we'll change them to positive and zext them. We can do
- // this because:
- // > The operators that can wrap are: add, sub, mul and shl.
- // > shl interprets its second operand as unsigned and if the first operand
- // is an immediate, it will need zext to be nuw.
- // > I'm assuming mul has to interpret immediates as unsigned for nuw.
- // > Which leaves the nuw add and sub to be handled; as with shl, if an
- // immediate is used as operand 0, it will need zext to be nuw.
- // - We also allow add and sub to safely overflow in certain circumstances
- // and only when the value (operand 0) is being decreased.
- //
- // For adds and subs, that are either nuw or safely wrap and use a negative
- // immediate as operand 1, we create an equivalent instruction using a
- // positive immediate. That positive immediate can then be zext along with
- // all the other immediates later.
- for (auto *V : *Visited) {
- if (!isa<Instruction>(V))
- continue;
-
- auto *I = cast<Instruction>(V);
- if (SafeToPromote->count(I)) {
-
- if (!isa<OverflowingBinaryOperator>(I))
- continue;
- if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!Const->isNegative())
- break;
+ // For adds that safely wrap and use a negative immediate as operand 1, we
+ // create an equivalent instruction using a positive immediate.
+ // That positive immediate can then be zext along with all the other
+ // immediates later.
+ for (auto *I : *SafeWrap) {
+ if (I->getOpcode() != Instruction::Add)
+ continue;
- unsigned Opc = I->getOpcode();
- if (Opc != Instruction::Add && Opc != Instruction::Sub)
- continue;
+ LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+ assert((isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
+ "Wrapping should have a negative immediate as the second operand");
- LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
- auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
- Builder.SetInsertPoint(I);
- Value *NewVal = Opc == Instruction::Sub ?
- Builder.CreateAdd(I->getOperand(0), NewConst) :
- Builder.CreateSub(I->getOperand(0), NewConst);
- LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
-
- if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
- NewInst->copyIRFlags(I);
- NewInsts.insert(NewInst);
- }
- InstsToRemove.insert(I);
- I->replaceAllUsesWith(NewVal);
- }
+ auto Const = cast<ConstantInt>(I->getOperand(1));
+ auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
+ Builder.SetInsertPoint(I);
+ Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
+ if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
+ NewInst->copyIRFlags(I);
+ NewInsts.insert(NewInst);
}
+ InstsToRemove.insert(I);
+ I->replaceAllUsesWith(NewVal);
+ LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
}
for (auto *I : NewInsts)
Visited->insert(I);
@@ -605,7 +587,7 @@ void IRPromoter::PromoteTree() {
if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
continue;
-
+
assert(EnableDSP && "DSP intrinisc insertion not enabled!");
// Replace unsafe instructions with appropriate intrinsic calls.
@@ -683,13 +665,14 @@ void IRPromoter::TruncateSinks() {
}
void IRPromoter::Cleanup() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
// Some zexts will now have become redundant, along with their trunc
// operands, so remove them
for (auto V : *Visited) {
- if (!isa<CastInst>(V))
+ if (!isa<ZExtInst>(V))
continue;
- auto ZExt = cast<CastInst>(V);
+ auto ZExt = cast<ZExtInst>(V);
if (ZExt->getDestTy() != ExtTy)
continue;
@@ -701,9 +684,11 @@ void IRPromoter::Cleanup() {
continue;
}
- // For any truncs that we insert to handle zexts, we can replace the
- // result of the zext with the input to the trunc.
- if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) {
+ // Unless they produce a value that is narrower than ExtTy, we can
+ // replace the result of the zext with the input of a newly inserted
+ // trunc.
+ if (NewInsts.count(Src) && isa<TruncInst>(Src) &&
+ Src->getType() == OrigTy) {
auto *Trunc = cast<TruncInst>(Src);
assert(Trunc->getOperand(0)->getType() == ExtTy &&
"expected inserted trunc to be operating on i32");
@@ -721,9 +706,12 @@ void IRPromoter::Cleanup() {
NewInsts.clear();
TruncTysMap.clear();
Promoted.clear();
+ SafeToPromote->clear();
+ SafeWrap->clear();
}
void IRPromoter::ConvertTruncs() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
IRBuilder<> Builder{Ctx};
for (auto *V : *Visited) {
@@ -731,12 +719,13 @@ void IRPromoter::ConvertTruncs() {
continue;
auto *Trunc = cast<TruncInst>(V);
- assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
-
Builder.SetInsertPoint(Trunc);
- unsigned NumBits =
- cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
- ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+ IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
+ IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
+
+ unsigned NumBits = DestTy->getScalarSizeInBits();
+ ConstantInt *Mask =
+ ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
if (auto *I = dyn_cast<Instruction>(Masked))
@@ -747,10 +736,11 @@ void IRPromoter::ConvertTruncs() {
}
void IRPromoter::Mutate(Type *OrigTy,
- SmallPtrSetImpl<Value*> &Visited,
+ SetVector<Value*> &Visited,
SmallPtrSetImpl<Value*> &Sources,
SmallPtrSetImpl<Instruction*> &Sinks,
- SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+ SmallPtrSetImpl<Instruction*> &SafeToPromote,
+ SmallPtrSetImpl<Instruction*> &SafeWrap) {
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
<< ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
@@ -763,6 +753,7 @@ void IRPromoter::Mutate(Type *OrigTy,
this->Sources = &Sources;
this->Sinks = &Sinks;
this->SafeToPromote = &SafeToPromote;
+ this->SafeWrap = &SafeWrap;
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
@@ -778,22 +769,28 @@ void IRPromoter::Mutate(Type *OrigTy,
TruncTysMap[I].push_back(I->getOperand(i)->getType());
}
}
+ for (auto *V : Visited) {
+ if (!isa<TruncInst>(V) || Sources.count(V))
+ continue;
+ auto *Trunc = cast<TruncInst>(V);
+ TruncTysMap[Trunc].push_back(Trunc->getDestTy());
+ }
- // Convert adds and subs using negative immediates to equivalent instructions
- // that use positive constants.
- PrepareConstants();
+ // Convert adds using negative immediates to equivalent instructions that use
+ // positive constants.
+ PrepareWrappingAdds();
// Insert zext instructions between sources and their users.
ExtendSources();
- // Convert any truncs, that aren't sources, into AND masks.
- ConvertTruncs();
-
// Promote visited instructions, mutating their types in place. Also insert
// DSP intrinsics, if enabled, for adds and subs which would be unsafe to
// promote.
PromoteTree();
+ // Convert any truncs, that aren't sources, into AND masks.
+ ConvertTruncs();
+
// Insert trunc instructions for use by calls, stores etc...
TruncateSinks();
@@ -819,6 +816,11 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
return EqualTypeSize(I->getOperand(0));
}
+ if (GenerateSignBits(V)) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
+ return false;
+ }
+
// Memory instructions
if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
@@ -835,9 +837,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
isa<LoadInst>(V))
return isSupportedType(V);
- if (isa<SExtInst>(V))
- return false;
-
if (auto *Cast = dyn_cast<CastInst>(V))
return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
@@ -854,10 +853,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
if (!isSupportedType(V))
return false;
- if (generateSignBits(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
- return false;
- }
return true;
}
@@ -873,7 +868,7 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
if (SafeToPromote.count(I))
return true;
- if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
+ if (isPromotedResultSafe(V) || isSafeWrap(I)) {
SafeToPromote.insert(I);
return true;
}
@@ -911,6 +906,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
return false;
SafeToPromote.clear();
+ SafeWrap.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
@@ -921,7 +917,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
SetVector<Value*> WorkList;
SmallPtrSet<Value*, 8> Sources;
SmallPtrSet<Instruction*, 4> Sinks;
- SmallPtrSet<Value*, 16> CurrentVisited;
+ SetVector<Value*> CurrentVisited;
WorkList.insert(V);
// Return true if V was added to the worklist as a supported instruction,
@@ -1009,7 +1005,8 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (ToPromote < 2)
return false;
- Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
+ Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote,
+ SafeWrap);
return true;
}