diff options
Diffstat (limited to 'lib/Target/ARM/ARMCodeGenPrepare.cpp')
-rw-r--r-- | lib/Target/ARM/ARMCodeGenPrepare.cpp | 205 |
1 files changed, 101 insertions, 104 deletions
diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp index b631c2bc687b..2fc5f4aaab50 100644 --- a/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -1,9 +1,8 @@ //===----- ARMCodeGenPrepare.cpp ------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -114,15 +113,20 @@ class IRPromoter { SmallPtrSet<Value*, 8> Promoted; Module *M = nullptr; LLVMContext &Ctx; + // The type we promote to: always i32 IntegerType *ExtTy = nullptr; + // The type of the value that the search began from, either i8 or i16. + // This defines the max range of the values that we allow in the promoted + // tree. IntegerType *OrigTy = nullptr; - SmallPtrSetImpl<Value*> *Visited; + SetVector<Value*> *Visited; SmallPtrSetImpl<Value*> *Sources; SmallPtrSetImpl<Instruction*> *Sinks; SmallPtrSetImpl<Instruction*> *SafeToPromote; + SmallPtrSetImpl<Instruction*> *SafeWrap; void ReplaceAllUsersOfWith(Value *From, Value *To); - void PrepareConstants(void); + void PrepareWrappingAdds(void); void ExtendSources(void); void ConvertTruncs(void); void PromoteTree(void); @@ -135,10 +139,11 @@ public: void Mutate(Type *OrigTy, - SmallPtrSetImpl<Value*> &Visited, + SetVector<Value*> &Visited, SmallPtrSetImpl<Value*> &Sources, SmallPtrSetImpl<Instruction*> &Sinks, - SmallPtrSetImpl<Instruction*> &SafeToPromote); + SmallPtrSetImpl<Instruction*> &SafeToPromote, + SmallPtrSetImpl<Instruction*> &SafeWrap); }; class ARMCodeGenPrepare : public FunctionPass { @@ -146,8 +151,9 @@ class ARMCodeGenPrepare : public FunctionPass { IRPromoter *Promoter = nullptr; std::set<Value*> AllVisited; SmallPtrSet<Instruction*, 8> SafeToPromote; + SmallPtrSet<Instruction*, 4> SafeWrap; - bool isSafeOverflow(Instruction *I); + bool isSafeWrap(Instruction *I); bool isSupportedValue(Value *V); bool isLegalToPromote(Value *V); bool TryToPromote(Value *V); @@ -172,13 +178,17 @@ public: } -static bool generateSignBits(Value *V) { +static bool GenerateSignBits(Value *V) { + if (auto *Arg = dyn_cast<Argument>(V)) + return Arg->hasSExtAttr(); + if (!isa<Instruction>(V)) return false; unsigned Opc = cast<Instruction>(V)->getOpcode(); return Opc == Instruction::AShr || Opc == Instruction::SDiv || - Opc == Instruction::SRem; + Opc == Instruction::SRem || Opc == Instruction::SExt || + Opc == Instruction::SIToFP; } static bool EqualTypeSize(Value *V) { @@ -271,19 +281,14 @@ static bool isSink(Value *V) { return isa<CallInst>(V); } -/// Return whether the instruction can be promoted within any modifications to -/// its operands or result. -bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) { - // FIXME Do we need NSW too? - if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap()) - return true; - - // We can support a, potentially, overflowing instruction (I) if: +/// Return whether this instruction can safely wrap. +bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) { + // We can support a, potentially, wrapping instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. - // - The overflowing value (I) is decreasing, i.e would underflow - wrapping + // - The wrapping value (I) is decreasing, i.e would underflow - wrapping // around zero to become a larger number than before. - // - The underflowing instruction (I) also uses a constant. + // - The wrapping instruction (I) also uses a constant. // // We can then use the two constants to calculate whether the result would // wrap in respect to itself in the original bitwidth. If it doesn't wrap, @@ -327,7 +332,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) { // - (255 >= 254) == (0xFFFFFFFF >= 254) == true // // To demonstrate why we can't handle increasing values: - // + // // %add = add i8 %a, 2 // %cmp = icmp ult i8 %add, 127 // @@ -385,6 +390,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) { return false; LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n"); + SafeWrap.insert(I); return true; } @@ -408,13 +414,16 @@ static bool shouldPromote(Value *V) { /// Return whether we can safely mutate V's type to ExtTy without having to be /// concerned with zero extending or truncation. static bool isPromotedResultSafe(Value *V) { + if (GenerateSignBits(V)) + return false; + if (!isa<Instruction>(V)) return true; - if (generateSignBits(V)) - return false; + if (!isa<OverflowingBinaryOperator>(V)) + return true; - return !isa<OverflowingBinaryOperator>(V); + return cast<Instruction>(V)->hasNoUnsignedWrap(); } /// Return the intrinsic for the instruction that can perform the same @@ -462,61 +471,34 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { InstsToRemove.insert(I); } -void IRPromoter::PrepareConstants() { +void IRPromoter::PrepareWrappingAdds() { + LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n"); IRBuilder<> Builder{Ctx}; - // First step is to prepare the instructions for mutation. Most constants - // just need to be zero extended into their new type, but complications arise - // because: - // - For nuw binary operators, negative immediates would need sign extending; - // however, instead we'll change them to positive and zext them. We can do - // this because: - // > The operators that can wrap are: add, sub, mul and shl. - // > shl interprets its second operand as unsigned and if the first operand - // is an immediate, it will need zext to be nuw. - // > I'm assuming mul has to interpret immediates as unsigned for nuw. - // > Which leaves the nuw add and sub to be handled; as with shl, if an - // immediate is used as operand 0, it will need zext to be nuw. - // - We also allow add and sub to safely overflow in certain circumstances - // and only when the value (operand 0) is being decreased. - // - // For adds and subs, that are either nuw or safely wrap and use a negative - // immediate as operand 1, we create an equivalent instruction using a - // positive immediate. That positive immediate can then be zext along with - // all the other immediates later. - for (auto *V : *Visited) { - if (!isa<Instruction>(V)) - continue; - - auto *I = cast<Instruction>(V); - if (SafeToPromote->count(I)) { - - if (!isa<OverflowingBinaryOperator>(I)) - continue; - if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) { - if (!Const->isNegative()) - break; + // For adds that safely wrap and use a negative immediate as operand 1, we + // create an equivalent instruction using a positive immediate. + // That positive immediate can then be zext along with all the other + // immediates later. + for (auto *I : *SafeWrap) { + if (I->getOpcode() != Instruction::Add) + continue; - unsigned Opc = I->getOpcode(); - if (Opc != Instruction::Add && Opc != Instruction::Sub) - continue; + LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); + assert((isa<ConstantInt>(I->getOperand(1)) && + cast<ConstantInt>(I->getOperand(1))->isNegative()) && + "Wrapping should have a negative immediate as the second operand"); - LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); - auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); - Builder.SetInsertPoint(I); - Value *NewVal = Opc == Instruction::Sub ? - Builder.CreateAdd(I->getOperand(0), NewConst) : - Builder.CreateSub(I->getOperand(0), NewConst); - LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); - - if (auto *NewInst = dyn_cast<Instruction>(NewVal)) { - NewInst->copyIRFlags(I); - NewInsts.insert(NewInst); - } - InstsToRemove.insert(I); - I->replaceAllUsesWith(NewVal); - } + auto Const = cast<ConstantInt>(I->getOperand(1)); + auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); + Builder.SetInsertPoint(I); + Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); + if (auto *NewInst = dyn_cast<Instruction>(NewVal)) { + NewInst->copyIRFlags(I); + NewInsts.insert(NewInst); } + InstsToRemove.insert(I); + I->replaceAllUsesWith(NewVal); + LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); } for (auto *I : NewInsts) Visited->insert(I); @@ -605,7 +587,7 @@ void IRPromoter::PromoteTree() { if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I)) continue; - + assert(EnableDSP && "DSP intrinisc insertion not enabled!"); // Replace unsafe instructions with appropriate intrinsic calls. @@ -683,13 +665,14 @@ void IRPromoter::TruncateSinks() { } void IRPromoter::Cleanup() { + LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n"); // Some zexts will now have become redundant, along with their trunc // operands, so remove them for (auto V : *Visited) { - if (!isa<CastInst>(V)) + if (!isa<ZExtInst>(V)) continue; - auto ZExt = cast<CastInst>(V); + auto ZExt = cast<ZExtInst>(V); if (ZExt->getDestTy() != ExtTy) continue; @@ -701,9 +684,11 @@ void IRPromoter::Cleanup() { continue; } - // For any truncs that we insert to handle zexts, we can replace the - // result of the zext with the input to the trunc. - if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) { + // Unless they produce a value that is narrower than ExtTy, we can + // replace the result of the zext with the input of a newly inserted + // trunc. + if (NewInsts.count(Src) && isa<TruncInst>(Src) && + Src->getType() == OrigTy) { auto *Trunc = cast<TruncInst>(Src); assert(Trunc->getOperand(0)->getType() == ExtTy && "expected inserted trunc to be operating on i32"); @@ -721,9 +706,12 @@ void IRPromoter::Cleanup() { NewInsts.clear(); TruncTysMap.clear(); Promoted.clear(); + SafeToPromote->clear(); + SafeWrap->clear(); } void IRPromoter::ConvertTruncs() { + LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n"); IRBuilder<> Builder{Ctx}; for (auto *V : *Visited) { @@ -731,12 +719,13 @@ void IRPromoter::ConvertTruncs() { continue; auto *Trunc = cast<TruncInst>(V); - assert(LessThanTypeSize(Trunc) && "expected narrow trunc"); - Builder.SetInsertPoint(Trunc); - unsigned NumBits = - cast<IntegerType>(Trunc->getType())->getScalarSizeInBits(); - ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits)); + IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType()); + IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]); + + unsigned NumBits = DestTy->getScalarSizeInBits(); + ConstantInt *Mask = + ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); if (auto *I = dyn_cast<Instruction>(Masked)) @@ -747,10 +736,11 @@ void IRPromoter::ConvertTruncs() { } void IRPromoter::Mutate(Type *OrigTy, - SmallPtrSetImpl<Value*> &Visited, + SetVector<Value*> &Visited, SmallPtrSetImpl<Value*> &Sources, SmallPtrSetImpl<Instruction*> &Sinks, - SmallPtrSetImpl<Instruction*> &SafeToPromote) { + SmallPtrSetImpl<Instruction*> &SafeToPromote, + SmallPtrSetImpl<Instruction*> &SafeWrap) { LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << ARMCodeGenPrepare::TypeSize << " to 32-bits\n"); @@ -763,6 +753,7 @@ void IRPromoter::Mutate(Type *OrigTy, this->Sources = &Sources; this->Sinks = &Sinks; this->SafeToPromote = &SafeToPromote; + this->SafeWrap = &SafeWrap; // Cache original types of the values that will likely need truncating for (auto *I : Sinks) { @@ -778,22 +769,28 @@ void IRPromoter::Mutate(Type *OrigTy, TruncTysMap[I].push_back(I->getOperand(i)->getType()); } } + for (auto *V : Visited) { + if (!isa<TruncInst>(V) || Sources.count(V)) + continue; + auto *Trunc = cast<TruncInst>(V); + TruncTysMap[Trunc].push_back(Trunc->getDestTy()); + } - // Convert adds and subs using negative immediates to equivalent instructions - // that use positive constants. - PrepareConstants(); + // Convert adds using negative immediates to equivalent instructions that use + // positive constants. + PrepareWrappingAdds(); // Insert zext instructions between sources and their users. ExtendSources(); - // Convert any truncs, that aren't sources, into AND masks. - ConvertTruncs(); - // Promote visited instructions, mutating their types in place. Also insert // DSP intrinsics, if enabled, for adds and subs which would be unsafe to // promote. PromoteTree(); + // Convert any truncs, that aren't sources, into AND masks. + ConvertTruncs(); + // Insert trunc instructions for use by calls, stores etc... TruncateSinks(); @@ -819,6 +816,11 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) { return EqualTypeSize(I->getOperand(0)); } + if (GenerateSignBits(V)) { + LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n"); + return false; + } + // Memory instructions if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V)) return true; @@ -835,9 +837,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) { isa<LoadInst>(V)) return isSupportedType(V); - if (isa<SExtInst>(V)) - return false; - if (auto *Cast = dyn_cast<CastInst>(V)) return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0)); @@ -854,10 +853,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) { if (!isSupportedType(V)) return false; - if (generateSignBits(V)) { - LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n"); - return false; - } return true; } @@ -873,7 +868,7 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) { if (SafeToPromote.count(I)) return true; - if (isPromotedResultSafe(V) || isSafeOverflow(I)) { + if (isPromotedResultSafe(V) || isSafeWrap(I)) { SafeToPromote.insert(I); return true; } @@ -911,6 +906,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) { return false; SafeToPromote.clear(); + SafeWrap.clear(); if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V)) return false; @@ -921,7 +917,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) { SetVector<Value*> WorkList; SmallPtrSet<Value*, 8> Sources; SmallPtrSet<Instruction*, 4> Sinks; - SmallPtrSet<Value*, 16> CurrentVisited; + SetVector<Value*> CurrentVisited; WorkList.insert(V); // Return true if V was added to the worklist as a supported instruction, @@ -1009,7 +1005,8 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) { if (ToPromote < 2) return false; - Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote); + Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote, + SafeWrap); return true; } |