62 files changed, 1772 insertions, 469 deletions
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 82c30d39afd6..df7c951743c9 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -295,7 +295,7 @@ public:    }    /// Should we be emitting segmented stack stuff for the function -  bool shouldSplitStack(); +  bool shouldSplitStack() const;    /// getNumBlockIDs - Return the number of MBB ID's allocated.    /// diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 23816bde07c0..536fc656e8e2 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -369,6 +369,18 @@ public:      (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |      (NoSignedZeros << 6) | (AllowReciprocal << 7);    } + +  /// Clear any flags in this flag set that aren't also set in Flags. +  void intersectWith(const SDNodeFlags *Flags) { +    NoUnsignedWrap &= Flags->NoUnsignedWrap; +    NoSignedWrap &= Flags->NoSignedWrap; +    Exact &= Flags->Exact; +    UnsafeAlgebra &= Flags->UnsafeAlgebra; +    NoNaNs &= Flags->NoNaNs; +    NoInfs &= Flags->NoInfs; +    NoSignedZeros &= Flags->NoSignedZeros; +    AllowReciprocal &= Flags->AllowReciprocal; +  }  };  /// Represents one node in the SelectionDAG. @@ -682,6 +694,9 @@ public:    /// and directly, but it is not to avoid creating a vtable for this class.    const SDNodeFlags *getFlags() const; +  /// Clear any flags in this node that aren't also set in Flags. +  void intersectFlagsWith(const SDNodeFlags *Flags); +    /// Return the number of values defined/returned by this operator.    unsigned getNumValues() const { return NumValues; } diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 4fa4e7daeab0..fa6469aa0ade 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -346,6 +346,10 @@ public:      return !(isDeclarationForLinker() || isWeakForLinker());    } +  // Returns true if the alignment of the value can be unilaterally +  // increased. +  bool canIncreaseAlignment() const; +    /// This method unlinks 'this' from the containing module, but does not delete    /// it.    virtual void removeFromParent() = 0; diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 911c6f14da0b..3ae01657a2ec 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -331,6 +331,25 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,  /// during lowering by the GC infrastructure.  bool callsGCLeafFunction(ImmutableCallSite CS); +//===----------------------------------------------------------------------===// +//  Intrinsic pattern matching +// + +/// Try and match a bitreverse or bswap idiom. +/// +/// If an idiom is matched, an intrinsic call is inserted before \c I. Any added +/// instructions are returned in \c InsertedInsts. They will all have been added +/// to a basic block. +/// +/// A bitreverse idiom normally requires around 2*BW nodes to be searched (where +/// BW is the bitwidth of the integer type). A bswap idiom requires anywhere up +/// to BW / 4 nodes to be searched, so is significantly faster. +/// +/// This function returns true on a successful match or false otherwise. 
+bool recognizeBitReverseOrBSwapIdiom( +    Instruction *I, bool MatchBSwaps, bool MatchBitReversals, +    SmallVectorImpl<Instruction *> &InsertedInsts); +  } // End llvm namespace  #endif diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 410a075aeb98..fc34f49a1255 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -125,8 +125,6 @@ private:    Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B);    // Math Library Optimizations -  Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool CheckRetType); -  Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B);    Value *optimizeCos(CallInst *CI, IRBuilder<> &B);    Value *optimizePow(CallInst *CI, IRBuilder<> &B);    Value *optimizeExp2(CallInst *CI, IRBuilder<> &B); diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index bbe53249a084..b60ab9151ef2 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -93,18 +93,7 @@ public:    /// variable, merge them by appending Next's values to the current    /// list of values.    /// Return true if the merge was successful. -  bool MergeValues(const DebugLocEntry &Next) { -    if (Begin == Next.Begin) { -      auto *Expr = cast_or_null<DIExpression>(Values[0].Expression); -      auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression); -      if (Expr->isBitPiece() && NextExpr->isBitPiece()) { -        addValues(Next.Values); -        End = Next.End; -        return true; -      } -    } -    return false; -  } +  bool MergeValues(const DebugLocEntry &Next);    /// \brief Attempt to merge this DebugLocEntry with Next and return    /// true if the merge was successful. Entries can be merged if they diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index a4fb07eacb3b..ae62b6b19a42 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -805,6 +805,24 @@ static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {    return (l1 < r2) && (l2 < r1);  } +/// \brief If this and Next are describing different pieces of the same +/// variable, merge them by appending Next's values to the current +/// list of values. +/// Return true if the merge was successful. +bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { +  if (Begin == Next.Begin) { +    auto *Expr = cast_or_null<DIExpression>(Values[0].Expression); +    auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression); +    if (Expr->isBitPiece() && NextExpr->isBitPiece() && +        !piecesOverlap(Expr, NextExpr)) { +      addValues(Next.Values); +      End = Next.End; +      return true; +    } +  } +  return false; +} +  /// Build the location list for all DBG_VALUEs in the function that  /// describe the same variable.  If the ranges of several independent  /// pieces of the same variable overlap partially, split them up and diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 03e57787307a..c8007a524e70 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1742,8 +1742,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {        // over-aligning global variables that have an explicit section is        // forbidden.        
GlobalVariable *GV; -      if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() && -          !GV->hasSection() && GV->getAlignment() < PrefAlign && +      if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() && +          GV->getAlignment() < PrefAlign &&            DL->getTypeAllocSize(GV->getType()->getElementType()) >=                MinSize + Offset2)          GV->setAlignment(PrefAlign); @@ -5211,6 +5211,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {    return false;  } +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +static bool makeBitReverse(Instruction &I, const DataLayout &DL, +                           const TargetLowering &TLI) { +  if (!I.getType()->isIntegerTy() || +      !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, +                                    TLI.getValueType(DL, I.getType(), true))) +    return false; + +  SmallVector<Instruction*, 4> Insts; +  if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts)) +    return false; +  Instruction *LastInst = Insts.back(); +  I.replaceAllUsesWith(LastInst); +  RecursivelyDeleteTriviallyDeadInstructions(&I); +  return true; +} +  // In this pass we look for GEP and cast instructions that are used  // across basic blocks and rewrite them to improve basic-block-at-a-time  // selection. @@ -5224,8 +5242,19 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {      if (ModifiedDT)        return true;    } -  MadeChange |= dupRetToEnableTailCallOpts(&BB); +  bool MadeBitReverse = true; +  while (TLI && MadeBitReverse) { +    MadeBitReverse = false; +    for (auto &I : reverse(BB)) { +      if (makeBitReverse(I, *DL, *TLI)) { +        MadeBitReverse = MadeChange = true; +        break; +      } +    } +  } +  MadeChange |= dupRetToEnableTailCallOpts(&BB); +      return MadeChange;  } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index ca4bb1c6ad49..f6604f38722a 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -163,7 +163,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {  }  /// Should we be emitting segmented stack stuff for the function -bool MachineFunction::shouldSplitStack() { +bool MachineFunction::shouldSplitStack() const {    return getFunction()->hasFnAttribute("split-stack");  } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 96bf914701c6..893871f94485 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -377,22 +377,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,    }  } -/// Add logical or fast math flag values to FoldingSetNodeID value. 
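// Note on the AddNodeIDFlags removal below: with the raw flag bits no longer
// folded into the FoldingSet profile, two binary nodes that differ only in
// their wrap/fast-math flags now CSE to a single node. The safe way to merge
// them is to keep only the flags common to both, which is what the new
// SDNodeFlags::intersectWith / SDNode::intersectFlagsWith hooks do after the
// FindNodeOrInsertPos lookups. A minimal sketch with hypothetical flag sets:
//
//   SDNodeFlags A; A.setNoSignedWrap(true); A.setExact(true);
//   SDNodeFlags B; B.setNoSignedWrap(true);           // Exact left unset
//   A.intersectWith(&B);  // A keeps NoSignedWrap; Exact is cleared.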
-static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, -                           const SDNodeFlags *Flags) { -  if (!isBinOpWithFlags(Opcode)) -    return; - -  unsigned RawFlags = 0; -  if (Flags) -    RawFlags = Flags->getRawFlags(); -  ID.AddInteger(RawFlags); -} - -static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { -  AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); -} -  static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,                            SDVTList VTList, ArrayRef<SDValue> OpList) {    AddNodeIDOpcode(ID, OpC); @@ -528,8 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {    }    } // end switch (N->getOpcode()) -  AddNodeIDFlags(ID, N); -    // Target specific memory nodes could also have address spaces to check.    if (N->isTargetMemoryOpcode())      ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); @@ -851,6 +833,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,    AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);    AddNodeIDCustom(ID, N);    SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); +  if (Node) +    if (const SDNodeFlags *Flags = N->getFlags()) +      Node->intersectFlagsWith(Flags);    return Node;  } @@ -869,6 +854,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,    AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);    AddNodeIDCustom(ID, N);    SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); +  if (Node) +    if (const SDNodeFlags *Flags = N->getFlags()) +      Node->intersectFlagsWith(Flags);    return Node;  } @@ -886,6 +874,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,    AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);    AddNodeIDCustom(ID, N);    SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); +  if (Node) +    if (const SDNodeFlags *Flags = N->getFlags()) +      Node->intersectFlagsWith(Flags);    return Node;  } @@ -3892,10 +3883,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,      SDValue Ops[] = {N1, N2};      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTs, Ops); -    AddNodeIDFlags(ID, Opcode, Flags);      void *IP = nullptr; -    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { +      if (Flags) +        E->intersectFlagsWith(Flags);        return SDValue(E, 0); +    }      N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); @@ -6249,10 +6242,12 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,    if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTList, Ops); -    AddNodeIDFlags(ID, Opcode, Flags);      void *IP = nullptr; -    if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) { +      if (Flags) +        E->intersectFlagsWith(Flags);        return E; +    }    }    return nullptr;  } @@ -6948,6 +6943,11 @@ const SDNodeFlags *SDNode::getFlags() const {    return nullptr;  } +void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) { +  if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) +    FlagsNode->Flags.intersectWith(Flags); +} +  SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {    assert(N->getNumValues() == 1 &&           "Can't unroll a vector with multiple results!"); diff --git a/lib/IR/Globals.cpp 
b/lib/IR/Globals.cpp index 6159f93faf89..a61b62bd9687 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -12,11 +12,12 @@  //  //===----------------------------------------------------------------------===// -#include "llvm/IR/GlobalValue.h"  #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Triple.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h"  #include "llvm/IR/GlobalVariable.h"  #include "llvm/IR/Module.h"  #include "llvm/IR/Operator.h" @@ -134,6 +135,47 @@ bool GlobalValue::isDeclaration() const {    return false;  } +bool GlobalValue::canIncreaseAlignment() const { +  // Firstly, can only increase the alignment of a global if it +  // is a strong definition. +  if (!isStrongDefinitionForLinker()) +    return false; + +  // It also has to either not have a section defined, or, not have +  // alignment specified. (If it is assigned a section, the global +  // could be densely packed with other objects in the section, and +  // increasing the alignment could cause padding issues.) +  if (hasSection() && getAlignment() > 0) +    return false; + +  // On ELF platforms, we're further restricted in that we can't +  // increase the alignment of any variable which might be emitted +  // into a shared library, and which is exported. If the main +  // executable accesses a variable found in a shared-lib, the main +  // exe actually allocates memory for and exports the symbol ITSELF, +  // overriding the symbol found in the library. That is, at link +  // time, the observed alignment of the variable is copied into the +  // executable binary. (A COPY relocation is also generated, to copy +  // the initial data from the shadowed variable in the shared-lib +  // into the location in the main binary, before running code.) +  // +  // And thus, even though you might think you are defining the +  // global, and allocating the memory for the global in your object +  // file, and thus should be able to set the alignment arbitrarily, +  // that's not actually true. Doing so can cause an ABI breakage; an +  // executable might have already been built with the previous +  // alignment of the variable, and then assuming an increased +  // alignment will be incorrect. + +  // Conservatively assume ELF if there's no parent pointer. 
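// Illustrative scenario (hypothetical symbol and file names) for the ELF
// restriction described above:
//
//   libfoo.so:  int g_state;          // strong definition, default visibility
//   app (exe):  extern int g_state;   // the executable's link step emits a
//                                     // COPY relocation and allocates its own
//                                     // copy, using the alignment recorded at
//                                     // the exe's link time
//
// If a later rebuild of libfoo.so silently assumed a larger alignment for
// g_state, already-linked executables would still place their copy with the
// old, smaller alignment, so increasing it here cannot be assumed safe.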
+  bool isELF = +      (!Parent || Triple(Parent->getTargetTriple()).isOSBinFormatELF()); +  if (isELF && hasDefaultVisibility() && !hasLocalLinkage()) +    return false; + +  return true; +} +  //===----------------------------------------------------------------------===//  // GlobalVariable Implementation  //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ecfbe9e2280..9b73c5e9d952 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10133,6 +10133,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(    const TargetInstrInfo *TII = Subtarget->getInstrInfo();    MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); +  MachineBasicBlock::iterator MBBI = Entry->begin();    for (const MCPhysReg *I = IStart; *I; ++I) {      const TargetRegisterClass *RC = nullptr;      if (AArch64::GPR64RegClass.contains(*I)) @@ -10152,13 +10153,13 @@ void AArch64TargetLowering::insertCopiesSplitCSR(                 Attribute::NoUnwind) &&             "Function should be nounwind in insertCopiesSplitCSR!");      Entry->addLiveIn(*I); -    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), -            NewVR) +    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)          .addReg(*I); +    // Insert the copy-back instructions right before the terminator.      for (auto *Exit : Exits) -      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), -              *I) +      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), +              TII->get(TargetOpcode::COPY), *I)            .addReg(NewVR);    }  } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index d26604f5765d..685907a2178e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -112,9 +112,21 @@ public:      MCELFStreamer::EmitInstruction(Inst, STI);    } +  /// Emit a 32-bit value as an instruction. This is only used for the .inst +  /// directive, EmitInstruction should be used in other cases.    void emitInst(uint32_t Inst) { +    char Buffer[4]; + +    // We can't just use EmitIntValue here, as that will emit a data mapping +    // symbol, and swap the endianness on big-endian systems (instructions are +    // always little-endian). 
+    for (unsigned I = 0; I < 4; ++I) { +      Buffer[I] = uint8_t(Inst); +      Inst >>= 8; +    } +      EmitA64MappingSymbol(); -    MCELFStreamer::EmitIntValue(Inst, 4); +    MCELFStreamer::EmitBytes(StringRef(Buffer, 4));    }    /// This is one of the functions used to emit data into an ELF section, so the diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 37c0795af283..978e99cf511e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12423,6 +12423,7 @@ void ARMTargetLowering::insertCopiesSplitCSR(    const TargetInstrInfo *TII = Subtarget->getInstrInfo();    MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); +  MachineBasicBlock::iterator MBBI = Entry->begin();    for (const MCPhysReg *I = IStart; *I; ++I) {      const TargetRegisterClass *RC = nullptr;      if (ARM::GPRRegClass.contains(*I)) @@ -12442,13 +12443,13 @@ void ARMTargetLowering::insertCopiesSplitCSR(                 Attribute::NoUnwind) &&             "Function should be nounwind in insertCopiesSplitCSR!");      Entry->addLiveIn(*I); -    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), -            NewVR) +    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)          .addReg(*I); +    // Insert the copy-back instructions right before the terminator.      for (auto *Exit : Exits) -      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), -              *I) +      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), +              TII->get(TargetOpcode::COPY), *I)            .addReg(NewVR);    }  } diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e8b96e74a7af..ed2e88067168 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -832,10 +832,10 @@ def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,                                               R8, R9, R10, R11)>;  // CSRs that are handled by prologue, epilogue. -def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>; +def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>;  // CSRs that are handled explicitly via copies. -def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>; +def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>;  // All GPRs - except r11  def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 8632bb8254f9..7f8ce4768c00 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -2031,6 +2031,10 @@ void X86FrameLowering::adjustForSegmentedStacks(    unsigned TlsReg, TlsOffset;    DebugLoc DL; +  // To support shrink-wrapping we would need to insert the new blocks +  // at the right place and update the branches to PrologueMBB. +  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); +    unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&           "Scratch register is live-in"); @@ -2271,6 +2275,11 @@ void X86FrameLowering::adjustForHiPEPrologue(      MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {    MachineFrameInfo *MFI = MF.getFrameInfo();    DebugLoc DL; + +  // To support shrink-wrapping we would need to insert the new blocks +  // at the right place and update the branches to PrologueMBB. 
+  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); +    // HiPE-specific values    const unsigned HipeLeafWords = 24;    const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; @@ -2584,7 +2593,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {  bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {    // If we may need to emit frameless compact unwind information, give    // up as this is currently broken: PR25614. -  return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF); +  return (MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) && +         // The lowering of segmented stack and HiPE only support entry blocks +         // as prologue blocks: PR26107. +         // This limitation may be lifted if we fix: +         // - adjustForSegmentedStacks +         // - adjustForHiPEPrologue +         MF.getFunction()->getCallingConv() != CallingConv::HiPE && +         !MF.shouldSplitStack();  }  MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b723059f091d..6904714ec781 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -28908,6 +28908,7 @@ void X86TargetLowering::insertCopiesSplitCSR(    const TargetInstrInfo *TII = Subtarget->getInstrInfo();    MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); +  MachineBasicBlock::iterator MBBI = Entry->begin();    for (const MCPhysReg *I = IStart; *I; ++I) {      const TargetRegisterClass *RC = nullptr;      if (X86::GR64RegClass.contains(*I)) @@ -28925,13 +28926,13 @@ void X86TargetLowering::insertCopiesSplitCSR(                 Attribute::NoUnwind) &&             "Function should be nounwind in insertCopiesSplitCSR!");      Entry->addLiveIn(*I); -    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), -            NewVR) +    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)          .addReg(*I); +    // Insert the copy-back instructions right before the terminator.      for (auto *Exit : Exits) -      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), -              *I) +      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), +              TII->get(TargetOpcode::COPY), *I)            .addReg(NewVR);    }  } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 95c50d32c820..76cefd97cd8f 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -17,6 +17,7 @@  #include "llvm/IR/Intrinsics.h"  #include "llvm/IR/PatternMatch.h"  #include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/Transforms/Utils/Local.h"  using namespace llvm;  using namespace PatternMatch; @@ -1565,190 +1566,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    return Changed ? &I : nullptr;  } - -/// Analyze the specified subexpression and see if it is capable of providing -/// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in -/// the output of the expression came from a corresponding bit in some other -/// value. This function is recursive, and the end result is a mapping of -/// (value, bitnumber) to bitnumber. 
It is the caller's responsibility to -/// validate that all `value`s are identical and that the bitnumber to bitnumber -/// mapping is correct for a bswap or bitreverse. -/// -/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know -/// that the expression deposits the low byte of %X into the high byte of the -/// result and that all other bits are zero. This expression is accepted, -/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7]. -/// -/// This function returns true if the match was unsuccessful and false if so. -/// On entry to the function the "OverallLeftShift" is a signed integer value -/// indicating the number of bits that the subexpression is later shifted.  For -/// example, if the expression is later right shifted by 16 bits, the -/// OverallLeftShift value would be -16 on entry.  This is used to specify which -/// bits of BitValues are actually being set. -/// -/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding -/// bit is masked to zero by a user.  For example, in (X & 255), X will be -/// processed with a bytemask of 255. BitMask is always in the local -/// (OverallLeftShift) coordinate space. -/// -static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask, -                            SmallVectorImpl<Value *> &BitValues, -                            SmallVectorImpl<int> &BitProvenance) { -  if (Instruction *I = dyn_cast<Instruction>(V)) { -    // If this is an or instruction, it may be an inner node of the bswap. -    if (I->getOpcode() == Instruction::Or) -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance) || -             CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); - -    // If this is a logical shift by a constant, recurse with OverallLeftShift -    // and BitMask adjusted. -    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { -      unsigned ShAmt = -          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); -      // Ensure the shift amount is defined. -      if (ShAmt > BitValues.size()) -        return true; - -      unsigned BitShift = ShAmt; -      if (I->getOpcode() == Instruction::Shl) { -        // X << C -> collect(X, +C) -        OverallLeftShift += BitShift; -        BitMask = BitMask.lshr(BitShift); -      } else { -        // X >>u C -> collect(X, -C) -        OverallLeftShift -= BitShift; -        BitMask = BitMask.shl(BitShift); -      } - -      if (OverallLeftShift >= (int)BitValues.size()) -        return true; -      if (OverallLeftShift <= -(int)BitValues.size()) -        return true; - -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); -    } - -    // If this is a logical 'and' with a mask that clears bits, clear the -    // corresponding bits in BitMask. -    if (I->getOpcode() == Instruction::And && -        isa<ConstantInt>(I->getOperand(1))) { -      unsigned NumBits = BitValues.size(); -      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); -      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); - -      for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) { -        // If this bit is masked out by a later operation, we don't care what -        // the and mask is. 
-        if (BitMask[i] == 0) -          continue; - -        // If the AndMask is zero for this bit, clear the bit. -        APInt MaskB = AndMask & Bit; -        if (MaskB == 0) { -          BitMask.clearBit(i); -          continue; -        } - -        // Otherwise, this bit is kept. -      } - -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); -    } -  } - -  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be -  // the input value to the bswap/bitreverse. To be part of a bswap or -  // bitreverse we must be demanding a contiguous range of bits from it. -  unsigned InputBitLen = BitMask.countPopulation(); -  unsigned InputBitNo = BitMask.countTrailingZeros(); -  if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo != -      InputBitLen) -    // Not a contiguous set range of bits! -    return true; - -  // We know we're moving a contiguous range of bits from the input to the -  // output. Record which bits in the output came from which bits in the input. -  unsigned DestBitNo = InputBitNo + OverallLeftShift; -  for (unsigned I = 0; I < InputBitLen; ++I) -    BitProvenance[DestBitNo + I] = InputBitNo + I; - -  // If the destination bit value is already defined, the values are or'd -  // together, which isn't a bswap/bitreverse (unless it's an or of the same -  // bits). -  if (BitValues[DestBitNo] && BitValues[DestBitNo] != V) -    return true; -  for (unsigned I = 0; I < InputBitLen; ++I) -    BitValues[DestBitNo + I] = V; - -  return false; -} - -static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, -                                          unsigned BitWidth) { -  if (From % 8 != To % 8) -    return false; -  // Convert from bit indices to byte indices and check for a byte reversal. -  From >>= 3; -  To >>= 3; -  BitWidth >>= 3; -  return From == BitWidth - To - 1; -} - -static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, -                                               unsigned BitWidth) { -  return From == BitWidth - To - 1; -} -  /// Given an OR instruction, check to see if this is a bswap or bitreverse  /// idiom. If so, insert the new intrinsic and return it.  Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) { -  IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); -  if (!ITy) -    return nullptr;   // Can't do vectors. -  unsigned BW = ITy->getBitWidth(); -   -  /// We keep track of which bit (BitProvenance) inside which value (BitValues) -  /// defines each bit in the result. -  SmallVector<Value *, 8> BitValues(BW, nullptr); -  SmallVector<int, 8> BitProvenance(BW, -1); -   -  // Try to find all the pieces corresponding to the bswap. -  APInt BitMask = APInt::getAllOnesValue(BitValues.size()); -  if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance)) -    return nullptr; - -  // Check to see if all of the bits come from the same value. -  Value *V = BitValues[0]; -  if (!V) return nullptr;  // Didn't find a bit?  Must be zero. - -  if (!std::all_of(BitValues.begin(), BitValues.end(), -                   [&](const Value *X) { return X == V; })) -    return nullptr; - -  // Now, is the bit permutation correct for a bswap or a bitreverse? We can -  // only byteswap values with an even number of bytes. 
-  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;; -  for (unsigned i = 0, e = BitValues.size(); i != e; ++i) { -    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); -    OKForBitReverse &= -        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); -  } - -  Intrinsic::ID Intrin; -  if (OKForBSwap) -    Intrin = Intrinsic::bswap; -  else if (OKForBitReverse) -    Intrin = Intrinsic::bitreverse; -  else +  SmallVector<Instruction*, 4> Insts; +  if (!recognizeBitReverseOrBSwapIdiom(&I, true, false, Insts))      return nullptr; +  Instruction *LastInst = Insts.pop_back_val(); +  LastInst->removeFromParent(); -  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy); -  return CallInst::Create(F, V); +  for (auto *Inst : Insts) +    Worklist.Add(Inst); +  return LastInst;  }  /// We have an expression of the form (A&C)|(B&D).  Check if A is (cond?-1:0) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 14574119b9a8..79282a2a703b 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -179,13 +179,244 @@ void LandingPadInliningInfo::forwardResume(    RI->eraseFromParent();  } +/// Helper for getUnwindDestToken/getUnwindDestTokenHelper. +static Value *getParentPad(Value *EHPad) { +  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) +    return FPI->getParentPad(); +  return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + +typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy; + +/// Helper for getUnwindDestToken that does the descendant-ward part of +/// the search. +static Value *getUnwindDestTokenHelper(Instruction *EHPad, +                                       UnwindDestMemoTy &MemoMap) { +  SmallVector<Instruction *, 8> Worklist(1, EHPad); + +  while (!Worklist.empty()) { +    Instruction *CurrentPad = Worklist.pop_back_val(); +    // We only put pads on the worklist that aren't in the MemoMap.  When +    // we find an unwind dest for a pad we may update its ancestors, but +    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, +    // so they should never get updated while queued on the worklist. +    assert(!MemoMap.count(CurrentPad)); +    Value *UnwindDestToken = nullptr; +    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { +      if (CatchSwitch->hasUnwindDest()) { +        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); +      } else { +        // Catchswitch doesn't have a 'nounwind' variant, and one might be +        // annotated as "unwinds to caller" when really it's nounwind (see +        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the +        // parent's unwind dest from this.  We can check its catchpads' +        // descendants, since they might include a cleanuppad with an +        // "unwinds to caller" cleanupret, which can be trusted. 
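// Sketch of the shape being searched for here, in (hypothetical) IR:
//
//   %cs = catchswitch within none [label %handler] unwind to caller
// handler:
//   %cp = catchpad within %cs [ ... ]
//   ...
//   %cleanup = cleanuppad within %cp []
//   ...
//   cleanupret from %cleanup unwind to caller
//
// The catchswitch's "unwind to caller" might merely mean "nounwind", but the
// nested cleanupret's "unwind to caller" can be trusted; since the cleanup is
// nested inside the catch, it shows the catchswitch really does unwind to the
// caller (ConstantTokenNone).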
+        for (auto HI = CatchSwitch->handler_begin(), +                  HE = CatchSwitch->handler_end(); +             HI != HE && !UnwindDestToken; ++HI) { +          BasicBlock *HandlerBlock = *HI; +          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); +          for (User *Child : CatchPad->users()) { +            // Intentionally ignore invokes here -- since the catchswitch is +            // marked "unwind to caller", it would be a verifier error if it +            // contained an invoke which unwinds out of it, so any invoke we'd +            // encounter must unwind to some child of the catch. +            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) +              continue; + +            Instruction *ChildPad = cast<Instruction>(Child); +            auto Memo = MemoMap.find(ChildPad); +            if (Memo == MemoMap.end()) { +              // Haven't figure out this child pad yet; queue it. +              Worklist.push_back(ChildPad); +              continue; +            } +            // We've already checked this child, but might have found that +            // it offers no proof either way. +            Value *ChildUnwindDestToken = Memo->second; +            if (!ChildUnwindDestToken) +              continue; +            // We already know the child's unwind dest, which can either +            // be ConstantTokenNone to indicate unwind to caller, or can +            // be another child of the catchpad.  Only the former indicates +            // the unwind dest of the catchswitch. +            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { +              UnwindDestToken = ChildUnwindDestToken; +              break; +            } +            assert(getParentPad(ChildUnwindDestToken) == CatchPad); +          } +        } +      } +    } else { +      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); +      for (User *U : CleanupPad->users()) { +        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { +          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) +            UnwindDestToken = RetUnwindDest->getFirstNonPHI(); +          else +            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); +          break; +        } +        Value *ChildUnwindDestToken; +        if (auto *Invoke = dyn_cast<InvokeInst>(U)) { +          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); +        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { +          Instruction *ChildPad = cast<Instruction>(U); +          auto Memo = MemoMap.find(ChildPad); +          if (Memo == MemoMap.end()) { +            // Haven't resolved this child yet; queue it and keep searching. +            Worklist.push_back(ChildPad); +            continue; +          } +          // We've checked this child, but still need to ignore it if it +          // had no proof either way. +          ChildUnwindDestToken = Memo->second; +          if (!ChildUnwindDestToken) +            continue; +        } else { +          // Not a relevant user of the cleanuppad +          continue; +        } +        // In a well-formed program, the child/invoke must either unwind to +        // an(other) child of the cleanup, or exit the cleanup.  In the +        // first case, continue searching. 
+        if (isa<Instruction>(ChildUnwindDestToken) && +            getParentPad(ChildUnwindDestToken) == CleanupPad) +          continue; +        UnwindDestToken = ChildUnwindDestToken; +        break; +      } +    } +    // If we haven't found an unwind dest for CurrentPad, we may have queued its +    // children, so move on to the next in the worklist. +    if (!UnwindDestToken) +      continue; + +    // Now we know that CurrentPad unwinds to UnwindDestToken.  It also exits +    // any ancestors of CurrentPad up to but not including UnwindDestToken's +    // parent pad.  Record this in the memo map, and check to see if the +    // original EHPad being queried is one of the ones exited. +    Value *UnwindParent; +    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) +      UnwindParent = getParentPad(UnwindPad); +    else +      UnwindParent = nullptr; +    bool ExitedOriginalPad = false; +    for (Instruction *ExitedPad = CurrentPad; +         ExitedPad && ExitedPad != UnwindParent; +         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { +      // Skip over catchpads since they just follow their catchswitches. +      if (isa<CatchPadInst>(ExitedPad)) +        continue; +      MemoMap[ExitedPad] = UnwindDestToken; +      ExitedOriginalPad |= (ExitedPad == EHPad); +    } + +    if (ExitedOriginalPad) +      return UnwindDestToken; + +    // Continue the search. +  } + +  // No definitive information is contained within this funclet. +  return nullptr; +} + +/// Given an EH pad, find where it unwinds.  If it unwinds to an EH pad, +/// return that pad instruction.  If it unwinds to caller, return +/// ConstantTokenNone.  If it does not have a definitive unwind destination, +/// return nullptr. +/// +/// This routine gets invoked for calls in funclets in inlinees when inlining +/// an invoke.  Since many funclets don't have calls inside them, it's queried +/// on-demand rather than building a map of pads to unwind dests up front. +/// Determining a funclet's unwind dest may require recursively searching its +/// descendants, and also ancestors and cousins if the descendants don't provide +/// an answer.  Since most funclets will have their unwind dest immediately +/// available as the unwind dest of a catchswitch or cleanupret, this routine +/// searches top-down from the given pad and then up. To avoid worst-case +/// quadratic run-time given that approach, it uses a memo map to avoid +/// re-processing funclet trees.  The callers that rewrite the IR as they go +/// take advantage of this, for correctness, by checking/forcing rewritten +/// pads' entries to match the original callee view. +static Value *getUnwindDestToken(Instruction *EHPad, +                                 UnwindDestMemoTy &MemoMap) { +  // Catchpads unwind to the same place as their catchswitch; +  // redirct any queries on catchpads so the code below can +  // deal with just catchswitches and cleanuppads. +  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) +    EHPad = CPI->getCatchSwitch(); + +  // Check if we've already determined the unwind dest for this pad. +  auto Memo = MemoMap.find(EHPad); +  if (Memo != MemoMap.end()) +    return Memo->second; + +  // Search EHPad and, if necessary, its descendants. 
+  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); +  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); +  if (UnwindDestToken) +    return UnwindDestToken; + +  // No information is available for this EHPad from itself or any of its +  // descendants.  An unwind all the way out to a pad in the caller would +  // need also to agree with the unwind dest of the parent funclet, so +  // search up the chain to try to find a funclet with information.  Put +  // null entries in the memo map to avoid re-processing as we go up. +  MemoMap[EHPad] = nullptr; +  Instruction *LastUselessPad = EHPad; +  Value *AncestorToken; +  for (AncestorToken = getParentPad(EHPad); +       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); +       AncestorToken = getParentPad(AncestorToken)) { +    // Skip over catchpads since they just follow their catchswitches. +    if (isa<CatchPadInst>(AncestorPad)) +      continue; +    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); +    auto AncestorMemo = MemoMap.find(AncestorPad); +    if (AncestorMemo == MemoMap.end()) { +      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); +    } else { +      UnwindDestToken = AncestorMemo->second; +    } +    if (UnwindDestToken) +      break; +    LastUselessPad = AncestorPad; +  } + +  // Since the whole tree under LastUselessPad has no information, it all must +  // match UnwindDestToken; record that to avoid repeating the search. +  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); +  while (!Worklist.empty()) { +    Instruction *UselessPad = Worklist.pop_back_val(); +    assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr); +    MemoMap[UselessPad] = UnwindDestToken; +    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { +      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) +        for (User *U : HandlerBlock->getFirstNonPHI()->users()) +          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) +            Worklist.push_back(cast<Instruction>(U)); +    } else { +      assert(isa<CleanupPadInst>(UselessPad)); +      for (User *U : UselessPad->users()) +        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) +          Worklist.push_back(cast<Instruction>(U)); +    } +  } + +  return UnwindDestToken; +} +  /// When we inline a basic block into an invoke,  /// we have to turn all of the calls that can throw into invokes.  /// This function analyze BB to see if there are any calls, and if so,  /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI  /// nodes in that block with the values specified in InvokeDestPHIValues. -static BasicBlock * -HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { +static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( +    BasicBlock *BB, BasicBlock *UnwindEdge, +    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {      Instruction *I = &*BBI++; @@ -196,6 +427,31 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {      if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))        continue; +    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { +      // This call is nested inside a funclet.  If that funclet has an unwind +      // destination within the inlinee, then unwinding out of this call would +      // be UB.  
Rewriting this call to an invoke which targets the inlined +      // invoke's unwind dest would give the call's parent funclet multiple +      // unwind destinations, which is something that subsequent EH table +      // generation can't handle and that the veirifer rejects.  So when we +      // see such a call, leave it as a call. +      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); +      Value *UnwindDestToken = +          getUnwindDestToken(FuncletPad, *FuncletUnwindMap); +      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) +        continue; +#ifndef NDEBUG +      Instruction *MemoKey; +      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) +        MemoKey = CatchPad->getCatchSwitch(); +      else +        MemoKey = FuncletPad; +      assert(FuncletUnwindMap->count(MemoKey) && +             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && +             "must get memoized to avoid confusing later searches"); +#endif // NDEBUG +    } +      // Convert this function call into an invoke instruction.  First, split the      // basic block.      BasicBlock *Split = @@ -328,13 +584,23 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,    // This connects all the instructions which 'unwind to caller' to the invoke    // destination. +  UnwindDestMemoTy FuncletUnwindMap;    for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();         BB != E; ++BB) {      if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {        if (CRI->unwindsToCaller()) { -        CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); +        auto *CleanupPad = CRI->getCleanupPad(); +        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);          CRI->eraseFromParent();          UpdatePHINodes(&*BB); +        // Finding a cleanupret with an unwind destination would confuse +        // subsequent calls to getUnwindDestToken, so map the cleanuppad +        // to short-circuit any such calls and recognize this as an "unwind +        // to caller" cleanup. +        assert(!FuncletUnwindMap.count(CleanupPad) || +               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); +        FuncletUnwindMap[CleanupPad] = +            ConstantTokenNone::get(Caller->getContext());        }      } @@ -345,12 +611,41 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,      Instruction *Replacement = nullptr;      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {        if (CatchSwitch->unwindsToCaller()) { +        Value *UnwindDestToken; +        if (auto *ParentPad = +                dyn_cast<Instruction>(CatchSwitch->getParentPad())) { +          // This catchswitch is nested inside another funclet.  If that +          // funclet has an unwind destination within the inlinee, then +          // unwinding out of this catchswitch would be UB.  Rewriting this +          // catchswitch to unwind to the inlined invoke's unwind dest would +          // give the parent funclet multiple unwind destinations, which is +          // something that subsequent EH table generation can't handle and +          // that the veirifer rejects.  So when we see such a call, leave it +          // as "unwind to caller". 
+          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); +          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) +            continue; +        } else { +          // This catchswitch has no parent to inherit constraints from, and +          // none of its descendants can have an unwind edge that exits it and +          // targets another funclet in the inlinee.  It may or may not have a +          // descendant that definitively has an unwind to caller.  In either +          // case, we'll have to assume that any unwinds out of it may need to +          // be routed to the caller, so treat it as though it has a definitive +          // unwind to caller. +          UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); +        }          auto *NewCatchSwitch = CatchSwitchInst::Create(              CatchSwitch->getParentPad(), UnwindDest,              CatchSwitch->getNumHandlers(), CatchSwitch->getName(),              CatchSwitch);          for (BasicBlock *PadBB : CatchSwitch->handlers())            NewCatchSwitch->addHandler(PadBB); +        // Propagate info for the old catchswitch over to the new one in +        // the unwind map.  This also serves to short-circuit any subsequent +        // checks for the unwind dest of this catchswitch, which would get +        // confused if they found the outer handler in the callee. +        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;          Replacement = NewCatchSwitch;        }      } else if (!isa<FuncletPadInst>(I)) { @@ -369,8 +664,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,      for (Function::iterator BB = FirstNewBlock->getIterator(),                              E = Caller->end();           BB != E; ++BB) -      if (BasicBlock *NewBB = -              HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) +      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( +              &*BB, UnwindDest, &FuncletUnwindMap))          // Update any PHI nodes in the exceptional block to indicate that there          // is now a new entry in them.          UpdatePHINodes(NewBB); @@ -1415,6 +1710,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      }    } +  // If we are inlining for an invoke instruction, we must make sure to rewrite +  // any call instructions into invoke instructions.  This is sensitive to which +  // funclet pads were top-level in the inlinee, so must be done before +  // rewriting the "parent pad" links. +  if (auto *II = dyn_cast<InvokeInst>(TheCall)) { +    BasicBlock *UnwindDest = II->getUnwindDest(); +    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); +    if (isa<LandingPadInst>(FirstNonPHI)) { +      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); +    } else { +      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); +    } +  } +    // Update the lexical scopes of the new funclets and callsites.    // Anything that had 'none' as its parent is now nested inside the callsite's    // EHPad. @@ -1472,18 +1781,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      }    } -  // If we are inlining for an invoke instruction, we must make sure to rewrite -  // any call instructions into invoke instructions. 
-  if (auto *II = dyn_cast<InvokeInst>(TheCall)) { -    BasicBlock *UnwindDest = II->getUnwindDest(); -    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); -    if (isa<LandingPadInst>(FirstNonPHI)) { -      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); -    } else { -      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); -    } -  } -    // Handle any inlined musttail call sites.  In order for a new call site to be    // musttail, the source of the clone and the inlined call site must have been    // musttail.  Therefore it's safe to return without merging control into the diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d2793e5ecb5b..abc9b65f7a39 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -944,37 +944,44 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {  static unsigned enforceKnownAlignment(Value *V, unsigned Align,                                        unsigned PrefAlign,                                        const DataLayout &DL) { +  assert(PrefAlign > Align); +    V = V->stripPointerCasts();    if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { +    // TODO: ideally, computeKnownBits ought to have used +    // AllocaInst::getAlignment() in its computation already, making +    // the below max redundant. But, as it turns out, +    // stripPointerCasts recurses through infinite layers of bitcasts, +    // while computeKnownBits is not allowed to traverse more than 6 +    // levels. +    Align = std::max(AI->getAlignment(), Align); +    if (PrefAlign <= Align) +      return Align; +      // If the preferred alignment is greater than the natural stack alignment      // then don't round up. This avoids dynamic stack realignment.      if (DL.exceedsNaturalStackAlignment(PrefAlign))        return Align; -    // If there is a requested alignment and if this is an alloca, round up. -    if (AI->getAlignment() >= PrefAlign) -      return AI->getAlignment();      AI->setAlignment(PrefAlign);      return PrefAlign;    }    if (auto *GO = dyn_cast<GlobalObject>(V)) { +    // TODO: as above, this shouldn't be necessary. +    Align = std::max(GO->getAlignment(), Align); +    if (PrefAlign <= Align) +      return Align; +      // If there is a large requested alignment and we can, bump up the alignment      // of the global.  If the memory we set aside for the global may not be the      // memory used by the final program then it is impossible for us to reliably      // enforce the preferred alignment. -    if (!GO->isStrongDefinitionForLinker()) +    if (!GO->canIncreaseAlignment())        return Align; -    if (GO->getAlignment() >= PrefAlign) -      return GO->getAlignment(); -    // We can only increase the alignment of the global if it has no alignment -    // specified or if it is not assigned a section.  If it is assigned a -    // section, the global could be densely packed with other objects in the -    // section, increasing the alignment could cause padding issues. -    if (!GO->hasSection() || GO->getAlignment() == 0) -      GO->setAlignment(PrefAlign); -    return GO->getAlignment(); +    GO->setAlignment(PrefAlign); +    return PrefAlign;    }    return Align; @@ -1585,3 +1592,205 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {    return false;  } + +/// A potential constituent of a bitreverse or bswap expression. See +/// collectBitParts for a fuller explanation. 
+struct BitPart { +  BitPart(Value *P, unsigned BW) : Provider(P) { +    Provenance.resize(BW); +  } + +  /// The Value that this is a bitreverse/bswap of. +  Value *Provider; +  /// The "provenance" of each bit. Provenance[A] = B means that bit A +  /// in Provider becomes bit B in the result of this expression. +  SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. + +  enum { Unset = -1 }; +}; + +/// Analyze the specified subexpression and see if it is capable of providing +/// pieces of a bswap or bitreverse. The subexpression provides a potential +/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in +/// the output of the expression came from a corresponding bit in some other +/// value. This function is recursive, and the end result is a mapping of +/// bitnumber to bitnumber. It is the caller's responsibility to validate that +/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse. +/// +/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know +/// that the expression deposits the low byte of %X into the high byte of the +/// result and that all other bits are zero. This expression is accepted and a +/// BitPart is returned with Provider set to %X and Provenance[24-31] set to +/// [0-7]. +/// +/// To avoid revisiting values, the BitPart results are memoized into the +/// provided map. To avoid unnecessary copying of BitParts, BitParts are +/// constructed in-place in the \c BPS map. Because of this \c BPS needs to +/// store BitParts objects, not pointers. As we need the concept of a nullptr +/// BitParts (Value has been analyzed and the analysis failed), we an Optional +/// type instead to provide the same functionality. +/// +/// Because we pass around references into \c BPS, we must use a container that +/// does not invalidate internal references (std::map instead of DenseMap). +/// +static const Optional<BitPart> & +collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, +                std::map<Value *, Optional<BitPart>> &BPS) { +  auto I = BPS.find(V); +  if (I != BPS.end()) +    return I->second; + +  auto &Result = BPS[V] = None; +  auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + +  if (Instruction *I = dyn_cast<Instruction>(V)) { +    // If this is an or instruction, it may be an inner node of the bswap. +    if (I->getOpcode() == Instruction::Or) { +      auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, +                                MatchBitReversals, BPS); +      auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, +                                MatchBitReversals, BPS); +      if (!A || !B) +        return Result; + +      // Try and merge the two together. +      if (!A->Provider || A->Provider != B->Provider) +        return Result; + +      Result = BitPart(A->Provider, BitWidth); +      for (unsigned i = 0; i < A->Provenance.size(); ++i) { +        if (A->Provenance[i] != BitPart::Unset && +            B->Provenance[i] != BitPart::Unset && +            A->Provenance[i] != B->Provenance[i]) +          return Result = None; + +        if (A->Provenance[i] == BitPart::Unset) +          Result->Provenance[i] = B->Provenance[i]; +        else +          Result->Provenance[i] = A->Provenance[i]; +      } + +      return Result; +    } + +    // If this is a logical shift by a constant, recurse then shift the result. 
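// Worked example of the provenance shift performed below (hypothetical
// values): for `shl i32 %X, 8`, an operand provenance of [0, 1, ..., 31]
// becomes [Unset x 8, 0, 1, ..., 23], i.e. result bit 8 now traces back to
// bit 0 of %X and the low 8 result bits are known zero; lshr performs the
// mirror-image transformation.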
+    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { +      unsigned BitShift = +          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); +      // Ensure the shift amount is defined. +      if (BitShift > BitWidth) +        return Result; + +      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, +                                  MatchBitReversals, BPS); +      if (!Res) +        return Result; +      Result = Res; + +      // Perform the "shift" on BitProvenance. +      auto &P = Result->Provenance; +      if (I->getOpcode() == Instruction::Shl) { +        P.erase(std::prev(P.end(), BitShift), P.end()); +        P.insert(P.begin(), BitShift, BitPart::Unset); +      } else { +        P.erase(P.begin(), std::next(P.begin(), BitShift)); +        P.insert(P.end(), BitShift, BitPart::Unset); +      } + +      return Result; +    } + +    // If this is a logical 'and' with a mask that clears bits, recurse then +    // unset the appropriate bits. +    if (I->getOpcode() == Instruction::And && +        isa<ConstantInt>(I->getOperand(1))) { +      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); +      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + +      // Check that the mask allows a multiple of 8 bits for a bswap, for an +      // early exit. +      unsigned NumMaskedBits = AndMask.countPopulation(); +      if (!MatchBitReversals && NumMaskedBits % 8 != 0) +        return Result; +       +      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, +                                  MatchBitReversals, BPS); +      if (!Res) +        return Result; +      Result = Res; + +      for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) +        // If the AndMask is zero for this bit, clear the bit. +        if ((AndMask & Bit) == 0) +          Result->Provenance[i] = BitPart::Unset; + +      return Result; +    } +  } + +  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be +  // the input value to the bswap/bitreverse. +  Result = BitPart(V, BitWidth); +  for (unsigned i = 0; i < BitWidth; ++i) +    Result->Provenance[i] = i; +  return Result; +} + +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, +                                          unsigned BitWidth) { +  if (From % 8 != To % 8) +    return false; +  // Convert from bit indices to byte indices and check for a byte reversal. +  From >>= 3; +  To >>= 3; +  BitWidth >>= 3; +  return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, +                                               unsigned BitWidth) { +  return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +bool llvm::recognizeBitReverseOrBSwapIdiom( +    Instruction *I, bool MatchBSwaps, bool MatchBitReversals, +    SmallVectorImpl<Instruction *> &InsertedInsts) { +  if (Operator::getOpcode(I) != Instruction::Or) +    return false; +  if (!MatchBSwaps && !MatchBitReversals) +    return false; +  IntegerType *ITy = dyn_cast<IntegerType>(I->getType()); +  if (!ITy || ITy->getBitWidth() > 128) +    return false;   // Can't do vectors or integers > 128 bits. +  unsigned BW = ITy->getBitWidth(); + +  // Try to find all the pieces corresponding to the bswap. 
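+  // The BPS map memoizes the BitPart computed for every sub-value, so each node
+  // in the expression tree is analyzed at most once even when operands are
+  // shared (see the bitreverse-hang tests added below for the pathological case
+  // this guards against). For a canonical i32 bswap idiom the provenance comes
+  // back as [24..31, 16..23, 8..15, 0..7]; for an i32 bitreverse it is
+  // [31, 30, ..., 0].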
+  std::map<Value *, Optional<BitPart>> BPS; +  auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); +  if (!Res) +    return false; +  auto &BitProvenance = Res->Provenance; + +  // Now, is the bit permutation correct for a bswap or a bitreverse? We can +  // only byteswap values with an even number of bytes. +  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; +  for (unsigned i = 0; i < BW; ++i) { +    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); +    OKForBitReverse &= +        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); +  } + +  Intrinsic::ID Intrin; +  if (OKForBSwap && MatchBSwaps) +    Intrin = Intrinsic::bswap; +  else if (OKForBitReverse && MatchBitReversals) +    Intrin = Intrinsic::bitreverse; +  else +    return false; + +  Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); +  InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); +  return true; +} diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc0744060142..2f3c31128cf0 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -970,15 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) {    return nullptr;  } -//===----------------------------------------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' +/// Any floating-point library function that we're trying to simplify will have +/// a signature of the form: fptype foo(fptype param1, fptype param2, ...). +/// CheckDoubleTy indicates that 'fptype' must be 'double'. +static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams, +                                          bool CheckDoubleTy) { +  FunctionType *FT = F->getFunctionType(); +  if (FT->getNumParams() != NumParams) +    return false; + +  // The return type must match what we're looking for. +  Type *RetTy = FT->getReturnType(); +  if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy()) +    return false; + +  // Each parameter must match the return type, and therefore, match every other +  // parameter too. +  for (const Type *ParamTy : FT->params()) +    if (ParamTy != RetTy) +      return false; -Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, -                                                bool CheckRetType) { +  return true; +} + +/// Shrink double -> float for unary functions like 'floor'. +static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, +                                    bool CheckRetType) {    Function *Callee = CI->getCalledFunction(); -  FunctionType *FT = Callee->getFunctionType(); -  if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || -      !FT->getParamType(0)->isDoubleTy()) +  if (!matchesFPLibFunctionSignature(Callee, 1, true))      return nullptr;    if (CheckRetType) { @@ -1013,15 +1032,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,    return B.CreateFPExt(V, B.getDoubleTy());  } -// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' -Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { +/// Shrink double -> float for binary functions like 'fmin/fmax'. 
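+/// For example, 'fmin((double)f1, (double)f2)', where f1 and f2 are floats that
+/// were extended to double, can be shrunk to '(double)fminf(f1, f2)'.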
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {    Function *Callee = CI->getCalledFunction(); -  FunctionType *FT = Callee->getFunctionType(); -  // Just make sure this has 2 arguments of the same FP type, which match the -  // result type. -  if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || -      FT->getParamType(0) != FT->getParamType(1) || -      !FT->getParamType(0)->isFloatingPointTy()) +  if (!matchesFPLibFunctionSignature(Callee, 2, true))      return nullptr;    // If this is something like 'fmin((double)floatval1, (double)floatval2)', @@ -1394,12 +1408,21 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {  Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {    Function *Callee = CI->getCalledFunction(); -   +    Value *Ret = nullptr;    if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||                                     Callee->getIntrinsicID() == Intrinsic::sqrt))      Ret = optimizeUnaryDoubleFP(CI, B, true); +  // FIXME: Refactor - this check is repeated all over this file and even in the +  // preceding call to shrink double -> float. + +  // Make sure this has 1 argument of FP type, which matches the result type. +  FunctionType *FT = Callee->getFunctionType(); +  if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || +      !FT->getParamType(0)->isFloatingPointTy()) +    return Ret; +    if (!CI->hasUnsafeAlgebra())      return Ret; diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll index a9ae00c8d270..9996c0d3aba8 100644 --- a/test/CodeGen/AArch64/cxx-tlscc.ll +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -8,6 +8,7 @@  @sg = internal thread_local global %struct.S zeroinitializer, align 1  @__dso_handle = external global i8  @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4  declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)  declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) @@ -74,3 +75,29 @@ __tls_init.exit:  ; CHECK-NOT: ldp d27, d26  ; CHECK-NOT: ldp d29, d28  ; CHECK-NOT: ldp d31, d30 + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: stp d31, d30 +; CHECK-NOT: stp d29, d28 +; CHECK-NOT: stp d27, d26 +; CHECK-NOT: stp d25, d24 +; CHECK-NOT: stp d23, d22 +; CHECK-NOT: stp d21, d20 +; CHECK-NOT: stp d19, d18 +; CHECK-NOT: stp d17, d16 +; CHECK-NOT: stp d7, d6 +; CHECK-NOT: stp d5, d4 +; CHECK-NOT: stp d3, d2 +; CHECK-NOT: stp d1, d0 +; CHECK-NOT: stp x20, x19 +; CHECK-NOT: stp x14, x13 +; CHECK-NOT: stp x12, x11 +; CHECK-NOT: stp x10, x9 +; CHECK-NOT: stp x8, x7 +; CHECK-NOT: stp x6, x5 +; CHECK-NOT: stp x4, x3 +; CHECK-NOT: stp x2, x1 +; CHECK: blr +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { +  ret i32* @sum1 +} diff --git a/test/CodeGen/ARM/cse-flags.ll b/test/CodeGen/ARM/cse-flags.ll new file mode 100644 index 000000000000..c18e2fcb6039 --- /dev/null +++ b/test/CodeGen/ARM/cse-flags.ll @@ -0,0 +1,43 @@ +; RUN: llc -asm-verbose=false < %s | FileCheck %s +; PR26063 + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabihf" + +; CHECK: .LBB0_1: +; CHECK-NEXT: bl      f{{$}} +; CHECK-NEXT: ldrb    r[[T0:[0-9]+]], [r{{[0-9]+}}, #1]!{{$}} +; CHECK-NEXT: cmp     r{{[0-9]+}}, #1{{$}} +; CHECK-NEXT: cmpne   r[[T0]], #0{{$}} +; CHECK-NEXT: bne     .LBB0_1{{$}} +define i8* @h(i8* readonly %a, i32 %b, i32 %c) { +entry: +  %0 = load i8, i8* %a, align 1 +  %tobool4 = icmp ne i8 %0, 0 +  %cmp5 = 
icmp ne i32 %b, 1 +  %1 = and i1 %cmp5, %tobool4 +  br i1 %1, label %while.body.preheader, label %while.end + +while.body.preheader:                             ; preds = %entry +  br label %while.body + +while.body:                                       ; preds = %while.body.preheader, %while.body +  %a.addr.06 = phi i8* [ %incdec.ptr, %while.body ], [ %a, %while.body.preheader ] +  %call = tail call i32 bitcast (i32 (...)* @f to i32 ()*)() +  %incdec.ptr = getelementptr inbounds i8, i8* %a.addr.06, i32 1 +  %2 = load i8, i8* %incdec.ptr, align 1 +  %tobool = icmp ne i8 %2, 0 +  %cmp = icmp ne i32 %call, 1 +  %3 = and i1 %cmp, %tobool +  br i1 %3, label %while.body, label %while.end.loopexit + +while.end.loopexit:                               ; preds = %while.body +  %incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %while.body ] +  br label %while.end + +while.end:                                        ; preds = %while.end.loopexit, %entry +  %a.addr.0.lcssa = phi i8* [ %a, %entry ], [ %incdec.ptr.lcssa, %while.end.loopexit ] +  ret i8* %a.addr.0.lcssa +} + +declare i32 @f(...) diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll index 7b776d4b8e88..11173bbb1978 100644 --- a/test/CodeGen/ARM/cxx-tlscc.ll +++ b/test/CodeGen/ARM/cxx-tlscc.ll @@ -8,6 +8,7 @@  @sg = internal thread_local global %struct.S zeroinitializer, align 1  @__dso_handle = external global i8  @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4  declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)  declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) @@ -44,3 +45,13 @@ __tls_init.exit:  ; CHECK-NOT: pop {r9, r12}  ; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}  ; CHECK: pop {lr} + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: push {r1, r2, r3, r4, r7, lr} +; CHECK-NOT: push {r9, r12} +; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} +; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7} +; CHECK: blx +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { +  ret i32* @sum1 +} diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index 66743f3e9d5e..46fef7629cc4 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS -; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN -; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI -; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI +; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK +; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI 
--check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK  define void @f1(i8* %dest, i8* %src) {  entry: @@ -402,8 +402,8 @@ entry:  ; CHECK: arr1:  ; CHECK-IOS: .align 3  ; CHECK-DARWIN: .align 2 -; CHECK-EABI: .align 2 -; CHECK-GNUEABI: .align 2 +; CHECK-EABI-NOT: .align +; CHECK-GNUEABI-NOT: .align  ; CHECK: arr2:  ; CHECK: {{\.section.+foo,bar}}  ; CHECK-NOT: .align diff --git a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll b/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll deleted file mode 100644 index 4580795880ab..000000000000 --- a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s -; CHECK: addl - -; The two additions are the same , but have different flags. -; In theory this code should never be generated by the frontend, but this  -; tries to test that two identical instructions with two different flags -; actually generate two different nodes. -; -; Normally the combiner would see this condition without the flags  -; and optimize the result of the sub into a register clear -; (the final result would be 0). With the different flags though the combiner  -; needs to keep the add + sub nodes, because the two nodes result as different -; nodes and so cannot assume that the subtraction of the two nodes -; generates 0 as result -define i32 @foo(i32 %a, i32 %b) { -  %1 = add i32 %a, %b -  %2 = add nsw i32 %a, %b -  %3 = sub i32 %1, %2 -  ret i32 %3 -} diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll index 70fe501040bf..6c8e45e42d15 100644 --- a/test/CodeGen/X86/cxx_tlscc64.ll +++ b/test/CodeGen/X86/cxx_tlscc64.ll @@ -4,11 +4,13 @@  ; tricks similar to AArch64 fast TLS calling convention (r255821).  ; Applying tricks on x86-64 similar to r255821.  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0  %struct.S = type { i8 }  @sg = internal thread_local global %struct.S zeroinitializer, align 1  @__dso_handle = external global i8  @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4  declare void @_ZN1SC1Ev(%struct.S*)  declare void @_ZN1SD1Ev(%struct.S*) @@ -50,3 +52,28 @@ init.i:  __tls_init.exit:    ret %struct.S* @sg  } + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: pushq %r11 +; CHECK-NOT: pushq %r10 +; CHECK-NOT: pushq %r9 +; CHECK-NOT: pushq %r8 +; CHECK-NOT: pushq %rsi +; CHECK-NOT: pushq %rdx +; CHECK-NOT: pushq %rcx +; CHECK-NOT: pushq %rbx +; CHECK: callq +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { +  ret i32* @sum1 +} + +; Make sure at O0 we don't overwrite RBP. 
+; CHECK-O0-LABEL: _ZTW4sum2 +; CHECK-O0: pushq %rbp +; CHECK-O0: movq %rsp, %rbp +; CHECK-O0-NOT: movq %r{{.*}}, (%rbp)  +define cxx_fast_tlscc i32* @_ZTW4sum2() #0 { +  ret i32* @sum1 +} + +attributes #0 = { nounwind "no-frame-pointer-elim"="true" } diff --git a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index 7c00f407b1e0..eb87f7101d7c 100644 --- a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -1,11 +1,5 @@  ; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK  ; -; This test checks that we do not use shrink-wrapping when -; the function does not have any frame pointer and may unwind. -; This is a workaround for a limitation in the emission of -; the CFI directives, that are not correct in such case. -; PR25614 -;  ; Note: This test cannot be merged with the shrink-wrapping tests  ; because the booleans set on the command line take precedence on  ; the target logic that disable shrink-wrapping. @@ -13,6 +7,12 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"  target triple = "x86_64-apple-macosx" +; This test checks that we do not use shrink-wrapping when +; the function does not have any frame pointer and may unwind. +; This is a workaround for a limitation in the emission of +; the CFI directives, that are not correct in such case. +; PR25614 +;  ; No shrink-wrapping should occur here, until the CFI information are fixed.  ; CHECK-LABEL: framelessUnwind:  ; @@ -151,3 +151,74 @@ false:  }  attributes #2 = { "no-frame-pointer-elim"="false" nounwind } + + +; Check that we generate correct code for segmented stack. +; We used to emit the code at the entry point of the function +; instead of just before the prologue. +; For now, shrink-wrapping is disabled on segmented stack functions: PR26107. +; +; CHECK-LABEL: segmentedStack: +; CHECK: cmpq +; CHECK-NEXT: ja [[ENTRY_LABEL:LBB[0-9_]+]] +; +; CHECK: callq ___morestack +; CHECK-NEXT: retq +; +; CHECK: [[ENTRY_LABEL]]: +; Prologue +; CHECK: push +; +; In PR26107, we use to drop these two basic blocks, because +; the segmentedStack entry block was jumping directly to +; the place where the prologue is actually needed, which is +; the call to memcmp. +; Then, those two basic blocks did not have any predecessors +; anymore and were removed. 
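+; With the fix, the entry block branches to [[ENTRY_LABEL]] (where the prologue
+; is emitted) rather than straight to the memcmp block, so both null-check
+; blocks below keep a predecessor and survive.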
+; +; Check if vk1 is null +; CHECK: testq %rdi, %rdi +; CHECK-NEXT: je [[STRINGS_EQUAL:LBB[0-9_]+]] +; +; Check if vk2 is null +; CHECK: testq %rsi, %rsi +; CHECK-NEXT:  je [[STRINGS_EQUAL]] +; +; CHECK: [[STRINGS_EQUAL]] +; CHECK-NEXT: popq +define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 { +entry: +  %cmp.i = icmp eq i8* %vk1, null +  %cmp1.i = icmp eq i8* %vk2, null +  %brmerge.i = or i1 %cmp.i, %cmp1.i +  %cmp1.mux.i = and i1 %cmp.i, %cmp1.i +  br i1 %brmerge.i, label %__go_ptr_strings_equal.exit, label %if.end4.i + +if.end4.i:                                        ; preds = %entry +  %tmp = getelementptr inbounds i8, i8* %vk1, i64 8 +  %tmp1 = bitcast i8* %tmp to i64* +  %tmp2 = load i64, i64* %tmp1, align 8 +  %tmp3 = getelementptr inbounds i8, i8* %vk2, i64 8 +  %tmp4 = bitcast i8* %tmp3 to i64* +  %tmp5 = load i64, i64* %tmp4, align 8 +  %cmp.i.i = icmp eq i64 %tmp2, %tmp5 +  br i1 %cmp.i.i, label %land.rhs.i.i, label %__go_ptr_strings_equal.exit + +land.rhs.i.i:                                     ; preds = %if.end4.i +  %tmp6 = bitcast i8* %vk2 to i8** +  %tmp7 = load i8*, i8** %tmp6, align 8 +  %tmp8 = bitcast i8* %vk1 to i8** +  %tmp9 = load i8*, i8** %tmp8, align 8 +  %call.i.i = tail call i32 @memcmp(i8* %tmp9, i8* %tmp7, i64 %tmp2) #5 +  %cmp4.i.i = icmp eq i32 %call.i.i, 0 +  br label %__go_ptr_strings_equal.exit + +__go_ptr_strings_equal.exit:                      ; preds = %land.rhs.i.i, %if.end4.i, %entry +  %retval.0.i = phi i1 [ %cmp1.mux.i, %entry ], [ false, %if.end4.i ], [ %cmp4.i.i, %land.rhs.i.i ] +  ret i1 %retval.0.i +} + +; Function Attrs: nounwind readonly +declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) #5 + +attributes #5 = { nounwind readonly ssp uwtable "split-stack" } diff --git a/test/DebugInfo/ARM/PR26163.ll b/test/DebugInfo/ARM/PR26163.ll new file mode 100644 index 000000000000..9ab0e35805c1 --- /dev/null +++ b/test/DebugInfo/ARM/PR26163.ll @@ -0,0 +1,107 @@ +; RUN: llc -filetype=obj -o - < %s | llvm-dwarfdump - | FileCheck %s +; +; Checks that we're creating two ranges, one that terminates immediately +; and one that spans the rest of the function. This isn't necessarily the +; best thing to do here (and also not necessarily correct, since the first +; one has a bit_piece), but it is what is currently being emitted, any +; change here needs to be intentional, so the test is very specific. 
+; +; CHECK: .debug_loc contents: +; CHECK: 0x00000000: Beginning address offset: 0x0000000000000004 +; CHECK:                Ending address offset: 0x0000000000000004 +; CHECK:                 Location description: 10 00 9f +; CHECK:             Beginning address offset: 0x0000000000000004 +; CHECK:                Ending address offset: 0x0000000000000014 +; CHECK:                 Location description: 10 00 9f + +; Created form the following test case (PR26163) with +; clang -cc1 -triple armv4t--freebsd11.0-gnueabi -emit-obj -debug-info-kind=standalone -O2 -x c test.c +; +; typedef	unsigned int	size_t; +; struct timeval { +; 	long long tv_sec; +; 	int tv_usec; +; }; +;  +; void *memset(void *, int, size_t); +; void foo(void); +;  +; static void +; bar(int value) +; { +; 	struct timeval lifetime; +;  +; 	memset(&lifetime, 0, sizeof(struct timeval)); +; 	lifetime.tv_sec = value; +;  +; 	foo(); +; } +;  +; int +; parse_config_file(void) +; { +; 	int value; +;  +; 	bar(value); +; 	return (0); +; } + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv4t--freebsd11.0-gnueabi" + +%struct.timeval = type { i64, i32 } + +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) + +declare void @foo() + +define i32 @parse_config_file() !dbg !4 { +entry: +  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !26), !dbg !27 +  tail call void @llvm.dbg.declare(metadata %struct.timeval* undef, metadata !16, metadata !26), !dbg !29 +  tail call void @llvm.dbg.value(metadata i64 0, i64 0, metadata !16, metadata !30), !dbg !29 +  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !16, metadata !31), !dbg !29 +  tail call void @foo() #3, !dbg !32 +  ret i32 0, !dbg !33 +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!22, !23, !24} +!llvm.ident = !{!25} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (https://github.com/llvm-mirror/clang 89dda3855cda574f355e6defa1d77bdae5053994) (llvm/trunk 257891)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "<stdin>", directory: "/home/ubuntu/bugs") +!2 = !{} +!3 = !{!4, !11} +!4 = distinct !DISubprogram(name: "parse_config_file", scope: !5, file: !5, line: 22, type: !6, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, variables: !9) +!5 = !DIFile(filename: "test.c", directory: "/home/ubuntu/bugs") +!6 = !DISubroutineType(types: !7) +!7 = !{!8} +!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !{!10} +!10 = !DILocalVariable(name: "value", scope: !4, file: !5, line: 24, type: !8) +!11 = distinct !DISubprogram(name: "bar", scope: !5, file: !5, line: 11, type: !12, isLocal: true, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, variables: !14) +!12 = !DISubroutineType(types: !13) +!13 = !{null, !8} +!14 = !{!15, !16} +!15 = !DILocalVariable(name: "value", arg: 1, scope: !11, file: !5, line: 11, type: !8) +!16 = !DILocalVariable(name: "lifetime", scope: !11, file: !5, line: 13, type: !17) +!17 = !DICompositeType(tag: DW_TAG_structure_type, name: "timeval", file: !5, line: 2, size: 128, align: 64, elements: !18) +!18 = !{!19, !21} +!19 = !DIDerivedType(tag: DW_TAG_member, name: "tv_sec", scope: !17, file: !5, line: 3, baseType: !20, size: 64, align: 64) +!20 = !DIBasicType(name: "long long int", size: 64, 
align: 64, encoding: DW_ATE_signed) +!21 = !DIDerivedType(tag: DW_TAG_member, name: "tv_usec", scope: !17, file: !5, line: 4, baseType: !8, size: 32, align: 32, offset: 64) +!22 = !{i32 2, !"Debug Info Version", i32 3} +!23 = !{i32 1, !"wchar_size", i32 4} +!24 = !{i32 1, !"min_enum_size", i32 4} +!25 = !{!"clang version 3.9.0 (https://github.com/llvm-mirror/clang 89dda3855cda574f355e6defa1d77bdae5053994) (llvm/trunk 257891)"} +!26 = !DIExpression() +!27 = !DILocation(line: 11, scope: !11, inlinedAt: !28) +!28 = distinct !DILocation(line: 26, scope: !4) +!29 = !DILocation(line: 13, scope: !11, inlinedAt: !28) +!30 = !DIExpression(DW_OP_bit_piece, 0, 64) +!31 = !DIExpression(DW_OP_bit_piece, 0, 32) +!32 = !DILocation(line: 18, scope: !11, inlinedAt: !28) +!33 = !DILocation(line: 27, scope: !4) diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll index 7df88b1ec5e0..b91a0438a679 100644 --- a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll @@ -1,5 +1,5 @@  ; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  declare i32 @FB() diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll index d35418b19c7f..94938a86cba4 100644 --- a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll +++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll @@ -1,5 +1,5 @@  ; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  declare i32 @FB() diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll index 0d1a1ec6871a..72449f3af3ad 100644 --- a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  define i32 @bar() nounwind {  	ret i32 0 diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll index 31ed7523db43..31271b594c02 100644 --- a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  ; This test should fail until remote symbol resolution is supported.  define i32 @main() nounwind { diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll index bbeab10cd788..9d1abbcf847c 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  ; The intention of this test is to verify that symbols mapped to COMMON in ELF  ; work as expected. 
diff --git a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll index 0aa19b244c04..afa8a95f454d 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll @@ -1,5 +1,5 @@  ; RUN:  %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  ; Check that a variable is always aligned as specified. diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll index 13bac29a3628..f9961593c7b9 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  define double @test(double* %DP, double %Arg) nounwind {  	%D = load double, double* %DP		; <double> [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll index 5d5480e9d459..329dc5c83950 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll index ef74fa02e6a9..44557ea399b5 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll @@ -1,6 +1,6 @@  ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \  ; RUN:   -relocation-model=pic -code-model=small %s > /dev/null -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32  @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll index c2260fc2f1ff..a249c2f097e1 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1  @ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll index 2a45472b25a1..281705383339 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll @@ -1,6 +1,6 @@  ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \  ; RUN:   -O0 -relocation-model=pic -code-model=small %s -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32  @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1  @ptr = global i8* 
getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll index 249aad2d4b48..6fbb2bc3c4bd 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  declare i32 @FB() diff --git a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll index 32c58ee6237b..ce094174134b 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  declare i32 @FB() diff --git a/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll index aaf3ebc9bc7f..bc477c285515 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  define i32 @bar() nounwind {  	ret i32 0 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll index a0d941049c4a..001a617b97a3 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  ; This test should fail until remote symbol resolution is supported.  define i32 @main() nounwind { diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll index 9b4e2469665f..4c4256e45a35 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  ; The intention of this test is to verify that symbols mapped to COMMON in ELF  ; work as expected. diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll index 88a561b613ef..1621501a31a1 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll @@ -1,5 +1,5 @@  ; RUN:  %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  ; Check that a variable is always aligned as specified. 
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll index 484541ab4807..6ff8704da1f9 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  define double @test(double* %DP, double %Arg) nounwind {  	%D = load double, double* %DP		; <double> [#uses=1] diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll index adc3e944b639..a7c8bfef938f 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32  @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll index 8ab3fd591388..a028df674648 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll @@ -1,6 +1,6 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \  ; RUN:   -relocation-model=pic -code-model=small %s > /dev/null -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32  @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll index a47c801e799b..d369d2b38498 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll @@ -1,5 +1,5 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32  @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1  @ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll index 210ac6f6ed1c..e918dabe1772 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll @@ -1,6 +1,6 @@  ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \  ; RUN:   -O0 -relocation-model=pic -code-model=small %s -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32  @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1  @ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/MC/AArch64/inst-directive.s b/test/MC/AArch64/inst-directive.s index 3bb620f689d1..7fd5200b9e57 100644 --- a/test/MC/AArch64/inst-directive.s +++ b/test/MC/AArch64/inst-directive.s @@ -1,7 +1,14 @@  // RUN: llvm-mc %s -triple=aarch64-none-linux-gnu 
-filetype=asm -o - \  // RUN:   | FileCheck %s --check-prefix=CHECK-ASM -// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o - \ -// RUN:   | llvm-readobj -s -sd | FileCheck %s  --check-prefix=CHECK-OBJ +// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o %t +// RUN: llvm-readobj -s -sd %t | FileCheck %s  --check-prefix=CHECK-OBJ +// RUN: llvm-objdump -t %t | FileCheck %s  --check-prefix=CHECK-SYMS + +// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=asm -o - \ +// RUN:   | FileCheck %s --check-prefix=CHECK-ASM +// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=obj -o %t +// RUN: llvm-readobj -s -sd %t | FileCheck %s  --check-prefix=CHECK-OBJ +// RUN: llvm-objdump -t %t | FileCheck %s  --check-prefix=CHECK-SYMS      .section    .inst.aarch64_inst @@ -22,3 +29,7 @@ aarch64_inst:  // CHECK-OBJ:   SectionData (  // CHECK-OBJ-NEXT: 0000: 2040105E  // CHECK-OBJ-NEXT: ) + +// CHECK-SYMS-NOT: 0000000000000000         .inst.aarch64_inst              00000000 $d +// CHECK-SYMS:     0000000000000000         .inst.aarch64_inst              00000000 $x +// CHECK-SYMS-NOT: 0000000000000000         .inst.aarch64_inst              00000000 $d diff --git a/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll b/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll new file mode 100644 index 000000000000..36440da21626 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -loop-unroll -codegenprepare < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabihf" + +; CHECK-LABEL: @f +define i32 @f(i32 %a) #0 { +; CHECK: call i32 @llvm.bitreverse.i32 +entry: +  br label %for.body + +for.cond.cleanup:                                 ; preds = %for.body +  ret i32 %or + +for.body:                                         ; preds = %for.body, %entry +  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +  %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ] +  %shr = lshr i32 %a, %i.08 +  %and = and i32 %shr, 1 +  %sub = sub nuw nsw i32 31, %i.08 +  %shl = shl i32 %and, %sub +  %or = or i32 %shl, %b.07 +  %inc = add nuw nsw i32 %i.08, 1 +  %exitcond = icmp eq i32 %inc, 32 +  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3 +} + +attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git b7441a0f42c43a8eea9e3e706be187252db747fa)"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.full"} diff --git a/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg b/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg new file mode 100644 index 000000000000..98c6700c209d --- /dev/null +++ b/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'ARM' in config.root.targets: +    config.unsupported = True + diff --git a/test/Transforms/CodeGenPrepare/bitreverse-hang.ll b/test/Transforms/CodeGenPrepare/bitreverse-hang.ll new file mode 100644 index 000000000000..c81dcc15cae9 --- /dev/null +++ 
b/test/Transforms/CodeGenPrepare/bitreverse-hang.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-unroll -codegenprepare -S | FileCheck %s + +; This test is a worst-case scenario for bitreversal/byteswap detection. +; After loop unrolling (the unrolled loop is unreadably large so it has been kept +; rolled here), we have a binary tree of OR operands (as bitreversal detection +; looks straight through shifts): +; +;  OR +;  | \ +;  |  LSHR +;  | / +;  OR +;  | \ +;  |  LSHR +;  | / +;  OR +; +; This results in exponential runtime. The loop here is 32 iterations which will +; totally hang if we don't deal with this case cleverly. + +@b = common global i32 0, align 4 + +; CHECK: define i32 @fn1 +define i32 @fn1() #0 { +entry: +  %b.promoted = load i32, i32* @b, align 4, !tbaa !2 +  br label %for.body + +for.body:                                         ; preds = %for.body, %entry +  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ] +  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +  %shr = lshr i32 %or4, 1 +  %or = or i32 %shr, %or4 +  %inc = add nuw nsw i32 %i.03, 1 +  %exitcond = icmp eq i32 %inc, 32 +  br i1 %exitcond, label %for.end, label %for.body + +for.end:                                          ; preds = %for.body +  store i32 %or, i32* @b, align 4, !tbaa !2 +  ret i32 undef +} + +attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"PIC Level", i32 2} +!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} diff --git a/test/Transforms/Inline/inline-funclets.ll b/test/Transforms/Inline/inline-funclets.ll new file mode 100644 index 000000000000..362e03d36a32 --- /dev/null +++ b/test/Transforms/Inline/inline-funclets.ll @@ -0,0 +1,455 @@ +; RUN: opt -inline -S %s | FileCheck %s + +declare void @g() + + +;;; Test with a call in a funclet that needs to remain a call +;;; when inlined because the funclet doesn't unwind to caller. +;;; CHECK-LABEL: define void @test1( +define void @test1() personality void ()* @g { +entry: +; CHECK-NEXT: entry: +  invoke void @test1_inlinee() +    to label %exit unwind label %cleanup +cleanup: +  %pad = cleanuppad within none [] +  call void @g() [ "funclet"(token %pad) ] +  cleanupret from %pad unwind to caller +exit: +  ret void +} + +define void @test1_inlinee() alwaysinline personality void ()* @g { +entry: +  invoke void @g() +    to label %exit unwind label %cleanup.inner +; CHECK-NEXT:  invoke void @g() +; CHECK-NEXT:    unwind label %[[cleanup_inner:.+]] + +cleanup.inner: +  %pad.inner = cleanuppad within none [] +  call void @g() [ "funclet"(token %pad.inner) ] +  cleanupret from %pad.inner unwind label %cleanup.outer +; CHECK: [[cleanup_inner]]: +; The call here needs to remain a call becuase pad.inner has a cleanupret +; that stays within the inlinee. 
+; CHECK-NEXT:  %[[pad_inner:[^ ]+]] = cleanuppad within none +; CHECK-NEXT:  call void @g() [ "funclet"(token %[[pad_inner]]) ] +; CHECK-NEXT:  cleanupret from %[[pad_inner]] unwind label %[[cleanup_outer:.+]] + +cleanup.outer: +  %pad.outer = cleanuppad within none [] +  call void @g() [ "funclet"(token %pad.outer) ] +  cleanupret from %pad.outer unwind to caller +; CHECK: [[cleanup_outer]]: +; The call and cleanupret here need to be redirected to caller cleanup +; CHECK-NEXT: %[[pad_outer:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad_outer]]) ] +; CHECK-NEXT:   unwind label %cleanup +; CHECK: cleanupret from %[[pad_outer]] unwind label %cleanup{{$}} + +exit: +  ret void +} + + + +;;; Test with an "unwind to caller" catchswitch in a parent funclet +;;; that needs to remain "unwind to caller" because the parent +;;; doesn't unwind to caller. +;;; CHECK-LABEL: define void @test2( +define void @test2() personality void ()* @g { +entry: +; CHECK-NEXT: entry: +  invoke void @test2_inlinee() +    to label %exit unwind label %cleanup +cleanup: +  %pad = cleanuppad within none [] +  call void @g() [ "funclet"(token %pad) ] +  cleanupret from %pad unwind to caller +exit: +  ret void +} + +define void @test2_inlinee() alwaysinline personality void ()* @g { +entry: +  invoke void @g() +    to label %exit unwind label %cleanup1 +; CHECK-NEXT:   invoke void @g() +; CHECK-NEXT:     unwind label %[[cleanup1:.+]] + +cleanup1: +  %outer = cleanuppad within none [] +  invoke void @g() [ "funclet"(token %outer) ] +    to label %ret1 unwind label %catchswitch +; CHECK: [[cleanup1]]: +; CHECK-NEXT: %[[outer:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[outer]]) ] +; CHECK-NEXT:   unwind label %[[catchswitch:.+]] + +catchswitch: +  %cs = catchswitch within %outer [label %catch] unwind to caller +; CHECK: [[catchswitch]]: +; The catchswitch here needs to remain "unwind to caller" since %outer +; has a cleanupret that remains within the inlinee. +; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[outer]] [label %[[catch:.+]]] unwind to caller + +catch: +  %inner = catchpad within %cs [] +  call void @g() [ "funclet"(token %inner) ] +  catchret from %inner to label %ret1 +; CHECK: [[catch]]: +; The call here needs to remain a call since it too is within %outer +; CHECK:   %[[inner:[^ ]+]] = catchpad within %[[cs]] +; CHECK-NEXT: call void @g() [ "funclet"(token %[[inner]]) ] + +ret1: +  cleanupret from %outer unwind label %cleanup2 +; CHECK: cleanupret from %[[outer]] unwind label %[[cleanup2:.+]] + +cleanup2: +  %later = cleanuppad within none [] +  cleanupret from %later unwind to caller +; CHECK: [[cleanup2]]: +; The cleanupret here needs to get redirected to the caller cleanup +; CHECK-NEXT: %[[later:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: cleanupret from %[[later]] unwind label %cleanup{{$}} + +exit: +  ret void +} + + +;;; Test with a call in a cleanup that has no definitive unwind +;;; destination, that must be rewritten to an invoke. 
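+;;; (Unlike test1, the inlinee's cleanup ends in "unreachable" rather than a
+;;; cleanupret that stays within the inlinee, so the call cannot be proven to
+;;; stay local and must be given the caller's unwind destination.)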
+;;; CHECK-LABEL: define void @test3( +define void @test3() personality void ()* @g { +entry: +; CHECK-NEXT: entry: +  invoke void @test3_inlinee() +    to label %exit unwind label %cleanup +cleanup: +  %pad = cleanuppad within none [] +  call void @g() [ "funclet"(token %pad) ] +  cleanupret from %pad unwind to caller +exit: +  ret void +} + +define void @test3_inlinee() alwaysinline personality void ()* @g { +entry: +  invoke void @g() +    to label %exit unwind label %cleanup +; CHECK-NEXT:  invoke void @g() +; CHECK-NEXT:    unwind label %[[cleanup:.+]] + +cleanup: +  %pad = cleanuppad within none [] +  call void @g() [ "funclet"(token %pad) ] +  unreachable +; CHECK: [[cleanup]]: +; The call must be rewritten to an invoke targeting the caller cleanup +; because it may well unwind to there. +; CHECK-NEXT: %[[pad:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad]]) ] +; CHECK-NEXT:   unwind label %cleanup{{$}} + +exit: +  ret void +} + + +;;; Test with a catchswitch in a cleanup that has no definitive +;;; unwind destination, that must be rewritten to unwind to the +;;; inlined invoke's unwind dest +;;; CHECK-LABEL: define void @test4( +define void @test4() personality void ()* @g { +entry: +; CHECK-NEXT: entry: +  invoke void @test4_inlinee() +    to label %exit unwind label %cleanup +cleanup: +  %pad = cleanuppad within none [] +  call void @g() [ "funclet"(token %pad) ] +  cleanupret from %pad unwind to caller +exit: +  ret void +} + +define void @test4_inlinee() alwaysinline personality void ()* @g { +entry: +  invoke void @g() +    to label %exit unwind label %cleanup +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT:   unwind label %[[cleanup:.+]] + +cleanup: +  %clean = cleanuppad within none [] +  invoke void @g() [ "funclet"(token %clean) ] +    to label %unreachable unwind label %dispatch +; CHECK: [[cleanup]]: +; CHECK-NEXT: %[[clean:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[clean]]) ] +; CHECK-NEXT:   unwind label %[[dispatch:.+]] + +dispatch: +  %cs = catchswitch within %clean [label %catch] unwind to caller +; CHECK: [[dispatch]]: +; The catchswitch must be rewritten to unwind to %cleanup in the caller +; because it may well unwind to there. +; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[clean]] [label %[[catch:.+]]] unwind label %cleanup{{$}} + +catch: +  catchpad within %cs [] +  br label %unreachable +unreachable: +  unreachable +exit: +  ret void +} + + +;;; Test with multiple levels of nesting, and unwind dests +;;; that need to be inferred from ancestors, descendants, +;;; and cousins. 
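+;;; (Pads in the "noinfo" tree below have no relative with a concrete unwind
+;;; edge, so everything there is conservatively redirected to the caller's
+;;; cleanup; in the "implicit" tree the edge to %internal supplies the
+;;; destination, and calls and "unwind to caller" catchswitches are left alone.)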
+; CHECK-LABEL: define void @test5(
+define void @test5() personality void ()* @g {
+entry:
+; CHECK-NEXT: entry:
+  invoke void @test5_inlinee()
+    to label %exit unwind label %cleanup
+cleanup:
+  %pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %pad) ]
+  cleanupret from %pad unwind to caller
+exit:
+  ret void
+}
+
+define void @test5_inlinee() alwaysinline personality void ()* @g {
+entry:
+  invoke void @g()
+    to label %cont unwind label %noinfo.root
+; CHECK-NEXT: invoke void @g()
+; CHECK-NEXT:   to label %[[cont:[^ ]+]] unwind label %[[noinfo_root:.+]]
+
+noinfo.root:
+  %noinfo.root.pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %noinfo.root.pad) ]
+  invoke void @g() [ "funclet"(token %noinfo.root.pad) ]
+    to label %noinfo.root.cont unwind label %noinfo.left
+; CHECK: [[noinfo_root]]:
+; Nothing under "noinfo.root" has a definitive unwind destination, so
+; we must assume all of it may actually unwind, and redirect unwinds
+; to the cleanup in the caller.
+; CHECK-NEXT: %[[noinfo_root_pad:[^ ]+]] = cleanuppad within none []
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ]
+; CHECK-NEXT:   to label %[[next:[^ ]+]] unwind label %cleanup{{$}}
+; CHECK: [[next]]:
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ]
+; CHECK-NEXT:   to label %[[noinfo_root_cont:[^ ]+]] unwind label %[[noinfo_left:.+]]
+
+noinfo.left:
+  %noinfo.left.pad = cleanuppad within %noinfo.root.pad []
+  invoke void @g() [ "funclet"(token %noinfo.left.pad) ]
+    to label %unreachable unwind label %noinfo.left.child
+; CHECK: [[noinfo_left]]:
+; CHECK-NEXT: %[[noinfo_left_pad:[^ ]+]] = cleanuppad within %[[noinfo_root_pad]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_pad]]) ]
+; CHECK-NEXT:   unwind label %[[noinfo_left_child:.+]]
+
+noinfo.left.child:
+  %noinfo.left.child.cs = catchswitch within %noinfo.left.pad [label %noinfo.left.child.catch] unwind to caller
+; CHECK: [[noinfo_left_child]]:
+; CHECK-NEXT: %[[noinfo_left_child_cs:[^ ]+]] = catchswitch within %[[noinfo_left_pad]] [label %[[noinfo_left_child_catch:[^ ]+]]] unwind label %cleanup{{$}}
+
+noinfo.left.child.catch:
+  %noinfo.left.child.pad = catchpad within %noinfo.left.child.cs []
+  call void @g() [ "funclet"(token %noinfo.left.child.pad) ]
+  br label %unreachable
+; CHECK: [[noinfo_left_child_catch]]:
+; CHECK-NEXT: %[[noinfo_left_child_pad:[^ ]+]] = catchpad within %[[noinfo_left_child_cs]] []
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_child_pad]]) ]
+; CHECK-NEXT:   unwind label %cleanup{{$}}
+
+noinfo.root.cont:
+  invoke void @g() [ "funclet"(token %noinfo.root.pad) ]
+    to label %unreachable unwind label %noinfo.right
+; CHECK: [[noinfo_root_cont]]:
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ]
+; CHECK-NEXT:   unwind label %[[noinfo_right:.+]]
+
+noinfo.right:
+  %noinfo.right.cs = catchswitch within %noinfo.root.pad [label %noinfo.right.catch] unwind to caller
+; CHECK: [[noinfo_right]]:
+; CHECK-NEXT: %[[noinfo_right_cs:[^ ]+]] = catchswitch within %[[noinfo_root_pad]] [label %[[noinfo_right_catch:[^ ]+]]] unwind label %cleanup{{$}}
+
+noinfo.right.catch:
+  %noinfo.right.pad = catchpad within %noinfo.right.cs []
+  invoke void @g() [ "funclet"(token %noinfo.right.pad) ]
+    to label %unreachable unwind label %noinfo.right.child
+; CHECK: [[noinfo_right_catch]]:
+; CHECK-NEXT: %[[noinfo_right_pad:[^ ]+]] = catchpad within %[[noinfo_right_cs]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_right_pad]]) ]
+; CHECK-NEXT:   unwind label %[[noinfo_right_child:.+]]
+
+noinfo.right.child:
+  %noinfo.right.child.pad = cleanuppad within %noinfo.right.pad []
+  call void @g() [ "funclet"(token %noinfo.right.child.pad) ]
+  br label %unreachable
+; CHECK: [[noinfo_right_child]]:
+; CHECK-NEXT: %[[noinfo_right_child_pad:[^ ]+]] = cleanuppad within %[[noinfo_right_pad]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_right_child_pad]]) ]
+; CHECK-NEXT:   unwind label %cleanup{{$}}
+
+cont:
+  invoke void @g()
+    to label %exit unwind label %implicit.root
+; CHECK: [[cont]]:
+; CHECK-NEXT: invoke void @g()
+; CHECK-NEXT:   unwind label %[[implicit_root:.+]]
+
+implicit.root:
+  %implicit.root.pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %implicit.root.pad) ]
+  invoke void @g() [ "funclet"(token %implicit.root.pad) ]
+    to label %implicit.root.cont unwind label %implicit.left
+; CHECK: [[implicit_root]]:
+; There's an unwind edge to %internal in implicit.right, and we need to propagate that
+; fact down to implicit.right.grandchild, up to implicit.root, and down to
+; implicit.left.child.catch, leaving all calls and "unwind to caller" catchswitches
+; alone so they don't conflict with the unwind edge in implicit.right
+; CHECK-NEXT: %[[implicit_root_pad:[^ ]+]] = cleanuppad within none
+; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
+; CHECK-NEXT:   to label %[[implicit_root_cont:[^ ]+]] unwind label %[[implicit_left:.+]]
+
+implicit.left:
+  %implicit.left.pad = cleanuppad within %implicit.root.pad []
+  invoke void @g() [ "funclet"(token %implicit.left.pad) ]
+    to label %unreachable unwind label %implicit.left.child
+; CHECK: [[implicit_left]]:
+; CHECK-NEXT: %[[implicit_left_pad:[^ ]+]] = cleanuppad within %[[implicit_root_pad:[^ ]+]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_left_pad]]) ]
+; CHECK-NEXT:   unwind label %[[implicit_left_child:.+]]
+
+implicit.left.child:
+  %implicit.left.child.cs = catchswitch within %implicit.left.pad [label %implicit.left.child.catch] unwind to caller
+; CHECK: [[implicit_left_child]]:
+; CHECK-NEXT: %[[implicit_left_child_cs:[^ ]+]] = catchswitch within %[[implicit_left_pad]] [label %[[implicit_left_child_catch:[^ ]+]]] unwind to caller
+
+implicit.left.child.catch:
+  %implicit.left.child.pad = catchpad within %implicit.left.child.cs []
+  call void @g() [ "funclet"(token %implicit.left.child.pad) ]
+  br label %unreachable
+; CHECK: [[implicit_left_child_catch]]:
+; CHECK-NEXT: %[[implicit_left_child_pad:[^ ]+]] = catchpad within %[[implicit_left_child_cs]]
+; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_left_child_pad]]) ]
+
+implicit.root.cont:
+  invoke void @g() [ "funclet"(token %implicit.root.pad) ]
+    to label %unreachable unwind label %implicit.right
+; CHECK: [[implicit_root_cont]]:
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
+; CHECK-NEXT:   unwind label %[[implicit_right:.+]]
+
+implicit.right:
+  %implicit.right.cs = catchswitch within %implicit.root.pad [label %implicit.right.catch] unwind label %internal
+; CHECK: [[implicit_right]]:
+; This is the unwind edge (to %internal) whose existence needs to get propagated around the "implicit" tree
+; CHECK-NEXT: %[[implicit_right_cs:[^ ]+]] = catchswitch within %[[implicit_root_pad]] [label %[[implicit_right_catch:[^ ]+]]] unwind label %[[internal:.+]]
+
+implicit.right.catch:
+  %implicit.right.pad = catchpad within %implicit.right.cs []
+  invoke void @g() [ "funclet"(token %implicit.right.pad) ]
+    to label %unreachable unwind label %implicit.right.child
+; CHECK: [[implicit_right_catch]]:
+; CHECK-NEXT: %[[implicit_right_pad:[^ ]+]] = catchpad within %[[implicit_right_cs]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_pad]]) ]
+; CHECK-NEXT:   unwind label %[[implicit_right_child:.+]]
+
+implicit.right.child:
+  %implicit.right.child.pad = cleanuppad within %implicit.right.pad []
+  invoke void @g() [ "funclet"(token %implicit.right.child.pad) ]
+    to label %unreachable unwind label %implicit.right.grandchild
+; CHECK: [[implicit_right_child]]:
+; CHECK-NEXT: %[[implicit_right_child_pad:[^ ]+]] = cleanuppad within %[[implicit_right_pad]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_child_pad]]) ]
+; CHECK-NEXT:   unwind label %[[implicit_right_grandchild:.+]]
+
+implicit.right.grandchild:
+  %implicit.right.grandchild.cs = catchswitch within %implicit.right.child.pad [label %implicit.right.grandchild.catch] unwind to caller
+; CHECK: [[implicit_right_grandchild]]:
+; CHECK-NEXT: %[[implicit_right_grandchild_cs:[^ ]+]] = catchswitch within %[[implicit_right_child_pad]] [label %[[implicit_right_grandchild_catch:[^ ]+]]] unwind to caller
+
+implicit.right.grandchild.catch:
+  %implicit.right.grandchild.pad = catchpad within %implicit.right.grandchild.cs []
+  call void @g() [ "funclet"(token %implicit.right.grandchild.pad) ]
+  br label %unreachable
+; CHECK: [[implicit_right_grandchild_catch]]:
+; CHECK-NEXT: %[[implicit_right_grandchild_pad:[^ ]+]] = catchpad within %[[implicit_right_grandchild_cs]]
+; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_right_grandchild_pad]]) ]
+
+internal:
+  %internal.pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %internal.pad) ]
+  cleanupret from %internal.pad unwind to caller
+; CHECK: [[internal]]:
+; internal is a cleanup with a "return to caller" cleanuppad; that needs to get redirected
+; to %cleanup in the caller, and the call needs to get similarly rewritten to an invoke.
+; CHECK-NEXT: %[[internal_pad:[^ ]+]] = cleanuppad within none
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %internal.pad.i) ]
+; CHECK-NEXT:   to label %[[next:[^ ]+]] unwind label %cleanup{{$}}
+; CHECK: [[next]]:
+; CHECK-NEXT: cleanupret from %[[internal_pad]] unwind label %cleanup{{$}}
+
+unreachable:
+  unreachable
+exit:
+  ret void
+}
+
+
+declare void @ProcessCLRException()
+
+; Make sure the logic doesn't get tripped up when the inlined invoke is
+; itself within a funclet in the caller.
+; CHECK-LABEL: define void @test6(
+define void @test6() personality void ()* @ProcessCLRException {
+entry:
+  invoke void @g()
+    to label %exit unwind label %callsite_parent
+callsite_parent:
+  %callsite_parent.pad = cleanuppad within none []
+; CHECK: %callsite_parent.pad = cleanuppad within none
+  invoke void @test6_inlinee() [ "funclet"(token %callsite_parent.pad) ]
+    to label %ret unwind label %cleanup
+ret:
+  cleanupret from %callsite_parent.pad unwind label %cleanup
+cleanup:
+  %pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %pad) ]
+  cleanupret from %pad unwind to caller
+exit:
+  ret void
+}
+
+define void @test6_inlinee() alwaysinline personality void ()* @ProcessCLRException {
+entry:
+  invoke void @g()
+    to label %exit unwind label %inlinee_cleanup
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %callsite_parent.pad) ]
+; CHECK-NEXT:   unwind label %[[inlinee_cleanup:.+]]
+
+inlinee_cleanup:
+  %inlinee.pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %inlinee.pad) ]
+  unreachable
+; CHECK: [[inlinee_cleanup]]:
+; CHECK-NEXT: %[[inlinee_pad:[^ ]+]] = cleanuppad within %callsite_parent.pad
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[inlinee_pad]]) ]
+; CHECK-NEXT:   unwind label %cleanup{{$}}
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/bitreverse-hang.ll b/test/Transforms/InstCombine/bitreverse-hang.ll
new file mode 100644
index 000000000000..6823bd0ed653
--- /dev/null
+++ b/test/Transforms/InstCombine/bitreverse-hang.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s
+
+; This test is a worst-case scenario for bitreversal/byteswap detection.
+; After loop unrolling (the unrolled loop is unreadably large so it has been kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;
+; This results in exponential runtime. The loop here is 32 iterations, which will
+; totally hang compilation if we don't deal with this case cleverly.
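+;
+; For illustration (the value names below are schematic, not the exact names
+; the unroller produces), the fully unrolled body is a straight-line chain:
+;
+;   %shr.1 = lshr i32 %b.promoted, 1
+;   %or.1  = or i32 %shr.1, %b.promoted
+;   %shr.2 = lshr i32 %or.1, 1
+;   %or.2  = or i32 %shr.2, %or.1
+;   ...
+;   %or.32 = or i32 %shr.32, %or.31
+;
+; A matcher that recurses into both operands of every OR (looking through the
+; shifts) reaches each %or.k along two paths, so the number of paths explored
+; from the root roughly doubles per level, on the order of 2^32, unless
+; already-visited values are cached or the search is otherwise bounded.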
+
+@b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+  %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %shr = lshr i32 %or4, 1
+  %or = or i32 %shr, %or4
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %or, i32* @b, align 4, !tbaa !2
+  ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/InstCombine/bitreverse-recognize.ll b/test/Transforms/InstCombine/bitreverse-recognize.ll
deleted file mode 100644
index fbd5cb6d139c..000000000000
--- a/test/Transforms/InstCombine/bitreverse-recognize.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-define zeroext i8 @f_u8(i8 zeroext %a) {
-; CHECK-LABEL: @f_u8
-; CHECK-NEXT: %[[A:.*]] = call i8 @llvm.bitreverse.i8(i8 %a)
-; CHECK-NEXT: ret i8 %[[A]]
-  %1 = shl i8 %a, 7
-  %2 = shl i8 %a, 5
-  %3 = and i8 %2, 64
-  %4 = shl i8 %a, 3
-  %5 = and i8 %4, 32
-  %6 = shl i8 %a, 1
-  %7 = and i8 %6, 16
-  %8 = lshr i8 %a, 1
-  %9 = and i8 %8, 8
-  %10 = lshr i8 %a, 3
-  %11 = and i8 %10, 4
-  %12 = lshr i8 %a, 5
-  %13 = and i8 %12, 2
-  %14 = lshr i8 %a, 7
-  %15 = or i8 %14, %1
-  %16 = or i8 %15, %3
-  %17 = or i8 %16, %5
-  %18 = or i8 %17, %7
-  %19 = or i8 %18, %9
-  %20 = or i8 %19, %11
-  %21 = or i8 %20, %13
-  ret i8 %21
-}
-
-; The ANDs with 32 and 64 have been swapped here, so the sequence does not
-; completely match a bitreverse.
-define zeroext i8 @f_u8_fail(i8 zeroext %a) {
-; CHECK-LABEL: @f_u8_fail
-; CHECK-NOT: call
-; CHECK: ret i8
-  %1 = shl i8 %a, 7
-  %2 = shl i8 %a, 5
-  %3 = and i8 %2, 32
-  %4 = shl i8 %a, 3
-  %5 = and i8 %4, 64
-  %6 = shl i8 %a, 1
-  %7 = and i8 %6, 16
-  %8 = lshr i8 %a, 1
-  %9 = and i8 %8, 8
-  %10 = lshr i8 %a, 3
-  %11 = and i8 %10, 4
-  %12 = lshr i8 %a, 5
-  %13 = and i8 %12, 2
-  %14 = lshr i8 %a, 7
-  %15 = or i8 %14, %1
-  %16 = or i8 %15, %3
-  %17 = or i8 %16, %5
-  %18 = or i8 %17, %7
-  %19 = or i8 %18, %9
-  %20 = or i8 %19, %11
-  %21 = or i8 %20, %13
-  ret i8 %21
-}
-
-define zeroext i16 @f_u16(i16 zeroext %a) {
-; CHECK-LABEL: @f_u16
-; CHECK-NEXT: %[[A:.*]] = call i16 @llvm.bitreverse.i16(i16 %a)
-; CHECK-NEXT: ret i16 %[[A]]
-  %1 = shl i16 %a, 15
-  %2 = shl i16 %a, 13
-  %3 = and i16 %2, 16384
-  %4 = shl i16 %a, 11
-  %5 = and i16 %4, 8192
-  %6 = shl i16 %a, 9
-  %7 = and i16 %6, 4096
-  %8 = shl i16 %a, 7
-  %9 = and i16 %8, 2048
-  %10 = shl i16 %a, 5
-  %11 = and i16 %10, 1024
-  %12 = shl i16 %a, 3
-  %13 = and i16 %12, 512
-  %14 = shl i16 %a, 1
-  %15 = and i16 %14, 256
-  %16 = lshr i16 %a, 1
-  %17 = and i16 %16, 128
-  %18 = lshr i16 %a, 3
-  %19 = and i16 %18, 64
-  %20 = lshr i16 %a, 5
-  %21 = and i16 %20, 32
-  %22 = lshr i16 %a, 7
-  %23 = and i16 %22, 16
-  %24 = lshr i16 %a, 9
-  %25 = and i16 %24, 8
-  %26 = lshr i16 %a, 11
-  %27 = and i16 %26, 4
-  %28 = lshr i16 %a, 13
-  %29 = and i16 %28, 2
-  %30 = lshr i16 %a, 15
-  %31 = or i16 %30, %1
-  %32 = or i16 %31, %3
-  %33 = or i16 %32, %5
-  %34 = or i16 %33, %7
-  %35 = or i16 %34, %9
-  %36 = or i16 %35, %11
-  %37 = or i16 %36, %13
-  %38 = or i16 %37, %15
-  %39 = or i16 %38, %17
-  %40 = or i16 %39, %19
-  %41 = or i16 %40, %21
-  %42 = or i16 %41, %23
-  %43 = or i16 %42, %25
-  %44 = or i16 %43, %27
-  %45 = or i16 %44, %29
-  ret i16 %45
-}
\ No newline at end of file
diff --git a/test/Transforms/InstCombine/cos-2.ll b/test/Transforms/InstCombine/cos-2.ll
index c9a9c7c07712..a85cc8fa6bde 100644
--- a/test/Transforms/InstCombine/cos-2.ll
+++ b/test/Transforms/InstCombine/cos-2.ll
@@ -1,12 +1,11 @@
-; Test that the cos library call simplifier works correctly.
-;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 declare float @cos(double)
+declare signext i8 @sqrt(...)
 
-; Check that cos functions with the wrong prototype aren't simplified.
+; Check that functions with the wrong prototype aren't simplified.
 
 define float @test_no_simplify1(double %d) {
 ; CHECK-LABEL: @test_no_simplify1(
@@ -15,3 +14,14 @@ define float @test_no_simplify1(double %d) {
 ; CHECK: call float @cos(double %neg)
   ret float %cos
 }
+
+
+define i8 @bogus_sqrt() {
+  %fake_sqrt = call signext i8 (...) @sqrt()
+  ret i8 %fake_sqrt
+
+; CHECK-LABEL: bogus_sqrt(
+; CHECK-NEXT:  %fake_sqrt = call signext i8 (...) @sqrt()
+; CHECK-NEXT:  ret i8 %fake_sqrt
+}
+
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index 319ea3259830..74f3ebbf5230 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -364,6 +364,26 @@ define float @max1(float %a, float %b) {
 ; CHECK-NEXT:  ret
 }
+; A function can have a name that matches a common libcall,
+; but with the wrong type(s). Let it be.
+
+define float @fake_fmin(float %a, float %b) {
+  %c = fpext float %a to fp128
+  %d = fpext float %b to fp128
+  %e = call fp128 @fmin(fp128 %c, fp128 %d)
+  %f = fptrunc fp128 %e to float
+  ret float %f
+
+; CHECK-LABEL: fake_fmin(
+; CHECK-NEXT:  %c = fpext float %a to fp128
+; CHECK-NEXT:  %d = fpext float %b to fp128
+; CHECK-NEXT:  %e = call fp128 @fmin(fp128 %c, fp128 %d)
+; CHECK-NEXT:  %f = fptrunc fp128 %e to float
+; CHECK-NEXT:  ret float %f
+}
+
+declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
+
 
 declare double @fmax(double, double)
 
 declare double @tanh(double) #1
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 67e7cbd7686a..a76ec11fb1da 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -16,6 +16,7 @@
 #include "OrcLazyJIT.h"
 #include "RemoteJITUtils.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -741,11 +742,11 @@ std::unique_ptr<FDRPCChannel> launchRemote() {
       ChildPath.reset(new char[ChildExecPath.size() + 1]);
       std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]);
       ChildPath[ChildExecPath.size()] = '\0';
-      std::string ChildInStr = std::to_string(PipeFD[0][0]);
+      std::string ChildInStr = utostr(PipeFD[0][0]);
       ChildIn.reset(new char[ChildInStr.size() + 1]);
       std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]);
       ChildIn[ChildInStr.size()] = '\0';
-      std::string ChildOutStr = std::to_string(PipeFD[1][1]);
+      std::string ChildOutStr = utostr(PipeFD[1][1]);
       ChildOut.reset(new char[ChildOutStr.size() + 1]);
       std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]);
       ChildOut[ChildOutStr.size()] = '\0';
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index fb50160f5fed..c3884ba31756 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -65,11 +65,6 @@ function usage() {
     echo " -no-openmp           Disable check-out & build libomp"
 }
 
-if [ `uname -s` = "Darwin" ]; then
-  # compiler-rt doesn't yet build with CMake on Darwin.
-  use_autoconf="yes"
-fi
-
 while [ $# -gt 0 ]; do
     case $1 in
         -release | --release )
@@ -288,10 +283,20 @@ function export_sources() {
     if [ ! -h clang ]; then
         ln -s ../../cfe.src clang
     fi
-    cd $BuildDir/llvm.src/tools/clang/tools
-    if [ ! -h extra ]; then
-        ln -s ../../../../clang-tools-extra.src extra
+
+    # The autoconf and CMake builds want different symlinks here:
+    if [ "$use_autoconf" = "yes" ]; then
+      cd $BuildDir/llvm.src/tools/clang/tools
+      if [ ! -h extra ]; then
+          ln -s ../../../../clang-tools-extra.src extra
+      fi
+    else
+      cd $BuildDir/cfe.src/tools
+      if [ ! -h extra ]; then
+          ln -s ../../clang-tools-extra.src extra
+      fi
     fi
+
     cd $BuildDir/llvm.src/projects
     if [ -d $BuildDir/test-suite.src ] && [ ! -h test-suite ]; then
         ln -s ../../test-suite.src test-suite
