100 files changed, 1115 insertions, 1533 deletions
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index ee55c52df6e7..dae8e758c7cf 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -334,11 +334,6 @@ function(llvm_add_library name)          PREFIX ""          )      endif() -    if (MSVC) -      set_target_properties(${name} -        PROPERTIES -        IMPORT_SUFFIX ".imp") -    endif ()    endif()    if(ARG_MODULE OR ARG_SHARED) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index f27fb0277085..107f9d297d0f 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -391,7 +391,8 @@ During this release the MIPS target has reached a few major milestones. The  compiler has gained support for MIPS-II and MIPS-III; become ABI-compatible  with GCC for big and little endian O32, N32, and N64; and is now able to  compile the Linux kernel for 32-bit targets. Additionally, LLD now supports -microMIPS for the O32 ABI on little endian targets. +microMIPS for the O32 ABI on little endian targets, and code generation for +microMIPS is almost completely passing the test-suite.  ABI  ^^^ diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index f0d0b2dbcdbc..584ce65c9d17 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -66,7 +66,6 @@ struct LandingPadInfo {    MachineBasicBlock *LandingPadBlock;    // Landing pad block.    SmallVector<MCSymbol*, 1> BeginLabels; // Labels prior to invoke.    SmallVector<MCSymbol*, 1> EndLabels;   // Labels after invoke. -  SmallVector<MCSymbol*, 1> ClauseLabels; // Labels for each clause.    MCSymbol *LandingPadLabel;             // Label at beginning of landing pad.    const Function *Personality;           // Personality function.    std::vector<int> TypeIds;              // List of type ids (filters negative) @@ -331,11 +330,6 @@ public:    ///    void addCleanup(MachineBasicBlock *LandingPad); -  /// Add a clause for a landing pad. Returns a new label for the clause. This -  /// is used by EH schemes that have more than one landing pad. In this case, -  /// each clause gets its own basic block. -  MCSymbol *addClauseForLandingPad(MachineBasicBlock *LandingPad); -    /// getTypeIDFor - Return the type id for the specified typeinfo.  This is    /// function wide.    unsigned getTypeIDFor(const GlobalValue *TI); diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index eceb790c547d..5c0e9859915f 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -248,7 +248,7 @@ public:    void setReductionState(ReductionState RS) { this->RS = RS; }    void handleAddEdge(const MatrixMetadata& MD, bool Transpose) { -    DeniedOpts += Transpose ? MD.getWorstCol() : MD.getWorstRow(); +    DeniedOpts += Transpose ? MD.getWorstRow() : MD.getWorstCol();      const bool* UnsafeOpts =        Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();      for (unsigned i = 0; i < NumOpts; ++i) @@ -256,7 +256,7 @@ public:    }    void handleRemoveEdge(const MatrixMetadata& MD, bool Transpose) { -    DeniedOpts -= Transpose ? MD.getWorstCol() : MD.getWorstRow(); +    DeniedOpts -= Transpose ? MD.getWorstRow() : MD.getWorstCol();      const bool* UnsafeOpts =        Transpose ? 
MD.getUnsafeCols() : MD.getUnsafeRows();      for (unsigned i = 0; i < NumOpts; ++i) diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 3bf6d38d311d..27a5d6f80041 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -693,6 +693,7 @@ public:    static AAMDNodes getMostGenericAA(const AAMDNodes &A, const AAMDNodes &B);    static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B);    static MDNode *getMostGenericRange(MDNode *A, MDNode *B); +  static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B);  };  /// \brief Uniquable metadata node. diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 58ac5d33409e..86e7fc21b950 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -52,7 +52,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {    const TargetTransformInfo &TTI;    /// The cache of @llvm.assume intrinsics. -  AssumptionCache &AC; +  AssumptionCacheTracker *ACT;    // The called function.    Function &F; @@ -146,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {  public:    CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, -               AssumptionCache &AC, Function &Callee, int Threshold) -      : DL(DL), TTI(TTI), AC(AC), F(Callee), Threshold(Threshold), Cost(0), +               AssumptionCacheTracker *ACT, Function &Callee, int Threshold) +      : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),          IsCallerRecursive(false), IsRecursiveCall(false),          ExposesReturnsTwice(false), HasDynamicAlloca(false),          ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {    // during devirtualization and so we want to give it a hefty bonus for    // inlining, but cap that bonus in the event that inlining wouldn't pan    // out. Pretend to inline the function, with a custom threshold. -  CallAnalyzer CA(DL, TTI, AC, *F, InlineConstants::IndirectCallThreshold); +  CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);    if (CA.analyzeCall(CS)) {      // We were able to inline the indirect call! Subtract the cost from the      // bonus we want to apply, but don't go below zero. @@ -1110,7 +1110,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {    // the ephemeral values multiple times (and they're completely determined by    // the callee, so this is purely duplicate work).    SmallPtrSet<const Value *, 32> EphValues; -  CodeMetrics::collectEphemeralValues(&F, &AC, EphValues); +  CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);    // The worklist of live basic blocks in the callee *after* inlining. 
We avoid    // adding basic blocks of the callee which can be proven to be dead for this @@ -1310,7 +1310,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,          << "...\n");    CallAnalyzer CA(Callee->getDataLayout(), *TTI, -                  ACT->getAssumptionCache(*Callee), *Callee, Threshold); +                  ACT, *Callee, Threshold);    bool ShouldInline = CA.analyzeCall(CS);    DEBUG(CA.dump()); diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 085ce920139b..ff8955870cb5 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -623,8 +623,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {      N.TBAA = getMetadata(LLVMContext::MD_tbaa);    if (Merge) -    N.Scope = -        MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope)); +    N.Scope = MDNode::getMostGenericAliasScope( +        N.Scope, getMetadata(LLVMContext::MD_alias_scope));    else      N.Scope = getMetadata(LLVMContext::MD_alias_scope); diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 9b3acb5ca0a9..868fbf010db3 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -9,9 +9,11 @@  #include "llvm-c/BitReader.h"  #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/DiagnosticPrinter.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Module.h"  #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h"  #include <cstring>  #include <string> @@ -30,11 +32,20 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,                                     LLVMMemoryBufferRef MemBuf,                                     LLVMModuleRef *OutModule,                                     char **OutMessage) { -  ErrorOr<Module *> ModuleOrErr = -      parseBitcodeFile(unwrap(MemBuf)->getMemBufferRef(), *unwrap(ContextRef)); -  if (std::error_code EC = ModuleOrErr.getError()) { -    if (OutMessage) -      *OutMessage = strdup(EC.message().c_str()); +  MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef(); +  LLVMContext &Ctx = *unwrap(ContextRef); + +  std::string Message; +  raw_string_ostream Stream(Message); +  DiagnosticPrinterRawOStream DP(Stream); + +  ErrorOr<Module *> ModuleOrErr = parseBitcodeFile( +      Buf, Ctx, [&](const DiagnosticInfo &DI) { DI.print(DP); }); +  if (ModuleOrErr.getError()) { +    if (OutMessage) { +      Stream.flush(); +      *OutMessage = strdup(Message.c_str()); +    }      *OutModule = wrap((Module*)nullptr);      return 1;    } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 1bc86f6c222a..f112120c1abc 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -121,8 +121,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,        for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {          int TypeID = TypeIds[J];          assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); -        int ValueForTypeID = -            isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; +        int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;          unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);          int NextAction = SizeAction ? 
-(SizeAction + SizeTypeID) : 0; @@ -270,14 +269,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,          CallSiteEntry Site = {            BeginLabel,            LastLabel, -          LandingPad, +          LandingPad->LandingPadLabel,            FirstActions[P.PadIndex]          };          // Try to merge with the previous call-site. SJLJ doesn't do this          if (PreviousIsInvoke && !IsSJLJ) {            CallSiteEntry &Prev = CallSites.back(); -          if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { +          if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {              // Extend the range of the previous entry.              Prev.EndLabel = Site.EndLabel;              continue; @@ -577,15 +576,15 @@ void EHStreamer::emitExceptionTable() {        // Offset of the landing pad, counted in 16-byte bundles relative to the        // @LPStart address. -      if (!S.LPad) { +      if (!S.PadLabel) {          if (VerboseAsm)            Asm->OutStreamer.AddComment("    has no landing pad");          Asm->OutStreamer.EmitIntValue(0, 4/*size*/);        } else {          if (VerboseAsm)            Asm->OutStreamer.AddComment(Twine("    jumps to ") + -                                      S.LPad->LandingPadLabel->getName()); -        Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); +                                      S.PadLabel->getName()); +        Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);        }        // Offset of the first associated action record, relative to the start of @@ -682,7 +681,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {      unsigned TypeID = *I;      if (VerboseAsm) {        --Entry; -      if (isFilterEHSelector(TypeID)) +      if (TypeID != 0)          Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));      } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 9b316ff00e9b..e93055ce8655 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -23,8 +23,6 @@ class MachineModuleInfo;  class MachineInstr;  class MachineFunction;  class AsmPrinter; -class MCSymbol; -class MCSymbolRefExpr;  template <typename T>  class SmallVectorImpl; @@ -62,11 +60,11 @@ protected:    /// Structure describing an entry in the call-site table.    struct CallSiteEntry {      // The 'try-range' is BeginLabel .. EndLabel. -    MCSymbol *BeginLabel; // Null indicates the start of the function. -    MCSymbol *EndLabel;   // Null indicates the end of the function. +    MCSymbol *BeginLabel; // zero indicates the start of the function. +    MCSymbol *EndLabel;   // zero indicates the end of the function. -    // LPad contains the landing pad start labels. -    const LandingPadInfo *LPad; // Null indicates that there is no landing pad. +    // The landing pad starts at PadLabel. +    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.      unsigned Action;    }; @@ -114,13 +112,6 @@ protected:    virtual void emitTypeInfos(unsigned TTypeEncoding); -  // Helpers for for identifying what kind of clause an EH typeid or selector -  // corresponds to. Negative selectors are for filter clauses, the zero -  // selector is for cleanups, and positive selectors are for catch clauses. 
-  static bool isFilterEHSelector(int Selector) { return Selector < 0; } -  static bool isCleanupEHSelector(int Selector) { return Selector == 0; } -  static bool isCatchEHSelector(int Selector) { return Selector > 0; } -  public:    EHStreamer(AsmPrinter *A);    virtual ~EHStreamer(); diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 2138cb9514f8..0f0ad755835d 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -99,156 +99,9 @@ void Win64Exception::endFunction(const MachineFunction *) {    if (shouldEmitPersonality) {      Asm->OutStreamer.PushSection(); - -    // Emit an UNWIND_INFO struct describing the prologue.      Asm->OutStreamer.EmitWinEHHandlerData(); - -    // Emit either MSVC-compatible tables or the usual Itanium-style LSDA after -    // the UNWIND_INFO struct. -    if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::MSVC) { -      const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; -      if (Per->getName() == "__C_specific_handler") -        emitCSpecificHandlerTable(); -      else -        report_fatal_error(Twine("unexpected personality function: ") + -                           Per->getName()); -    } else { -      emitExceptionTable(); -    } - +    emitExceptionTable();      Asm->OutStreamer.PopSection();    }    Asm->OutStreamer.EmitWinCFIEndProc();  } - -const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { -  return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, -                                 Asm->OutContext); -} - -/// Emit the language-specific data that __C_specific_handler expects.  This -/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning -/// up after faults with __try, __except, and __finally.  The typeinfo values -/// are not really RTTI data, but pointers to filter functions that return an -/// integer (1, 0, or -1) indicating how to handle the exception. For __finally -/// blocks and other cleanups, the landing pad label is zero, and the filter -/// function is actually a cleanup handler with the same prototype.  A catch-all -/// entry is modeled with a null filter function field and a non-zero landing -/// pad label. -/// -/// Possible filter function return values: -///   EXCEPTION_EXECUTE_HANDLER (1): -///     Jump to the landing pad label after cleanups. -///   EXCEPTION_CONTINUE_SEARCH (0): -///     Continue searching this table or continue unwinding. -///   EXCEPTION_CONTINUE_EXECUTION (-1): -///     Resume execution at the trapping PC. -/// -/// Inferred table structure: -///   struct Table { -///     int NumEntries; -///     struct Entry { -///       imagerel32 LabelStart; -///       imagerel32 LabelEnd; -///       imagerel32 FilterOrFinally;  // Zero means catch-all. -///       imagerel32 LabelLPad;        // Zero means __finally. -///     } Entries[NumEntries]; -///   }; -void Win64Exception::emitCSpecificHandlerTable() { -  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - -  // Simplifying assumptions for first implementation: -  // - Cleanups are not implemented. -  // - Filters are not implemented. - -  // The Itanium LSDA table sorts similar landing pads together to simplify the -  // actions table, but we don't need that. 
-  SmallVector<const LandingPadInfo *, 64> LandingPads; -  LandingPads.reserve(PadInfos.size()); -  for (const auto &LP : PadInfos) -    LandingPads.push_back(&LP); - -  // Compute label ranges for call sites as we would for the Itanium LSDA, but -  // use an all zero action table because we aren't using these actions. -  SmallVector<unsigned, 64> FirstActions; -  FirstActions.resize(LandingPads.size()); -  SmallVector<CallSiteEntry, 64> CallSites; -  computeCallSiteTable(CallSites, LandingPads, FirstActions); - -  MCSymbol *EHFuncBeginSym = -      Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); -  MCSymbol *EHFuncEndSym = -      Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - -  // Emit the number of table entries. -  unsigned NumEntries = 0; -  for (const CallSiteEntry &CSE : CallSites) { -    if (!CSE.LPad) -      continue; // Ignore gaps. -    for (int Selector : CSE.LPad->TypeIds) { -      // Ignore C++ filter clauses in SEH. -      // FIXME: Implement cleanup clauses. -      if (isCatchEHSelector(Selector)) -        ++NumEntries; -    } -  } -  Asm->OutStreamer.EmitIntValue(NumEntries, 4); - -  // Emit the four-label records for each call site entry. The table has to be -  // sorted in layout order, and the call sites should already be sorted. -  for (const CallSiteEntry &CSE : CallSites) { -    // Ignore gaps. Unlike the Itanium model, unwinding through a frame without -    // an EH table entry will propagate the exception rather than terminating -    // the program. -    if (!CSE.LPad) -      continue; -    const LandingPadInfo *LPad = CSE.LPad; - -    // Compute the label range. We may reuse the function begin and end labels -    // rather than forming new ones. -    const MCExpr *Begin = -        createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); -    const MCExpr *End; -    if (CSE.EndLabel) { -      // The interval is half-open, so we have to add one to include the return -      // address of the last invoke in the range. -      End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), -                                    MCConstantExpr::Create(1, Asm->OutContext), -                                    Asm->OutContext); -    } else { -      End = createImageRel32(EHFuncEndSym); -    } - -    // These aren't really type info globals, they are actually pointers to -    // filter functions ordered by selector. The zero selector is used for -    // cleanups, so slot zero corresponds to selector 1. -    const std::vector<const GlobalValue *> &SelectorToFilter = MMI->getTypeInfos(); - -    // Do a parallel iteration across typeids and clause labels, skipping filter -    // clauses. -    assert(LPad->TypeIds.size() == LPad->ClauseLabels.size()); -    for (size_t I = 0, E = LPad->TypeIds.size(); I < E; ++I) { -      // AddLandingPadInfo stores the clauses in reverse, but there is a FIXME -      // to change that. -      int Selector = LPad->TypeIds[E - I - 1]; -      MCSymbol *ClauseLabel = LPad->ClauseLabels[I]; - -      // Ignore C++ filter clauses in SEH. -      // FIXME: Implement cleanup clauses. -      if (!isCatchEHSelector(Selector)) -        continue; - -      Asm->OutStreamer.EmitValue(Begin, 4); -      Asm->OutStreamer.EmitValue(End, 4); -      if (isCatchEHSelector(Selector)) { -        assert(unsigned(Selector - 1) < SelectorToFilter.size()); -        const GlobalValue *TI = SelectorToFilter[Selector - 1]; -        if (TI) // Emit the filter function pointer. 
-          Asm->OutStreamer.EmitValue(createImageRel32(Asm->getSymbol(TI)), 4); -        else  // Otherwise, this is a "catch i8* null", or catch all. -          Asm->OutStreamer.EmitIntValue(0, 4); -      } -      Asm->OutStreamer.EmitValue(createImageRel32(ClauseLabel), 4); -    } -  } -} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h index b2d5d1bce563..538e1328157f 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -29,10 +29,6 @@ class Win64Exception : public EHStreamer {    /// Per-function flag to indicate if frame moves info should be emitted.    bool shouldEmitMoves; -  void emitCSpecificHandlerTable(); - -  const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value); -  public:    //===--------------------------------------------------------------------===//    // Main entry points. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 32d511285eb2..baad411e2c5b 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -452,14 +452,6 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {    LP.TypeIds.push_back(0);  } -MCSymbol * -MachineModuleInfo::addClauseForLandingPad(MachineBasicBlock *LandingPad) { -  MCSymbol *ClauseLabel = Context.CreateTempSymbol(); -  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); -  LP.ClauseLabels.push_back(ClauseLabel); -  return ClauseLabel; -} -  /// TidyLandingPads - Remap landing pad labels and remove any deleted landing  /// pads.  void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 28e9f847a9d7..e53e874cb178 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -449,9 +449,9 @@ void TargetPassConfig::addPassesToHandleExceptions() {    case ExceptionHandling::DwarfCFI:    case ExceptionHandling::ARM:    case ExceptionHandling::ItaniumWinEH: -  case ExceptionHandling::MSVC: // FIXME: Needs preparation.      addPass(createDwarfEHPass(TM));      break; +  case ExceptionHandling::MSVC: // FIXME: Add preparation.    case ExceptionHandling::None:      addPass(createLowerInvokePass()); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5145731f6231..1bd6cfff9d25 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6544,19 +6544,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {    // If the input is a constant, let getNode fold it.    if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { -    SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); -    if (Res.getNode() != N) { -      if (!LegalOperations || -          TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) -        return Res; - -      // Folding it resulted in an illegal node, and it's too late to -      // do that. Clean up the old node and forego the transformation. -      // Ideally this won't happen very often, because instcombine -      // and the earlier dagcombine runs (where illegal nodes are -      // permitted) should have folded most of them already. -      deleteAndRecombine(Res.getNode()); -    } +    // If we can't allow illegal operations, we need to check that this is just +    // a fp -> int or int -> conversion and that the resulting operation will +    // be legal. 
+    if (!LegalOperations || +        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && +         TLI.isOperationLegal(ISD::ConstantFP, VT)) || +        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && +         TLI.isOperationLegal(ISD::Constant, VT))) +      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);    }    // (conv (conv x, t1), t2) -> (conv x, t2) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index eac404c50365..3a8c276e2618 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -390,7 +390,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {        if (Op.getOperand(j)                .getValueType()                .getVectorElementType() -              .isFloatingPoint()) +              .isFloatingPoint() && +          NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())          Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));        else          Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); @@ -399,8 +400,9 @@ SDValue VectorLegalizer::Promote(SDValue Op) {    }    Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); -  if (VT.isFloatingPoint() || -      (VT.isVector() && VT.getVectorElementType().isFloatingPoint())) +  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || +      (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && +       NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))      return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));    else      return DAG.getNode(ISD::BITCAST, dl, VT, Op); @@ -554,9 +556,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {        BitOffset += SrcEltBits;        if (BitOffset >= WideBits) {          WideIdx++; -        Offset -= WideBits; -        if (Offset > 0) { -          ShAmt = DAG.getConstant(SrcEltBits - Offset, +        BitOffset -= WideBits; +        if (BitOffset > 0) { +          ShAmt = DAG.getConstant(SrcEltBits - BitOffset,                                    TLI.getShiftAmountTy(WideVT));            Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);            Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 86a63eea7c2a..151bc724df67 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2071,14 +2071,10 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {    // Get the two live-in registers as SDValues. The physregs have already been    // copied into virtual registers.    
SDValue Ops[2]; -  if (FuncInfo.ExceptionPointerVirtReg) { -    Ops[0] = DAG.getZExtOrTrunc( -        DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), -                           FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), -        getCurSDLoc(), ValueVTs[0]); -  } else { -    Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); -  } +  Ops[0] = DAG.getZExtOrTrunc( +      DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), +                         FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), +      getCurSDLoc(), ValueVTs[0]);    Ops[1] = DAG.getZExtOrTrunc(        DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),                           FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), @@ -2090,27 +2086,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {    setValue(&LP, Res);  } -unsigned -SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, -                                             MachineBasicBlock *LPadBB) { -  SDValue Chain = getControlRoot(); - -  // Get the typeid that we will dispatch on later. -  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); -  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); -  unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); -  unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); -  SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); -  Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); - -  // Branch to the main landing pad block. -  MachineBasicBlock *ClauseMBB = FuncInfo.MBB; -  ClauseMBB->addSuccessor(LPadBB); -  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, -                          DAG.getBasicBlock(LPadBB))); -  return VReg; -} -  /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for  /// small case ranges).  bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index eba98b8086b7..9070091d77b8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -713,8 +713,6 @@ public:    void visitJumpTable(JumpTable &JT);    void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,                              MachineBasicBlock *SwitchBB); -  unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, -                                   MachineBasicBlock *LPadMBB);  private:    // These all get lowered before this pass. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4f031d3ff7e7..ef5452554f72 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,7 +19,6 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/BranchProbabilityInfo.h"  #include "llvm/Analysis/CFG.h" -#include "llvm/CodeGen/Analysis.h"  #include "llvm/CodeGen/FastISel.h"  #include "llvm/CodeGen/FunctionLoweringInfo.h"  #include "llvm/CodeGen/GCMetadata.h" @@ -41,7 +40,6 @@  #include "llvm/IR/Intrinsics.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" @@ -909,8 +907,6 @@ void SelectionDAGISel::DoInstructionSelection() {  void SelectionDAGISel::PrepareEHLandingPad() {    MachineBasicBlock *MBB = FuncInfo->MBB; -  const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); -    // Add a label to mark the beginning of the landing pad.  Deletion of the    // landing pad can thus be detected via the MachineModuleInfo.    MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -922,66 +918,8 @@ void SelectionDAGISel::PrepareEHLandingPad() {    BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)      .addSym(Label); -  if (TM.getMCAsmInfo()->getExceptionHandlingType() == -      ExceptionHandling::MSVC) { -    // Make virtual registers and a series of labels that fill in values for the -    // clauses. -    auto &RI = MF->getRegInfo(); -    FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); - -    // Get all invoke BBs that will unwind into the clause BBs. -    SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), -                                                  MBB->pred_end()); - -    // Emit separate machine basic blocks with separate labels for each clause -    // before the main landing pad block. -    const BasicBlock *LLVMBB = MBB->getBasicBlock(); -    const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); -    MachineInstrBuilder SelectorPHI = BuildMI( -        *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI), -        FuncInfo->ExceptionSelectorVirtReg); -    for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { -      MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); -      MF->insert(MBB, ClauseBB); - -      // Add the edge from the invoke to the clause. -      for (MachineBasicBlock *InvokeBB : InvokeBBs) -        InvokeBB->addSuccessor(ClauseBB); - -      // Mark the clause as a landing pad or MI passes will delete it. -      ClauseBB->setIsLandingPad(); - -      GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); - -      // Start the BB with a label. -      MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); -      BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) -          .addSym(ClauseLabel); - -      // Construct a simple BB that defines a register with the typeid constant. -      FuncInfo->MBB = ClauseBB; -      FuncInfo->InsertPt = ClauseBB->end(); -      unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); -      CurDAG->setRoot(SDB->getRoot()); -      SDB->clear(); -      CodeGenAndEmitDAG(); - -      // Add the typeid virtual register to the phi in the main landing pad. 
-      SelectorPHI.addReg(VReg).addMBB(ClauseBB); -    } - -    // Remove the edge from the invoke to the lpad. -    for (MachineBasicBlock *InvokeBB : InvokeBBs) -      InvokeBB->removeSuccessor(MBB); - -    // Restore FuncInfo back to its previous state and select the main landing -    // pad block. -    FuncInfo->MBB = MBB; -    FuncInfo->InsertPt = MBB->end(); -    return; -  } -    // Mark exception register as live in. +  const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());    if (unsigned Reg = TLI->getExceptionPointerRegister())      FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 290dbe29c707..71b43942e882 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -525,12 +525,15 @@ bool DISubprogram::Verify() const {          while ((IA = DL.getInlinedAt()))            DL = DebugLoc::getFromDILocation(IA);          DL.getScopeAndInlinedAt(Scope, IA); +        if (!Scope) +          return false;          assert(!IA);          while (!DIDescriptor(Scope).isSubprogram()) {            DILexicalBlockFile D(Scope);            Scope = D.isLexicalBlockFile()                        ? D.getScope()                        : DebugLoc::getFromDILexicalBlock(Scope).getScope(); +          assert(Scope && "lexical block file has no scope");          }          if (!DISubprogram(Scope).describes(F))            return false; diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 2c6b332dc8ee..63e5730f954e 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -826,6 +826,28 @@ MDNode *MDNode::intersect(MDNode *A, MDNode *B) {    return getOrSelfReference(A->getContext(), MDs);  } +MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) { +  if (!A || !B) +    return nullptr; + +  SmallVector<Metadata *, 4> MDs(B->op_begin(), B->op_end()); +  for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i) { +    Metadata *MD = A->getOperand(i); +    bool insert = true; +    for (unsigned j = 0, je = B->getNumOperands(); j != je; ++j) +      if (MD == B->getOperand(j)) { +        insert = false; +        break; +      } +    if (insert) +        MDs.push_back(MD); +  } + +  // FIXME: This preserves long-standing behaviour, but is it really the right +  // behaviour?  Or was that an unintended side-effect of node uniquing? 
+  return getOrSelfReference(A->getContext(), MDs); +} +  MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {    if (!A || !B)      return nullptr; diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 889705e95fc2..65060dc39d27 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -708,9 +708,10 @@ VectorType::VectorType(Type *ElType, unsigned NumEl)  VectorType *VectorType::get(Type *elementType, unsigned NumElements) {    Type *ElementType = const_cast<Type*>(elementType);    assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0"); -  assert(isValidElementType(ElementType) && -         "Elements of a VectorType must be a primitive type"); -   +  assert(isValidElementType(ElementType) && "Element type of a VectorType must " +                                            "be an integer, floating point, or " +                                            "pointer type."); +    LLVMContextImpl *pImpl = ElementType->getContext().pImpl;    VectorType *&Entry = ElementType->getContext().pImpl      ->VectorTypes[std::make_pair(ElementType, NumElements)]; diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index e95845f0af01..4d6298c542e2 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -47,6 +47,10 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,    }    OS << "\t.section\t" << getSectionName() << ",\""; +  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) +    OS << 'd'; +  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) +    OS << 'b';    if (getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)      OS << 'x';    if (getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE) @@ -55,10 +59,6 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,      OS << 'r';    else      OS << 'y'; -  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) -    OS << 'd'; -  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) -    OS << 'b';    if (getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)      OS << 'n';    if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED) diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index c17f99b9bd7b..ec0e0f7256a4 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -710,17 +710,22 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,      CrossSection = &Symbol.getSection() != &B->getSection();      // Offset of the symbol in the section -    int64_t a = Layout.getSymbolOffset(&B_SD); +    int64_t OffsetOfB = Layout.getSymbolOffset(&B_SD); -    // Offset of the relocation in the section -    int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - -    FixedValue = b - a;      // In the case where we have SymbA and SymB, we just need to store the delta      // between the two symbols.  Update FixedValue to account for the delta, and      // skip recording the relocation. 
-    if (!CrossSection) +    if (!CrossSection) { +      int64_t OffsetOfA = Layout.getSymbolOffset(&A_SD); +      FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant();        return; +    } + +    // Offset of the relocation in the section +    int64_t OffsetOfRelocation = +        Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + +    FixedValue = OffsetOfRelocation - OffsetOfB;    } else {      FixedValue = Target.getConstant();    } diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c index 0b5b765f89e1..b79692966473 100644 --- a/lib/Support/regcomp.c +++ b/lib/Support/regcomp.c @@ -49,6 +49,14 @@  #include "regcclass.h"  #include "regcname.h" +#include "llvm/Config/config.h" +#if HAVE_STDINT_H +#include <stdint.h> +#else +/* Pessimistically bound memory use */ +#define SIZE_MAX UINT_MAX +#endif +  /*   * parse structure, passed up and down to avoid global variables and   * other clumsinesses @@ -1069,6 +1077,8 @@ allocset(struct parse *p)  		p->ncsalloc += CHAR_BIT;  		nc = p->ncsalloc; +		if (nc > SIZE_MAX / sizeof(cset)) +			goto nomem;  		assert(nc % CHAR_BIT == 0);  		nbytes = nc / CHAR_BIT * css; @@ -1412,6 +1422,11 @@ enlarge(struct parse *p, sopno size)  	if (p->ssize >= size)  		return; +	if ((unsigned long)size > SIZE_MAX / sizeof(sop)) { +		SETERROR(REG_ESPACE); +		return; +	} +  	sp = (sop *)realloc(p->strip, size*sizeof(sop));  	if (sp == NULL) {  		SETERROR(REG_ESPACE); @@ -1428,6 +1443,12 @@ static void  stripsnug(struct parse *p, struct re_guts *g)  {  	g->nstates = p->slen; +	if ((unsigned long)p->slen > SIZE_MAX / sizeof(sop)) { +		g->strip = p->strip; +		SETERROR(REG_ESPACE); +		return; +	} +  	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));  	if (g->strip == NULL) {  		SETERROR(REG_ESPACE); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 19f51ce9450c..399b5eeaf5f5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6287,6 +6287,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,                                      AArch64CC::CondCode CC, bool NoNans, EVT VT,                                      SDLoc dl, SelectionDAG &DAG) {    EVT SrcVT = LHS.getValueType(); +  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() && +         "function only supposed to emit natural comparisons");    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());    APInt CnstBits(VT.getSizeInBits(), 0); @@ -6381,13 +6383,15 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();    SDValue LHS = Op.getOperand(0);    SDValue RHS = Op.getOperand(1); +  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();    SDLoc dl(Op);    if (LHS.getValueType().getVectorElementType().isInteger()) {      assert(LHS.getValueType() == RHS.getValueType());      AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); -    return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(), -                                dl, DAG); +    SDValue Cmp = +        EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG); +    return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());    }    assert(LHS.getValueType().getVectorElementType() == MVT::f32 || @@ -6401,19 +6405,21 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,    bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;    SDValue Cmp = -      EmitVectorComparison(LHS, 
RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG); +      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);    if (!Cmp.getNode())      return SDValue();    if (CC2 != AArch64CC::AL) {      SDValue Cmp2 = -        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); +        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);      if (!Cmp2.getNode())        return SDValue(); -    Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2); +    Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);    } +  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType()); +    if (ShouldInvert)      return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType()); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index e0397cce962d..c12442255a01 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2400,7 +2400,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,    else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {      // Conservatively refuse to convert an instruction which isn't in the same      // BB as the comparison. -    // For CMPri, we need to check Sub, thus we can't return here. +    // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. +    // Thus we cannot return here.      if (CmpInstr->getOpcode() == ARM::CMPri ||         CmpInstr->getOpcode() == ARM::t2CMPri)        MI = nullptr; @@ -2479,8 +2480,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,    case ARM::t2EORrr:    case ARM::t2EORri: {      // Scan forward for the use of CPSR -    // When checking against MI: if it's a conditional code requires -    // checking of V bit, then this is not safe to do. +    // When checking against MI: if it's a conditional code that requires +    // checking of the V bit or C bit, then this is not safe to do.      // It is safe to remove CmpInstr if CPSR is redefined or killed.      // If we are done with the basic block, we need to check whether CPSR is      // live-out. @@ -2547,19 +2548,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,              OperandsToUpdate.push_back(                  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));            } -        } else +        } else { +          // No Sub, so this is x = <op> y, z; cmp x, 0.            switch (CC) { -          default: +          case ARMCC::EQ: // Z +          case ARMCC::NE: // Z +          case ARMCC::MI: // N +          case ARMCC::PL: // N +          case ARMCC::AL: // none              // CPSR can be used multiple times, we should continue.              break; -          case ARMCC::VS: -          case ARMCC::VC: -          case ARMCC::GE: -          case ARMCC::LT: -          case ARMCC::GT: -          case ARMCC::LE: +          case ARMCC::HS: // C +          case ARMCC::LO: // C +          case ARMCC::VS: // V +          case ARMCC::VC: // V +          case ARMCC::HI: // C Z +          case ARMCC::LS: // C Z +          case ARMCC::GE: // N V +          case ARMCC::LT: // N V +          case ARMCC::GT: // Z N V +          case ARMCC::LE: // Z N V +            // The instruction uses the V bit or C bit which is not safe.              
return false;            } +        }        }      } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f92f257cd7eb..a1de5efb4507 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -565,7 +565,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)      setTargetDAGCombine(ISD::FP_TO_SINT);      setTargetDAGCombine(ISD::FP_TO_UINT);      setTargetDAGCombine(ISD::FDIV); -    setTargetDAGCombine(ISD::LOAD);      // It is legal to extload from v4i8 to v4i16 or v4i32.      MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, @@ -4488,6 +4487,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {    SDValue Op0 = Op.getOperand(0);    SDValue Op1 = Op.getOperand(1);    SDValue CC = Op.getOperand(2); +  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();    EVT VT = Op.getValueType();    ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();    SDLoc dl(Op); @@ -4517,8 +4517,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {        TmpOp0 = Op0;        TmpOp1 = Op1;        Opc = ISD::OR; -      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); -      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); +      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); +      Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);        break;      case ISD::SETUO: Invert = true; // Fallthrough      case ISD::SETO: @@ -4526,8 +4526,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {        TmpOp0 = Op0;        TmpOp1 = Op1;        Opc = ISD::OR; -      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); -      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); +      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); +      Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);        break;      }    } else { @@ -4561,8 +4561,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {        if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {          Opc = ARMISD::VTST; -        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); -        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); +        Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); +        Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));          Invert = !Invert;        }      } @@ -4588,22 +4588,24 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {    if (SingleOp.getNode()) {      switch (Opc) {      case ARMISD::VCEQ: -      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCGE: -      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCLEZ: -      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCGT: -      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCLTZ: -      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;      default: -      Result = DAG.getNode(Opc, dl, VT, Op0, Op1); +      Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);      }    } else { -     Result = 
DAG.getNode(Opc, dl, VT, Op0, Op1); +     Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);    } +  Result = DAG.getSExtOrTrunc(Result, dl, VT); +    if (Invert)      Result = DAG.getNOT(dl, Result, VT); @@ -8877,18 +8879,17 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {                                DAG.getUNDEF(VT), NewMask.data());  } -/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, -/// NEON load/store intrinsics, and generic vector load/stores, to merge -/// base address updates. -/// For generic load/stores, the memory type is assumed to be a vector. -/// The caller is assumed to have checked legality. +/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and +/// NEON load/store intrinsics to merge base address updates.  static SDValue CombineBaseUpdate(SDNode *N,                                   TargetLowering::DAGCombinerInfo &DCI) { +  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) +    return SDValue(); +    SelectionDAG &DAG = DCI.DAG;    bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||                        N->getOpcode() == ISD::INTRINSIC_W_CHAIN); -  bool isStore = N->getOpcode() == ISD::STORE; -  unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1); +  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);    SDValue Addr = N->getOperand(AddrOpIdx);    // Search for a use of the address operand that is an increment. @@ -8949,10 +8950,6 @@ static SDValue CombineBaseUpdate(SDNode *N,        case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;        case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;        case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; -      case ISD::LOAD:       NewOpc = ARMISD::VLD1_UPD; -        NumVecs = 1; isLaneOp = false; break; -      case ISD::STORE:      NewOpc = ARMISD::VST1_UPD; -        NumVecs = 1; isLoad = false; isLaneOp = false; break;        }      } @@ -8960,11 +8957,8 @@ static SDValue CombineBaseUpdate(SDNode *N,      EVT VecTy;      if (isLoad)        VecTy = N->getValueType(0); -    else if (isIntrinsic) -      VecTy = N->getOperand(AddrOpIdx+1).getValueType();      else -      VecTy = N->getOperand(1).getValueType(); - +      VecTy = N->getOperand(AddrOpIdx+1).getValueType();      unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;      if (isLaneOp)        NumBytes /= VecTy.getVectorNumElements(); @@ -8981,70 +8975,25 @@ static SDValue CombineBaseUpdate(SDNode *N,        continue;      } -    EVT AlignedVecTy = VecTy; - -    // If this is a less-than-standard-aligned load/store, change the type to -    // match the standard alignment. -    // The alignment is overlooked when selecting _UPD variants; and it's -    // easier to introduce bitcasts here than fix that. -    // There are 3 ways to get to this base-update combine: -    // - intrinsics: they are assumed to be properly aligned (to the standard -    //   alignment of the memory type), so we don't need to do anything. -    // - ARMISD::VLDx nodes: they are only generated from the aforementioned -    //   intrinsics, so, likewise, there's nothing to do. -    // - generic load/store instructions: the alignment is specified as an -    //   explicit operand, rather than implicitly as the standard alignment -    //   of the memory type (like the intrisics).  We need to change the -    //   memory type to match the explicit alignment.  That way, we don't -    //   generate non-standard-aligned ARMISD::VLDx nodes. 
-    if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N)) { -      unsigned Alignment = LSN->getAlignment(); -      if (Alignment == 0) -        Alignment = 1; -      if (Alignment < VecTy.getScalarSizeInBits() / 8) { -        MVT EltTy = MVT::getIntegerVT(Alignment * 8); -        assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); -        assert(!isLaneOp && "Unexpected generic load/store lane."); -        unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); -        AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); -      } -    } -      // Create the new updating load/store node. -    // First, create an SDVTList for the new updating node's results.      EVT Tys[6];      unsigned NumResultVecs = (isLoad ? NumVecs : 0);      unsigned n;      for (n = 0; n < NumResultVecs; ++n) -      Tys[n] = AlignedVecTy; +      Tys[n] = VecTy;      Tys[n++] = MVT::i32;      Tys[n] = MVT::Other;      SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); - -    // Then, gather the new node's operands.      SmallVector<SDValue, 8> Ops;      Ops.push_back(N->getOperand(0)); // incoming chain      Ops.push_back(N->getOperand(AddrOpIdx));      Ops.push_back(Inc); -    if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) { -      // Try to match the intrinsic's signature -      Ops.push_back(StN->getValue()); -      Ops.push_back(DAG.getConstant(StN->getAlignment(), MVT::i32)); -    } else { -      for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) -        Ops.push_back(N->getOperand(i)); -    } - -    // If this is a non-standard-aligned store, the penultimate operand is the -    // stored value.  Bitcast it to the aligned type. -    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { -      SDValue &StVal = Ops[Ops.size()-2]; -      StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal); +    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { +      Ops.push_back(N->getOperand(i));      } - -    MemSDNode *MemInt = cast<MemSDNode>(N); +    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);      SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, -                                           Ops, AlignedVecTy, +                                           Ops, MemInt->getMemoryVT(),                                             MemInt->getMemOperand());      // Update the uses. @@ -9052,14 +9001,6 @@ static SDValue CombineBaseUpdate(SDNode *N,      for (unsigned i = 0; i < NumResultVecs; ++i) {        NewResults.push_back(SDValue(UpdN.getNode(), i));      } - -    // If this is an non-standard-aligned load, the first result is the loaded -    // value.  Bitcast it to the expected result type. 
-    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { -      SDValue &LdVal = NewResults[0]; -      LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal); -    } -      NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain      DCI.CombineTo(N, NewResults);      DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); @@ -9069,14 +9010,6 @@ static SDValue CombineBaseUpdate(SDNode *N,    return SDValue();  } -static SDValue PerformVLDCombine(SDNode *N, -                                 TargetLowering::DAGCombinerInfo &DCI) { -  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) -    return SDValue(); - -  return CombineBaseUpdate(N, DCI); -} -  /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a  /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic  /// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and @@ -9190,18 +9123,6 @@ static SDValue PerformVDUPLANECombine(SDNode *N,    return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);  } -static SDValue PerformLOADCombine(SDNode *N, -                                  TargetLowering::DAGCombinerInfo &DCI) { -  EVT VT = N->getValueType(0); - -  // If this is a legal vector load, try to combine it into a VLD1_UPD. -  if (ISD::isNormalLoad(N) && VT.isVector() && -      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) -    return CombineBaseUpdate(N, DCI); - -  return SDValue(); -} -  /// PerformSTORECombine - Target-specific dag combine xforms for  /// ISD::STORE.  static SDValue PerformSTORECombine(SDNode *N, @@ -9340,11 +9261,6 @@ static SDValue PerformSTORECombine(SDNode *N,                          St->getAAInfo());    } -  // If this is a legal vector store, try to combine it into a VST1_UPD. -  if (ISD::isNormalStore(N) && VT.isVector() && -      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) -    return CombineBaseUpdate(N, DCI); -    return SDValue();  } @@ -9938,11 +9854,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,    case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);    case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);    case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); -  case ISD::LOAD:       return PerformLOADCombine(N, DCI);    case ARMISD::VLD2DUP:    case ARMISD::VLD3DUP:    case ARMISD::VLD4DUP: -    return PerformVLDCombine(N, DCI); +    return CombineBaseUpdate(N, DCI);    case ARMISD::BUILD_VECTOR:      return PerformARMBUILD_VECTORCombine(N, DCI);    case ISD::INTRINSIC_VOID: @@ -9962,7 +9877,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,      case Intrinsic::arm_neon_vst2lane:      case Intrinsic::arm_neon_vst3lane:      case Intrinsic::arm_neon_vst4lane: -      return PerformVLDCombine(N, DCI); +      return CombineBaseUpdate(N, DCI);      default: break;      }      break; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 96f3b4e64326..56de9d2f470c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -9195,34 +9195,48 @@ static const struct {    const uint64_t Enabled;    const uint64_t Disabled;  } FPUs[] = { -    {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, -    {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, -    {ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON}, -    {ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16, -     ARM::FeatureNEON}, -    {ARM::VFPV4, 
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4, -     ARM::FeatureNEON}, -    {ARM::VFPV4_D16, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureD16, -     ARM::FeatureNEON}, -    {ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | -                        ARM::FeatureFPARMv8 | ARM::FeatureD16, -     ARM::FeatureNEON | ARM::FeatureCrypto}, -    {ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | -                        ARM::FeatureFPARMv8, -     ARM::FeatureNEON | ARM::FeatureCrypto}, -    {ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0}, -    {ARM::NEON_VFPV4, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON, -     0}, -    {ARM::NEON_FP_ARMV8, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +    {/* ID */ ARM::VFP, +     /* Enabled */ ARM::FeatureVFP2, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::VFPV2, +     /* Enabled */ ARM::FeatureVFP2, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::VFPV3, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16}, +    {/* ID */ ARM::VFPV3_D16, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::VFPV4, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16}, +    {/* ID */ ARM::VFPV4_D16, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureD16, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::FPV5_D16, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureFPARMv8 | ARM::FeatureD16, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto}, +    {/* ID */ ARM::FP_ARMV8, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureFPARMv8, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto | ARM::FeatureD16}, +    {/* ID */ ARM::NEON, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, +     /* Disabled */ ARM::FeatureD16}, +    {/* ID */ ARM::NEON_VFPV4, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureNEON, +     /* Disabled */ ARM::FeatureD16}, +    {/* ID */ ARM::NEON_FP_ARMV8, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |           ARM::FeatureFPARMv8 | ARM::FeatureNEON, -     ARM::FeatureCrypto}, -    {ARM::CRYPTO_NEON_FP_ARMV8, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +     /* Disabled */ ARM::FeatureCrypto | ARM::FeatureD16}, +    {/* ID */ ARM::CRYPTO_NEON_FP_ARMV8, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |           ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, -     0}, +     /* Disabled */ ARM::FeatureD16},      {ARM::SOFTVFP, 0, 0},  }; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index d597cdcbd644..ad3bc1dbeeeb 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -3134,7 +3134,8 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),  def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),                      "icbi $src", IIC_LdStICBI, []>; -def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), +// We used to have EIEIO as the value but E[0-9A-Z] is a reserved
name +def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),                             "eieio", IIC_LdStLoad, []>;  def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index e7bc00635f75..0bc62d0ab04d 100644 --- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -100,7 +100,7 @@ bool AMDGPUTTI::hasBranchDivergence() const { return true; }  void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,                                          UnrollingPreferences &UP) const {    UP.Threshold = 300; // Twice the default. -  UP.Count = UINT_MAX; +  UP.MaxCount = UINT_MAX;    UP.Partial = true;    // TODO: Do we want runtime unrolling? diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index 91eb60beb6bb..c99219dd9074 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -14,6 +14,7 @@  #include "AMDGPU.h"  #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Analysis/LoopInfo.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Instructions.h" @@ -66,6 +67,8 @@ class SIAnnotateControlFlow : public FunctionPass {    DominatorTree *DT;    StackVector Stack; +  LoopInfo *LI; +    bool isTopOfStack(BasicBlock *BB);    Value *popSaved(); @@ -99,6 +102,7 @@ public:    }    void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<LoopInfo>();      AU.addRequired<DominatorTreeWrapperPass>();      AU.addPreserved<DominatorTreeWrapperPass>();      FunctionPass::getAnalysisUsage(AU); @@ -277,10 +281,25 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {    Term->setCondition(CallInst::Create(Loop, Arg, "", Term));    push(Term->getSuccessor(0), Arg); -} - -/// \brief Close the last opened control flow +}/// \brief Close the last opened control flow  void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { +  llvm::Loop *L = LI->getLoopFor(BB); + +  if (L && L->getHeader() == BB) { +    // We can't insert an EndCF call into a loop header, because it will +    // get executed on every iteration of the loop, when it should be +    // executed only once before the loop. +    SmallVector <BasicBlock*, 8> Latches; +    L->getLoopLatches(Latches); + +    std::vector<BasicBlock*> Preds; +    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { +      if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end()) +        Preds.push_back(*PI); +    } +    BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", this); +  } +    CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());  } @@ -288,6 +307,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {  /// recognize if/then/else and loops.  
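
The closeControlFlow change above splits the non-latch predecessors of a loop header into a fresh "endcf.split" block, so the EndCf marker runs once on loop entry instead of once per iteration. A standalone sketch of that predecessor filtering follows; Block and predsToSplit are invented illustrations, not the pass's API:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Illustrative model: when the join block is a loop header, EndCf must not
// execute every iteration, so only the header's non-latch (entering)
// predecessors are redirected through a new block that runs exactly once.
struct Block { std::string Name; };

static std::vector<Block *>
predsToSplit(const std::vector<Block *> &Preds,
             const std::vector<Block *> &Latches) {
  std::vector<Block *> Out;
  for (Block *P : Preds)
    if (std::find(Latches.begin(), Latches.end(), P) == Latches.end())
      Out.push_back(P); // entering edge: route through "endcf.split"
  return Out;
}

int main() {
  Block Entry{"entry"}, Latch{"loop.latch"};
  for (Block *B : predsToSplit({&Entry, &Latch}, {&Latch}))
    std::cout << B->Name << " -> endcf.split\n"; // only "entry" is split
}
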
bool SIAnnotateControlFlow::runOnFunction(Function &F) {    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +  LI = &getAnalysis<LoopInfo>();    for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),         E = df_end(&F.getEntryBlock()); I != E; ++I) { diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 0396bf384066..58c2cd109680 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -266,6 +266,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,        break;      case AMDGPU::SI_SPILL_V32_RESTORE:      case AMDGPU::SI_SPILL_V64_RESTORE: +    case AMDGPU::SI_SPILL_V96_RESTORE:      case AMDGPU::SI_SPILL_V128_RESTORE:      case AMDGPU::SI_SPILL_V256_RESTORE:      case AMDGPU::SI_SPILL_V512_RESTORE: { diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index ab3319afe93f..30b3b2876b8a 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -132,9 +132,9 @@ def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",  def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",                                        "Enable XOP instructions",                                        [FeatureFMA4]>; -def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", -                                          "HasVectorUAMem", "true", -                 "Allow unaligned memory operands on vector/SIMD instructions">; +def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", +                                          "HasSSEUnalignedMem", "true", +                      "Allow unaligned memory operands with SSE instructions">;  def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",                                        "Enable AES instructions",                                        [FeatureSSE2]>; @@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [                                         FeatureCMPXCHG16B,                                         FeatureFastUAMem,                                         FeatureSlowUAMem32, -                                       FeatureVectorUAMem,                                         FeaturePOPCNT,                                         FeatureAES,                                         FeaturePCLMUL @@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [                                       FeatureCMPXCHG16B,                                       FeatureFastUAMem,                                       FeatureSlowUAMem32, -                                     FeatureVectorUAMem,                                       FeaturePOPCNT,                                       FeatureAES,                                       FeaturePCLMUL, @@ -337,7 +335,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [                                     FeatureAVX2,                                     FeatureCMPXCHG16B,                                     FeatureFastUAMem, -                                   FeatureVectorUAMem,                                     FeaturePOPCNT,                                     FeatureAES,                                     FeaturePCLMUL, @@ -360,7 +357,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [                                       FeatureAVX2,                                       FeatureCMPXCHG16B,                                       
FeatureFastUAMem, -                                     FeatureVectorUAMem,                                       FeaturePOPCNT,                                       FeatureAES,                                       FeaturePCLMUL, @@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,                        FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,                        FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,                        FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, -                      FeatureSlowIncDec, FeatureVectorUAMem]>; +                      FeatureSlowIncDec]>;  def : KnightsLandingProc<"knl">;  // FIXME: define SKX model @@ -399,7 +395,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,                        FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,                        FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,                        FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, -                      FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>; +                      FeatureSlowIncDec, FeatureSGX]>;  def : SkylakeProc<"skylake">;  def : SkylakeProc<"skx">; // Legacy alias. diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 14a0f38df668..d06e94fbfe1f 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -688,11 +688,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {      std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;      for (const auto &Function : M) -      if (Function.hasDLLExportStorageClass()) +      if (Function.hasDLLExportStorageClass() && !Function.isDeclaration())          DLLExportedFns.push_back(getSymbol(&Function));      for (const auto &Global : M.globals()) -      if (Global.hasDLLExportStorageClass()) +      if (Global.hasDLLExportStorageClass() && !Global.isDeclaration())          DLLExportedGlobals.push_back(getSymbol(&Global));      for (const auto &Alias : M.aliases()) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a1fd34ea8000..78a11e64fef0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5473,6 +5473,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,      if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {        DecodePSHUFBMask(C, Mask); +      if (Mask.empty()) +        return false;        break;      } diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 76e8fad78de3..7069bd68e00e 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -424,7 +424,7 @@ def alignedloadv8i64  : PatFrag<(ops node:$ptr),  // setting a feature bit in the processor (on startup, for example).  // Opteron 10h and later implement such a feature.  
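
The memop fragment just below gates folding a load into an SSE instruction on either the renamed subtarget flag or a known 16-byte alignment. A standalone model of that test follows; the Subtarget struct and canFoldLoad are illustrative stand-ins, not the TableGen predicate itself:

#include <iostream>

// Illustrative model: an SSE instruction may take a memory operand only
// when the target tolerates unaligned SSE memory accesses or the load is
// known to be 16-byte aligned; otherwise a separate unaligned load (e.g.
// movups) must be emitted first.
struct Subtarget { bool HasSSEUnalignedMem; };

static bool canFoldLoad(const Subtarget &ST, unsigned AlignInBytes) {
  return ST.HasSSEUnalignedMem || AlignInBytes >= 16;
}

int main() {
  Subtarget Generic{false}, Tolerant{true};
  std::cout << canFoldLoad(Generic, 8)          // 0: keep the load separate
            << canFoldLoad(Generic, 16)         // 1: aligned, safe to fold
            << canFoldLoad(Tolerant, 8) << '\n'; // 1: feature bit set
}
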
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ -  return    Subtarget->hasVectorUAMem() +  return    Subtarget->hasSSEUnalignedMem()           || cast<LoadSDNode>(N)->getAlignment() >= 16;  }]>; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e59395c06a5c..983169886ad3 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment() {    IsSHLDSlow = false;    IsUAMemFast = false;    IsUAMem32Slow = false; -  HasVectorUAMem = false; +  HasSSEUnalignedMem = false;    HasCmpxchg16b = false;    UseLeaForSP = false;    HasSlowDivide32 = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 754b5b924717..e0263d66e928 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -162,9 +162,9 @@ protected:    /// True if unaligned 32-byte memory accesses are slow.    bool IsUAMem32Slow; -  /// HasVectorUAMem - True if SIMD operations can have unaligned memory -  /// operands. This may require setting a feature bit in the processor. -  bool HasVectorUAMem; +  /// True if SSE operations can have unaligned memory operands. +  /// This may require setting a configuration bit in the processor. +  bool HasSSEUnalignedMem;    /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;    /// this is true for most x86-64 chips, but not the first AMD chips. @@ -378,7 +378,7 @@ public:    bool isSHLDSlow() const { return IsSHLDSlow; }    bool isUnalignedMemAccessFast() const { return IsUAMemFast; }    bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } -  bool hasVectorUAMem() const { return HasVectorUAMem; } +  bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }    bool hasCmpxchg16b() const { return HasCmpxchg16b; }    bool useLeaForSP() const { return UseLeaForSP; }    bool hasSlowDivide32() const { return HasSlowDivide32; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index af1694d3453c..6230c000cf57 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -330,11 +330,17 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT      case LLVMContext::MD_noalias:      case LLVMContext::MD_nontemporal:      case LLVMContext::MD_mem_parallel_loop_access: -    case LLVMContext::MD_nonnull:        // All of these directly apply.        NewLoad->setMetadata(ID, N);        break; +    case LLVMContext::MD_nonnull: +      // FIXME: We should translate this into range metadata for integer types +      // and vice versa. +      if (NewTy->isPointerTy()) +        NewLoad->setMetadata(ID, N); +      break; +      case LLVMContext::MD_range:        // FIXME: It would be nice to propagate this in some way, but the type        // conversions make it hard. @@ -548,13 +554,14 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {        case LLVMContext::MD_noalias:        case LLVMContext::MD_nontemporal:        case LLVMContext::MD_mem_parallel_loop_access: -      case LLVMContext::MD_nonnull:          // All of these directly apply.          NewStore->setMetadata(ID, N);          break;        case LLVMContext::MD_invariant_load: +      case LLVMContext::MD_nonnull:        case LLVMContext::MD_range: +        // These don't apply for stores.          
break;        }      } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 745c85a98e2f..25f1f022c40c 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -67,7 +67,7 @@ static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;  static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 36;  static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;  static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; -static const uint64_t kWindowsShadowOffset32 = 1ULL << 30; +static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;  static const size_t kMinStackMallocSize = 1 << 6;  // 64B  static const size_t kMaxStackMallocSize = 1 << 16;  // 64K diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 5f73b89e8551..2a3d15421de9 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -71,9 +71,17 @@ private:      return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";    } +  /// Get the section name for the coverage mapping data. +  StringRef getCoverageSection() const { +    return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap"; +  } +    /// Replace instrprof_increment with an increment of the appropriate value.    void lowerIncrement(InstrProfIncrementInst *Inc); +  /// Set up the section and uses for coverage data and its references. +  void lowerCoverageData(GlobalVariable *CoverageData); +    /// Get the region counters for an increment, creating them if necessary.    ///    /// If the counter array doesn't yet exist, the profile data variables @@ -118,6 +126,10 @@ bool InstrProfiling::runOnModule(Module &M) {            lowerIncrement(Inc);            MadeChange = true;          } +  if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) { +    lowerCoverageData(Coverage); +    MadeChange = true; +  }    if (!MadeChange)      return false; @@ -140,6 +152,35 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {    Inc->eraseFromParent();  } +void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { +  CoverageData->setSection(getCoverageSection()); +  CoverageData->setAlignment(8); + +  Constant *Init = CoverageData->getInitializer(); +  // We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] } +  // for some C. If not, the frontend's given us something broken. +  assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map"); +  assert(isa<ConstantArray>(Init->getAggregateElement(4)) && +         "invalid function list in coverage map"); +  ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(4)); +  for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { +    Constant *Record = Records->getOperand(I); +    Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts(); + +    assert(isa<GlobalVariable>(V) && "Missing reference to function name"); +    GlobalVariable *Name = cast<GlobalVariable>(V); + +    // If we have region counters for this name, we've already handled it. +    auto It = RegionCounters.find(Name); +    if (It != RegionCounters.end()) +      continue; + +    // Move the name variable to the right section. +    Name->setSection(getNameSection()); +    Name->setAlignment(1); +  } +} +  /// Get the name of a profiling variable for a particular function.  
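
The lowerCoverageData routine above is mostly section bookkeeping: every coverage record references a function-name variable, and any name not already placed by counter lowering gets moved into the name section. A standalone sketch of that loop follows; NameVar and the section string are assumptions for illustration, not the real API:

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Illustrative model of the record walk: names whose counters were already
// lowered are skipped, the rest are relocated (mirroring
// Name->setSection(getNameSection()) above).
struct NameVar { std::string Ident; std::string Section; };

static void lowerCoverage(const std::vector<NameVar *> &Records,
                          const std::set<NameVar *> &AlreadyLowered,
                          const std::string &NameSection) {
  for (NameVar *N : Records) {
    if (AlreadyLowered.count(N))
      continue;               // increment lowering already placed this name
    N->Section = NameSection; // assumed section label
  }
}

int main() {
  NameVar A{"main", ""}, B{"helper", "names"};
  lowerCoverage({&A, &B}, {&B}, "names");
  std::cout << A.Ident << " -> " << A.Section << '\n'; // main -> names
}
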
static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) {    auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer()); diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 9f00d3d6c824..d7d752ff5243 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -631,7 +631,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {        if (SI.isAtomic()) SI.setOrdering(addReleaseOrdering(SI.getOrdering())); -      if (MS.TrackOrigins) +      if (MS.TrackOrigins && !SI.isAtomic())          storeOrigin(IRB, Addr, Shadow, getOrigin(Val), SI.getAlignment(),                      InstrumentWithCalls);      } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 394b0d3de7bd..969b9a8f8df1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -480,6 +480,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {        // Ignore volatile loads.        if (!LI->isSimple()) {          LastStore = nullptr; +        // Don't CSE across synchronization boundaries. +        if (Inst->mayWriteToMemory()) +          ++CurrentGeneration;          continue;        } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 33b5f9df5a27..3eea3d4e27ae 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -750,6 +750,16 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,    // its dependence information by changing its parameter.    MD->removeInstruction(C); +  // Update AA metadata +  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be +  // handled here, but combineMetadata doesn't support them yet +  unsigned KnownIDs[] = { +    LLVMContext::MD_tbaa, +    LLVMContext::MD_alias_scope, +    LLVMContext::MD_noalias, +  }; +  combineMetadata(C, cpy, KnownIDs); +    // Remove the memcpy.    
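
The KnownIDs combine above exists because the call is taking over the memcpy's memory effects: alias metadata may only survive on the call if it held for both instructions, which for noalias scope lists means intersection. A toy model of that intersection follows; ScopeList is an invented stand-in for the metadata operands, not LLVM API:

#include <iostream>
#include <set>
#include <string>

// Illustrative model: keep only the scopes both instructions agreed on,
// so no stale noalias claim from either side survives the rewrite.
using ScopeList = std::set<std::string>;

static ScopeList intersectScopes(const ScopeList &A, const ScopeList &B) {
  ScopeList Out;
  for (const std::string &S : A)
    if (B.count(S))
      Out.insert(S);
  return Out;
}

int main() {
  ScopeList CallScopes{"!scope.a", "!scope.b"}, MemcpyScopes{"!scope.b"};
  for (const std::string &S : intersectScopes(CallScopes, MemcpyScopes))
    std::cout << S << '\n'; // only !scope.b survives
}
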
MD->removeInstruction(cpy);    ++NumMemCpyInstr; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 08a4b3f3b737..2a84d7e0c484 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1328,6 +1328,8 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign          K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));          break;        case LLVMContext::MD_alias_scope: +        K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD)); +        break;        case LLVMContext::MD_noalias:          K->setMetadata(Kind, MDNode::intersect(JMD, KMD));          break; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 477fba42412e..65f2ae2f9429 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -154,19 +154,21 @@ static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {    return mapToMetadata(VM, MD, const_cast<Metadata *>(MD));  } -static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM, -                                 RemapFlags Flags, +static Metadata *MapMetadataImpl(const Metadata *MD, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles, +                                 ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer); -static Metadata *mapMetadataOp(Metadata *Op, ValueToValueMapTy &VM, -                                 RemapFlags Flags, -                                 ValueMapTypeRemapper *TypeMapper, -                                 ValueMaterializer *Materializer) { +static Metadata *mapMetadataOp(Metadata *Op, +                               SmallVectorImpl<UniquableMDNode *> &Cycles, +                               ValueToValueMapTy &VM, RemapFlags Flags, +                               ValueMapTypeRemapper *TypeMapper, +                               ValueMaterializer *Materializer) {    if (!Op)      return nullptr;    if (Metadata *MappedOp = -          MapMetadataImpl(Op, VM, Flags, TypeMapper, Materializer)) +          MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer))      return MappedOp;    // Use identity map if MappedOp is null and we can ignore missing entries.    
if (Flags & RF_IgnoreMissingEntries) @@ -180,8 +182,9 @@ static Metadata *mapMetadataOp(Metadata *Op, ValueToValueMapTy &VM,    return nullptr;  } -static Metadata *cloneMDTuple(const MDTuple *Node, ValueToValueMapTy &VM, -                              RemapFlags Flags, +static Metadata *cloneMDTuple(const MDTuple *Node, +                              SmallVectorImpl<UniquableMDNode *> &Cycles, +                              ValueToValueMapTy &VM, RemapFlags Flags,                                ValueMapTypeRemapper *TypeMapper,                                ValueMaterializer *Materializer,                                bool IsDistinct) { @@ -192,41 +195,57 @@ static Metadata *cloneMDTuple(const MDTuple *Node, ValueToValueMapTy &VM,    SmallVector<Metadata *, 4> Elts;    Elts.reserve(Node->getNumOperands());    for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) -    Elts.push_back(mapMetadataOp(Node->getOperand(I), VM, Flags, TypeMapper, -                                 Materializer)); +    Elts.push_back(mapMetadataOp(Node->getOperand(I), Cycles, VM, Flags, +                                 TypeMapper, Materializer));    return MDTuple::get(Node->getContext(), Elts);  } -static Metadata *cloneMDLocation(const MDLocation *Node, ValueToValueMapTy &VM, -                                 RemapFlags Flags, +static Metadata *cloneMDLocation(const MDLocation *Node, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles, +                                 ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer,                                   bool IsDistinct) {    return (IsDistinct ? MDLocation::getDistinct : MDLocation::get)(        Node->getContext(), Node->getLine(), Node->getColumn(), -      mapMetadataOp(Node->getScope(), VM, Flags, TypeMapper, Materializer), -      mapMetadataOp(Node->getInlinedAt(), VM, Flags, TypeMapper, Materializer)); +      mapMetadataOp(Node->getScope(), Cycles, VM, Flags, TypeMapper, +                    Materializer), +      mapMetadataOp(Node->getInlinedAt(), Cycles, VM, Flags, TypeMapper, +                    Materializer));  } -static Metadata *cloneMDNode(const UniquableMDNode *Node, ValueToValueMapTy &VM, -                             RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, +static Metadata *cloneMDNode(const UniquableMDNode *Node, +                             SmallVectorImpl<UniquableMDNode *> &Cycles, +                             ValueToValueMapTy &VM, RemapFlags Flags, +                             ValueMapTypeRemapper *TypeMapper,                               ValueMaterializer *Materializer, bool IsDistinct) {    switch (Node->getMetadataID()) {    default:      llvm_unreachable("Invalid UniquableMDNode subclass");  #define HANDLE_UNIQUABLE_LEAF(CLASS)                                           \    case Metadata::CLASS##Kind:                                                  \ -    return clone##CLASS(cast<CLASS>(Node), VM, Flags, TypeMapper,              \ +    return clone##CLASS(cast<CLASS>(Node), Cycles, VM, Flags, TypeMapper,      \                          Materializer, IsDistinct);  #include "llvm/IR/Metadata.def"    }  } +static void +trackCyclesUnderDistinct(const UniquableMDNode *Node, +                         SmallVectorImpl<UniquableMDNode *> &Cycles) { +  // Track any cycles beneath this node. 
+  for (Metadata *Op : Node->operands()) +    if (auto *N = dyn_cast_or_null<UniquableMDNode>(Op)) +      if (!N->isResolved()) +        Cycles.push_back(N); +} +  /// \brief Map a distinct MDNode.  ///  /// Distinct nodes are not uniqued, so they must always recreated.  static Metadata *mapDistinctNode(const UniquableMDNode *Node, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles,                                   ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer) { @@ -241,9 +260,11 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,      // Fix the operands.      for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) -      NewMD->replaceOperandWith(I, mapMetadataOp(Node->getOperand(I), VM, Flags, -                                                 TypeMapper, Materializer)); +      NewMD->replaceOperandWith(I, +                                mapMetadataOp(Node->getOperand(I), Cycles, VM, +                                              Flags, TypeMapper, Materializer)); +    trackCyclesUnderDistinct(NewMD, Cycles);      return NewMD;    } @@ -252,9 +273,11 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,    std::unique_ptr<MDNodeFwdDecl> Dummy(        MDNode::getTemporary(Node->getContext(), None));    mapToMetadata(VM, Node, Dummy.get()); -  Metadata *NewMD = cloneMDNode(Node, VM, Flags, TypeMapper, Materializer, -                                /* IsDistinct */ true); +  auto *NewMD = cast<UniquableMDNode>(cloneMDNode(Node, Cycles, VM, Flags, +                                                  TypeMapper, Materializer, +                                                  /* IsDistinct */ true));    Dummy->replaceAllUsesWith(NewMD); +  trackCyclesUnderDistinct(NewMD, Cycles);    return mapToMetadata(VM, Node, NewMD);  } @@ -263,13 +286,14 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,  /// Check whether a uniqued node needs to be remapped (due to any operands  /// changing).  static bool shouldRemapUniquedNode(const UniquableMDNode *Node, +                                   SmallVectorImpl<UniquableMDNode *> &Cycles,                                     ValueToValueMapTy &VM, RemapFlags Flags,                                     ValueMapTypeRemapper *TypeMapper,                                     ValueMaterializer *Materializer) {    // Check all operands to see if any need to be remapped.    for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {      Metadata *Op = Node->getOperand(I); -    if (Op != mapMetadataOp(Op, VM, Flags, TypeMapper, Materializer)) +    if (Op != mapMetadataOp(Op, Cycles, VM, Flags, TypeMapper, Materializer))        return true;    }    return false; @@ -279,9 +303,10 @@ static bool shouldRemapUniquedNode(const UniquableMDNode *Node,  ///  /// Uniqued nodes may not need to be recreated (they may map to themselves).  
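
The mapUniquedNode function that follows hinges on one fast path: if remapping changes no operand, the node maps to itself and nothing is cloned. A self-contained model of that decision follows; Node here is just a vector of ints standing in for MDNode, not the real class:

#include <iostream>
#include <map>
#include <vector>

// Illustrative model: remap each operand; if nothing changed, return the
// original node (identity mapping), otherwise materialize a clone.
using Node = std::vector<int>;

static const Node *remapUniqued(const Node &N, const std::map<int, int> &VM,
                                std::vector<Node> &Storage) {
  bool Changed = false;
  Node Clone;
  for (int Op : N) {
    auto It = VM.find(Op);
    int Mapped = (It == VM.end()) ? Op : It->second;
    Changed |= (Mapped != Op);
    Clone.push_back(Mapped);
  }
  if (!Changed)
    return &N;              // identity mapping, no new node created
  Storage.push_back(Clone); // at least one operand changed: recreate
  return &Storage.back();
}

int main() {
  std::vector<Node> Storage;
  Node N{1, 2, 3};
  std::cout << (remapUniqued(N, {}, Storage) == &N)               // 1
            << (remapUniqued(N, {{2, 9}}, Storage) == &N) << '\n'; // 0
}
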
static Metadata *mapUniquedNode(const UniquableMDNode *Node, -                                 ValueToValueMapTy &VM, RemapFlags Flags, -                                 ValueMapTypeRemapper *TypeMapper, -                                 ValueMaterializer *Materializer) { +                                SmallVectorImpl<UniquableMDNode *> &Cycles, +                                ValueToValueMapTy &VM, RemapFlags Flags, +                                ValueMapTypeRemapper *TypeMapper, +                                ValueMaterializer *Materializer) {    assert(!Node->isDistinct() && "Expected uniqued node");    // Create a dummy node in case we have a metadata cycle. @@ -289,7 +314,8 @@ static Metadata *mapUniquedNode(const UniquableMDNode *Node,    mapToMetadata(VM, Node, Dummy);    // Check all operands to see if any need to be remapped. -  if (!shouldRemapUniquedNode(Node, VM, Flags, TypeMapper, Materializer)) { +  if (!shouldRemapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, +                              Materializer)) {      // Use an identity mapping.      mapToSelf(VM, Node);      MDNode::deleteTemporary(Dummy); @@ -297,15 +323,17 @@ static Metadata *mapUniquedNode(const UniquableMDNode *Node,    }    // At least one operand needs remapping. -  Metadata *NewMD = cloneMDNode(Node, VM, Flags, TypeMapper, Materializer, -                                /* IsDistinct */ false); +  Metadata *NewMD = +      cloneMDNode(Node, Cycles, VM, Flags, TypeMapper, Materializer, +                  /* IsDistinct */ false);    Dummy->replaceAllUsesWith(NewMD);    MDNode::deleteTemporary(Dummy);    return mapToMetadata(VM, Node, NewMD);  } -static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM, -                                 RemapFlags Flags, +static Metadata *MapMetadataImpl(const Metadata *MD, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles, +                                 ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer) {    // If the value already exists in the map, use it. @@ -345,18 +373,30 @@ static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM,      return mapToSelf(VM, MD);    if (Node->isDistinct()) -    return mapDistinctNode(Node, VM, Flags, TypeMapper, Materializer); +    return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); -  return mapUniquedNode(Node, VM, Flags, TypeMapper, Materializer); +  return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);  }  Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,                              RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,                              ValueMaterializer *Materializer) { -  Metadata *NewMD = MapMetadataImpl(MD, VM, Flags, TypeMapper, Materializer); -  if (NewMD && NewMD != MD) +  SmallVector<UniquableMDNode *, 8> Cycles; +  Metadata *NewMD = +      MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer); + +  // Resolve cycles underneath MD. +  if (NewMD && NewMD != MD) {      if (auto *N = dyn_cast<UniquableMDNode>(NewMD))        N->resolveCycles(); + +    for (UniquableMDNode *N : Cycles) +      N->resolveCycles(); +  } else { +    // Shouldn't get unresolved cycles if nothing was remapped. 
+    assert(Cycles.empty() && "Expected no unresolved cycles"); +  } +    return NewMD;  } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4834782ecc14..bd8a4b3fd3d0 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -75,6 +75,18 @@ static const unsigned MinVecRegSize = 128;  static const unsigned RecursionMaxDepth = 12; +/// \brief Predicate for the element types that the SLP vectorizer supports. +/// +/// The most important thing to filter here are types which are invalid in LLVM +/// vectors. We also filter target specific types which have absolutely no +/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just +/// avoids spending time checking the cost model and realizing that they will +/// be inevitably scalarized. +static bool isValidElementType(Type *Ty) { +  return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() && +         !Ty->isPPC_FP128Ty(); +} +  /// \returns the parent basic block if all of the instructions in \p VL  /// are in the same block or null otherwise.  static BasicBlock *getSameBlock(ArrayRef<Value *> VL) { @@ -208,6 +220,8 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {          MD = MDNode::getMostGenericTBAA(MD, IMD);          break;        case LLVMContext::MD_alias_scope: +        MD = MDNode::getMostGenericAliasScope(MD, IMD); +        break;        case LLVMContext::MD_noalias:          MD = MDNode::intersect(MD, IMD);          break; @@ -1214,7 +1228,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {        Type *SrcTy = VL0->getOperand(0)->getType();        for (unsigned i = 0; i < VL.size(); ++i) {          Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType(); -        if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) { +        if (Ty != SrcTy || !isValidElementType(Ty)) {            BS.cancelScheduling(VL);            newTreeEntry(VL, false);            DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); @@ -3128,7 +3142,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {      // Check that the pointer points to scalars.      Type *Ty = SI->getValueOperand()->getType(); -    if (Ty->isAggregateType() || Ty->isVectorTy()) +    if (!isValidElementType(Ty))        continue;      // Find the base pointer. 
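
A self-contained model of the isValidElementType filter introduced above; the Kind enum is an invented stand-in for llvm::Type, and the real predicate also defers to VectorType::isValidElementType:

#include <iostream>

// Illustrative model: SLP only vectorizes scalar element types that make
// sense inside a vector, so aggregates, existing vectors, and padded
// target types like x86_fp80 or ppc_fp128 are rejected up front, before
// any cost modeling runs.
enum class Kind { I32, F32, F64, X86_FP80, PPC_FP128, Vector, Struct };

static bool isValidElementType(Kind K) {
  switch (K) {
  case Kind::I32:
  case Kind::F32:
  case Kind::F64:
    return true;  // plain scalars vectorize fine
  default:
    return false; // never a profitable (or even legal) vector element
  }
}

int main() {
  std::cout << isValidElementType(Kind::F32)
            << isValidElementType(Kind::X86_FP80) << '\n'; // 1 then 0
}
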
@@ -3169,7 +3183,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,    for (int i = 0, e = VL.size(); i < e; ++i) {      Type *Ty = VL[i]->getType(); -    if (Ty->isAggregateType() || Ty->isVectorTy()) +    if (!isValidElementType(Ty))        return false;      Instruction *Inst = dyn_cast<Instruction>(VL[i]);      if (!Inst || Inst->getOpcode() != Opcode0) @@ -3389,7 +3403,7 @@ public:        return false;      Type *Ty = B->getType(); -    if (Ty->isVectorTy()) +    if (!isValidElementType(Ty))        return false;      ReductionOpcode = B->getOpcode(); diff --git a/test/Bindings/llvm-c/Inputs/invalid.ll.bc b/test/Bindings/llvm-c/Inputs/invalid.ll.bc Binary files differnew file mode 100644 index 000000000000..a85c3644b3ab --- /dev/null +++ b/test/Bindings/llvm-c/Inputs/invalid.ll.bc diff --git a/test/Bindings/llvm-c/invalid-bitcode.test b/test/Bindings/llvm-c/invalid-bitcode.test new file mode 100644 index 000000000000..6318a9bf13d9 --- /dev/null +++ b/test/Bindings/llvm-c/invalid-bitcode.test @@ -0,0 +1,3 @@ +; RUN: not llvm-c-test --module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s + +CHECK: Error parsing bitcode: Unknown attribute kind (48) diff --git a/test/CodeGen/AArch64/setcc-type-mismatch.ll b/test/CodeGen/AArch64/setcc-type-mismatch.ll new file mode 100644 index 000000000000..86817fa4fa40 --- /dev/null +++ b/test/CodeGen/AArch64/setcc-type-mismatch.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s + +define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) { +; CHECK-LABEL: test_mismatched_setcc: +; CHECK: cmeq [[CMP128:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: xtn {{v[0-9]+}}.4h, [[CMP128]].4s + +  %tst = icmp eq <4 x i22> %l, %r +  store <4 x i1> %tst, <4 x i1>* %addr +  ret void +} diff --git a/test/CodeGen/ARM/Windows/read-only-data.ll b/test/CodeGen/ARM/Windows/read-only-data.ll index 0ccb5ededff2..0438d68b55c6 100644 --- a/test/CodeGen/ARM/Windows/read-only-data.ll +++ b/test/CodeGen/ARM/Windows/read-only-data.ll @@ -10,6 +10,6 @@ entry:    ret void  } -; CHECK: .section .rdata,"rd" +; CHECK: .section .rdata,"dr"  ; CHECK-NOT: .section ".rodata.str1.1" diff --git a/test/CodeGen/ARM/Windows/structors.ll b/test/CodeGen/ARM/Windows/structors.ll index a1a90265c03a..874b5bf35b81 100644 --- a/test/CodeGen/ARM/Windows/structors.ll +++ b/test/CodeGen/ARM/Windows/structors.ll @@ -7,6 +7,6 @@ entry:    ret void  } -; CHECK: .section .CRT$XCU,"rd" +; CHECK: .section .CRT$XCU,"dr"  ; CHECK: .long function diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll index 5ad87191efe9..24c28baff881 100644 --- a/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -9,8 +9,8 @@  define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {  entry:  ; NO-REALIGN-LABEL: test1 -; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
+; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128] +; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16  ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32  ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] @@ -21,14 +21,16 @@ entry:  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16 +; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! +; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16 +; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]   %retval = alloca <16 x float>, align 16   %0 = load <16 x float>* @T3_retval, align 16 @@ -42,8 +44,8 @@ define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {  entry:  ; REALIGN-LABEL: test2  ; REALIGN: bfc sp, #0, #6 -; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128] +; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16  ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32  ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] @@ -63,7 +65,8 @@ entry:  ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]  ; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32  ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! +; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #16 +; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]  ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]   %retval = alloca <16 x float>, align 16   %0 = load <16 x float>* @T3_retval, align 16 diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index 33ac4e125633..84ce4a7f0e79 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -46,8 +46,10 @@ entry:  ; CHECK: movw [[REG2:r[0-9]+]], #16716  ; CHECK: movt [[REG2:r[0-9]+]], #72  ; CHECK: str [[REG2]], [r0, #32] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: adds r0, #16 +; CHECK: adds r1, #16  ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]  ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) @@ -57,8 +59,10 @@ entry:  define void @t3(i8* nocapture %C) nounwind {  entry:  ; CHECK-LABEL: t3: -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! 
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: adds r0, #16 +; CHECK: adds r1, #16  ; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]  ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) @@ -69,8 +73,7 @@ define void @t4(i8* nocapture %C) nounwind {  entry:  ; CHECK-LABEL: t4:  ; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] -; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]! -; CHECK: strh [[REG5:r[0-9]+]], [r0] +; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)    ret void  } diff --git a/test/CodeGen/ARM/setcc-type-mismatch.ll b/test/CodeGen/ARM/setcc-type-mismatch.ll new file mode 100644 index 000000000000..2cfdba12db54 --- /dev/null +++ b/test/CodeGen/ARM/setcc-type-mismatch.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s + +define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) { +; CHECK-LABEL: test_mismatched_setcc: +; CHECK: vceq.i32 [[CMP128:q[0-9]+]], {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vmovn.i32 {{d[0-9]+}}, [[CMP128]] + +  %tst = icmp eq <4 x i22> %l, %r +  store <4 x i1> %tst, <4 x i1>* %addr +  ret void +} diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll index 19727dabf09e..f7328dc580ef 100644 --- a/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -88,6 +88,19 @@ if.end11:                                         ; preds = %num2long.exit    ret i32 23  } +; When considering the producer of cmp's src as the subsuming instruction, +; only consider that when the comparison is to 0. +define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_src_nonzero: +; CHECK: sub +; CHECK: cmp +  %sub = sub i32 %a, %b +  %cmp = icmp eq i32 %sub, 17 +  %ret = select i1 %cmp, i32 %x, i32 %y +  ret i32 %ret +} +  define float @float_sel(i32 %a, i32 %b, float %x, float %y) {  entry:  ; CHECK-LABEL: float_sel: @@ -144,3 +157,50 @@ entry:    store i32 %sub, i32* @t    ret double %ret  } + +declare void @abort() +declare void @exit(i32) + +; If the comparison uses the V bit (signed overflow/underflow), we can't +; omit the comparison. +define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_slt0 +; CHECK: sub +; CHECK: cmp +; CHECK: bge +  %load = load i32* @t, align 4 +  %sub = sub i32 %load, 17 +  %cmp = icmp slt i32 %sub, 0 +  br i1 %cmp, label %if.then, label %if.else + +if.then: +  call void @abort() +  unreachable + +if.else: +  call void @exit(i32 0) +  unreachable +} + +; Same for the C bit. (Note the ult X, 0 is trivially +; false, so the DAG combiner may or may not optimize it). 
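
The tests around this point pin down when the peephole may reuse the flags a sub already set: only for comparisons against zero, and not for predicates that read the V or C bit. A standalone sketch of that legality check follows; canReuseSubFlags is an invented model, not the pass's code:

#include <cstdint>
#include <iostream>

// Illustrative model: `sub` followed by `cmp` can drop the cmp only when
// the comparison is against 0 and the branch predicate does not depend on
// the overflow (V) or carry (C) bit, per the comments in the tests.
static bool canReuseSubFlags(int64_t CmpImm, bool UsesOverflowBits) {
  if (CmpImm != 0)
    return false; // cmp against 17 etc. tests a different condition
  if (UsesOverflowBits)
    return false; // slt/ult style predicates still need a real cmp
  return true;
}

int main() {
  std::cout << canReuseSubFlags(0, false)       // 1: eq/ne against 0
            << canReuseSubFlags(17, false)      // 0
            << canReuseSubFlags(0, true) << '\n'; // 0
}
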
+define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_ult0 +; CHECK: sub +; CHECK: cmp +; CHECK: bhs +  %load = load i32* @t, align 4 +  %sub = sub i32 %load, 17 +  %cmp = icmp ult i32 %sub, 0 +  br i1 %cmp, label %if.then, label %if.else + +if.then: +  call void @abort() +  unreachable + +if.else: +  call void @exit(i32 0) +  unreachable +} diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll deleted file mode 100644 index 008bd1f6f8c8..000000000000 --- a/test/CodeGen/ARM/vector-load.ll +++ /dev/null @@ -1,253 +0,0 @@ -; RUN: llc < %s | FileCheck %s - -target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -target triple = "thumbv7s-apple-ios8.0.0" - -define <8 x i8> @load_v8i8(<8 x i8>** %ptr) { -;CHECK-LABEL: load_v8i8: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <8 x i8>** %ptr -	%lA = load <8 x i8>* %A, align 1 -	ret <8 x i8> %lA -} - -define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) { -;CHECK-LABEL: load_v8i8_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <8 x i8>** %ptr -	%lA = load <8 x i8>* %A, align 1 -	%inc = getelementptr <8 x i8>* %A, i38 1 -        store <8 x i8>* %inc, <8 x i8>** %ptr -	ret <8 x i8> %lA -} - -define <4 x i16> @load_v4i16(<4 x i16>** %ptr) { -;CHECK-LABEL: load_v4i16: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <4 x i16>** %ptr -	%lA = load <4 x i16>* %A, align 1 -	ret <4 x i16> %lA -} - -define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) { -;CHECK-LABEL: load_v4i16_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <4 x i16>** %ptr -	%lA = load <4 x i16>* %A, align 1 -	%inc = getelementptr <4 x i16>* %A, i34 1 -        store <4 x i16>* %inc, <4 x i16>** %ptr -	ret <4 x i16> %lA -} - -define <2 x i32> @load_v2i32(<2 x i32>** %ptr) { -;CHECK-LABEL: load_v2i32: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <2 x i32>** %ptr -	%lA = load <2 x i32>* %A, align 1 -	ret <2 x i32> %lA -} - -define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) { -;CHECK-LABEL: load_v2i32_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x i32>** %ptr -	%lA = load <2 x i32>* %A, align 1 -	%inc = getelementptr <2 x i32>* %A, i32 1 -        store <2 x i32>* %inc, <2 x i32>** %ptr -	ret <2 x i32> %lA -} - -define <2 x float> @load_v2f32(<2 x float>** %ptr) { -;CHECK-LABEL: load_v2f32: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <2 x float>** %ptr -	%lA = load <2 x float>* %A, align 1 -	ret <2 x float> %lA -} - -define <2 x float> @load_v2f32_update(<2 x float>** %ptr) { -;CHECK-LABEL: load_v2f32_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x float>** %ptr -	%lA = load <2 x float>* %A, align 1 -	%inc = getelementptr <2 x float>* %A, i32 1 -        store <2 x float>* %inc, <2 x float>** %ptr -	ret <2 x float> %lA -} - -define <1 x i64> @load_v1i64(<1 x i64>** %ptr) { -;CHECK-LABEL: load_v1i64: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <1 x i64>** %ptr -	%lA = load <1 x i64>* %A, align 1 -	ret <1 x i64> %lA -} - -define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) { -;CHECK-LABEL: load_v1i64_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! 
-	%A = load <1 x i64>** %ptr -	%lA = load <1 x i64>* %A, align 1 -	%inc = getelementptr <1 x i64>* %A, i31 1 -        store <1 x i64>* %inc, <1 x i64>** %ptr -	ret <1 x i64> %lA -} - -define <16 x i8> @load_v16i8(<16 x i8>** %ptr) { -;CHECK-LABEL: load_v16i8: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <16 x i8>** %ptr -	%lA = load <16 x i8>* %A, align 1 -	ret <16 x i8> %lA -} - -define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) { -;CHECK-LABEL: load_v16i8_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <16 x i8>** %ptr -	%lA = load <16 x i8>* %A, align 1 -	%inc = getelementptr <16 x i8>* %A, i316 1 -        store <16 x i8>* %inc, <16 x i8>** %ptr -	ret <16 x i8> %lA -} - -define <8 x i16> @load_v8i16(<8 x i16>** %ptr) { -;CHECK-LABEL: load_v8i16: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <8 x i16>** %ptr -	%lA = load <8 x i16>* %A, align 1 -	ret <8 x i16> %lA -} - -define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) { -;CHECK-LABEL: load_v8i16_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <8 x i16>** %ptr -	%lA = load <8 x i16>* %A, align 1 -	%inc = getelementptr <8 x i16>* %A, i38 1 -        store <8 x i16>* %inc, <8 x i16>** %ptr -	ret <8 x i16> %lA -} - -define <4 x i32> @load_v4i32(<4 x i32>** %ptr) { -;CHECK-LABEL: load_v4i32: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <4 x i32>** %ptr -	%lA = load <4 x i32>* %A, align 1 -	ret <4 x i32> %lA -} - -define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) { -;CHECK-LABEL: load_v4i32_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <4 x i32>** %ptr -	%lA = load <4 x i32>* %A, align 1 -	%inc = getelementptr <4 x i32>* %A, i34 1 -        store <4 x i32>* %inc, <4 x i32>** %ptr -	ret <4 x i32> %lA -} - -define <4 x float> @load_v4f32(<4 x float>** %ptr) { -;CHECK-LABEL: load_v4f32: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <4 x float>** %ptr -	%lA = load <4 x float>* %A, align 1 -	ret <4 x float> %lA -} - -define <4 x float> @load_v4f32_update(<4 x float>** %ptr) { -;CHECK-LABEL: load_v4f32_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <4 x float>** %ptr -	%lA = load <4 x float>* %A, align 1 -	%inc = getelementptr <4 x float>* %A, i34 1 -        store <4 x float>* %inc, <4 x float>** %ptr -	ret <4 x float> %lA -} - -define <2 x i64> @load_v2i64(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <2 x i64>** %ptr -	%lA = load <2 x i64>* %A, align 1 -	ret <2 x i64> %lA -} - -define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x i64>** %ptr -	%lA = load <2 x i64>* %A, align 1 -	%inc = getelementptr <2 x i64>* %A, i32 1 -        store <2 x i64>* %inc, <2 x i64>** %ptr -	ret <2 x i64> %lA -} - -; Make sure we change the type to match alignment if necessary. -define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64_update_aligned2: -;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x i64>** %ptr -	%lA = load <2 x i64>* %A, align 2 -	%inc = getelementptr <2 x i64>* %A, i32 1 -        store <2 x i64>* %inc, <2 x i64>** %ptr -	ret <2 x i64> %lA -} - -define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64_update_aligned4: -;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! 
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 4
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret <2 x i64> %lA
-}
-
-define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
-;CHECK-LABEL: load_v2i64_update_aligned8:
-;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 8
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret <2 x i64> %lA
-}
-
-define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
-;CHECK-LABEL: load_v2i64_update_aligned16:
-;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 16
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret <2 x i64> %lA
-}
-
-; Make sure we don't break smaller-than-dreg extloads.
-define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
-;CHECK-LABEL: zextload_v8i8tov8i32:
-;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
-;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
-;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
-	%A = load <4 x i8>** %ptr
-	%lA = load <4 x i8>* %A, align 4
-        %zlA = zext <4 x i8> %lA to <4 x i32>
-	ret <4 x i32> %zlA
-}
-
-define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
-;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
-;CHECK: ldr.w   r[[PTRREG:[0-9]+]], [r0]
-;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
-;CHECK: add.w   r[[INCREG:[0-9]+]], r[[PTRREG]], #16
-;CHECK: str.w   r[[INCREG]], [r0]
-;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
-;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
-	%A = load <4 x i8>** %ptr
-	%lA = load <4 x i8>* %A, align 4
-	%inc = getelementptr <4 x i8>* %A, i38 4
-        store <4 x i8>* %inc, <4 x i8>** %ptr
-        %zlA = zext <4 x i8> %lA to <4 x i32>
-	ret <4 x i32> %zlA
-}
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll
deleted file mode 100644
index 9036a31d141d..000000000000
--- a/test/CodeGen/ARM/vector-store.ll
+++ /dev/null
@@ -1,258 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
-target triple = "thumbv7s-apple-ios8.0.0"
-
-define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) {
-;CHECK-LABEL: store_v8i8:
-;CHECK: str r1, [r0]
-	%A = load <8 x i8>** %ptr
-	store  <8 x i8> %val, <8 x i8>* %A, align 1
-	ret void
-}
-
-define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
-;CHECK-LABEL: store_v8i8_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i8>** %ptr
-	store  <8 x i8> %val, <8 x i8>* %A, align 1
-	%inc = getelementptr <8 x i8>* %A, i38 1
-        store <8 x i8>* %inc, <8 x i8>** %ptr
-	ret void
-}
-
-define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) {
-;CHECK-LABEL: store_v4i16:
-;CHECK: str r1, [r0]
-	%A = load <4 x i16>** %ptr
-	store  <4 x i16> %val, <4 x i16>* %A, align 1
-	ret void
-}
-
-define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) {
-;CHECK-LABEL: store_v4i16_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i16>** %ptr
-	store  <4 x i16> %val, <4 x i16>* %A, align 1
-	%inc = getelementptr <4 x i16>* %A, i34 1
-        store <4 x i16>* %inc, <4 x i16>** %ptr
-	ret void
-}
-
-define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) {
-;CHECK-LABEL: store_v2i32:
-;CHECK: str r1, [r0]
-	%A = load <2 x i32>** %ptr
-	store  <2 x i32> %val, <2 x i32>* %A, align 1
-	ret void
-}
-
-define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) {
-;CHECK-LABEL: store_v2i32_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i32>** %ptr
-	store  <2 x i32> %val, <2 x i32>* %A, align 1
-	%inc = getelementptr <2 x i32>* %A, i32 1
-        store <2 x i32>* %inc, <2 x i32>** %ptr
-	ret void
-}
-
-define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) {
-;CHECK-LABEL: store_v2f32:
-;CHECK: str r1, [r0]
-	%A = load <2 x float>** %ptr
-	store  <2 x float> %val, <2 x float>* %A, align 1
-	ret void
-}
-
-define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) {
-;CHECK-LABEL: store_v2f32_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x float>** %ptr
-	store  <2 x float> %val, <2 x float>* %A, align 1
-	%inc = getelementptr <2 x float>* %A, i32 1
-        store <2 x float>* %inc, <2 x float>** %ptr
-	ret void
-}
-
-define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) {
-;CHECK-LABEL: store_v1i64:
-;CHECK: str r1, [r0]
-	%A = load <1 x i64>** %ptr
-	store  <1 x i64> %val, <1 x i64>* %A, align 1
-	ret void
-}
-
-define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) {
-;CHECK-LABEL: store_v1i64_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <1 x i64>** %ptr
-	store  <1 x i64> %val, <1 x i64>* %A, align 1
-	%inc = getelementptr <1 x i64>* %A, i31 1
-        store <1 x i64>* %inc, <1 x i64>** %ptr
-	ret void
-}
-
-define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) {
-;CHECK-LABEL: store_v16i8:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <16 x i8>** %ptr
-	store  <16 x i8> %val, <16 x i8>* %A, align 1
-	ret void
-}
-
-define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) {
-;CHECK-LABEL: store_v16i8_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <16 x i8>** %ptr
-	store  <16 x i8> %val, <16 x i8>* %A, align 1
-	%inc = getelementptr <16 x i8>* %A, i316 1
-        store <16 x i8>* %inc, <16 x i8>** %ptr
-	ret void
-}
-
-define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) {
-;CHECK-LABEL: store_v8i16:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <8 x i16>** %ptr
-	store  <8 x i16> %val, <8 x i16>* %A, align 1
-	ret void
-}
-
-define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) {
-;CHECK-LABEL: store_v8i16_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i16>** %ptr
-	store  <8 x i16> %val, <8 x i16>* %A, align 1
-	%inc = getelementptr <8 x i16>* %A, i38 1
-        store <8 x i16>* %inc, <8 x i16>** %ptr
-	ret void
-}
-
-define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: store_v4i32:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x i32>** %ptr
-	store  <4 x i32> %val, <4 x i32>* %A, align 1
-	ret void
-}
-
-define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: store_v4i32_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i32>** %ptr
-	store  <4 x i32> %val, <4 x i32>* %A, align 1
-	%inc = getelementptr <4 x i32>* %A, i34 1
-        store <4 x i32>* %inc, <4 x i32>** %ptr
-	ret void
-}
-
-define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) {
-;CHECK-LABEL: store_v4f32:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x float>** %ptr
-	store  <4 x float> %val, <4 x float>* %A, align 1
-	ret void
-}
-
-define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) {
-;CHECK-LABEL: store_v4f32_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x float>** %ptr
-	store  <4 x float> %val, <4 x float>* %A, align 1
-	%inc = getelementptr <4 x float>* %A, i34 1
-        store <4 x float>* %inc, <4 x float>** %ptr
-	ret void
-}
-
-define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 1
-	ret void
-}
-
-define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 1
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned2:
-;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 2
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned4:
-;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 4
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned8:
-;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 8
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned16:
-;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 16
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: truncstore_v4i32tov4i8:
-;CHECK: ldr.w   r9, [sp]
-;CHECK: vmov    {{d[0-9]+}}, r3, r9
-;CHECK: vmov    {{d[0-9]+}}, r1, r2
-;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
-;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
-;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
-	%A = load <4 x i8>** %ptr
-        %trunc = trunc <4 x i32> %val to <4 x i8>
-	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
-	ret void
-}
-
-define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: truncstore_v4i32tov4i8_fake_update:
-;CHECK: ldr.w   r9, [sp]
-;CHECK: vmov    {{d[0-9]+}}, r3, r9
-;CHECK: vmov    {{d[0-9]+}}, r1, r2
-;CHECK: movs    [[IMM16:r[0-9]+]], #16
-;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
-;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
-;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
-;CHECK: str     r[[PTRREG]], [r0]
-	%A = load <4 x i8>** %ptr
-        %trunc = trunc <4 x i32> %val to <4 x i8>
-	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
-	%inc = getelementptr <4 x i8>* %A, i38 4
-        store <4 x i8>* %inc, <4 x i8>** %ptr
-	ret void
-}
diff --git a/test/CodeGen/PowerPC/vsel-prom.ll b/test/CodeGen/PowerPC/vsel-prom.ll
new file mode 100644
index 000000000000..dd219ec0da6f
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsel-prom.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @Compute_Lateral() #0 {
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %0 = select i1 undef, <2 x double> undef, <2 x double> zeroinitializer
+  %1 = extractelement <2 x double> %0, i32 1
+  store double %1, double* undef, align 8
+  ret void
+
+; CHECK-LABEL: @Compute_Lateral
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/R600/endcf-loop-header.ll b/test/CodeGen/R600/endcf-loop-header.ll
new file mode 100644
index 000000000000..e3c5b3c1c364
--- /dev/null
+++ b/test/CodeGen/R600/endcf-loop-header.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; This tests that the llvm.SI.end.cf intrinsic is not inserted into the
+; loop block.  This intrinsic will be lowered to s_or_b64 by the code
+; generator.
+
+; CHECK-LABEL: {{^}}test:
+
+; This was lowered from the llvm.SI.end.cf intrinsic:
+; CHECK: s_or_b64 exec, exec
+
+; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
+; CHECK-NOT: s_or_b64 exec, exec
+; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
+define void @test(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+  %tmp0 = icmp eq i32 %cond, 0
+  br i1 %tmp0, label %if, label %loop
+
+if:
+  store i32 0, i32 addrspace(1)* %out
+  br label %loop
+
+loop:
+  %tmp1 = phi i32 [0, %entry], [0, %if], [%inc, %loop]
+  %inc = add i32 %tmp1, %cond
+  %tmp2 = icmp ugt i32 %inc, 10
+  br i1 %tmp2, label %done, label %loop
+
+done:
+  %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1
+  store i32 %inc, i32 addrspace(1)* %tmp3
+  ret void
+}
diff --git a/test/CodeGen/R600/tti-unroll-prefs.ll b/test/CodeGen/R600/tti-unroll-prefs.ll
new file mode 100644
index 000000000000..0009c42f79bc
--- /dev/null
+++ b/test/CodeGen/R600/tti-unroll-prefs.ll
@@ -0,0 +1,58 @@
+; RUN: opt -loop-unroll -S -mtriple=amdgcn-- -mcpu=SI %s | FileCheck %s
+
+; This IR comes from this OpenCL C code:
+;
+; if (b + 4 > a) {
+;   for (int i = 0; i < 4; i++, b++) {
+;     if (b + 1 <= a)
+;       *(dst + c + b) = 0;
+;     else
+;       break;
+;   }
+; }
+;
+; This test is meant to check that this loop isn't unrolled into more than
+; four iterations.  The loop unrolling preferences we currently use cause this
+; loop to not be unrolled at all, but that may change in the future.
+
+; CHECK-LABEL: @test
+; CHECK: store i8 0, i8 addrspace(1)*
+; CHECK-NOT: store i8 0, i8 addrspace(1)*
+; CHECK: ret void
+define void @test(i8 addrspace(1)* nocapture %dst, i32 %a, i32 %b, i32 %c) {
+entry:
+  %add = add nsw i32 %b, 4
+  %cmp = icmp sgt i32 %add, %a
+  br i1 %cmp, label %for.cond.preheader, label %if.end7
+
+for.cond.preheader:                               ; preds = %entry
+  %cmp313 = icmp slt i32 %b, %a
+  br i1 %cmp313, label %if.then4.lr.ph, label %if.end7.loopexit
+
+if.then4.lr.ph:                                   ; preds = %for.cond.preheader
+  %0 = sext i32 %c to i64
+  br label %if.then4
+
+if.then4:                                         ; preds = %if.then4.lr.ph, %if.then4
+  %i.015 = phi i32 [ 0, %if.then4.lr.ph ], [ %inc, %if.then4 ]
+  %b.addr.014 = phi i32 [ %b, %if.then4.lr.ph ], [ %add2, %if.then4 ]
+  %add2 = add nsw i32 %b.addr.014, 1
+  %1 = sext i32 %b.addr.014 to i64
+  %add.ptr.sum = add nsw i64 %1, %0
+  %add.ptr5 = getelementptr inbounds i8 addrspace(1)* %dst, i64 %add.ptr.sum
+  store i8 0, i8 addrspace(1)* %add.ptr5, align 1
+  %inc = add nsw i32 %i.015, 1
+  %cmp1 = icmp slt i32 %inc, 4
+  %cmp3 = icmp slt i32 %add2, %a
+  %or.cond = and i1 %cmp3, %cmp1
+  br i1 %or.cond, label %if.then4, label %for.cond.if.end7.loopexit_crit_edge
+
+for.cond.if.end7.loopexit_crit_edge:              ; preds = %if.then4
+  br label %if.end7.loopexit
+
+if.end7.loopexit:                                 ; preds = %for.cond.if.end7.loopexit_crit_edge, %for.cond.preheader
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.end7.loopexit, %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/coff-comdat.ll b/test/CodeGen/X86/coff-comdat.ll
index dcbbe1097d53..44e1cb236e91 100644
--- a/test/CodeGen/X86/coff-comdat.ll
+++ b/test/CodeGen/X86/coff-comdat.ll
@@ -73,20 +73,20 @@ $vftable = comdat largest
 ; CHECK: .globl  @v8@0
 ; CHECK: .section        .text,"xr",discard,@f8@0
 ; CHECK: .globl  @f8@0
-; CHECK: .section        .bss,"wb",associative,_f1
+; CHECK: .section        .bss,"bw",associative,_f1
 ; CHECK: .globl  _v1
-; CHECK: .section        .bss,"wb",associative,_f2
+; CHECK: .section        .bss,"bw",associative,_f2
 ; CHECK: .globl  _v2
-; CHECK: .section        .bss,"wb",associative,_f3
+; CHECK: .section        .bss,"bw",associative,_f3
 ; CHECK: .globl  _v3
-; CHECK: .section        .bss,"wb",associative,_f4
+; CHECK: .section        .bss,"bw",associative,_f4
 ; CHECK: .globl  _v4
-; CHECK: .section        .bss,"wb",associative,_f5
+; CHECK: .section        .bss,"bw",associative,_f5
 ; CHECK: .globl  _v5
-; CHECK: .section        .bss,"wb",associative,_f6
+; CHECK: .section        .bss,"bw",associative,_f6
 ; CHECK: .globl  _v6
-; CHECK: .section        .bss,"wb",same_size,_f6
+; CHECK: .section        .bss,"bw",same_size,_f6
 ; CHECK: .globl  _f6
-; CHECK: .section        .rdata,"rd",largest,_vftable
+; CHECK: .section        .rdata,"dr",largest,_vftable
 ; CHECK: .globl  _vftable
 ; CHECK: _vftable = L_some_name+4
diff --git a/test/CodeGen/X86/constant-combines.ll b/test/CodeGen/X86/constant-combines.ll
new file mode 100644
index 000000000000..d2a6ef4f5d25
--- /dev/null
+++ b/test/CodeGen/X86/constant-combines.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define void @PR22524({ float, float }* %arg) {
+; Check that we can materialize the zero constants we store in two places here,
+; and at least form a legal store of the floating point value at the end.
+; The DAG combiner at one point contained bugs that, given enough permutations,
+; would incorrectly form an illegal operation for the last of these stores when
+; it folded it to a zero too late to legalize the zero store operation. If this
+; ever starts forming a zero store instead of movss, the test case has stopped
+; being useful.
+;
+; CHECK-LABEL: PR22524:
+entry:
+  %0 = getelementptr inbounds { float, float }* %arg,  i32 0, i32 1
+  store float 0.000000e+00, float* %0, align 4
+; CHECK: movl $0, 4(%rdi)
+
+  %1 = getelementptr inbounds { float, float }* %arg, i64 0,  i32 0
+  %2 = bitcast float* %1 to i64*
+  %3 = load i64* %2, align 8
+  %4 = trunc i64 %3 to i32
+  %5 = lshr i64 %3, 32
+  %6 = trunc i64 %5 to i32
+  %7 = bitcast i32 %6 to float
+  %8 = fmul float %7, 0.000000e+00
+  %9 = bitcast float* %1 to i32*
+  store i32 %6, i32* %9, align 4
+; CHECK: movl $0, (%rdi)
+  store float %8, float* %0, align 4
+; CHECK: movss %{{.*}}, 4(%rdi)
+  ret void
+}
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index c673f5d485f9..cf4557d12716 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -40,18 +40,18 @@ define weak_odr dllexport void @weak1() {
 ; CHECK: .globl Var1
 @Var1 = dllexport global i32 1, align 4

-; CHECK: .rdata,"rd"
+; CHECK: .rdata,"dr"
 ; CHECK: .globl Var2
 @Var2 = dllexport unnamed_addr constant i32 1

 ; CHECK: .comm Var3
 @Var3 = common dllexport global i32 0, align 4

-; CHECK: .section .data,"wd",discard,WeakVar1
+; CHECK: .section .data,"dw",discard,WeakVar1
 ; CHECK: .globl WeakVar1
 @WeakVar1 = weak_odr dllexport global i32 1, align 4

-; CHECK: .section .rdata,"rd",discard,WeakVar2
+; CHECK: .section .rdata,"dr",discard,WeakVar2
 ; CHECK: .globl WeakVar2
 @WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index 5035aa153301..145b48aaf635 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -21,6 +21,8 @@ define dllexport void @f2() unnamed_addr {
 	ret void
 }

+declare dllexport void @not_defined()
+
 ; CHECK: .globl _stdfun@0
 define dllexport x86_stdcallcc void @stdfun() nounwind {
 	ret void
@@ -59,18 +61,18 @@ define weak_odr dllexport void @weak1() {
 ; CHECK: .globl _Var1
 @Var1 = dllexport global i32 1, align 4

-; CHECK: .rdata,"rd"
+; CHECK: .rdata,"dr"
 ; CHECK: .globl _Var2
 @Var2 = dllexport unnamed_addr constant i32 1

 ; CHECK: .comm _Var3
 @Var3 = common dllexport global i32 0, align 4

-; CHECK: .section .data,"wd",discard,_WeakVar1
+; CHECK: .section .data,"dw",discard,_WeakVar1
 ; CHECK: .globl _WeakVar1
 @WeakVar1 = weak_odr dllexport global i32 1, align 4

-; CHECK: .section .rdata,"rd",discard,_WeakVar2
+; CHECK: .section .rdata,"dr",discard,_WeakVar2
 ; CHECK: .globl _WeakVar2
 @WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
@@ -91,7 +93,6 @@ define weak_odr dllexport void @weak1() {
 ; CHECK: _weak_alias = _f1
 @weak_alias = weak_odr dllexport alias void()* @f1
-
 ; CHECK: .section .drectve
 ; CHECK-CL: " /EXPORT:_Var1,DATA"
 ; CHECK-CL: " /EXPORT:_Var2,DATA"
@@ -100,6 +101,7 @@ define weak_odr dllexport void @weak1() {
 ; CHECK-CL: " /EXPORT:_WeakVar2,DATA"
 ; CHECK-CL: " /EXPORT:_f1"
 ; CHECK-CL: " /EXPORT:_f2"
+; CHECK-CL-NOT: not_exported
 ; CHECK-CL: " /EXPORT:_stdfun@0"
 ; CHECK-CL: " /EXPORT:@fastfun@0"
 ; CHECK-CL: " /EXPORT:_thisfun"
@@ -117,6 +119,7 @@ define weak_odr dllexport void @weak1() {
 ; CHECK-GCC: " -export:WeakVar2,data"
 ; CHECK-GCC: " -export:f1"
 ; CHECK-GCC: " -export:f2"
+; CHECK-CL-NOT: not_exported
 ; CHECK-GCC: " -export:stdfun@0"
 ; CHECK-GCC: " -export:@fastfun@0"
 ; CHECK-GCC: " -export:thisfun"
diff --git a/test/CodeGen/X86/fold-vex.ll b/test/CodeGen/X86/fold-vex.ll
index 2bb5b441c7c0..5a8b1d8cbfdf 100644
--- a/test/CodeGen/X86/fold-vex.ll
+++ b/test/CodeGen/X86/fold-vex.ll
@@ -1,16 +1,31 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.

-;CHECK: @test
-; No need to load from memory. The operand will be loaded as part of th AND instr.
-;CHECK-NOT: vmovaps
-;CHECK: vandps
-;CHECK: ret
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown                  -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx             | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2                 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown                  -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2     -mattr=-avx | FileCheck %s --check-prefix=SSE

-define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind {
-entry:
-  %in0 = load <8 x i32>* %p0, align 2
-  %a = and <8 x i32> %in0, %in1
-  store <8 x i32> %a, <8 x i32>* undef
-  ret void
+; No need to load unaligned operand from memory using an explicit instruction with AVX.
+; The operand should be folded into the AND instr.
+
+; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
+; unless specially configured on some CPUs such as AMD Family 10H.
+
+define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
+  %in0 = load <4 x i32>* %p0, align 2
+  %a = and <4 x i32> %in0, %in1
+  ret <4 x i32> %a
+
+; CHECK-LABEL: @test1
+; CHECK-NOT:   vmovups
+; CHECK:       vandps (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:  ret
+
+; SSE-LABEL: @test1
+; SSE:       movups (%rdi), %xmm1
+; SSE-NEXT:  andps %xmm1, %xmm0
+; SSE-NEXT:  ret
 }
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index fa1169d8a8e3..d6e45ad79ea9 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -48,7 +48,7 @@ define void @F1() {
 ; LINUX-SECTIONS: .section        .rodata.G3,"a",@progbits
 ; LINUX-SECTIONS: .globl  G3

-; WIN32-SECTIONS: .section        .rdata,"rd",one_only,_G3
+; WIN32-SECTIONS: .section        .rdata,"dr",one_only,_G3
 ; WIN32-SECTIONS: .globl  _G3
@@ -126,7 +126,7 @@ define void @F1() {
 ; LINUX-SECTIONS: .section        .rodata.G7,"aMS",@progbits,1
 ; LINUX-SECTIONS:       .globl G7

-; WIN32-SECTIONS: .section        .rdata,"rd",one_only,_G7
+; WIN32-SECTIONS: .section        .rdata,"dr",one_only,_G7
 ; WIN32-SECTIONS:       .globl _G7
@@ -189,7 +189,7 @@ define void @F1() {
 ; LINUX-SECTIONS:        .asciz  "foo"
 ; LINUX-SECTIONS:        .size   .LG14, 4

-; WIN32-SECTIONS:        .section        .rdata,"rd"
+; WIN32-SECTIONS:        .section        .rdata,"dr"
 ; WIN32-SECTIONS: L_G14:
 ; WIN32-SECTIONS:        .asciz  "foo"
@@ -211,5 +211,5 @@ define void @F1() {
 ; LINUX-SECTIONS: .section      .rodata.G15,"aM",@progbits,8
 ; LINUX-SECTIONS: G15:

-; WIN32-SECTIONS: .section      .rdata,"rd",one_only,_G15
+; WIN32-SECTIONS: .section      .rdata,"dr",one_only,_G15
 ; WIN32-SECTIONS: _G15:
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll
index b4dc5fd47168..90df9905fe1a 100644
--- a/test/CodeGen/X86/pr15267.ll
+++ b/test/CodeGen/X86/pr15267.ll
@@ -4,8 +4,7 @@ define <4 x i3> @test1(<4 x i3>* %in) nounwind {
   %ret = load <4 x i3>* %in, align 1
   ret <4 x i3> %ret
 }
-
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK: movzwl
 ; CHECK: shrl $3
 ; CHECK: andl $7
@@ -25,7 +24,7 @@ define <4 x i1> @test2(<4 x i1>* %in) nounwind {
   ret <4 x i1> %ret
 }

-; CHECK: test2
+; CHECK-LABEL: test2
 ; CHECK: movzbl
 ; CHECK: shrl
 ; CHECK: andl $1
@@ -46,7 +45,7 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
   ret <4 x i64> %sext
 }

-; CHECK: test3
+; CHECK-LABEL: test3
 ; CHECK: movzbl
 ; CHECK: movq
 ; CHECK: shlq
@@ -67,3 +66,71 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
 ; CHECK: vpunpcklqdq
 ; CHECK: vinsertf128
 ; CHECK: ret
+
+define <16 x i4> @test4(<16 x i4>* %in) nounwind {
+  %ret = load <16 x i4>* %in, align 1
+  ret <16 x i4> %ret
+}
+
+; CHECK-LABEL: test4
+; CHECK: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: movl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vmovd
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: shrq
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll
index 303c4a684761..ca5a02ce8d3a 100644
--- a/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -37,4 +37,16 @@ define <16 x i8> @test4(<2 x i64>* %V) {
   ret <16 x i8> %1
 }

+define <16 x i8> @test5() {
+; CHECK-LABEL: test5
+; CHECK: pshufb {{.*}}
+  store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16
+  %l = load <2 x i64>* undef, align 16
+  %shuffle = shufflevector <2 x i64> %l, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %shuffle, <2 x i64>* undef, align 16
+  %1 = load <16 x i8>* undef, align 16
+  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> %1)
+  ret <16 x i8> %2
+}
+
 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
diff --git a/test/CodeGen/X86/seh-basic.ll b/test/CodeGen/X86/seh-basic.ll
deleted file mode 100644
index 69d70d70948c..000000000000
--- a/test/CodeGen/X86/seh-basic.ll
+++ /dev/null
@@ -1,175 +0,0 @@
-; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
-
-define void @two_invoke_merged() {
-entry:
-  invoke void @try_body()
-          to label %again unwind label %lpad
-
-again:
-  invoke void @try_body()
-          to label %done unwind label %lpad
-
-done:
-  ret void
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt1 to i8*)
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  call void @use_selector(i32 %sel)
-  ret void
-}
-
-; Normal path code
-
-; CHECK-LABEL: {{^}}two_invoke_merged:
-; CHECK: .seh_proc two_invoke_merged
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp0:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp1:
-; CHECK: .Ltmp2:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp3:
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp5:
-; CHECK: movl $1, %ecx
-; CHECK: jmp
-; CHECK: .Ltmp6:
-; CHECK: movl $2, %ecx
-; CHECK: callq use_selector
-
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long .Ltmp0@IMGREL
-; CHECK-NEXT: .long .Ltmp3@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp5@IMGREL
-; CHECK-NEXT: .long .Ltmp0@IMGREL
-; CHECK-NEXT: .long .Ltmp3@IMGREL+1
-; CHECK-NEXT: .long filt1@IMGREL
-; CHECK-NEXT: .long .Ltmp6@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-define void @two_invoke_gap() {
-entry:
-  invoke void @try_body()
-          to label %again unwind label %lpad
-
-again:
-  call void @do_nothing_on_unwind()
-  invoke void @try_body()
-          to label %done unwind label %lpad
-
-done:
-  ret void
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  call void @use_selector(i32 %sel)
-  ret void
-}
-
-; Normal path code
-
-; CHECK-LABEL: {{^}}two_invoke_gap:
-; CHECK: .seh_proc two_invoke_gap
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp11:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp12:
-; CHECK: callq do_nothing_on_unwind
-; CHECK: .Ltmp13:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp14:
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp16:
-; CHECK: movl $1, %ecx
-; CHECK: callq use_selector
-
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long .Ltmp11@IMGREL
-; CHECK-NEXT: .long .Ltmp12@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp16@IMGREL
-; CHECK-NEXT: .long .Ltmp13@IMGREL
-; CHECK-NEXT: .long .Ltmp14@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp16@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-define void @two_invoke_nounwind_gap() {
-entry:
-  invoke void @try_body()
-          to label %again unwind label %lpad
-
-again:
-  call void @cannot_unwind()
-  invoke void @try_body()
-          to label %done unwind label %lpad
-
-done:
-  ret void
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  call void @use_selector(i32 %sel)
-  ret void
-}
-
-; Normal path code
-
-; CHECK-LABEL: {{^}}two_invoke_nounwind_gap:
-; CHECK: .seh_proc two_invoke_nounwind_gap
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp21:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp22:
-; CHECK: callq cannot_unwind
-; CHECK: .Ltmp23:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp24:
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp26:
-; CHECK: movl $1, %ecx
-; CHECK: callq use_selector
-
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long .Ltmp21@IMGREL
-; CHECK-NEXT: .long .Ltmp24@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp26@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-declare void @try_body()
-declare void @do_nothing_on_unwind()
-declare void @cannot_unwind() nounwind
-declare void @use_selector(i32)
-
-declare i32 @filt0(i8* %eh_info, i8* %rsp)
-declare i32 @filt1(i8* %eh_info, i8* %rsp)
-
-declare void @handler0()
-declare void @handler1()
-
-declare i32 @__C_specific_handler(...)
-declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll
deleted file mode 100644
index e911df04ded4..000000000000
--- a/test/CodeGen/X86/seh-safe-div.ll
+++ /dev/null
@@ -1,196 +0,0 @@
-; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
-
-; This test case is also intended to be run manually as a complete functional
-; test. It should link, print something, and exit zero rather than crashing.
-; It is the hypothetical lowering of a C source program that looks like:
-;
-;   int safe_div(int *n, int *d) {
-;     int r;
-;     __try {
-;       __try {
-;         r = *n / *d;
-;       } __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) {
-;         puts("EXCEPTION_ACCESS_VIOLATION");
-;         r = -1;
-;       }
-;     } __except(GetExceptionCode() == EXCEPTION_INT_DIVIDE_BY_ZERO) {
-;       puts("EXCEPTION_INT_DIVIDE_BY_ZERO");
-;       r = -2;
-;     }
-;     return r;
-;   }
-
-@str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00"
-@str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00"
-
-define i32 @safe_div(i32* %n, i32* %d) {
-entry:
-  %r = alloca i32, align 4
-  invoke void @try_body(i32* %r, i32* %n, i32* %d)
-          to label %__try.cont unwind label %lpad
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)
-  %ehptr = extractvalue { i8*, i32 } %vals, 0
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*))
-  %is_filt0 = icmp eq i32 %sel, %filt0_val
-  br i1 %is_filt0, label %handler0, label %eh.dispatch1
-
-eh.dispatch1:
-  %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*))
-  %is_filt1 = icmp eq i32 %sel, %filt1_val
-  br i1 %is_filt1, label %handler1, label %eh.resume
-
-handler0:
-  call void @puts(i8* getelementptr ([27 x i8]* @str1, i32 0, i32 0))
-  store i32 -1, i32* %r, align 4
-  br label %__try.cont
-
-handler1:
-  call void @puts(i8* getelementptr ([29 x i8]* @str2, i32 0, i32 0))
-  store i32 -2, i32* %r, align 4
-  br label %__try.cont
-
-eh.resume:
-  resume { i8*, i32 } %vals
-
-__try.cont:
-  %safe_ret = load i32* %r, align 4
-  ret i32 %safe_ret
-}
-
-; Normal path code
-
-; CHECK: {{^}}safe_div:
-; CHECK: .seh_proc safe_div
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp0:
-; CHECK: leaq [[rloc:.*\(%rsp\)]], %rcx
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp1
-; CHECK: .LBB0_7:
-; CHECK: movl [[rloc]], %eax
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp3:
-; CHECK: movl $1, %[[sel:[a-z]+]]
-; CHECK: .Ltmp4
-; CHECK: movl $2, %[[sel]]
-; CHECK: .L{{.*}}:
-; CHECK: cmpl $1, %[[sel]]
-
-; CHECK: # %handler0
-; CHECK: callq puts
-; CHECK: movl $-1, [[rloc]]
-; CHECK: jmp .LBB0_7
-
-; CHECK: cmpl $2, %[[sel]]
-
-; CHECK: # %handler1
-; CHECK: callq puts
-; CHECK: movl $-2, [[rloc]]
-; CHECK: jmp .LBB0_7
-
-; FIXME: EH preparation should not call _Unwind_Resume.
-; CHECK: callq _Unwind_Resume
-; CHECK: ud2
-
-; CHECK: .seh_handlerdata
-; CHECK: .long 2
-; CHECK: .long .Ltmp0@IMGREL
-; CHECK: .long .Ltmp1@IMGREL+1
-; CHECK: .long safe_div_filt0@IMGREL
-; CHECK: .long .Ltmp3@IMGREL
-; CHECK: .long .Ltmp0@IMGREL
-; CHECK: .long .Ltmp1@IMGREL+1
-; CHECK: .long safe_div_filt1@IMGREL
-; CHECK: .long .Ltmp4@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-
-define void @try_body(i32* %r, i32* %n, i32* %d) {
-entry:
-  %0 = load i32* %n, align 4
-  %1 = load i32* %d, align 4
-  %div = sdiv i32 %0, %1
-  store i32 %div, i32* %r, align 4
-  ret void
-}
-
-; The prototype of these filter functions is:
-; int filter(EXCEPTION_POINTERS *eh_ptrs, void *rbp);

-; The definition of EXCEPTION_POINTERS is:
-;   typedef struct _EXCEPTION_POINTERS {
-;     EXCEPTION_RECORD *ExceptionRecord;
-;     CONTEXT          *ContextRecord;
-;   } EXCEPTION_POINTERS;

-; The definition of EXCEPTION_RECORD is:
-;   typedef struct _EXCEPTION_RECORD {
-;     DWORD ExceptionCode;
-;     ...
-;   } EXCEPTION_RECORD;

-; The exception code can be retreived with two loads, one for the record
-; pointer and one for the code.  The values of local variables can be
-; accessed via rbp, but that would require additional not yet implemented LLVM
-; support.
-
-define i32 @safe_div_filt0(i8* %eh_ptrs, i8* %rbp) {
-  %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-  %eh_rec = load i32** %eh_ptrs_c
-  %eh_code = load i32* %eh_rec
-  ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
-  %cmp = icmp eq i32 %eh_code, 3221225477
-  %filt.res = zext i1 %cmp to i32
-  ret i32 %filt.res
-}
-
-define i32 @safe_div_filt1(i8* %eh_ptrs, i8* %rbp) {
-  %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-  %eh_rec = load i32** %eh_ptrs_c
-  %eh_code = load i32* %eh_rec
-  ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
-  %cmp = icmp eq i32 %eh_code, 3221225620
-  %filt.res = zext i1 %cmp to i32
-  ret i32 %filt.res
-}
-
-@str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00"
-
-define i32 @main() {
-  %d.addr = alloca i32, align 4
-  %n.addr = alloca i32, align 4
-
-  store i32 10, i32* %n.addr, align 4
-  store i32 2, i32* %d.addr, align 4
-  %r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
-  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r1)
-
-  store i32 10, i32* %n.addr, align 4
-  store i32 0, i32* %d.addr, align 4
-  %r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
-  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r2)
-
-  %r3 = call i32 @safe_div(i32* %n.addr, i32* null)
-  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r3)
-  ret i32 0
-}
-
-define void @_Unwind_Resume() {
-  call void @abort()
-  unreachable
-}
-
-declare i32 @__C_specific_handler(...)
-declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
-declare void @puts(i8*)
-declare void @printf(i8*, ...)
-declare void @abort()
diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index bb24adb41817..15f91ee04eaf 100644
--- a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
+++ b/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -1,5 +1,4 @@
-; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
-; CHECK: addps (
+; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -8,4 +7,7 @@ define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
 	%A = load <4 x float>* %P, align 4
 	%B = fadd <4 x float> %A, %In
 	ret <4 x float> %B
+
+; CHECK-LABEL: @foo
+; CHECK:       addps (
 }
diff --git a/test/CodeGen/X86/win_cst_pool.ll b/test/CodeGen/X86/win_cst_pool.ll
index e8b853a03dae..d534b126b192 100644
--- a/test/CodeGen/X86/win_cst_pool.ll
+++ b/test/CodeGen/X86/win_cst_pool.ll
@@ -6,7 +6,7 @@ define double @double() {
   ret double 0x0000000000800000
 }
 ; CHECK:              .globl  __real@0000000000800000
-; CHECK-NEXT:         .section        .rdata,"rd",discard,__real@0000000000800000
+; CHECK-NEXT:         .section        .rdata,"dr",discard,__real@0000000000800000
 ; CHECK-NEXT:         .align  8
 ; CHECK-NEXT: __real@0000000000800000:
 ; CHECK-NEXT:         .quad   8388608
@@ -18,7 +18,7 @@ define <4 x i32> @vec1() {
   ret <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 }
 ; CHECK:              .globl  __xmm@00000000000000010000000200000003
-; CHECK-NEXT:         .section        .rdata,"rd",discard,__xmm@00000000000000010000000200000003
+; CHECK-NEXT:         .section        .rdata,"dr",discard,__xmm@00000000000000010000000200000003
 ; CHECK-NEXT:         .align  16
 ; CHECK-NEXT: __xmm@00000000000000010000000200000003:
 ; CHECK-NEXT:         .long   3
@@ -33,7 +33,7 @@ define <8 x i16> @vec2() {
   ret <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
 }
 ; CHECK:             .globl  __xmm@00000001000200030004000500060007
-; CHECK-NEXT:        .section        .rdata,"rd",discard,__xmm@00000001000200030004000500060007
+; CHECK-NEXT:        .section        .rdata,"dr",discard,__xmm@00000001000200030004000500060007
 ; CHECK-NEXT:        .align  16
 ; CHECK-NEXT: __xmm@00000001000200030004000500060007:
 ; CHECK-NEXT:        .short  7
@@ -53,7 +53,7 @@ define <4 x float> @undef1() {
   ret <4 x float> <float 1.0, float 1.0, float undef, float undef>

 ; CHECK:             .globl  __xmm@00000000000000003f8000003f800000
-; CHECK-NEXT:        .section        .rdata,"rd",discard,__xmm@00000000000000003f8000003f800000
+; CHECK-NEXT:        .section        .rdata,"dr",discard,__xmm@00000000000000003f8000003f800000
 ; CHECK-NEXT:        .align  16
 ; CHECK-NEXT: __xmm@00000000000000003f8000003f800000:
 ; CHECK-NEXT:        .long   1065353216              # float 1
diff --git a/test/DebugInfo/COFF/asm.ll b/test/DebugInfo/COFF/asm.ll
index 44ee4f9ce4f4..4d5cdda5659a 100644
--- a/test/DebugInfo/COFF/asm.ll
+++ b/test/DebugInfo/COFF/asm.ll
@@ -22,7 +22,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:^L.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection
 ; X86-NEXT: .long   241
@@ -127,7 +127,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/multifile.ll b/test/DebugInfo/COFF/multifile.ll
index 5cdd6dc2e51b..52a62d1c3ca9 100644
--- a/test/DebugInfo/COFF/multifile.ll
+++ b/test/DebugInfo/COFF/multifile.ll
@@ -29,7 +29,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection
 ; X86-NEXT: .long   241
@@ -159,7 +159,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/multifunction.ll b/test/DebugInfo/COFF/multifunction.ll
index 8f9a3f8b9b75..01ba617dc1d7 100644
--- a/test/DebugInfo/COFF/multifunction.ll
+++ b/test/DebugInfo/COFF/multifunction.ll
@@ -53,7 +53,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection for x
 ; X86-NEXT: .long   241
@@ -317,7 +317,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection for x
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/simple.ll b/test/DebugInfo/COFF/simple.ll
index bcb8a695c7f1..196deefe8c4e 100644
--- a/test/DebugInfo/COFF/simple.ll
+++ b/test/DebugInfo/COFF/simple.ll
@@ -20,7 +20,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection
 ; X86-NEXT: .long   241
@@ -118,7 +118,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index 181359b905ab..83d976d24056 100644
--- a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -22,7 +22,7 @@
 ; X86-NEXT: [[END_OF_BAR:^L.*]]:{{$}}
 ; X86-NOT:  ret

-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86:       .secrel32 "?bar@@YAXHZZ"
 ; X86-NEXT:  .secidx   "?bar@@YAXHZZ"
 ; X86:       .long   0
diff --git a/test/DebugInfo/X86/coff_debug_info_type.ll b/test/DebugInfo/X86/coff_debug_info_type.ll
index d34f50b7244b..89859d246af8 100644
--- a/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -6,7 +6,7 @@
 ; CHECK:    .section  .apple_types

 ; RUN: llc -mtriple=i686-pc-win32 -filetype=asm -O0 < %s | FileCheck -check-prefix=WIN32 %s
-; WIN32:    .section .debug$S,"rd"
+; WIN32:    .section .debug$S,"dr"

 ; RUN: llc -mtriple=i686-pc-win32 -filetype=null -O0 < %s
diff --git a/test/DebugInfo/location-verifier.ll b/test/DebugInfo/location-verifier.ll
new file mode 100644
index 000000000000..0e56be42e1f9
--- /dev/null
+++ b/test/DebugInfo/location-verifier.ll
@@ -0,0 +1,33 @@
+; RUN: not llvm-as -disable-output -verify-debug-info < %s 2>&1 | FileCheck %s
+; ModuleID = 'test.c'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo() #0 {
+entry:
+  ret i32 42, !dbg !13
+}
+
+attributes #0 = { nounwind ssp uwtable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = !{!"0x11\0012\00clang version 3.7.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\000\000\001", !1, !5, !6, null, i32 ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}                               ; [ DW_TAG_file_type ] [/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 3.7.0 "}
+; An old-style MDLocation should not pass verify.
+; CHECK: DISubprogram does not Verify
+!13 = !{i32 2, i32 2, !4, null}
diff --git a/test/Instrumentation/MemorySanitizer/atomics.ll b/test/Instrumentation/MemorySanitizer/atomics.ll
index c8f3b88815bb..28736ad79029 100644
--- a/test/Instrumentation/MemorySanitizer/atomics.ll
+++ b/test/Instrumentation/MemorySanitizer/atomics.ll
@@ -1,4 +1,6 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=2 -S | FileCheck %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Linker/distinct-cycles.ll b/test/Linker/distinct-cycles.ll
new file mode 100644
index 000000000000..b9b496c50c14
--- /dev/null
+++ b/test/Linker/distinct-cycles.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-link -o - -S %s | FileCheck %s
+; Crasher for PR22456: MapMetadata() should resolve all cycles.
+
+; CHECK: !named = !{!0}
+!named = !{!0}
+
+; CHECK: !0 = distinct !{!1}
+!0 = distinct !{!1}
+
+; CHECK-NEXT: !1 = !{!2}
+; CHECK-NEXT: !2 = !{!1}
+!1 = !{!2}
+!2 = !{!1}
diff --git a/test/MC/ARM/pr22395-2.s b/test/MC/ARM/pr22395-2.s
new file mode 100644
index 000000000000..3d2a10d6bbbc
--- /dev/null
+++ b/test/MC/ARM/pr22395-2.s
@@ -0,0 +1,37 @@
+@ RUN: llvm-mc -triple armv4t-eabi -mattr +d16 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+	.text
+	.thumb
+
+	.p2align 2
+
+	.fpu vfpv3
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu vfpv4
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu neon
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu neon-vfpv4
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu neon-fp-armv8
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu crypto-neon-fp-armv8
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
diff --git a/test/MC/COFF/bss_section.ll b/test/MC/COFF/bss_section.ll
index 1921eeb61a65..477b3dfbd3a6 100644
--- a/test/MC/COFF/bss_section.ll
+++ b/test/MC/COFF/bss_section.ll
@@ -6,4 +6,4 @@
 ; CHECK: .bss
 @thingy_linkonce = linkonce_odr global %struct.foo zeroinitializer, align 4
-; CHECK: .section .bss,"wb",discard,_thingy_linkonce
+; CHECK: .section .bss,"bw",discard,_thingy_linkonce
diff --git a/test/MC/COFF/const-gv-with-rel-init.ll b/test/MC/COFF/const-gv-with-rel-init.ll
index 7d3c5f631881..5d0460dbaeff 100644
--- a/test/MC/COFF/const-gv-with-rel-init.ll
+++ b/test/MC/COFF/const-gv-with-rel-init.ll
@@ -5,7 +5,7 @@ define void @f() {
 }

 @ptr = constant void ()* @f, section ".CRT$XLB", align 8
-; CHECK:  .section  .CRT$XLB,"rd"
+; CHECK:  .section  .CRT$XLB,"dr"

 @weak_array = weak_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void ()* @f to i8*)]
-; CHECK:  .section  .rdata,"rd",discard,weak_array
+; CHECK:  .section  .rdata,"dr",discard,weak_array
diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s
index 820272a40bf4..5111600c7449 100644
--- a/test/MC/COFF/diff.s
+++ b/test/MC/COFF/diff.s
@@ -1,5 +1,23 @@
 // RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s

+.section baz, "xr"
+	.def	X
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	X
+X:
+	mov	Y-X+42,	%eax
+	retl
+
+	.def	Y
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	Y
+Y:
+	retl
+
 	.def	 _foobar;
 	.scl	2;
 	.type	32;
@@ -30,3 +48,10 @@ _rust_crate:
 // CHECK:        SectionData (
 // CHECK-NEXT:     0000: 00000000 00000000 1C000000 20000000
 // CHECK-NEXT:   )
+
+// CHECK:        Name: baz
+// CHECK:        Relocations [
+// CHECK-NEXT:   ]
+// CHECK:        SectionData (
+// CHECK-NEXT:     0000: A1300000 00C3C3
+// CHECK-NEXT:   )
diff --git a/test/MC/COFF/global_ctors_dtors.ll b/test/MC/COFF/global_ctors_dtors.ll
index ca17f24a68e0..be92c27c0d8a 100644
--- a/test/MC/COFF/global_ctors_dtors.ll
+++ b/test/MC/COFF/global_ctors_dtors.ll
@@ -49,17 +49,17 @@ define i32 @main() nounwind {
   ret i32 0
 }
-; WIN32: .section .CRT$XCU,"rd"
+; WIN32: .section .CRT$XCU,"dr"
 ; WIN32: a_global_ctor
-; WIN32: .section .CRT$XCU,"rd",associative,{{_?}}b
+; WIN32: .section .CRT$XCU,"dr",associative,{{_?}}b
 ; WIN32: b_global_ctor
 ; WIN32-NOT: c_global_ctor
-; WIN32: .section .CRT$XTX,"rd"
+; WIN32: .section .CRT$XTX,"dr"
 ; WIN32: a_global_dtor
-; MINGW32: .section .ctors,"wd"
+; MINGW32: .section .ctors,"dw"
 ; MINGW32: a_global_ctor
-; MINGW32: .section .ctors,"wd",associative,{{_?}}b
+; MINGW32: .section .ctors,"dw",associative,{{_?}}b
 ; MINGW32: b_global_ctor
 ; MINGW32-NOT: c_global_ctor
-; MINGW32: .section .dtors,"wd"
+; MINGW32: .section .dtors,"dw"
 ; MINGW32: a_global_dtor
diff --git a/test/MC/COFF/initialised-data.ll b/test/MC/COFF/initialised-data.ll
index c4284696b8ca..a2faac748567 100644
--- a/test/MC/COFF/initialised-data.ll
+++ b/test/MC/COFF/initialised-data.ll
@@ -3,5 +3,5 @@

 @data = dllexport constant [5 x i8] c"data\00", align 1

-; CHECK: .section	.rdata,"rd"
+; CHECK: .section	.rdata,"dr"
diff --git a/test/MC/COFF/section-passthru-flags.s b/test/MC/COFF/section-passthru-flags.s
index 3bd061b391d1..96e42d23351e 100644
--- a/test/MC/COFF/section-passthru-flags.s
+++ b/test/MC/COFF/section-passthru-flags.s
@@ -3,5 +3,5 @@
 // CHECK: .section .klaatu,"wn"
 .section .barada,"y"
 // CHECK: .section .barada,"y"
-.section .nikto,"wds"
-// CHECK: .section .nikto,"wds"
+.section .nikto,"dws"
+// CHECK: .section .nikto,"dws"
diff --git a/test/MC/COFF/weak-symbol.ll b/test/MC/COFF/weak-symbol.ll
index fd78307c1f22..0ab860cad462 100644
--- a/test/MC/COFF/weak-symbol.ll
+++ b/test/MC/COFF/weak-symbol.ll
@@ -28,20 +28,20 @@ define weak void @f() section ".sect" {
 }
  ; Weak global
 -; X86: .section .data,"rd",discard,_a
 +; X86: .section .data,"dr",discard,_a
  ; X86: .globl _a
  ; X86: .zero 12
  ;
 -; X64: .section .data,"rd",discard,a
 +; X64: .section .data,"dr",discard,a
  ; X64: .globl a
  ; X64: .zero 12
  @a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
 -; X86:  .section        .tls$,"wd",discard,_b
 +; X86:  .section        .tls$,"dw",discard,_b
  ; X86:  .globl  _b
  ; X86:  .long   0
  ;
 -; X64:  .section        .tls$,"wd",discard,b
 +; X64:  .section        .tls$,"dw",discard,b
  ; X64:  .globl  b
  ; X64:  .long   0
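The COFF churn in the test updates above is mechanical rather than functional: judging by the -/+ pairs, MC now prints COFF section flag letters in a fixed canonical order, so "rd" becomes "dr", "wb" becomes "bw", "wd" becomes "dw", and "wds" becomes "dws", while the flag set itself is unchanged. A minimal sketch of what the updated tests expect (hypothetical file and symbol name; only the shape of the directive matters, and the exact placement is an assumption rather than something the diff states):

; RUN: llc -mtriple=i686-pc-win32 < %s | FileCheck %s
; A read-only constant should land in .rdata, with the flags now
; spelled "dr" rather than the old "rd".
@k = unnamed_addr constant i32 7
; CHECK: .section .rdata,"dr"

Every -/+ pair in the weak-symbol.ll hunk above can be read the same way: the two lines differ only in the order of the flag letters.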
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 155d36f60e21..3ec8831def18 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -192,4 +192,13 @@ define void @test11(i32 *%P) {
   ; CHECK-NEXT: ret void
 }
-
+; CHECK-LABEL: @test12(
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+  %load0 = load i32* %P1
+  %1 = load atomic i32* %P2 seq_cst, align 4
+  %load1 = load i32* %P1
+  %sel = select i1 %B, i32 %load0, i32 %load1
+  ret i32 %sel
+  ; CHECK: load i32* %P1
+  ; CHECK: load i32* %P1
+}
diff --git a/test/Transforms/Inline/inline-indirect.ll b/test/Transforms/Inline/inline-indirect.ll
new file mode 100644
index 000000000000..f6eb528e0650
--- /dev/null
+++ b/test/Transforms/Inline/inline-indirect.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -inline -disable-output 2>/dev/null
+; This test used to trigger an assertion in the assumption cache when
+; inlining the indirect call
+declare void @llvm.assume(i1)
+
+define void @foo() {
+  ret void
+}
+
+define void @bar(void ()*) {
+  call void @llvm.assume(i1 true)
+  call void %0();
+  ret void
+}
+
+define void @baz() {
+  call void @bar(void ()* @foo)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
index ad6a11cf6eb1..3d18ac0e3344 100644
--- a/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s

+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
 define i32 @test_load_cast_combine_tbaa(float* %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
 ; CHECK-LABEL: @test_load_cast_combine_tbaa(
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 26b294042d42..f4edf092641f 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -201,7 +201,7 @@ for.end:                                          ; preds = %for.body
 ;
 ; Currently we have three extra add.w's that keep the store address
 ; live past the next increment because ISEL is unfortunately undoing
-; the store chain. ISEL also fails to convert all but one of the stores to
+; the store chain. ISEL also fails to convert the stores to
 ; post-increment addressing. However, the loads should use
 ; post-increment addressing, no add's or add.w's beyond the three
 ; mentioned. Most importantly, there should be no spills or reloads!
@@ -210,7 +210,7 @@ for.end:                                          ; preds = %for.body
 ; A9: %.lr.ph
 ; A9-NOT: lsl.w
 ; A9-NOT: {{ldr|str|adds|add r}}
-; A9: vst1.8 {{.*}} [r{{[0-9]+}}]!
+; A9: add.w r
 ; A9-NOT: {{ldr|str|adds|add r}}
 ; A9: add.w r
 ; A9-NOT: {{ldr|str|adds|add r}}
diff --git a/test/Transforms/MemCpyOpt/callslot_aa.ll b/test/Transforms/MemCpyOpt/callslot_aa.ll
new file mode 100644
index 000000000000..b6ea129ccfa7
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/callslot_aa.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%T = type { i64, i64 }
+
+define void @test(i8* %src) {
+  %tmp = alloca i8
+  %dst = alloca i8
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i32 8, i1 false), !noalias !2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i32 8, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+; Check that the noalias for "dst" was removed by checking that the metadata is gone
+; CHECK-NOT: "dst"
+!0 = !{!0}
+!1 = distinct !{!1, !0, !"dst"}
+!2 = distinct !{!1}
diff --git a/test/Transforms/SLPVectorizer/X86/bad_types.ll b/test/Transforms/SLPVectorizer/X86/bad_types.ll
new file mode 100644
index 000000000000..38ed18dad2ac
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
+; Ensure we can handle x86_mmx values which are primitive and can be bitcast
+; with integer types but can't be put into a vector.
+;
+; CHECK-LABEL: @test1
+; CHECK:         store i64
+; CHECK:         store i64
+; CHECK:         ret void
+entry:
+  %a.cast = bitcast x86_mmx %a to i64
+  %b.cast = bitcast x86_mmx %b to i64
+  %a.and = and i64 %a.cast, 42
+  %b.and = and i64 %b.cast, 42
+  %gep = getelementptr i64* %ptr, i32 1
+  store i64 %a.and, i64* %ptr
+  store i64 %b.and, i64* %gep
+  ret void
+}
+
+define void @test2(x86_mmx %a, x86_mmx %b) {
+; Same as @test1 but using phi-input vectorization instead of store
+; vectorization.
+;
+; CHECK-LABEL: @test2
+; CHECK:         and i64
+; CHECK:         and i64
+; CHECK:         ret void
+entry:
+  br i1 undef, label %if.then, label %exit
+
+if.then:
+  %a.cast = bitcast x86_mmx %a to i64
+  %b.cast = bitcast x86_mmx %b to i64
+  %a.and = and i64 %a.cast, 42
+  %b.and = and i64 %b.cast, 42
+  br label %exit
+
+exit:
+  %a.phi = phi i64 [ 0, %entry ], [ %a.and, %if.then ]
+  %b.phi = phi i64 [ 0, %entry ], [ %b.and, %if.then ]
+  tail call void @f(i64 %a.phi, i64 %b.phi)
+  ret void
+}
+
+declare void @f(i64, i64)
diff --git a/test/Transforms/Util/combine-alias-scope-metadata.ll b/test/Transforms/Util/combine-alias-scope-metadata.ll
new file mode 100644
index 000000000000..fd0a3d5c5b92
--- /dev/null
+++ b/test/Transforms/Util/combine-alias-scope-metadata.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i8* noalias dereferenceable(1) %in, i8* noalias dereferenceable(1) %out) {
+  %tmp = alloca i8
+  %tmp2 = alloca i8
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i32 8, i1 false), !alias.scope !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i32 8, i1 false), !alias.scope !5
+
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %tmp2, i64 1, i32 8, i1 false), !noalias !6
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+!0 = !{!0}
+!1 = distinct !{!1, !0, !"in"}
+!2 = distinct !{!2, !0, !"tmp"}
+!3 = distinct !{!3, !0, !"tmp2"}
+!4 = distinct !{!1, !2}
+!5 = distinct !{!2, !3}
+!6 = distinct !{!1, !2}
diff --git a/test/tools/gold/no-map-whole-file.ll b/test/tools/gold/no-map-whole-file.ll
new file mode 100644
index 000000000000..21a0c46d28b0
--- /dev/null
+++ b/test/tools/gold/no-map-whole-file.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as -o %t.bc %s
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so -plugin-opt=emit-llvm \
+; RUN:    --no-map-whole-files -r -o %t2.bc %t.bc
+; RUN: llvm-dis < %t2.bc -o - | FileCheck %s
+
+; CHECK: main
+define i32 @main() {
+  ret i32 0
+}
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 5524bb9922c5..a9909a721c1b 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -559,11 +559,9 @@ static void freeSymName(ld_plugin_symbol &Sym) {
 }

 static std::unique_ptr<Module>
-getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
+getModuleForFile(LLVMContext &Context, claimed_file &F,
+                 off_t Filesize, raw_fd_ostream *ApiFile,
                  StringSet<> &Internalize, StringSet<> &Maybe) {
-  ld_plugin_input_file File;
-  if (get_input_file(F.handle, &File) != LDPS_OK)
-    message(LDPL_FATAL, "Failed to get file information");
   if (get_symbols(F.handle, F.syms.size(), &F.syms[0]) != LDPS_OK)
     message(LDPL_FATAL, "Failed to get symbol information");

@@ -572,7 +570,7 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
   if (get_view(F.handle, &View) != LDPS_OK)
     message(LDPL_FATAL, "Failed to get a view of file");

-  MemoryBufferRef BufferRef(StringRef((const char *)View, File.filesize), "");
+  MemoryBufferRef BufferRef(StringRef((const char *)View, Filesize), "");

   ErrorOr<std::unique_ptr<object::IRObjectFile>> ObjOrErr =
       object::IRObjectFile::create(BufferRef, Context);
@@ -580,9 +578,6 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
     message(LDPL_FATAL, "Could not read bitcode from file : %s",
             EC.message().c_str());

-  if (release_input_file(F.handle) != LDPS_OK)
-    message(LDPL_FATAL, "Failed to release file information");
-
   object::IRObjectFile &Obj = **ObjOrErr;

   Module &M = Obj.getModule();
@@ -798,8 +793,12 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) {
   StringSet<> Internalize;
   StringSet<> Maybe;
   for (claimed_file &F : Modules) {
+    ld_plugin_input_file File;
+    if (get_input_file(F.handle, &File) != LDPS_OK)
+      message(LDPL_FATAL, "Failed to get file information");
     std::unique_ptr<Module> M =
-        getModuleForFile(Context, F, ApiFile, Internalize, Maybe);
+        getModuleForFile(Context, F, File.filesize, ApiFile,
+                         Internalize, Maybe);
     if (!options::triple.empty())
       M->setTargetTriple(options::triple.c_str());
     else if (M->getTargetTriple().empty()) {
@@ -808,6 +807,8 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) {
     if (L.linkInModule(M.get()))
       message(LDPL_FATAL, "Failed to link module");
+    if (release_input_file(F.handle) != LDPS_OK)
+      message(LDPL_FATAL, "Failed to release file information");
   }

   for (const auto &Name : Internalize) {
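The gold-plugin hunks above fix an input-file lifetime bug: getModuleForFile used to call get_input_file and release_input_file itself, so the underlying file view could be released while the lazily materialized module still needed it during linking. Both calls now live in the caller's loop in allSymbolsReadHook. A condensed C++ sketch of the resulting control flow, simplified from the hunks above (error paths and unrelated details omitted, so this is an illustration rather than the literal plugin code):

// Acquire the file, build the module from its view, link it, and only
// then release the underlying file.
for (claimed_file &F : Modules) {
  ld_plugin_input_file File;
  if (get_input_file(F.handle, &File) != LDPS_OK)
    message(LDPL_FATAL, "Failed to get file information");
  // The module may still be lazily materialized from the file's view,
  // so File must stay acquired until linking has completed.
  std::unique_ptr<Module> M = getModuleForFile(
      Context, F, File.filesize, ApiFile, Internalize, Maybe);
  if (L.linkInModule(M.get()))
    message(LDPL_FATAL, "Failed to link module");
  // Releasing here, after linkInModule, is the whole point of the change.
  if (release_input_file(F.handle) != LDPS_OK)
    message(LDPL_FATAL, "Failed to release file information");
}

Passing File.filesize into getModuleForFile is what lets the callee stop owning the ld_plugin_input_file handle in the first place.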