| field     | value                                                 | date                      |
|-----------|-------------------------------------------------------|---------------------------|
| author    | Dimitry Andric <dim@FreeBSD.org>                      | 2018-08-02 17:32:43 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org>                      | 2018-08-02 17:32:43 +0000 |
| commit    | b7eb8e35e481a74962664b63dfb09483b200209a (patch)      |                           |
| tree      | 1937fb4a348458ce2d02ade03ac3bb0aa18d2fcd /lib/CodeGen |                           |
| parent    | eb11fae6d08f479c0799db45860a98af528fa6e7 (diff)       |                           |
Diffstat (limited to 'lib/CodeGen')
42 files changed, 568 insertions, 323 deletions
| diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 181da83dc88b..d93716287981 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -46,7 +46,7 @@ public:                                           MachineBasicBlock::iterator End,                                           unsigned InsertPosIndex,                                           DbgValueVector &DbgValues) = 0; -   +    /// Update liveness information to account for the current    /// instruction, which will not be scheduled.    virtual void Observe(MachineInstr &MI, unsigned Count, diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp index 4a226527cb5b..c8305ad9c547 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -24,8 +24,26 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {    return IterBool.first->second.Number;  } + +void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { +  static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize(); +  Asm.OutStreamer->SwitchSection(Section); + +  uint64_t Length = sizeof(uint16_t) // version +                  + sizeof(uint8_t)  // address_size +                  + sizeof(uint8_t)  // segment_selector_size +                  + AddrSize * Pool.size(); // entries +  Asm.emitInt32(Length); // TODO: Support DWARF64 format. +  Asm.emitInt16(Asm.getDwarfVersion()); +  Asm.emitInt8(AddrSize); +  Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size. +} +  // Emit addresses into the section given.  void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { +  if (Asm.getDwarfVersion() >= 5) +    emitHeader(Asm, AddrSection); +    if (Pool.empty())      return; diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h index 5350006bf744..d5008fab5563 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/lib/CodeGen/AsmPrinter/AddressPool.h @@ -50,6 +50,9 @@ public:    bool hasBeenUsed() const { return HasBeenUsed; }    void resetUsedFlag() { HasBeenUsed = false; } + +private: +  void emitHeader(AsmPrinter &Asm, MCSection *Section);  };  } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 8761fae9dd22..500e7a00196f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -364,7 +364,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)    else      UseSectionsAsReferences = DwarfSectionsAsReferences == Enable; -  GenerateTypeUnits = GenerateDwarfTypeUnits; +  // Don't generate type units for unsupported object file formats. +  GenerateTypeUnits = +      A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits;    TheAccelTableKind = computeAccelTableKind(        DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple()); @@ -886,8 +888,7 @@ void DwarfDebug::endModule() {      emitDebugInfoDWO();      emitDebugAbbrevDWO();      emitDebugLineDWO(); -    // Emit DWO addresses. -    AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); +    emitDebugAddr();    }    // Emit info into the dwarf accelerator table sections. @@ -2136,7 +2137,7 @@ void DwarfDebug::emitDebugRanges() {      return;    } -  if (getDwarfVersion() >= 5 && NoRangesPresent()) +  if (NoRangesPresent())      return;    // Start the dwarf ranges section. 
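(Editor's aside on the `AddressPool::emitHeader` hunk above: for readers unfamiliar with the DWARF v5 `.debug_addr` layout it emits, here is a minimal standalone sketch of the unit-length arithmetic. It assumes the 32-bit DWARF format — matching the patch's `// TODO: Support DWARF64` — where the 4-byte `unit_length` field is not counted in the length it records; `AddrSize` and `NumEntries` are illustrative parameters, not names from the patch.)

```cpp
#include <cstddef>
#include <cstdint>

// Sketch only: the DWARF v5 .debug_addr unit_length covers the 2-byte
// version, 1-byte address_size, 1-byte segment_selector_size, and one
// pointer-sized slot per pooled address -- but not unit_length itself.
uint64_t debugAddrUnitLength(uint8_t AddrSize, size_t NumEntries) {
  return sizeof(uint16_t)                   // version
         + sizeof(uint8_t)                  // address_size
         + sizeof(uint8_t)                  // segment_selector_size
         + uint64_t(AddrSize) * NumEntries; // address table entries
}
```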
@@ -2297,6 +2298,12 @@ void DwarfDebug::emitDebugStrDWO() {                           OffSec, /* UseRelativeOffsets = */ false);  } +// Emit DWO addresses. +void DwarfDebug::emitDebugAddr() { +  assert(useSplitDwarf() && "No split dwarf?"); +  AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); +} +  MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {    if (!useSplitDwarf())      return nullptr; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 0c7be5d27dfe..abf2e43b1312 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -447,6 +447,9 @@ class DwarfDebug : public DebugHandlerBase {    /// Emit the debug str dwo section.    void emitDebugStrDWO(); +  /// Emit DWO addresses. +  void emitDebugAddr(); +    /// Flags to let the linker know we have emitted new style pubnames. Only    /// emit it here if we don't have a skeleton CU for split dwarf.    void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const; diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index 952b0d99a95a..0637d952eba4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -112,7 +112,7 @@ protected:    uint64_t OffsetInBits = 0;    unsigned DwarfVersion; -  /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.  +  /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.    unsigned SubRegisterSizeInBits = 0;    unsigned SubRegisterOffsetInBits = 0; diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index c90bd568162d..049f349b009a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -95,6 +95,6 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {      }    } else {      ScopeVars.Locals.push_back(Var); -  }     +  }    return true;  } diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 43b835b2c4aa..600f4a78fda0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1182,7 +1182,7 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {      addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());    if (!M->getISysRoot().empty())      addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot()); -   +    return &MDie;  } @@ -1691,7 +1691,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {  }  void DwarfTypeUnit::emitHeader(bool UseOffsets) { -  DwarfUnit::emitCommonHeader(UseOffsets,  +  DwarfUnit::emitCommonHeader(UseOffsets,                                DD->useSplitDwarf() ? dwarf::DW_UT_split_type                                                    : dwarf::DW_UT_type);    Asm->OutStreamer->AddComment("Type Signature"); diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index f2615edaece2..e28fc6fb9d4f 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -362,19 +362,19 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,  /// Convert an atomic load of a non-integral type to an integer load of the  /// equivalent bitwidth.  See the function comment on -/// convertAtomicStoreToIntegerType for background.   +/// convertAtomicStoreToIntegerType for background.  
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {    auto *M = LI->getModule();    Type *NewTy = getCorrespondingIntegerType(LI->getType(),                                              M->getDataLayout());    IRBuilder<> Builder(LI); -   +    Value *Addr = LI->getPointerOperand();    Type *PT = PointerType::get(NewTy,                                Addr->getType()->getPointerAddressSpace());    Value *NewAddr = Builder.CreateBitCast(Addr, PT); -   +    auto *NewLI = Builder.CreateLoad(NewAddr);    NewLI->setAlignment(LI->getAlignment());    NewLI->setVolatile(LI->isVolatile()); @@ -452,7 +452,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {    Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),                                              M->getDataLayout());    Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); -   +    Value *Addr = SI->getPointerOperand();    Type *PT = PointerType::get(NewTy,                                Addr->getType()->getPointerAddressSpace()); @@ -920,14 +920,14 @@ Value *AtomicExpand::insertRMWLLSCLoop(  /// the equivalent bitwidth.  We used to not support pointer cmpxchg in the  /// IR.  As a migration step, we convert back to what use to be the standard  /// way to represent a pointer cmpxchg so that we can update backends one by -/// one.  +/// one.  AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {    auto *M = CI->getModule();    Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),                                              M->getDataLayout());    IRBuilder<> Builder(CI); -   +    Value *Addr = CI->getPointerOperand();    Type *PT = PointerType::get(NewTy,                                Addr->getType()->getPointerAddressSpace()); @@ -935,8 +935,8 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *    Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);    Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); -   -   + +    auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,                                              CI->getSuccessOrdering(),                                              CI->getFailureOrdering(), diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp index abac555d6602..3a9b20aa661d 100644 --- a/lib/CodeGen/BuiltinGCs.cpp +++ b/lib/CodeGen/BuiltinGCs.cpp @@ -8,7 +8,7 @@  //===----------------------------------------------------------------------===//  //  // This file contains the boilerplate required to define our various built in -// gc lowering strategies.   +// gc lowering strategies.  //  //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 840e5ede6444..5a5960b16130 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -530,7 +530,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,      // Kill instructions can define registers but are really nops, and there      // might be a real definition earlier that needs to be paired with uses      // dominated by this kill. -     +      // FIXME: It may be possible to remove the isKill() restriction once PR18663      // has been properly fixed. There can be value in processing kills as seen      // in the AggressiveAntiDepBreaker class. 
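(Editor's aside on the AtomicExpandPass hunks above: the recurring idiom is to pick an integer type of the same bit width as the non-integral value, bitcast the pointer operand, and emit the memory operation through it. A minimal sketch under era-appropriate LLVM APIs, with illustrative names; the real pass also re-applies alignment, volatility, ordering, and sync scope, and handles stores and cmpxchg the same way.)

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

// Sketch only: load a non-integral type (float, pointer, ...) as an
// integer of equal width, mirroring convertAtomicLoadToIntegerType.
static llvm::LoadInst *loadAsInteger(llvm::LoadInst *LI) {
  const llvm::DataLayout &DL = LI->getModule()->getDataLayout();
  auto *NewTy = llvm::IntegerType::get(
      LI->getContext(), DL.getTypeSizeInBits(LI->getType()));
  llvm::IRBuilder<> Builder(LI);
  llvm::Value *Addr = LI->getPointerOperand();
  auto *PT =
      llvm::PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  llvm::Value *NewAddr = Builder.CreateBitCast(Addr, PT);
  return Builder.CreateLoad(NewAddr); // caller copies the atomic attributes
}
```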
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 456fa799e8e1..fe3d29657942 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -159,7 +159,7 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {    auto NMI = GCStrategyMap.find(Name);    if (NMI != GCStrategyMap.end())      return NMI->getValue(); -   +    for (auto& Entry : GCRegistry::entries()) {      if (Name == Entry.getName()) {        std::unique_ptr<GCStrategy> S = Entry.instantiate(); @@ -171,11 +171,11 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {    }    if (GCRegistry::begin() == GCRegistry::end()) { -    // In normal operation, the registry should not be empty.  There should  +    // In normal operation, the registry should not be empty.  There should      // be the builtin GCs if nothing else.  The most likely scenario here is -    // that we got here without running the initializers used by the Registry  +    // that we got here without running the initializers used by the Registry      // itself and it's registration mechanism. -    const std::string error = ("unsupported GC: " + Name).str() +  +    const std::string error = ("unsupported GC: " + Name).str() +        " (did you remember to link and initialize the CodeGen library?)";      report_fatal_error(error);    } else diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index bafb7a05536d..80da50562d32 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -11,6 +11,7 @@  //===----------------------------------------------------------------------===//  #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/ADT/PostOrderIterator.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/ScopeExit.h"  #include "llvm/ADT/SmallSet.h" @@ -33,6 +34,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h"  #include "llvm/IR/Constant.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" @@ -1503,6 +1505,8 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {        Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));      }      EntryBuilder.buildMerge(Reg, Ops); +  } else if (auto *BA = dyn_cast<BlockAddress>(&C)) { +    EntryBuilder.buildBlockAddress(Reg, BA);    } else      return false; @@ -1611,19 +1615,20 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {      ArgIt++;    } -  // And translate the function! -  for (const BasicBlock &BB : F) { -    MachineBasicBlock &MBB = getMBB(BB); +  // Need to visit defs before uses when translating instructions. +  ReversePostOrderTraversal<const Function *> RPOT(&F); +  for (const BasicBlock *BB : RPOT) { +    MachineBasicBlock &MBB = getMBB(*BB);      // Set the insertion point of all the following translations to      // the end of this basic block.      
CurBuilder.setMBB(MBB); -    for (const Instruction &Inst : BB) { +    for (const Instruction &Inst : *BB) {        if (translate(Inst))          continue;        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", -                                 Inst.getDebugLoc(), &BB); +                                 Inst.getDebugLoc(), BB);        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);        if (ORE->allowExtraAnalysis("gisel-irtranslator")) { diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 9df931eb81b3..3271b54aa830 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -809,6 +809,15 @@ MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,                          MMO);  } +MachineInstrBuilder +MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) { +#ifndef NDEBUG +  assert(getMRI()->getType(Res).isPointer() && "invalid res type"); +#endif + +  return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA); +} +  void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,                                              bool IsExtend) {  #ifndef NDEBUG diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index ca56f4e0c4f1..9f7f5e392a9a 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -56,7 +56,7 @@  // - it makes linker optimizations less useful (order files, LOHs, ...)  // - it forces usage of indexed addressing (which isn't necessarily "free")  // - it can increase register pressure when the uses are disparate enough. -//  +//  // We use heuristics to discover the best global grouping we can (cf cl::opts).  
//  // ===---------------------------------------------------------------------===// diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index eb4099964242..707113bd973b 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -113,22 +113,22 @@ void IntrinsicLowering::AddPrototypes(Module &M) {        case Intrinsic::memcpy:          M.getOrInsertFunction("memcpy",            Type::getInt8PtrTy(Context), -                              Type::getInt8PtrTy(Context),  -                              Type::getInt8PtrTy(Context),  +                              Type::getInt8PtrTy(Context), +                              Type::getInt8PtrTy(Context),                                DL.getIntPtrType(Context));          break;        case Intrinsic::memmove:          M.getOrInsertFunction("memmove",            Type::getInt8PtrTy(Context), -                              Type::getInt8PtrTy(Context),  -                              Type::getInt8PtrTy(Context),  +                              Type::getInt8PtrTy(Context), +                              Type::getInt8PtrTy(Context),                                DL.getIntPtrType(Context));          break;        case Intrinsic::memset:          M.getOrInsertFunction("memset",            Type::getInt8PtrTy(Context), -                              Type::getInt8PtrTy(Context),  -                              Type::getInt32Ty(M.getContext()),  +                              Type::getInt8PtrTy(Context), +                              Type::getInt32Ty(M.getContext()),                                DL.getIntPtrType(Context));          break;        case Intrinsic::sqrt: @@ -210,13 +210,13 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {                                      "bswap.5");      Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),                                       "bswap.4"); -    Value* Tmp3 = Builder.CreateLShr(V,  +    Value* Tmp3 = Builder.CreateLShr(V,                                       ConstantInt::get(V->getType(), 24),                                       "bswap.3"); -    Value* Tmp2 = Builder.CreateLShr(V,  +    Value* Tmp2 = Builder.CreateLShr(V,                                       ConstantInt::get(V->getType(), 40),                                       "bswap.2"); -    Value* Tmp1 = Builder.CreateLShr(V,  +    Value* Tmp1 = Builder.CreateLShr(V,                                       ConstantInt::get(V->getType(), 56),                                       "bswap.1");      Tmp7 = Builder.CreateAnd(Tmp7, @@ -274,7 +274,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {    for (unsigned n = 0; n < WordSize; ++n) {      Value *PartValue = V; -    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);  +    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 
64 : BitSize);           i <<= 1, ++ct) {        Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);        Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); @@ -381,7 +381,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {    case Intrinsic::siglongjmp: {      // Insert the call to abort -    ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),  +    ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),                      Type::getVoidTy(Context));      break;    } @@ -392,7 +392,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {    case Intrinsic::bswap:      CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));      break; -     +    case Intrinsic::ctlz:      CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));      break; @@ -420,7 +420,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {        CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));      break;    } -     +    case Intrinsic::get_dynamic_area_offset:      errs() << "WARNING: this target does not support the custom llvm.get."                "dynamic.area.offset.  It is being lowered to a constant 0\n"; @@ -473,7 +473,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {    case Intrinsic::assume:    case Intrinsic::var_annotation:      break;   // Strip out these intrinsics -  +    case Intrinsic::memcpy: {      Type *IntPtr = DL.getIntPtrType(Context);      Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index fea83e92de8f..417bd9d5aebe 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -340,7 +340,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,  /// address the spill location in a target independent way.  int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,                                                    unsigned &Reg) { -  assert(MI.hasOneMemOperand() &&  +  assert(MI.hasOneMemOperand() &&           "Spill instruction does not have exactly one memory operand?");    auto MMOI = MI.memoperands_begin();    const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue(); @@ -472,7 +472,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,    int FI;    const MachineMemOperand *MMO; -  // TODO: Handle multiple stores folded into one.  +  // TODO: Handle multiple stores folded into one.    if (!MI.hasOneMemOperand())      return false; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 054cc97f8374..639cd80768fc 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -314,10 +314,10 @@ public:      MMI.deleteMachineFunctionFor(F);      return true;    } -   +    StringRef getPassName() const override {      return "Free MachineFunction"; -  }  +  }  };  } // end anonymous namespace diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 28e4e2c6c87a..a712afec0959 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -620,10 +620,8 @@ struct InstructionMapper {    /// queried for candidates.    ///    /// \param MBB The \p MachineBasicBlock to be translated into integers. -  /// \param TRI \p TargetRegisterInfo for the module. -  /// \param TII \p TargetInstrInfo for the module. +  /// \param TII \p TargetInstrInfo for the function.    
void convertToUnsignedVec(MachineBasicBlock &MBB, -                            const TargetRegisterInfo &TRI,                              const TargetInstrInfo &TII) {      unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB); @@ -729,7 +727,6 @@ struct MachineOutliner : public ModulePass {    /// its leaf children to find the locations of its substring.    ///    /// \param ST A suffix tree to query. -  /// \param TII TargetInstrInfo for the target.    /// \param Mapper Contains outlining mapping information.    /// \param[out] CandidateList Filled with candidates representing each    /// beneficial substring. @@ -738,7 +735,7 @@ struct MachineOutliner : public ModulePass {    ///    /// \returns The length of the longest candidate found.    unsigned -  findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, +  findCandidates(SuffixTree &ST,                   InstructionMapper &Mapper,                   std::vector<std::shared_ptr<Candidate>> &CandidateList,                   std::vector<OutlinedFunction> &FunctionList); @@ -770,14 +767,12 @@ struct MachineOutliner : public ModulePass {    /// \param[out] FunctionList Filled with functions corresponding to each type    /// of \p Candidate.    /// \param ST The suffix tree for the module. -  /// \param TII TargetInstrInfo for the module.    ///    /// \returns The length of the longest candidate found. 0 if there are none.    unsigned    buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,                       std::vector<OutlinedFunction> &FunctionList, -                     SuffixTree &ST, InstructionMapper &Mapper, -                     const TargetInstrInfo &TII); +                     SuffixTree &ST, InstructionMapper &Mapper);    /// Helper function for pruneOverlaps.    /// Removes \p C from the candidate list, and updates its \p OutlinedFunction. @@ -795,11 +790,9 @@ struct MachineOutliner : public ModulePass {    /// \param[in,out] FunctionList A list of functions to be outlined.    /// \param Mapper Contains instruction mapping info for outlining.    /// \param MaxCandidateLen The length of the longest candidate. -  /// \param TII TargetInstrInfo for the module.    void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,                       std::vector<OutlinedFunction> &FunctionList, -                     InstructionMapper &Mapper, unsigned MaxCandidateLen, -                     const TargetInstrInfo &TII); +                     InstructionMapper &Mapper, unsigned MaxCandidateLen);    /// Construct a suffix tree on the instructions in \p M and outline repeated    /// strings from that tree. @@ -892,7 +885,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {  }  unsigned MachineOutliner::findCandidates( -    SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper, +    SuffixTree &ST, InstructionMapper &Mapper,      std::vector<std::shared_ptr<Candidate>> &CandidateList,      std::vector<OutlinedFunction> &FunctionList) {    CandidateList.clear(); @@ -945,7 +938,7 @@ unsigned MachineOutliner::findCandidates(          // AA (where each "A" is an instruction).          //          // We might have some portion of the module that looks like this: -        // AAAAAA (6 A's)  +        // AAAAAA (6 A's)          //          // In this case, there are 5 different copies of "AA" in this range, but          // at most 3 can be outlined. 
If only outlining 3 of these is going to @@ -979,8 +972,16 @@ unsigned MachineOutliner::findCandidates(      // We've found something we might want to outline.      // Create an OutlinedFunction to store it and check if it'd be beneficial      // to outline. +    if (CandidatesForRepeatedSeq.empty()) +      continue; + +    // Arbitrarily choose a TII from the first candidate. +    // FIXME: Should getOutliningCandidateInfo move to TargetMachine? +    const TargetInstrInfo *TII = +        CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo(); +      OutlinedFunction OF = -        TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq); +        TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);      // If we deleted every candidate, then there's nothing to outline.      if (OF.Candidates.empty()) @@ -1036,7 +1037,7 @@ void MachineOutliner::prune(Candidate &C,  void MachineOutliner::pruneOverlaps(      std::vector<std::shared_ptr<Candidate>> &CandidateList,      std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper, -    unsigned MaxCandidateLen, const TargetInstrInfo &TII) { +    unsigned MaxCandidateLen) {    // Return true if this candidate became unbeneficial for outlining in a    // previous step. @@ -1127,13 +1128,13 @@ void MachineOutliner::pruneOverlaps(  unsigned MachineOutliner::buildCandidateList(      std::vector<std::shared_ptr<Candidate>> &CandidateList,      std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST, -    InstructionMapper &Mapper, const TargetInstrInfo &TII) { +    InstructionMapper &Mapper) {    std::vector<unsigned> CandidateSequence; // Current outlining candidate.    unsigned MaxCandidateLen = 0;            // Length of the longest candidate.    MaxCandidateLen = -      findCandidates(ST, TII, Mapper, CandidateList, FunctionList); +      findCandidates(ST, Mapper, CandidateList, FunctionList);    // Sort the candidates in decending order. This will simplify the outlining    // process when we have to remove the candidates from the mapping by @@ -1339,10 +1340,6 @@ bool MachineOutliner::runOnModule(Module &M) {      return false;    MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); -  const TargetSubtargetInfo &STI = -      MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget(); -  const TargetRegisterInfo *TRI = STI.getRegisterInfo(); -  const TargetInstrInfo *TII = STI.getInstrInfo();    // If the user passed -enable-machine-outliner=always or    // -enable-machine-outliner, the pass will run on all functions in the module. @@ -1382,6 +1379,8 @@ bool MachineOutliner::runOnModule(Module &M) {      if (!MF)        continue; +    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); +      if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))        continue; @@ -1405,7 +1404,7 @@ bool MachineOutliner::runOnModule(Module &M) {          continue;        // MBB is suitable for outlining. Map it to a list of unsigneds. -      Mapper.convertToUnsignedVec(MBB, *TRI, *TII); +      Mapper.convertToUnsignedVec(MBB, *TII);      }    } @@ -1416,10 +1415,10 @@ bool MachineOutliner::runOnModule(Module &M) {    // Find all of the outlining candidates.    unsigned MaxCandidateLen = -      buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); +      buildCandidateList(CandidateList, FunctionList, ST, Mapper);    // Remove candidates that overlap with other candidates. 
-  pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); +  pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen);    // Outline each of the candidates and return true if something was outlined.    bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 6095bdd06b69..f632a9bd457f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -383,7 +383,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {    assert(FromReg != ToReg && "Cannot replace a reg with itself");    const TargetRegisterInfo *TRI = getTargetRegisterInfo(); -   +    // TODO: This could be more efficient by bulk changing the operands.    for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {      MachineOperand &O = *I; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 773661965f18..542491eabbf2 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -254,14 +254,14 @@ public:    private:      MachineInstr *PHI;      unsigned idx; -  +    public:      explicit PHI_iterator(MachineInstr *P) // begin iterator        : PHI(P), idx(1) {}      PHI_iterator(MachineInstr *P, bool) // end iterator        : PHI(P), idx(PHI->getNumOperands()) {} -    PHI_iterator &operator++() { idx += 2; return *this; }  +    PHI_iterator &operator++() { idx += 2; return *this; }      bool operator==(const PHI_iterator& x) const { return idx == x.idx; }      bool operator!=(const PHI_iterator& x) const { return !operator==(x); } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 354f46e9e625..1fd40f757351 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -509,7 +509,7 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,    }    ToSplit.insert(std::make_pair(FromBB, ToBB)); -   +    return true;  } diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index b444cd31eba2..79ca6adf95c4 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -655,7 +655,7 @@ static bool getDataDeps(const MachineInstr &UseMI,    // Debug values should not be included in any calculations.    
if (UseMI.isDebugInstr())      return false; -   +    bool HasPhysRegs = false;    for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),         E = UseMI.operands_end(); I != E; ++I) { @@ -1167,7 +1167,7 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {      computeInstrDepths(MBB);    if (!TBI.HasValidInstrHeights)      computeInstrHeights(MBB); -   +    return Trace(*this, TBI);  } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index d644e41abc5b..318776136e24 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -1077,8 +1077,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {      auto VerifyStackMapConstant = [&](unsigned Offset) {        if (!MI->getOperand(Offset).isImm() || -          MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||  -          !MI->getOperand(Offset + 1).isImm())  +          MI->getOperand(Offset).getImm() != StackMaps::ConstantOp || +          !MI->getOperand(Offset + 1).isImm())          report("stack map constant to STATEPOINT not well formed!", MI);      };      const unsigned VarStart = StatepointOpers(MI).getVarIdx(); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index a878c34f9aa4..3660586c1358 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -594,7 +594,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,      MachineBasicBlock::iterator ReloadAfter =        RestoreAfter ? std::next(MBBI) : MBBI;      MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); -    LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); +    if (ReloadBefore != MBB.end()) +      LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');      ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);      Scavenged.Restore = &*std::prev(SpillBefore);      LiveUnits.removeReg(Reg); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7a99687757f8..a8c4b85df321 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -72,7 +72,6 @@  #include <string>  #include <tuple>  #include <utility> -#include <vector>  using namespace llvm; @@ -483,9 +482,6 @@ namespace {      /// returns false.      bool findBetterNeighborChains(StoreSDNode *St); -    /// Match "(X shl/srl V1) & V2" where V2 may not be present. -    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); -      /// Holds a pointer to an LSBaseSDNode as well as information on where it      /// is located in a sequence of memory operations connected by a chain.      
struct MemOpLink { @@ -2671,6 +2667,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),                         N0.getOperand(1).getOperand(0)); +  // fold (A-(B-C)) -> A+(C-B) +  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse()) +    return DAG.getNode(ISD::ADD, DL, VT, N0, +                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), +                                   N1.getOperand(0))); +    // fold (X - (-Y * Z)) -> (X + (Y * Z))    if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {      if (N1.getOperand(0).getOpcode() == ISD::SUB && @@ -2740,6 +2742,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {      }    } +  // Prefer an add for more folding potential and possibly better codegen: +  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) +  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { +    SDValue ShAmt = N1.getOperand(1); +    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); +    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) { +      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); +      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); +    } +  } +    return SDValue();  } @@ -4205,8 +4218,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,      // Allow one node which will masked along with any loads found.      if (NodeToMask)        return false; -  -    // Also ensure that the node to be masked only produces one data result.  + +    // Also ensure that the node to be masked only produces one data result.      NodeToMask = Op.getNode();      if (NodeToMask->getNumValues() > 1) {        bool HasValue = false; @@ -5148,25 +5161,140 @@ SDValue DAGCombiner::visitOR(SDNode *N) {    return SDValue();  } -/// Match "(X shl/srl V1) & V2" where V2 may not be present. -bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { -  if (Op.getOpcode() == ISD::AND) { -    if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { -      Mask = Op.getOperand(1); -      Op = Op.getOperand(0); -    } else { -      return false; -    } +static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { +  if (Op.getOpcode() == ISD::AND && +      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { +    Mask = Op.getOperand(1); +    return Op.getOperand(0);    } +  return Op; +} +/// Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, +                            SDValue &Mask) { +  Op = stripConstantMask(DAG, Op, Mask);    if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {      Shift = Op;      return true;    } -    return false;  } +/// Helper function for visitOR to extract the needed side of a rotate idiom +/// from a shl/srl/mul/udiv.  This is meant to handle cases where +/// InstCombine merged some outside op with one of the shifts from +/// the rotate pattern. +/// \returns An empty \c SDValue if the needed shift couldn't be extracted. 
+/// Otherwise, returns an expansion of \p ExtractFrom based on the following +/// patterns: +/// +///   (or (mul v c0) (shrl (mul v c1) c2)): +///     expands (mul v c0) -> (shl (mul v c1) c3) +/// +///   (or (udiv v c0) (shl (udiv v c1) c2)): +///     expands (udiv v c0) -> (shrl (udiv v c1) c3) +/// +///   (or (shl v c0) (shrl (shl v c1) c2)): +///     expands (shl v c0) -> (shl (shl v c1) c3) +/// +///   (or (shrl v c0) (shl (shrl v c1) c2)): +///     expands (shrl v c0) -> (shrl (shrl v c1) c3) +/// +/// Such that in all cases, c3+c2==bitwidth(op v c1). +static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, +                                     SDValue ExtractFrom, SDValue &Mask, +                                     const SDLoc &DL) { +  assert(OppShift && ExtractFrom && "Empty SDValue"); +  assert( +      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) && +      "Existing shift must be valid as a rotate half"); + +  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); +  // Preconditions: +  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2)) +  // +  // Find opcode of the needed shift to be extracted from (op0 v c0). +  unsigned Opcode = ISD::DELETED_NODE; +  bool IsMulOrDiv = false; +  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift +  // opcode or its arithmetic (mul or udiv) variant. +  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) { +    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant; +    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift) +      return false; +    Opcode = NeededShift; +    return true; +  }; +  // op0 must be either the needed shift opcode or the mul/udiv equivalent +  // that the needed shift can be extracted from. +  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) && +      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV))) +    return SDValue(); + +  // op0 must be the same opcode on both sides, have the same LHS argument, +  // and produce the same value type. +  SDValue OppShiftLHS = OppShift.getOperand(0); +  EVT ShiftedVT = OppShiftLHS.getValueType(); +  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || +      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || +      ShiftedVT != ExtractFrom.getValueType()) +    return SDValue(); + +  // Amount of the existing shift. +  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); +  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. +  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); +  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. +  ConstantSDNode *ExtractFromCst = +      isConstOrConstSplat(ExtractFrom.getOperand(1)); +  // TODO: We should be able to handle non-uniform constant vectors for these values +  // Check that we have constant values. +  if (!OppShiftCst || !OppShiftCst->getAPIntValue() || +      !OppLHSCst || !OppLHSCst->getAPIntValue() || +      !ExtractFromCst || !ExtractFromCst->getAPIntValue()) +    return SDValue(); + +  // Compute the shift amount we need to extract to complete the rotate. +  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits(); +  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue(); +  if (NeededShiftAmt.isNegative()) +    return SDValue(); +  // Normalize the bitwidth of the two mul/udiv/shift constant operands. 
+  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue(); +  APInt OppLHSAmt = OppLHSCst->getAPIntValue(); +  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt); + +  // Now try extract the needed shift from the ExtractFrom op and see if the +  // result matches up with the existing shift's LHS op. +  if (IsMulOrDiv) { +    // Op to extract from is a mul or udiv by a constant. +    // Check: +    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0 +    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0 +    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(), +                                                 NeededShiftAmt.getZExtValue()); +    APInt ResultAmt; +    APInt Rem; +    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem); +    if (Rem != 0 || ResultAmt != OppLHSAmt) +      return SDValue(); +  } else { +    // Op to extract from is a shift by a constant. +    // Check: +    //      c2 - (bitwidth(op0 v c0) - c1) == c0 +    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc( +                                          ExtractFromAmt.getBitWidth())) +      return SDValue(); +  } + +  // Return the expanded shift op that should allow a rotate to be formed. +  EVT ShiftVT = OppShift.getOperand(1).getValueType(); +  EVT ResVT = ExtractFrom.getValueType(); +  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT); +  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode); +} +  // Return true if we can prove that, whenever Neg and Pos are both in the  // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that  // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: @@ -5333,13 +5461,40 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {    // Match "(X shl/srl V1) & V2" where V2 may not be present.    SDValue LHSShift;   // The shift.    SDValue LHSMask;    // AND value if any. -  if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) -    return nullptr; // Not part of a rotate. +  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);    SDValue RHSShift;   // The shift.    SDValue RHSMask;    // AND value if any. -  if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) -    return nullptr; // Not part of a rotate. +  matchRotateHalf(DAG, RHS, RHSShift, RHSMask); + +  // If neither side matched a rotate half, bail +  if (!LHSShift && !RHSShift) +    return nullptr; + +  // InstCombine may have combined a constant shl, srl, mul, or udiv with one +  // side of the rotate, so try to handle that here. In all cases we need to +  // pass the matched shift from the opposite side to compute the opcode and +  // needed shift amount to extract.  We still want to do this if both sides +  // matched a rotate half because one half may be a potential overshift that +  // can be broken down (ie if InstCombine merged two shl or srl ops into a +  // single one). + +  // Have LHS side of the rotate, try to extract the needed shift from the RHS. +  if (LHSShift) +    if (SDValue NewRHSShift = +            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL)) +      RHSShift = NewRHSShift; +  // Have RHS side of the rotate, try to extract the needed shift from the LHS. +  if (RHSShift) +    if (SDValue NewLHSShift = +            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL)) +      LHSShift = NewLHSShift; + +  // If a side is still missing, nothing else we can do. +  if (!RHSShift || !LHSShift) +    return nullptr; + +  // At this point we've matched or extracted a shift op on each side.    
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))      return nullptr;   // Not shifting the same value. @@ -10270,7 +10425,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                   N10.getOperand(0))),                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                       N10.getOperand(1)), -                         N0, Flags);           +                         N0, Flags);      }    } @@ -10333,7 +10488,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                       N0.getOperand(2).getOperand(0),                                       N0.getOperand(2).getOperand(1),                                       DAG.getNode(ISD::FNEG, SL, VT, -                                                 N1), Flags), Flags);           +                                                 N1), Flags), Flags);      }      // fold (fsub x, (fma y, z, (fmul u, v))) @@ -10348,7 +10503,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                           N1.getOperand(1),                           DAG.getNode(PreferredFusedOpcode, SL, VT,                                       DAG.getNode(ISD::FNEG, SL, VT, N20), -                                     N21, N0, Flags), Flags);       +                                     N21, N0, Flags), Flags);      } @@ -10368,7 +10523,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                       N020.getOperand(1)),                                           DAG.getNode(ISD::FNEG, SL, VT, -                                                     N1), Flags), Flags);               +                                                     N1), Flags), Flags);          }        }      } @@ -10396,7 +10551,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                       N002.getOperand(1)),                                           DAG.getNode(ISD::FNEG, SL, VT, -                                                     N1), Flags), Flags);               +                                                     N1), Flags), Flags);          }        }      } @@ -10419,7 +10574,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                                 VT, N1200)),                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                     N1201), -                                       N0, Flags), Flags);         +                                       N0, Flags), Flags);        }      } @@ -10450,7 +10605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                                 VT, N1020)),                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                     N1021), -                                       N0, Flags), Flags);         +                                       N0, Flags), Flags);        }      }    } @@ -10506,7 +10661,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {                             Y, Flags);        if (XC1 && XC1->isExactlyValue(-1.0))          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, -                           
DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);       +                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);      }      return SDValue();    }; @@ -10530,7 +10685,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {        if (XC0 && XC0->isExactlyValue(-1.0))          return DAG.getNode(PreferredFusedOpcode, SL, VT,                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, -                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);       +                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);        auto XC1 = isConstOrConstSplatFP(X.getOperand(1));        if (XC1 && XC1->isExactlyValue(+1.0)) @@ -10838,12 +10993,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {    if (SDValue NewSel = foldBinOpIntoSelect(N))      return NewSel; -  if (Options.UnsafeFPMath ||  +  if (Options.UnsafeFPMath ||        (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {      // fold (fmul A, 0) -> 0      if (N1CFP && N1CFP->isZero())        return N1; -  }  +  }    if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {      // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 @@ -11258,7 +11413,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {  SDValue DAGCombiner::visitFSQRT(SDNode *N) {    SDNodeFlags Flags = N->getFlags(); -  if (!DAG.getTarget().Options.UnsafeFPMath &&  +  if (!DAG.getTarget().Options.UnsafeFPMath &&        !Flags.hasApproximateFuncs())      return SDValue(); @@ -17913,9 +18068,9 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {    if (C->isNullValue())      return SDValue(); -  std::vector<SDNode *> Built; +  SmallVector<SDNode *, 8> Built;    SDValue S = -      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); +      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);    for (SDNode *N : Built)      AddToWorklist(N); @@ -17933,8 +18088,8 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {    if (C->isNullValue())      return SDValue(); -  std::vector<SDNode *> Built; -  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); +  SmallVector<SDNode *, 8> Built; +  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);    for (SDNode *N : Built)      AddToWorklist(N); @@ -17959,9 +18114,9 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {    if (C->isNullValue())      return SDValue(); -  std::vector<SDNode *> Built; +  SmallVector<SDNode *, 8> Built;    SDValue S = -      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); +      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);    for (SDNode *N : Built)      AddToWorklist(N); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e4a9d557d386..795ade588b8f 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1130,7 +1130,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {    ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);    SmallVector<ISD::OutputArg, 4> Outs; -  GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL); +  GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);    bool CanLowerReturn = TLI.CanLowerReturn(        CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); @@ -1548,7 +1548,7 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)  {    MachineInstr *CurLastLocalValue = getLastLocalValue();    if (CurLastLocalValue != SavedLastLocalValue) { -    // Find the first local value instruction to be deleted.  
+    // Find the first local value instruction to be deleted.      // This is the instruction after SavedLastLocalValue if it is non-NULL.      // Otherwise it's the first instruction in the block.      MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); @@ -1569,7 +1569,7 @@ bool FastISel::selectInstruction(const Instruction *I) {      if (!handlePHINodesInSuccessorBlocks(I->getParent())) {        // PHI node handling may have generated local value instructions,        // even though it failed to handle all PHI nodes. -      // We remove these instructions because SelectionDAGISel will generate  +      // We remove these instructions because SelectionDAGISel will generate        // them again.        removeDeadLocalValueCode(SavedLastLocalValue);        return false; @@ -1630,7 +1630,7 @@ bool FastISel::selectInstruction(const Instruction *I) {    DbgLoc = DebugLoc();    // Undo phi node updates, because they will be added again by SelectionDAG.    if (isa<TerminatorInst>(I)) { -    // PHI node handling may have generated local value instructions.  +    // PHI node handling may have generated local value instructions.      // We remove them because SelectionDAGISel will generate them again.      removeDeadLocalValueCode(SavedLastLocalValue);      FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 42c7181dac41..d3c31911d677 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -89,10 +89,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,    // Check whether the function can return without sret-demotion.    SmallVector<ISD::OutputArg, 4> Outs; -  GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI, +  CallingConv::ID CC = Fn->getCallingConv(); + +  GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,                  mf.getDataLayout()); -  CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, -                                       Fn->isVarArg(), Outs, Fn->getContext()); +  CanLowerReturn = +      TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs, Fn->getContext());    // If this personality uses funclets, we need to do a bit more work.    DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b0ae1e0399fb..9aa0ea15f3b7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -153,7 +153,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {    // of Endianness. LLVM's APFloat representation is not Endian sensitive,    // and so always converts into a 128-bit APInt in a non-Endian-sensitive    // way. However, APInt's are serialized in an Endian-sensitive fashion, -  // so on big-Endian targets, the two doubles are output in the wrong  +  // so on big-Endian targets, the two doubles are output in the wrong    // order. Fix this by manually flipping the order of the high 64 bits    // and the low 64 bits here.    if (DAG.getDataLayout().isBigEndian() && @@ -815,7 +815,7 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {    switch (N->getOpcode()) {      case ISD::ConstantFP:  // Leaf node. 
-    case ISD::CopyFromReg: // Operand is a register that we know to be left  +    case ISD::CopyFromReg: // Operand is a register that we know to be left                             // unchanged by SoftenFloatResult().      case ISD::Register:    // Leaf node.        return true; @@ -838,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {    if (N->getNumOperands() == 3)      return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); -  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,  +  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,                                          N->getOperand(3)),                   0);  } @@ -1898,7 +1898,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {      case ISD::FROUND:      case ISD::FSIN:      case ISD::FSQRT: -    case ISD::FTRUNC:     R = PromoteFloatRes_UnaryOp(N); break; +    case ISD::FTRUNC: +    case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;      // Binary FP Operations      case ISD::FADD: diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2c6b1ee7900f..135922d6f267 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -510,7 +510,7 @@ private:    SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);    // Return true if we can skip softening the given operand or SDNode because -  // either it was soften before by SoftenFloatResult and references to the  +  // either it was soften before by SoftenFloatResult and references to the    // operand were replaced by ReplaceValueWith or it's value type is legal in HW    // registers and the operand can be left unchanged.    bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 67928d4bdbd5..3a98a7a904cb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -131,7 +131,7 @@ class VectorLegalizer {    SDValue ExpandCTLZ(SDValue Op);    SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);    SDValue ExpandStrictFPOp(SDValue Op); -   +    /// Implements vector promotion.    ///    /// This is essentially just bitcasting the operands to a different type and @@ -315,7 +315,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {      // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT      // is also legal, but if ISD::FSQRT requires expansion then so does      // ISD::STRICT_FSQRT. 
-    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),  +    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),                                              Node->getValueType(0));      break;    case ISD::ADD: @@ -397,12 +397,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));      break;    case ISD::FP_ROUND_INREG: -    Action = TLI.getOperationAction(Node->getOpcode(),  +    Action = TLI.getOperationAction(Node->getOpcode(),                 cast<VTSDNode>(Node->getOperand(1))->getVT());      break;    case ISD::SINT_TO_FP:    case ISD::UINT_TO_FP: -    Action = TLI.getOperationAction(Node->getOpcode(),  +    Action = TLI.getOperationAction(Node->getOpcode(),                                      Node->getOperand(0).getValueType());      break;    case ISD::MSCATTER: @@ -736,7 +736,7 @@ SDValue VectorLegalizer::Expand(SDValue Op) {    case ISD::CTTZ_ZERO_UNDEF:      return ExpandCTTZ_ZERO_UNDEF(Op);    case ISD::STRICT_FADD: -  case ISD::STRICT_FSUB:  +  case ISD::STRICT_FSUB:    case ISD::STRICT_FMUL:    case ISD::STRICT_FDIV:    case ISD::STRICT_FSQRT: @@ -1153,24 +1153,24 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {    SmallVector<SDValue, 32> OpChains;    for (unsigned i = 0; i < NumElems; ++i) {      SmallVector<SDValue, 4> Opers; -    SDValue Idx = DAG.getConstant(i, dl,  +    SDValue Idx = DAG.getConstant(i, dl,                                    TLI.getVectorIdxTy(DAG.getDataLayout()));      // The Chain is the first operand.      Opers.push_back(Chain); -    // Now process the remaining operands.  +    // Now process the remaining operands.      for (unsigned j = 1; j < NumOpers; ++j) {        SDValue Oper = Op.getOperand(j);        EVT OperVT = Oper.getValueType();        if (OperVT.isVector()) -        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,  +        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,                             EltVT, Oper, Idx);        Opers.push_back(Oper);      } -  +      SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);      OpValues.push_back(ScalarOp.getValue(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1cd43ace48f3..f5d9dd234afd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1068,14 +1068,14 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,    OpsLo.push_back(Chain);    OpsHi.push_back(Chain); -  // Now process the remaining operands.  +  // Now process the remaining operands.    for (unsigned i = 1; i < NumOps; ++i) { -    SDValue Op = N->getOperand(i);  -    SDValue OpLo = Op;  -    SDValue OpHi = Op;    +    SDValue Op = N->getOperand(i); +    SDValue OpLo = Op; +    SDValue OpHi = Op;      EVT InVT = Op.getValueType(); -    if (InVT.isVector()) {  +    if (InVT.isVector()) {        // If the input also splits, handle it directly for a        // compile time speedup. Otherwise split it by hand.        if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) @@ -1092,10 +1092,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,    EVT HiValueVTs[] = {HiVT, MVT::Other};    Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);    Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); -   +    // Build a factor node to remember that this Op is independent of the    // other one. 
-  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,  +  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,                        Lo.getValue(1), Hi.getValue(1));    // Legalize the chain result - switch anything that used the old chain to diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1aa8df29af3b..5f6b6010cae2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -157,31 +157,36 @@ static cl::opt<unsigned> SwitchPeelThreshold(  // store [4096 x i8] %data, [4096 x i8]* %buffer  static const unsigned MaxParallelChains = 64; -// True if the Value passed requires ABI mangling as it is a parameter to a -// function or a return value from a function which is not an intrinsic. -static bool isABIRegCopy(const Value *V) { -  const bool IsRetInst = V && isa<ReturnInst>(V); -  const bool IsCallInst = V && isa<CallInst>(V); -  const bool IsInLineAsm = -      IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm(); -  const bool IsIndirectFunctionCall = -      IsCallInst && !IsInLineAsm && -      !static_cast<const CallInst *>(V)->getCalledFunction(); -  // It is possible that the call instruction is an inline asm statement or an -  // indirect function call in which case the return value of -  // getCalledFunction() would be nullptr. -  const bool IsInstrinsicCall = -      IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall && -      static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() != -          Intrinsic::not_intrinsic; - -  return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall)); +// Return the calling convention if the Value passed requires ABI mangling as it +// is a parameter to a function or a return value from a function which is not +// an intrinsic. +static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) { +  if (auto *R = dyn_cast<ReturnInst>(V)) +    return R->getParent()->getParent()->getCallingConv(); + +  if (auto *CI = dyn_cast<CallInst>(V)) { +    const bool IsInlineAsm = CI->isInlineAsm(); +    const bool IsIndirectFunctionCall = +        !IsInlineAsm && !CI->getCalledFunction(); + +    // It is possible that the call instruction is an inline asm statement or an +    // indirect function call in which case the return value of +    // getCalledFunction() would be nullptr. +    const bool IsInstrinsicCall = +        !IsInlineAsm && !IsIndirectFunctionCall && +        CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic; + +    if (!IsInlineAsm && !IsInstrinsicCall) +      return CI->getCallingConv(); +  } + +  return None;  }  static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,                                        const SDValue *Parts, unsigned NumParts,                                        MVT PartVT, EVT ValueVT, const Value *V, -                                      bool IsABIRegCopy); +                                      Optional<CallingConv::ID> CC);  /// getCopyFromParts - Create a value that contains the specified legal parts  /// combined into the value they represent.  
If the parts combine to a type @@ -191,11 +196,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,  static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,                                  const SDValue *Parts, unsigned NumParts,                                  MVT PartVT, EVT ValueVT, const Value *V, -                                Optional<ISD::NodeType> AssertOp = None, -                                bool IsABIRegCopy = false) { +                                Optional<CallingConv::ID> CC = None, +                                Optional<ISD::NodeType> AssertOp = None) {    if (ValueVT.isVector()) -    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, -                                  PartVT, ValueVT, V, IsABIRegCopy); +    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V, +                                  CC);    assert(NumParts > 0 && "No parts to assemble!");    const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -236,8 +241,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,          // Assemble the trailing non-power-of-2 part.          unsigned OddParts = NumParts - RoundParts;          EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); -        Hi = getCopyFromParts(DAG, DL, -                              Parts + RoundParts, OddParts, PartVT, OddVT, V); +        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, +                              OddVT, V, CC);          // Combine the round and odd parts.          Lo = Val; @@ -267,7 +272,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,        assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&               !PartVT.isVector() && "Unexpected split");        EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); -      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); +      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);      }    } @@ -340,9 +345,11 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,  static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,                                        const SDValue *Parts, unsigned NumParts,                                        MVT PartVT, EVT ValueVT, const Value *V, -                                      bool IsABIRegCopy) { +                                      Optional<CallingConv::ID> CallConv) {    assert(ValueVT.isVector() && "Not a vector value");    assert(NumParts > 0 && "No parts to assemble!"); +  const bool IsABIRegCopy = CallConv.hasValue(); +    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    SDValue Val = Parts[0]; @@ -355,8 +362,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,      if (IsABIRegCopy) {        NumRegs = TLI.getVectorTypeBreakdownForCallingConv( -          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, -          RegisterVT); +          *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, +          NumIntermediates, RegisterVT);      } else {        NumRegs =            TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -470,7 +477,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,  static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,                                   SDValue Val, SDValue *Parts, unsigned NumParts, -                         
        MVT PartVT, const Value *V, bool IsABIRegCopy); +                                 MVT PartVT, const Value *V, +                                 Optional<CallingConv::ID> CallConv);  /// getCopyToParts - Create a series of nodes that contain the specified value  /// split into legal parts.  If the parts contain more bits than Val, then, for @@ -478,14 +486,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,  static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,                             SDValue *Parts, unsigned NumParts, MVT PartVT,                             const Value *V, -                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND, -                           bool IsABIRegCopy = false) { +                           Optional<CallingConv::ID> CallConv = None, +                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {    EVT ValueVT = Val.getValueType();    // Handle the vector case separately.    if (ValueVT.isVector())      return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, -                                IsABIRegCopy); +                                CallConv);    unsigned PartBits = PartVT.getSizeInBits();    unsigned OrigNumParts = NumParts; @@ -564,7 +572,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,      unsigned OddParts = NumParts - RoundParts;      SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,                                   DAG.getIntPtrConstant(RoundBits, DL)); -    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); +    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V, +                   CallConv);      if (DAG.getDataLayout().isBigEndian())        // The odd parts were reversed by getCopyToParts - unreverse them. @@ -605,16 +614,16 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,      std::reverse(Parts, Parts + OrigNumParts);  } -  /// getCopyToPartsVector - Create a series of nodes that contain the specified  /// value split into legal parts.  static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,                                   SDValue Val, SDValue *Parts, unsigned NumParts,                                   MVT PartVT, const Value *V, -                                 bool IsABIRegCopy) { +                                 Optional<CallingConv::ID> CallConv) {    EVT ValueVT = Val.getValueType();    assert(ValueVT.isVector() && "Not a vector");    const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  const bool IsABIRegCopy = CallConv.hasValue();    if (NumParts == 1) {      EVT PartEVT = PartVT; @@ -679,8 +688,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,    unsigned NumRegs;    if (IsABIRegCopy) {      NumRegs = TLI.getVectorTypeBreakdownForCallingConv( -        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, -        RegisterVT); +        *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, +        NumIntermediates, RegisterVT);    } else {      NumRegs =          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -720,7 +729,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,      // If the register was not expanded, promote or copy the value,      // as appropriate.      
for (unsigned i = 0; i != NumParts; ++i) -      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); +      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);    } else if (NumParts > 0) {      // If the intermediate type was expanded, split each the value into      // legal parts. @@ -729,29 +738,32 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,             "Must expand into a divisible number of parts!");      unsigned Factor = NumParts / NumIntermediates;      for (unsigned i = 0; i != NumIntermediates; ++i) -      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); +      getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V, +                     CallConv);    }  }  RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, -                           EVT valuevt, bool IsABIMangledValue) +                           EVT valuevt, Optional<CallingConv::ID> CC)      : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), -      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} +      RegCount(1, regs.size()), CallConv(CC) {}  RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,                             const DataLayout &DL, unsigned Reg, Type *Ty, -                           bool IsABIMangledValue) { +                           Optional<CallingConv::ID> CC) {    ComputeValueVTs(TLI, DL, Ty, ValueVTs); -  IsABIMangled = IsABIMangledValue; +  CallConv = CC;    for (EVT ValueVT : ValueVTs) { -    unsigned NumRegs = IsABIMangledValue -                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT) -                           : TLI.getNumRegisters(Context, ValueVT); -    MVT RegisterVT = IsABIMangledValue -                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT) -                         : TLI.getRegisterType(Context, ValueVT); +    unsigned NumRegs = +        isABIMangled() +            ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT) +            : TLI.getNumRegisters(Context, ValueVT); +    MVT RegisterVT = +        isABIMangled() +            ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT) +            : TLI.getRegisterType(Context, ValueVT);      for (unsigned i = 0; i != NumRegs; ++i)        Regs.push_back(Reg + i);      RegVTs.push_back(RegisterVT); @@ -777,9 +789,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,      // Copy the legal parts from the registers.      EVT ValueVT = ValueVTs[Value];      unsigned NumRegs = RegCount[Value]; -    MVT RegisterVT = IsABIMangled -      ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) -      : RegVTs[Value]; +    MVT RegisterVT = isABIMangled() ? 
TLI.getRegisterTypeForCallingConv( +                                          *DAG.getContext(), +                                          CallConv.getValue(), RegVTs[Value]) +                                    : RegVTs[Value];      Parts.resize(NumRegs);      for (unsigned i = 0; i != NumRegs; ++i) { @@ -837,8 +850,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,                               RegisterVT, P, DAG.getValueType(FromVT));      } -    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), -                                     NumRegs, RegisterVT, ValueVT, V); +    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, +                                     RegisterVT, ValueVT, V, CallConv);      Part += NumRegs;      Parts.clear();    } @@ -859,15 +872,16 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,    for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {      unsigned NumParts = RegCount[Value]; -    MVT RegisterVT = IsABIMangled -      ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) -      : RegVTs[Value]; +    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( +                                          *DAG.getContext(), +                                          CallConv.getValue(), RegVTs[Value]) +                                    : RegVTs[Value];      if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))        ExtendKind = ISD::ZERO_EXTEND; -    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), -                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind); +    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], +                   NumParts, RegisterVT, V, CallConv, ExtendKind);      Part += NumParts;    } @@ -1164,7 +1178,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {      unsigned InReg = It->second;      RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), -                     DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V)); +                     DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V));      SDValue Chain = DAG.getEntryNode();      Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,                                   V); @@ -1355,7 +1369,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {      unsigned InReg = FuncInfo.InitializeRegForValue(Inst);      RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, -                     Inst->getType(), isABIRegCopy(V)); +                     Inst->getType(), getABIRegCopyCC(V));      SDValue Chain = DAG.getEntryNode();      return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);    } @@ -1589,12 +1603,14 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {          if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())            VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); -        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); -        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); +        CallingConv::ID CC = F->getCallingConv(); + +        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT); +        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);          SmallVector<SDValue, 4> Parts(NumParts);          getCopyToParts(DAG, getCurSDLoc(),                         SDValue(RetOp.getNode(), RetOp.getResNo() + j), -                       
&Parts[0], NumParts, PartVT, &I, ExtendKind, true); +                       &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);          // 'inreg' on function refers to return value          ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -4929,7 +4945,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(      if (VMI != FuncInfo.ValueMap.end()) {        const auto &TLI = DAG.getTargetLoweringInfo();        RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, -                       V->getType(), isABIRegCopy(V)); +                       V->getType(), getABIRegCopyCC(V));        if (RFV.occupiesMultipleRegs()) {          unsigned Offset = 0;          for (auto RegAndSize : RFV.getRegsAndSizes()) { @@ -4971,7 +4987,7 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,                                               unsigned DbgSDNodeOrder) {    if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {      // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe -    // stack slot locations.  +    // stack slot locations.      //      // Consider "int x = 0; int *px = &x;". There are two kinds of interesting      // debug values here after optimization: @@ -5288,7 +5304,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {          // The PHI node may be split up into several MI PHI nodes (in          // FunctionLoweringInfo::set).          RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, -                         V->getType(), false); +                         V->getType(), None);          if (RFV.occupiesMultipleRegs()) {            unsigned Offset = 0;            unsigned BitsToDescribe = 0; @@ -7182,10 +7198,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,  /// uses features that we can't model on machineinstrs, we have SDISel do the  /// allocation.  This produces generally horrible, but correct, code.  /// -///   OpInfo describes the operand. +///   OpInfo describes the operand +///   RefOpInfo describes the matching operand if any, the operand otherwise  static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, -                                 const SDLoc &DL, -                                 SDISelAsmOperandInfo &OpInfo) { +                                 const SDLoc &DL, SDISelAsmOperandInfo &OpInfo, +                                 SDISelAsmOperandInfo &RefOpInfo) {    LLVMContext &Context = *DAG.getContext();    MachineFunction &MF = DAG.getMachineFunction(); @@ -7195,8 +7212,8 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,    // If this is a constraint for a single physreg, or a constraint for a    // register class, find it.    std::pair<unsigned, const TargetRegisterClass *> PhysReg = -      TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode, -                                       OpInfo.ConstraintVT); +      TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode, +                                       RefOpInfo.ConstraintVT);    unsigned NumRegs = 1;    if (OpInfo.ConstraintVT != MVT::Other) { @@ -7238,6 +7255,11 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,      NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);    } +  // No need to allocate a matching input constraint since the constraint it's +  // matching to has already been allocated. 
+  if (OpInfo.isMatchingInputConstraint()) +    return; +    MVT RegVT;    EVT ValueVT = OpInfo.ConstraintVT; @@ -7486,19 +7508,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {      // If this constraint is for a specific register, allocate it before      // anything else. -    if (OpInfo.ConstraintType == TargetLowering::C_Register) -      GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); +    SDISelAsmOperandInfo &RefOpInfo = +        OpInfo.isMatchingInputConstraint() +            ? ConstraintOperands[OpInfo.getMatchedOperand()] +            : ConstraintOperands[i]; +    if (RefOpInfo.ConstraintType == TargetLowering::C_Register) +      GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);    }    // Third pass - Loop over all of the operands, assigning virtual or physregs    // to register class operands.    for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {      SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; +    SDISelAsmOperandInfo &RefOpInfo = +        OpInfo.isMatchingInputConstraint() +            ? ConstraintOperands[OpInfo.getMatchedOperand()] +            : ConstraintOperands[i];      // C_Register operands have already been allocated, Other/Memory don't need      // to be. -    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) -      GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); +    if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass) +      GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);    }    // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -8289,7 +8319,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {    }    SmallVector<ISD::OutputArg, 4> Outs; -  GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); +  GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);    bool CanLowerReturn =        this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), @@ -8305,7 +8335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {      unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);      MachineFunction &MF = CLI.DAG.getMachineFunction();      DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); -    Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); +    Type *StackSlotPtrType = PointerType::get(CLI.RetTy, +                                              DL.getAllocaAddrSpace());      DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));      ArgListEntry Entry; @@ -8331,10 +8362,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {    } else {      for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {        EVT VT = RetTys[I]; -      MVT RegisterVT = -          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); -      unsigned NumRegs = -          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); +      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), +                                                     CLI.CallConv, VT); +      unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), +                                                       CLI.CallConv, VT);        for (unsigned i = 0; i != NumRegs; ++i) {          ISD::InputArg MyFlags;          MyFlags.VT = RegisterVT; @@ -8443,9 +8474,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {          Flags.setInConsecutiveRegs();        
Flags.setOrigAlign(OriginalAlignment); -      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); -      unsigned NumParts = -          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); +      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), +                                                 CLI.CallConv, VT); +      unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(), +                                                        CLI.CallConv, VT);        SmallVector<SDValue, 4> Parts(NumParts);        ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -8477,7 +8509,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {        }        getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, -                     CLI.CS.getInstruction(), ExtendKind, true); +                     CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);        for (unsigned j = 0; j != NumParts; ++j) {          // if it isn't first piece, alignment must be 1 @@ -8577,14 +8609,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {      unsigned CurReg = 0;      for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {        EVT VT = RetTys[I]; -      MVT RegisterVT = -          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); -      unsigned NumRegs = -          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); +      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), +                                                     CLI.CallConv, VT); +      unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), +                                                       CLI.CallConv, VT);        ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],                                                NumRegs, RegisterVT, VT, nullptr, -                                              AssertOp, true)); +                                              CLI.CallConv, AssertOp));        CurReg += NumRegs;      } @@ -8623,8 +8655,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {    // If this is an InlineAsm we have to match the registers required, not the    // notional registers required by the type. 
-  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, -                   V->getType(), isABIRegCopy(V)); +  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), +                   getABIRegCopyCC(V));    SDValue Chain = DAG.getEntryNode();    ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -8937,10 +8969,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {        if (ArgCopyElisionCandidates.count(&Arg))          Flags.setCopyElisionCandidate(); -      MVT RegisterVT = -          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); -      unsigned NumRegs = -          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); +      MVT RegisterVT = TLI->getRegisterTypeForCallingConv( +          *CurDAG->getContext(), F.getCallingConv(), VT); +      unsigned NumRegs = TLI->getNumRegistersForCallingConv( +          *CurDAG->getContext(), F.getCallingConv(), VT);        for (unsigned i = 0; i != NumRegs; ++i) {          ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,                                ArgNo, PartBase+i*RegisterVT.getStoreSize()); @@ -8995,8 +9027,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {      MVT VT = ValueVTs[0].getSimpleVT();      MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);      Optional<ISD::NodeType> AssertOp = None; -    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, -                                        RegVT, VT, nullptr, AssertOp); +    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, +                                        nullptr, F.getCallingConv(), AssertOp);      MachineFunction& MF = SDB->DAG.getMachineFunction();      MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -9046,10 +9078,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {      for (unsigned Val = 0; Val != NumValues; ++Val) {        EVT VT = ValueVTs[Val]; -      MVT PartVT = -          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); -      unsigned NumParts = -          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); +      MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), +                                                      F.getCallingConv(), VT); +      unsigned NumParts = TLI->getNumRegistersForCallingConv( +          *CurDAG->getContext(), F.getCallingConv(), VT);        // Even an apparant 'unused' swifterror argument needs to be returned. So        // we do generate a copy for it that can be used on return from the @@ -9062,8 +9094,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {            AssertOp = ISD::AssertZext;          ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, -                                             PartVT, VT, nullptr, AssertOp, -                                             true)); +                                             PartVT, VT, nullptr, +                                             F.getCallingConv(), AssertOp));        }        i += NumParts; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index e421984b8af2..4b5dda982f1b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1015,14 +1015,18 @@ struct RegsForValue {    /// Records if this value needs to be treated in an ABI dependant manner,    /// different to normal type legalization. 
-  bool IsABIMangled = false; +  Optional<CallingConv::ID> CallConv;    RegsForValue() = default;    RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, -               bool IsABIMangledValue = false); +               Optional<CallingConv::ID> CC = None);    RegsForValue(LLVMContext &Context, const TargetLowering &TLI,                 const DataLayout &DL, unsigned Reg, Type *Ty, -               bool IsABIMangledValue = false); +               Optional<CallingConv::ID> CC); + +  bool isABIMangled() const { +    return CallConv.hasValue(); +  }    /// Add the specified values to this one.    void append(const RegsForValue &RHS) { diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 5cf06e62b80c..54cbd6859f70 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -419,10 +419,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,                                                    Builder.getFrameIndexTy()));    } else if (LiveInOnly) {      // If this value is live in (not live-on-return, or live-through), we can -    // treat it the same way patchpoint treats it's "live in" values.  We'll  -    // end up folding some of these into stack references, but they'll be  +    // treat it the same way patchpoint treats it's "live in" values.  We'll +    // end up folding some of these into stack references, but they'll be      // handled by the register allocator.  Note that we do not have the notion -    // of a late use so these values might be placed in registers which are  +    // of a late use so these values might be placed in registers which are      // clobbered by the call.  This is fine for live-in.      Ops.push_back(Incoming);    } else { @@ -498,7 +498,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,    auto isGCValue =[&](const Value *V) {      return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);    }; -   +    // Before we actually start lowering (and allocating spill slots for values),    // reserve any stack slots which we judge to be profitable to reuse for a    // particular value.  This is purely an optimization over the code below and @@ -861,7 +861,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,        //       completely and make statepoint call to return a tuple.        unsigned Reg = FuncInfo.CreateRegs(RetTy);        RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), -                       DAG.getDataLayout(), Reg, RetTy, true); +                       DAG.getDataLayout(), Reg, RetTy, +                       ISP.getCallSite().getCallingConv());        SDValue Chain = DAG.getEntryNode();        RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fa867fcec366..e317268fa5f4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3421,7 +3421,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,  /// with the multiplicative inverse of the constant.  
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,                                const SDLoc &dl, SelectionDAG &DAG, -                              std::vector<SDNode *> &Created) { +                              SmallVectorImpl<SDNode *> &Created) {    assert(d != 0 && "Division by zero!");    // Shift the value upfront if it is even, so the LSB is one. @@ -3450,8 +3450,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,  }  SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, -                                      SelectionDAG &DAG, -                                      std::vector<SDNode *> *Created) const { +                                     SelectionDAG &DAG, +                                     SmallVectorImpl<SDNode *> &Created) const {    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    if (TLI.isIntDivCheap(N->getValueType(0), Attr)) @@ -3465,9 +3465,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,  /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".  SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,                                    SelectionDAG &DAG, bool IsAfterLegalization, -                                  std::vector<SDNode *> *Created) const { -  assert(Created && "No vector to hold sdiv ops."); - +                                  SmallVectorImpl<SDNode *> &Created) const {    EVT VT = N->getValueType(0);    SDLoc dl(N); @@ -3478,7 +3476,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,    // If the sdiv has an 'exact' bit we can use a simpler lowering.    if (N->getFlags().hasExact()) -    return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created); +    return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created);    APInt::ms magics = Divisor.magic(); @@ -3496,15 +3494,18 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,                                DAG.getConstant(magics.m, dl, VT)).getNode(), 1);    else      return SDValue();       // No mulhs or equvialent + +  Created.push_back(Q.getNode()); +    // If d > 0 and m < 0, add the numerator    if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {      Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); -    Created->push_back(Q.getNode()); +    Created.push_back(Q.getNode());    }    // If d < 0 and m > 0, subtract the numerator.    
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {      Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); -    Created->push_back(Q.getNode()); +    Created.push_back(Q.getNode());    }    auto &DL = DAG.getDataLayout();    // Shift right algebraic if shift value is nonzero @@ -3512,14 +3513,14 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,      Q = DAG.getNode(          ISD::SRA, dl, VT, Q,          DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); -    Created->push_back(Q.getNode()); +    Created.push_back(Q.getNode());    }    // Extract the sign bit and add it to the quotient    SDValue T =        DAG.getNode(ISD::SRL, dl, VT, Q,                    DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,                                    getShiftAmountTy(Q.getValueType(), DL))); -  Created->push_back(T.getNode()); +  Created.push_back(T.getNode());    return DAG.getNode(ISD::ADD, dl, VT, Q, T);  } @@ -3529,9 +3530,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,  /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".  SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,                                    SelectionDAG &DAG, bool IsAfterLegalization, -                                  std::vector<SDNode *> *Created) const { -  assert(Created && "No vector to hold udiv ops."); - +                                  SmallVectorImpl<SDNode *> &Created) const {    EVT VT = N->getValueType(0);    SDLoc dl(N);    auto &DL = DAG.getDataLayout(); @@ -3554,7 +3553,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,      Q = DAG.getNode(          ISD::SRL, dl, VT, Q,          DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL))); -    Created->push_back(Q.getNode()); +    Created.push_back(Q.getNode());      // Get magic number for the shifted divisor.      magics = Divisor.lshr(Shift).magicu(Shift); @@ -3573,7 +3572,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,    else      return SDValue();       // No mulhu or equivalent -  Created->push_back(Q.getNode()); +  Created.push_back(Q.getNode());    if (magics.a == 0) {      assert(magics.s < Divisor.getBitWidth() && @@ -3583,13 +3582,13 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,          DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));    } else {      SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); -    Created->push_back(NPQ.getNode()); +    Created.push_back(NPQ.getNode());      NPQ = DAG.getNode(          ISD::SRL, dl, VT, NPQ,          DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL))); -    Created->push_back(NPQ.getNode()); +    Created.push_back(NPQ.getNode());      NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); -    Created->push_back(NPQ.getNode()); +    Created.push_back(NPQ.getNode());      return DAG.getNode(          ISD::SRL, dl, VT, NPQ,          DAG.getConstant(magics.s - 1, dl, @@ -3994,7 +3993,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {          // Scalarize the load and let the individual components be handled.          
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);          if (Scalarized->getOpcode() == ISD::MERGE_VALUES) -	  return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); +          return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));          return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));        } diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index 25d405bf63de..3e12b32b12d4 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -175,7 +175,7 @@ bool ShadowStackGCLowering::doInitialization(Module &M) {    }    if (!Active)      return false; -   +    // struct FrameMap {    //   int32_t NumRoots; // Number of roots in stack frame.    //   int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots. @@ -286,7 +286,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {    if (!F.hasGC() ||        F.getGC() != std::string("shadow-stack"))      return false; -   +    LLVMContext &Context = F.getContext();    // Find calls to llvm.gcroot. diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index ed664e4f81a3..8fbe724045e6 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -233,7 +233,7 @@ public:  /// - Create a SplitEditor from a SplitAnalysis.  /// - Start a new live interval with openIntv.  /// - Mark the places where the new interval is entered using enterIntv* -/// - Mark the ranges where the new interval is used with useIntv*  +/// - Mark the ranges where the new interval is used with useIntv*  /// - Mark the places where the interval is exited with exitIntv*.  /// - Finish the current interval with closeIntv and repeat from 2.  /// - Rewrite instructions with finish(). diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 43f4bad595e3..7b1b76821daa 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -632,7 +632,7 @@ void TargetLoweringBase::initActions() {      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);      setOperationAction(ISD::BITREVERSE, VT, Expand); -     +      // These library functions default to expand.      setOperationAction(ISD::FROUND, VT, Expand);      setOperationAction(ISD::FPOWI, VT, Expand); @@ -924,7 +924,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,    // STATEPOINT Deopt Spill - live-through, read only, indirect    // STATEPOINT Deopt Alloca - live-through, read only, direct    // (We're currently conservative and mark the deopt slots read/write in -  // practice.)  +  // practice.)    // STATEPOINT GC Spill - live-through, read/write, indirect    // STATEPOINT GC Alloca - live-through, read/write, direct    // The live-in vs live-through is handled already (the live through ones are @@ -1337,7 +1337,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT  /// type of the given function.  This does not require a DAG or a return value,  /// and is suitable for use before any DAGs for the function are constructed.  /// TODO: Move this out of TargetLowering.cpp. 
-void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr, +void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, +                         AttributeList attr,                           SmallVectorImpl<ISD::OutputArg> &Outs,                           const TargetLowering &TLI, const DataLayout &DL) {    SmallVector<EVT, 4> ValueVTs; @@ -1365,9 +1366,9 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,      }      unsigned NumParts = -        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT); +        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);      MVT PartVT = -        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT); +        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);      // 'inreg' on function refers to return value      ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1410,7 +1411,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,        *Fast = true;      return true;    } -   +    // This is a misaligned access.    return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);  } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index b5dd2d4cca89..f6b91a2f0231 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -422,32 +422,34 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {    return ".data.rel.ro";  } +static unsigned getEntrySizeForKind(SectionKind Kind) { +  if (Kind.isMergeable1ByteCString()) +    return 1; +  else if (Kind.isMergeable2ByteCString()) +    return 2; +  else if (Kind.isMergeable4ByteCString()) +    return 4; +  else if (Kind.isMergeableConst4()) +    return 4; +  else if (Kind.isMergeableConst8()) +    return 8; +  else if (Kind.isMergeableConst16()) +    return 16; +  else if (Kind.isMergeableConst32()) +    return 32; +  else { +    // We shouldn't have mergeable C strings or mergeable constants that we +    // didn't handle above. +    assert(!Kind.isMergeableCString() && "unknown string width"); +    assert(!Kind.isMergeableConst() && "unknown data width"); +    return 0; +  } +} +  static MCSectionELF *selectELFSectionForGlobal(      MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,      const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,      unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) { -  unsigned EntrySize = 0; -  if (Kind.isMergeableCString()) { -    if (Kind.isMergeable2ByteCString()) { -      EntrySize = 2; -    } else if (Kind.isMergeable4ByteCString()) { -      EntrySize = 4; -    } else { -      EntrySize = 1; -      assert(Kind.isMergeable1ByteCString() && "unknown string width"); -    } -  } else if (Kind.isMergeableConst()) { -    if (Kind.isMergeableConst4()) { -      EntrySize = 4; -    } else if (Kind.isMergeableConst8()) { -      EntrySize = 8; -    } else if (Kind.isMergeableConst16()) { -      EntrySize = 16; -    } else { -      assert(Kind.isMergeableConst32() && "unknown data width"); -      EntrySize = 32; -    } -  }    StringRef Group = "";    if (const Comdat *C = getELFComdat(GO)) { @@ -455,7 +457,9 @@ static MCSectionELF *selectELFSectionForGlobal(      Group = C->getName();    } -  bool UniqueSectionNames = TM.getUniqueSectionNames(); +  // Get the section entry size based on the kind. 
+  unsigned EntrySize = getEntrySizeForKind(Kind); +    SmallString<128> Name;    if (Kind.isMergeableCString()) {      // We also need alignment here. @@ -479,16 +483,17 @@        Name += *OptionalPrefix;    } -  if (EmitUniqueSection && UniqueSectionNames) { -    Name.push_back('.'); -    TM.getNameWithPrefix(Name, GO, Mang, true); -  }    unsigned UniqueID = MCContext::GenericSectionID; -  if (EmitUniqueSection && !UniqueSectionNames) { -    UniqueID = *NextUniqueID; -    (*NextUniqueID)++; +  if (EmitUniqueSection) { +    if (TM.getUniqueSectionNames()) { +      Name.push_back('.'); +      TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/); +    } else { +      UniqueID = *NextUniqueID; +      (*NextUniqueID)++; +    }    } -  // Use 0 as the unique ID for execute-only text +  // Use 0 as the unique ID for execute-only text.    if (Kind.isExecuteOnly())      UniqueID = 0;    return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 3fca2f4ee4fe..2db03288f2ac 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -166,7 +166,7 @@ static cl::opt<CFLAAType> UseCFLAA(                            "Enable unification-based CFL-AA"),                 clEnumValN(CFLAAType::Andersen, "anders",                            "Enable inclusion-based CFL-AA"), -               clEnumValN(CFLAAType::Both, "both",  +               clEnumValN(CFLAAType::Both, "both",                            "Enable both variants of CFL-AA")));  /// Option names for limiting the codegen pipeline. diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index e629c13f133f..65d0a7a774fe 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -54,7 +54,7 @@ static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt(      cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false));  namespace { -   +  class WinEHPrepare : public FunctionPass {  public:    static char ID; // Pass identification, replacement for typeid.
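
The SelectionDAGBuilder and RegsForValue hunks above replace the `bool IsABIRegCopy`/`IsABIMangled` flags with an `Optional<CallingConv::ID>`, so the register-type queries receive the calling convention itself rather than a yes/no bit. A minimal standalone sketch of that pattern, using `std::optional` in place of LLVM's `Optional` and invented stand-in names throughout (the types, constants, and register-count rules below are illustrative assumptions, not LLVM's API):

```cpp
#include <cassert>
#include <iostream>
#include <optional>

// Hypothetical stand-ins for a calling-convention ID. The value for the
// vectorcall convention here is a placeholder, not LLVM's numbering.
using CallingConvID = unsigned;
constexpr CallingConvID CC_X86_VectorCall = 80;

// Before the refactor, a bool could only say "ABI-mangled or not"; every
// query then had to re-derive *which* convention applied. Carrying the
// optional convention answers both questions at once.
struct RegsForValueSketch {
  std::optional<CallingConvID> CallConv; // empty => no ABI mangling

  bool isABIMangled() const { return CallConv.has_value(); }

  // Pretend register-count query: convention-specific when mangled,
  // generic otherwise (the part sizes are made up for the demo).
  unsigned numRegisters(unsigned ValueBits) const {
    if (isABIMangled() && *CallConv == CC_X86_VectorCall)
      return ValueBits / 128 + (ValueBits % 128 != 0); // wide vector regs
    return ValueBits / 64 + (ValueBits % 64 != 0);     // generic 64-bit parts
  }
};

int main() {
  RegsForValueSketch Plain{std::nullopt};
  RegsForValueSketch VecCall{CC_X86_VectorCall};
  assert(!Plain.isABIMangled() && VecCall.isABIMangled());
  std::cout << Plain.numRegisters(256) << " vs " << VecCall.numRegisters(256)
            << "\n"; // 4 vs 2 under these made-up rules
}
```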

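The `BuildExactSDIV` hunk above exploits the fact that an exact division by an odd constant `d` is a multiplication by `d`'s multiplicative inverse modulo 2^n (the even case is handled by first shifting the low zero bits out, as the function's comment notes). A self-contained demonstration of computing that inverse via Newton's iteration; this is an illustration of the arithmetic, not LLVM's implementation:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>

// Multiplicative inverse of an odd d modulo 2^32 via Newton's iteration.
// inv = d is already correct to 3 low bits (odd^2 == 1 mod 8), and each
// step doubles the number of correct bits, so 5 steps are ample for 32.
uint32_t inverseMod2_32(uint32_t d) {
  assert((d & 1) && "only odd numbers are invertible mod 2^32");
  uint32_t inv = d;
  for (int i = 0; i < 5; ++i)
    inv *= 2u - d * inv; // inv <- inv * (2 - d*inv), all mod 2^32
  return inv;
}

int main() {
  const uint32_t d = 7;
  const uint32_t inv = inverseMod2_32(d);
  assert(d * inv == 1u); // it really is the inverse mod 2^32
  // For any x that d divides exactly, x * inv == x / d (mod 2^32),
  // turning the division into a single multiply.
  for (uint32_t q = 0; q < 100000; ++q)
    assert(q * d * inv == q);
  std::cout << "inverse of 7 mod 2^32 = 0x" << std::hex << inv << "\n";
}
```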
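For general (non-exact) unsigned division, the `BuildUDIV` hunk picks magic numbers (`Divisor.magicu()`) so the division becomes a multiply-high plus shifts, with the `NPQ` subtract/average fixup when the magic constant needs more bits than the word provides. A hedged sketch of that fixup sequence for divide-by-7, mirroring the SUB/SRL/ADD/SRL node chain in the hunk; the constant is the standard Hacker's Delight value (the true magic for 7 needs 33 bits, so the numerator must be folded back in), not one computed here:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>

// Unsigned divide by 7 without a divide instruction. 0x24924925 is
// ceil(2^32 / 7); because 7's full magic constant does not fit in 32
// bits, the "NPQ" fixup (subtract q, halve, add q back) is required.
uint32_t udiv7(uint32_t x) {
  uint32_t q = uint32_t((uint64_t(x) * 0x24924925u) >> 32); // multiply-high
  uint32_t npq = x - q;          // numerator minus the first estimate
  npq = (npq >> 1) + q;          // cannot overflow: (x-q)/2 + q <= x
  return npq >> 2;               // final shift yields the quotient
}

int main() {
  for (uint32_t x = 0; x < 1000000; ++x)
    assert(udiv7(x) == x / 7);
  // Spot-check the top of the range as well.
  assert(udiv7(0xFFFFFFFFu) == 0xFFFFFFFFu / 7);
  std::cout << "udiv7 matches hardware division\n";
}
```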