Diffstat (limited to 'lib')
66 files changed, 988 insertions, 622 deletions
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 86852d634ff2..ee4273770555 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3154,8 +3154,9 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
   if (LHS == RHS)
     return getConstant(LHS->getType(), 0);
 
-  // X - Y --> X + -Y
-  return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
+  // X - Y --> X + -Y.
+  // X -(nsw || nuw) Y --> X + -Y.
+  return getAddExpr(LHS, getNegativeSCEV(RHS));
 }
 
 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -3461,12 +3462,10 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
                   if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
                     Flags = setFlags(Flags, SCEV::FlagNUW);
                 }
-              } else if (const SubOperator *OBO =
-                           dyn_cast<SubOperator>(BEValueV)) {
-                if (OBO->hasNoUnsignedWrap())
-                  Flags = setFlags(Flags, SCEV::FlagNUW);
-                if (OBO->hasNoSignedWrap())
-                  Flags = setFlags(Flags, SCEV::FlagNSW);
+
+                // We cannot transfer nuw and nsw flags from subtraction
+                // operations -- sub nuw X, Y is not the same as add nuw X, -Y
+                // for instance.
               }
 
               const SCEV *StartVal = getSCEV(StartValueV);
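Why those flags cannot be transferred: a minimal standalone sketch (not part of the patch) showing that a wrap-free unsigned subtraction becomes a wrapping unsigned addition once the right-hand side is negated, so carrying "nuw" over to the rewritten add would assert something false.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t X = 5, Y = 1;
      uint8_t Sub = X - Y;                     // 4; "sub nuw X, Y" holds: no borrow.
      uint8_t NegY = static_cast<uint8_t>(-Y); // unsigned negation wraps: 255.
      uint8_t Add = X + NegY;                  // 5 + 255 = 260, wraps back to 4.
      assert(Sub == Add); // same value, but the add wrapped modulo 256,
                          // so an "add nuw" flag here would be incorrect
      return 0;
    }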
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 3d3da2ac1d31..455258e81e6d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -626,10 +626,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {
 
   DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());
 
-  // If this type is not derived from any type or the type is a declaration then
-  // take conservative approach.
-  if (!BaseType.isValid() || BaseType.isForwardDecl())
-    return Ty.getSizeInBits();
+  assert(BaseType.isValid());
 
   // If this is a derived type, go ahead and get the base type, unless it's a
   // reference then it's just the size of the field. Pointer types have no need
@@ -1473,7 +1470,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
     uint64_t FieldSize = getBaseTypeSize(DD, DT);
     uint64_t OffsetInBytes;
 
-    if (Size != FieldSize) {
+    if (FieldSize && Size != FieldSize) {
       // Handle bitfield, assume bytes are 8 bits.
       addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
       addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 789f2042a073..2f076b6734d9 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -11,11 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
 using namespace llvm;
 
 Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
@@ -43,15 +51,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
   // because CodeGen overloads that to mean preserving the MachineBasicBlock
   // CFG in addition to the LLVM IR CFG.
   AU.addPreserved<AliasAnalysis>();
-  AU.addPreserved("scalar-evolution");
-  AU.addPreserved("iv-users");
-  AU.addPreserved("memdep");
-  AU.addPreserved("live-values");
-  AU.addPreserved("domtree");
-  AU.addPreserved("domfrontier");
-  AU.addPreserved("loops");
-  AU.addPreserved("lda");
-  AU.addPreserved("stack-protector");
+  AU.addPreserved<DominanceFrontier>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
+  AU.addPreserved<IVUsers>();
+  AU.addPreserved<LoopInfo>();
+  AU.addPreserved<MemoryDependenceAnalysis>();
+  AU.addPreserved<ScalarEvolution>();
+  AU.addPreserved<StackProtector>();
 
   FunctionPass::getAnalysisUsage(AU);
 }
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index a25c4d66d3bb..753d9c229eca 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -563,9 +563,23 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
   return nullptr;
 }
 
-void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
-  MDNode *N =
-      MD ? cast<MDNode>(unwrap<MetadataAsValue>(MD)->getMetadata()) : nullptr;
+// MetadataAsValue uses a canonical format which strips the actual MDNode for
+// MDNode with just a single constant value, storing just a ConstantAsMetadata
+// This undoes this canonicalization, reconstructing the MDNode.
+static MDNode *extractMDNode(MetadataAsValue *MAV) {
+  Metadata *MD = MAV->getMetadata();
+  assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) &&
+      "Expected a metadata node or a canonicalized constant");
+
+  if (MDNode *N = dyn_cast<MDNode>(MD))
+    return N;
+
+  return MDNode::get(MAV->getContext(), MD);
+}
+
+void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef Val) {
+  MDNode *N = Val ? extractMDNode(unwrap<MetadataAsValue>(Val)) : nullptr;
+
   unwrap<Instruction>(Inst)->setMetadata(KindID, N);
 }
 
@@ -795,7 +809,7 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
     return;
   if (!Val)
     return;
-  N->addOperand(cast<MDNode>(unwrap<MetadataAsValue>(Val)->getMetadata()));
+  N->addOperand(extractMDNode(unwrap<MetadataAsValue>(Val)));
 }
 
 /*--.. Operations on scalar constants ......................................--*/
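For context, a hedged usage sketch of the C-API path this fixes (assuming the llvm-c Core API; not part of the patch). LLVMMDNode over a single constant is canonicalized internally to a bare ConstantAsMetadata, so LLVMSetMetadata now rebuilds a real MDNode via extractMDNode instead of a cast<MDNode> that would fail:

    #include "llvm-c/Core.h"

    void tagInstruction(LLVMValueRef Inst) {
      LLVMValueRef ConstVal = LLVMConstInt(LLVMInt32Type(), 42, 0);
      LLVMValueRef MD = LLVMMDNode(&ConstVal, 1); // canonicalized to a constant
      unsigned Kind = LLVMGetMDKindID("my.tag", 6); // "my.tag" is illustrative
      LLVMSetMetadata(Inst, Kind, MD); // rebuilds a real MDNode internally
    }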
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index b9ab25651fa2..fa8d50ec160c 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -600,8 +600,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
   // If P is an analysis pass and it is available then do not
   // generate the analysis again. Stale analysis info should not be
   // available at this point.
-  const PassInfo *PI =
-    PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+  const PassInfo *PI = findAnalysisPassInfo(P->getPassID());
   if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
     delete P;
     return;
@@ -619,7 +618,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
       Pass *AnalysisPass = findAnalysisPass(*I);
 
       if (!AnalysisPass) {
-        const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+        const PassInfo *PI = findAnalysisPassInfo(*I);
 
         if (!PI) {
           // Pass P is not in the global PassRegistry
@@ -716,8 +715,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
       return *I;
 
     // If Pass not found then check the interfaces implemented by Immutable Pass
-    const PassInfo *PassInf =
-      PassRegistry::getPassRegistry()->getPassInfo(PI);
+    const PassInfo *PassInf = findAnalysisPassInfo(PI);
     assert(PassInf && "Expected all immutable passes to be initialized");
     const std::vector<const PassInfo*> &ImmPI =
       PassInf->getInterfacesImplemented();
@@ -731,6 +729,17 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
   return nullptr;
 }
 
+const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const {
+  const PassInfo *&PI = AnalysisPassInfos[AID];
+  if (!PI)
+    PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
+  else
+    assert(PI == PassRegistry::getPassRegistry()->getPassInfo(AID) &&
+           "The pass info pointer changed for an analysis ID!");
+
+  return PI;
+}
+
 // Print passes managed by this top level manager.
 void PMTopLevelManager::dumpPasses() const {
@@ -759,8 +768,7 @@ void PMTopLevelManager::dumpArguments() const {
   dbgs() << "Pass Arguments: ";
   for (SmallVectorImpl<ImmutablePass *>::const_iterator I =
        ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
-    if (const PassInfo *PI =
-        PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
+    if (const PassInfo *PI = findAnalysisPassInfo((*I)->getPassID())) {
      assert(PI && "Expected all immutable passes to be initialized");
       if (!PI->isAnalysisGroup())
         dbgs() << " -" << PI->getPassArgument();
@@ -824,7 +832,7 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) {
 
   // This pass is the current implementation of all of the interfaces it
   // implements as well.
-  const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+  const PassInfo *PInf = TPM->findAnalysisPassInfo(PI);
   if (!PInf) return;
   const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
   for (unsigned i = 0, e = II.size(); i != e; ++i)
@@ -957,7 +965,7 @@ void PMDataManager::freePass(Pass *P, StringRef Msg,
   }
 
   AnalysisID PI = P->getPassID();
-  if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
+  if (const PassInfo *PInf = TPM->findAnalysisPassInfo(PI)) {
     // Remove the pass itself (if it is not already removed).
     AvailableAnalysis.erase(PI);
@@ -1037,7 +1045,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
   for (SmallVectorImpl<AnalysisID>::iterator
          I = ReqAnalysisNotAvailable.begin(),
          E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
-    const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+    const PassInfo *PI = TPM->findAnalysisPassInfo(*I);
     Pass *AnalysisPass = PI->createPass();
     this->addLowerLevelRequiredPass(P, AnalysisPass);
   }
@@ -1142,7 +1150,7 @@ void PMDataManager::dumpPassArguments() const {
       PMD->dumpPassArguments();
     else
       if (const PassInfo *PI =
-            PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+            TPM->findAnalysisPassInfo((*I)->getPassID()))
         if (!PI->isAnalysisGroup())
           dbgs() << " -" << PI->getPassArgument();
   }
@@ -1218,7 +1226,7 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
   dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
   for (unsigned i = 0; i != Set.size(); ++i) {
     if (i) dbgs() << ',';
-    const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+    const PassInfo *PInf = TPM->findAnalysisPassInfo(Set[i]);
     if (!PInf) {
       // Some preserved passes, such as AliasAnalysis, may not be initialized by
       // all drivers.
@@ -1658,8 +1666,8 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
     OnTheFlyManagers[P] = FPP;
   }
 
-  const PassInfo * RequiredPassPI =
-    PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID());
+  const PassInfo *RequiredPassPI =
+      TPM->findAnalysisPassInfo(RequiredPass->getPassID());
 
   Pass *FoundPass = nullptr;
   if (RequiredPassPI && RequiredPassPI->isAnalysis()) {
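The new findAnalysisPassInfo is a memoized wrapper around PassRegistry::getPassInfo, so the registry is consulted at most once per AnalysisID. A generic sketch of that pattern with hypothetical stand-in names (not LLVM API):

    #include <map>

    struct PassInfoStub {};        // stand-in for llvm::PassInfo
    using AnalysisID = const void *;

    static const PassInfoStub *slowRegistryLookup(AnalysisID) {
      static PassInfoStub Stub;
      return &Stub; // placeholder for the lock-protected registry query
    }

    const PassInfoStub *findAnalysisPassInfo(
        std::map<AnalysisID, const PassInfoStub *> &Cache, AnalysisID AID) {
      const PassInfoStub *&PI = Cache[AID]; // default-inserts nullptr on miss
      if (!PI)
        PI = slowRegistryLookup(AID);       // hit the slow path only once
      return PI;
    }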
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 4dcf910e01a7..e2439abaf001 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -219,7 +219,7 @@ class ELFObjectWriter : public MCObjectWriter {
                                   const MCSymbolData *SD, uint64_t C,
                                   unsigned Type) const;
 
-    void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+    void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                           const MCFragment *Fragment, const MCFixup &Fixup,
                           MCValue Target, bool &IsPCRel,
                           uint64_t &FixedValue) override;
@@ -789,11 +789,13 @@ static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) {
   return nullptr;
 }
 
-void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
                                        const MCAsmLayout &Layout,
                                        const MCFragment *Fragment,
-                                       const MCFixup &Fixup, MCValue Target,
-                                       bool &IsPCRel, uint64_t &FixedValue) {
+                                       const MCFixup &Fixup,
+                                       MCValue Target,
+                                       bool &IsPCRel,
+                                       uint64_t &FixedValue) {
   const MCSectionData *FixupSection = Fragment->getParent();
   uint64_t C = Target.getConstant();
   uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index f7054902f24c..04cc0ff4a864 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -27,7 +27,22 @@ bool MCAsmInfoDarwin::isSectionAtomizableBySymbols(
   // contain.
   // Sections holding 2 byte strings require symbols in order to be atomized.
   // There is no dedicated section for 4 byte strings.
-  if (SMO.getType() == MachO::S_CSTRING_LITERALS)
+  if (SMO.getKind().isMergeable1ByteCString())
+    return false;
+
+  if (SMO.getSegmentName() == "__TEXT" &&
+      SMO.getSectionName() == "__objc_classname" &&
+      SMO.getType() == MachO::S_CSTRING_LITERALS)
+    return false;
+
+  if (SMO.getSegmentName() == "__TEXT" &&
+      SMO.getSectionName() == "__objc_methname" &&
+      SMO.getType() == MachO::S_CSTRING_LITERALS)
+    return false;
+
+  if (SMO.getSegmentName() == "__TEXT" &&
+      SMO.getSectionName() == "__objc_methtype" &&
+      SMO.getType() == MachO::S_CSTRING_LITERALS)
     return false;
 
   if (SMO.getSegmentName() == "__DATA" && SMO.getSectionName() == "__cfstring")
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 45d49fae94bf..e3c2443f4a21 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -425,16 +425,6 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
   return true;
 }
 
-void MCAssembler::addLocalUsedInReloc(const MCSymbol &Sym) {
-  assert(Sym.isTemporary());
-  LocalsUsedInReloc.insert(&Sym);
-}
-
-bool MCAssembler::isLocalUsedInReloc(const MCSymbol &Sym) const {
-  assert(Sym.isTemporary());
-  return LocalsUsedInReloc.count(&Sym);
-}
-
 bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
   // Non-temporary labels should always be visible to the linker.
   if (!Symbol.isTemporary())
@@ -444,10 +434,8 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
   if (!Symbol.isInSection())
     return false;
 
-  if (isLocalUsedInReloc(Symbol))
-    return true;
-
-  return false;
+  // Otherwise, check if the section requires symbols even for temporary labels.
+  return getBackend().doesSectionRequireSymbols(Symbol.getSection());
 }
 
 const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 588d424120c4..d3751bd9ba57 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -448,11 +448,14 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand(
   assert(OS.tell() - Start == Size);
 }
 
-void MachObjectWriter::RecordRelocation(MCAssembler &Asm,
+
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
                                         const MCAsmLayout &Layout,
                                         const MCFragment *Fragment,
-                                        const MCFixup &Fixup, MCValue Target,
-                                        bool &IsPCRel, uint64_t &FixedValue) {
+                                        const MCFixup &Fixup,
+                                        MCValue Target,
+                                        bool &IsPCRel,
+                                        uint64_t &FixedValue) {
   TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
                                        Target, FixedValue);
 }
@@ -613,22 +616,6 @@ void MachObjectWriter::ComputeSymbolTable(
     ExternalSymbolData[i].SymbolData->setIndex(Index++);
   for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
     UndefinedSymbolData[i].SymbolData->setIndex(Index++);
-
-  for (const MCSectionData &SD : Asm) {
-    std::vector<RelAndSymbol> &Relocs = Relocations[&SD];
-    for (RelAndSymbol &Rel : Relocs) {
-      if (!Rel.Sym)
-        continue;
-
-      // Set the Index and the IsExtern bit.
-      unsigned Index = Rel.Sym->getIndex();
-      assert(isInt<24>(Index));
-      if (IsLittleEndian)
-        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (-1 << 24)) | Index | (1 << 27);
-      else
-        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
-    }
-  }
 }
 
 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
@@ -675,6 +662,10 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
   // Mark symbol difference expressions in variables (from .set or = directives)
   // as absolute.
   markAbsoluteVariableSymbols(Asm, Layout);
+
+  // Compute symbol table information and bind symbol indices.
+  ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
+                     UndefinedSymbolData);
 }
 
 bool MachObjectWriter::
@@ -758,10 +749,6 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
 
 void MachObjectWriter::WriteObject(MCAssembler &Asm,
                                    const MCAsmLayout &Layout) {
-  // Compute symbol table information and bind symbol indices.
-  ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
-                     UndefinedSymbolData);
-
   unsigned NumSections = Asm.size();
   const MCAssembler::VersionMinInfoType &VersionInfo =
     Layout.getAssembler().getVersionMinInfo();
@@ -852,7 +839,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
   for (MCAssembler::const_iterator it = Asm.begin(),
          ie = Asm.end(); it != ie; ++it) {
-    std::vector<RelAndSymbol> &Relocs = Relocations[it];
+    std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
     unsigned NumRelocs = Relocs.size();
     uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
     WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
@@ -946,10 +933,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
        ie = Asm.end(); it != ie; ++it) {
     // Write the section relocation entries, in reverse order to match 'as'
     // (approximately, the exact algorithm is more complicated than this).
-    std::vector<RelAndSymbol> &Relocs = Relocations[it];
+    std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
     for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-      Write32(Relocs[e - i - 1].MRE.r_word0);
-      Write32(Relocs[e - i - 1].MRE.r_word1);
+      Write32(Relocs[e - i - 1].r_word0);
+      Write32(Relocs[e - i - 1].r_word1);
     }
   }
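For reference, a sketch (not from the patch; names are illustrative) of the little-endian field layout that the deleted loop patched and that the RecordRelocation implementations below now fill in directly, matching the (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (IsExtern << 27) | (Type << 28) packing used throughout this commit:

    #include <cstdint>

    struct RelocFields {
      uint32_t SymbolNum; // bits 0..23: symbol index (or section ordinal)
      uint32_t IsPCRel;   // bit 24
      uint32_t Log2Size;  // bits 25..26: 0=1 byte, 1=2, 2=4, 3=8
      uint32_t IsExtern;  // bit 27: set for external (symbol-based) relocs
      uint32_t Type;      // bits 28..31: e.g. ARM64_RELOC_UNSIGNED
    };

    RelocFields decode(uint32_t r_word1) {
      return {r_word1 & 0xffffff, (r_word1 >> 24) & 1, (r_word1 >> 25) & 3,
              (r_word1 >> 27) & 1, r_word1 >> 28};
    }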
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index d8729bdbc4bb..c17f99b9bd7b 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -175,7 +175,7 @@ public:
                                               const MCFragment &FB, bool InSet,
                                               bool IsPCRel) const override;
 
-  void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                         const MCFragment *Fragment, const MCFixup &Fixup,
                         MCValue Target, bool &IsPCRel,
                         uint64_t &FixedValue) override;
@@ -661,9 +661,13 @@ bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
                                                                 InSet, IsPCRel);
 }
 
-void WinCOFFObjectWriter::RecordRelocation(
-    MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
-    const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
+void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+                                           const MCAsmLayout &Layout,
+                                           const MCFragment *Fragment,
+                                           const MCFixup &Fixup,
+                                           MCValue Target,
+                                           bool &IsPCRel,
+                                           uint64_t &FixedValue) {
   assert(Target.getSymA() && "Relocation must reference a symbol!");
 
   const MCSymbol &Symbol = Target.getSymA()->getSymbol();
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index f923a9aa87ae..0838e90baaec 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -246,13 +246,21 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
 
   if (ArchName.startswith("armv")) {
     offset = 3;
-    arch = Triple::arm;
+    if (ArchName.endswith("eb")) {
+      arch = Triple::armeb;
+      ArchName = ArchName.substr(0, ArchName.size() - 2);
+    } else
+      arch = Triple::arm;
   } else if (ArchName.startswith("armebv")) {
     offset = 5;
     arch = Triple::armeb;
   } else if (ArchName.startswith("thumbv")) {
     offset = 5;
-    arch = Triple::thumb;
+    if (ArchName.endswith("eb")) {
+      arch = Triple::thumbeb;
+      ArchName = ArchName.substr(0, ArchName.size() - 2);
+    } else
+      arch = Triple::thumb;
   } else if (ArchName.startswith("thumbebv")) {
     offset = 7;
     arch = Triple::thumbeb;
@@ -271,6 +279,8 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
 }
 
 static Triple::ArchType parseArch(StringRef ArchName) {
+  Triple::ArchType ARMArch(parseARMArch(ArchName));
+
   return StringSwitch<Triple::ArchType>(ArchName)
     .Cases("i386", "i486", "i586", "i686", Triple::x86)
     // FIXME: Do we need to support these?
@@ -280,9 +290,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Cases("powerpc64", "ppu", Triple::ppc64)
     .Case("powerpc64le", Triple::ppc64le)
     .Case("xscale", Triple::arm)
-    .StartsWith("arm", parseARMArch(ArchName))
-    .StartsWith("thumb", parseARMArch(ArchName))
-    .StartsWith("aarch64", parseARMArch(ArchName))
+    .Case("xscaleeb", Triple::armeb)
+    .StartsWith("arm", ARMArch)
+    .StartsWith("thumb", ARMArch)
+    .StartsWith("aarch64", ARMArch)
     .Case("msp430", Triple::msp430)
     .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
     .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -379,6 +390,9 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
 }
 
 static Triple::SubArchType parseSubArch(StringRef SubArchName) {
+  if (SubArchName.endswith("eb"))
+    SubArchName = SubArchName.substr(0, SubArchName.size() - 2);
+
   return StringSwitch<Triple::SubArchType>(SubArchName)
     .EndsWith("v8", Triple::ARMSubArch_v8)
     .EndsWith("v8a", Triple::ARMSubArch_v8)
@@ -1022,6 +1036,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
     offset = 5;
   if (offset != StringRef::npos && MArch.substr(offset, 2) == "eb")
     offset += 2;
+  if (MArch.endswith("eb"))
+    MArch = MArch.substr(0, MArch.size() - 2);
  if (offset != StringRef::npos)
     result = llvm::StringSwitch<const char *>(MArch.substr(offset))
       .Cases("v2", "v2a", "arm2")
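A small usage sketch (not part of the patch) of the new "eb" suffix handling, assuming the usual llvm::Triple API: big-endian arch strings now parse to the *eb variants, and the subarch is still recognized once the suffix has been stripped.

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    void checkBigEndianParsing() {
      llvm::Triple T1("armv7eb-unknown-linux-gnueabi");
      assert(T1.getArch() == llvm::Triple::armeb);   // "armv...eb" -> armeb

      llvm::Triple T2("thumbv7eb-unknown-linux-gnueabi");
      assert(T2.getArch() == llvm::Triple::thumbeb); // "thumbv...eb" -> thumbeb
    }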
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 1a8040275ca8..a391e766deea 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -204,6 +204,44 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
                                           [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
 ]>;
 
+//===----------------------------------------------------------------------===//
+// ARM64 Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+// This calling convention is specific to the Glasgow Haskell Compiler.
+// The only documentation is the GHC source code, specifically the C header
+// file:
+//
+//     https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
+//
+// which defines the registers for the Spineless Tagless G-Machine (STG) that
+// GHC uses to implement lazy evaluation. The generic STG machine has a set of
+// registers which are mapped to appropriate set of architecture specific
+// registers for each CPU architecture.
+//
+// The STG Machine is documented here:
+//
+//    https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
+//
+// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
+// register mapping".
+
+def CC_AArch64_GHC : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+  CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
+  CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,
+
+  // Promote i8/i16/i32 arguments to i64.
+  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+  CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
+]>;
+
 // FIXME: LR is only callee-saved in the sense that *we* preserve it and are
 // presumably a callee to someone. External functions may not do so, but this
 // is currently safe since BL has LR as an implicit-def and what happens after a
@@ -249,3 +287,4 @@ def CSR_AArch64_AllRegs
                            (sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
                            (sequence "Q%u", 0, 31))>;
 
+def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 419fbc8be6c9..ca4e97bd7c03 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -302,6 +302,8 @@ static unsigned getImplicitScaleFactor(MVT VT) {
 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
   if (CC == CallingConv::WebKit_JS)
     return CC_AArch64_WebKit_JS;
+  if (CC == CallingConv::GHC)
+    return CC_AArch64_GHC;
   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 }
 
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 66aa216db2c9..d8e91562e316 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -215,6 +215,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
   bool HasFP = hasFP(MF);
   DebugLoc DL = MBB.findDebugLoc(MBBI);
 
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+    return;
+
   int NumBytes = (int)MFI->getStackSize();
   if (!AFI->hasStackFrame()) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
@@ -451,6 +456,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   int NumBytes = MFI->getStackSize();
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
 
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+    return;
+
   // Initial and residual are named for consistency with the prologue. Note that
   // in the epilogue, the residual adjustment is executed first.
   uint64_t ArgumentPopSize = 0;
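A hedged sketch (not from the patch) of how a frontend opts into this convention; in LLVM IR it is spelled ghccc. Both the function and its call sites must use it, and calls are expected to be tail calls since GHC-convention functions skip the normal prologue/epilogue, as the FrameLowering change above enforces.

    #include "llvm/IR/CallingConv.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    void makeGHC(llvm::Function &F, llvm::CallInst &CallToF) {
      F.setCallingConv(llvm::CallingConv::GHC);        // define ghccc ...
      CallToF.setCallingConv(llvm::CallingConv::GHC);  // conventions must match
      CallToF.setTailCall();                           // GHC calls never return here
    }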
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0d44f992a2ab..19f51ce9450c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1990,6 +1990,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
     llvm_unreachable("Unsupported calling convention.");
   case CallingConv::WebKit_JS:
     return CC_AArch64_WebKit_JS;
+  case CallingConv::GHC:
+    return CC_AArch64_GHC;
   case CallingConv::C:
   case CallingConv::Fast:
     if (!Subtarget->isTargetDarwin())
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index d734d436add7..206cdbbe0c5b 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -33,6 +33,10 @@ using namespace llvm;
 #define GET_REGINFO_TARGET_DESC
 #include "AArch64GenRegisterInfo.inc"
 
+static cl::opt<bool>
+ReserveX18("aarch64-reserve-x18", cl::Hidden,
+          cl::desc("Reserve X18, making it unavailable as GPR"));
+
 AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
                                          const AArch64Subtarget *sti)
     : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
@@ -40,6 +44,10 @@ AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
 const MCPhysReg *
 AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::GHC)
+    // GHC set of callee saved regs is empty as all those regs are
+    // used for passing STG regs around
+    return CSR_AArch64_NoRegs_SaveList;
   if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
     return CSR_AArch64_AllRegs_SaveList;
   else
@@ -48,6 +56,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 
 const uint32_t *
 AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+  if (CC == CallingConv::GHC)
+    // This is academic because all GHC calls are (supposed to be) tail calls
+    return CSR_AArch64_NoRegs_RegMask;
   if (CC == CallingConv::AnyReg)
     return CSR_AArch64_AllRegs_RegMask;
   else
@@ -63,7 +74,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
 }
 
 const uint32_t *
-AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
   // This should return a register mask that is the same as that returned by
   // getCallPreservedMask but that additionally preserves the register used for
   // the first i64 argument (which must also be the register used to return a
@@ -71,6 +82,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
   //
   // In case that the calling convention does not use the same register for
   // both, the function should return NULL (does not currently apply)
+  assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
   return CSR_AArch64_AAPCS_ThisReturn_RegMask;
 }
 
@@ -90,7 +102,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     Reserved.set(AArch64::W29);
   }
 
-  if (STI->isTargetDarwin()) {
+  if (STI->isTargetDarwin() || ReserveX18) {
     Reserved.set(AArch64::X18); // Platform register
     Reserved.set(AArch64::W18);
   }
@@ -117,7 +129,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
     return true;
   case AArch64::X18:
   case AArch64::W18:
-    return STI->isTargetDarwin();
+    return STI->isTargetDarwin() || ReserveX18;
   case AArch64::FP:
   case AArch64::W29:
     return TFI->hasFP(MF) || STI->isTargetDarwin();
@@ -379,7 +391,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   case AArch64::GPR64commonRegClassID:
     return 32 - 1                                      // XZR/SP
            - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
-           - STI->isTargetDarwin() // X18 reserved as platform register
+           - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
            - hasBasePointer(MF);   // X19
   case AArch64::FPR8RegClassID:
   case AArch64::FPR16RegClassID:
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 423da6500c48..27cbac9f12e2 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -317,6 +317,42 @@ public:
                                          MachO::CPU_SUBTYPE_ARM64_ALL);
   }
 
+  bool doesSectionRequireSymbols(const MCSection &Section) const override {
+    // Any section for which the linker breaks things into atoms needs to
+    // preserve symbols, including assembler local symbols, to identify
+    // those atoms. These sections are:
+    // Sections of type:
+    //
+    //    S_CSTRING_LITERALS  (e.g. __cstring)
+    //    S_LITERAL_POINTERS  (e.g.  objc selector pointers)
+    //    S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
+    //
+    // Sections named:
+    //
+    //    __TEXT,__eh_frame
+    //    __TEXT,__ustring
+    //    __DATA,__cfstring
+    //    __DATA,__objc_classrefs
+    //    __DATA,__objc_catlist
+    //
+    // FIXME: It would be better if the compiler used actual linker local
+    // symbols for each of these sections rather than preserving what
+    // are ostensibly assembler local symbols.
+    const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
+    return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
+            SMO.getType() == MachO::S_4BYTE_LITERALS ||
+            SMO.getType() == MachO::S_8BYTE_LITERALS ||
+            SMO.getType() == MachO::S_16BYTE_LITERALS ||
+            SMO.getType() == MachO::S_LITERAL_POINTERS ||
+            (SMO.getSegmentName() == "__TEXT" &&
+             (SMO.getSectionName() == "__eh_frame" ||
+              SMO.getSectionName() == "__ustring")) ||
+            (SMO.getSegmentName() == "__DATA" &&
+             (SMO.getSectionName() == "__cfstring" ||
+              SMO.getSectionName() == "__objc_classrefs" ||
+              SMO.getSectionName() == "__objc_catlist")));
+  }
+
   /// \brief Generate the compact unwind encoding from the CFI directives.
   uint32_t generateCompactUnwindEncoding(
                              ArrayRef<MCCFIInstruction> Instrs) const override {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index f6fab5d6b6fe..e12a24be0a67 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -10,7 +10,6 @@
 #include "MCTargetDesc/AArch64FixupKinds.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -34,7 +33,7 @@ public:
       : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
                                  /*UseAggressiveSymbolFolding=*/true) {}
 
-  void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+  void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
                         const MCAsmLayout &Layout, const MCFragment *Fragment,
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue) override;
@@ -113,25 +112,8 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
   }
 }
 
-static bool canUseLocalRelocation(const MCSectionMachO &Section,
-                                  const MCSymbol &Symbol, unsigned Log2Size) {
-  // Debug info sections can use local relocations.
-  if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
-    return true;
-
-  // Otherwise, only pointer sized relocations are supported.
-  if (Log2Size != 3)
-    return false;
-
-  // But only if they don't point to a cstring.
-  if (!Symbol.isInSection())
-    return true;
-  const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
-  return RefSec.getType() != MachO::S_CSTRING_LITERALS;
-}
-
 void AArch64MachObjectWriter::RecordRelocation(
-    MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
+    MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
     const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
     uint64_t &FixedValue) {
   unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
@@ -141,9 +123,9 @@ void AArch64MachObjectWriter::RecordRelocation(
   unsigned Log2Size = 0;
   int64_t Value = 0;
   unsigned Index = 0;
+  unsigned IsExtern = 0;
   unsigned Type = 0;
   unsigned Kind = Fixup.getKind();
-  const MCSymbolData *RelSymbol = nullptr;
 
   FixupOffset += Fixup.getOffset();
 
@@ -189,8 +171,10 @@ void AArch64MachObjectWriter::RecordRelocation(
     // FIXME: Should this always be extern?
     // SymbolNum of 0 indicates the absolute section.
     Type = MachO::ARM64_RELOC_UNSIGNED;
+    Index = 0;
 
     if (IsPCRel) {
+      IsExtern = 1;
       Asm.getContext().FatalError(Fixup.getLoc(),
                                   "PC relative absolute relocation!");
 
@@ -214,12 +198,15 @@ void AArch64MachObjectWriter::RecordRelocation(
         Layout.getSymbolOffset(&B_SD) ==
             Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
       // SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
+      Index = A_Base->getIndex();
+      IsExtern = 1;
       Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
       IsPCRel = 1;
       MachO::any_relocation_info MRE;
       MRE.r_word0 = FixupOffset;
-      MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
-      Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
+      MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
+                     (IsExtern << 27) | (Type << 28));
+      Writer->addRelocation(Fragment->getParent(), MRE);
       return;
     } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
                Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
@@ -265,30 +252,25 @@ void AArch64MachObjectWriter::RecordRelocation(
                   ? 0
                   : Writer->getSymbolAddress(B_Base, Layout));
 
+    Index = A_Base->getIndex();
+    IsExtern = 1;
     Type = MachO::ARM64_RELOC_UNSIGNED;
 
     MachO::any_relocation_info MRE;
     MRE.r_word0 = FixupOffset;
-    MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
-    Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
+    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
+                   (IsExtern << 27) | (Type << 28));
+    Writer->addRelocation(Fragment->getParent(), MRE);
 
-    RelSymbol = B_Base;
+    Index = B_Base->getIndex();
+    IsExtern = 1;
     Type = MachO::ARM64_RELOC_SUBTRACTOR;
   } else { // A + constant
     const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
-    const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
-        Fragment->getParent()->getSection());
-
-    bool CanUseLocalRelocation =
-        canUseLocalRelocation(Section, *Symbol, Log2Size);
-    if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
-      const MCSection &Sec = Symbol->getSection();
-      if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
-        Asm.addLocalUsedInReloc(*Symbol);
-    }
-
     const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
     const MCSymbolData *Base = Asm.getAtom(&SD);
+    const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
+        Fragment->getParent()->getSection());
 
     // If the symbol is a variable and we weren't able to get a Base for it
     // (i.e., it's not in the symbol table associated with a section) resolve
@@ -328,13 +310,16 @@ void AArch64MachObjectWriter::RecordRelocation(
     // sections, and for pointer-sized relocations (.quad), we allow section
     // relocations.  It's code sections that run into trouble.
     if (Base) {
-      RelSymbol = Base;
+      Index = Base->getIndex();
+      IsExtern = 1;
 
       // Add the local offset, if needed.
       if (Base != &SD)
         Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
     } else if (Symbol->isInSection()) {
-      if (!CanUseLocalRelocation)
+      // Pointer-sized relocations can use a local relocation. Otherwise,
+      // we have to be in a debug info section.
+      if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
        Asm.getContext().FatalError(
             Fixup.getLoc(),
             "unsupported relocation of local symbol '" + Symbol->getName() +
@@ -344,6 +329,7 @@ void AArch64MachObjectWriter::RecordRelocation(
       const MCSectionData &SymSD =
           Asm.getSectionData(SD.getSymbol().getSection());
       Index = SymSD.getOrdinal() + 1;
+      IsExtern = 0;
       Value += Writer->getSymbolAddress(&SD, Layout);
 
       if (IsPCRel)
@@ -376,16 +362,16 @@ void AArch64MachObjectWriter::RecordRelocation(
 
     MachO::any_relocation_info MRE;
     MRE.r_word0 = FixupOffset;
-    MRE.r_word1 =
-        (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
-    Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
+    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
+                   (IsExtern << 27) | (Type << 28));
+    Writer->addRelocation(Fragment->getParent(), MRE);
 
     // Now set up the Addend relocation.
     Type = MachO::ARM64_RELOC_ADDEND;
     Index = Value;
-    RelSymbol = nullptr;
     IsPCRel = 0;
     Log2Size = 2;
+    IsExtern = 0;
 
     // Put zero into the instruction itself. The addend is in the relocation.
     Value = 0;
@@ -397,9 +383,9 @@ void AArch64MachObjectWriter::RecordRelocation(
   // struct relocation_info (8 bytes)
   MachO::any_relocation_info MRE;
   MRE.r_word0 = FixupOffset;
-  MRE.r_word1 =
-      (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
-  Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
+  MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
+                 (IsExtern << 27) | (Type << 28));
+  Writer->addRelocation(Fragment->getParent(), MRE);
 }
 
 MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c429ac185211..fda3e815624d 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -567,10 +567,21 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
       //   MOV  NewBase, Base
       //   ADDS NewBase, #imm8.
       if (Base != NewBase && Offset >= 8) {
+        const ARMSubtarget &Subtarget = MBB.getParent()->getTarget()
+                       .getSubtarget<ARMSubtarget>();
         // Need to insert a MOV to the new base first.
-        BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
-          .addReg(Base, getKillRegState(BaseKill))
-          .addImm(Pred).addReg(PredReg);
+        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
+            !Subtarget.hasV6Ops()) {
+          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
+          if (Pred != ARMCC::AL)
+            return false;
+          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
+            .addReg(Base, getKillRegState(BaseKill));
+        } else
+          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
+            .addReg(Base, getKillRegState(BaseKill))
+            .addImm(Pred).addReg(PredReg);
+
         // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
         Base = NewBase;
         BaseKill = false;
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 6a00b28f37a7..96f3b4e64326 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -9191,27 +9191,39 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
 // FIXME: This is duplicated in getARMFPUFeatures() in
 // tools/clang/lib/Driver/Tools.cpp
 static const struct {
-  const unsigned Fpu;
+  const unsigned ID;
   const uint64_t Enabled;
   const uint64_t Disabled;
-} Fpus[] = {
-      {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
-      {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
-      {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON},
-      {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON},
-      {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON},
-      {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON},
-      {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16,
-       ARM::FeatureNEON | ARM::FeatureCrypto},
-      {ARM::FP_ARMV8, ARM::FeatureFPARMv8,
-       ARM::FeatureNEON | ARM::FeatureCrypto},
-      {ARM::NEON, ARM::FeatureNEON, 0},
-      {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0},
-      {ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON,
-       ARM::FeatureCrypto},
-      {ARM::CRYPTO_NEON_FP_ARMV8,
-       ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0},
-      {ARM::SOFTVFP, 0, 0},
+} FPUs[] = {
+    {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
+    {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
+    {ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON},
+    {ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
+     ARM::FeatureNEON},
+    {ARM::VFPV4, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
+     ARM::FeatureNEON},
+    {ARM::VFPV4_D16,
+     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureD16,
+     ARM::FeatureNEON},
+    {ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+                        ARM::FeatureFPARMv8 | ARM::FeatureD16,
+     ARM::FeatureNEON | ARM::FeatureCrypto},
+    {ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+                        ARM::FeatureFPARMv8,
+     ARM::FeatureNEON | ARM::FeatureCrypto},
+    {ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0},
+    {ARM::NEON_VFPV4,
+     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON,
+     0},
+    {ARM::NEON_FP_ARMV8,
+     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+         ARM::FeatureFPARMv8 | ARM::FeatureNEON,
+     ARM::FeatureCrypto},
+    {ARM::CRYPTO_NEON_FP_ARMV8,
+     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+         ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto,
+     0},
+    {ARM::SOFTVFP, 0, 0},
 };
 
 /// parseDirectiveFPU
@@ -9229,14 +9241,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
     return false;
   }
 
-  for (const auto &Fpu : Fpus) {
-    if (Fpu.Fpu != ID)
+  for (const auto &Entry : FPUs) {
+    if (Entry.ID != ID)
       continue;
 
     // Need to toggle features that should be on but are off and that
     // should be off but are on.
-    uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) |
-                      (Fpu.Disabled & STI.getFeatureBits());
+    uint64_t Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
+                      (Entry.Disabled & STI.getFeatureBits());
     setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
     break;
   }
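A worked sketch (not from the patch) of the toggle computation above: enable any required bits that are off, disable any forbidden bits that are on, and flip exactly those.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t VFP2 = 1 << 0, NEON = 1 << 1;  // stand-in feature bits
      uint64_t Current = NEON;                      // NEON on, VFP2 off
      uint64_t Enabled = VFP2, Disabled = NEON;     // e.g. for ".fpu vfpv2"
      uint64_t Toggle = (Enabled & ~Current) | (Disabled & Current);
      Current ^= Toggle;       // apply, as STI.ToggleFeature would bit by bit
      assert(Current == VFP2); // VFP2 now on, NEON now off
      return 0;
    }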
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 3187d36f7519..7da500390ed1 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -54,10 +54,10 @@ public:
     : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
                                /*UseAggressiveSymbolFolding=*/true) {}
 
-  void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
-                        const MCAsmLayout &Layout, const MCFragment *Fragment,
-                        const MCFixup &Fixup, MCValue Target,
-                        uint64_t &FixedValue) override;
+  void RecordRelocation(MachObjectWriter *Writer,
+                        const MCAssembler &Asm, const MCAsmLayout &Layout,
+                        const MCFragment *Fragment, const MCFixup &Fixup,
+                        MCValue Target, uint64_t &FixedValue) override;
 };
 }
 
@@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
                    (IsPCRel               << 30) |
                    MachO::R_SCATTERED);
     MRE.r_word1 = Value2;
-    Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+    Writer->addRelocation(Fragment->getParent(), MRE);
   }
 
   MachO::any_relocation_info MRE;
@@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
                  (IsPCRel     << 30) |
                  MachO::R_SCATTERED);
   MRE.r_word1 = Value;
-  Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+  Writer->addRelocation(Fragment->getParent(), MRE);
 }
 
 void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
@@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
                    (IsPCRel               << 30) |
                    MachO::R_SCATTERED);
     MRE.r_word1 = Value2;
-    Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+    Writer->addRelocation(Fragment->getParent(), MRE);
   }
 
   MachO::any_relocation_info MRE;
@@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
                  (IsPCRel     << 30) |
                  MachO::R_SCATTERED);
   MRE.r_word1 = Value;
-  Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+  Writer->addRelocation(Fragment->getParent(), MRE);
 }
 
 bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
@@ -351,10 +351,11 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
 }
 
 void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
-                                           MCAssembler &Asm,
+                                           const MCAssembler &Asm,
                                            const MCAsmLayout &Layout,
                                            const MCFragment *Fragment,
-                                           const MCFixup &Fixup, MCValue Target,
+                                           const MCFixup &Fixup,
+                                           MCValue Target,
                                            uint64_t &FixedValue) {
   unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
   unsigned Log2Size;
@@ -400,8 +401,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
   // See <reloc.h>.
   uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
   unsigned Index = 0;
+  unsigned IsExtern = 0;
   unsigned Type = 0;
-  const MCSymbolData *RelSymbol = nullptr;
 
   if (Target.isAbsolute()) { // constant
     // FIXME!
@@ -421,7 +422,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
     // Check whether we need an external or internal relocation.
     if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
                                  FixedValue)) {
-      RelSymbol = SD;
+      IsExtern = 1;
+      Index = SD->getIndex();
 
       // For external relocations, make sure to offset the fixup value to
       // compensate for the addend of the symbol address, if it was
@@ -445,8 +447,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
   // struct relocation_info (8 bytes)
   MachO::any_relocation_info MRE;
   MRE.r_word0 = FixupOffset;
-  MRE.r_word1 =
-      (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+  MRE.r_word1 = ((Index     <<  0) |
+                 (IsPCRel   << 24) |
+                 (Log2Size  << 25) |
+                 (IsExtern  << 27) |
+                 (Type      << 28));
 
   // Even when it's not a scattered relocation, movw/movt always uses
   // a PAIR relocation.
@@ -471,10 +476,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
                        (Log2Size              << 25) |
                        (MachO::ARM_RELOC_PAIR << 28));
 
-    Writer->addRelocation(nullptr, Fragment->getParent(), MREPair);
+    Writer->addRelocation(Fragment->getParent(), MREPair);
   }
 
-  Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
+  Writer->addRelocation(Fragment->getParent(), MRE);
 }
 
 MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 2b459a4336b3..0c2407508869 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -15,6 +15,7 @@
 #include "Hexagon.h"
 #include "HexagonTargetMachine.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
@@ -42,7 +43,7 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<MachineFunctionAnalysis>();
       AU.addPreserved<MachineFunctionAnalysis>();
-      AU.addPreserved("stack-protector");
+      AU.addPreserved<StackProtector>();
       FunctionPass::getAnalysisUsage(AU);
     }
   };
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index d4852c4ece40..74796954a528 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -497,14 +497,14 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
   SDValue JumpTarget = Callee;
 
   // T9 should contain the address of the callee function if
-  // -reloction-model=pic or it is an indirect call.
+  // -relocation-model=pic or it is an indirect call.
   if (IsPICCall || !GlobalOrExternal) {
     unsigned V0Reg = Mips::V0;
     if (NeedMips16Helper) {
       RegsToPass.push_front(std::make_pair(V0Reg, Callee));
       JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
       ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
-      JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG,
+      JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG,
                                  MipsII::MO_GOT, Chain,
                                  FuncInfo->callPtrInfo(S->getSymbol()));
     } else
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 6d6735b3aae6..185d12ec93fd 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -756,7 +756,7 @@ def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>,
       ISA_MIPS32R6;
 def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>,
       ISA_MIPS32R6;
-def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
+def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
       ISA_MIPS32R6;
 def : MipsPat<(setne f32:$lhs, f32:$rhs),
               (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
@@ -776,7 +776,7 @@ def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>,
       ISA_MIPS32R6;
 def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>,
       ISA_MIPS32R6;
-def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
+def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
       ISA_MIPS32R6;
 def : MipsPat<(setne f64:$lhs, f64:$rhs),
               (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 99fd739c3ed0..d25f5637f57c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -1613,22 +1613,22 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
 
     if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine()))
       // %gp_rel relocation
-      return getAddrGPRel(N, Ty, DAG);
+      return getAddrGPRel(N, SDLoc(N), Ty, DAG);
 
     // %hi/%lo relocation
-    return getAddrNonPIC(N, Ty, DAG);
+    return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
   }
 
   if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
-    return getAddrLocal(N, Ty, DAG,
+    return getAddrLocal(N, SDLoc(N), Ty, DAG,
                         Subtarget.isABI_N32() || Subtarget.isABI_N64());
 
   if (LargeGOT)
-    return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
+    return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16,
                                  MipsII::MO_GOT_LO16, DAG.getEntryNode(),
                                  MachinePointerInfo::getGOT());
 
-  return getAddrGlobal(N, Ty, DAG,
+  return getAddrGlobal(N, SDLoc(N), Ty, DAG,
                        (Subtarget.isABI_N32() || Subtarget.isABI_N64())
                            ? MipsII::MO_GOT_DISP
                            : MipsII::MO_GOT16,
@@ -1642,9 +1642,9 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
 
   if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
       !Subtarget.isABI_N64())
-    return getAddrNonPIC(N, Ty, DAG);
+    return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
 
-  return getAddrLocal(N, Ty, DAG,
+  return getAddrLocal(N, SDLoc(N), Ty, DAG,
                       Subtarget.isABI_N32() || Subtarget.isABI_N64());
 }
 
@@ -1735,9 +1735,9 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
 
   if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
       !Subtarget.isABI_N64())
-    return getAddrNonPIC(N, Ty, DAG);
+    return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
 
-  return getAddrLocal(N, Ty, DAG,
+  return getAddrLocal(N, SDLoc(N), Ty, DAG,
                       Subtarget.isABI_N32() || Subtarget.isABI_N64());
 }
 
@@ -1754,12 +1754,12 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
 
     if (TLOF.IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
       // %gp_rel relocation
-      return getAddrGPRel(N, Ty, DAG);
+      return getAddrGPRel(N, SDLoc(N), Ty, DAG);
 
-    return getAddrNonPIC(N, Ty, DAG);
+    return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
   }
 
-  return getAddrLocal(N, Ty, DAG,
+  return getAddrLocal(N, SDLoc(N), Ty, DAG,
                       Subtarget.isABI_N32() || Subtarget.isABI_N64());
 }
 
@@ -2681,15 +2681,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       InternalLinkage = Val->hasInternalLinkage();
 
       if (InternalLinkage)
-        Callee = getAddrLocal(G, Ty, DAG,
+        Callee = getAddrLocal(G, DL, Ty, DAG,
                               Subtarget.isABI_N32() || Subtarget.isABI_N64());
       else if (LargeGOT) {
-        Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
+        Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16,
                                        MipsII::MO_CALL_LO16, Chain,
                                        FuncInfo->callPtrInfo(Val));
         IsCallReloc = true;
       } else {
-        Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
+        Callee = getAddrGlobal(G, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
                                FuncInfo->callPtrInfo(Val));
         IsCallReloc = true;
       }
@@ -2702,15 +2702,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     const char *Sym = S->getSymbol();
 
     if (!Subtarget.isABI_N64() && !IsPIC) // !N64 && static
-      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
-                                            MipsII::MO_NO_FLAG);
+      Callee =
+          DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG);
     else if (LargeGOT) {
-      Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
+      Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16,
                                      MipsII::MO_CALL_LO16, Chain,
                                      FuncInfo->callPtrInfo(Sym));
       IsCallReloc = true;
    } else { // N64 || PIC
-      Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
+      Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
                              FuncInfo->callPtrInfo(Sym));
       IsCallReloc = true;
     }
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 71f140b7a656..4132de6dbcad 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -272,9 +272,8 @@ namespace llvm {
     //
     // (add (load (wrapper $gp, %got(sym)), %lo(sym))
     template <class NodeTy>
-    SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+    SDValue getAddrLocal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG,
                          bool IsN32OrN64) const {
-      SDLoc DL(N);
       unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
       SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
                                 getTargetNode(N, Ty, DAG, GOTFlag));
@@ -291,11 +290,10 @@ namespace llvm {
     // computing a global symbol's address:
     //
     // (load (wrapper $gp, %got(sym)))
-    template<class NodeTy>
-    SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+    template <class NodeTy>
+    SDValue getAddrGlobal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG,
                           unsigned Flag, SDValue Chain,
                           const MachinePointerInfo &PtrInfo) const {
-      SDLoc DL(N);
       SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
                                 getTargetNode(N, Ty, DAG, Flag));
       return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0);
@@ -305,14 +303,13 @@ namespace llvm {
     // computing a global symbol's address in large-GOT mode:
     //
     // (load (wrapper (add %hi(sym), $gp), %lo(sym)))
-    template<class NodeTy>
-    SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG,
-                                  unsigned HiFlag, unsigned LoFlag,
-                                  SDValue Chain,
+    template <class NodeTy>
+    SDValue getAddrGlobalLargeGOT(NodeTy *N, SDLoc DL, EVT Ty,
+                                  SelectionDAG &DAG, unsigned HiFlag,
+                                  unsigned LoFlag, SDValue Chain,
                                   const MachinePointerInfo &PtrInfo) const {
-      SDLoc DL(N);
-      SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty,
-                               getTargetNode(N, Ty, DAG, HiFlag));
+      SDValue Hi =
+          DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag));
       Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
       SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
                                     getTargetNode(N, Ty, DAG, LoFlag));
@@ -324,9 +321,9 @@ namespace llvm {
     // computing a symbol's address in non-PIC mode:
     //
     // (add %hi(sym), %lo(sym))
-    template<class NodeTy>
-    SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
-      SDLoc DL(N);
+    template <class NodeTy>
+    SDValue getAddrNonPIC(NodeTy *N, SDLoc DL, EVT Ty,
+                          SelectionDAG &DAG) const {
      SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI);
       SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);
       return DAG.getNode(ISD::ADD, DL, Ty,
@@ -338,9 +335,8 @@ namespace llvm {
     // computing a symbol's address using gp-relative addressing:
     //
     // (add $gp, %gp_rel(sym))
-    template<class NodeTy>
-    SDValue getAddrGPRel(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
-      SDLoc DL(N);
+    template <class NodeTy>
+    SDValue getAddrGPRel(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG) const {
       assert(Ty == MVT::i32);
       SDValue GPRel = getTargetNode(N, Ty, DAG, MipsII::MO_GPREL);
       return
DAG.getNode(ISD::ADD, DL, Ty, diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 69fc86e75414..c343980c5014 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -15,6 +15,7 @@  #define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H  #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/Pass.h" @@ -32,8 +33,8 @@ public:    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<DataLayoutPass>(); -    AU.addPreserved("stack-protector");      AU.addPreserved<MachineFunctionAnalysis>(); +    AU.addPreserved<StackProtector>();    }    const char *getPassName() const override { diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index 8759406a6803..da301d5de62d 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -16,6 +16,7 @@  #define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H  #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/Pass.h" @@ -29,8 +30,8 @@ struct NVPTXLowerAggrCopies : public FunctionPass {    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<DataLayoutPass>(); -    AU.addPreserved("stack-protector");      AU.addPreserved<MachineFunctionAnalysis>(); +    AU.addPreserved<StackProtector>();    }    bool runOnFunction(Function &F) override; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index f7259b9a098c..df2f14a91a7e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -41,7 +41,7 @@ public:        : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,                                   /*UseAggressiveSymbolFolding=*/Is64Bit) {} -  void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, +  void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,                          const MCAsmLayout &Layout, const MCFragment *Fragment,                          const MCFixup &Fixup, MCValue Target,                          uint64_t &FixedValue) override { @@ -282,7 +282,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(      MachO::any_relocation_info MRE;      makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,                                  Log2Size, IsPCRel, Value2); -    Writer->addRelocation(nullptr, Fragment->getParent(), MRE); +    Writer->addRelocation(Fragment->getParent(), MRE);    } else {      // If the offset is more than 24-bits, it won't fit in a scattered      // relocation offset field, so we fall back to using a non-scattered @@ -296,7 +296,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(    }    MachO::any_relocation_info MRE;    makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value); -  Writer->addRelocation(nullptr, Fragment->getParent(), MRE); +  Writer->addRelocation(Fragment->getParent(), MRE);    return true;  } @@ -331,9 +331,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(    // See <reloc.h>.    
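The Hexagon and NVPTX hunks above swap the string-based AU.addPreserved("stack-protector") for the typed template form, which resolves through the pass's static ID at compile time rather than a runtime registry lookup by name. A hedged sketch of the resulting pattern; MyCustomPass is a hypothetical pass, not part of the patch:

#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

namespace {
struct MyCustomPass : public llvm::FunctionPass {
  static char ID;
  MyCustomPass() : llvm::FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    // Typed form: a typo here fails to compile, unlike the old
    // string-based addPreserved("stack-protector") lookup.
    AU.addPreserved<llvm::StackProtector>();
    llvm::FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(llvm::Function &) override { return false; }
};
char MyCustomPass::ID = 0;
}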
const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);    unsigned Index = 0; +  unsigned IsExtern = 0;    unsigned Type = RelocType; -  const MCSymbolData *RelSymbol = nullptr;    if (Target.isAbsolute()) { // constant                               // SymbolNum of 0 indicates the absolute section.                               // @@ -355,7 +355,8 @@ void PPCMachObjectWriter::RecordPPCRelocation(      // Check whether we need an external or internal relocation.      if (Writer->doesSymbolRequireExternRelocation(SD)) { -      RelSymbol = SD; +      IsExtern = 1; +      Index = SD->getIndex();        // For external relocations, make sure to offset the fixup value to        // compensate for the addend of the symbol address, if it was        // undefined. This occurs with weak definitions, for example. @@ -374,8 +375,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(    // struct relocation_info (8 bytes)    MachO::any_relocation_info MRE; -  makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type); -  Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); +  makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern, +                     Type); +  Writer->addRelocation(Fragment->getParent(), MRE);  }  MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index fcf9eca80e96..c6600550126e 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -77,7 +77,11 @@ extern Target TheGCNTarget;  namespace AMDGPU {  enum TargetIndex { -  TI_CONSTDATA_START +  TI_CONSTDATA_START, +  TI_SCRATCH_RSRC_DWORD0, +  TI_SCRATCH_RSRC_DWORD1, +  TI_SCRATCH_RSRC_DWORD2, +  TI_SCRATCH_RSRC_DWORD3  };  } diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 8a5ca613dc80..1df4448abf05 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -92,6 +92,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",          "true",          "Support flat address space">; +def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", +        "EnableVGPRSpilling", +        "true", +        "Enable spilling of VGPRs to scratch memory">; +  class SubtargetFeatureFetchLimit <string Value> :                            SubtargetFeature <"fetch"#Value,          "TexVTXClauseSize", diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 624f3919b409..6185e367ff50 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -116,7 +116,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {    const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();    SIProgramInfo KernelInfo;    if (STM.isAmdHsaOS()) { -    OutStreamer.SwitchSection(getObjFileLowering().getTextSection());      getSIProgramInfo(KernelInfo, MF);      EmitAmdKernelCodeT(MF, KernelInfo);      OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1)); @@ -421,6 +420,7 @@ static unsigned getRsrcReg(unsigned ShaderType) {  void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,                                           const SIProgramInfo &KernelInfo) { +  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();    unsigned RsrcReg = getRsrcReg(MFI->getShaderType()); @@ -441,6 +441,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,      
OutStreamer.EmitIntValue(RsrcReg, 4);      OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |                               S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4); +    if (STM.isVGPRSpillingEnabled(MFI)) { +      OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); +      OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4); +    }    }    if (MFI->getShaderType() == ShaderType::PIXEL) { @@ -504,6 +508,19 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,    header.wavefront_size = STM.getWavefrontSize(); +  const MCSectionELF *VersionSection = OutContext.getELFSection(".hsa.version", +      ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly()); +  OutStreamer.SwitchSection(VersionSection); +  OutStreamer.EmitBytes(Twine("HSA Code Unit:" + +                        Twine(header.hsail_version_major) + "." + +                        Twine(header.hsail_version_minor) + ":" + +                        "AMD:" + +                        Twine(header.amd_code_version_major) + "." + +                        Twine(header.amd_code_version_minor) +  ":" + +                        "GFX8.1:0").str()); + +  OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); +    if (isVerbose()) {      OutStreamer.emitRawComment("amd_code_version_major = " +                                 Twine(header.amd_code_version_major), false); diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index eaa506db96c3..15112c7e54d4 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -417,6 +417,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {                                    N->getValueType(0), Ops);    } +  case ISD::LOAD: { +    // To simplify the TableGen patterns, we replace all i64 loads with +    // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32 +    // during DAG legalization; however, places in the DAG legalizer +    // (e.g. ExpandUnalignedLoad) assume that i64 loads are still present +    // whenever the i64 type is legal, so doing this promotion that early +    // can cause problems. 
+    EVT VT = N->getValueType(0); +    LoadSDNode *LD = cast<LoadSDNode>(N); +    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) +      break; + +    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(), +                                     LD->getBasePtr(), LD->getMemOperand()); +    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N), +                                      MVT::i64, NewLoad); +    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1)); +    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast); +    SelectCode(NewLoad.getNode()); +    N = BitCast.getNode(); +    break; +  } +    case AMDGPUISD::REGISTER_LOAD: {      if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)        break; @@ -962,16 +984,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,    const SITargetLowering& Lowering =      *static_cast<const SITargetLowering*>(getTargetLowering()); -  unsigned ScratchPtrReg = -      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);    unsigned ScratchOffsetReg =        TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);    Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,                                  ScratchOffsetReg, MVT::i32); +  SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); +  SDValue ScratchRsrcDword0 = +      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); -  SDValue ScratchPtr = -    CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, -                           MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64); +  SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); +  SDValue ScratchRsrcDword1 = +      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); + +  const SDValue RsrcOps[] = { +      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), +      ScratchRsrcDword0, +      CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), +      ScratchRsrcDword1, +      CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), +  }; +  SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, +                                              MVT::v2i32, RsrcOps), 0);    Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);    SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,        MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); @@ -988,22 +1021,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,      }    } -  // (add FI, n0) -  if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && -       isa<FrameIndexSDNode>(Addr.getOperand(0))) { -    VAddr = Addr.getOperand(1); -    ImmOffset = Addr.getOperand(0); -    return true; -  } - -  // (FI) -  if (isa<FrameIndexSDNode>(Addr)) { -    VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, -                                          CurDAG->getConstant(0, MVT::i32)), 0); -    ImmOffset = Addr; -    return true; -  } -    // (node)    VAddr = Addr;    ImmOffset = CurDAG->getTargetConstant(0, MVT::i16); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 206050d54a02..2adcdf1c299e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -187,9 +187,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :    setOperationAction(ISD::LOAD, MVT::v2f32, Promote);    AddPromotedToType(ISD::LOAD, 
MVT::v2f32, MVT::v2i32); -  setOperationAction(ISD::LOAD, MVT::i64, Promote); -  AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); -    setOperationAction(ISD::LOAD, MVT::v4f32, Promote);    AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index 5beaa6841c94..e34a7b7345f1 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -341,8 +341,39 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {  // instead.  namespace llvm {  namespace AMDGPU { -int getMCOpcode(uint16_t Opcode, unsigned Gen) { +static int getMCOpcode(uint16_t Opcode, unsigned Gen) {    return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);  }  }  } + +// This must be kept in sync with the SISubtarget class in SIInstrInfo.td +enum SISubtarget { +  SI = 0, +  VI = 1 +}; + +enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) { +  switch (Gen) { +  default: +    return SI; +  case AMDGPUSubtarget::VOLCANIC_ISLANDS: +    return VI; +  } +} + +int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { +  int MCOp = AMDGPU::getMCOpcode(Opcode, +                        AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration())); + +  // -1 means that Opcode is already a native instruction. +  if (MCOp == -1) +    return Opcode; + +  // (uint16_t)-1 means that Opcode is a pseudo instruction that has +  // no encoding in the given subtarget generation. +  if (MCOp == (uint16_t)-1) +    return -1; + +  return MCOp; +} diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index da9833d25a52..e28ce0f03acc 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -135,6 +135,11 @@ public:    bool isRegisterStore(const MachineInstr &MI) const;    bool isRegisterLoad(const MachineInstr &MI) const; +  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. +  /// Return -1 if the target-specific opcode for the pseudo instruction does +  /// not exist. If Opcode is not a pseudo instruction, this is the identity. +  int pseudoToMCOpcode(int Opcode) const; +  //===---------------------------------------------------------------------===//  // Pure virtual functions to be implemented by sub-classes.  
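The two sentinels that pseudoToMCOpcode distinguishes above are easy to conflate, because the TableGen-generated table stores 16-bit entries while the query returns int. A standalone sketch of the same decision logic, assuming the conventions stated in the comments above (-1 marks a native opcode, (uint16_t)-1 marks a pseudo with no encoding on the chosen generation); resolveOpcode is an illustrative name:

#include <cstdint>

static int resolveOpcode(int Opcode, int TableEntry) {
  if (TableEntry == -1)
    return Opcode;        // already a native instruction, keep it
  if (TableEntry == (uint16_t)-1)
    return -1;            // pseudo with no encoding on this subtarget
  return TableEntry;      // the real MC opcode for this generation
}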
//===---------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 1995ef2b0c9e..03aa32d05ef0 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -22,6 +22,7 @@  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h"  #include "llvm/IR/GlobalVariable.h"  #include "llvm/MC/MCCodeEmitter.h"  #include "llvm/MC/MCContext.h" @@ -39,29 +40,17 @@ AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):    Ctx(ctx), ST(st)  { } -enum AMDGPUMCInstLower::SISubtarget -AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const { -  switch (Gen) { -  default: -    return AMDGPUMCInstLower::SI; -  case AMDGPUSubtarget::VOLCANIC_ISLANDS: -    return AMDGPUMCInstLower::VI; -  } -} - -unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const { - -  int MCOpcode = AMDGPU::getMCOpcode(MIOpcode, -                              AMDGPUSubtargetToSISubtarget(ST.getGeneration())); -  if (MCOpcode == -1) -    MCOpcode = MIOpcode; +void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { -  return MCOpcode; -} +  int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode()); -void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { +  if (MCOpcode == -1) { +    LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext(); +    C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " +                "a target-specific version: " + Twine(MI->getOpcode())); +  } -  OutMI.setOpcode(getMCOpcode(MI->getOpcode())); +  OutMI.setOpcode(MCOpcode);    for (const MachineOperand &MO : MI->explicit_operands()) {      MCOperand MCOp; @@ -91,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {        MCOp = MCOperand::CreateExpr(Expr);        break;      } +    case MachineOperand::MO_ExternalSymbol: { +      MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName())); +      const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); +      MCOp = MCOperand::CreateExpr(Expr); +      break; +    }      }      OutMI.addOperand(MCOp);    } diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h index 0ae4d11bf1d1..d322fe072b2b 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.h +++ b/lib/Target/R600/AMDGPUMCInstLower.h @@ -19,23 +19,9 @@ class MCContext;  class MCInst;  class AMDGPUMCInstLower { - -  // This must be kept in sync with the SISubtarget class in SIInstrInfo.td -  enum SISubtarget { -    SI = 0, -    VI = 1 -  }; -    MCContext &Ctx;    const AMDGPUSubtarget &ST; -  /// Convert a member of the AMDGPUSubtarget::Generation enum to the -  /// SISubtarget enum. -  enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const; - -  /// Get the MC opcode for this MachineInstr. 
-  unsigned getMCOpcode(unsigned MIOpcode) const; -  public:    AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST); diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 597e558e6634..b1c7498fc142 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -18,7 +18,9 @@  #include "R600MachineScheduler.h"  #include "SIISelLowering.h"  #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h"  #include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineScheduler.h"  using namespace llvm; @@ -78,6 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,        FlatAddressSpace(false), EnableIRStructurizer(true),        EnablePromoteAlloca(false), EnableIfCvt(true),        EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), +      EnableVGPRSpilling(false),        DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),        FrameLowering(TargetFrameLowering::StackGrowsUp,                      64 * 16, // Maximum stack alignment (long16) @@ -113,3 +116,26 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {    case SEA_ISLANDS: return 12;    }  } + +bool AMDGPUSubtarget::isVGPRSpillingEnabled( +                                       const SIMachineFunctionInfo *MFI) const { +  return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling; +} + +void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, +                                          MachineInstr *begin, +                                          MachineInstr *end, +                                          unsigned NumRegionInstrs) const { +  if (getGeneration() >= SOUTHERN_ISLANDS) { + +    // Track register pressure so the scheduler can try to decrease +    // pressure once register usage is above the threshold defined by +    // SIRegisterInfo::getRegPressureSetLimit() +    Policy.ShouldTrackPressure = true; + +    // Enabling both top down and bottom up scheduling seems to give us fewer +    // register spills than just using one of these approaches on its own. +    Policy.OnlyTopDown = false; +    Policy.OnlyBottomUp = false; +  } +} diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 90179d79d25d..566b45c1dccc 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -30,6 +30,8 @@  namespace llvm { +class SIMachineFunctionInfo; +  class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {  public: @@ -63,6 +65,7 @@ private:    unsigned WavefrontSize;    bool CFALUBug;    int LocalMemorySize; +  bool EnableVGPRSpilling;    const DataLayout DL;    AMDGPUFrameLowering FrameLowering; @@ -206,6 +209,10 @@ public:      return getGeneration() <= NORTHERN_ISLANDS;    } +  void overrideSchedPolicy(MachineSchedPolicy &Policy, +                           MachineInstr *begin, MachineInstr *end, +                           unsigned NumRegionInstrs) const override; +    // Helper functions to simplify if statements    bool isTargetELF() const {      return false; @@ -224,6 +231,15 @@ public:    bool isAmdHsaOS() const {      return TargetTriple.getOS() == Triple::AMDHSA;    } +  bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + +  unsigned getMaxWavesPerCU() const { +    if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) +      return 10; + +    // FIXME: Not sure what this is for other subtargets. 
+    llvm_unreachable("do not know max waves per CU for this subtarget."); +  }  };  } // End namespace llvm diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index d0c634fb7e42..5fb311b3016b 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -29,7 +29,7 @@ public:                                  const MCAsmLayout &Layout) override {      //XXX: Implement if necessary.    } -  void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, +  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,                          const MCFragment *Fragment, const MCFixup &Fixup,                          MCValue Target, bool &IsPCRel,                          uint64_t &FixedValue) override { diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 73a9c73d8e7b..7601794beab8 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -163,4 +163,8 @@ namespace SIOutMods {  #define R_00B860_COMPUTE_TMPRING_SIZE                                   0x00B860  #define   S_00B860_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12) +#define R_0286E8_SPI_TMPRING_SIZE                                       0x0286E8 +#define   S_0286E8_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12) + +  #endif diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0a3fa2f930d7..6b2ea0682a43 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -588,6 +588,12 @@ SDValue SITargetLowering::LowerFormalArguments(      InVals.push_back(Val);    } + +  if (Info->getShaderType() != ShaderType::COMPUTE) { +    unsigned ScratchIdx = CCInfo.getFirstUnallocated( +        AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()); +    Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx); +  }    return Chain;  } diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 99a1df36c1f4..09c0cbe8f5c3 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -85,49 +85,41 @@ class Enc64 {  let Uses = [EXEC] in { -class VOPCCommon <dag ins, string asm, list<dag> pattern> : -    InstSI <(outs VCCReg:$dst), ins, asm, pattern> { +class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> : +    InstSI <outs, ins, asm, pattern> { -  let DisableEncoding = "$dst";    let mayLoad = 0;    let mayStore = 0;    let hasSideEffects = 0;    let UseNamedOperandTable = 1; -  let VOPC = 1;    let VALU = 1; +} + +class VOPCCommon <dag ins, string asm, list<dag> pattern> : +    VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> { + +  let DisableEncoding = "$dst"; +  let VOPC = 1;    let Size = 4;  }  class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> : -    InstSI <outs, ins, asm, pattern> { -  let mayLoad = 0; -  let mayStore = 0; -  let hasSideEffects = 0; -  let UseNamedOperandTable = 1; +    VOPAnyCommon <outs, ins, asm, pattern> { +    let VOP1 = 1; -  let VALU = 1;    let Size = 4;  }  class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> : -    InstSI <outs, ins, asm, pattern> { +    VOPAnyCommon <outs, ins, asm, pattern> { -  let mayLoad = 0; -  let mayStore = 0; -  let hasSideEffects = 0; -  let UseNamedOperandTable = 1;    let VOP2 = 1; -  let VALU = 1;    let Size = 4;  }  class VOP3Common <dag outs, dag 
ins, string asm, list<dag> pattern> : -    InstSI <outs, ins, asm, pattern> { +    VOPAnyCommon <outs, ins, asm, pattern> { -  let mayLoad = 0; -  let mayStore = 0; -  let hasSideEffects = 0; -  let UseNamedOperandTable = 1;    // Using complex patterns gives VOP3 patterns a very high complexity rating,    // but standalone patterns are almost always preferred, so we need to adjust the    // priority lower.  The goal is to use a high number to reduce complexity to @@ -135,8 +127,6 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :    let AddedComplexity = -1000;    let VOP3 = 1; -  let VALU = 1; -    int Size = 8;  } diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 1a4c0d4e57b0..80b560eb65ae 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -430,15 +430,6 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {    return AMDGPU::COPY;  } -static bool shouldTryToSpillVGPRs(MachineFunction *MF) { - -  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); - -  // FIXME: Implement spilling for other shader types. -  return MFI->getShaderType() == ShaderType::COMPUTE; - -} -  void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,                                        MachineBasicBlock::iterator MI,                                        unsigned SrcReg, bool isKill, @@ -462,7 +453,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,        case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;        case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;      } -  } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) { +  } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {      MFI->setHasSpilledVGPRs();      switch(RC->getSize() * 8) { @@ -482,7 +473,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,              .addFrameIndex(FrameIndex)              // Place-holder registers, these will be filled in by              // SIPrepareScratchRegs. 
-            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef) +            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)              .addReg(AMDGPU::SGPR0, RegState::Undef);    } else { @@ -615,7 +607,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,                .addImm(-1)                .addImm(0); -      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32), +      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),                TIDReg)                .addImm(-1)                .addReg(TIDReg); @@ -1053,7 +1045,11 @@ bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {  }  bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { -  return AMDGPU::getVOPe32(Opcode) != -1; +  int Op32 = AMDGPU::getVOPe32(Opcode); +  if (Op32 == -1) +    return false; + +  return pseudoToMCOpcode(Op32) != -1;  }  bool SIInstrInfo::hasModifiers(unsigned Opcode) const { @@ -1126,12 +1122,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,      }      switch (Desc.OpInfo[i].OperandType) { -    case MCOI::OPERAND_REGISTER: { -      if (MI->getOperand(i).isImm() && -          !isImmOperandLegal(MI, i, MI->getOperand(i))) { -          ErrInfo = "Illegal immediate value for operand."; -          return false; -        } +    case MCOI::OPERAND_REGISTER: +      if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) { +        ErrInfo = "Illegal immediate value for operand."; +        return false; +      } +      break; +    case AMDGPU::OPERAND_REG_IMM32: +      break; +    case AMDGPU::OPERAND_REG_INLINE_C: +      if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) { +        ErrInfo = "Illegal immediate value for operand."; +        return false;        }        break;      case MCOI::OPERAND_IMMEDIATE: @@ -1287,7 +1289,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {    case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;    case AMDGPU::S_LOAD_DWORDX4_IMM:    case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; -  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; +  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;    case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;    case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;    } @@ -2278,7 +2280,7 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist    MachineOperand &Dest = Inst->getOperand(0);    MachineOperand &Src = Inst->getOperand(1); -  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32); +  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);    const TargetRegisterClass *SrcRC = Src.isReg() ?      
MRI.getRegClass(Src.getReg()) :      &AMDGPU::SGPR_32RegClass; diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index f766dc85e86a..28cd27dd8962 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -325,7 +325,6 @@ namespace AMDGPU {    int getVOPe32(uint16_t Opcode);    int getCommuteRev(uint16_t Opcode);    int getCommuteOrig(uint16_t Opcode); -  int getMCOpcode(uint16_t Opcode, unsigned Gen);    int getAddr64Inst(uint16_t Opcode);    int getAtomicRetOp(uint16_t Opcode);    int getAtomicNoRetOp(uint16_t Opcode); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 7cc9588c8e4b..175e11d709cf 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -36,6 +36,12 @@ class vop2 <bits<6> si, bits<6> vi = si> : vop {    field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};  } +// Specify a VOP2 opcode for SI and a VOP3 opcode for VI, for an +// instruction that has no VOP2 encoding on VI +class vop23 <bits<6> si, bits<10> vi> : vop2 <si> { +  let VI3 = vi; +} +  class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {    let SI3 = si;    let VI3 = vi; @@ -57,7 +63,7 @@ class sopk <bits<5> si, bits<5> vi = si> {  }  // Except for the NONE field, this must be kept in sync with the SISubtarget enum -// in AMDGPUMCInstLower.h +// in AMDGPUInstrInfo.cpp  def SISubtarget {    int NONE = -1;    int SI = 0; @@ -731,7 +737,7 @@ class getAsm32 <int NumSrcArgs> {  // Returns the assembly string for the inputs and outputs of a VOP3  // instruction.  class getAsm64 <int NumSrcArgs, bit HasModifiers> { -  string src0 = "$src0_modifiers,"; +  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");    string src1 = !if(!eq(NumSrcArgs, 1), "",                     !if(!eq(NumSrcArgs, 2), " $src1_modifiers",                                             " $src1_modifiers,")); @@ -848,6 +854,16 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :    let isPseudo = 1;  } +multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, +                     string opName, string revOpSI> { +  def "" : VOP2_Pseudo <outs, ins, pattern, opName>, +           VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>; + +  def _si : VOP2 <op.SI, outs, ins, opName#asm, []>, +            VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>, +            SIMCInstr <opName#"_e32", SISubtarget.SI>; +} +  multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,                     string opName, string revOpSI, string revOpVI> {    def "" : VOP2_Pseudo <outs, ins, pattern, opName>, @@ -889,16 +905,6 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :    VOP3e_vi <op>,    SIMCInstr <opName#"_e64", SISubtarget.VI>; -// VI only instruction -class VOP3_vi <bits<10> op, string opName, dag outs, dag ins, string asm, -               list<dag> pattern, int NumSrcArgs, bit HasMods = 1> : -  VOP3Common <outs, ins, asm, pattern>, -  VOP <opName>, -  VOP3e_vi <op>, -  VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1), -                    !if(!eq(NumSrcArgs, 2), 0, 1), -                    HasMods>; -  multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,                     string opName, int NumSrcArgs, bit HasMods = 1> { @@ -998,6 +1004,23 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,    }  } +// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers. 
+multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins, +                         string asm, list<dag> pattern = []> { +  let isPseudo = 1 in { +    def "" : VOPAnyCommon <outs, ins, "", pattern>, +             SIMCInstr<opName, SISubtarget.NONE>; +  } + +  def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>, +            SIMCInstr <opName, SISubtarget.SI>; + +  def _vi : VOP3Common <outs, ins, asm, []>, +            VOP3e_vi <op.VI3>, +            VOP3DisableFields <1, 0, 0>, +            SIMCInstr <opName, SISubtarget.VI>; +} +  multiclass VOP1_Helper <vop1 op, string opName, dag outs,                          dag ins32, string asm32, list<dag> pat32,                          dag ins64, string asm64, list<dag> pat64, @@ -1089,6 +1112,33 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,    revOp, P.HasModifiers  >; +// A VOP2 instruction that is VOP3-only on VI. +multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs, +                            dag ins32, string asm32, list<dag> pat32, +                            dag ins64, string asm64, list<dag> pat64, +                            string revOpSI, string revOpVI, bit HasMods> { +  defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>; + +  defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, +                        revOpSI, revOpVI, HasMods>; +} + +multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P, +                          SDPatternOperator node = null_frag, +                          string revOpSI = opName, string revOpVI = revOpSI> +                          : VOP2_VI3_Helper < +  op, opName, P.Outs, +  P.Ins32, P.Asm32, [], +  P.Ins64, P.Asm64, +  !if(P.HasModifiers, +      [(set P.DstVT:$dst, +           (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, +                                      i1:$clamp, i32:$omod)), +                 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], +      [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), +  revOpSI, revOpVI, P.HasModifiers +>; +  class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :    VOPCCommon <ins, "", pattern>,    VOP <opName>, @@ -1224,34 +1274,6 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,    P.NumSrcArgs, P.HasModifiers  >; -class VOP3InstVI <bits<10> op, string opName, VOPProfile P, -                  SDPatternOperator node = null_frag> : VOP3_vi < -  op, opName#"_vi", P.Outs, P.Ins64, opName#P.Asm64, -  !if(!eq(P.NumSrcArgs, 3), -    !if(P.HasModifiers, -        [(set P.DstVT:$dst, -            (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, -                                       i1:$clamp, i32:$omod)), -                  (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), -                  (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))], -        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1, -                                  P.Src2VT:$src2))]), -  !if(!eq(P.NumSrcArgs, 2), -    !if(P.HasModifiers, -        [(set P.DstVT:$dst, -            (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, -                                       i1:$clamp, i32:$omod)), -                  (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], -        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]) -  /* P.NumSrcArgs == 1 */, -    !if(P.HasModifiers, -        [(set P.DstVT:$dst, -            (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, 
i32:$src0_modifiers, -                                       i1:$clamp, i32:$omod))))], -        [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))), -  P.NumSrcArgs, P.HasModifiers ->; -  multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,                      string opName, list<dag> pattern> :    VOP3b_2_m < diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e05b6bb7d0f1..4b1a84662cb5 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1525,25 +1525,25 @@ defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",  } // End Uses = [VCC]  } // End isCommutable = 1, Defs = [VCC] -// These instructions only exist on SI and CI -let SubtargetPredicate = isSICI in { - -def V_READLANE_B32 : VOP2 < -  0x00000001, +defm V_READLANE_B32 : VOP2SI_3VI_m < +  vop3 <0x001, 0x289>, +  "v_readlane_b32",    (outs SReg_32:$vdst),    (ins VGPR_32:$src0, SSrc_32:$vsrc1), -  "v_readlane_b32 $vdst, $src0, $vsrc1", -  [] +  "v_readlane_b32 $vdst, $src0, $vsrc1"  >; -def V_WRITELANE_B32 : VOP2 < -  0x00000002, +defm V_WRITELANE_B32 : VOP2SI_3VI_m < +  vop3 <0x002, 0x28a>, +  "v_writelane_b32",    (outs VGPR_32:$vdst),    (ins SReg_32:$src0, SSrc_32:$vsrc1), -  "v_writelane_b32 $vdst, $src0, $vsrc1", -  [] +  "v_writelane_b32 $vdst, $src0, $vsrc1"  >; +// These instructions only exist on SI and CI +let SubtargetPredicate = isSICI in { +  let isCommutable = 1 in {  defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",    VOP_F32_F32_F32 @@ -1568,30 +1568,33 @@ defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;  }  } // End isCommutable = 1 +} // End let SubtargetPredicate = SICI -defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32", VOP_I32_I32_I32, -  AMDGPUbfm>; -defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>; -defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32", +defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32, +  AMDGPUbfm +>; +defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32", +  VOP_I32_I32_I32 +>; +defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",    VOP_I32_I32_I32  >; -defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32", +defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",    VOP_I32_I32_I32  >; -defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32", +defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",    VOP_F32_F32_I32, AMDGPUldexp  >;  ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;  ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;  ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>; -defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32", +defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",   VOP_I32_F32_F32, int_SI_packf16  >;  ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;  ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>; -} // End let SubtargetPredicate = SICI  //===----------------------------------------------------------------------===//  // VOP3 Instructions  //===----------------------------------------------------------------------===// @@ -1656,9 +1659,6 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",    VOP_I32_I32_I32_I32  >; -// Only 
on SI -defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32", -  VOP_F32_F32_F32_F32>;  defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",    VOP_F32_F32_F32_F32, AMDGPUfmin3>; @@ -1699,20 +1699,6 @@ defm V_DIV_FIXUP_F64 : VOP3Inst <  } // let SchedRW = [WriteDouble] -defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", -  VOP_I64_I64_I32, shl ->; - -// Only on SI -defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", -  VOP_I64_I64_I32, srl ->; - -// Only on SI -defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", -  VOP_I64_I64_I32, sra ->; -  let SchedRW = [WriteDouble] in {  let isCommutable = 1 in { @@ -1785,6 +1771,26 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <  } // let SchedRW = [WriteDouble] +// These instructions only exist on SI and CI +let SubtargetPredicate = isSICI in { + +defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", +  VOP_I64_I64_I32, shl +>; + +defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", +  VOP_I64_I64_I32, srl +>; + +defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", +  VOP_I64_I64_I32, sra +>; + +defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32", +  VOP_F32_F32_F32_F32>; + +} // End SubtargetPredicate = isSICI +  //===----------------------------------------------------------------------===//  // Pseudo Instructions  //===----------------------------------------------------------------------===// @@ -1943,14 +1949,14 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {    let UseNamedOperandTable = 1 in {      def _SAVE : InstSI <        (outs), -      (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, +      (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,             SReg_32:$scratch_offset),        "", []      >;      def _RESTORE : InstSI <        (outs sgpr_class:$dst), -      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), +      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),        "", []      >;    } // End UseNamedOperandTable = 1 @@ -1966,14 +1972,14 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {    let UseNamedOperandTable = 1 in {      def _SAVE : InstSI <        (outs), -      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, +      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,             SReg_32:$scratch_offset),        "", []      >;      def _RESTORE : InstSI <        (outs vgpr_class:$dst), -      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), +      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),        "", []      >;    } // End UseNamedOperandTable = 1 @@ -2728,16 +2734,12 @@ def : Pat <                     (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))  >; -let Predicates = [isSICI] in { -  def : Pat <    (int_SI_tid), -  (V_MBCNT_HI_U32_B32_e32 0xffffffff, +  (V_MBCNT_HI_U32_B32_e64 0xffffffff,                            (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))  >; -} -  //===----------------------------------------------------------------------===//  // VOP3 Patterns  //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 71852717d7e6..667da4c8af61 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -50,6 +50,7 @@ public:    unsigned NumUserSGPRs;    std::map<unsigned, unsigned> LaneVGPRs;    unsigned LDSWaveSpillSize; +  unsigned ScratchOffsetReg;    bool 
hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };    unsigned getTIDReg() const { return TIDReg; };    void setTIDReg(unsigned Reg) { TIDReg = Reg; } diff --git a/lib/Target/R600/SIPrepareScratchRegs.cpp b/lib/Target/R600/SIPrepareScratchRegs.cpp index f0e7edec6b48..0a57a5bc201a 100644 --- a/lib/Target/R600/SIPrepareScratchRegs.cpp +++ b/lib/Target/R600/SIPrepareScratchRegs.cpp @@ -84,28 +84,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {    if (!Entry->isLiveIn(ScratchOffsetPreloadReg))      Entry->addLiveIn(ScratchOffsetPreloadReg); -  // Load the scratch pointer -  unsigned ScratchPtrReg = -      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass); -  int ScratchPtrFI = -1; - -  if (ScratchPtrReg != AMDGPU::NoRegister) { -    // Found an SGPR to use. -    MRI.setPhysRegUsed(ScratchPtrReg); -    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg) -            .addReg(ScratchPtrPreloadReg); -  } else { -    // No SGPR is available, we must spill. -    ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4); -    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE)) -            .addReg(ScratchPtrPreloadReg) -            .addFrameIndex(ScratchPtrFI); -  } -    // Load the scratch offset.    unsigned ScratchOffsetReg =        TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass); -  int ScratchOffsetFI = ~0; +  int ScratchOffsetFI = -1;    if (ScratchOffsetReg != AMDGPU::NoRegister) {      // Found an SGPR to use @@ -117,7 +99,9 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {      ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);      BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))              .addReg(ScratchOffsetPreloadReg) -            .addFrameIndex(ScratchOffsetFI); +            .addFrameIndex(ScratchOffsetFI) +            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) +            .addReg(AMDGPU::SGPR0, RegState::Undef);    } @@ -125,22 +109,27 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {    // add them to all the SI_SPILL_V* instructions.    RegScavenger RS; -  bool UseRegScavenger = -      (ScratchPtrReg == AMDGPU::NoRegister || -      ScratchOffsetReg == AMDGPU::NoRegister); +  unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4); +  RS.addScavengingFrameIndex(ScratchRsrcFI); +    for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();         BI != BE; ++BI) {      MachineBasicBlock &MBB = *BI; -    if (UseRegScavenger) -      RS.enterBasicBlock(&MBB); +    // Add the scratch offset reg as a live-in so that the register scavenger +    // doesn't re-use it. 
+    if (!MBB.isLiveIn(ScratchOffsetReg) &&
+        ScratchOffsetReg != AMDGPU::NoRegister)
+      MBB.addLiveIn(ScratchOffsetReg);
+    RS.enterBasicBlock(&MBB);

     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
       MachineInstr &MI = *I;
+      RS.forward(I);
       DebugLoc DL = MI.getDebugLoc();
       switch(MI.getOpcode()) {
-        default: break;;
+        default: break;
         case AMDGPU::SI_SPILL_V512_SAVE:
         case AMDGPU::SI_SPILL_V256_SAVE:
         case AMDGPU::SI_SPILL_V128_SAVE:
@@ -153,43 +142,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
         case AMDGPU::SI_SPILL_V256_RESTORE:
         case AMDGPU::SI_SPILL_V512_RESTORE:
-          // Scratch Pointer
-          if (ScratchPtrReg == AMDGPU::NoRegister) {
-            ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
-            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
-                    ScratchPtrReg)
-                    .addFrameIndex(ScratchPtrFI)
-                    .addReg(AMDGPU::NoRegister)
-                    .addReg(AMDGPU::NoRegister);
-          } else if (!MBB.isLiveIn(ScratchPtrReg)) {
-            MBB.addLiveIn(ScratchPtrReg);
-          }
+          // Scratch resource
+          unsigned ScratchRsrcReg =
+              RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
+
+          uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+                          0xffffffff; // Size
+
+          unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+          unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+          unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+          unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+                  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+                  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+                  .addImm(Rsrc & 0xffffffff)
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+                  .addImm(Rsrc >> 32)
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+          // Scratch Offset
           if (ScratchOffsetReg == AMDGPU::NoRegister) {
             ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
             BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
                     ScratchOffsetReg)
                     .addFrameIndex(ScratchOffsetFI)
-                    .addReg(AMDGPU::NoRegister)
-                    .addReg(AMDGPU::NoRegister);
+                    .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+                    .addReg(AMDGPU::SGPR0, RegState::Undef);
           } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
             MBB.addLiveIn(ScratchOffsetReg);
           }
-          if (ScratchPtrReg == AMDGPU::NoRegister ||
+          if (ScratchRsrcReg == AMDGPU::NoRegister ||
               ScratchOffsetReg == AMDGPU::NoRegister) {
             LLVMContext &Ctx = MF.getFunction()->getContext();
             Ctx.emitError("ran out of SGPRs for spilling VGPRs");
-            ScratchPtrReg = AMDGPU::SGPR0;
+            ScratchRsrcReg = AMDGPU::SGPR0;
             ScratchOffsetReg = AMDGPU::SGPR0;
           }
-          MI.getOperand(2).setReg(ScratchPtrReg);
+          MI.getOperand(2).setReg(ScratchRsrcReg);
+          MI.getOperand(2).setIsKill(true);
+          MI.getOperand(2).setIsUndef(false);
           MI.getOperand(3).setReg(ScratchOffsetReg);
+          MI.getOperand(3).setIsUndef(false);
+          MI.getOperand(3).setIsKill(false);
+          MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
+          MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
+          MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
+          MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
           break;
       }
-      if (UseRegScavenger)
-        RS.forward();
     }
   }
   return true;
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index f9feea470f15..0396bf384066 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -23,7 +23,6 @@

 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"

 using namespace llvm;

 SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@@ -51,9 +50,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }

-unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
-                                             MachineFunction &MF) const {
-  return RC->getNumRegs();
+unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+
+  // FIXME: We should adjust the max number of waves based on LDS size.
+  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
+  unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+
+  for (regclass_iterator I = regclass_begin(), E = regclass_end();
+       I != E; ++I) {
+
+    unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+    unsigned Limit;
+
+    if (isSGPRClass(*I)) {
+      Limit = SGPRLimit / NumSubRegs;
+    } else {
+      Limit = VGPRLimit / NumSubRegs;
+    }
+
+    const int *Sets = getRegClassPressureSets(*I);
+    assert(Sets);
+    for (unsigned i = 0; Sets[i] != -1; ++i) {
+      if (Sets[i] == (int)Idx)
+        return Limit;
+    }
+  }
+  return 256;
 }

 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
@@ -98,7 +120,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {

 void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                            unsigned LoadStoreOp,
                                            unsigned Value,
-                                           unsigned ScratchPtr,
+                                           unsigned ScratchRsrcReg,
                                            unsigned ScratchOffset,
                                            int64_t Offset,
                                            RegScavenger *RS) const {
@@ -113,33 +135,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,

   bool RanOutOfSGPRs = false;
   unsigned SOffset = ScratchOffset;

-  unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
-  if (RsrcReg == AMDGPU::NoRegister) {
-    RanOutOfSGPRs = true;
-    RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
-  }
-
   unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
   unsigned Size = NumSubRegs * 4;

-  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff; // Size
-
-  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
-          getSubReg(RsrcReg, AMDGPU::sub0_sub1))
-          .addReg(ScratchPtr)
-          .addReg(RsrcReg, RegState::ImplicitDefine);
-
-  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
-          getSubReg(RsrcReg, AMDGPU::sub2))
-          .addImm(Rsrc & 0xffffffff)
-          .addReg(RsrcReg, RegState::ImplicitDefine);
-
-  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
-          getSubReg(RsrcReg, AMDGPU::sub3))
-          .addImm(Rsrc >> 32)
-          .addReg(RsrcReg, RegState::ImplicitDefine);
-
   if (!isUInt<12>(Offset + Size)) {
     SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
     if (SOffset == AMDGPU::NoRegister) {
@@ -163,9 +161,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,

     BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
             .addReg(SubReg, getDefRegState(IsLoad))
-            .addReg(RsrcReg, getKillRegState(IsKill))
+            .addReg(ScratchRsrcReg, getKillRegState(IsKill))
             .addImm(Offset)
-            .addReg(SOffset, getKillRegState(IsKill))
+            .addReg(SOffset)
             .addImm(0) // glc
             .addImm(0) // slc
             .addImm(0) // tfe
@@ -235,9 +233,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
            Ctx.emitError("Ran out of VGPRs for spilling SGPR");
         }

-        if (isM0) {
+        if (isM0)
           SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
-        }

         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
                 .addReg(Spill.VGPR)
@@ -262,7 +259,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V32_SAVE:
       buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
             TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
@@ -274,7 +271,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V512_RESTORE: {
       buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
             TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
@@ -289,7 +286,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         BuildMI(*MBB, MI, MI->getDebugLoc(),
                 TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                 .addImm(Offset);
-        FIOp.ChangeToRegister(TmpReg, false);
+        FIOp.ChangeToRegister(TmpReg, false, false, true);
       }
     }
   }
@@ -446,6 +443,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
   case SIRegisterInfo::TGID_Z:
     return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
   case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
+    if (MFI->getShaderType() != ShaderType::COMPUTE)
+      return MFI->ScratchOffsetReg;
     return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
   case SIRegisterInfo::SCRATCH_PTR:
     return AMDGPU::SGPR2_SGPR3;
@@ -475,3 +474,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
   return AMDGPU::NoRegister;
 }

+unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
+  switch(WaveCount) {
+    case 10: return 24;
+    case 9:  return 28;
+    case 8:  return 32;
+    case 7:  return 36;
+    case 6:  return 40;
+    case 5:  return 48;
+    case 4:  return 64;
+    case 3:  return 84;
+    case 2:  return 128;
+    default: return 256;
+  }
+}
+
+unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
+  switch(WaveCount) {
+    case 10: return 48;
+    case 9:  return 56;
+    case 8:  return 64;
+    case 7:  return 72;
+    case 6:  return 80;
+    case 5:  return 96;
+    default: return 103;
+  }
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index d14212c2b104..d908ffd12d2c 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -17,6 +17,7 @@
 #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H

 #include "AMDGPURegisterInfo.h"
+#include "llvm/Support/Debug.h"

 namespace llvm {

@@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {

   BitVector getReservedRegs(const MachineFunction &MF) const override;

-  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
-                               MachineFunction &MF) const override;
+  unsigned getRegPressureSetLimit(unsigned Idx) const override;

   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
@@ -105,13 +105,21 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
   unsigned getPreloadedValue(const MachineFunction &MF,
                              enum PreloadedValue Value) const;

+  /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
+  ///        concurrent waves.
+  unsigned getNumVGPRsAllowed(unsigned WaveCount) const;
+
+  /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
+  ///        concurrent waves.
+  unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+
   unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
                               const TargetRegisterClass *RC) const;

 private:
   void buildScratchLoadStore(MachineBasicBlock::iterator MI,
                              unsigned LoadStoreOp, unsigned Value,
-                             unsigned ScratchPtr, unsigned ScratchOffset,
+                             unsigned ScratchRsrcReg, unsigned ScratchOffset,
                              int64_t Offset, RegScavenger *RS) const;
 };

diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index f91d1177bbae..6a3410688fe7 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -10,6 +10,7 @@
 //

 #include "AMDGPU.h"
+#include "AMDGPUMCInstLower.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "llvm/ADT/Statistic.h"
@@ -206,13 +207,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
           continue;
       }

-      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
-
-      // Op32 could be -1 here if we started with an instruction that had a
+      // getVOPe32 could be -1 here if we started with an instruction that had
       // a 32-bit encoding and then commuted it to an instruction that did not.
-      if (Op32 == -1) +      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))          continue; +      int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); +        if (TII->isVOPC(Op32)) {          unsigned DstReg = MI.getOperand(0).getReg();          if (TargetRegisterInfo::isVirtualRegister(DstReg)) { diff --git a/lib/Target/R600/VIInstructions.td b/lib/Target/R600/VIInstructions.td index 07cfa29ae12b..24e66cea6277 100644 --- a/lib/Target/R600/VIInstructions.td +++ b/lib/Target/R600/VIInstructions.td @@ -11,22 +11,6 @@  let SubtargetPredicate = isVI in { -def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32, -  AMDGPUldexp ->; -def V_BFM_B32 : VOP3InstVI <0x293, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm>; -def V_BCNT_U32_B32 : VOP3InstVI <0x28b, "v_bcnt_u32_b32", VOP_I32_I32_I32>; -def V_MBCNT_LO_U32_B32 : VOP3InstVI <0x28c, "v_mbcnt_lo_u32_b32", -  VOP_I32_I32_I32 ->; -def V_MBCNT_HI_U32_B32 : VOP3InstVI <0x28d, "v_mbcnt_hi_u32_b32", -  VOP_I32_I32_I32 ->; - -def V_CVT_PKRTZ_F16_F32 : VOP3InstVI <0x296, "v_cvt_pkrtz_f16_f32", - VOP_I32_F32_F32, int_SI_packf16 ->; -  defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <    0x14, "buffer_load_dword", VGPR_32, i32, global_load  >; @@ -37,22 +21,13 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <  } // End SubtargetPredicate = isVI -//===----------------------------------------------------------------------===// -// VOP2 Patterns -//===----------------------------------------------------------------------===// - -let Predicates = [isVI] in { - -def : Pat < -  (int_SI_tid), -  (V_MBCNT_HI_U32_B32 0xffffffff, -                      (V_MBCNT_LO_U32_B32 0xffffffff, 0)) ->;  //===----------------------------------------------------------------------===//  // SMEM Patterns  //===----------------------------------------------------------------------===// +let Predicates = [isVI] in { +  // 1. Offset as 8bit DWORD immediate  def : Pat <    (SIload_constant v4i32:$sbase, IMM20bit:$offset), diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 6767e4b224f8..42690206e8c7 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -42,7 +42,8 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {    SunStyleELFSectionSwitchSyntax = true;    UsesELFSectionDirectiveForBSS = true; -  UseIntegratedAssembler = true; +  if (TheTriple.isOSSolaris() || TheTriple.isOSOpenBSD()) +    UseIntegratedAssembler = true;  }  const MCExpr* diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 719b761084f9..164b4192ae66 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -777,6 +777,19 @@ public:                                       MachO::CPU_TYPE_X86_64, Subtype);    } +  bool doesSectionRequireSymbols(const MCSection &Section) const override { +    // Temporary labels in the string literals sections require symbols. The +    // issue is that the x86_64 relocation format does not allow symbol + +    // offset, and so the linker does not have enough information to resolve the +    // access to the appropriate atom unless an external relocation is used. For +    // non-cstring sections, we expect the compiler to use a non-temporary label +    // for anything that could have an addend pointing outside the symbol. +    // +    // See <rdar://problem/4765733>. 
+    const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+    return SMO.getType() == MachO::S_CSTRING_LITERALS;
+  }
+
   /// \brief Generate the compact unwind encoding for the CFI instructions.
   uint32_t generateCompactUnwindEncoding(
                              ArrayRef<MCCFIInstruction> Instrs) const override {
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index be6a8e4a43eb..e8b0b4c5826f 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -222,6 +222,9 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
         case MCSymbolRefExpr::VK_GOT:
           Type = ELF::R_386_GOT32;
           break;
+        case MCSymbolRefExpr::VK_PLT:
+          Type = ELF::R_386_PLT32;
+          break;
         case MCSymbolRefExpr::VK_GOTOFF:
           Type = ELF::R_386_GOTOFF;
           break;
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 7a83f4c64e6d..67b0c8900850 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -10,7 +10,6 @@
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -48,21 +47,23 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
                               const MCFixup &Fixup,
                               MCValue Target,
                               uint64_t &FixedValue);
-  void RecordX86_64Relocation(MachObjectWriter *Writer, MCAssembler &Asm,
+  void RecordX86_64Relocation(MachObjectWriter *Writer,
+                              const MCAssembler &Asm,
                               const MCAsmLayout &Layout,
-                              const MCFragment *Fragment, const MCFixup &Fixup,
-                              MCValue Target, uint64_t &FixedValue);
-
+                              const MCFragment *Fragment,
+                              const MCFixup &Fixup,
+                              MCValue Target,
+                              uint64_t &FixedValue);
 public:
   X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
                       uint32_t CPUSubtype)
     : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
                                /*UseAggressiveSymbolFolding=*/Is64Bit) {}

-  void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
-                        const MCAsmLayout &Layout, const MCFragment *Fragment,
-                        const MCFixup &Fixup, MCValue Target,
-                        uint64_t &FixedValue) override {
+  void RecordRelocation(MachObjectWriter *Writer,
+                        const MCAssembler &Asm, const MCAsmLayout &Layout,
+                        const MCFragment *Fragment, const MCFixup &Fixup,
+                        MCValue Target, uint64_t &FixedValue) override {
     if (Writer->is64Bit())
       RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
                              FixedValue);
@@ -96,10 +97,13 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
   }
 }

-void X86MachObjectWriter::RecordX86_64Relocation(
-    MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
-    const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
-    uint64_t &FixedValue) {
+void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
+                                                 const MCAssembler &Asm,
+                                                 const MCAsmLayout &Layout,
+                                                 const MCFragment *Fragment,
+                                                 const MCFixup &Fixup,
+                                                 MCValue Target,
+                                                 uint64_t &FixedValue) {
   unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
   unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
   unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -113,7 +117,6 @@ void X86MachObjectWriter::RecordX86_64Relocation(
   unsigned Index = 0;
   unsigned IsExtern = 0;
   unsigned Type = 0;
-  const MCSymbolData *RelSymbol = nullptr;

   Value = Target.getConstant();

@@ -129,6 +132,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
   if (Target.isAbsolute()) { // constant
     // SymbolNum of 0 indicates the absolute section.
     Type = MachO::X86_64_RELOC_UNSIGNED;
+    Index = 0;

     // FIXME: I believe this is broken, I don't think the linker can understand
     // it. I think it would require a local relocation, but I'm not sure if that
@@ -189,30 +193,36 @@ void X86MachObjectWriter::RecordX86_64Relocation(
     Value -= Writer->getSymbolAddress(&B_SD, Layout) -
       (!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout));

-    if (!A_Base)
+    if (A_Base) {
+      Index = A_Base->getIndex();
+      IsExtern = 1;
+    } else {
       Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
+    }
     Type = MachO::X86_64_RELOC_UNSIGNED;

     MachO::any_relocation_info MRE;
     MRE.r_word0 = FixupOffset;
-    MRE.r_word1 =
-        (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
-    Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
-
-    if (B_Base)
-      RelSymbol = B_Base;
-    else
+    MRE.r_word1 = ((Index     <<  0) |
+                   (IsPCRel   << 24) |
+                   (Log2Size  << 25) |
+                   (IsExtern  << 27) |
+                   (Type      << 28));
+    Writer->addRelocation(Fragment->getParent(), MRE);
+
+    if (B_Base) {
+      Index = B_Base->getIndex();
+      IsExtern = 1;
+    } else {
       Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
+    }
     Type = MachO::X86_64_RELOC_SUBTRACTOR;
   } else {
     const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
-    if (Symbol->isTemporary() && Value) {
-      const MCSection &Sec = Symbol->getSection();
-      if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
-        Asm.addLocalUsedInReloc(*Symbol);
-    }
     const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
-    RelSymbol = Asm.getAtom(&SD);
+    const MCSymbolData *Base = Asm.getAtom(&SD);

     // Relocations inside debug sections always use local relocations when
     // possible. This seems to be done because the debugger doesn't fully
@@ -222,20 +232,23 @@ void X86MachObjectWriter::RecordX86_64Relocation(
       const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
         Fragment->getParent()->getSection());
       if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
-        RelSymbol = nullptr;
+        Base = nullptr;
     }

     // x86_64 almost always uses external relocations, except when there is no
     // symbol to use as a base address (a local symbol with no preceding
     // non-local symbol).
-    if (RelSymbol) {
+    if (Base) {
+      Index = Base->getIndex();
+      IsExtern = 1;
+
       // Add the local offset, if needed.
-      if (RelSymbol != &SD)
-        Value +=
-            Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(RelSymbol);
+      if (Base != &SD)
+        Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
     } else if (Symbol->isInSection() && !Symbol->isVariable()) {
       // The index is the section ordinal (1-based).
       Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
       Value += Writer->getSymbolAddress(&SD, Layout);

       if (IsPCRel)
@@ -334,9 +347,12 @@ void X86MachObjectWriter::RecordX86_64Relocation(
   // struct relocation_info (8 bytes)
   MachO::any_relocation_info MRE;
   MRE.r_word0 = FixupOffset;
-  MRE.r_word1 = (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
-                (IsExtern << 27) | (Type << 28);
-  Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
+  MRE.r_word1 = ((Index     <<  0) |
+                 (IsPCRel   << 24) |
+                 (Log2Size  << 25) |
+                 (IsExtern  << 27) |
+                 (Type      << 28));
+  Writer->addRelocation(Fragment->getParent(), MRE);
 }

 bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
@@ -408,7 +424,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
                    (IsPCRel                   << 30) |
                    MachO::R_SCATTERED);
     MRE.r_word1 = Value2;
-    Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+    Writer->addRelocation(Fragment->getParent(), MRE);
   } else {
     // If the offset is more than 24-bits, it won't fit in a scattered
     // relocation offset field, so we fall back to using a non-scattered
@@ -430,7 +446,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
                  (IsPCRel     << 30) |
                  MachO::R_SCATTERED);
   MRE.r_word1 = Value;
-  Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+  Writer->addRelocation(Fragment->getParent(), MRE);

   return true;
 }

@@ -451,6 +467,7 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,

   // Get the symbol data.
   const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+  unsigned Index = SD_A->getIndex();

   // We're only going to have a second symbol in pic mode and it'll be a
   // subtraction from the picbase. For 32-bit pic the addend is the difference
@@ -473,9 +490,12 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,

   // struct relocation_info (8 bytes)
   MachO::any_relocation_info MRE;
   MRE.r_word0 = Value;
-  MRE.r_word1 =
-      (IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28);
-  Writer->addRelocation(SD_A, Fragment->getParent(), MRE);
+  MRE.r_word1 = ((Index                    <<  0) |
+                 (IsPCRel                  << 24) |
+                 (Log2Size                 << 25) |
+                 (1                        << 27) | // r_extern
+                 (MachO::GENERIC_RELOC_TLV << 28)); // r_type
+  Writer->addRelocation(Fragment->getParent(), MRE);
 }

 void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
@@ -526,8 +546,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
   // See <reloc.h>.
   uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
   unsigned Index = 0;
+  unsigned IsExtern = 0;
   unsigned Type = 0;
-  const MCSymbolData *RelSymbol = nullptr;

   if (Target.isAbsolute()) { // constant
     // SymbolNum of 0 indicates the absolute section.
@@ -548,7 +568,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,

     // Check whether we need an external or internal relocation.
     if (Writer->doesSymbolRequireExternRelocation(SD)) {
-      RelSymbol = SD;
+      IsExtern = 1;
+      Index = SD->getIndex();
       // For external relocations, make sure to offset the fixup value to
       // compensate for the addend of the symbol address, if it was
       // undefined. This occurs with weak definitions, for example.
@@ -570,9 +591,12 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,

   // struct relocation_info (8 bytes)
   MachO::any_relocation_info MRE;
   MRE.r_word0 = FixupOffset;
-  MRE.r_word1 =
-      (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
-  Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
+  MRE.r_word1 = ((Index     <<  0) |
+                 (IsPCRel   << 24) |
+                 (Log2Size  << 25) |
+                 (IsExtern  << 27) |
+                 (Type      << 28));
+  Writer->addRelocation(Fragment->getParent(), MRE);
 }

 MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index dab2c4b47ad6..83b4b82311f2 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1376,6 +1376,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
   if (!Callee)
     return false;
+  // The prototype of thunks are a lie, don't try to directly call such
+  // functions.
+  if (Callee->hasFnAttribute("thunk"))
+    return false;

   Instruction *Caller = CS.getInstruction();
   const AttributeSet &CallerPAL = CS.getAttributes();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index b814b2525dca..ac13eebf8275 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2182,9 +2182,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,

       // Handle the floating point versions of equality comparisons too.
       if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
-          (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE))
-        Worklist.push_back(std::make_pair(Op0, Op1));
-
+          (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
+        // Floating point -0.0 and 0.0 compare equal, so we can't
+        // propagate a constant based on that comparison.
+        // FIXME: We should do this optimization if 'no signed zeros' is
+        // applicable via an instruction-level fast-math-flag or some other
+        // indicator that relaxed FP semantics are being used.
+        if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero())
+          Worklist.push_back(std::make_pair(Op0, Op1));
+      }
+
       // If "A >= B" is known true, replace "A < B" with false everywhere.
       CmpInst::Predicate NotPred = Cmp->getInversePredicate();
       Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 1ac38e0f52a5..d664f85c773d 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -47,7 +48,7 @@ namespace {
       AU.addRequiredID(LoopSimplifyID);
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreservedID(LCSSAID);
-      AU.addPreserved("scalar-evolution");
+      AU.addPreserved<ScalarEvolution>();
       AU.addRequired<TargetLibraryInfo>();
     }
   };
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 35cd917330ab..04b91306d3cb 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -46,7 +46,6 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       // This is a cluster of orthogonal Transforms
       AU.addPreserved<UnifyFunctionExitNodes>();
-      AU.addPreserved("mem2reg");
       AU.addPreservedID(LowerInvokePassID);
     }

diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index f8aa1d3eec12..6cb91a154f06 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -278,9 +278,8 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
                                                     Value *IVOperand) {

   // Currently we only handle instructions of the form "add <indvar> <value>"
-  // and "sub <indvar> <value>".
   unsigned Op = BO->getOpcode();
-  if (!(Op == Instruction::Add || Op == Instruction::Sub))
+  if (Op != Instruction::Add)
     return false;

   // If BO is already both nuw and nsw then there is nothing left to do
@@ -304,15 +303,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
   if (OtherOpSCEV == SE->getCouldNotCompute())
     return false;

-  if (Op == Instruction::Sub) {
-    // If the subtraction is of the form "sub <indvar>, <op>", then pretend it
-    // is "add <indvar>, -<op>" and continue, else bail out.
-    if (OtherOperandIdx != 1)
-      return false;
-
-    OtherOpSCEV = SE->getNegativeSCEV(OtherOpSCEV);
-  }
-
   const SCEV *IVOpSCEV = SE->getSCEV(IVOperand);
   const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0);
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5b4647ddcb5e..5a0d52e04f9f 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1968,8 +1968,12 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
     // Try to further simplify the result.
     CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
     if (SimplifiedCI && SimplifiedCI->getCalledFunction())
-      if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder))
+      if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
+        // If we were able to further simplify, remove the now redundant call.
+        SimplifiedCI->replaceAllUsesWith(V);
+        SimplifiedCI->eraseFromParent();
         return V;
+      }

     return SimplifiedFortifiedCI;
   }
@@ -2218,11 +2222,11 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &
   return nullptr;
 }

-Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
+                                                      IRBuilder<> &B,
+                                                      LibFunc::Func Func) {
   Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
-  LibFunc::Func Func =
-      Name.startswith("str") ? LibFunc::strcpy_chk : LibFunc::stpcpy_chk;

   if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
     return nullptr;
@@ -2231,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &
         *ObjSize = CI->getArgOperand(2);

   // __stpcpy_chk(x,x,...)  -> x+strlen(x)
-  if (!OnlyLowerUnknownSize && Dst == Src) {
+  if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
     Value *StrLen = EmitStrLen(Src, B, DL, TLI);
     return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
   }
@@ -2266,11 +2270,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &
   return nullptr;
 }

-Value *FortifiedLibCallSimplifier::optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
+                                                       IRBuilder<> &B,
+                                                       LibFunc::Func Func) {
   Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
-  LibFunc::Func Func =
-      Name.startswith("str") ? LibFunc::strncpy_chk : LibFunc::stpncpy_chk;

   if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
     return nullptr;
@@ -2310,10 +2314,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
     return optimizeMemSetChk(CI, Builder);
   case LibFunc::stpcpy_chk:
   case LibFunc::strcpy_chk:
-    return optimizeStrCpyChk(CI, Builder);
+    return optimizeStrpCpyChk(CI, Builder, Func);
   case LibFunc::stpncpy_chk:
   case LibFunc::strncpy_chk:
-    return optimizeStrNCpyChk(CI, Builder);
+    return optimizeStrpNCpyChk(CI, Builder, Func);
   default:
     break;
   }
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index b35a662f17b5..d36283ea3ae1 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -79,6 +79,19 @@ static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
 namespace llvm {
 namespace SymbolRewriter {

+void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
+                   const std::string &Target) {
+  if (Comdat *CD = GO->getComdat()) {
+    auto &Comdats = M.getComdatSymbolTable();
+
+    Comdat *C = M.getOrInsertComdat(Target);
+    C->setSelectionKind(CD->getSelectionKind());
+    GO->setComdat(C);
+
+    Comdats.erase(Comdats.find(Source));
+  }
+}
+
 template <RewriteDescriptor::Type DT, typename ValueType,
           ValueType *(llvm::Module::*Get)(StringRef) const>
 class ExplicitRewriteDescriptor : public RewriteDescriptor {
@@ -102,10 +115,14 @@ template <RewriteDescriptor::Type DT, typename ValueType,
 bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
   bool Changed = false;
   if (ValueType *S = (M.*Get)(Source)) {
+    if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
+      rewriteComdat(M, GO, Source, Target);
+
     if (Value *T = (M.*Get)(Target))
       S->setValueName(T->getValueName());
     else
       S->setName(Target);
+
     Changed = true;
   }
   return Changed;
@@ -145,6 +162,12 @@ performOnModule(Module &M) {
       report_fatal_error("unable to transforn " + C.getName() + " in " +
                          M.getModuleIdentifier() + ": " + Error);

+    if (C.getName() == Name)
+      continue;
+
+    if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
+      rewriteComdat(M, GO, C.getName(), Name);
+
     if (Value *V = (M.*Get)(Name))
       C.setValueName(V->getValueName());
     else
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 0c2fc0a972b5..7e00a80989dc 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -35,7 +35,6 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
   // We preserve the non-critical-edgeness property
   AU.addPreservedID(BreakCriticalEdgesID);
   // This is a cluster of orthogonal Transforms
-  AU.addPreserved("mem2reg");
   AU.addPreservedID(LowerSwitchID);
 }
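
A note on the SIPrepareScratchRegs hunk above: the scratch buffer resource is a 128-bit descriptor, and its last two dwords come from splitting a single 64-bit constant. A minimal standalone sketch of that split, assuming illustrative stand-in values for AMDGPU::RSRC_DATA_FORMAT and AMDGPU::RSRC_TID_ENABLE (the real bit patterns are defined in the AMDGPU backend headers, not shown in this diff):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Stand-in values, chosen only for illustration.
      const uint64_t RSRC_DATA_FORMAT = 0xf00000000000ULL;
      const uint64_t RSRC_TID_ENABLE  = 1ULL << 55;

      // Same composition as the hunk: format bits | TID enable | max size.
      const uint64_t Rsrc = RSRC_DATA_FORMAT | RSRC_TID_ENABLE | 0xffffffff;

      // The last two S_MOV_B32s write the low and high halves into sub2/sub3.
      const uint32_t Rsrc2 = static_cast<uint32_t>(Rsrc & 0xffffffff);
      const uint32_t Rsrc3 = static_cast<uint32_t>(Rsrc >> 32);
      std::printf("sub2 = 0x%08x, sub3 = 0x%08x\n", Rsrc2, Rsrc3);
      return 0;
    }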
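The new getRegPressureSetLimit computes each limit as the per-wave register budget divided by the number of 32-bit sub-registers a value of the class occupies. A self-contained sketch of that arithmetic, reusing the SGPR table from the hunk (the walk over register classes and pressure sets is elided):

    #include <algorithm>
    #include <cassert>

    // Copied from the getNumSGPRsAllowed table added in the hunk above.
    unsigned getNumSGPRsAllowed(unsigned WaveCount) {
      switch (WaveCount) {
      case 10: return 48;
      case 9:  return 56;
      case 8:  return 64;
      case 7:  return 72;
      case 6:  return 80;
      case 5:  return 96;
      default: return 103;
      }
    }

    // Limit for an SGPR class whose registers are SizeInBytes wide.
    unsigned pressureLimit(unsigned WaveCount, unsigned SizeInBytes) {
      unsigned NumSubRegs = std::max(static_cast<int>(SizeInBytes) / 4, 1);
      return getNumSGPRsAllowed(WaveCount) / NumSubRegs;
    }

    int main() {
      assert(pressureLimit(10, 8) == 24);  // SReg_64 at 10 waves: 48 / 2
      assert(pressureLimit(8, 16) == 16);  // SReg_128 at 8 waves: 64 / 4
      return 0;
    }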
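Several hunks in X86MachObjectWriter.cpp pack relocation fields into r_word1 by hand. The bit layout matches struct relocation_info in Mach-O's <mach-o/reloc.h>: a 24-bit r_symbolnum, then 1-bit r_pcrel, 2-bit r_length, 1-bit r_extern, and 4-bit r_type. A small sketch of the same packing (X86_64_RELOC_BRANCH is the Mach-O branch relocation type, value 2):

    #include <cassert>
    #include <cstdint>

    // Packs a Mach-O relocation_info word the way the hunks above do.
    uint32_t packRWord1(uint32_t Index, bool IsPCRel, unsigned Log2Size,
                        bool IsExtern, unsigned Type) {
      assert(Index < (1u << 24) && Log2Size < 4 && Type < 16);
      return (Index << 0) |                            // r_symbolnum: 24 bits
             (static_cast<uint32_t>(IsPCRel) << 24) |  // r_pcrel:      1 bit
             (Log2Size << 25) |                        // r_length:     2 bits
             (static_cast<uint32_t>(IsExtern) << 27) | // r_extern:     1 bit
             (Type << 28);                             // r_type:       4 bits
    }

    int main() {
      // An external, PC-relative, 4-byte branch fixup against symbol 5.
      const uint32_t W = packRWord1(5, true, 2, true, 2 /*X86_64_RELOC_BRANCH*/);
      assert((W & 0xffffff) == 5);
      assert(((W >> 24) & 1) == 1 && ((W >> 27) & 1) == 1);
      return 0;
    }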
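The GVN hunk stops propagateEquality from substituting a floating-point zero learned from an FCMP_OEQ, because +0.0 and -0.0 compare equal yet are observably different. A quick plain-C++ illustration of the underlying IEEE-754 fact (not LLVM IR):

    #include <cassert>
    #include <cmath>

    int main() {
      double x = -0.0;
      assert(x == 0.0);          // compares equal, like fcmp oeq
      assert(std::signbit(x));   // but x is still negative zero
      // Substituting 0.0 for x would flip an observable result:
      assert(1.0 / x < 0.0);     // divides to -infinity
      assert(1.0 / 0.0 > 0.0);   // divides to +infinity
      return 0;
    }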
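In the SimplifyLibCalls change, threading LibFunc::Func through optimizeStrpCpyChk lets the Dst == Src fold fire only for __stpcpy_chk: the fold rewrites the call to x + strlen(x), which is stpcpy's return value but not strcpy's. The difference, sketched with POSIX.1-2008 stpcpy (not ISO C, so availability is platform-dependent):

    #include <cassert>
    #include <cstring>

    int main() {
      char Src[] = "hello";
      char Dst[16];
      // stpcpy returns a pointer to the copied string's terminating NUL...
      assert(stpcpy(Dst, Src) == Dst + strlen(Src));
      // ...while strcpy returns its first argument.
      assert(strcpy(Dst, Src) == Dst);
      return 0;
    }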
