| author    | Dimitry Andric <dim@FreeBSD.org>                     | 2014-05-11 18:24:26 +0000 |
|-----------|------------------------------------------------------|---------------------------|
| committer | Dimitry Andric <dim@FreeBSD.org>                     | 2014-05-11 18:24:26 +0000 |
| commit    | 68bcb7db193e4bc81430063148253d30a791023e (patch)     |                           |
| tree      | 9f9245264c66971905eab3af40b7fc82e38fc2ad /lib/Target |                           |
| parent    | 512b84fc6c12bc496cef739e69bfaaf27e7ccc8e (diff)      |                           |
Diffstat (limited to 'lib/Target')
40 files changed, 539 insertions, 167 deletions
| diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4fdb667b9539..cf7aec3b4538 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -31,12 +31,8 @@ using namespace llvm;  static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {    const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); - -  if (Subtarget->isTargetLinux()) -    return new AArch64LinuxTargetObjectFile(); -  if (Subtarget->isTargetELF()) -    return new TargetLoweringObjectFileELF(); -  llvm_unreachable("unknown subtarget type"); +  assert (Subtarget->isTargetELF() && "unknown subtarget type"); +  return new AArch64ElfTargetObjectFile();  }  AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) @@ -2782,7 +2778,7 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {  SDValue  AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {    const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); -  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); +  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();    // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes    // rather than just 8. diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 23d81fc478e8..8e5a4d30396c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2587,6 +2587,7 @@ class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,                          pat, itin> {    let mayStore = 1;    let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; +  let Constraints = "@earlyclobber $Rs";  }  multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> { diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index b4452f514590..f8f21198a4f9 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -22,3 +22,10 @@ AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,    TargetLoweringObjectFileELF::Initialize(Ctx, TM);    InitializeELF(TM.Options.UseInitArray);  } + +void +AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx, +                                       const TargetMachine &TM) { +  TargetLoweringObjectFileELF::Initialize(Ctx, TM); +  InitializeELF(TM.Options.UseInitArray); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index bf0565a79ec8..f782285d1c02 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -20,8 +20,12 @@  namespace llvm { -  /// AArch64LinuxTargetObjectFile - This implementation is used for linux -  /// AArch64. +  /// AArch64ElfTargetObjectFile - This implementation is used for ELF +  /// AArch64 targets. 
+  class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF { +    virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); +  }; +    class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {      virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);    }; diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index ff585b41a2aa..3e805a2b68bc 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -418,7 +418,8 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {      if (!MO.isReg() || !MO.isUse())        continue;      if (!usesRegClass(MO, &ARM::DPRRegClass) && -        !usesRegClass(MO, &ARM::QPRRegClass)) +        !usesRegClass(MO, &ARM::QPRRegClass) && +        !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR        continue;      Defs.push_back(MO.getReg()); @@ -538,7 +539,10 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {    InsertPt++;    unsigned Out; -  if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { +  // DPair has the same length as QPR and also has two DPRs as subreg. +  // Treat DPair as QPR. +  if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) || +      MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {      unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,                                           ARM::dsub_0, &ARM::DPRRegClass);      unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, @@ -571,7 +575,9 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {        default: llvm_unreachable("Unknown preferred lane!");      } -    bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); +    // Treat DPair as QPR +    bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) || +                   usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);      Out = createImplicitDef(MBB, InsertPt, DL);      Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index f835a4e5b5fe..658af8380d8d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3684,6 +3684,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,      case ARM::VLD3d16Pseudo:      case ARM::VLD3d32Pseudo:      case ARM::VLD1d64TPseudo: +    case ARM::VLD1d64TPseudoWB_fixed:      case ARM::VLD3d8Pseudo_UPD:      case ARM::VLD3d16Pseudo_UPD:      case ARM::VLD3d32Pseudo_UPD: @@ -3700,6 +3701,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,      case ARM::VLD4d16Pseudo:      case ARM::VLD4d32Pseudo:      case ARM::VLD1d64QPseudo: +    case ARM::VLD1d64QPseudoWB_fixed:      case ARM::VLD4d8Pseudo_UPD:      case ARM::VLD4d16Pseudo_UPD:      case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e6f7f86c5587..3e62b649e0dc 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -136,7 +136,9 @@ static const NEONLdStTableEntry NEONLdStTable[] = {  { ARM::VLD1LNq8Pseudo_UPD,  ARM::VLD1LNd8_UPD, true, true, true,  EvenDblSpc, 1, 8 ,true},  { ARM::VLD1d64QPseudo,      ARM::VLD1d64Q,     true,  false, false, SingleSpc,  4, 1 ,false}, +{ ARM::VLD1d64QPseudoWB_fixed,  ARM::VLD1d64Qwb_fixed,   true,  true, false, SingleSpc,  4, 1 ,false},  { 
ARM::VLD1d64TPseudo,      ARM::VLD1d64T,     true,  false, false, SingleSpc,  3, 1 ,false}, +{ ARM::VLD1d64TPseudoWB_fixed,  ARM::VLD1d64Twb_fixed,   true,  true, false, SingleSpc,  3, 1 ,false},  { ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, false, SingleSpc,  2, 4 ,true},  { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true,  SingleSpc,  2, 4 ,true}, @@ -1071,6 +1073,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,      case ARM::VLD3d16Pseudo:      case ARM::VLD3d32Pseudo:      case ARM::VLD1d64TPseudo: +    case ARM::VLD1d64TPseudoWB_fixed:      case ARM::VLD3d8Pseudo_UPD:      case ARM::VLD3d16Pseudo_UPD:      case ARM::VLD3d32Pseudo_UPD: @@ -1087,6 +1090,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,      case ARM::VLD4d16Pseudo:      case ARM::VLD4d32Pseudo:      case ARM::VLD1d64QPseudo: +    case ARM::VLD1d64QPseudoWB_fixed:      case ARM::VLD4d8Pseudo_UPD:      case ARM::VLD4d16Pseudo_UPD:      case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 87d15226947a..6d9b18877f72 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1673,9 +1673,61 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,    return CurDAG->getTargetConstant(Alignment, MVT::i32);  } +static bool isVLDfixed(unsigned Opc) +{ +  switch (Opc) { +  default: return false; +  case ARM::VLD1d8wb_fixed : return true; +  case ARM::VLD1d16wb_fixed : return true; +  case ARM::VLD1d64Qwb_fixed : return true; +  case ARM::VLD1d32wb_fixed : return true; +  case ARM::VLD1d64wb_fixed : return true; +  case ARM::VLD1d64TPseudoWB_fixed : return true; +  case ARM::VLD1d64QPseudoWB_fixed : return true; +  case ARM::VLD1q8wb_fixed : return true; +  case ARM::VLD1q16wb_fixed : return true; +  case ARM::VLD1q32wb_fixed : return true; +  case ARM::VLD1q64wb_fixed : return true; +  case ARM::VLD2d8wb_fixed : return true; +  case ARM::VLD2d16wb_fixed : return true; +  case ARM::VLD2d32wb_fixed : return true; +  case ARM::VLD2q8PseudoWB_fixed : return true; +  case ARM::VLD2q16PseudoWB_fixed : return true; +  case ARM::VLD2q32PseudoWB_fixed : return true; +  case ARM::VLD2DUPd8wb_fixed : return true; +  case ARM::VLD2DUPd16wb_fixed : return true; +  case ARM::VLD2DUPd32wb_fixed : return true; +  } +} + +static bool isVSTfixed(unsigned Opc) +{ +  switch (Opc) { +  default: return false; +  case ARM::VST1d8wb_fixed : return true; +  case ARM::VST1d16wb_fixed : return true; +  case ARM::VST1d32wb_fixed : return true; +  case ARM::VST1d64wb_fixed : return true; +  case ARM::VST1q8wb_fixed : return true;  +  case ARM::VST1q16wb_fixed : return true;  +  case ARM::VST1q32wb_fixed : return true;  +  case ARM::VST1q64wb_fixed : return true;  +  case ARM::VST1d64TPseudoWB_fixed : return true; +  case ARM::VST1d64QPseudoWB_fixed : return true; +  case ARM::VST2d8wb_fixed : return true; +  case ARM::VST2d16wb_fixed : return true; +  case ARM::VST2d32wb_fixed : return true; +  case ARM::VST2q8PseudoWB_fixed : return true; +  case ARM::VST2q16PseudoWB_fixed : return true; +  case ARM::VST2q32PseudoWB_fixed : return true; +  } +} +  // Get the register stride update opcode of a VLD/VST instruction that  // is otherwise equivalent to the given fixed stride updating instruction.  
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { +  assert((isVLDfixed(Opc) || isVSTfixed(Opc)) +    && "Incorrect fixed stride updating instruction.");    switch (Opc) {    default: break;    case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; @@ -1686,6 +1738,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {    case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;    case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;    case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; +  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; +  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; +  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; +  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;    case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;    case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -1785,11 +1841,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,        SDValue Inc = N->getOperand(AddrOpIdx + 1);        // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0        // case entirely when the rest are updated to that form, too. -      if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) +      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))          Opc = getVLDSTRegisterUpdateOpcode(Opc); -      // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so +      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so        // check for that explicitly too. Horribly hacky, but temporary. -      if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) || +      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||            !isa<ConstantSDNode>(Inc.getNode()))          Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);      } @@ -1937,11 +1993,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,        // case entirely when the rest are updated to that form, too.        if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))          Opc = getVLDSTRegisterUpdateOpcode(Opc); -      // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so +      // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so        // check for that explicitly too. Horribly hacky, but temporary. -      if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) || -          !isa<ConstantSDNode>(Inc.getNode())) -        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? 
Reg0 : Inc); +      if  (!isa<ConstantSDNode>(Inc.getNode())) +        Ops.push_back(Inc); +      else if (NumVecs > 2 && !isVSTfixed(Opc)) +        Ops.push_back(Reg0);      }      Ops.push_back(SrcReg);      Ops.push_back(Pred); @@ -2834,7 +2891,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,                                           ARM::VLD3d16Pseudo_UPD,                                           ARM::VLD3d32Pseudo_UPD, -                                         ARM::VLD1q64wb_fixed}; +                                         ARM::VLD1d64TPseudoWB_fixed};      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,                                            ARM::VLD3q16Pseudo_UPD,                                            ARM::VLD3q32Pseudo_UPD }; @@ -2848,7 +2905,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,                                           ARM::VLD4d16Pseudo_UPD,                                           ARM::VLD4d32Pseudo_UPD, -                                         ARM::VLD1q64wb_fixed}; +                                         ARM::VLD1d64QPseudoWB_fixed};      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,                                            ARM::VLD4q16Pseudo_UPD,                                            ARM::VLD4q32Pseudo_UPD }; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 43bd4c21dc39..0b05c08ed948 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -730,6 +730,8 @@ defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;  defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;  def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;  // ...with 4 registers  class VLD1D4<bits<4> op7_4, string Dt> @@ -769,6 +771,8 @@ defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32">;  defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64">;  def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;  //   VLD2     : Vector Load (multiple 2-element structures)  class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -1671,7 +1675,7 @@ defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;  defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;  def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;  def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;  // ...with 4 registers @@ -1714,7 +1718,7 @@ defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;  defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;  def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;  def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;  //   VST2     : Vector Store (multiple 2-element structures) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index f3dddce30120..1d9c06406a4b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -12,10 +12,14 @@  
//===----------------------------------------------------------------------===//  #include "PPCMCAsmInfo.h" +#include "llvm/ADT/Triple.h" +  using namespace llvm;  void PPCMCAsmInfoDarwin::anchor() { } +/// This version of the constructor is here to maintain ABI compatibility with +/// LLVM 3.4.0  PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {    if (is64Bit) {      PointerSize = CalleeSaveStackSlotSize = 8; @@ -32,6 +36,28 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {    SupportsDebugInformation= true; // Debug information.  } +PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { +  if (is64Bit) { +    PointerSize = CalleeSaveStackSlotSize = 8; +  } +  IsLittleEndian = false; + +  CommentString = ";"; +  ExceptionsType = ExceptionHandling::DwarfCFI; + +  if (!is64Bit) +    Data64bitsDirective = 0;      // We can't emit a 64-bit unit in PPC32 mode. + +  AssemblerDialect = 1;           // New-Style mnemonics. +  SupportsDebugInformation= true; // Debug information. + +  // old assembler lacks some directives +  // FIXME: this should really be a check on the assembler characteristics +  // rather than OS version +  if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) +    HasWeakDefCanBeHiddenDirective = false; +} +  void PPCLinuxMCAsmInfo::anchor() { }  PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 1530e774cfc7..633970ccc289 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -18,11 +18,15 @@  #include "llvm/MC/MCAsmInfoELF.h"  namespace llvm { +class Triple;    class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {      virtual void anchor();    public: +    /// This version of the constructor is here to maintain ABI compatibility +    /// with LLVM 3.4.0.      explicit PPCMCAsmInfoDarwin(bool is64Bit); +    explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);    };    class PPCLinuxMCAsmInfo : public MCAsmInfoELF { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index f18d095c6d02..6a5051840181 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -72,7 +72,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {    MCAsmInfo *MAI;    if (TheTriple.isOSDarwin()) -    MAI = new PPCMCAsmInfoDarwin(isPPC64); +    MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);    else      MAI = new PPCLinuxMCAsmInfo(isPPC64); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index ada34ed9e18a..2d92a112d5ef 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -701,13 +701,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {        return;      }      break; -  case PPC::SYNC: -    // In Book E sync is called msync, handle this special case here... 
-    if (Subtarget.isBookE()) { -      OutStreamer.EmitRawText(StringRef("\tmsync")); -      return; -    } -    break;    case PPC::LD:    case PPC::STD:    case PPC::LWA_32: diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 4224ae2d273c..e419b9b40d8e 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -186,6 +186,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) {    return MadeChange;  } +static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { +  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) +    return ITy->getBitWidth() > (Is32Bit ? 32 : 64); + +  return false; +} +  bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {    for (BasicBlock::iterator J = BB->begin(), JE = BB->end();         J != JE; ++J) { @@ -352,13 +359,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {        CastInst *CI = cast<CastInst>(J);        if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||            CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || -          (TT.isArch32Bit() && -           (CI->getSrcTy()->getScalarType()->isIntegerTy(64) || -            CI->getDestTy()->getScalarType()->isIntegerTy(64)) -          )) +          isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || +          isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))          return true; -    } else if (TT.isArch32Bit() && -               J->getType()->getScalarType()->isIntegerTy(64) && +    } else if (isLargeIntegerTy(TT.isArch32Bit(), +                                J->getType()->getScalarType()) &&                 (J->getOpcode() == Instruction::UDiv ||                  J->getOpcode() == Instruction::SDiv ||                  J->getOpcode() == Instruction::URem || diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 09117e7ded49..4e3b0b83244a 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -892,11 +892,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,    unsigned LoadOpc = PPC::LFD;    if (SrcVT == MVT::i32) { -    Addr.Offset = 4; -    if (!IsSigned) +    if (!IsSigned) {        LoadOpc = PPC::LFIWZX; -    else if (PPCSubTarget.hasLFIWAX()) +      Addr.Offset = 4; +    } else if (PPCSubTarget.hasLFIWAX()) {        LoadOpc = PPC::LFIWAX; +      Addr.Offset = 4; +    }    }    const TargetRegisterClass *RC = &PPC::F8RCRegClass; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6ba6af6446e5..d25762a5bbca 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -261,11 +261,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {      DebugLoc dl;      if (PPCLowering.getPointerTy() == MVT::i32) { -      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); +      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);      } else { -      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass); +      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);      } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 
8da5f0563c6a..25a7ca7f59a7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2333,7 +2333,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(              EVT ObjType = (ObjSize == 1 ? MVT::i8 :                             (ObjSize == 2 ? MVT::i16 : MVT::i32));              Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, -                                      MachinePointerInfo(FuncArg, CurArgOffset), +                                      MachinePointerInfo(FuncArg),                                        ObjType, false, false, 0);            } else {              // For sizes that don't fit a truncating store (3, 5, 6, 7), @@ -2345,7 +2345,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(              int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);              SDValue FIN = DAG.getFrameIndex(FI, PtrVT);              Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, -                                 MachinePointerInfo(FuncArg, ArgOffset), +                                 MachinePointerInfo(FuncArg),                                   false, false, 0);            } @@ -2369,7 +2369,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);            SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, -                                       MachinePointerInfo(FuncArg, ArgOffset), +                                       MachinePointerInfo(FuncArg, j),                                         false, false, 0);            MemOps.push_back(Store);            ++GPR_idx; @@ -2665,8 +2665,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);            EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;            SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, -                                            MachinePointerInfo(FuncArg, -                                              CurArgOffset), +                                            MachinePointerInfo(FuncArg),                                              ObjType, false, false, 0);            MemOps.push_back(Store);            ++GPR_idx; @@ -2690,7 +2689,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);            SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, -                                       MachinePointerInfo(FuncArg, ArgOffset), +                                       MachinePointerInfo(FuncArg, j),                                         false, false, 0);            MemOps.push_back(Store);            ++GPR_idx; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 315ad04ebe3e..80bc27a95765 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -570,12 +570,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,    // update isStoreToStackSlot.    
DebugLoc DL; -  if (PPC::GPRCRegClass.hasSubClassEq(RC)) { +  if (PPC::GPRCRegClass.hasSubClassEq(RC) || +      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))                                         .addReg(SrcReg,                                                 getKillRegState(isKill)),                                         FrameIdx)); -  } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { +  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || +             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))                                         .addReg(SrcReg,                                                 getKillRegState(isKill)), @@ -695,10 +697,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,    // Note: If additional load instructions are added here,    // update isLoadFromStackSlot. -  if (PPC::GPRCRegClass.hasSubClassEq(RC)) { +  if (PPC::GPRCRegClass.hasSubClassEq(RC) || +      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),                                                 DestReg), FrameIdx)); -  } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { +  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || +             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),                                         FrameIdx));    } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 2bd3aadc798d..fc29c69642bf 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -580,6 +580,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;  def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;  def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;  def IsBookE  : Predicate<"PPCSubTarget.isBookE()">; +def IsNotBookE  : Predicate<"!PPCSubTarget.isBookE()">;  //===----------------------------------------------------------------------===//  // PowerPC Multiclass Definitions. @@ -1541,8 +1542,17 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),                     "stmw $rS, $dst", LdStLMW, []>;  def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), -                        "sync $L", LdStSync, []>; -def : Pat<(int_ppc_sync), (SYNC 0)>; +                        "sync $L", LdStSync, []>, Requires<[IsNotBookE]>; + +let isCodeGenOnly = 1 in { +  def MSYNC : XForm_24_sync<31, 598, (outs), (ins), +                           "msync", LdStSync, []>, Requires<[IsBookE]> { +    let L = 0; +  } +} + +def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>; +def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;  //===----------------------------------------------------------------------===//  // PPC32 Arithmetic Instructions. 
@@ -2284,7 +2294,8 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),  def : Pat<(f64 (fextend f32:$src)),            (COPY_TO_REGCLASS $src, F8RC)>; -def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>; +def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>; +def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;  // Additional FNMSUB patterns: -a*c + b == -(a*c - b)  def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), @@ -2373,10 +2384,10 @@ class PPCAsmPseudo<string asm, dag iops>  def : InstAlias<"sc", (SC 0)>; -def : InstAlias<"sync", (SYNC 0)>; -def : InstAlias<"msync", (SYNC 0)>; -def : InstAlias<"lwsync", (SYNC 1)>; -def : InstAlias<"ptesync", (SYNC 2)>; +def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>; +def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>; +def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>; +def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;  def : InstAlias<"wait", (WAIT 0)>;  def : InstAlias<"waitrsv", (WAIT 1)>; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index d566e2c3e52d..43663ce013e9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -144,6 +144,13 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;  def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;  } +// The full condition-code register. This is not modeled fully, but defined +// here primarily, for compatibility with gcc, to allow the inline asm "cc" +// clobber specification to work. +def CC : PPCReg<"cc">, DwarfRegAlias<CR0> { +  let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7]; +} +  // Link register  def LR  : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;  //let Aliases = [LR] in @@ -234,3 +241,8 @@ def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;  def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {    let CopyCost = -1;  } + +def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> { +  let isAllocatable = 0; +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index c863a6ecc777..ec8c82ad521c 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -126,22 +126,6 @@ public:    /// selection.    const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } -  /// getDataLayoutString - Return the pointer size and type alignment -  /// properties of this subtarget. -  const char *getDataLayoutString() const { -    // Note, the alignment values for f64 and i64 on ppc64 in Darwin -    // documentation are wrong; these are correct (i.e. "what gcc does"). -    if (isPPC64() && isSVR4ABI()) { -      if (TargetTriple.getOS() == llvm::Triple::FreeBSD) -        return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64"; -      else -        return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; -    } - -    return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" -                     : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; -  } -    /// \brief Reset the features for the PowerPC target.    
virtual void resetSubtargetFeatures(const MachineFunction *MF);  private: diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 9acefe53ce4a..d6767d51f2cc 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -33,6 +33,43 @@ extern "C" void LLVMInitializePowerPCTarget() {    RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);  } +/// Return the datalayout string of a subtarget. +static std::string getDataLayoutString(const PPCSubtarget &ST) { +  const Triple &T = ST.getTargetTriple(); + +  // PPC is big endian +  std::string Ret = "E"; + +  // PPC64 has 64 bit pointers, PPC32 has 32 bit pointers. +  if (ST.isPPC64()) +    Ret += "-p:64:64"; +  else +    Ret += "-p:32:32"; + +  // Note, the alignment values for f64 and i64 on ppc64 in Darwin +  // documentation are wrong; these are correct (i.e. "what gcc does"). +  if (ST.isPPC64() || ST.isSVR4ABI()) +    Ret += "-f64:64:64-i64:64:64"; +  else +    Ret += "-f64:32:64"; + +  // Set support for 128 floats depending on the ABI. +  if (!ST.isPPC64() && ST.isSVR4ABI()) +    Ret += "-f128:64:128"; + +  // Some ABIs support 128 bit vectors. +  if (ST.isPPC64() && ST.isSVR4ABI()) +    Ret += "-v128:128:128"; + +  // PPC64 has 32 and 64 bit register, PPC32 has only 32 bit ones. +  if (ST.isPPC64()) +    Ret += "-n32:64"; +  else +    Ret += "-n32"; + +  return Ret; +} +  PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,                                     StringRef CPU, StringRef FS,                                     const TargetOptions &Options, @@ -41,7 +78,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,                                     bool is64Bit)    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),      Subtarget(TT, CPU, FS, is64Bit), -    DL(Subtarget.getDataLayoutString()), InstrInfo(*this), +    DL(getDataLayoutString(Subtarget)), InstrInfo(*this),      FrameLowering(Subtarget), JITInfo(*this, is64Bit),      TLInfo(*this), TSInfo(*this),      InstrItins(Subtarget.getInstrItineraryData()) { diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index c4d75ffa0d06..1029f306d632 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -133,6 +133,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); +  setOperationAction(ISD::BR_CC, MVT::i1, Expand); +    setOperationAction(ISD::FNEG, MVT::v2f32, Expand);    setOperationAction(ISD::FNEG, MVT::v4f32, Expand); diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 3c5375d84ecf..7acd67313eea 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -388,6 +388,11 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <  // Bitfield extract patterns +/* + +XXX: The BFE pattern is not working correctly because the XForm is not being +applied. 
+  def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;  def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],                              SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>; @@ -397,6 +402,8 @@ class BFEPattern <Instruction BFE> : Pat <    (BFE $x, $y, $z)  >; +*/ +  // rotr pattern  class ROTRPattern <Instruction BIT_ALIGN> : Pat <    (rotr i32:$src0, i32:$src1), diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 4a8e1b0b2d86..9b26af7bc736 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -13,7 +13,6 @@  using namespace llvm;  AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {    HasSingleParameterDotFile = false; -  WeakDefDirective = 0;    //===------------------------------------------------------------------===//    HasSubsectionsViaSymbols = true;    HasMachoZeroFillDirective = false; @@ -58,7 +57,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {    HasDotTypeDotSizeDirective = false;    HasNoDeadStrip = true;    WeakRefDirective = ".weakref\t"; -  LinkOnceDirective = 0;    //===--- Dwarf Emission Directives -----------------------------------===//    HasLEB128 = true;    SupportsDebugInformation = true; diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index ac3d8f63d57f..2a8276b2214f 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -356,6 +356,7 @@ public:            DEBUG(dbgs() << CfCount << ":"; I->dump(););            FetchClauses.push_back(MakeFetchClause(MBB, I));            CfCount++; +          LastAlu.back() = 0;            continue;          } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index c0827fc1ca40..2eca6cf43271 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -716,7 +716,13 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,      return false;    } -  // Get the last instruction in the block. +  // Remove successive JUMP +  while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) { +      MachineBasicBlock::iterator PriorI = llvm::prior(I); +      if (AllowModify) +        I->removeFromParent(); +      I = PriorI; +  }    MachineInstr *LastInst = I;    // If there is only one terminator instruction, process it. diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 0346e24ab771..74c65daa065c 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1516,7 +1516,9 @@ let Predicates = [isEGorCayman] in {                                                 i32:$src2))],      VecALU    >; -  def : BFEPattern <BFE_UINT_eg>; +// XXX: This pattern is broken, disabling for now.  See comment in +// AMDGPUInstructions.td for more info. 
+//  def : BFEPattern <BFE_UINT_eg>;    def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;    defm : BFIPatterns <BFI_INT_eg>; @@ -1636,7 +1638,6 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <    let src2 = 0;    let src2_rel = 0; -  let Defs = [OQAP];    let usesCustomInserter = 1;    let LDS_1A = 1;    let DisableEncoding = "$dst"; @@ -1672,7 +1673,6 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :    let BaseOp = name;    let usesCustomInserter = 1;    let DisableEncoding = "$dst"; -  let Defs = [OQAP];  }  class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> : diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp index 3370c7955bc7..f0065ea13c5b 100644 --- a/lib/Target/R600/SIFixSGPRCopies.cpp +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -187,7 +187,7 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,        DstRC == &AMDGPU::M0RegRegClass)      return false; -  SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg); +  SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);    return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);  } diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 7ef662eb65b1..695ec407fdbe 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {    Counters Result = ZeroCounts; +  // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish, +  // but we also want to wait for any other outstanding transfers before +  // signalling other hardware blocks +  if (MI.getOpcode() == AMDGPU::S_SENDMSG) +    return LastIssued; +    // For each register affected by this    // instruction increase the result sequence    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 4cd0daa55c5f..b7879c6eface 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -290,10 +290,10 @@ multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,    : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;  multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern, -                     string revOp = opName> { +                     RegisterClass src0_rc, string revOp = opName> {    def _e32 : VOP2 < -    op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), +    op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),      opName#"_e32 $dst, $src0, $src1", pattern    >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; @@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU  multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> { -  let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */, -                                          mayLoad = 1 in { - -  let offen = 1, idxen = 0, addr64 = 0, offset = 0 in { -    def _OFFEN  : MUBUF <op, (outs regClass:$vdata), -                         (ins SReg_128:$srsrc, VReg_32:$vaddr), -                         asm#" $vdata, $srsrc + $vaddr", []>; -  } - -  let offen = 0, idxen = 1, addr64 = 0 in { -    def _IDXEN  : MUBUF <op, (outs regClass:$vdata), -                         (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset), -                         asm#" $vdata, $srsrc[$vaddr] + $offset", []>; -  } +  let 
lds = 0, mayLoad = 1 in { + +    let addr64 = 0 in { + +      let offen = 0, idxen = 0 in { +        def _OFFSET : MUBUF <op, (outs regClass:$vdata), +                             (ins SReg_128:$srsrc, VReg_32:$vaddr, +                             i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, +                             i1imm:$slc, i1imm:$tfe), +                             asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; +      } + +      let offen = 1, idxen = 0, offset = 0 in { +        def _OFFEN  : MUBUF <op, (outs regClass:$vdata), +                             (ins SReg_128:$srsrc, VReg_32:$vaddr, +                             SSrc_32:$soffset, i1imm:$glc, i1imm:$slc, +                             i1imm:$tfe), +                             asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; +      } + +      let offen = 0, idxen = 1 in { +        def _IDXEN  : MUBUF <op, (outs regClass:$vdata), +                             (ins SReg_128:$srsrc, VReg_32:$vaddr, +                             i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, +                             i1imm:$slc, i1imm:$tfe), +                             asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; +      } + +      let offen = 1, idxen = 1 in { +        def _BOTHEN : MUBUF <op, (outs regClass:$vdata), +                             (ins SReg_128:$srsrc, VReg_64:$vaddr, +                             SSrc_32:$soffset, i1imm:$glc, +                             i1imm:$slc, i1imm:$tfe), +                             asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; +      } +    } -  let offen = 0, idxen = 0, addr64 = 1 in { -    def _ADDR64 : MUBUF <op, (outs regClass:$vdata), -                         (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset), -                         asm#" $vdata, $srsrc + $vaddr + $offset", []>; -  } +    let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { +      def _ADDR64 : MUBUF <op, (outs regClass:$vdata), +                           (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset), +                           asm#" $vdata, $srsrc + $vaddr + $offset", []>; +    }    }  } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 76f05eb49655..2ca6a95978b3 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -22,6 +22,8 @@ def InterpSlot : Operand<i32> {    let PrintMethod = "printInterpSlot";  } +def SendMsgImm : Operand<i32>; +  def isSI : Predicate<"Subtarget.getGeneration() "                        ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; @@ -826,17 +828,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",  def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",    []  >; -} // End hasSideEffects  //def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;  //def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;  //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; -//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; + +let Uses = [EXEC] in { +  def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16", +      [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)] +  > { +    let DisableEncoding = "$m0"; +  } +} // End Uses = [EXEC] +  //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;  //def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;  //def S_ICACHE_INV : SOPP_ 
<0x00000013, "S_ICACHE_INV", []>;  //def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;  //def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;  //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; +} // End hasSideEffects  def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),    (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), @@ -979,14 +989,16 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;  let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC  // No patterns so that the scalar instructions are always selected.  // The scalar versions will be replaced with vector when needed later. -defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>; -defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>; -defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>; +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>; +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32, +                              "V_SUB_I32">;  let Uses = [VCC] in { // Carry-in comes from VCC -defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; -defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; +defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>; +defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32, +                               "V_SUBB_U32">;  } // End Uses = [VCC]  } // End isCommutable = 1, Defs = [VCC] @@ -1403,7 +1415,7 @@ def : Pat <  /* int_SI_vs_load_input */  def : Pat<    (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), -  (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset) +  (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)  >;  /* int_SI_export */ @@ -1658,16 +1670,30 @@ def : Pat <     0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)  >; +/********** ================================ **********/ +/********** Floating point absolute/negative **********/ +/********** ================================ **********/ + +// Manipulate the sign bit directly, as e.g. using the source negation modifier +// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0, +// breaking the piglit *s-floatBitsToInt-neg* tests + +// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly +// removing these patterns + +def : Pat < +  (fneg (fabs f32:$src)), +  (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */ +>; +  def : Pat <    (fabs f32:$src), -  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), -   1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) +  (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */  >;  def : Pat <    (fneg f32:$src), -  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), -   0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */) +  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */  >;  /********** ================== **********/ @@ -1794,6 +1820,11 @@ def : Pat <    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)  >; +def : Pat < +  (i32 (zext i1:$src0)), +  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0) +>; +  // 1. Offset as 8bit DWORD immediate  def : Pat <    (SIload_constant i128:$sbase, IMM8bitDWORD:$offset), @@ -1809,7 +1840,7 @@ def : Pat <  // 3. 
Offset in an 32Bit VGPR  def : Pat <    (SIload_constant i128:$sbase, i32:$voff), -  (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff) +  (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)  >;  // The multiplication scales from [0,1] to the unsigned integer range @@ -1970,6 +2001,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;  defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;  defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; +// BUFFER_LOAD_DWORD*, addr64=0 +multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen, +                             MUBUF bothen> { + +  def : Pat < +    (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, +                                  imm:$offset, 0, 0, imm:$glc, imm:$slc, +                                  imm:$tfe)), +    (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), +            (as_i1imm $slc), (as_i1imm $tfe)) +  >; + +  def : Pat < +    (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, +                                  imm, 1, 0, imm:$glc, imm:$slc, +                                  imm:$tfe)), +    (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), +           (as_i1imm $tfe)) +  >; + +  def : Pat < +    (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, +                                  imm:$offset, 0, 1, imm:$glc, imm:$slc, +                                  imm:$tfe)), +    (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), +           (as_i1imm $slc), (as_i1imm $tfe)) +  >; + +  def : Pat < +    (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset, +                                  imm, 1, 1, imm:$glc, imm:$slc, +                                  imm:$tfe)), +    (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), +            (as_i1imm $tfe)) +  >; +} + +defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN, +                         BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>; +defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN, +                         BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>; +defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN, +                         BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>; +  //===----------------------------------------------------------------------===//  // MTBUF Patterns  //===----------------------------------------------------------------------===// @@ -2057,6 +2132,11 @@ def : Pat <    (EXTRACT_SUBREG $a, sub0)  >; +def : Pat < +  (i1 (trunc i32:$a)), +  (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1) +>; +  // V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector  // case, the sgpr-copies pass will fix this to use the vector version.  
def : Pat < diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 7fcc96452114..00e32c03a99e 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in {       llvm_i32_ty],   // tfe(imm)      []>; +  // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed +  def int_SI_buffer_load_dword : Intrinsic < +    [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32 +    [llvm_anyint_ty,  // rsrc(SGPR) +     llvm_anyint_ty,  // vaddr(VGPR) +     llvm_i32_ty,     // soffset(SGPR) +     llvm_i32_ty,     // inst_offset(imm) +     llvm_i32_ty,     // offen(imm) +     llvm_i32_ty,     // idxen(imm) +     llvm_i32_ty,     // glc(imm) +     llvm_i32_ty,     // slc(imm) +     llvm_i32_ty],    // tfe(imm) +    [IntrReadArgMem]>; + +  def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +    class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;    def int_SI_sample : Sample; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 958763dffc22..ef867d36692d 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -109,6 +109,23 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {    return new SILowerControlFlowPass(tm);  } +static bool isDS(unsigned Opcode) { +  switch(Opcode) { +  default: return false; +  case AMDGPU::DS_ADD_U32_RTN: +  case AMDGPU::DS_SUB_U32_RTN: +  case AMDGPU::DS_WRITE_B32: +  case AMDGPU::DS_WRITE_B8: +  case AMDGPU::DS_WRITE_B16: +  case AMDGPU::DS_READ_B32: +  case AMDGPU::DS_READ_I8: +  case AMDGPU::DS_READ_U8: +  case AMDGPU::DS_READ_I16: +  case AMDGPU::DS_READ_U16: +    return true; +  } +} +  bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,                                          MachineBasicBlock *To) { @@ -145,7 +162,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {    MachineBasicBlock &MBB = *MI.getParent();    DebugLoc DL = MI.getDebugLoc(); -  if (!shouldSkip(&MBB, &MBB.getParent()->back())) +  if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType != +      ShaderType::PIXEL || +      !shouldSkip(&MBB, &MBB.getParent()->back()))      return;    MachineBasicBlock::iterator Insert = &MI; @@ -296,9 +315,11 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {    MachineBasicBlock &MBB = *MI.getParent();    DebugLoc DL = MI.getDebugLoc(); -  // Kill is only allowed in pixel shaders +  // Kill is only allowed in pixel / geometry shaders    assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == -         ShaderType::PIXEL); +         ShaderType::PIXEL || +         MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == +         ShaderType::GEOMETRY);    // Clear this pixel from the exec mask if the operand is negative    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) @@ -431,6 +452,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {        Next = llvm::next(I);        MachineInstr &MI = *I; +      if (isDS(MI.getOpcode())) { +        NeedM0 = true; +        NeedWQM = true; +      } +        switch (MI.getOpcode()) {          default: break;          case AMDGPU::SI_IF: @@ -491,14 +517,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {            IndirectDst(MI);         
   break; -        case AMDGPU::DS_READ_B32: -          NeedWQM = true; -          // Fall through -        case AMDGPU::DS_WRITE_B32: -        case AMDGPU::DS_ADD_U32_RTN: -          NeedM0 = true; -          break; -          case AMDGPU::V_INTERP_P1_F32:          case AMDGPU::V_INTERP_P2_F32:          case AMDGPU::V_INTERP_MOV_F32: @@ -517,7 +535,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {              AMDGPU::M0).addImm(0xffffffff);    } -  if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) { +  if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {      MachineBasicBlock &MBB = MF.front();      BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),              AMDGPU::EXEC).addReg(AMDGPU::EXEC); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index bc8f367e9255..22b79b3b2844 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1181,16 +1181,23 @@ X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,                                      unsigned Scale, SMLoc Start, SMLoc End,                                      unsigned Size, StringRef Identifier,                                      InlineAsmIdentifierInfo &Info){ -  if (isa<MCSymbolRefExpr>(Disp)) { -    // If this is not a VarDecl then assume it is a FuncDecl or some other label -    // reference.  We need an 'r' constraint here, so we need to create register -    // operand to ensure proper matching.  Just pick a GPR based on the size of -    // a pointer. -    if (!Info.IsVarDecl) { -      unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; -      return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, -                                   SMLoc(), Identifier, Info.OpDecl); -    } +  // If this is not a VarDecl then assume it is a FuncDecl or some other label +  // reference.  We need an 'r' constraint here, so we need to create register +  // operand to ensure proper matching.  Just pick a GPR based on the size of +  // a pointer. +  if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) { +    unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; +    return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, +                                 SMLoc(), Identifier, Info.OpDecl); +  } + +  // We either have a direct symbol reference, or an offset from a symbol.  The +  // parser always puts the symbol on the LHS, so look there for size +  // calculation purposes. +  const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp); +  bool IsSymRef = +      isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp); +  if (IsSymRef) {      if (!Size) {        Size = Info.Type * 8; // Size is in terms of bits in this context.        if (Size) @@ -1312,10 +1319,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {            if (getParser().parsePrimaryExpr(Val, End))              return Error(Tok.getLoc(), "Unexpected identifier!");          } else { -          InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); -          if (ParseIntelIdentifier(Val, Identifier, Info, -                                   /*Unevaluated=*/false, End)) -            return true; +          // This is a dot operator, not an adjacent identifier. 
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index c81a85755f82..16ee0d357b77 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1065,6 +1065,7 @@ static int readSIB(struct InternalInstruction* insn) {
 
  switch (base) {
  case 0x5:
+  case 0xd:
    switch (modFromModRM(insn->modRM)) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_32;
@@ -1072,13 +1073,11 @@ static int readSIB(struct InternalInstruction* insn) {
      break;
    case 0x1:
      insn->eaDisplacement = EA_DISP_8;
-      insn->sibBase = (insn->addressSize == 4 ?
-                       SIB_BASE_EBP : SIB_BASE_RBP);
+      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x2:
      insn->eaDisplacement = EA_DISP_32;
-      insn->sibBase = (insn->addressSize == 4 ?
-                       SIB_BASE_EBP : SIB_BASE_RBP);
+      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x3:
      debug("Cannot have Mod = 0b11 and a SIB byte");
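The decoder fix above extends the SIB special case to base 0xd, which shares the low three bits (0b101) with 0x5 and therefore follows the same mod-dependent displacement rules. Below is a hedged, standalone sketch of that encoding rule; the descriptive strings and function name are invented for illustration, and the real decoder maps to its SIBBase enum instead.

```cpp
// Minimal sketch of the SIB base/displacement rule for base values whose low
// three bits are 0b101 (0x5 and, with REX.B, 0xd).
#include <cstdint>
#include <cstdio>
#include <string>

// base: 4-bit SIB base (3 bits plus the REX.B extension); mod: ModRM.mod.
static std::string describeSIBBase(uint8_t base, uint8_t mod) {
  if ((base & 0x7) == 0x5) {            // 0x5 and 0xd hit this special case
    if (mod == 0x0)
      return "no base register, disp32 only";
    if (mod == 0x1)
      return "base = rBP/r13, disp8";   // base register kept, short disp
    if (mod == 0x2)
      return "base = rBP/r13, disp32";
    return "invalid: mod = 0b11 cannot have a SIB byte";
  }
  return "base register #" + std::to_string(base) + ", disp chosen by mod";
}

int main() {
  std::printf("base=0x5 mod=0: %s\n", describeSIBBase(0x5, 0).c_str());
  std::printf("base=0xd mod=1: %s\n", describeSIBBase(0xd, 1).c_str());
  return 0;
}
```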
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 3861e1ce290a..8d2b5954ef20 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -65,6 +65,17 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
 
  // Exceptions handling
  ExceptionsType = ExceptionHandling::DwarfCFI;
+
+  // FIXME: this should not depend on the target OS version, but on the ld64
+  // version in use.  From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified
+  // FDE relocs may be used.
+  DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
+
+  // old assembler lacks some directives
+  // FIXME: this should really be a check on the assembler characteristics
+  // rather than OS version
+  if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+    HasWeakDefCanBeHiddenDirective = false;
 }
 
 X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 12584411509d..1f5f91844f80 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -393,9 +393,11 @@ bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
  case 'k': // Print SImode register
    Reg = getX86SubSuperRegister(Reg, MVT::i32);
    break;
-  case 'q': // Print DImode register
-    // FIXME: gcc will actually print e instead of r for 32-bit.
-    Reg = getX86SubSuperRegister(Reg, MVT::i64);
+  case 'q':
+    // Print 64-bit register names if 64-bit integer registers are available.
+    // Otherwise, print 32-bit register names.
+    MVT::SimpleValueType Ty = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+    Reg = getX86SubSuperRegister(Reg, Ty);
    break;
  }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 76eeb64650ba..716c146811a1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15226,9 +15226,15 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
    MBB->addSuccessor(EndMBB);
  }
 
+  // Make sure the last operand is EFLAGS, which gets clobbered by the branch
+  // that was just emitted, but clearly shouldn't be "saved".
+  assert((MI->getNumOperands() <= 3 ||
+          !MI->getOperand(MI->getNumOperands() - 1).isReg() ||
+          MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
+         && "Expected last argument to be EFLAGS");
  unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
  // In the XMM save block, save all the XMM argument registers.
-  for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+  for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
    int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
    MachineMemOperand *MMO =
      F->getMachineMemOperand(
@@ -17577,12 +17583,30 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
          // FIXME: need symbolic constants for these magic numbers.
          // See X86ATTInstPrinter.cpp:printSSECC().
          unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
-          SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+          SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, CMP00.getValueType(),
+                                              CMP00, CMP01,
                                              DAG.getConstant(x86cc, MVT::i8));
-          SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
-                                              OnesOrZeroesF);
-          SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
-                                      DAG.getConstant(1, MVT::i32));
+
+          MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
+
+          if (is64BitFP && !Subtarget->is64Bit()) {
+            // On a 32-bit target, we cannot bitcast the 64-bit float to a
+            // 64-bit integer, since that's not a legal type. Since
+            // OnesOrZeroesF is all ones of all zeroes, we don't need all the
+            // bits, but can do this little dance to extract the lowest 32 bits
+            // and work with those going forward.
+            SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
+                                           OnesOrZeroesF);
+            SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
+                                           Vector64);
+            OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
+                                        Vector32, DAG.getIntPtrConstant(0));
+            IntVT = MVT::i32;
+          }
+
+          SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
+          SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
+                                      DAG.getConstant(1, IntVT));
          SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
          return OneBitOfTruth;
        }
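The CMPEQCombine change above relies on the compare mask being either all ones or all zeroes, so on a 32-bit target the low 32 bits carry the same truth value and no illegal i64 bitcast is needed. The following sketch demonstrates that bit-level observation in plain C++ rather than with SelectionDAG nodes; it models the bit pattern directly and is illustrative only.

```cpp
// Sketch: an "all ones or all zeroes" f64 mask can be reduced to one truth
// bit using only 32-bit integer operations.
#include <cstdint>
#include <cstdio>
#include <cstring>

static unsigned maskToBit32(double OnesOrZeroes) {
  uint64_t Bits;
  std::memcpy(&Bits, &OnesOrZeroes, sizeof(Bits));   // bitcast f64 -> u64
  uint32_t Low = static_cast<uint32_t>(Bits);        // keep the low 32 bits
  return Low & 1u;                                   // the one bit of truth
}

int main() {
  uint64_t AllOnes = ~UINT64_C(0), AllZeroes = 0;
  double MaskTrue, MaskFalse;
  std::memcpy(&MaskTrue, &AllOnes, sizeof(MaskTrue));
  std::memcpy(&MaskFalse, &AllZeroes, sizeof(MaskFalse));
  std::printf("all-ones mask   -> %u\n", maskToBit32(MaskTrue));   // 1
  std::printf("all-zeroes mask -> %u\n", maskToBit32(MaskFalse));  // 0
  return 0;
}
```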
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 7d10b67bfe6d..5c8840823b16 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -72,7 +72,7 @@ def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
 
 // x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, Defs = [EFLAGS] in {
 def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                               (outs),
                               (ins GR8:$al,
@@ -81,7 +81,8 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                               "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
                               [(X86vastart_save_xmm_regs GR8:$al,
                                                          imm:$regsavefi,
-                                                         imm:$offset)]>;
+                                                         imm:$offset),
+                               (implicit EFLAGS)]>;
 
 // The VAARG_64 pseudo-instruction takes the address of the va_list,
 // and places the address of the next argument into a register.
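With EFLAGS now listed as an implicit def of VASTART_SAVE_XMM_REGS, the custom inserter shown earlier asserts that the trailing operand is EFLAGS and stops the save loop one operand early. A toy sketch of that operand-range adjustment follows; the operand names and the fixed 16-byte stride are invented for illustration.

```cpp
// Sketch: skip three fixed leading operands and one trailing implicit def
// when walking the register-save operands of a pseudo instruction.
#include <cassert>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  // al, regsavefi, offset, then the XMM argument registers, then EFLAGS.
  std::vector<std::string> Operands = {"al", "regsavefi", "offset",
                                       "xmm0", "xmm1", "xmm2", "eflags"};

  // Mirror of the new assertion: the last operand is expected to be EFLAGS.
  assert(Operands.size() <= 3 || Operands.back() == "eflags");

  // The save loop runs over [3, size() - 1) so the implicit def is skipped.
  for (std::size_t i = 3, e = Operands.size() - 1; i != e; ++i)
    std::printf("saving %s at frame offset %zu\n", Operands[i].c_str(),
                (i - 3) * 16);
  return 0;
}
```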
