diff options
| field | value | |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2015-12-25 14:25:49 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-12-25 14:25:49 +0000 |
| commit | 2fe5752e3a7c345cdb59e869278d36af33c13fa4 (patch) | |
| tree | df68ca4b788599e14cbadaf19b704672393efccd /lib | |
| parent | 69156b4c20249e7800cc09e0eef0beb3d15ac1ad (diff) | |
Diffstat (limited to 'lib')
41 files changed, 501 insertions, 97 deletions
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 71c77815e281d..a2b9316aa875b 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -169,7 +169,7 @@ void WinException::endFunction(const MachineFunction *MF) {      Asm->OutStreamer->PopSection();    } -  if (shouldEmitMoves) +  if (shouldEmitMoves || shouldEmitPersonality)      Asm->OutStreamer->EmitWinCFIEndProc();  } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 21ab07234c815..fbc8f1e89f6e8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -439,7 +439,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,                               ISD::ANY_EXTEND, dl, VT, Result);        ValResult = Result; -      ChainResult = Chain; +      ChainResult = newLoad.getValue(1);        return;      } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a7392fabf1e71..54cfaf5706191 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1010,6 +1010,8 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,    // Calculate the element offset and add it to the pointer.    unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. 
+  assert(EltSize * 8 == EltVT.getSizeInBits() && +         "Converting bits to bytes lost precision");    Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,                        DAG.getConstant(EltSize, dl, Index.getValueType())); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4348ab79f7d19..51cd6619f7833 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1528,9 +1528,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {    if (CustomLowerNode(N, N->getValueType(0), true))      return SDValue(); -  // Store the vector to the stack. -  EVT EltVT = VecVT.getVectorElementType(); +  // Make the vector elements byte-addressable if they aren't already.    SDLoc dl(N); +  EVT EltVT = VecVT.getVectorElementType(); +  if (EltVT.getSizeInBits() < 8) { +    SmallVector<SDValue, 4> ElementOps; +    for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { +      ElementOps.push_back(DAG.getAnyExtOrTrunc( +          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, +                      DAG.getConstant(i, dl, MVT::i8)), +          dl, MVT::i8)); +    } + +    EltVT = MVT::i8; +    VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, +                             VecVT.getVectorNumElements()); +    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps); +  } + +  // Store the vector to the stack.    
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,                                 MachinePointerInfo(), false, false, 0); diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index adc620db897c2..b553f11018c79 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -794,6 +794,10 @@ void SlotTracker::processFunction() {    ST_DEBUG("begin processFunction!\n");    fNext = 0; +  // Process function metadata if it wasn't hit at the module-level. +  if (!ShouldInitializeAllMetadata) +    processFunctionMetadata(*TheFunction); +    // Add all the function arguments with no names.    for(Function::const_arg_iterator AI = TheFunction->arg_begin(),        AE = TheFunction->arg_end(); AI != AE; ++AI) @@ -807,8 +811,6 @@ void SlotTracker::processFunction() {      if (!BB.hasName())        CreateFunctionSlot(&BB); -    processFunctionMetadata(*TheFunction); -      for (auto &I : BB) {        if (!I.getType()->isVoidTy() && !I.hasName())          CreateFunctionSlot(&I); @@ -836,11 +838,11 @@ void SlotTracker::processFunction() {  void SlotTracker::processFunctionMetadata(const Function &F) {    SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; -  for (auto &BB : F) { -    F.getAllMetadata(MDs); -    for (auto &MD : MDs) -      CreateMetadataSlot(MD.second); +  F.getAllMetadata(MDs); +  for (auto &MD : MDs) +    CreateMetadataSlot(MD.second); +  for (auto &BB : F) {      for (auto &I : BB)        processInstructionMetadata(I);    } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index e0e729d534bd4..0eb88a9675751 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2257,7 +2257,14 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,  }  LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, -                                 unsigned NumClauses, const char *Name) { +                                 LLVMValueRef PersFn, unsigned NumClauses, +                            
     const char *Name) { +  // The personality used to live on the landingpad instruction, but now it +  // lives on the parent function. For compatibility, take the provided +  // personality and put it on the parent function. +  if (PersFn) +    unwrap(B)->GetInsertBlock()->getParent()->setPersonalityFn( +        cast<Function>(unwrap(PersFn)));    return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name));  } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 149ec6a4f3725..25ae4ac76e3cd 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -63,14 +63,21 @@ const char* LTOCodeGenerator::getVersionString() {  #endif  } +static void handleLTODiagnostic(const DiagnosticInfo &DI) { +  DiagnosticPrinterRawOStream DP(errs()); +  DI.print(DP); +  errs() << "\n"; +} +  LTOCodeGenerator::LTOCodeGenerator() -    : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) { +    : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context), +                                            handleLTODiagnostic) {    initializeLTOPasses();  }  LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context)      : OwnedContext(std::move(Context)), Context(*OwnedContext), -      IRLinker(new Module("ld-temp.o", *OwnedContext)) { +      IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) {    initializeLTOPasses();  } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index c601c56f3952d..a85796cfbad90 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -82,6 +82,7 @@ void MCContext::reset() {    UsedNames.clear();    Symbols.clear(); +  SectionSymbols.clear();    Allocator.Reset();    Instances.clear();    CompilationDir.clear(); diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 709d7531d38bd..0a5309b16ee5d 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ 
b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,    for (const MachineBasicBlock &MBB : MF) {      for (const MachineInstr &MI : MBB) {        // TODO: CodeSize should account for multiple functions. + +      // TODO: Should we count size of debug info? +      if (MI.isDebugValue()) +        continue; + +      // FIXME: This is reporting 0 for many instructions.        CodeSize += MI.getDesc().Size;        unsigned numOperands = MI.getNumOperands(); diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 4a65bfc57f149..57b7a73bf56c2 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {  //  // TODO: Check isTriviallyVectorizable for calls and handle other  // instructions. -static bool canVectorizeInst(Instruction *Inst) { +static bool canVectorizeInst(Instruction *Inst, User *User) {    switch (Inst->getOpcode()) {    case Instruction::Load: -  case Instruction::Store:    case Instruction::BitCast:    case Instruction::AddrSpaceCast:      return true; +  case Instruction::Store: { +    // Must be the stored pointer operand, not a stored value. 
+    StoreInst *SI = cast<StoreInst>(Inst); +    return SI->getPointerOperand() == User; +  }    default:      return false;    } @@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {    for (User *AllocaUser : Alloca->users()) {      GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);      if (!GEP) { -      if (!canVectorizeInst(cast<Instruction>(AllocaUser))) +      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))          return false;        WorkList.push_back(AllocaUser); @@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {      GEPVectorIdx[GEP] = Index;      for (User *GEPUser : AllocaUser->users()) { -      if (!canVectorizeInst(cast<Instruction>(GEPUser))) +      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))          return false;        WorkList.push_back(GEPUser); @@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {    for (User *User : Val->users()) {      if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())        continue; -    if (isa<CallInst>(User)) { +    if (CallInst *CI = dyn_cast<CallInst>(User)) { +      // TODO: We might be able to handle some cases where the callee is a +      // constantexpr bitcast of a function. +      if (!CI->getCalledFunction()) +        return false; +        WorkList.push_back(User);        continue;      } @@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {      if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)        return false; +    if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) { +      // Reject if the stored value is not the pointer operand. 
+      if (SI->getPointerOperand() != Val) +        return false; +    } +      if (!User->getType()->isPointerTy())        continue; diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/lib/Target/AMDGPU/AMDGPURegisterInfo.td index 835a1464395c3..ba0490abee8ca 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -14,8 +14,7 @@  let Namespace = "AMDGPU" in {  foreach Index = 0-15 in { -  // Indices are used in a variety of ways here, so don't set a size/offset. -  def sub#Index : SubRegIndex<-1, -1>; +  def sub#Index : SubRegIndex<32, !shl(Index, 5)>;  }  def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 468563c449826..4434d9b119c63 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,    }  } +static unsigned getFixupKindNumBytes(unsigned Kind) { +  switch (Kind) { +  case FK_Data_1: +    return 1; +  case FK_Data_2: +    return 2; +  case FK_Data_4: +    return 4; +  case FK_Data_8: +    return 8; +  default: +    llvm_unreachable("Unknown fixup kind!"); +  } +} +  void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,                                    unsigned DataSize, uint64_t Value,                                    bool IsPCRel) const {    switch ((unsigned)Fixup.getKind()) { -    default: llvm_unreachable("Unknown fixup kind");      case AMDGPU::fixup_si_sopp_br: {        uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());        *Dst = (Value - 4) / 4; @@ -96,6 +110,24 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,        *Dst = Value + 4;        break;      } +    default: { +      // FIXME: Copied from AArch64 +      unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); +      if 
(!Value) +        return; // Doesn't change encoding. +      MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); + +      // Shift the value into position. +      Value <<= Info.TargetOffset; + +      unsigned Offset = Fixup.getOffset(); +      assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + +      // For each byte of the fragment that the fixup touches, mask in the +      // bits from the fixup value. +      for (unsigned i = 0; i != NumBytes; ++i) +        Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +    }    }  } diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 099b0b15942b8..c2db9ff537e9f 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,    setTruncStoreAction(MVT::i64, MVT::i32, Expand);    setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); +  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);    setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);    setOperationAction(ISD::LOAD, MVT::i1, Custom); @@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,                                                    SDValue Ptr) const {    const SIInstrInfo *TII =        static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); -  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE | -                  0xffffffff; // Size -  return buildRSRC(DAG, DL, Ptr, 0, Rsrc); +  return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());  }  SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 18910615bebea..cfd2c42d1aef5 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {    return RsrcDataFormat;  } 
+ +uint64_t SIInstrInfo::getScratchRsrcWords23() const { +  uint64_t Rsrc23 = getDefaultRsrcDataFormat() | +                    AMDGPU::RSRC_TID_ENABLE | +                    0xffffffff; // Size; + +  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. +  // Clear them unless we want a huge stride. +  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) +    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; + +  return Rsrc23; +} diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 015ea12d4598a..5053786a39f5f 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -353,7 +353,7 @@ public:    }    uint64_t getDefaultRsrcDataFormat() const; - +  uint64_t getScratchRsrcWords23() const;  };  namespace AMDGPU { diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index f78ffd72314cc..e0eeea9034b3a 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <  // These instructions only exist on SI and CI  let SubtargetPredicate = isSICI in { +let isCommutable = 1 in { +defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32", +  VOP_F32_F32_F32 +>; +} // End isCommutable = 1 +  defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",    VOP_F32_F32_F32, AMDGPUfmin_legacy  >; @@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;  } // End isCommutable = 1  } // End let SubtargetPredicate = SICI -let isCommutable = 1 in { -defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32", -  VOP_F32_F32_F32 ->; -} // End isCommutable = 1 -  defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",    VOP_I32_I32_I32  >; diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp index 0a7f684552f03..2cd600df22686 100644 --- 
a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp @@ -135,8 +135,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {        unsigned ScratchRsrcReg =            RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); -      uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | -                      0xffffffff; // Size +      uint64_t Rsrc23 = TII->getScratchRsrcWords23();        unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);        unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); @@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {                .addReg(ScratchRsrcReg, RegState::ImplicitDefine);        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) -              .addImm(Rsrc & 0xffffffff) +              .addImm(Rsrc23 & 0xffffffff)                .addReg(ScratchRsrcReg, RegState::ImplicitDefine);        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) -              .addImm(Rsrc >> 32) +              .addImm(Rsrc23 >> 32)                .addReg(ScratchRsrcReg, RegState::ImplicitDefine);        // Scratch Offset diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 54c4d549fac7d..e9e8412e263d0 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -26,23 +26,25 @@ using namespace llvm;  SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} -BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { -  BitVector Reserved(getNumRegs()); -  Reserved.set(AMDGPU::EXEC); +void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { +  MCRegAliasIterator R(Reg, this, true); -  // EXEC_LO and EXEC_HI could be allocated and used as regular register, -  // but this seems likely to result in bugs, so I'm marking them as reserved. 
-  Reserved.set(AMDGPU::EXEC_LO); -  Reserved.set(AMDGPU::EXEC_HI); +  for (; R.isValid(); ++R) +    Reserved.set(*R); +} +BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { +  BitVector Reserved(getNumRegs());    Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); -  Reserved.set(AMDGPU::FLAT_SCR); -  Reserved.set(AMDGPU::FLAT_SCR_LO); -  Reserved.set(AMDGPU::FLAT_SCR_HI); + +  // EXEC_LO and EXEC_HI could be allocated and used as regular register, but +  // this seems likely to result in bugs, so I'm marking them as reserved. +  reserveRegisterTuples(Reserved, AMDGPU::EXEC); +  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);    // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs -  Reserved.set(AMDGPU::VGPR255); -  Reserved.set(AMDGPU::VGPR254); +  reserveRegisterTuples(Reserved, AMDGPU::VGPR254); +  reserveRegisterTuples(Reserved, AMDGPU::VGPR255);    // Tonga and Iceland can only allocate a fixed number of SGPRs due    // to a hw bug. @@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {      for (unsigned i = Limit; i < NumSGPRs; ++i) {        unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); -      MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true); - -      for (; R.isValid(); ++R) -        Reserved.set(*R); +      reserveRegisterTuples(Reserved, Reg);      }    } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index bfdb67c5e12b7..7da6de282c11d 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -23,7 +23,10 @@  namespace llvm {  struct SIRegisterInfo : public AMDGPURegisterInfo { +private: +  void reserveRegisterTuples(BitVector &, unsigned Reg) const; +public:    SIRegisterInfo();    BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 
f8f0eb2d4baaa..cf6b8929f311a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -15,6 +15,7 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/StringExtras.h"  #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h"  #include "llvm/ADT/Twine.h"  #include "llvm/MC/MCAsmInfo.h"  #include "llvm/MC/MCAssembler.h" @@ -9104,6 +9105,10 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {      return false;    } +  Triple T; +  STI.setDefaultFeatures(T.getARMCPUForArch(Arch)); +  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); +    getTargetStreamer().emitArch(ID);    return false;  } diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index d9e654c76428e..9d5f1d406d0ea 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -50,6 +50,7 @@ private:    // Complex Pattern for address selection.    bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); +  bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset);  };  } @@ -67,7 +68,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {        Addr.getOpcode() == ISD::TargetGlobalAddress)      return false; -  // Addresses of the form FI+const or FI|const +  // Addresses of the form Addr+const or Addr|const    if (CurDAG->isBaseWithConstantOffset(Addr)) {      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));      if (isInt<32>(CN->getSExtValue())) { @@ -89,6 +90,31 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {    return true;  } +// ComplexPattern used on BPF FI instruction +bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { +  SDLoc DL(Addr); + +  if (!CurDAG->isBaseWithConstantOffset(Addr)) +    return false; + +  // Addresses of the form Addr+const or Addr|const +  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); +  
if (isInt<32>(CN->getSExtValue())) { + +    // If the first operand is a FI, get the TargetFI Node +    if (FrameIndexSDNode *FIN = +            dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) +      Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); +    else +      return false; + +    Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64); +    return true; +  } + +  return false; +} +  SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {    unsigned Opcode = Node->getOpcode(); @@ -104,13 +130,6 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {    // tablegen selection should be handled here.    switch (Opcode) {    default: break; - -  case ISD::UNDEF: { -    errs() << "BUG: "; Node->dump(CurDAG); errs() << '\n'; -    report_fatal_error("shouldn't see UNDEF during Select"); -    break; -  } -    case ISD::INTRINSIC_W_CHAIN: {      unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();      switch (IntNo) { diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 58498a1aec7d9..73418283d9bf6 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -102,6 +102,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,    setOperationAction(ISD::BR_CC, MVT::i64, Custom);    setOperationAction(ISD::BR_JT, MVT::Other, Expand); +  setOperationAction(ISD::BRIND, MVT::Other, Expand);    setOperationAction(ISD::BRCOND, MVT::Other, Expand);    setOperationAction(ISD::SETCC, MVT::i64, Expand);    setOperationAction(ISD::SELECT, MVT::i64, Expand); @@ -128,9 +129,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,    setOperationAction(ISD::SUBC, MVT::i64, Expand);    setOperationAction(ISD::SUBE, MVT::i64, Expand); -  // no UNDEF allowed -  setOperationAction(ISD::UNDEF, MVT::i64, Expand); -    setOperationAction(ISD::ROTR, MVT::i64, Expand);    setOperationAction(ISD::ROTL, MVT::i64, Expand);    setOperationAction(ISD::SHL_PARTS, MVT::i64, 
Expand); diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index 26b2cfebdc830..6b73db87fa263 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -54,7 +54,8 @@ def i64immSExt32 : PatLeaf<(imm),                  [{return isInt<32>(N->getSExtValue()); }]>;  // Addressing modes. -def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [frameindex], []>; +def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>; +def FIri : ComplexPattern<i64, 2, "SelectFIAddr", [add, or], []>;  // Address operands  def MEMri : Operand<i64> { @@ -260,6 +261,15 @@ def MOV_rr : MOV_RR<"mov">;  def MOV_ri : MOV_RI<"mov">;  } +def FI_ri +    : InstBPF<(outs GPR:$dst), (ins MEMri:$addr), +               "lea\t$dst, $addr", +               [(set i64:$dst, FIri:$addr)]> { +  // This is a tentative instruction, and will be replaced +  // with MOV_rr and ADD_ri in PEI phase +} + +  def LD_pseudo      : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm),                "ld_pseudo\t$dst, $pseudo, $imm", diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 8f885c3ea61b8..952615bd1c2b8 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -58,14 +58,13 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,    unsigned FrameReg = getFrameRegister(MF);    int FrameIndex = MI.getOperand(i).getIndex(); +  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +  MachineBasicBlock &MBB = *MI.getParent();    if (MI.getOpcode() == BPF::MOV_rr) { -    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();      int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);      MI.getOperand(i).ChangeToRegister(FrameReg, false); - -    MachineBasicBlock &MBB = *MI.getParent();      unsigned reg = MI.getOperand(i - 1).getReg();      BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)          .addReg(reg) @@ -79,8 +78,24 @@ void 
BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,    if (!isInt<32>(Offset))      llvm_unreachable("bug in frame offset"); -  MI.getOperand(i).ChangeToRegister(FrameReg, false); -  MI.getOperand(i + 1).ChangeToImmediate(Offset); +  if (MI.getOpcode() == BPF::FI_ri) { +    // architecture does not really support FI_ri, replace it with +    //    MOV_rr <target_reg>, frame_reg +    //    ADD_ri <target_reg>, imm +    unsigned reg = MI.getOperand(i - 1).getReg(); + +    BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg) +        .addReg(FrameReg); +    BuildMI(MBB, II, DL, TII.get(BPF::ADD_ri), reg) +        .addReg(reg) +        .addImm(Offset); + +    // Remove FI_ri instruction +    MI.eraseFromParent(); +  } else { +    MI.getOperand(i).ChangeToRegister(FrameReg, false); +    MI.getOperand(i + 1).ChangeToImmediate(Offset); +  }  }  unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 6fe8f830d35da..b3d861d34da72 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -269,6 +269,14 @@ namespace llvm {      unsigned getRegisterByName(const char* RegName, EVT VT,                                 SelectionDAG &DAG) const override; +    /// Returns true if a cast between SrcAS and DestAS is a noop. +    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { +      // Mips doesn't have any special address spaces so we just reserve +      // the first 256 for software use (e.g. OpenCL) and treat casts +      // between them as noops. 
+      return SrcAS < 256 && DestAS < 256; +    } +    protected:      SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index cb46d731da290..2ebfbd17d7d05 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -115,6 +115,11 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,      if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())        continue; +    // Also, we have to check that the register class of the operand +    // contains the zero register. +    if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg)) +      continue; +      MO.setReg(ZeroReg);    } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 444446692c589..8e118ec27e67c 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -947,11 +947,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {      return;    }    case PPC::ADDISdtprelHA: -    // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> -    // Into:      %Xd = ADDIS8 %X3, sym@dtprel@ha +    // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym> +    // Into:      %Xd = ADDIS8 %Xs, sym@dtprel@ha    case PPC::ADDISdtprelHA32: { -    // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym> -    // Into:      %Rd = ADDIS %R3, sym@dtprel@ha +    // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym> +    // Into:      %Rd = ADDIS %Rs, sym@dtprel@ha      const MachineOperand &MO = MI->getOperand(2);      const GlobalValue *GValue = MO.getGlobal();      MCSymbol *MOSymbol = getSymbol(GValue); @@ -962,7 +962,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {          *OutStreamer,          MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)              .addReg(MI->getOperand(0).getReg()) -            .addReg(Subtarget->isPPC64() ? 
PPC::X3 : PPC::R3) +            .addReg(MI->getOperand(1).getReg())              .addExpr(SymDtprel));      return;    } diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index baadf081a64c5..fd150beeb5a94 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -197,10 +197,18 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {  // Determining the address of a TLS variable results in a function call in  // certain TLS models.  static bool memAddrUsesCTR(const PPCTargetMachine *TM, -                           const llvm::Value *MemAddr) { +                           const Value *MemAddr) {    const auto *GV = dyn_cast<GlobalValue>(MemAddr); -  if (!GV) +  if (!GV) { +    // Recurse to check for constants that refer to TLS global variables. +    if (const auto *CV = dyn_cast<Constant>(MemAddr)) +      for (const auto &CO : CV->operands()) +        if (memAddrUsesCTR(TM, CO)) +          return true; +      return false; +  } +    if (!GV->isThreadLocal())      return false;    if (!TM) @@ -239,6 +247,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {          if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {            switch (F->getIntrinsicID()) {            default: continue; +          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr +          // we're definitely using CTR. +          case Intrinsic::ppc_is_decremented_ctr_nonzero: +	  case Intrinsic::ppc_mtctr: +	    return true;  // VisualStudio defines setjmp as _setjmp  #if defined(_MSC_VER) && defined(setjmp) && \ @@ -426,6 +439,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {    // Process nested loops first.    for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {      MadeChange |= convertToCTRLoop(*I); +    DEBUG(dbgs() << "Nested loop converted\n");    }    // If a nested loop has been converted, then we can't convert this loop. 
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b6025bf66ef70..932226842bb7f 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2570,13 +2570,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {        return nullptr;      }      // ISD::OR doesn't get all the bitfield insertion fun. -    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert +    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a +    // bitfield insert.      if (isInt32Immediate(N->getOperand(1), Imm) &&          N->getOperand(0).getOpcode() == ISD::OR &&          isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { +      // The idea here is to check whether this is equivalent to: +      //   (c1 & m) | (x & ~m) +      // where m is a run-of-ones mask. The logic here is that, for each bit in +      // c1 and c2: +      //  - if both are 1, then the output will be 1. +      //  - if both are 0, then the output will be 0. +      //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will +      //    come from x. +      //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will +      //    be 0. +      //  If that last condition is never the case, then we can form m from the +      //  bits that are the same between c1 and c2.        
unsigned MB, ME; -      Imm = ~(Imm^Imm2); -      if (isRunOfOnes(Imm, MB, ME)) { +      if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {          SDValue Ops[] = { N->getOperand(0).getOperand(0),                              N->getOperand(0).getOperand(1),                              getI32Imm(0, dl), getI32Imm(MB, dl), @@ -2787,6 +2799,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {          SDValue Base, Offset;          if (LD->isUnindexed() && +            (LD->getMemoryVT() == MVT::f64 || +             LD->getMemoryVT() == MVT::i64) &&              SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {            SDValue Chain = LD->getChain();            SDValue Ops[] = { Base, Offset, Chain }; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1e28913d1fca8..1b8f8fb2f45b1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -431,6 +431,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,        AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);        setOperationAction(ISD::SELECT, VT, Promote);        AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); +      setOperationAction(ISD::SELECT_CC, VT, Promote); +      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);        setOperationAction(ISD::STORE, VT, Promote);        AddPromotedToType (ISD::STORE, VT, MVT::v4i32); @@ -7175,7 +7177,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,          PPC::isSplatShuffleMask(SVOp, 4) ||          PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||          PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || -        PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||          PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||          PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||          PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || @@ -7183,8 +7184,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,          PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||   
       PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||          PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || -        PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG)   || -        PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) { +        (Subtarget.hasP8Altivec() && ( +         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || +         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || +         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {        return Op;      }    } @@ -7195,7 +7198,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,    unsigned int ShuffleKind = isLittleEndian ? 2 : 0;    if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||        PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || -      PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||        PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||        PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||        PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || @@ -7203,8 +7205,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,        PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||        PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||        PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || -      PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG)             || -      PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)) +      (Subtarget.hasP8Altivec() && ( +       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || +       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || +       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))      return Op;    // Check to see if this is a shuffle of 4-byte values.  
If so, we can use our diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index bf6e40296405d..d4e666cc1f3e7 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -309,6 +309,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {    unsigned MB = MI->getOperand(4).getImm();    unsigned ME = MI->getOperand(5).getImm(); +  // We can't commute a trivial mask (there is no way to represent an all-zero +  // mask). +  if (MB == 0 && ME == 31) +    return nullptr; +    if (NewMI) {      // Create a new instruction.      unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg(); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index b50124db1ea19..24fd9bd5c1f72 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2835,24 +2835,84 @@ def : Pat<(i64 (anyext i1:$in)),            (SELECT_I8 $in, (LI8 1), (LI8 0))>;  // match setcc on i1 variables. 
+// CRANDC is: +//   1 1 : F +//   1 0 : T +//   0 1 : F +//   0 0 : F +// +// LT is: +//  -1 -1  : F +//  -1  0  : T +//   0 -1  : F +//   0  0  : F +// +// ULT is: +//   1 1 : F +//   1 0 : F +//   0 1 : T +//   0 0 : F  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), -          (CRANDC $s2, $s1)>; +          (CRANDC $s1, $s2)>;  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),            (CRANDC $s2, $s1)>; +// CRORC is: +//   1 1 : T +//   1 0 : T +//   0 1 : F +//   0 0 : T +// +// LE is: +//  -1 -1 : T +//  -1  0 : T +//   0 -1 : F +//   0  0 : T +// +// ULE is: +//   1 1 : T +//   1 0 : F +//   0 1 : T +//   0 0 : T  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), -          (CRORC $s2, $s1)>; +          (CRORC $s1, $s2)>;  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),            (CRORC $s2, $s1)>; +  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),            (CREQV $s1, $s2)>; + +// GE is: +//  -1 -1 : T +//  -1  0 : F +//   0 -1 : T +//   0  0 : T +// +// UGE is: +//   1 1 : T +//   1 0 : T +//   0 1 : F +//   0 0 : T  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), -          (CRORC $s1, $s2)>; +          (CRORC $s2, $s1)>;  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),            (CRORC $s1, $s2)>; + +// GT is: +//  -1 -1 : F +//  -1  0 : F +//   0 -1 : T +//   0  0 : F +// +// UGT is: +//  1 1 : F +//  1 0 : T +//  0 1 : F +//  0 0 : F  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), -          (CRANDC $s1, $s2)>; +          (CRANDC $s2, $s1)>;  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),            (CRANDC $s1, $s2)>; +  def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),            (CRXOR $s1, $s2)>; @@ -3203,18 +3263,30 @@ def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)),  //   select (lhs == rhs), tval, fval is:  //   ((lhs == rhs) & tval) | (!(lhs == rhs) & fval)  def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), +           (CROR (CRAND (CRANDC $lhs, $rhs), $tval), +                 (CRAND (CRORC  $rhs, $lhs), $fval))>; +def : Pat 
<(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)),             (CROR (CRAND (CRANDC $rhs, $lhs), $tval),                   (CRAND (CRORC  $lhs, $rhs), $fval))>;  def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), +           (CROR (CRAND (CRORC  $lhs, $rhs), $tval), +                 (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)),             (CROR (CRAND (CRORC  $rhs, $lhs), $tval),                   (CRAND (CRANDC $lhs, $rhs), $fval))>;  def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)),             (CROR (CRAND (CREQV $lhs, $rhs), $tval),                   (CRAND (CRXOR $lhs, $rhs), $fval))>;  def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), +           (CROR (CRAND (CRORC  $rhs, $lhs), $tval), +                 (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)),             (CROR (CRAND (CRORC  $lhs, $rhs), $tval),                   (CRAND (CRANDC $rhs, $lhs), $fval))>;  def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), +           (CROR (CRAND (CRANDC $rhs, $lhs), $tval), +                 (CRAND (CRORC  $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)),             (CROR (CRAND (CRANDC $lhs, $rhs), $tval),                   (CRAND (CRORC  $rhs, $lhs), $fval))>;  def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), @@ -3223,66 +3295,106 @@ def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),  // match selectcc on i1 variables with non-i1 output.  
def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), +          (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)),            (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), +          (SELECT_I4 (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)),            (SELECT_I4 (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)),            (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), +          (SELECT_I4 (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)),            (SELECT_I4 (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), +          (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)),            (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)),            (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>;  def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), +          (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)),            (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), +          (SELECT_I8 (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)),            (SELECT_I8 (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)),            (SELECT_I8 (CREQV $lhs, $rhs), 
$tval, $fval)>;  def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), +          (SELECT_I8 (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)),            (SELECT_I8 (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), +          (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)),            (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)),            (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>;  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), +          (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),            (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), +          (SELECT_F4 (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),            (SELECT_F4 (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),            (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), +          (SELECT_F4 (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),            (SELECT_F4 (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), +          (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),            (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),            (SELECT_F4 (CRXOR 
$lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), +          (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),            (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), +          (SELECT_F8 (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),            (SELECT_F8 (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),            (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), +          (SELECT_F8 (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),            (SELECT_F8 (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), +          (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),            (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),            (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), +          (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)),            (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), +          (SELECT_VRRC (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)),            (SELECT_VRRC (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, 
v4i32:$fval, SETEQ)),            (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), +          (SELECT_VRRC (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)),            (SELECT_VRRC (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), +          (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),            (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),            (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td index 5c66b42690c3f..0a044c5c6ea44 100644 --- a/lib/Target/PowerPC/PPCInstrQPX.td +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -1115,40 +1115,64 @@ def : Pat<(v4f64 (PPCqbflt v4i1:$src)),            (COPY_TO_REGCLASS $src, QFRC)>;  def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)), +          (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)),            (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)), +          (SELECT_QFRC (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)),            (SELECT_QFRC (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),            (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)), +          (SELECT_QFRC (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, 
i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)),            (SELECT_QFRC (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)), +          (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)),            (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),            (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)), +          (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)),            (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)), +          (SELECT_QSRC (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)),            (SELECT_QSRC (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),            (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)), +          (SELECT_QSRC (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)),            (SELECT_QSRC (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)), +          (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)),            (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),            (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, 
v4i1:$tval, v4i1:$fval, SETLT)), +          (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)),            (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)), +          (SELECT_QBRC (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)),            (SELECT_QBRC (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),            (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)), +          (SELECT_QBRC (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)),            (SELECT_QBRC (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)), +          (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)),            (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),            (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 20c95fe888e00..ce63c22992e82 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -958,27 +958,43 @@ def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;  // Selects.  
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), +          (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),            (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), +          (SELECT_VSRC (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),            (SELECT_VSRC (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),            (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), +          (SELECT_VSRC (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),            (SELECT_VSRC (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), +          (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),            (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),            (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), +          (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),            (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), +          (SELECT_VSFRC (CRORC  $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),            (SELECT_VSFRC (CRORC  $rhs, $lhs), $tval, $fval)>;  def : Pat<(f64 (selectcc 
i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),            (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), +          (SELECT_VSFRC (CRORC  $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),            (SELECT_VSFRC (CRORC  $lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), +          (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),            (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;  def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),            (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; @@ -1060,18 +1076,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.              (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;    def : Pat<(f64 (fextend f32:$src)),              (COPY_TO_REGCLASS $src, VSFRC)>; +    def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), +            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),              (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;    def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), +            (SELECT_VSSRC (CRORC  $lhs, $rhs), $tval, $fval)>; +  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),              (SELECT_VSSRC (CRORC  $rhs, $lhs), $tval, $fval)>;    def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),              (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;    def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), +            (SELECT_VSSRC (CRORC  $rhs, $lhs), $tval, $fval)>; +  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),              (SELECT_VSSRC 
(CRORC  $lhs, $rhs), $tval, $fval)>;    def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), +            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),              (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;    def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), -          (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; +            (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;    // VSX Elementary Scalar FP arithmetic (SP)    let isCommutable = 1 in { diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 58d3c3d3fa2ef..46b8d13e47b91 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -103,6 +103,11 @@ protected:          VNInfo *AddendValNo =            LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); +        if (!AddendValNo) { +          // This can be null if the register is undef. +          continue; +        } +          MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);          // The addend and this instruction must be in the same block. @@ -181,11 +186,14 @@ protected:          if (!KilledProdOp)            continue; -        // For virtual registers, verify that the addend source register -        // is live here (as should have been assured above). -        assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) || -                LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) && -               "Addend source register is not live!"); +	// If the addend copy is used only by this MI, then the addend source +	// register is likely not live here. 
This could be fixed (based on the +	// legality checks above, the live range for the addend source register +	// could be extended), but it seems likely that such a trivial copy can +	// be coalesced away later, and thus is not worth the effort. +	if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && +            !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) +          continue;          // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 3fb1dcc3d4aff..d7132d5272d86 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -240,6 +240,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {    for (MachineBasicBlock &MBB : *MF) {      for (MachineInstr &MI : MBB) { +      if (MI.isDebugValue()) +        continue; +        bool RelevantInstr = false;        bool Partial = false; diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 4a33f7fc34676..1c4e486da4188 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -77,7 +77,7 @@ class SparcAsmParser : public MCTargetAsmParser {    bool parseDirectiveWord(unsigned Size, SMLoc L);    bool is64Bit() const { -    return STI.getTargetTriple().getArchName().startswith("sparcv9"); +    return STI.getTargetTriple().getArch() == Triple::sparcv9;    }    void expandSET(MCInst &Inst, SMLoc IDLoc, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 71ccb1ab1e554..0f29b514146c7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13573,6 +13573,35 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,                         DAG.getConstant(SSECC, dl, MVT::i8));    } +  MVT VTOp0 = Op0.getSimpleValueType(); +  assert(VTOp0 == Op1.getSimpleValueType() && +         
"Expected operands with same type!"); +  assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && +         "Invalid number of packed elements for source and destination!"); + +  if (VT.is128BitVector() && VTOp0.is256BitVector()) { +    // On non-AVX512 targets, a vector of MVT::i1 is promoted by the type +    // legalizer to a wider vector type.  In the case of 'vsetcc' nodes, the +    // legalizer firstly checks if the first operand in input to the setcc has +    // a legal type. If so, then it promotes the return type to that same type. +    // Otherwise, the return type is promoted to the 'next legal type' which, +    // for a vector of MVT::i1 is always a 128-bit integer vector type. +    // +    // We reach this code only if the following two conditions are met: +    // 1. Both return type and operand type have been promoted to wider types +    //    by the type legalizer. +    // 2. The original operand type has been promoted to a 256-bit vector. +    // +    // Note that condition 2. only applies for AVX targets. +    SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode); +    return DAG.getZExtOrTrunc(NewOp, dl, VT); +  } + +  // The non-AVX512 code below works under the assumption that source and +  // destination types are the same. +  assert((Subtarget->hasAVX512() || (VT == VTOp0)) && +         "Value types for source and destination must be the same!"); +    // Break 256-bit integer vector compare into smaller ones.    if (VT.is256BitVector() && !Subtarget->hasInt256())      return Lower256IntVSETCC(Op, DAG); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 88e5e479136f8..909baae92548a 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -228,7 +228,7 @@ void PassManagerBuilder::populateModulePassManager(    // Start of function pass.    // Break up aggregate allocas, using SSAUpdater.    
if (UseNewSROA) -    MPM.add(createSROAPass(/*RequiresDomTree*/ false)); +    MPM.add(createSROAPass());    else      MPM.add(createScalarReplAggregatesPass(-1, false));    MPM.add(createEarlyCSEPass());              // Catch trivial redundancies diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index d1eba6e70e570..89a0d0af93be4 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1761,7 +1761,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {      if (isa<PHINode>(V))        V->takeName(LI);      if (Instruction *I = dyn_cast<Instruction>(V)) -      I->setDebugLoc(LI->getDebugLoc()); +      if (LI->getDebugLoc()) +        I->setDebugLoc(LI->getDebugLoc());      if (V->getType()->getScalarType()->isPointerTy())        MD->invalidateCachedPointerInfo(V);      markInstructionForDeletion(LI); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 50ca6234d0b7a..ba8af47b54e1f 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -869,6 +869,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {        PN->replaceAllUsesWith(*Inserted.first);        PN->eraseFromParent();        Changed = true; + +      // The RAUW can change PHIs that we already visited. Start over from the +      // beginning. +      PHISet.clear(); +      I = BB->begin();      }    }  | 
