| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2016-11-25 19:05:59 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2016-11-25 19:05:59 +0000 |
| commit | 6449741f4c1842221757c062f4abbae7bb524ba9 (patch) | |
| tree | 5a2ca31d10f5ca2e8fb9c1ade59c306526de8329 /lib | |
| parent | 60a9e02f5509f102642299ee408fab21b2ee30e4 (diff) | |
Diffstat (limited to 'lib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/Analysis/LoopAccessAnalysis.cpp | 32 |
| -rw-r--r-- | lib/CodeGen/BranchFolding.cpp | 19 |
| -rw-r--r-- | lib/Linker/IRMover.cpp | 16 |
| -rw-r--r-- | lib/Support/Unix/Signals.inc | 2 |
| -rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 4 |
| -rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 3 |
| -rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 127 |
| -rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 9 |
| -rw-r--r-- | lib/Target/PowerPC/PPCInstr64Bit.td | 12 |
| -rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 36 |
| -rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 14 |
| -rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 4 |
13 files changed, 261 insertions, 19 deletions
```diff
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 0d774cf08e2f..5214eb7c051c 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -148,6 +148,19 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
   return OrigSCEV;
 }
 
+/// Calculate Start and End points of memory access.
+/// Let's assume A is the first access and B is a memory access on N-th loop
+/// iteration. Then B is calculated as:
+///   B = A + Step*N.
+/// Step value may be positive or negative.
+/// N is a calculated back-edge taken count:
+///   N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
+/// Start and End points are calculated in the following way:
+///   Start = UMIN(A, B); End = UMAX(A, B) + SizeOfElt,
+/// where SizeOfElt is the size of single memory access in bytes.
+///
+/// There is no conflict when the intervals are disjoint:
+/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
                                     const ValueToValueMap &Strides,
@@ -176,12 +189,17 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
       if (CStep->getValue()->isNegative())
         std::swap(ScStart, ScEnd);
     } else {
-      // Fallback case: the step is not constant, but the we can still
+      // Fallback case: the step is not constant, but we can still
       // get the upper and lower bounds of the interval by using min/max
       // expressions.
       ScStart = SE->getUMinExpr(ScStart, ScEnd);
       ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
     }
+    // Add the size of the pointed element to ScEnd.
+    unsigned EltSize =
+      Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
+    const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
+    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
   }
 
   Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@@ -1863,9 +1881,17 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
     Value *End0 =   ChkBuilder.CreateBitCast(A.End,   PtrArithTy1, "bc");
     Value *End1 =   ChkBuilder.CreateBitCast(B.End,   PtrArithTy0, "bc");
 
-    Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+    // [A|B].Start points to the first accessed byte under base [A|B].
+    // [A|B].End points to the last accessed byte, plus one.
+    // There is no conflict when the intervals are disjoint:
+    // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
+    //
+    // bound0 = (B.Start < A.End)
+    // bound1 = (A.Start < B.End)
+    // IsConflict = bound0 & bound1
+    Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
     FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
-    Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+    Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
     FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
     Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
     FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
```
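The change makes each pointer's interval half-open: `End` now points one past the last accessed byte, so the bound checks can use strict `ULT` instead of `ULE` without flagging accesses that merely touch adjacent bytes. A standalone model of the resulting check (hypothetical helper names; this is not code from the commit):

```cpp
#include <cassert>
#include <cstdint>

// Half-open byte interval [Start, End) for one pointer, mirroring the
// ScStart/ScEnd values computed by RuntimePointerChecking::insert.
struct AccessRange {
  uint64_t Start; // first accessed byte
  uint64_t End;   // last accessed byte, plus one
};

// NoConflict = (B.Start >= A.End) || (A.Start >= B.End); a conflict is
// reported only when the half-open intervals genuinely overlap.
bool mayConflict(const AccessRange &A, const AccessRange &B) {
  bool Bound0 = B.Start < A.End; // "bound0" in the IR above
  bool Bound1 = A.Start < B.End; // "bound1"
  return Bound0 && Bound1;       // "found.conflict"
}

int main() {
  // Two adjacent i32 accesses: [0, 4) and [4, 8). With an inclusive End and
  // ULE these were flagged as conflicting; half-open intervals plus ULT keep
  // them disjoint, while real overlaps are still caught.
  assert(!mayConflict({0, 4}, {4, 8}));
  assert(mayConflict({0, 8}, {4, 12}));
  return 0;
}
```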
```diff
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 23e2aa70d0c7..5dacbf9e6b02 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -776,9 +776,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
 }
 
 static void
-mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
-                              MachineBasicBlock &MBBCommon) {
-  // Merge MMOs from memory operations in the common block.
+mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
+                MachineBasicBlock &MBBCommon) {
   MachineBasicBlock *MBB = MBBIStartPos->getParent();
   // Note CommonTailLen does not necessarily matches the size of
   // the common BB nor all its instructions because of debug
@@ -808,8 +807,18 @@ mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
            "Reached BB end within common tail length!");
     assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
 
+    // Merge MMOs from memory operations in the common block.
     if (MBBICommon->mayLoad() || MBBICommon->mayStore())
       MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
+    // Drop undef flags if they aren't present in all merged instructions.
+    for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
+      MachineOperand &MO = MBBICommon->getOperand(I);
+      if (MO.isReg() && MO.isUndef()) {
+        const MachineOperand &OtherMO = MBBI->getOperand(I);
+        if (!OtherMO.isUndef())
+          MO.setIsUndef(false);
+      }
+    }
 
     ++MBBI;
     ++MBBICommon;
@@ -928,8 +937,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
         continue;
       DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
                    << (i == e-1 ? "" : ", "));
-      // Merge MMOs from memory operations as needed.
-      mergeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
+      // Merge operations (MMOs, undef flags)
+      mergeOperations(SameTails[i].getTailStartPos(), *MBB);
       // Hack the end off BB i, making it jump to BB commonTailIndex instead.
       ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
       // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
```
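Why the new undef-flag scrub matters: two tails can be byte-for-byte identical as instructions yet disagree on whether a register operand is marked `undef`. Keeping the flag on the merged copy would let later passes treat a value that is live on one path as dead. A simplified, runnable model of the rule (toy `Operand` type, not LLVM's `MachineOperand`):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Toy stand-in for a register operand: a register id plus the IsUndef flag
// that mergeOperations reconciles above.
struct Operand {
  unsigned Reg;
  bool IsUndef;
};

// Keep IsUndef only if *every* merged instruction had it; if any copy reads
// a real value, the merged instruction must be treated as reading one too.
void mergeUndefFlags(std::vector<Operand> &Common,
                     const std::vector<Operand> &Other) {
  for (std::size_t I = 0; I != Common.size(); ++I)
    if (Common[I].IsUndef && !Other[I].IsUndef)
      Common[I].IsUndef = false;
}

int main() {
  std::vector<Operand> Common = {{1, true}};  // tail A: reg 1 marked undef
  std::vector<Operand> Other = {{1, false}};  // tail B: reg 1 carries a value
  mergeUndefFlags(Common, Other);
  assert(!Common[0].IsUndef); // conservative result after merging
  return 0;
}
```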
```diff
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index 4935868c00f4..09c67bc47863 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -694,6 +694,14 @@ void IRLinker::computeTypeMapping() {
     if (!ST->hasName())
       continue;
 
+    if (TypeMap.DstStructTypesSet.hasType(ST)) {
+      // This is actually a type from the destination module.
+      // getIdentifiedStructTypes() can have found it by walking debug info
+      // metadata nodes, some of which get linked by name when ODR Type Uniquing
+      // is enabled on the Context, from the source to the destination module.
+      continue;
+    }
+
     // Check to see if there is a dot in the name followed by a digit.
     size_t DotPos = ST->getName().rfind('.');
     if (DotPos == 0 || DotPos == StringRef::npos ||
@@ -1336,13 +1344,19 @@ bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) {
 
 IRMover::IRMover(Module &M) : Composite(M) {
   TypeFinder StructTypes;
-  StructTypes.run(M, true);
+  StructTypes.run(M, /* OnlyNamed */ false);
   for (StructType *Ty : StructTypes) {
     if (Ty->isOpaque())
       IdentifiedStructTypes.addOpaque(Ty);
     else
       IdentifiedStructTypes.addNonOpaque(Ty);
   }
+  // Self-map metadatas in the destination module. This is needed when
+  // DebugTypeODRUniquing is enabled on the LLVMContext, since metadata in the
+  // destination module may be reached from the source module.
+  for (auto *MD : StructTypes.getVisitedMetadata()) {
+    SharedMDs[MD].reset(const_cast<MDNode *>(MD));
+  }
 }
 
 Error IRMover::move(
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 117d4e8bcb52..55fd76d375a2 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -412,7 +412,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
   if (printSymbolizedStackTrace(Argv0, StackTrace, depth, OS))
     return;
 
-#if HAVE_DLFCN_H && __GNUG__
+#if HAVE_DLFCN_H && __GNUG__ && !defined(__CYGWIN__)
   int width = 0;
   for (int i = 0; i < depth; ++i) {
     Dl_info dlinfo;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index fe699b284882..db8b9fb923bf 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4819,6 +4819,10 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm",
 def t2LDRConstPool
   : t2AsmPseudo<"ldr${p} $Rt, $immediate",
                 (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
+// Version w/ the .w suffix.
+def : t2InstAlias<"ldr${p}.w $Rt, $immediate",
+                  (t2LDRConstPool GPRnopc:$Rt,
+                  const_pool_asm_imm:$immediate, pred:$p)>;
 
 // PLD/PLDW/PLI with alternate literal form.
 def : t2InstAlias<"pld${p} $addr",
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 7d49302f9a96..f5de8a3cd25e 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6933,6 +6933,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
     else if (Inst.getOpcode() == ARM::t2LDRConstPool)
       TmpInst.setOpcode(ARM::t2LDRpci);
     const ARMOperand &PoolOperand =
+      (static_cast<ARMOperand &>(*Operands[2]).isToken() &&
+       static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ?
+      static_cast<ARMOperand &>(*Operands[4]) :
      static_cast<ARMOperand &>(*Operands[3]);
     const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm();
     // If SubExprVal is a constant we may be able to use a MOV
```
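In the parser change, the new `.w`-suffixed alias contributes an extra token operand, shifting the constant-pool immediate from `Operands[3]` to `Operands[4]`. Roughly, under an assumed operand layout (shown for illustration only; the real list holds `ARMOperand`s, not strings):

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Assumed layouts after parsing (mnemonic, predicate, then the rest):
//   "ldr   r0, =0x1"  -> { "ldr", pred, "r0", "=imm" }        imm at [3]
//   "ldr.w r0, =0x1"  -> { "ldr", pred, ".w", "r0", "=imm" }  imm at [4]
// Hypothetical helper mirroring the index selection in processInstruction.
std::size_t constantPoolOperandIndex(const std::vector<std::string> &Ops) {
  // When the width suffix was parsed as its own token, everything after it
  // sits one slot further to the right.
  return (Ops.size() > 2 && Ops[2] == ".w") ? 4 : 3;
}

int main() {
  assert(constantPoolOperandIndex({"ldr", "al", "r0", "=imm"}) == 3);
  assert(constantPoolOperandIndex({"ldr", "al", ".w", "r0", "=imm"}) == 4);
  return 0;
}
```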
```diff
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 3d06de804200..6dd73174565a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -665,9 +665,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
     }
+
     if (Subtarget.hasP9Vector()) {
-      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
-      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
     }
   }
@@ -7846,6 +7847,17 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
 }
 
+SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+         "Should only be called for ISD::INSERT_VECTOR_ELT");
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+  // We have legal lowering for constant indices but not for variable ones.
+  if (C)
+    return Op;
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -8248,6 +8260,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
 
   // For counter-based loop handling.
```
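Switching the operation action from `Legal` to `Custom` routes every `INSERT_VECTOR_ELT` node through the new hook. The contract it relies on: returning the node unchanged tells the legalizer it is fine as-is (a constant-index insert maps onto a Power9 vector-insert instruction), while returning an empty `SDValue` requests the default expansion (variable indices have no direct instruction). A toy model of that contract (illustrative only; the real hook operates on `SDValue`s):

```cpp
#include <cassert>
#include <optional>

// Toy model of the two outcomes LowerINSERT_VECTOR_ELT produces above:
// keep the node (it is legal after all) or ask the legalizer to expand.
enum class LowerResult { KeepAsLegal, Expand };

LowerResult lowerInsertVectorElt(std::optional<int> ConstantIndex) {
  // Constant indices have a legal lowering; variable ones do not.
  return ConstantIndex.has_value() ? LowerResult::KeepAsLegal
                                   : LowerResult::Expand;
}

int main() {
  assert(lowerInsertVectorElt(2) == LowerResult::KeepAsLegal);
  assert(lowerInsertVectorElt(std::nullopt) == LowerResult::Expand);
  return 0;
}
```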
```diff
@@ -8372,7 +8385,9 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
 MachineBasicBlock *
 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                     unsigned AtomicSize,
-                                    unsigned BinOpcode) const {
+                                    unsigned BinOpcode,
+                                    unsigned CmpOpcode,
+                                    unsigned CmpPred) const {
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -8412,8 +8427,12 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
   DebugLoc dl = MI.getDebugLoc();
 
   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB =
+    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   F->insert(It, loopMBB);
+  if (CmpOpcode)
+    F->insert(It, loop2MBB);
   F->insert(It, exitMBB);
   exitMBB->splice(exitMBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -8435,11 +8454,40 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
   //   st[wd]cx. r0, ptr
   //   bne- loopMBB
   //   fallthrough --> exitMBB
+
+  // For max/min...
+  //  loopMBB:
+  //   l[wd]arx dest, ptr
+  //   cmpl?[wd] incr, dest
+  //   bgt exitMBB
+  //  loop2MBB:
+  //   st[wd]cx. dest, ptr
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+
   BB = loopMBB;
   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
     .addReg(ptrA).addReg(ptrB);
   if (BinOpcode)
     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+  if (CmpOpcode) {
+    // Signed comparisons of byte or halfword values must be sign-extended.
+    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
+      unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
+              ExtReg).addReg(dest);
+      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
+        .addReg(incr).addReg(ExtReg);
+    } else
+      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
+        .addReg(incr).addReg(dest);
+
+    BuildMI(BB, dl, TII->get(PPC::BCC))
+      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+    BB->addSuccessor(loop2MBB);
+    BB->addSuccessor(exitMBB);
+    BB = loop2MBB;
+  }
   BuildMI(BB, dl, TII->get(StoreMnemonic))
     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
   BuildMI(BB, dl, TII->get(PPC::BCC))
```
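Semantically, the emitted larx/cmp/bcc/stcx sequence is a read-modify-write that skips the store entirely when memory already satisfies the predicate (the early branch to `exitMBB`). A portable equivalent using a compare-exchange loop (illustrative only; the backend emits the branchy machine-level sequence above, not this):

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

// Portable analogue of the ATOMIC_LOAD_MAX_I32 lowering: retry until either
// memory already holds a value >= Incr (the branch to exitMBB) or the
// conditional store succeeds (stwcx. does not fail).
int32_t atomicLoadMax(std::atomic<int32_t> &Mem, int32_t Incr) {
  int32_t Old = Mem.load();                     // lwarx
  while (Old < Incr &&                          // cmpw; branch to exit
         !Mem.compare_exchange_weak(Old, Incr)) // stwcx.; bne- loop
    ;
  return Old; // like atomicrmw max, yields the previous value
}

int main() {
  std::atomic<int32_t> V{5};
  assert(atomicLoadMax(V, 3) == 5 && V.load() == 5); // no store needed
  assert(atomicLoadMax(V, 9) == 5 && V.load() == 9); // new maximum stored
  return 0;
}
```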
```diff
@@ -8457,10 +8505,13 @@ MachineBasicBlock *
 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
                                             MachineBasicBlock *BB,
                                             bool is8bit, // operation
-                                            unsigned BinOpcode) const {
+                                            unsigned BinOpcode,
+                                            unsigned CmpOpcode,
+                                            unsigned CmpPred) const {
   // If we support part-word atomic mnemonics, just use them
   if (Subtarget.hasPartwordAtomics())
-    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
+                            CmpOpcode, CmpPred);
 
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -8482,8 +8533,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
   DebugLoc dl = MI.getDebugLoc();
 
   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB =
+    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   F->insert(It, loopMBB);
+  if (CmpOpcode)
+    F->insert(It, loop2MBB);
   F->insert(It, exitMBB);
   exitMBB->splice(exitMBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -8568,6 +8623,32 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
     .addReg(TmpDestReg).addReg(MaskReg);
   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
     .addReg(TmpReg).addReg(MaskReg);
+  if (CmpOpcode) {
+    // For unsigned comparisons, we can directly compare the shifted values.
+    // For signed comparisons we shift and sign extend.
+    unsigned SReg = RegInfo.createVirtualRegister(RC);
+    BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
+      .addReg(TmpDestReg).addReg(MaskReg);
+    unsigned ValueReg = SReg;
+    unsigned CmpReg = Incr2Reg;
+    if (CmpOpcode == PPC::CMPW) {
+      ValueReg = RegInfo.createVirtualRegister(RC);
+      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
+        .addReg(SReg).addReg(ShiftReg);
+      unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
+      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
+        .addReg(ValueReg);
+      ValueReg = ValueSReg;
+      CmpReg = incr;
+    }
+    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
+      .addReg(CmpReg).addReg(ValueReg);
+    BuildMI(BB, dl, TII->get(PPC::BCC))
+      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+    BB->addSuccessor(loop2MBB);
+    BB->addSuccessor(exitMBB);
+    BB = loop2MBB;
+  }
   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
     .addReg(Tmp3Reg).addReg(Tmp2Reg);
   BuildMI(BB, dl, TII->get(PPC::STWCX))
```
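Without `lbarx`/`lharx`, the partword path takes the reservation on the containing aligned 32-bit word and operates on one shifted, masked lane, which is what the `ShiftReg`/`MaskReg` plumbing above implements. The same idea in portable form (a sketch: a lane index stands in for the shift the backend derives from the low address bits):

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

// Model of EmitPartwordAtomicBinary's masking scheme: unsigned max on one
// byte of an aligned 32-bit word, using word-wide compare_exchange the way
// the backend uses lwarx/stwcx. on the whole word.
uint8_t atomicByteUMax(std::atomic<uint32_t> &Word, unsigned ByteIdx,
                       uint8_t Incr) {
  const unsigned Shift = ByteIdx * 8;            // plays the role of ShiftReg
  const uint32_t Mask = uint32_t{0xFF} << Shift; // plays the role of MaskReg
  uint32_t Old = Word.load();
  for (;;) {
    uint8_t Lane = uint8_t((Old & Mask) >> Shift); // masked, shifted old lane
    if (Lane >= Incr)                              // cmplw; branch to exit
      return Lane;
    uint32_t New = (Old & ~Mask) | (uint32_t{Incr} << Shift);
    if (Word.compare_exchange_weak(Old, New))      // stwcx.; retry on failure
      return Lane;
  }
}

int main() {
  std::atomic<uint32_t> W{0x11223344};
  assert(atomicByteUMax(W, 1, 0x99) == 0x33); // lane 1 held 0x33
  assert(W.load() == 0x11229944);             // only that byte changed
  return 0;
}
```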
```diff
@@ -9074,6 +9155,42 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
 
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
+
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
+
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
+
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
+
   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index e3be8074e62e..e60504507d32 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -585,11 +585,15 @@ namespace llvm {
     MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
                                         MachineBasicBlock *MBB,
                                         unsigned AtomicSize,
-                                        unsigned BinOpcode) const;
+                                        unsigned BinOpcode,
+                                        unsigned CmpOpcode = 0,
+                                        unsigned CmpPred = 0) const;
     MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
                                                 MachineBasicBlock *MBB,
                                                 bool is8bit,
-                                                unsigned Opcode) const;
+                                                unsigned Opcode,
+                                                unsigned CmpOpcode = 0,
+                                                unsigned CmpPred = 0) const;
     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;
@@ -824,6 +828,7 @@ namespace llvm {
     SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index e7eb8a16180a..5e514c8e8cf6 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -224,6 +224,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I64 : Pseudo<
       (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
       [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+    def ATOMIC_LOAD_MIN_I64 : Pseudo<
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
+      [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
+    def ATOMIC_LOAD_MAX_I64 : Pseudo<
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
+      [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
+    def ATOMIC_LOAD_UMIN_I64 : Pseudo<
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
+      [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
+    def ATOMIC_LOAD_UMAX_I64 : Pseudo<
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
+      [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I64 : Pseudo<
       (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
```
```diff
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 4a42a947c6cb..a40d4e1a4a69 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1509,6 +1509,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I8 : Pseudo<
       (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
       [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MIN_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
+      [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MAX_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
+      [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMIN_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
+      [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMAX_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
+      [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_ADD_I16 : Pseudo<
       (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
       [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
@@ -1527,6 +1539,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I16 : Pseudo<
       (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
       [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MIN_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
+      [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MAX_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
+      [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMIN_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
+      [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMAX_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
+      [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_ADD_I32 : Pseudo<
       (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
       [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
@@ -1545,6 +1569,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I32 : Pseudo<
       (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
       [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MIN_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
+      [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MAX_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
+      [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMIN_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
+      [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMAX_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
+      [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I8 : Pseudo<
       (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
```
"#ATOMIC_CMP_SWAP_I8", diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2c548384f1cb..ca2053350138 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8656,6 +8656,17 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,      V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,                      DAG.getMachineFunction().getMachineMemOperand(                          Ld->getMemOperand(), Offset, SVT.getStoreSize())); + +    // Make sure the newly-created LOAD is in the same position as Ld in +    // terms of dependency. We create a TokenFactor for Ld and V, +    // and update uses of Ld's output chain to use the TokenFactor. +    if (Ld->hasAnyUseOfValue(1)) { +      SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, +                                     SDValue(Ld, 1), SDValue(V.getNode(), 1)); +      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); +      DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), +                             SDValue(V.getNode(), 1)); +    }    } else if (!BroadcastFromReg) {      // We can't broadcast from a vector register.      return SDValue(); @@ -27516,7 +27527,8 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,                                 const X86Subtarget &Subtarget) {    // pmulld is supported since SSE41. It is better to use pmulld    // instead of pmullw+pmulhw. -  if (Subtarget.hasSSE41()) +  // pmullw/pmulhw are not supported by SSE. +  if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())      return SDValue();    ShrinkMode Mode; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index de4129f86541..803a7e35c209 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2124,7 +2124,7 @@ let Predicates = [HasAVX512] in {              (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;    def : Pat<(i1 (trunc (i8 GR8:$src))), -            (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri8 $src, (i8 1)), +            (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri $src, (i8 1)),                                      sub_8bit)), VK1)>;    def : Pat<(i1 (trunc (i8 (assertzext_i1 GR8:$src)))), diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index d1769fc3ebb3..55ffc23e1308 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1322,6 +1322,10 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {    if (!isa<PHINode>(BB->front()))      return false; +  // If this BB is a landing pad, we won't be able to split the edge into it. +  if (BB->isEHPad()) +    return false; +    // If we have a xor as the branch input to this block, and we know that the    // LHS or RHS of the xor in any predecessor is true/false, then we can clone    // the condition into the predecessor and fix that value to true, saving some | 
