Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 19
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp          | 38
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                   | 10
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp               | 44
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td                 | 55
5 files changed, 113 insertions(+), 53 deletions(-)
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 005f2d51e4036..9a7f45bde6c99 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -388,6 +388,10 @@ static unsigned isMatchingStore(MachineInstr &LoadInst,
 }
 
 static unsigned getPreIndexedOpcode(unsigned Opc) {
+  // FIXME: We don't currently support creating pre-indexed loads/stores when
+  // the load or store is the unscaled version.  If we decide to perform such
+  // an optimization in the future the cases for the unscaled loads/stores
+  // will need to be added here.
   switch (Opc) {
   default:
     llvm_unreachable("Opcode has no pre-indexed equivalent!");
@@ -451,32 +455,42 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
   default:
     llvm_unreachable("Opcode has no post-indexed wise equivalent!");
   case AArch64::STRSui:
+  case AArch64::STURSi:
     return AArch64::STRSpost;
   case AArch64::STRDui:
+  case AArch64::STURDi:
     return AArch64::STRDpost;
   case AArch64::STRQui:
+  case AArch64::STURQi:
     return AArch64::STRQpost;
   case AArch64::STRBBui:
     return AArch64::STRBBpost;
   case AArch64::STRHHui:
     return AArch64::STRHHpost;
   case AArch64::STRWui:
+  case AArch64::STURWi:
     return AArch64::STRWpost;
   case AArch64::STRXui:
+  case AArch64::STURXi:
     return AArch64::STRXpost;
   case AArch64::LDRSui:
+  case AArch64::LDURSi:
     return AArch64::LDRSpost;
   case AArch64::LDRDui:
+  case AArch64::LDURDi:
     return AArch64::LDRDpost;
   case AArch64::LDRQui:
+  case AArch64::LDURQi:
     return AArch64::LDRQpost;
   case AArch64::LDRBBui:
     return AArch64::LDRBBpost;
   case AArch64::LDRHHui:
     return AArch64::LDRHHpost;
   case AArch64::LDRWui:
+  case AArch64::LDURWi:
     return AArch64::LDRWpost;
   case AArch64::LDRXui:
+  case AArch64::LDURXi:
     return AArch64::LDRXpost;
   case AArch64::LDRSWui:
     return AArch64::LDRSWpost;
@@ -1694,8 +1708,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
         ++NumPostFolded;
         break;
       }
-      // Don't know how to handle pre/post-index versions, so move to the next
-      // instruction.
+
+      // Don't know how to handle unscaled pre/post-index versions below, so
+      // move to the next instruction.
       if (TII->isUnscaledLdSt(Opc)) {
         ++MBBI;
         break;
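A note on the AArch64 change above: getPostIndexedOpcode() now maps the unscaled STUR*/LDUR* opcodes to the same post-indexed forms as their scaled STR*/LDR* counterparts, which lets the pass fold a later base-register increment into the memory access itself. A minimal standalone C++ sketch of that table, with hypothetical enumerators standing in for the real AArch64 opcode constants:

#include <cstdlib>

// Hypothetical stand-ins for the real AArch64 opcode enumerators.
enum Opcode { STRXui, STURXi, STRXpost, LDRXui, LDURXi, LDRXpost };

// Both the scaled (STRXui/LDRXui) and, after this patch, the unscaled
// (STURXi/LDURXi) forms map to the same post-indexed opcode, e.g.
//   stur x1, [x0]; add x0, x0, #8   ==>   str x1, [x0], #8
Opcode getPostIndexedOpcode(Opcode Opc) {
  switch (Opc) {
  case STRXui:
  case STURXi:
    return STRXpost;
  case LDRXui:
  case LDURXi:
    return LDRXpost;
  default:
    std::abort(); // no post-indexed equivalent
  }
}

The pre-indexed table is deliberately left alone, as the FIXME in the first hunk notes.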
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index ec49f0d37af44..46d8f0dba6914 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -769,8 +769,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
   const MachineOperand &Dest = MI.getOperand(0);
-  unsigned StatusReg = MI.getOperand(1).getReg();
-  bool StatusDead = MI.getOperand(1).isDead();
+  unsigned TempReg = MI.getOperand(1).getReg();
   // Duplicating undef operands into 2 instructions does not guarantee the same
   // value on both; However undef should be replaced by xzr anyway.
   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
@@ -797,23 +796,9 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
   }
 
   // .Lloadcmp:
-  //     mov wStatus, #0
   //     ldrex rDest, [rAddr]
   //     cmp rDest, rDesired
   //     bne .Ldone
-  if (!StatusDead) {
-    if (IsThumb) {
-      BuildMI(LoadCmpBB, DL, TII->get(ARM::tMOVi8), StatusReg)
-          .addDef(ARM::CPSR, RegState::Dead)
-          .addImm(0)
-          .add(predOps(ARMCC::AL));
-    } else {
-      BuildMI(LoadCmpBB, DL, TII->get(ARM::MOVi), StatusReg)
-          .addImm(0)
-          .add(predOps(ARMCC::AL))
-          .add(condCodeOp());
-    }
-  }
 
   MachineInstrBuilder MIB;
   MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
@@ -836,10 +821,10 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
   LoadCmpBB->addSuccessor(StoreBB);
 
   // .Lstore:
-  //     strex rStatus, rNew, [rAddr]
-  //     cmp rStatus, #0
+  //     strex rTempReg, rNew, [rAddr]
+  //     cmp rTempReg, #0
   //     bne .Lloadcmp
-  MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg)
+  MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
             .addReg(NewReg)
             .addReg(AddrReg);
   if (StrexOp == ARM::t2STREX)
@@ -848,7 +833,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
 
   unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
   BuildMI(StoreBB, DL, TII->get(CMPri))
-      .addReg(StatusReg, getKillRegState(StatusDead))
+      .addReg(TempReg, RegState::Kill)
       .addImm(0)
       .add(predOps(ARMCC::AL));
   BuildMI(StoreBB, DL, TII->get(Bcc))
@@ -904,8 +889,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
   MachineOperand &Dest = MI.getOperand(0);
-  unsigned StatusReg = MI.getOperand(1).getReg();
-  bool StatusDead = MI.getOperand(1).isDead();
+  unsigned TempReg = MI.getOperand(1).getReg();
   // Duplicating undef operands into 2 instructions does not guarantee the same
   // value on both; However undef should be replaced by xzr anyway.
   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
@@ -931,7 +915,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
   // .Lloadcmp:
   //     ldrexd rDestLo, rDestHi, [rAddr]
   //     cmp rDestLo, rDesiredLo
-  //     sbcs rStatus<dead>, rDestHi, rDesiredHi
+  //     sbcs rTempReg<dead>, rDestHi, rDesiredHi
   //     bne .Ldone
   unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
   MachineInstrBuilder MIB;
@@ -959,17 +943,17 @@
   LoadCmpBB->addSuccessor(StoreBB);
 
   // .Lstore:
-  //     strexd rStatus, rNewLo, rNewHi, [rAddr]
-  //     cmp rStatus, #0
+  //     strexd rTempReg, rNewLo, rNewHi, [rAddr]
+  //     cmp rTempReg, #0
   //     bne .Lloadcmp
   unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
-  MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
+  MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
   addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
   MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
 
   unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
   BuildMI(StoreBB, DL, TII->get(CMPri))
-      .addReg(StatusReg, getKillRegState(StatusDead))
+      .addReg(TempReg, RegState::Kill)
       .addImm(0)
       .add(predOps(ARMCC::AL));
   BuildMI(StoreBB, DL, TII->get(Bcc))
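For context on the two expansion functions above: each CMP_SWAP pseudo becomes a load-exclusive/store-exclusive loop, and the operand renamed from $status to $temp only ever carries the strex result inside that loop, which is why it can be unconditionally killed and the old "mov wStatus, #0" initialization deleted. A rough C++ analog of the emitted loop, using std::atomic as a stand-in (cmp_swap_32 is an illustrative name, not an LLVM API):

#include <atomic>
#include <cstdint>

// Rough analog of the expanded CMP_SWAP_32:
//   .Lloadcmp: ldrex rDest, [rAddr]; cmp rDest, rDesired; bne .Ldone
//   .Lstore:   strex rTempReg, rNew, [rAddr]; cmp rTempReg, #0; bne .Lloadcmp
// rTempReg holds only the store-conditional status and is dead after the
// branch, matching the RegState::Kill in the expansion.
uint32_t cmp_swap_32(std::atomic<uint32_t> &addr, uint32_t desired,
                     uint32_t newval) {
  uint32_t observed = desired;
  // seq_cst and "strong" semantics, matching the conservative expansion.
  addr.compare_exchange_strong(observed, newval, std::memory_order_seq_cst);
  return observed; // rDest: the value actually loaded
}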
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index d06b7d0896f16..7206083a70791 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -6053,21 +6053,21 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
 // significantly more naive than the standard expansion: we conservatively
 // assume seq_cst, strong cmpxchg and omit clrex on failure.
 
-let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
     mayLoad = 1, mayStore = 1 in {
-def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
                             (ins GPR:$addr, GPR:$desired, GPR:$new),
                             NoItinerary, []>, Sched<[]>;
 
-def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
                              (ins GPR:$addr, GPR:$desired, GPR:$new),
                              NoItinerary, []>, Sched<[]>;
 
-def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
                              (ins GPR:$addr, GPR:$desired, GPR:$new),
                              NoItinerary, []>, Sched<[]>;
 
-def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
+def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
                              (ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
                              NoItinerary, []>, Sched<[]>;
 }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7563bffd8f873..1e73122cdc388 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -419,6 +419,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SELECT, VT, Custom);
     setOperationAction(ISD::SETCC, VT, Custom);
   }
+
+  // Custom action for SELECT MMX and expand action for SELECT_CC MMX
+  setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
+
   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
@@ -1383,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // (result) is 256-bit but the source is 512-bit wide.
     // 128-bit was made Custom under AVX1.
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
-                     MVT::v8f32, MVT::v4f64 })
+                     MVT::v8f32, MVT::v4f64, MVT::v1i1 })
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
     for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1,
                      MVT::v32i1, MVT::v64i1 })
@@ -14570,6 +14575,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
   MVT ResVT = Op.getSimpleValueType();
 
+  // When v1i1 is legal a scalarization of a vselect with a vXi1 Cond
+  // would result with: v1i1 = extract_subvector(vXi1, idx).
+  // Lower these into extract_vector_elt which is already selectable.
+  if (ResVT == MVT::v1i1) {
+    assert(Subtarget.hasAVX512() &&
+           "Boolean EXTRACT_SUBVECTOR requires AVX512");
+
+    MVT EltVT = ResVT.getVectorElementType();
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    MVT LegalVT =
+        (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
+    SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
+  }
+
   assert((In.getSimpleValueType().is256BitVector() ||
           In.getSimpleValueType().is512BitVector()) &&
          "Can only extract from 256-bit or 512-bit vectors");
@@ -20651,8 +20671,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
   }
   // ADC/ADCX/SBB
   case ADX: {
-    SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
-    SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
+    SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+    SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32);
     SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
                                 DAG.getConstant(-1, dl, MVT::i8));
     SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
@@ -30663,6 +30683,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     return SDValue(N, 0);
   }
 
+  // Custom action for SELECT MMX
+  if (VT == MVT::x86mmx) {
+    LHS = DAG.getBitcast(MVT::i64, LHS);
+    RHS = DAG.getBitcast(MVT::i64, RHS);
+    SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS);
+    return DAG.getBitcast(VT, newSelect);
+  }
+
   return SDValue();
 }
 
@@ -33358,7 +33386,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
                                 Ld->getPointerInfo(), Ld->getAlignment(),
                                 Ld->getMemOperand()->getFlags());
-    SDValue NewChain = NewLd.getValue(1);
+    // Make sure new load is placed in same chain order.
+    SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
     if (TokenFactorIndex >= 0) {
       Ops.push_back(NewChain);
       NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
@@ -33379,11 +33408,12 @@
                                 Ld->getPointerInfo().getWithOffset(4),
                                 MinAlign(Ld->getAlignment(), 4),
                                 Ld->getMemOperand()->getFlags());
+    // Make sure new loads are placed in same chain order.
+    SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
+    NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
 
-    SDValue NewChain = LoLd.getValue(1);
     if (TokenFactorIndex >= 0) {
-      Ops.push_back(LoLd);
-      Ops.push_back(HiLd);
+      Ops.push_back(NewChain);
       NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
     }
 
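One way to read the combineSelect() hunk above: a select of two x86mmx values is legalized by moving the data into i64, where SELECT is already legal, and casting back. A standalone sketch of the same bitcast-select-bitcast idea (the Mmx struct and select_mmx are stand-ins, not the real x86mmx type or any LLVM API):

#include <cstdint>
#include <cstring>

struct Mmx { uint8_t bytes[8]; }; // stand-in for the 64-bit x86mmx value

Mmx select_mmx(bool cond, Mmx lhs, Mmx rhs) {
  uint64_t l, r;
  std::memcpy(&l, &lhs, sizeof l);   // bitcast x86mmx -> i64
  std::memcpy(&r, &rhs, sizeof r);
  uint64_t s = cond ? l : r;         // i64 SELECT, legal on the target
  Mmx res;
  std::memcpy(&res, &s, sizeof res); // bitcast i64 -> x86mmx
  return res;
}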
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 705d0f7a5cf7d..0e654a380e7c5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -978,6 +978,44 @@ multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
                          (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
 }
 
+multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
+                                      X86VectorVTInfo _, SDPatternOperator OpNode,
+                                      RegisterClass SrcRC, SubRegIndex Subreg> {
+  let ExeDomain = _.ExeDomain in
+  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
+                                  (outs _.RC:$dst), (ins GR32:$src),
+                                  !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
+                                  !con((ins _.KRCWM:$mask), (ins GR32:$src)),
+                                  "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
+                                  "$src0 = $dst">, T8PD, EVEX;
+
+  def : Pat <(_.VT (OpNode SrcRC:$src)),
+             (!cast<Instruction>(Name#r)
+              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+
+  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
+             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
+              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+
+  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
+             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
+              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+}
+
+multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
+                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
+                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
+  let Predicates = [prd] in
+    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC,
+                                        Subreg>, EVEX_V512;
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode,
+                                           SrcRC, Subreg>, EVEX_V256;
+    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode,
+                                           SrcRC, Subreg>, EVEX_V128;
+  }
+}
+
 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                        SDPatternOperator OpNode,
                                        RegisterClass SrcRC, Predicate prd> {
@@ -989,18 +1027,11 @@ multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
   }
 }
 
-let isCodeGenOnly = 1 in {
-defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
-                                                 X86VBroadcast, GR8, HasBWI>;
-defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
-                                                 X86VBroadcast, GR16, HasBWI>;
-}
-let isAsmParserOnly = 1 in {
-  defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
-                                                       null_frag, GR32, HasBWI>;
-  defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
-                                                       null_frag, GR32, HasBWI>;
-}
+defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
+                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
+defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
+                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
+                       HasBWI>;
 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                  X86VBroadcast, GR32, HasAVX512>;
 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
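The new TableGen patterns above select the GR32-source VPBROADCASTB/W instructions for GR8/GR16 sources by inserting the small register into an undefined 32-bit register (the INSERT_SUBREG (IMPLICIT_DEF) idiom), which is safe because the instruction only reads the low byte or word. A sketch of the byte-broadcast semantics, modeling an 8-lane result as a uint64_t (broadcast_byte is illustrative, not LLVM code):

#include <cstdint>

// vpbroadcastb reads only the low 8 bits of the GPR, so widening
// GR8 -> GR32 with undefined upper bits is harmless.
uint64_t broadcast_byte(uint8_t src) {
  uint32_t widened = src; // INSERT_SUBREG (IMPLICIT_DEF), src, sub_8bit
  uint64_t result = 0;
  for (int lane = 0; lane < 8; ++lane)
    result |= uint64_t(widened & 0xFFu) << (8 * lane); // same byte per lane
  return result;
}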