Diffstat (limited to 'lib/Target/X86')
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp |  8
 lib/Target/X86/X86FrameLowering.cpp          |  9
 lib/Target/X86/X86ISelDAGToDAG.cpp           |  5
 lib/Target/X86/X86ISelLowering.cpp           | 63
 lib/Target/X86/X86ISelLowering.h             |  2
 lib/Target/X86/X86InstructionSelector.cpp    | 61
 lib/Target/X86/X86RegisterBankInfo.cpp       |  1
 lib/Target/X86/X86RegisterInfo.h             |  5
 lib/Target/X86/X86RegisterInfo.td            |  1
 9 files changed, 108 insertions(+), 47 deletions(-)
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 48a1d8f1330c..9c35a251e480 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -43,7 +43,7 @@ void X86MCAsmInfoDarwin::anchor() { }
 X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
   bool is64Bit = T.getArch() == Triple::x86_64;
   if (is64Bit)
-    PointerSize = CalleeSaveStackSlotSize = 8;
+    CodePointerSize = CalleeSaveStackSlotSize = 8;
 
   AssemblerDialect = AsmWriterFlavor;
 
@@ -92,7 +92,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
   // For ELF, x86-64 pointer size depends on the ABI.
   // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
   // with the x32 ABI, pointer size remains the default 4.
-  PointerSize = (is64Bit && !isX32) ? 8 : 4;
+  CodePointerSize = (is64Bit && !isX32) ? 8 : 4;
 
   // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
   CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
@@ -129,7 +129,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
   if (Triple.getArch() == Triple::x86_64) {
     PrivateGlobalPrefix = ".L";
     PrivateLabelPrefix = ".L";
-    PointerSize = 8;
+    CodePointerSize = 8;
     WinEHEncodingType = WinEH::EncodingType::Itanium;
   } else {
     // 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just
@@ -156,7 +156,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
   if (Triple.getArch() == Triple::x86_64) {
     PrivateGlobalPrefix = ".L";
     PrivateLabelPrefix = ".L";
-    PointerSize = 8;
+    CodePointerSize = 8;
     WinEHEncodingType = WinEH::EncodingType::Itanium;
     ExceptionsType = ExceptionHandling::WinEH;
   } else {
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 78e0bca4158e..8678a13b95d0 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1698,21 +1698,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 }
 
-// NOTE: this only has a subset of the full frame index logic. In
-// particular, the FI < 0 and AfterFPPop logic is handled in
-// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly
-// (probably?) it should be moved into here.
 int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                              unsigned &FrameReg) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+  bool IsFixed = MFI.isFixedObjectIndex(FI);
 
   // We can't calculate offset from frame pointer if the stack is realigned,
   // so enforce usage of stack/base pointer. The base pointer is used when we
   // have dynamic allocas in addition to dynamic realignment.
   if (TRI->hasBasePointer(MF))
-    FrameReg = TRI->getBaseRegister();
+    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
   else if (TRI->needsStackRealignment(MF))
-    FrameReg = TRI->getStackRegister();
+    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
   else
     FrameReg = TRI->getFrameRegister(MF);
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index eb5c56ff2ff9..2d788bf0cf99 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1311,8 +1311,9 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       ++Cost;
     // If the base is a register with multiple uses, this
     // transformation may save a mov.
-    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
-         AM.Base_Reg.getNode() &&
+    // FIXME: Don't rely on DELETED_NODEs.
+    if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
+         AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
          !AM.Base_Reg.getNode()->hasOneUse()) ||
         AM.BaseType == X86ISelAddressMode::FrameIndexBase)
       --Cost;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7ff483063ec2..b5f29fb400ef 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2742,13 +2742,13 @@ static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
   return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
 }
 
-bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   auto Attr =
       CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
     return false;
 
-  CallSite CS(CI);
+  ImmutableCallSite CS(CI);
   CallingConv::ID CalleeCC = CS.getCallingConv();
   if (!mayTailCallThisCC(CalleeCC))
     return false;
@@ -8327,13 +8327,13 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
         Zeroable.setBit(i);
       else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
         APInt Val = Cst->getAPIntValue();
-        Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+        Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
         Val = Val.getLoBits(ScalarSizeInBits);
         if (Val == 0)
           Zeroable.setBit(i);
       } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
         APInt Val = Cst->getValueAPF().bitcastToAPInt();
-        Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+        Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
         Val = Val.getLoBits(ScalarSizeInBits);
         if (Val == 0)
           Zeroable.setBit(i);
@@ -16069,7 +16069,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
   unsigned EltBits = EltVT.getSizeInBits();
   // For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
   APInt MaskElt =
-    IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
+    IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
   const fltSemantics &Sem =
       EltVT == MVT::f64 ? APFloat::IEEEdouble() :
           (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
@@ -16132,9 +16132,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
   // The mask constants are automatically splatted for vector types.
   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   SDValue SignMask = DAG.getConstantFP(
-      APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
+      APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
   SDValue MagMask = DAG.getConstantFP(
-      APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
+      APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
 
   // First, clear all bits but the sign bit from the second operand (sign).
   if (IsFakeVector)
@@ -17344,10 +17344,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     // bits of the inputs before performing those operations.
     if (FlipSigns) {
       MVT EltVT = VT.getVectorElementType();
-      SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl,
+      SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
                                    VT);
-      Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
-      Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
+      Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
+      Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
     }
 
     SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
@@ -22111,11 +22111,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
   }
 
   // i64 vector arithmetic shift can be emulated with the transform:
-  // M = lshr(SIGN_BIT, Amt)
+  // M = lshr(SIGN_MASK, Amt)
   //   ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
   if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
       Op.getOpcode() == ISD::SRA) {
-    SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT);
+    SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
     SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
     R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
     R = DAG.getNode(ISD::XOR, dl, VT, R, M);
@@ -22647,7 +22647,7 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
-  auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
+  auto PTy = cast<PointerType>(LI->getPointerOperandType());
   return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
                                                : AtomicExpansionKind::None;
 }
@@ -26722,8 +26722,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
       // Low bits are known zero.
       KnownZero.setLowBits(ShAmt);
     } else {
-      KnownZero = KnownZero.lshr(ShAmt);
-      KnownOne = KnownOne.lshr(ShAmt);
+      KnownZero.lshrInPlace(ShAmt);
+      KnownOne.lshrInPlace(ShAmt);
       // High bits are known zero.
       KnownZero.setHighBits(ShAmt);
     }
@@ -30152,7 +30152,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
         // x s< 0 ? x^C : 0 --> subus x, C
         if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
             ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
-            OpRHSConst->getAPIntValue().isSignBit())
+            OpRHSConst->getAPIntValue().isSignMask())
           // Note that we have to rebuild the RHS constant here to ensure we
           // don't rely on particular values of undef lanes.
           return DAG.getNode(
@@ -30203,7 +30203,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
-  APInt DemandedMask(APInt::getSignBit(BitWidth));
+  APInt DemandedMask(APInt::getSignMask(BitWidth));
   APInt KnownZero, KnownOne;
   TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
                                         DCI.isBeforeLegalizeOps());
@@ -31269,7 +31269,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
       else if (X86ISD::VSRAI == Opcode)
        Elt = Elt.ashr(ShiftImm);
      else
-       Elt = Elt.lshr(ShiftImm);
+       Elt.lshrInPlace(ShiftImm);
     }
     return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
   }
@@ -32234,8 +32234,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
     BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
     if (!BV || !BV->isConstant())
       return false;
-    for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) {
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i));
+    for (SDValue Op : V->ops()) {
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
       if (!C)
         return false;
       uint64_t Val = C->getZExtValue();
@@ -33428,8 +33428,8 @@ static SDValue isFNEG(SDNode *N) {
   SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
 
   unsigned EltBits = Op1.getScalarValueSizeInBits();
-  auto isSignBitValue = [&](const ConstantFP *C) {
-    return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits);
+  auto isSignMask = [&](const ConstantFP *C) {
+    return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
   };
 
   // There is more than one way to represent the same constant on
@@ -33440,21 +33440,21 @@ static SDValue isFNEG(SDNode *N) {
   // We check all variants here.
   if (Op1.getOpcode() == X86ISD::VBROADCAST) {
     if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
-      if (isSignBitValue(cast<ConstantFP>(C)))
+      if (isSignMask(cast<ConstantFP>(C)))
         return Op0;
   } else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
     if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
-      if (isSignBitValue(CN->getConstantFPValue()))
+      if (isSignMask(CN->getConstantFPValue()))
        return Op0;
   } else if (auto *C = getTargetConstantFromNode(Op1)) {
     if (C->getType()->isVectorTy()) {
       if (auto *SplatV = C->getSplatValue())
-        if (isSignBitValue(cast<ConstantFP>(SplatV)))
+        if (isSignMask(cast<ConstantFP>(SplatV)))
          return Op0;
     } else if (auto *FPConst = dyn_cast<ConstantFP>(C))
-      if (isSignBitValue(FPConst))
+      if (isSignMask(FPConst))
        return Op0;
   }
 
   return SDValue();
@@ -34631,7 +34631,7 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   ShrinkMode Mode;
-  if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode))
+  if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
     return SDValue();
   EVT VT = N->getValueType(0);
@@ -35922,14 +35922,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       if (Subtarget.is64Bit()) {
         Res.first = X86::RAX;
         Res.second = &X86::GR64_ADRegClass;
-      } else if (Subtarget.is32Bit()) {
+      } else {
+        assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
+               "Expecting 64, 32 or 16 bit subtarget");
         Res.first = X86::EAX;
         Res.second = &X86::GR32_ADRegClass;
-      } else if (Subtarget.is16Bit()) {
-        Res.first = X86::AX;
-        Res.second = &X86::GR16_ADRegClass;
-      } else {
-        llvm_unreachable("Expecting 64, 32 or 16 bit subtarget");
       }
       return Res;
     }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ab4910daca02..190a88335000 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1207,7 +1207,7 @@ namespace llvm {
 
     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
 
-    bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
 
     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                             ISD::NodeType ExtendKind) const override;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index 6cc5e8b63597..fb9315792892 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -67,6 +67,8 @@ private:
                      MachineFunction &MF) const;
   bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                       MachineFunction &MF) const;
+  bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI,
+                   MachineFunction &MF) const;
 
   const X86Subtarget &STI;
   const X86InstrInfo &TII;
@@ -99,6 +101,10 @@ X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
 static const TargetRegisterClass *
 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
   if (RB.getID() == X86::GPRRegBankID) {
+    if (Ty.getSizeInBits() <= 8)
+      return &X86::GR8RegClass;
+    if (Ty.getSizeInBits() == 16)
+      return &X86::GR16RegClass;
     if (Ty.getSizeInBits() == 32)
       return &X86::GR32RegClass;
     if (Ty.getSizeInBits() == 64)
@@ -207,6 +213,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
     return true;
   if (selectConstant(I, MRI, MF))
     return true;
+  if (selectTrunc(I, MRI, MF))
+    return true;
 
   return selectImpl(I);
 }
@@ -509,6 +517,59 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I,
   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 }
 
+bool X86InstructionSelector::selectTrunc(MachineInstr &I,
+                                         MachineRegisterInfo &MRI,
+                                         MachineFunction &MF) const {
+  if (I.getOpcode() != TargetOpcode::G_TRUNC)
+    return false;
+
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+
+  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+
+  if (DstRB.getID() != SrcRB.getID()) {
+    DEBUG(dbgs() << "G_TRUNC input/output on different banks\n");
+    return false;
+  }
+
+  if (DstRB.getID() != X86::GPRRegBankID)
+    return false;
+
+  const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
+  if (!DstRC)
+    return false;
+
+  const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
+  if (!SrcRC)
+    return false;
+
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+    DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+    return false;
+  }
+
+  if (DstRC == SrcRC) {
+    // Nothing to be done
+  } else if (DstRC == &X86::GR32RegClass) {
+    I.getOperand(1).setSubReg(X86::sub_32bit);
+  } else if (DstRC == &X86::GR16RegClass) {
+    I.getOperand(1).setSubReg(X86::sub_16bit);
+  } else if (DstRC == &X86::GR8RegClass) {
+    I.getOperand(1).setSubReg(X86::sub_8bit);
+  } else {
+    return false;
+  }
+
+  I.setDesc(TII.get(X86::COPY));
+  return true;
+}
+
 InstructionSelector *
 llvm::createX86InstructionSelector(X86Subtarget &Subtarget,
                                    X86RegisterBankInfo &RBI) {
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
index d395c826e6bf..0f8a750a0235 100644
--- a/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -68,6 +68,7 @@ X86GenRegisterBankInfo::PartialMappingIdx
 X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
   if ((Ty.isScalar() && !isFP) || Ty.isPointer()) {
     switch (Ty.getSizeInBits()) {
+    case 1:
     case 8:
       return PMI_GPR8;
     case 16:
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 58fa31e94fba..25958f0c3106 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,11 @@ public:
   unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const;
   unsigned getStackRegister() const { return StackPtr; }
   unsigned getBaseRegister() const { return BasePtr; }
+  /// Returns physical register used as frame pointer.
+  /// This will always returns the frame pointer register, contrary to
+  /// getFrameRegister() which returns the "base pointer" in situations
+  /// involving a stack, frame and base pointer.
+  unsigned getFramePtr() const { return FramePtr; }
   // FIXME: Move to FrameInfo
   unsigned getSlotSize() const { return SlotSize; }
 };
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index c177ba1d52f7..d235d2b40b15 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -438,7 +438,6 @@ def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
                                           (add LOW32_ADDR_ACCESS, RBP)>;
 
 // A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
-def GR16_AD : RegisterClass<"X86", [i16], 16, (add AX, DX)>;
 def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
 def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;