Diffstat (limited to 'llvm'): 24 files changed, 334 insertions(+), 185 deletions(-)
diff --git a/llvm/include/llvm/MC/MCFixupKindInfo.h b/llvm/include/llvm/MC/MCFixupKindInfo.h index 0d57441ce0dc..ecf85fa56931 100644 --- a/llvm/include/llvm/MC/MCFixupKindInfo.h +++ b/llvm/include/llvm/MC/MCFixupKindInfo.h @@ -22,7 +22,12 @@ struct MCFixupKindInfo { FKF_IsAlignedDownTo32Bits = (1 << 1), /// Should this fixup be evaluated in a target dependent manner? - FKF_IsTarget = (1 << 2) + FKF_IsTarget = (1 << 2), + + /// This fixup kind should be resolved if defined. + /// FIXME This is a workaround because we don't support certain ARM + /// relocation types. This flag should eventually be removed. + FKF_Constant = 1 << 3, }; /// A target specific name for the fixup kind. The names will be unique for diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index bf8dc94bfbf9..77f4125b5d4b 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -2298,7 +2298,10 @@ bool MemorySSAWrapperPass::runOnFunction(Function &F) { return false; } -void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } +void MemorySSAWrapperPass::verifyAnalysis() const { + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); +} void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { MSSA->print(OS); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e5bc08b9280a..8ff04797c8d8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16510,33 +16510,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { CombineTo(ST1, ST1->getChain()); return SDValue(); } - - // If ST stores to a subset of preceding store's write set, we may be - // able to fold ST's value into the preceding stored value. As we know - // the other uses of ST1's chain are unconcerned with ST, this folding - // will not affect those nodes. - int64_t BitOffset; - if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize, - BitOffset)) { - SDValue ChainValue = ST1->getValue(); - if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) { - if (auto *C = dyn_cast<ConstantSDNode>(Value)) { - APInt Val = C1->getAPIntValue(); - APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize); - // FIXME: Handle Big-endian mode. - if (!DAG.getDataLayout().isBigEndian()) { - Val.insertBits(InsertVal, BitOffset); - SDValue NewSDVal = - DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(), - C1->isTargetOpcode(), C1->isOpaque()); - SDNode *NewST1 = DAG.UpdateNodeOperands( - ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2), - ST1->getOperand(3)); - return CombineTo(ST, SDValue(NewST1, 0)); - } - } - } - } // End ST subset of ST1 case. 
} } } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0e46f8d68f83..6aed5796acc6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1233,7 +1233,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); return false; @@ -1330,10 +1329,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { if (Res.getNode() == N) return true; - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + const bool IsStrictFp = N->isStrictFPOpcode(); + assert(Res.getValueType() == N->getValueType(0) && + N->getNumValues() == (IsStrictFp ? 2 : 1) && "Invalid operand expansion"); + LLVM_DEBUG(dbgs() << "Replacing: "; N->dump(&DAG); dbgs() << " with: "; + Res.dump()); ReplaceValueWith(SDValue(N, 0), Res); + if (IsStrictFp) + ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1)); + return false; } diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 75ec27975564..6f897edb5d60 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -224,6 +224,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, return getBackend().evaluateTargetFixup(*this, Layout, Fixup, DF, Target, Value, WasForced); + unsigned FixupFlags = getBackendPtr()->getFixupKindInfo(Fixup.getKind()).Flags; bool IsPCRel = getBackendPtr()->getFixupKindInfo(Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; @@ -239,8 +240,9 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) { IsResolved = false; } else if (auto *Writer = getWriterPtr()) { - IsResolved = Writer->isSymbolRefDifferenceFullyResolvedImpl( - *this, SA, *DF, false, true); + IsResolved = (FixupFlags & MCFixupKindInfo::FKF_Constant) || + Writer->isSymbolRefDifferenceFullyResolvedImpl( + *this, SA, *DF, false, true); } } } else { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d45a80057564..23f05eaad944 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -211,6 +211,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::f16, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); + setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); setOperationAction(ISD::BRCOND, MVT::Other, Expand); @@ -266,6 +272,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSUB, MVT::f128, Custom); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); setOperationAction(ISD::SETCC, MVT::f128, Custom); + 
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom); setOperationAction(ISD::BR_CC, MVT::f128, Custom); setOperationAction(ISD::SELECT, MVT::f128, Custom); setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); @@ -276,17 +284,31 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); // Variable arguments. setOperationAction(ISD::VASTART, MVT::Other, Custom); @@ -1235,6 +1257,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; + case AArch64ISD::STRICT_FCMP: return "AArch64ISD::STRICT_FCMP"; + case AArch64ISD::STRICT_FCMPE: return "AArch64ISD::STRICT_FCMPE"; case AArch64ISD::DUP: return "AArch64ISD::DUP"; case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; @@ -1668,6 +1692,17 @@ static bool isCMN(SDValue Op, ISD::CondCode CC) { (CC == ISD::SETEQ || CC == ISD::SETNE); } +static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, + SelectionDAG &DAG, SDValue Chain, + bool IsSignaling) { + EVT VT = LHS.getValueType(); + assert(VT != MVT::f128); + assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented"); + unsigned Opcode = + IsSignaling ? 
AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP; + return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS}); +} + static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG) { EVT VT = LHS.getValueType(); @@ -2284,9 +2319,16 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { - SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end()); + bool IsStrict = Op->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); + SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end()); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; + SDValue Result; + SDLoc dl(Op); + std::tie(Result, Chain) = makeLibCall(DAG, Call, Op.getValueType(), Ops, + CallOptions, dl, Chain); + return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result; } // Returns true if the given Op is the overflow flag result of an overflow @@ -2483,21 +2525,26 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType() != MVT::f128) { + bool IsStrict = Op->isStrictFPOpcode(); + SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); + if (SrcVal.getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; } RTLIB::Libcall LC; - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + LC = RTLIB::getFPROUND(SrcVal.getValueType(), Op.getValueType()); // FP_ROUND node has a second operand indicating whether it is known to be // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. - SDValue SrcVal = Op.getOperand(0); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions, - SDLoc(Op)).first; + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); + SDValue Result; + SDLoc dl(Op); + std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal, + CallOptions, dl, Chain); + return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result; } SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, @@ -2542,32 +2589,34 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType().isVector()) + bool IsStrict = Op->isStrictFPOpcode(); + SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); + + if (SrcVal.getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); // f16 conversions are promoted to f32 when full fp16 is not supported. 
- if (Op.getOperand(0).getValueType() == MVT::f16 && - !Subtarget->hasFullFP16()) { + if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { + assert(!IsStrict && "Lowering of strict fp16 not yet implemented"); SDLoc dl(Op); return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), - DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0))); + DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); } - if (Op.getOperand(0).getValueType() != MVT::f128) { + if (SrcVal.getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; } RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::FP_TO_SINT) - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); + if (Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT) + LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), Op.getValueType()); else - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); + LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), Op.getValueType()); - SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end()); - MakeLibCallOptions CallOptions; - return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first; + return LowerF128Call(Op, DAG, LC); } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -2603,18 +2652,22 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); + bool IsStrict = Op->isStrictFPOpcode(); + SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); + // f16 conversions are promoted to f32 when full fp16 is not supported. if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { + assert(!IsStrict && "Lowering of strict fp16 not yet implemented"); SDLoc dl(Op); return DAG.getNode( ISD::FP_ROUND, dl, MVT::f16, - DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)), + DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), DAG.getIntPtrConstant(0, dl)); } // i128 conversions are libcalls. 
- if (Op.getOperand(0).getValueType() == MVT::i128) + if (SrcVal.getValueType() == MVT::i128) return SDValue(); // Other conversions are legal, unless it's to the completely software-based @@ -2623,10 +2676,11 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, return Op; RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::SINT_TO_FP) - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + if (Op.getOpcode() == ISD::SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_SINT_TO_FP) + LC = RTLIB::getSINTTOFP(SrcVal.getValueType(), Op.getValueType()); else - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + LC = RTLIB::getUINTTOFP(SrcVal.getValueType(), Op.getValueType()); return LowerF128Call(Op, DAG, LC); } @@ -3104,6 +3158,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -3146,6 +3202,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128); case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); @@ -3190,9 +3247,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerPREFETCH(Op, DAG); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -5154,9 +5215,15 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + bool IsStrict = Op->isStrictFPOpcode(); + bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; + unsigned OpNo = IsStrict ? 1 : 0; + SDValue Chain; + if (IsStrict) + Chain = Op.getOperand(0); + SDValue LHS = Op.getOperand(OpNo + 0); + SDValue RHS = Op.getOperand(OpNo + 1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get(); SDLoc dl(Op); // We chose ZeroOrOneBooleanContents, so use zero and one. @@ -5167,13 +5234,14 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain, + IsSignaling); // If softenSetCCOperands returned a scalar, use it. if (!RHS.getNode()) { assert(LHS.getValueType() == Op.getValueType() && "Unexpected setcc expansion!"); - return LHS; + return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS; } } @@ -5185,7 +5253,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. 
- return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); + SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); + return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; } // Now we know we're dealing with FP values. @@ -5194,10 +5263,15 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead // and do the comparison. - SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + SDValue Cmp; + if (IsStrict) + Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling); + else + Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); + SDValue Res; if (CC2 == AArch64CC::AL) { changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1, CC2); @@ -5206,7 +5280,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. - return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); + Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); } else { // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't // totally clean. Some of them require two CSELs to implement. As is in @@ -5219,8 +5293,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); - return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); + Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } + return IsStrict ? 
DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res; } SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 672dfc4fcbc0..5ec453e274dc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -241,6 +241,10 @@ enum NodeType : unsigned { SST1_SXTW_SCALED, SST1_IMM, + // Strict (exception-raising) floating point comparison + STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCMPE, + // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index c3efe03a0987..11ba69878847 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4702,11 +4702,11 @@ class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType, multiclass FPConversion<string asm> { // Double-precision to Half-precision def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, - [(set FPR16:$Rd, (fpround FPR64:$Rn))]>; + [(set FPR16:$Rd, (any_fpround FPR64:$Rn))]>; // Double-precision to Single-precision def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, - [(set FPR32:$Rd, (fpround FPR64:$Rn))]>; + [(set FPR32:$Rd, (any_fpround FPR64:$Rn))]>; // Half-precision to Double-precision def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, @@ -4722,7 +4722,7 @@ multiclass FPConversion<string asm> { // Single-precision to Half-precision def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, - [(set FPR16:$Rd, (fpround FPR32:$Rn))]>; + [(set FPR16:$Rd, (any_fpround FPR32:$Rn))]>; } //--- diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d590d4d913ff..1e3bf299b265 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -419,7 +419,14 @@ def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; -def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; +def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; +def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, + [SDNPHasChain]>; +def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, + [SDNPHasChain]>; +def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), + [(AArch64strict_fcmp node:$lhs, node:$rhs), + (AArch64fcmp node:$lhs, node:$rhs)]>; def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; @@ -3300,10 +3307,10 @@ defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; -defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>; -defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>; -defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>; -defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>; +defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : 
FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; +defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; multiclass FPToIntegerIntPats<Intrinsic round, string INST> { def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>; @@ -3375,8 +3382,8 @@ def : Pat<(i64 (llround f64:$Rn)), // Scaled integer to floating point conversion instructions. //===----------------------------------------------------------------------===// -defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>; -defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; +defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; +defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; //===----------------------------------------------------------------------===// // Unscaled integer to floating point conversion instruction. @@ -3541,8 +3548,8 @@ def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))), // Floating point comparison instructions. //===----------------------------------------------------------------------===// -defm FCMPE : FPComparison<1, "fcmpe">; -defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; +defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>; +defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>; //===----------------------------------------------------------------------===// // Floating point conditional comparison instructions. diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 05c81feb23ec..c4f511abc4ae 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -244,11 +244,6 @@ static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) { return OS; } -static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) { - Operand.print(OS); - return OS; -} - LLVM_DUMP_METHOD void SDWASrcOperand::print(raw_ostream& OS) const { OS << "SDWA src: " << *getTargetOperand() @@ -850,6 +845,13 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { return std::unique_ptr<SDWAOperand>(nullptr); } +#if !defined(NDEBUG) +static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) { + Operand.print(OS); + return OS; +} +#endif + void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) { for (MachineInstr &MI : MBB) { if (auto Operand = matchSDWAOperand(MI)) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 1e6f7d889201..66f3f418d06c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1354,6 +1354,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } + + // Strict floating-point comparisons need custom lowering. + setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); + setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); } // Use __sincos_stret if available. 
@@ -1552,7 +1560,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMN: return "ARMISD::CMN"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; + case ARMISD::CMPFPE: return "ARMISD::CMPFPE"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::CMPFPEw0: return "ARMISD::CMPFPEw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; @@ -4344,13 +4354,16 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const SDLoc &dl) const { + SelectionDAG &DAG, const SDLoc &dl, + bool Signaling) const { assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); + Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, + dl, MVT::Glue, LHS, RHS); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); + Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, + dl, MVT::Glue, LHS); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } @@ -5408,7 +5421,12 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { // FIXME: Remove this when we have strict fp instruction selection patterns if (IsStrict) { - DAG.mutateStrictFPToFP(Op.getNode()); + SDLoc Loc(Op); + SDValue Result = + DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT + : ISD::FP_TO_UINT, + Loc, Op.getValueType(), SrcVal); + return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc); } return Op; @@ -9222,6 +9240,51 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N, Results.push_back(SDValue(CmpSwap, 2)); } +SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + SDValue Chain = Op.getOperand(0); + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get(); + bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; + + // If we don't have instructions of this float type then soften to a libcall + // and use SETCC instead. + if (isUnsupportedFloatingType(LHS.getValueType())) { + DAG.getTargetLoweringInfo().softenSetCCOperands( + DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling); + if (!RHS.getNode()) { + RHS = DAG.getConstant(0, dl, LHS.getValueType()); + CC = ISD::SETNE; + } + SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS, + DAG.getCondCode(CC)); + return DAG.getMergeValues({Result, Chain}, dl); + } + + ARMCC::CondCodes CondCode, CondCode2; + FPCCToARMCC(CC, CondCode, CondCode2); + + // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit + // in CMPFP and CMPFPE, but instead it should be made explicit by these + // instructions using a chain instead of glue. This would also fix the problem + // here (and also in LowerSELECT_CC) where we generate two comparisons when + // CondCode2 != AL. 
+ SDValue True = DAG.getConstant(1, dl, VT); + SDValue False = DAG.getConstant(0, dl, VT); + SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling); + SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG); + if (CondCode2 != ARMCC::AL) { + ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); + Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling); + Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG); + } + return DAG.getMergeValues({Result, Chain}, dl); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { @@ -9315,6 +9378,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); } } @@ -16320,6 +16385,18 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(!(DstSz == 32 && Subtarget->hasFP16()) && "With FP16, 16 to 32 conversion is legal!"); + // Converting from 32 -> 64 is valid if we have FP64. + if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) { + // FIXME: Remove this when we have strict fp instruction selection patterns + if (IsStrict) { + SDLoc Loc(Op); + SDValue Result = DAG.getNode(ISD::FP_EXTEND, + Loc, Op.getValueType(), SrcVal); + return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc); + } + return Op; + } + // Either we are converting from 16 -> 64, without FP16 and/or // FP.double-precision or without Armv8-fp. So we must do it in two // steps. @@ -16815,7 +16892,7 @@ bool ARMTargetLowering::isCheapToSpeculateCtlz() const { } bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { - return !Subtarget->hasMinSize(); + return !Subtarget->hasMinSize() || Subtarget->isTargetWindows(); } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index cc74e5d875d8..6061a65d3b89 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -84,7 +84,9 @@ class VectorType; CMN, // ARM CMN instructions. CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. + CMPFPE, // ARM VFP signalling compare instruction, sets FPSCR. CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. + CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. 
@@ -729,6 +731,7 @@ class VectorType; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const; void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const; @@ -817,7 +820,7 @@ class VectorType; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - const SDLoc &dl) const; + const SDLoc &dl, bool Signaling = false) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index a41a483d1a4c..f1d1d8a89164 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -21,6 +21,8 @@ def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; +def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_ARMCmp, [SDNPOutGlue]>; +def arm_cmpfpe0: SDNode<"ARMISD::CMPFPEw0",SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>; @@ -548,12 +550,12 @@ let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", - [/* For disassembly only; pattern left blank */]>; + [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -562,7 +564,7 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [(arm_cmpfpe HPR:$Sd, HPR:$Sm)]>; def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), @@ -611,7 +613,7 @@ let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfpe0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -619,7 +621,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfpe0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -631,7 +633,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfpe0 HPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 6196881a9b8f..062d1d36c43c 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -55,31 +55,29 @@ Optional<MCFixupKind> ARMAsmBackend::getFixupKind(StringRef Name) const { } const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + unsigned IsPCRelConstant = + MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_Constant; const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in // ARMFixupKinds.h. 
// // Name Offset (bits) Size (bits) Flags - {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_ldst_pcrel_12", 0, 32, IsPCRelConstant}, {"fixup_t2_ldst_pcrel_12", 0, 32, - MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, - {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, IsPCRelConstant}, + {"fixup_arm_pcrel_10", 0, 32, IsPCRelConstant}, {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_t2_pcrel_9", 0, 32, - MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_thumb_adr_pcrel_10", 0, 8, - MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, - {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, IsPCRelConstant}, {"fixup_t2_adr_pcrel_12", 0, 32, - MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 1d41994ef1e3..207742520ed6 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -156,13 +156,6 @@ const uint32_t * RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF, CallingConv::ID /*CC*/) const { auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); - if (MF.getFunction().hasFnAttribute("interrupt")) { - if (Subtarget.hasStdExtD()) - return CSR_XLEN_F64_Interrupt_RegMask; - if (Subtarget.hasStdExtF()) - return CSR_XLEN_F32_Interrupt_RegMask; - return CSR_Interrupt_RegMask; - } switch (Subtarget.getTargetABI()) { default: diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp index fe43bf4cbbce..fe5cb3ae2bf6 100644 --- a/llvm/lib/Target/X86/X86CmovConversion.cpp +++ b/llvm/lib/Target/X86/X86CmovConversion.cpp @@ -364,12 +364,13 @@ bool X86CmovConverterPass::collectCmovCandidates( /// \param TrueOpDepth depth cost of CMOV true value operand. /// \param FalseOpDepth depth cost of CMOV false value operand. static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) { - //===--------------------------------------------------------------------===// - // With no info about branch weight, we assume 50% for each value operand. - // Thus, depth of optimized CMOV instruction is the rounded up average of - // its True-Operand-Value-Depth and False-Operand-Value-Depth. - //===--------------------------------------------------------------------===// - return (TrueOpDepth + FalseOpDepth + 1) / 2; + // The depth of the result after branch conversion is + // TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability. + // As we have no info about branch weight, we assume 75% for one and 25% for + // the other, and pick the result with the largest resulting depth. 
+ return std::max( + divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4), + divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4)); } bool X86CmovConverterPass::checkForProfitableCmovCandidates( diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index f38dc436722d..e49e6cec65c0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3494,7 +3494,8 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, Instruction *NarrowestShift = XShift; Type *WidestTy = WidestShift->getType(); - assert(NarrowestShift->getType() == I.getOperand(0)->getType() && + Type *NarrowestTy = NarrowestShift->getType(); + assert(NarrowestTy == I.getOperand(0)->getType() && "We did not look past any shifts while matching XShift though."); bool HadTrunc = WidestTy != I.getOperand(0)->getType(); @@ -3533,6 +3534,23 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, if (XShAmt->getType() != YShAmt->getType()) return nullptr; + // As input, we have the following pattern: + // icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0 + // We want to rewrite that as: + // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x) + // While we know that originally (Q+K) would not overflow + // (because 2 * (N-1) u<= iN -1), we have looked past extensions of + // shift amounts. so it may now overflow in smaller bitwidth. + // To ensure that does not happen, we need to ensure that the total maximal + // shift amount is still representable in that smaller bit width. + unsigned MaximalPossibleTotalShiftAmount = + (WidestTy->getScalarSizeInBits() - 1) + + (NarrowestTy->getScalarSizeInBits() - 1); + APInt MaximalRepresentableShiftAmount = + APInt::getAllOnesValue(XShAmt->getType()->getScalarSizeInBits()); + if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount)) + return nullptr; + // Can we fold (XShAmt+YShAmt) ? auto *NewShAmt = dyn_cast_or_null<Constant>( SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index fbff5dd4a8cd..739579e2d38e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -23,8 +23,11 @@ using namespace PatternMatch; // Given pattern: // (x shiftopcode Q) shiftopcode K // we should rewrite it as -// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x) -// This is valid for any shift, but they must be identical. +// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x) and +// +// This is valid for any shift, but they must be identical, and we must be +// careful in case we have (zext(Q)+zext(K)) and look past extensions, +// (Q+K) must not overflow or else (Q+K) u< bitwidth(x) is bogus. // // AnalyzeForSignBitExtraction indicates that we will only analyze whether this // pattern has any 2 right-shifts that sum to 1 less than original bit width. @@ -58,6 +61,23 @@ Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts( if (ShAmt0->getType() != ShAmt1->getType()) return nullptr; + // As input, we have the following pattern: + // Sh0 (Sh1 X, Q), K + // We want to rewrite that as: + // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) + // While we know that originally (Q+K) would not overflow + // (because 2 * (N-1) u<= iN -1), we have looked past extensions of + // shift amounts. 
so it may now overflow in smaller bitwidth. + // To ensure that does not happen, we need to ensure that the total maximal + // shift amount is still representable in that smaller bit width. + unsigned MaximalPossibleTotalShiftAmount = + (Sh0->getType()->getScalarSizeInBits() - 1) + + (Sh1->getType()->getScalarSizeInBits() - 1); + APInt MaximalRepresentableShiftAmount = + APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); + if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount)) + return nullptr; + // We are only looking for signbit extraction if we have two right shifts. bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) && match(Sh1, m_Shr(m_Value(), m_Value())); diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 8c33045c2380..a1c012fddde3 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -790,41 +790,6 @@ public: }; } // namespace - -/// Return true if we know how to rewrite all uses of the given alloca after -/// hoisting it out of the loop. The main concerns are a) potential captures -/// and b) invariant.start markers which don't capture, but are no longer -/// valid w/o a corresponding invariant.end. -static bool canRewriteUsesOfAlloca(AllocaInst &AI) { - // TODO: This looks a lot like capture tracking, but we need to remove any - // invariant starts if we extend the lifetime of the alloca by hoisting it. - // We should probably refactor capture tracking into a form which allows us - // to reuse the relevant bits and remove the duplicated logic here. - - SmallVector<Use *, 16> Worklist; - for (Use &U : AI.uses()) - Worklist.push_back(&U); - - unsigned NumUsesExplored = 0; - while (!Worklist.empty()) { - Use *U = Worklist.pop_back_val(); - Instruction *I = cast<Instruction>(U->getUser()); - NumUsesExplored++; - if (NumUsesExplored > DefaultMaxUsesToExplore) - return false; - // Non capturing, terminating uses - if (isa<LoadInst>(I) || - (isa<StoreInst>(I) && U->getOperandNo() == 1)) - continue; - // Non capturing, non-terminating - if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I)) - return false; - for (Use &U : I->uses()) - Worklist.push_back(&U); - } - return true; -} - /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. 
This allows us to visit definitions before @@ -945,16 +910,6 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, continue; } - if (isa<AllocaInst>(&I) && - SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && - canRewriteUsesOfAlloca(cast<AllocaInst>(I))) { - hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, - MSSAU, SE, ORE); - HoistedInstructions.push_back(&I); - Changed = true; - continue; - } - if (PHINode *PN = dyn_cast<PHINode>(&I)) { if (CFH.canHoistPHI(PN)) { // Redirect incoming blocks first to ensure that we create hoisted @@ -1537,7 +1492,8 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { return false; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *BBPred = *PI; - if (isa<IndirectBrInst>(BBPred->getTerminator())) + if (isa<IndirectBrInst>(BBPred->getTerminator()) || + isa<CallBrInst>(BBPred->getTerminator())) return false; } return true; diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 0868e742f4ee..67c20b2edae8 100644 --- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -81,10 +81,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); - if (EnableMSSALoopDependency) { - AU.addRequired<MemorySSAWrapperPass>(); + if (EnableMSSALoopDependency) AU.addPreserved<MemorySSAWrapperPass>(); - } getLoopAnalysisUsage(AU); } @@ -101,8 +99,11 @@ public: const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); Optional<MemorySSAUpdater> MSSAU; if (EnableMSSALoopDependency) { - MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); - MSSAU = MemorySSAUpdater(MSSA); + // Not requiring MemorySSA and getting it only if available will split + // the loop pass pipeline when LoopRotate is being run first. + auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + if (MSSAA) + MSSAU = MemorySSAUpdater(&MSSAA->getMSSA()); } return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index c9eb4abfa21a..9a7379e27ed6 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -505,7 +505,8 @@ llvm::SplitAllCriticalEdges(Function &F, unsigned NumBroken = 0; for (BasicBlock &BB : F) { Instruction *TI = BB.getTerminator(); - if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) + if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI) && + !isa<CallBrInst>(TI)) for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) if (SplitCriticalEdge(TI, i, Options)) ++NumBroken; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 479bca83b51e..26cae4134ebc 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -832,13 +832,12 @@ public: // Extracts from consecutive indexes of the same vector better score as // the extracts could be optimized away. 
- auto *Ex1 = dyn_cast<ExtractElementInst>(V1); - auto *Ex2 = dyn_cast<ExtractElementInst>(V2); - if (Ex1 && Ex2 && Ex1->getVectorOperand() == Ex2->getVectorOperand() && - cast<ConstantInt>(Ex1->getIndexOperand())->getZExtValue() + 1 == - cast<ConstantInt>(Ex2->getIndexOperand())->getZExtValue()) { + Value *EV; + ConstantInt *Ex1Idx, *Ex2Idx; + if (match(V1, m_ExtractElement(m_Value(EV), m_ConstantInt(Ex1Idx))) && + match(V2, m_ExtractElement(m_Deferred(EV), m_ConstantInt(Ex2Idx))) && + Ex1Idx->getZExtValue() + 1 == Ex2Idx->getZExtValue()) return VLOperands::ScoreConsecutiveExtracts; - } auto *I1 = dyn_cast<Instruction>(V1); auto *I2 = dyn_cast<Instruction>(V2); diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp index dd3db7c150ba..c392651180b6 100644 --- a/llvm/utils/TableGen/DFAEmitter.cpp +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -53,14 +53,14 @@ void DfaEmitter::addTransition(state_type From, state_type To, action_type A) { ++NumNfaTransitions; } -void DfaEmitter::visitDfaState(DfaState DS) { +void DfaEmitter::visitDfaState(const DfaState &DS) { // For every possible action... auto FromId = DfaStates.idFor(DS); for (action_type A : Actions) { DfaState NewStates; DfaTransitionInfo TI; // For every represented state, word pair in the original NFA... - for (state_type &FromState : DS) { + for (state_type FromState : DS) { // If this action is possible from this state add the transitioned-to // states to NewStates. auto I = NfaTransitions.find({FromState, A}); @@ -90,8 +90,11 @@ void DfaEmitter::constructDfa() { // Note that UniqueVector starts indices at 1, not zero. unsigned DfaStateId = 1; - while (DfaStateId <= DfaStates.size()) - visitDfaState(DfaStates[DfaStateId++]); + while (DfaStateId <= DfaStates.size()) { + DfaState S = DfaStates[DfaStateId]; + visitDfaState(S); + DfaStateId++; + } } void DfaEmitter::emit(StringRef Name, raw_ostream &OS) { diff --git a/llvm/utils/TableGen/DFAEmitter.h b/llvm/utils/TableGen/DFAEmitter.h index 76de8f72cd88..f7724ce06bac 100644 --- a/llvm/utils/TableGen/DFAEmitter.h +++ b/llvm/utils/TableGen/DFAEmitter.h @@ -99,7 +99,7 @@ private: void constructDfa(); /// Visit a single DFA state and construct all possible transitions to new DFA /// states. - void visitDfaState(DfaState DS); + void visitDfaState(const DfaState &DS); }; } // namespace llvm |
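A note on the strict floating-point compare changes above (this paragraph and the sketch below are editorial context, not part of the patch): the STRICT_FSETCC/STRICT_FSETCCS nodes, and the AArch64 STRICT_FCMP/STRICT_FCMPE and ARM CMPFP/CMPFPE selections, model IEEE 754's quiet versus signaling comparison predicates. A minimal stand-alone C++ sketch of that source-level difference follows; it assumes a toolchain that honours FENV_ACCESS / strict exception behaviour (for example clang with -ffp-exception-behavior=strict), which is the kind of code generation these hunks add.

// Illustrative sketch only. Under strict FP semantics, '==' on a quiet NaN
// must not raise FE_INVALID (quiet predicate), while an ordered compare such
// as '<' must (signaling predicate). This is the distinction the quiet vs.
// signaling compare selection above preserves.
#include <cfenv>
#include <cmath>
#include <cstdio>

#pragma STDC FENV_ACCESS ON

int main() {
  volatile double x = std::nan("");  // quiet NaN; volatile discourages folding

  std::feclearexcept(FE_ALL_EXCEPT);
  bool eq = (x == 1.0);                                // quiet compare
  bool eq_invalid = std::fetestexcept(FE_INVALID) != 0;

  std::feclearexcept(FE_ALL_EXCEPT);
  bool lt = (x < 1.0);                                 // signaling compare
  bool lt_invalid = std::fetestexcept(FE_INVALID) != 0;

  std::printf("x == 1.0 -> %d, FE_INVALID raised: %d\n", eq, eq_invalid);
  std::printf("x <  1.0 -> %d, FE_INVALID raised: %d\n", lt, lt_invalid);
  return 0;
}

Whether the exception flag is actually observed at run time depends on the target and on strict-FP code generation of the kind added here; without it, a backend is free to use only quiet compares.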