Diffstat (limited to 'llvm')
 llvm/include/llvm/MC/MCFixupKindInfo.h                  |   7
 llvm/lib/Analysis/MemorySSA.cpp                         |   5
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp           |  27
 llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  |  10
 llvm/lib/MC/MCAssembler.cpp                             |   6
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp         | 139
 llvm/lib/Target/AArch64/AArch64ISelLowering.h           |   4
 llvm/lib/Target/AArch64/AArch64InstrFormats.td          |   6
 llvm/lib/Target/AArch64/AArch64InstrInfo.td             |  25
 llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp               |  12
 llvm/lib/Target/ARM/ARMISelLowering.cpp                 |  87
 llvm/lib/Target/ARM/ARMISelLowering.h                   |   5
 llvm/lib/Target/ARM/ARMInstrVFP.td                      |  14
 llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp      |  22
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp             |   7
 llvm/lib/Target/X86/X86CmovConversion.cpp               |  13
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp |  20
 llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp   |  24
 llvm/lib/Transforms/Scalar/LICM.cpp                     |  48
 llvm/lib/Transforms/Scalar/LoopRotation.cpp             |  11
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp           |   3
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp         |  11
 llvm/utils/TableGen/DFAEmitter.cpp                      |  11
 llvm/utils/TableGen/DFAEmitter.h                        |   2
 24 files changed, 334 insertions(+), 185 deletions(-)
diff --git a/llvm/include/llvm/MC/MCFixupKindInfo.h b/llvm/include/llvm/MC/MCFixupKindInfo.h
index 0d57441ce0dc..ecf85fa56931 100644
--- a/llvm/include/llvm/MC/MCFixupKindInfo.h
+++ b/llvm/include/llvm/MC/MCFixupKindInfo.h
@@ -22,7 +22,12 @@ struct MCFixupKindInfo {
FKF_IsAlignedDownTo32Bits = (1 << 1),
/// Should this fixup be evaluated in a target dependent manner?
- FKF_IsTarget = (1 << 2)
+ FKF_IsTarget = (1 << 2),
+
+ /// This fixup kind should be resolved if defined.
+ /// FIXME: This is a workaround because we don't support certain ARM
+ /// relocation types. This flag should eventually be removed.
+ FKF_Constant = 1 << 3,
};
/// A target specific name for the fixup kind. The names will be unique for
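
The hunk above grows the fixup-kind flag bitmask by one bit. A minimal standalone sketch (not the LLVM header itself; only the flag names come from the hunk) of how the new bit composes with FKF_IsPCRel and is tested, mirroring how the MCAssembler and ARMAsmBackend hunks below use it:

#include <cstdio>

// Fixup-kind flags are a plain bitmask, so the new bit must not collide
// with the existing ones (illustrative re-declaration, not the real header).
enum FixupKindFlags : unsigned {
  FKF_IsPCRel = 1u << 0,
  FKF_IsAlignedDownTo32Bits = 1u << 1,
  FKF_IsTarget = 1u << 2,
  FKF_Constant = 1u << 3, // the bit added by this patch
};

int main() {
  // Compose flags the way the ARMAsmBackend hunk builds IsPCRelConstant.
  unsigned IsPCRelConstant = FKF_IsPCRel | FKF_Constant;
  // Query one bit the way MCAssembler::evaluateFixup tests FKF_Constant.
  bool ForceResolved = (IsPCRelConstant & FKF_Constant) != 0;
  std::printf("flags=0x%x force-resolved=%d\n", IsPCRelConstant, ForceResolved);
}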
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index bf8dc94bfbf9..77f4125b5d4b 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -2298,7 +2298,10 @@ bool MemorySSAWrapperPass::runOnFunction(Function &F) {
return false;
}
-void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }
+void MemorySSAWrapperPass::verifyAnalysis() const {
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+}
void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
MSSA->print(OS);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e5bc08b9280a..8ff04797c8d8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16510,33 +16510,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
CombineTo(ST1, ST1->getChain());
return SDValue();
}
-
- // If ST stores to a subset of preceding store's write set, we may be
- // able to fold ST's value into the preceding stored value. As we know
- // the other uses of ST1's chain are unconcerned with ST, this folding
- // will not affect those nodes.
- int64_t BitOffset;
- if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
- BitOffset)) {
- SDValue ChainValue = ST1->getValue();
- if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
- if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = C1->getAPIntValue();
- APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
- // FIXME: Handle Big-endian mode.
- if (!DAG.getDataLayout().isBigEndian()) {
- Val.insertBits(InsertVal, BitOffset);
- SDValue NewSDVal =
- DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
- C1->isTargetOpcode(), C1->isOpaque());
- SDNode *NewST1 = DAG.UpdateNodeOperands(
- ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
- ST1->getOperand(3));
- return CombineTo(ST, SDValue(NewST1, 0));
- }
- }
- }
- } // End ST subset of ST1 case.
}
}
}
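
The combine deleted above folded a narrow constant store into a wider preceding constant store by splicing bits with APInt::insertBits. A standalone sketch of that splice on plain 64-bit integers (the helper name and example values are illustrative; little-endian only, matching the FIXME in the removed code):

#include <cstdint>
#include <cstdio>

// Overwrite `width` bits of `val` at bit `offset` with the low bits of `ins`,
// the way the removed combine used APInt::insertBits.
uint64_t insertBits(uint64_t val, uint64_t ins, unsigned offset, unsigned width) {
  uint64_t mask = (width == 64 ? ~0ull : ((1ull << width) - 1)) << offset;
  return (val & ~mask) | ((ins << offset) & mask);
}

int main() {
  // A 32-bit store of 0x11223344 followed by an 8-bit store of 0xAA at bit 8
  // folds to a single 32-bit store of 0x1122AA44.
  std::printf("0x%llx\n",
              (unsigned long long)insertBits(0x11223344, 0xAA, 8, 8));
}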
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0e46f8d68f83..6aed5796acc6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1233,7 +1233,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
-
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
@@ -1330,10 +1329,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
if (Res.getNode() == N)
return true;
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ const bool IsStrictFp = N->isStrictFPOpcode();
+ assert(Res.getValueType() == N->getValueType(0) &&
+ N->getNumValues() == (IsStrictFp ? 2 : 1) &&
"Invalid operand expansion");
+ LLVM_DEBUG(dbgs() << "Replacing: "; N->dump(&DAG); dbgs() << " with: ";
+ Res.dump());
ReplaceValueWith(SDValue(N, 0), Res);
+ if (IsStrictFp)
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
+
return false;
}
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 75ec27975564..6f897edb5d60 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -224,6 +224,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
return getBackend().evaluateTargetFixup(*this, Layout, Fixup, DF, Target,
Value, WasForced);
+ unsigned FixupFlags = getBackendPtr()->getFixupKindInfo(Fixup.getKind()).Flags;
bool IsPCRel = getBackendPtr()->getFixupKindInfo(Fixup.getKind()).Flags &
MCFixupKindInfo::FKF_IsPCRel;
@@ -239,8 +240,9 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) {
IsResolved = false;
} else if (auto *Writer = getWriterPtr()) {
- IsResolved = Writer->isSymbolRefDifferenceFullyResolvedImpl(
- *this, SA, *DF, false, true);
+ IsResolved = (FixupFlags & MCFixupKindInfo::FKF_Constant) ||
+ Writer->isSymbolRefDifferenceFullyResolvedImpl(
+ *this, SA, *DF, false, true);
}
}
} else {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d45a80057564..23f05eaad944 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -211,6 +211,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
@@ -266,6 +272,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
@@ -276,17 +284,31 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -1235,6 +1257,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
+ case AArch64ISD::STRICT_FCMP: return "AArch64ISD::STRICT_FCMP";
+ case AArch64ISD::STRICT_FCMPE: return "AArch64ISD::STRICT_FCMPE";
case AArch64ISD::DUP: return "AArch64ISD::DUP";
case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
@@ -1668,6 +1692,17 @@ static bool isCMN(SDValue Op, ISD::CondCode CC) {
(CC == ISD::SETEQ || CC == ISD::SETNE);
}
+static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
+ SelectionDAG &DAG, SDValue Chain,
+ bool IsSignaling) {
+ EVT VT = LHS.getValueType();
+ assert(VT != MVT::f128);
+ assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
+ unsigned Opcode =
+ IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
+ return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
+}
+
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
@@ -2284,9 +2319,16 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+ SDValue Result;
+ SDLoc dl(Op);
+ std::tie(Result, Chain) = makeLibCall(DAG, Call, Op.getValueType(), Ops,
+ CallOptions, dl, Chain);
+ return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
}
// Returns true if the given Op is the overflow flag result of an overflow
@@ -2483,21 +2525,26 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getOperand(0).getValueType() != MVT::f128) {
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+ if (SrcVal.getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
RTLIB::Libcall LC;
- LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+ LC = RTLIB::getFPROUND(SrcVal.getValueType(), Op.getValueType());
// FP_ROUND node has a second operand indicating whether it is known to be
// precise. That doesn't take part in the LibCall so we can't directly use
// LowerF128Call.
- SDValue SrcVal = Op.getOperand(0);
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions,
- SDLoc(Op)).first;
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ SDValue Result;
+ SDLoc dl(Op);
+ std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
+ CallOptions, dl, Chain);
+ return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
@@ -2542,32 +2589,34 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getOperand(0).getValueType().isVector())
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+
+ if (SrcVal.getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
// f16 conversions are promoted to f32 when full fp16 is not supported.
- if (Op.getOperand(0).getValueType() == MVT::f16 &&
- !Subtarget->hasFullFP16()) {
+ if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
+ assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
- DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
}
- if (Op.getOperand(0).getValueType() != MVT::f128) {
+ if (SrcVal.getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::FP_TO_SINT)
- LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ if (Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), Op.getValueType());
else
- LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), Op.getValueType());
- SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
- MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first;
+ return LowerF128Call(Op, DAG, LC);
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -2603,18 +2652,22 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (Op.getValueType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
+ assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
- DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+ DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
DAG.getIntPtrConstant(0, dl));
}
// i128 conversions are libcalls.
- if (Op.getOperand(0).getValueType() == MVT::i128)
+ if (SrcVal.getValueType() == MVT::i128)
return SDValue();
// Other conversions are legal, unless it's to the completely software-based
@@ -2623,10 +2676,11 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
return Op;
RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::SINT_TO_FP)
- LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ if (Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(SrcVal.getValueType(), Op.getValueType());
else
- LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ LC = RTLIB::getUINTTOFP(SrcVal.getValueType(), Op.getValueType());
return LowerF128Call(Op, DAG, LC);
}
@@ -3104,6 +3158,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
return LowerSETCC(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
@@ -3146,6 +3202,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::FDIV:
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
@@ -3190,9 +3247,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerPREFETCH(Op, DAG);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
@@ -5154,9 +5215,15 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVSETCC(Op, DAG);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ bool IsStrict = Op->isStrictFPOpcode();
+ bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Chain;
+ if (IsStrict)
+ Chain = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(OpNo + 0);
+ SDValue RHS = Op.getOperand(OpNo + 1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
SDLoc dl(Op);
// We chose ZeroOrOneBooleanContents, so use zero and one.
@@ -5167,13 +5234,14 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
+ IsSignaling);
// If softenSetCCOperands returned a scalar, use it.
if (!RHS.getNode()) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
- return LHS;
+ return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
}
}
@@ -5185,7 +5253,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
- return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
+ SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
+ return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
// Now we know we're dealing with FP values.
@@ -5194,10 +5263,15 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
// and do the comparison.
- SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ SDValue Cmp;
+ if (IsStrict)
+ Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
+ else
+ Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
+ SDValue Res;
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
CC2);
@@ -5206,7 +5280,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
- return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
+ Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
} else {
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
// totally clean. Some of them require two CSELs to implement. As is in
@@ -5219,8 +5293,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
- return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
+ Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
+ return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 672dfc4fcbc0..5ec453e274dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -241,6 +241,10 @@ enum NodeType : unsigned {
SST1_SXTW_SCALED,
SST1_IMM,
+ // Strict (exception-raising) floating point comparison
+ STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCMPE,
+
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index c3efe03a0987..11ba69878847 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -4702,11 +4702,11 @@ class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType,
multiclass FPConversion<string asm> {
// Double-precision to Half-precision
def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm,
- [(set FPR16:$Rd, (fpround FPR64:$Rn))]>;
+ [(set FPR16:$Rd, (any_fpround FPR64:$Rn))]>;
// Double-precision to Single-precision
def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm,
- [(set FPR32:$Rd, (fpround FPR64:$Rn))]>;
+ [(set FPR32:$Rd, (any_fpround FPR64:$Rn))]>;
// Half-precision to Double-precision
def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm,
@@ -4722,7 +4722,7 @@ multiclass FPConversion<string asm> {
// Single-precision to Half-precision
def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm,
- [(set FPR16:$Rd, (fpround FPR32:$Rn))]>;
+ [(set FPR16:$Rd, (any_fpround FPR32:$Rn))]>;
}
//---
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d590d4d913ff..1e3bf299b265 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -419,7 +419,14 @@ def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
-def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
+def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
+def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
+ [SDNPHasChain]>;
+def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
+ [SDNPHasChain]>;
+def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
+ [(AArch64strict_fcmp node:$lhs, node:$rhs),
+ (AArch64fcmp node:$lhs, node:$rhs)]>;
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
@@ -3300,10 +3307,10 @@ defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
-defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
-defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
-defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
-defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
+defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
@@ -3375,8 +3382,8 @@ def : Pat<(i64 (llround f64:$Rn)),
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//
-defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
-defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
+defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
+defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
@@ -3541,8 +3548,8 @@ def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//
-defm FCMPE : FPComparison<1, "fcmpe">;
-defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
+defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
+defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>;
//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 05c81feb23ec..c4f511abc4ae 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -244,11 +244,6 @@ static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
return OS;
}
-static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
- Operand.print(OS);
- return OS;
-}
-
LLVM_DUMP_METHOD
void SDWASrcOperand::print(raw_ostream& OS) const {
OS << "SDWA src: " << *getTargetOperand()
@@ -850,6 +845,13 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
return std::unique_ptr<SDWAOperand>(nullptr);
}
+#if !defined(NDEBUG)
+static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
+ Operand.print(OS);
+ return OS;
+}
+#endif
+
void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
for (MachineInstr &MI : MBB) {
if (auto Operand = matchSDWAOperand(MI)) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1e6f7d889201..66f3f418d06c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1354,6 +1354,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
+
+ // Strict floating-point comparisons need custom lowering.
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
}
// Use __sincos_stret if available.
@@ -1552,7 +1560,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMN: return "ARMISD::CMN";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPE: return "ARMISD::CMPFPE";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::CMPFPEw0: return "ARMISD::CMPFPEw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
@@ -4344,13 +4354,16 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const SDLoc &dl) const {
+ SelectionDAG &DAG, const SDLoc &dl,
+ bool Signaling) const {
assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
+ dl, MVT::Glue, LHS, RHS);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
+ dl, MVT::Glue, LHS);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
@@ -5408,7 +5421,12 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
// FIXME: Remove this when we have strict fp instruction selection patterns
if (IsStrict) {
- DAG.mutateStrictFPToFP(Op.getNode());
+ SDLoc Loc(Op);
+ SDValue Result =
+ DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
+ : ISD::FP_TO_UINT,
+ Loc, Op.getValueType(), SrcVal);
+ return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
}
return Op;
@@ -9222,6 +9240,51 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
Results.push_back(SDValue(CmpSwap, 2));
}
+SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
+ bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
+
+ // If we don't have instructions of this float type then soften to a libcall
+ // and use SETCC instead.
+ if (isUnsupportedFloatingType(LHS.getValueType())) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(
+ DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
+ DAG.getCondCode(CC));
+ return DAG.getMergeValues({Result, Chain}, dl);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
+ // in CMPFP and CMPFPE, but instead it should be made explicit by these
+ // instructions using a chain instead of glue. This would also fix the problem
+ // here (and also in LowerSELECT_CC) where we generate two comparisons when
+ // CondCode2 != AL.
+ SDValue True = DAG.getConstant(1, dl, VT);
+ SDValue False = DAG.getConstant(0, dl, VT);
+ SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
+ SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
+ if (CondCode2 != ARMCC::AL) {
+ ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
+ Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
+ Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
+ }
+ return DAG.getMergeValues({Result, Chain}, dl);
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -9315,6 +9378,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
@@ -16320,6 +16385,18 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
"With FP16, 16 to 32 conversion is legal!");
+ // Converting from 32 -> 64 is valid if we have FP64.
+ if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
+ // FIXME: Remove this when we have strict fp instruction selection patterns
+ if (IsStrict) {
+ SDLoc Loc(Op);
+ SDValue Result = DAG.getNode(ISD::FP_EXTEND,
+ Loc, Op.getValueType(), SrcVal);
+ return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
+ }
+ return Op;
+ }
+
// Either we are converting from 16 -> 64, without FP16 and/or
// FP.double-precision or without Armv8-fp. So we must do it in two
// steps.
@@ -16815,7 +16892,7 @@ bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
}
bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
- return !Subtarget->hasMinSize();
+ return !Subtarget->hasMinSize() || Subtarget->isTargetWindows();
}
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
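
When an IEEE predicate needs two ARM condition codes, the new LowerFSETCC above emits a second compare-and-select that overwrites the first result if CondCode2 also holds. A toy sketch of that cascade (the condition codes and helper names here are stand-ins, not the real ARMCC values):

#include <cstdio>

// Stand-in for an NZCV-based condition test; codes 0/1 are hypothetical.
static bool condHolds(int Code, double LHS, double RHS) {
  switch (Code) {
  case 0: return LHS > RHS; // "GT"-like
  case 1: return LHS < RHS; // "MI"-like
  default: return false;
  }
}

// Two-select cascade: pick 0/1 on the first code, then overwrite with 1 when
// the second code also holds (AL plays the "no second code needed" role).
static int lowerSetCCLike(double LHS, double RHS, int CC1, int CC2, int AL = -1) {
  int Result = condHolds(CC1, LHS, RHS) ? 1 : 0;
  if (CC2 != AL)
    Result = condHolds(CC2, LHS, RHS) ? 1 : Result;
  return Result;
}

int main() {
  // "Ordered and not equal" via greater-than or less-than: 2.0 != 3.0 -> 1.
  std::printf("%d\n", lowerSetCCLike(2.0, 3.0, /*CC1=*/0, /*CC2=*/1));
}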
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index cc74e5d875d8..6061a65d3b89 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -84,7 +84,9 @@ class VectorType;
CMN, // ARM CMN instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPE, // ARM VFP signalling compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets FPSCR.
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
@@ -729,6 +731,7 @@ class VectorType;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
@@ -817,7 +820,7 @@ class VectorType;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- const SDLoc &dl) const;
+ const SDLoc &dl, bool Signaling = false) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index a41a483d1a4c..f1d1d8a89164 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -21,6 +21,8 @@ def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
@@ -548,12 +550,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
- [/* For disassembly only; pattern left blank */]>;
+ [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -562,7 +564,7 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [(arm_cmpfpe HPR:$Sd, HPR:$Sm)]>;
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
@@ -611,7 +613,7 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfpe0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -619,7 +621,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfpe0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -631,7 +633,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfpe0 HPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 6196881a9b8f..062d1d36c43c 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -55,31 +55,29 @@ Optional<MCFixupKind> ARMAsmBackend::getFixupKind(StringRef Name) const {
}
const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ unsigned IsPCRelConstant =
+ MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_Constant;
const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
// ARMFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
- {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_ldst_pcrel_12", 0, 32, IsPCRelConstant},
{"fixup_t2_ldst_pcrel_12", 0, 32,
- MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
- {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, IsPCRelConstant},
+ {"fixup_arm_pcrel_10", 0, 32, IsPCRelConstant},
{"fixup_t2_pcrel_10", 0, 32,
MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_t2_pcrel_9", 0, 32,
- MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_thumb_adr_pcrel_10", 0, 8,
- MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
- {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, IsPCRelConstant},
{"fixup_t2_adr_pcrel_12", 0, 32,
- MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 1d41994ef1e3..207742520ed6 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -156,13 +156,6 @@ const uint32_t *
RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF,
CallingConv::ID /*CC*/) const {
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- if (MF.getFunction().hasFnAttribute("interrupt")) {
- if (Subtarget.hasStdExtD())
- return CSR_XLEN_F64_Interrupt_RegMask;
- if (Subtarget.hasStdExtF())
- return CSR_XLEN_F32_Interrupt_RegMask;
- return CSR_Interrupt_RegMask;
- }
switch (Subtarget.getTargetABI()) {
default:
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index fe43bf4cbbce..fe5cb3ae2bf6 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -364,12 +364,13 @@ bool X86CmovConverterPass::collectCmovCandidates(
/// \param TrueOpDepth depth cost of CMOV true value operand.
/// \param FalseOpDepth depth cost of CMOV false value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
- //===--------------------------------------------------------------------===//
- // With no info about branch weight, we assume 50% for each value operand.
- // Thus, depth of optimized CMOV instruction is the rounded up average of
- // its True-Operand-Value-Depth and False-Operand-Value-Depth.
- //===--------------------------------------------------------------------===//
- return (TrueOpDepth + FalseOpDepth + 1) / 2;
+ // The depth of the result after branch conversion is
+ // TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability.
+ // As we have no info about branch weight, we assume 75% for one and 25% for
+ // the other, and pick the result with the largest resulting depth.
+ return std::max(
+ divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
+ divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}
bool X86CmovConverterPass::checkForProfitableCmovCandidates(
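
The rewritten getDepthOfOptCmov above replaces the old 50/50 average with a pessimistic 75/25 mix taken in whichever direction is worse. A self-contained re-derivation (divideCeil mirrors llvm::divideCeil from MathExtras.h; the example operand depths are made up):

#include <algorithm>
#include <cstdio>

unsigned divideCeil(unsigned Num, unsigned Den) { return (Num + Den - 1) / Den; }

unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // Assume a 75%/25% split either way and keep the larger (pessimistic) mix,
  // instead of the old 50%/50% average.
  return std::max(divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
                  divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}

int main() {
  // Old formula: (2 + 10 + 1) / 2 = 6. New formula leans toward the deeper
  // operand: max(ceil(16/4), ceil(32/4)) = 8.
  std::printf("%u\n", getDepthOfOptCmov(2, 10));
}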
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f38dc436722d..e49e6cec65c0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3494,7 +3494,8 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
Instruction *NarrowestShift = XShift;
Type *WidestTy = WidestShift->getType();
- assert(NarrowestShift->getType() == I.getOperand(0)->getType() &&
+ Type *NarrowestTy = NarrowestShift->getType();
+ assert(NarrowestTy == I.getOperand(0)->getType() &&
"We did not look past any shifts while matching XShift though.");
bool HadTrunc = WidestTy != I.getOperand(0)->getType();
@@ -3533,6 +3534,23 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
if (XShAmt->getType() != YShAmt->getType())
return nullptr;
+ // As input, we have the following pattern:
+ // icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+ // We want to rewrite that as:
+ // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x)
+ // While we know that originally (Q+K) would not overflow
+ // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
+ // shift amounts, so it may now overflow in a smaller bitwidth.
+ // To ensure that does not happen, we need to ensure that the total maximal
+ // shift amount is still representable in that smaller bit width.
+ unsigned MaximalPossibleTotalShiftAmount =
+ (WidestTy->getScalarSizeInBits() - 1) +
+ (NarrowestTy->getScalarSizeInBits() - 1);
+ APInt MaximalRepresentableShiftAmount =
+ APInt::getAllOnesValue(XShAmt->getType()->getScalarSizeInBits());
+ if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
+ return nullptr;
+
// Can we fold (XShAmt+YShAmt) ?
auto *NewShAmt = dyn_cast_or_null<Constant>(
SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
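
This hunk and the InstCombineShifts hunk below add the same guard: once zexts of the shift amounts have been looked through, Q+K is formed in the narrower shift-amount type and can wrap, which would make the (Q+K) u< bitwidth(x) check meaningless. A standalone sketch of the arithmetic (the helper name and example widths are illustrative; it assumes a shift-amount type narrower than 64 bits):

#include <cstdint>
#include <cstdio>

// After looking through zext of the shift amounts, the maximal possible total
// shift (WidestBits-1) + (NarrowestBits-1) must still fit in the shift-amount
// type, or the rewrite must be abandoned.
bool totalShiftAmtRepresentable(unsigned WidestBits, unsigned NarrowestBits,
                                unsigned ShAmtTyBits) {
  unsigned MaximalPossibleTotalShiftAmount =
      (WidestBits - 1) + (NarrowestBits - 1);
  uint64_t MaximalRepresentable = (1ull << ShAmtTyBits) - 1; // all-ones value
  return MaximalRepresentable >= MaximalPossibleTotalShiftAmount;
}

int main() {
  // i64 shifts whose amounts were zext'd from i8: 63 + 63 = 126 <= 255, OK.
  std::printf("%d\n", totalShiftAmtRepresentable(64, 64, 8));
  // ...but an i6 amount type cannot represent 126, so bail out.
  std::printf("%d\n", totalShiftAmtRepresentable(64, 64, 6));
}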
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index fbff5dd4a8cd..739579e2d38e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -23,8 +23,11 @@ using namespace PatternMatch;
// Given pattern:
// (x shiftopcode Q) shiftopcode K
// we should rewrite it as
-// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x)
-// This is valid for any shift, but they must be identical.
+// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x), and (Q+K) must not overflow.
+//
+// This is valid for any shift, but they must be identical, and we must be
+// careful in case we have (zext(Q)+zext(K)) and look past extensions:
+// if (Q+K) overflows, the check (Q+K) u< bitwidth(x) is bogus.
//
// AnalyzeForSignBitExtraction indicates that we will only analyze whether this
// pattern has any 2 right-shifts that sum to 1 less than original bit width.
@@ -58,6 +61,23 @@ Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts(
if (ShAmt0->getType() != ShAmt1->getType())
return nullptr;
+ // As input, we have the following pattern:
+ // Sh0 (Sh1 X, Q), K
+ // We want to rewrite that as:
+ // Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
+ // While we know that originally (Q+K) would not overflow
+ // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
+ // shift amounts, so it may now overflow in a smaller bitwidth.
+ // To ensure that does not happen, we need to ensure that the total maximal
+ // shift amount is still representable in that smaller bit width.
+ unsigned MaximalPossibleTotalShiftAmount =
+ (Sh0->getType()->getScalarSizeInBits() - 1) +
+ (Sh1->getType()->getScalarSizeInBits() - 1);
+ APInt MaximalRepresentableShiftAmount =
+ APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
+ if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
+ return nullptr;
+
// We are only looking for signbit extraction if we have two right shifts.
bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) &&
match(Sh1, m_Shr(m_Value(), m_Value()));
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 8c33045c2380..a1c012fddde3 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -790,41 +790,6 @@ public:
};
} // namespace
-
-/// Return true if we know how to rewrite all uses of the given alloca after
-/// hoisting it out of the loop. The main concerns are a) potential captures
-/// and b) invariant.start markers which don't capture, but are no longer
-/// valid w/o a corresponding invariant.end.
-static bool canRewriteUsesOfAlloca(AllocaInst &AI) {
- // TODO: This looks a lot like capture tracking, but we need to remove any
- // invariant starts if we extend the lifetime of the alloca by hoisting it.
- // We should probably refactor capture tracking into a form which allows us
- // to reuse the relevant bits and remove the duplicated logic here.
-
- SmallVector<Use *, 16> Worklist;
- for (Use &U : AI.uses())
- Worklist.push_back(&U);
-
- unsigned NumUsesExplored = 0;
- while (!Worklist.empty()) {
- Use *U = Worklist.pop_back_val();
- Instruction *I = cast<Instruction>(U->getUser());
- NumUsesExplored++;
- if (NumUsesExplored > DefaultMaxUsesToExplore)
- return false;
- // Non capturing, terminating uses
- if (isa<LoadInst>(I) ||
- (isa<StoreInst>(I) && U->getOperandNo() == 1))
- continue;
- // Non capturing, non-terminating
- if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
- return false;
- for (Use &U : I->uses())
- Worklist.push_back(&U);
- }
- return true;
-}
-
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -945,16 +910,6 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
- if (isa<AllocaInst>(&I) &&
- SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
- canRewriteUsesOfAlloca(cast<AllocaInst>(I))) {
- hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, SE, ORE);
- HoistedInstructions.push_back(&I);
- Changed = true;
- continue;
- }
-
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
if (CFH.canHoistPHI(PN)) {
// Redirect incoming blocks first to ensure that we create hoisted
@@ -1537,7 +1492,8 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
return false;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *BBPred = *PI;
- if (isa<IndirectBrInst>(BBPred->getTerminator()))
+ if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
+ isa<CallBrInst>(BBPred->getTerminator()))
return false;
}
return true;
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 0868e742f4ee..67c20b2edae8 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -81,10 +81,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
+ if (EnableMSSALoopDependency)
AU.addPreserved<MemorySSAWrapperPass>();
- }
getLoopAnalysisUsage(AU);
}
@@ -101,8 +99,11 @@ public:
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
Optional<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency) {
- MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
+ // Not requiring MemorySSA and getting it only if available will split
+ // the loop pass pipeline when LoopRotate is being run first.
+ auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAA)
+ MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
}
return LoopRotation(L, LI, TTI, AC, &DT, &SE,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
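
The LoopRotation hunks trade a hard requirement on MemorySSA for getAnalysisIfAvailable, so the updater is only constructed when some earlier pass already computed the analysis. A toy sketch of that optional-dependency shape (the types are stand-ins, not the LLVM classes):

#include <cstdio>
#include <optional>

struct MemorySSA {};
struct Updater { MemorySSA *MSSA; };

// getAnalysisIfAvailable analogue: build an updater only when the analysis
// already exists; callers fall back to "no updater" otherwise.
std::optional<Updater> makeUpdater(MemorySSA *Available) {
  std::optional<Updater> U;
  if (Available)
    U = Updater{Available};
  return U;
}

int main() {
  MemorySSA M;
  auto U1 = makeUpdater(&M), U2 = makeUpdater(nullptr);
  std::printf("%d %d\n", U1.has_value(), U2.has_value());
}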
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index c9eb4abfa21a..9a7379e27ed6 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -505,7 +505,8 @@ llvm::SplitAllCriticalEdges(Function &F,
unsigned NumBroken = 0;
for (BasicBlock &BB : F) {
Instruction *TI = BB.getTerminator();
- if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI) &&
+ !isa<CallBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
++NumBroken;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 479bca83b51e..26cae4134ebc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -832,13 +832,12 @@ public:
// Extracts from consecutive indexes of the same vector score better, as
// the extracts could be optimized away.
- auto *Ex1 = dyn_cast<ExtractElementInst>(V1);
- auto *Ex2 = dyn_cast<ExtractElementInst>(V2);
- if (Ex1 && Ex2 && Ex1->getVectorOperand() == Ex2->getVectorOperand() &&
- cast<ConstantInt>(Ex1->getIndexOperand())->getZExtValue() + 1 ==
- cast<ConstantInt>(Ex2->getIndexOperand())->getZExtValue()) {
+ Value *EV;
+ ConstantInt *Ex1Idx, *Ex2Idx;
+ if (match(V1, m_ExtractElement(m_Value(EV), m_ConstantInt(Ex1Idx))) &&
+ match(V2, m_ExtractElement(m_Deferred(EV), m_ConstantInt(Ex2Idx))) &&
+ Ex1Idx->getZExtValue() + 1 == Ex2Idx->getZExtValue())
return VLOperands::ScoreConsecutiveExtracts;
- }
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index dd3db7c150ba..c392651180b6 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -53,14 +53,14 @@ void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {
++NumNfaTransitions;
}
-void DfaEmitter::visitDfaState(DfaState DS) {
+void DfaEmitter::visitDfaState(const DfaState &DS) {
// For every possible action...
auto FromId = DfaStates.idFor(DS);
for (action_type A : Actions) {
DfaState NewStates;
DfaTransitionInfo TI;
// For every represented state, word pair in the original NFA...
- for (state_type &FromState : DS) {
+ for (state_type FromState : DS) {
// If this action is possible from this state add the transitioned-to
// states to NewStates.
auto I = NfaTransitions.find({FromState, A});
@@ -90,8 +90,11 @@ void DfaEmitter::constructDfa() {
// Note that UniqueVector starts indices at 1, not zero.
unsigned DfaStateId = 1;
- while (DfaStateId <= DfaStates.size())
- visitDfaState(DfaStates[DfaStateId++]);
+ while (DfaStateId <= DfaStates.size()) {
+ DfaState S = DfaStates[DfaStateId];
+ visitDfaState(S);
+ DfaStateId++;
+ }
}
void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
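
One plausible reading of the DFAEmitter change above: visitDfaState can append to DfaStates while it runs, and growing the container may invalidate a reference taken into it, so the caller now copies the state up front and the callee takes it by const reference. A standalone sketch of the hazard using std::vector (the growth rule is invented for illustration):

#include <cstdio>
#include <string>
#include <vector>

std::vector<std::string> States = {"start"};

// Visiting a state may discover and append new states, which can reallocate
// `States` and leave any outstanding reference into it dangling.
void visit(const std::string &S) {
  if (States.size() < 4)
    States.push_back(S + "'");
}

int main() {
  for (size_t I = 0; I < States.size(); ++I) {
    // Copy first, as the patched constructDfa does; passing States[I]
    // directly would risk a dangling reference once push_back reallocates.
    std::string S = States[I];
    visit(S);
  }
  std::printf("%zu states\n", States.size());
}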
diff --git a/llvm/utils/TableGen/DFAEmitter.h b/llvm/utils/TableGen/DFAEmitter.h
index 76de8f72cd88..f7724ce06bac 100644
--- a/llvm/utils/TableGen/DFAEmitter.h
+++ b/llvm/utils/TableGen/DFAEmitter.h
@@ -99,7 +99,7 @@ private:
void constructDfa();
/// Visit a single DFA state and construct all possible transitions to new DFA
/// states.
- void visitDfaState(DfaState DS);
+ void visitDfaState(const DfaState &DS);
};
} // namespace llvm