summaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp198
1 files changed, 163 insertions, 35 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index afba1587a743f..32b7c87e61bb9 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -608,15 +608,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
// a __gnu_ prefix (which is the default).
if (Subtarget->isTargetAEABI()) {
- setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
- setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
- setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
+ { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
+ { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
}
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
+
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
@@ -976,6 +988,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
+
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
@@ -984,29 +997,49 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
- for (const auto &LC :
- {RTLIB::SDIVREM_I8, RTLIB::SDIVREM_I16, RTLIB::SDIVREM_I32})
- setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_sdiv"
- : "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I64, Subtarget->isTargetWindows()
- ? "__rt_sdiv64"
- : "__aeabi_ldivmod");
- for (const auto &LC :
- {RTLIB::UDIVREM_I8, RTLIB::UDIVREM_I16, RTLIB::UDIVREM_I32})
- setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_udiv"
- : "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I64, Subtarget->isTargetWindows()
- ? "__rt_udiv64"
- : "__aeabi_uldivmod");
-
- setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+ if (Subtarget->isTargetWindows()) {
+ const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
+
+ { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
+ } else {
+ const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
+
+ { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
+ }
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
@@ -3305,11 +3338,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- case Intrinsic::arm_rbit: {
- assert(Op.getOperand(1).getValueType() == MVT::i32 &&
- "RBIT intrinsic must have i32 type!");
- return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
- }
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -9232,12 +9260,102 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
return SDValue();
}
-// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
-// (only after legalization).
-static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+static bool IsVUZPShuffleNode(SDNode *N) {
+ // VUZP shuffle node.
+ if (N->getOpcode() == ARMISD::VUZP)
+ return true;
+
+ // "VUZP" on i32 is an alias for VTRN.
+ if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
+ return true;
+
+ return false;
+}
+
+static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
+ // Look for ADD(VUZP.0, VUZP.1).
+ if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
+ N0 == N1)
+ return SDValue();
+
+ // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
+ if (!N->getValueType(0).is64BitVector())
+ return SDValue();
+ // Generate vpadd.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(N);
+ SDNode *Unzip = N0.getNode();
+ EVT VT = N->getValueType(0);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Ops.push_back(Unzip->getOperand(0));
+ Ops.push_back(Unzip->getOperand(1));
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
+}
+
+static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Check for two extended operands.
+ if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N1.getOpcode() == ISD::SIGN_EXTEND) &&
+ !(N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOpcode() == ISD::ZERO_EXTEND))
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+
+ // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
+ if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
+ N00 == N10)
+ return SDValue();
+
+ // We only recognize Q register paddl here; this can't be reached until
+ // after type legalization.
+ if (!N00.getValueType().is64BitVector() ||
+ !N0.getValueType().is128BitVector())
+ return SDValue();
+
+ // Generate vpaddl.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ SmallVector<SDValue, 8> Ops;
+ // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
+ unsigned Opcode;
+ if (N0.getOpcode() == ISD::SIGN_EXTEND)
+ Opcode = Intrinsic::arm_neon_vpaddls;
+ else
+ Opcode = Intrinsic::arm_neon_vpaddlu;
+ Ops.push_back(DAG.getConstant(Opcode, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ EVT ElemTy = N00.getValueType().getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
+ N00.getOperand(0), N00.getOperand(1));
+ Ops.push_back(Concat);
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
+}
+
+// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
+// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
+// much easier to match.
+static SDValue
+AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// Only perform optimization if after legalize, and if NEON is available. We
// also expected both operands to be BUILD_VECTORs.
if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
@@ -9293,6 +9411,10 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+ // Don't generate vpaddl+vmovn; we'll match it to vpadd later.
+ if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
+ return SDValue();
+
// Create VPADDL node.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -9564,9 +9686,15 @@ static SDValue PerformADDCCombine(SDNode *N,
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget){
+ // Attempt to create vpadd for this add.
+ if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
+ return Result;
// Attempt to create vpaddl for this add.
- if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget))
+ if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
+ return Result;
+ if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
+ Subtarget))
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))