about summary refs log tree commit diff
path: root/lib/Target/AArch64/AArch64ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 305
1 file changed, 220 insertions(+), 85 deletions(-)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4ddc95199d4c..a7c98fbb425f 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -91,6 +91,7 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
 // Counts how many logical-op immediates optimizeLogicalImm (added below in
 // this patch) successfully rewrote into bitmask-encodable immediates.
+STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
@@ -105,6 +106,12 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
 // Hidden escape hatch (default: enabled) so the new logical-immediate
 // optimization can be turned off with -aarch64-enable-logical-imm=false.
+static cl::opt<bool>
+EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
+                         cl::desc("Enable AArch64 logical imm instruction "
+                                  "optimization"),
+                         cl::init(true));
+
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
@@ -787,6 +794,140 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
 // Reviewer note: unified-diff hunk; '+' marks lines added by this patch.
 //
 // Try to rewrite the immediate 'Imm' (register width 'Size', expected to be
 // 32 or 64 by the caller) of a logical node into a value encodable as an
 // AArch64 bitmask immediate, by freely choosing the bits that 'Demanded'
 // says no user reads. On success the node is replaced via TLO.CombineTo --
 // either with a plain constant (all-zeros/all-ones result, left to generic
 // DAG combine) or with a pre-selected machine node of opcode 'NewOpc' --
 // and true is returned; returns false when no better immediate is found.
+static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
+                               const APInt &Demanded,
+                               TargetLowering::TargetLoweringOpt &TLO,
+                               unsigned NewOpc) {
+  uint64_t OldImm = Imm, NewImm, Enc;
+  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
+
+  // Return if the immediate is already all zeros, all ones, a bimm32 or a
+  // bimm64.
+  if (Imm == 0 || Imm == Mask ||
+      AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
+    return false;
+
+  unsigned EltSize = Size;
+  uint64_t DemandedBits = Demanded.getZExtValue();
+
+  // Clear bits that are not demanded.
+  Imm &= DemandedBits;
+
+  while (true) {
+    // The goal here is to set the non-demanded bits in a way that minimizes
+    // the number of switching between 0 and 1. In order to achieve this goal,
+    // we set the non-demanded bits to the value of the preceding demanded bits.
+    // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
+    // non-demanded bit), we copy bit0 (1) to the least significant 'x',
+    // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
+    // The final result is 0b11000011.
+    uint64_t NonDemandedBits = ~DemandedBits;
+    uint64_t InvertedImm = ~Imm & DemandedBits;
+    uint64_t RotatedImm =
+        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
+        NonDemandedBits;
     // The add below ripples each rotated-in bit across its whole run of
     // non-demanded positions; 'Ones' ends up set exactly at the non-demanded
     // positions that should be filled with 1 (see the worked example above).
     // 'Carry' re-injects the carry lost off the top of the EltSize window.
+    uint64_t Sum = RotatedImm + NonDemandedBits;
+    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
+    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
+    NewImm = (Imm | Ones) & Mask;
+
+    // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
+    // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
+    // we halve the element size and continue the search.
+    if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
+      break;
+
+    // We cannot shrink the element size any further if it is 2-bits.
+    if (EltSize == 2)
+      return false;
+
     // A bitmask immediate is a pattern replicated across the register, so
     // try a half-width element: fold the upper half onto the lower half and
     // retry, provided the two halves agree wherever both are demanded.
+    EltSize /= 2;
+    Mask >>= EltSize;
+    uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
+
+    // Return if there is mismatch in any of the demanded bits of Imm and Hi.
+    if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
+      return false;
+
+    // Merge the upper and lower halves of Imm and DemandedBits.
+    Imm |= Hi;
+    DemandedBits |= DemandedBitsHi;
+  }
+
+  ++NumOptimizedImms;
+
+  // Replicate the element across the register width.
+  while (EltSize < Size) {
+    NewImm |= NewImm << EltSize;
+    EltSize *= 2;
+  }
+
   // OldImm is only read by the asserts below; the cast silences unused
   // warnings in NDEBUG builds.
+  (void)OldImm;
+  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
+         "demanded bits should never be altered");
+  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
+
+  // Create the new constant immediate node.
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+
+  // If the new constant immediate is all-zeros or all-ones, let the target
+  // independent DAG combine optimize this node.
+  if (NewImm == 0 || NewImm == OrigMask)
+    return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT));
+
+  // Otherwise, create a machine node so that target independent DAG combine
+  // doesn't undo this optimization.
+  Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
+  SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
+  SDValue New(
+      TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
+
+  return TLO.CombineTo(Op, New);
+}
+
 // Reviewer note: unified-diff hunk; '+' marks lines added by this patch.
 //
 // TargetLowering hook: presumably invoked from the demanded-bits DAG
 // simplification (confirm against TargetLowering::ShrinkDemandedConstant
 // callers). Filters for scalar i32/i64 AND/OR/XOR with a constant RHS and a
 // partially-demanded result, then delegates to optimizeLogicalImm above
 // with the matching ANDri/ORRri/EORri machine opcode.
+bool AArch64TargetLowering::targetShrinkDemandedConstant(
+    SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
+  // Delay this optimization to as late as possible.
+  if (!TLO.LegalOps)
+    return false;
+
   // Honor the -aarch64-enable-logical-imm escape hatch.
+  if (!EnableOptimizeLogicalImm)
+    return false;
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector())
+    return false;
+
+  unsigned Size = VT.getSizeInBits();
+  assert((Size == 32 || Size == 64) &&
+         "i32 or i64 is expected after legalization.");
+
+  // Exit early if we demand all bits.
+  if (Demanded.countPopulation() == Size)
+    return false;
+
+  unsigned NewOpc;
+  switch (Op.getOpcode()) {
+  default:
+    return false;
+  case ISD::AND:
+    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
+    break;
+  case ISD::OR:
+    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
+    break;
+  case ISD::XOR:
+    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
+    break;
+  }
   // Only a constant RHS can be re-encoded.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+  uint64_t Imm = C->getZExtValue();
+  return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
+}
+
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
@@ -3418,11 +3559,75 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Other Lowering Code
//===----------------------------------------------------------------------===//
 // Flag-carrying target-node factory for global addresses. The four
 // getTargetNode overloads wrap the matching SelectionDAG::getTarget* call so
 // the getGOT/getAddrLarge/getAddr templates below can be node-type generic.
+SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
+                                             SelectionDAG &DAG,
+                                             unsigned Flag) const {
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+}
+
 // Jump-table variant of the generic target-node factory; jump tables carry
 // no offset, so only index, type, and flags are forwarded.
+SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
+                                             SelectionDAG &DAG,
+                                             unsigned Flag) const {
+  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
+}
+
 // Constant-pool variant of the generic target-node factory; forwards the
 // node's alignment and offset in addition to the flags.
+SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
+                                             SelectionDAG &DAG,
+                                             unsigned Flag) const {
+  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+                                   N->getOffset(), Flag);
+}
+
 // Block-address variant of the generic target-node factory; the offset is
 // always 0 here.
+SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
+                                             SelectionDAG &DAG,
+                                             unsigned Flag) const {
+  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
+}
+
+// (loadGOT sym)
 // GOT-indirect address: builds a single LOADgot node whose operand carries
 // MO_GOT. Shared by the global-address and constant-pool lowering paths.
+template <class NodeTy>
+SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const {
+  DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT);
+  // FIXME: Once remat is capable of dealing with instructions with register
+  // operands, expand this into two nodes instead of using a wrapper node.
+  return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
+}
+
+// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
 // Large-code-model address: materializes all four 16-bit address fragments
 // (MO_G3..MO_G0) under one WrapperLarge node; G2..G0 additionally carry
 // MO_NC (presumably "no overflow check" relocations -- confirm against
 // AArch64II flag definitions).
+template <class NodeTy>
+SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG)
+  const {
+  DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  const unsigned char MO_NC = AArch64II::MO_NC;
+  return DAG.getNode(
+      AArch64ISD::WrapperLarge, DL, Ty,
+      getTargetNode(N, Ty, DAG, AArch64II::MO_G3),
+      getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC),
+      getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC),
+      getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC));
+}
+
+// (addlow (adrp %hi(sym)) %lo(sym))
 // Small-code-model address: ADRP of the symbol's page (MO_PAGE) combined
 // with the page offset (MO_PAGEOFF | MO_NC) via an ADDlow node.
+template <class NodeTy>
+SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const {
+  DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE);
+  SDValue Lo = getTargetNode(N, Ty, DAG,
+                             AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
+  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
+}
+
 // Reviewer note: mixed diff hunk ('-' removed, '+' added). LowerGlobalAddress
 // is refactored onto the shared getGOT/getAddrLarge/getAddr helpers; the
 // typo fix "catched" -> "catches" rides along.
 SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  SDLoc DL(Op);
-  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+  GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = GN->getGlobal();
   unsigned char OpFlags =
       Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
@@ -3430,32 +3635,15 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
   assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
          "unexpected offset in global node");
-  // This also catched the large code model case for Darwin.
+  // This also catches the large code model case for Darwin.
   if ((OpFlags & AArch64II::MO_GOT) != 0) {
-    SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
-    // FIXME: Once remat is capable of dealing with instructions with register
-    // operands, expand this into two nodes instead of using a wrapper node.
-    return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
+    return getGOT(GN, DAG);
   }
   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
-    const unsigned char MO_NC = AArch64II::MO_NC;
-    return DAG.getNode(
-        AArch64ISD::WrapperLarge, DL, PtrVT,
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3),
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
+    return getAddrLarge(GN, DAG);
   } else {
-    // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and
-    // the only correct model on Darwin.
-    SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
-                                            OpFlags | AArch64II::MO_PAGE);
-    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
-    SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags);
-
-    SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
-    return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
     // NOTE(review): the removed path OR'ed OpFlags into MO_PAGE/MO_PAGEOFF,
     // while getAddr() uses bare MO_PAGE/MO_PAGEOFF. Equivalent only if
    // OpFlags is empty once MO_GOT is handled above -- confirm upstream.
+    return getAddr(GN, DAG);
   }
 }
 // Reviewer note: mixed diff hunk; the LowerJumpTable opening line is elided
 // into the @@ context below, so only the body delta is visible here. The
 // hand-rolled WrapperLarge / ADRP+ADDlow sequences are replaced by
 // getAddrLarge/getAddr.
@@ -4232,90 +4420,37 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
   // Jump table entries as PC relative offsets. No additional tweaking
   // is necessary here. Just get the address of the jump table.
   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  SDLoc DL(Op);
   if (getTargetMachine().getCodeModel() == CodeModel::Large &&
       !Subtarget->isTargetMachO()) {
-    const unsigned char MO_NC = AArch64II::MO_NC;
-    return DAG.getNode(
-        AArch64ISD::WrapperLarge, DL, PtrVT,
-        DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3),
-        DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC),
-        DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC),
-        DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
-                               AArch64II::MO_G0 | MO_NC));
+    return getAddrLarge(JT, DAG);
   }
-
-  SDValue Hi =
-      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE);
-  SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
-                                      AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
-  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+  return getAddr(JT, DAG);
 }
 // Reviewer note: mixed diff hunk. LowerConstantPool keeps its code-model
 // dispatch (GOT on Mach-O large, WrapperLarge otherwise, ADRP+ADDlow small)
 // but delegates each arm to getGOT/getAddrLarge/getAddr.
 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
                                                  SelectionDAG &DAG) const {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  SDLoc DL(Op);
   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
     // Use the GOT for the large code model on iOS.
     if (Subtarget->isTargetMachO()) {
-      SDValue GotAddr = DAG.getTargetConstantPool(
-          CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
-          AArch64II::MO_GOT);
-      return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
+      return getGOT(CP, DAG);
     }
-
-    const unsigned char MO_NC = AArch64II::MO_NC;
-    return DAG.getNode(
-        AArch64ISD::WrapperLarge, DL, PtrVT,
-        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
-                                  CP->getOffset(), AArch64II::MO_G3),
-        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
-                                  CP->getOffset(), AArch64II::MO_G2 | MO_NC),
-        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
-                                  CP->getOffset(), AArch64II::MO_G1 | MO_NC),
-        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
-                                  CP->getOffset(), AArch64II::MO_G0 | MO_NC));
+    return getAddrLarge(CP, DAG);
   } else {
-    // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on
-    // ELF, the only valid one on Darwin.
-    SDValue Hi =
-        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
-                                  CP->getOffset(), AArch64II::MO_PAGE);
-    SDValue Lo = DAG.getTargetConstantPool(
-        CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
-        AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
-    SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
-    return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+    return getAddr(CP, DAG);
   }
 }
 // Reviewer note: mixed diff hunk. LowerBlockAddress now keeps the
 // BlockAddressSDNode itself (instead of unwrapping to const BlockAddress*)
 // so the node can be handed to the generic getAddrLarge/getAddr helpers.
 SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
                                                  SelectionDAG &DAG) const {
-  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  SDLoc DL(Op);
+  BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
   if (getTargetMachine().getCodeModel() == CodeModel::Large &&
       !Subtarget->isTargetMachO()) {
-    const unsigned char MO_NC = AArch64II::MO_NC;
-    return DAG.getNode(
-        AArch64ISD::WrapperLarge, DL, PtrVT,
-        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3),
-        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
-        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
-        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
+    return getAddrLarge(BA, DAG);
   } else {
-    SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE);
-    SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF |
-                                           AArch64II::MO_NC);
-    SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
-    return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+    return getAddr(BA, DAG);
  }
 }