summaryrefslogtreecommitdiff
path: root/lib/Target/AArch64
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2016-01-06 20:01:02 +0000
committerDimitry Andric <dim@FreeBSD.org>2016-01-06 20:01:02 +0000
commit8a6c1c25bce0267ee4072bd7b786b921e8a66a35 (patch)
treeea70b740d40cffe568a990c7aecd1acb5f83f786 /lib/Target/AArch64
parent84fe440ded1bfc237d720c49408b36798d67ceff (diff)
Notes
Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r--lib/Target/AArch64/AArch64.td10
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp8
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp104
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp26
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h10
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp4
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h12
7 files changed, 137 insertions, 37 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 0bff9b592c15..46ef2c111bae 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -124,6 +124,14 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeaturePerfMon,
FeatureZCRegMove, FeatureZCZeroing]>;
+def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
+ "Samsung Exynos-M1 processors",
+ [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureCRC,
+ FeaturePerfMon]>;
+
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC,
@@ -136,6 +144,8 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
+// FIXME: Exynos-M1 is currently modelled without a specific SchedModel.
+def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>;
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 79a84ad8c6c5..3d1ab4e3fc2b 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -158,7 +158,7 @@ INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
"AArch64 A57 FP Load-Balancing", false, false)
namespace {
-/// A Chain is a sequence of instructions that are linked together by
+/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
/// fmul d0<def>, ?
@@ -285,7 +285,7 @@ public:
std::string str() const {
std::string S;
raw_string_ostream OS(S);
-
+
OS << "{";
StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
@@ -427,7 +427,7 @@ Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
return Ch;
}
}
-
+
// Bailout case - just return the first item.
Chain *Ch = L.front();
L.erase(L.begin());
@@ -495,7 +495,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
RS.enterBasicBlock(&MBB);
RS.forward(MachineBasicBlock::iterator(G->getStart()));
- // Can we find an appropriate register that is available throughout the life
+ // Can we find an appropriate register that is available throughout the life
// of the chain?
unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f5beff12100..4ecfbe9e2280 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2426,7 +2426,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
continue;
}
-
+
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
@@ -5074,7 +5074,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
- // value of the first element. E.g.
+ // value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
@@ -9491,6 +9491,103 @@ static SDValue performBRCONDCombine(SDNode *N,
return SDValue();
}
+// Optimize some simple tbz/tbnz cases. Returns the new operand to test; Bit is
+// updated in place and Invert is toggled if the test sense must flip. This is
+// needed (as opposed to standard dag combines) because AArch64ISD::TBZ is
+// matched during legalization.
+static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
+ SelectionDAG &DAG) {
+
+ if (!Op->hasOneUse())
+ return Op;
+
+ // We don't handle undef/constant-fold cases below, as they should have
+ // already been taken care of (e.g. and of 0, test of undefined shifted bits,
+ // etc.)
+
+ // (tbz (trunc x), b) -> (tbz x, b)
+ // This case is just here to enable more of the below cases to be caught.
+ if (Op->getOpcode() == ISD::TRUNCATE &&
+ Bit < Op->getValueType(0).getSizeInBits()) {
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+
+ if (Op->getNumOperands() != 2)
+ return Op;
+
+ auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!C)
+ return Op;
+
+ switch (Op->getOpcode()) {
+ default:
+ return Op;
+
+ // (tbz (and x, m), b) -> (tbz x, b) if bit b of m is set
+ case ISD::AND:
+ if ((C->getZExtValue() >> Bit) & 1)
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ return Op;
+
+ // (tbz (shl x, c), b) -> (tbz x, b-c) if c <= b
+ case ISD::SHL:
+ if (C->getZExtValue() <= Bit &&
+ (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
+ Bit = Bit - C->getZExtValue();
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+ return Op;
+
+ // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c >= # bits in x
+ case ISD::SRA:
+ Bit = Bit + C->getZExtValue();
+ if (Bit >= Op->getValueType(0).getSizeInBits())
+ Bit = Op->getValueType(0).getSizeInBits() - 1;
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+
+ // (tbz (srl x, c), b) -> (tbz x, b+c) if b+c < # bits in x
+ case ISD::SRL:
+ if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
+ Bit = Bit + C->getZExtValue();
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+ return Op;
+
+ // (tbz (xor x, c), b) -> (tbnz x, b) if bit b of c is set, else (tbz x, b)
+ case ISD::XOR:
+ if ((C->getZExtValue() >> Bit) & 1)
+ Invert = !Invert;
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+}
+
+// Combine for TBZ/TBNZ: retarget the tested operand/bit, flipping the opcode.
+static SDValue performTBZCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ bool Invert = false;
+ SDValue TestSrc = N->getOperand(1);
+ SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
+
+ if (TestSrc == NewTestSrc)
+ return SDValue();
+
+ unsigned NewOpc = N->getOpcode();
+ if (Invert) {
+ if (NewOpc == AArch64ISD::TBZ)
+ NewOpc = AArch64ISD::TBNZ;
+ else {
+ assert(NewOpc == AArch64ISD::TBNZ);
+ NewOpc = AArch64ISD::TBZ;
+ }
+ }
+
+ SDLoc DL(N);
+ return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
+ DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
+}
+
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@@ -9642,6 +9739,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
+ case AArch64ISD::TBNZ:
+ case AArch64ISD::TBZ:
+ return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 566aa2c9a9ba..43664df3b861 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -613,21 +613,6 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
-// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
-static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
- MachineInstr *Op1) {
- assert(MI->memoperands_empty() && "expected a new machineinstr");
- size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
- (Op1->memoperands_end() - Op1->memoperands_begin());
-
- MachineFunction *MF = MI->getParent()->getParent();
- MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
- MachineSDNode::mmo_iterator MemEnd =
- std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
- MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
- MI->setMemRefs(MemBegin, MemEnd);
-}
-
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -692,10 +677,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(RtNewDest))
.addOperand(BaseRegOp)
- .addImm(OffsetImm);
-
- // Copy MachineMemOperands from the original loads.
- concatenateMemOperands(NewMemMI, I, Paired);
+ .addImm(OffsetImm)
+ .setMemRefs(I->mergeMemRefsWith(*Paired));
DEBUG(
dbgs()
@@ -786,9 +769,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(I))
.addOperand(BaseRegOp)
- .addImm(OffsetImm);
- // Copy MachineMemOperands from the original stores.
- concatenateMemOperands(MIB, I, Paired);
+ .addImm(OffsetImm)
+ .setMemRefs(I->mergeMemRefsWith(*Paired));
} else {
// Handle Unscaled
if (IsUnscaled)
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 1b8b9b27719c..151133b2f32c 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -33,7 +33,14 @@ class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
- enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone};
+ enum ARMProcFamilyEnum {
+ Others,
+ CortexA35,
+ CortexA53,
+ CortexA57,
+ Cyclone,
+ ExynosM1
+ };
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
@@ -143,6 +150,7 @@ public:
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
+ bool isExynosM1() const { return CPUString == "exynos-m1"; }
bool useAA() const override { return isCortexA53(); }
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 78f5289ec26d..cde1c6df2608 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -834,7 +834,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings
};
uint32_t
-AArch64SysReg::SysRegMapper::fromString(StringRef Name,
+AArch64SysReg::SysRegMapper::fromString(StringRef Name,
const FeatureBitset& FeatureBits, bool &Valid) const {
std::string NameLower = Name.lower();
@@ -878,7 +878,7 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name,
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
const FeatureBitset& FeatureBits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index f649cb9b8a8d..e63627eae123 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -285,17 +285,17 @@ struct AArch64NamedImmMapper {
// Zero value of FeatureBitSet means the mapping is always available
FeatureBitset FeatureBitSet;
- bool isNameEqual(std::string Other,
+ bool isNameEqual(std::string Other,
const FeatureBitset& FeatureBits) const {
- if (FeatureBitSet.any() &&
+ if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Name == Other;
}
- bool isValueEqual(uint32_t Other,
+ bool isValueEqual(uint32_t Other,
const FeatureBitset& FeatureBits) const {
- if (FeatureBitSet.any() &&
+ if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Value == Other;
@@ -310,7 +310,7 @@ struct AArch64NamedImmMapper {
StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
bool &Valid) const;
// Maps string to value, depending on availability for FeatureBits given
- uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+ uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
@@ -1322,7 +1322,7 @@ namespace AArch64TLBI {
return true;
}
}
-}
+}
namespace AArch64II {
/// Target Operand Flag enum.