Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 556
 1 file changed, 421 insertions(+), 135 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5139ae5ccaf1..6b38e216a854 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -107,6 +107,13 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
break;
+ case TargetOpcode::STATEPOINT:
+ NumBytes = StatepointOpers(&MI).getNumPatchBytes();
+ assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ // No patch bytes means a normal call instruction is emitted.
+ if (NumBytes == 0)
+ NumBytes = 4;
+ break;
case AArch64::TLSDESC_CALLSEQ:
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
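A minimal sketch of the rule the STATEPOINT case above encodes (illustrative only; sizeOfStatepoint is a hypothetical standalone helper, not part of the patch):

    #include <cassert>

    // Statepoint patch bytes come in whole 4-byte A64 instructions; zero
    // patch bytes means the statepoint lowers to a single 4-byte call (BLR).
    unsigned sizeOfStatepoint(unsigned NumPatchBytes) {
      assert(NumPatchBytes % 4 == 0 && "whole instructions only");
      return NumPatchBytes == 0 ? 4 : NumPatchBytes;
    }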
@@ -287,6 +294,31 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
}
+ // If we're allowed to modify and the block ends in an unconditional branch
+ // that could simply fall through, remove the branch. (Note: This case only
+ // matters when we can't understand the whole sequence, otherwise it's also
+ // handled by BranchFolding.cpp.)
+ if (AllowModify && isUncondBranchOpcode(LastOpc) &&
+ MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+ assert(!isUncondBranchOpcode(LastOpc) &&
+ "unreachable unconditional branches removed above");
+
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ parseCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ } else {
+ SecondLastInst = &*I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
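In MIR terms, the branch-removal case added in this hunk handles a block like the following (hypothetical example):

    bb.1:
      ...
      CBZW $w0, %bb.3
      B %bb.2            ; erased when bb.2 is the layout successor
    bb.2:
      ...

With AllowModify set, the trailing B is deleted, leaving a fall-through conditional branch that parseCondBranch then reports as TBB = %bb.3 with the CBZW condition.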
@@ -321,6 +353,56 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
+bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const {
+ // For the moment, handle only a block which ends with a cb(n)zx followed by
+ // a fallthrough. Why this? Because it is a common form.
+ // TODO: Should we handle b.cc?
+
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return true;
+
+ // Skip over SpeculationBarrierEndBB terminators
+ if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+ I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+ --I;
+ }
+
+ if (!isUnpredicatedTerminator(*I))
+ return true;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ if (!isCondBranchOpcode(LastOpc))
+ return true;
+
+ switch (LastOpc) {
+ default:
+ return true;
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ break;
+ }
+
+ MBP.TrueDest = LastInst->getOperand(1).getMBB();
+ assert(MBP.TrueDest && "expected!");
+ MBP.FalseDest = MBB.getNextNode();
+
+ MBP.ConditionDef = nullptr;
+ MBP.SingleUseCondition = false;
+
+ MBP.LHS = LastInst->getOperand(0);
+ MBP.RHS = MachineOperand::CreateImm(0);
+ MBP.Predicate = (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX)
+                     ? MachineBranchPredicate::PRED_NE
+                     : MachineBranchPredicate::PRED_EQ;
+ return false;
+}
+
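For a typical block handled by analyzeBranchPredicate (hypothetical example):

    bb.1:
      ...
      CBNZW $w8, %bb.3
    bb.2:                 ; layout successor
      ...

the reported predicate is LHS = $w8, RHS = immediate 0, Predicate = PRED_NE, TrueDest = %bb.3, and FalseDest = %bb.2.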
bool AArch64InstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond[0].getImm() != -1) {
@@ -1037,6 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
switch (MI.getOpcode()) {
default:
break;
+ case AArch64::PTEST_PP:
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = MI.getOperand(1).getReg();
+ // Not sure about the mask and value for now...
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
case AArch64::SUBSWrr:
case AArch64::SUBSWrs:
case AArch64::SUBSWrx:
@@ -1192,10 +1281,9 @@ static bool areCFlagsAccessedBetweenInstrs(
return true;
// From must be above To.
- assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
- [From](MachineInstr &MI) {
- return MI.getIterator() == From;
- }) != To->getParent()->rend());
+ assert(std::any_of(
+ ++To.getReverse(), To->getParent()->rend(),
+ [From](MachineInstr &MI) { return MI.getIterator() == From; }));
// We iterate backward starting at \p To until we hit \p From.
for (const MachineInstr &Instr :
@@ -1208,6 +1296,127 @@ static bool areCFlagsAccessedBetweenInstrs(
return false;
}
+/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
+/// operation that could set the flags in an identical manner.
+bool AArch64InstrInfo::optimizePTestInstr(
+ MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
+ const MachineRegisterInfo *MRI) const {
+ auto *Mask = MRI->getUniqueVRegDef(MaskReg);
+ auto *Pred = MRI->getUniqueVRegDef(PredReg);
+ auto NewOp = Pred->getOpcode();
+ bool OpChanged = false;
+
+ unsigned MaskOpcode = Mask->getOpcode();
+ unsigned PredOpcode = Pred->getOpcode();
+ bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
+ bool PredIsWhileLike = isWhileOpcode(PredOpcode);
+
+ if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
+ // deactivate any lanes OTHER_INST might set.
+ uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
+ uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
+
+ // Must be an all active predicate of matching element size.
+ if ((PredElementSize != MaskElementSize) ||
+ (Mask->getOperand(1).getImm() != 31))
+ return false;
+
+ // Fallthrough to simply remove the PTEST.
+ } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
+ // instruction that sets the flags as PTEST would.
+
+ // Fallthrough to simply remove the PTEST.
+ } else if (PredIsPTestLike) {
+ // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
+ // instructions use the same predicate.
+ auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PTestLikeMask)
+ return false;
+
+ // Fallthrough to simply remove the PTEST.
+ } else {
+ switch (Pred->getOpcode()) {
+ case AArch64::BRKB_PPzP:
+ case AArch64::BRKPB_PPzPP: {
+ // Op 0 is chain, 1 is the mask, 2 the previous predicate to
+ // propagate, 3 the new predicate.
+
+ // Check to see if our mask is the same as the brkpb's. If
+ // not the resulting flag bits may be different and we
+ // can't remove the ptest.
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ // Switch to the new opcode
+ NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
+ : AArch64::BRKPBS_PPzPP;
+ OpChanged = true;
+ break;
+ }
+ case AArch64::BRKN_PPzP: {
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ NewOp = AArch64::BRKNS_PPzP;
+ OpChanged = true;
+ break;
+ }
+ default:
+ // Bail out if we don't recognize the input
+ return false;
+ }
+ }
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // If the predicate is in a different block (possibly because it's been
+ // hoisted out), then assume the flags are set in between statements.
+ if (Pred->getParent() != PTest->getParent())
+ return false;
+
+ // If another instruction between the propagation and test sets the
+ // flags, don't remove the ptest.
+ MachineBasicBlock::iterator I = Pred, E = PTest;
+ ++I; // Skip past the predicate op itself.
+ for (; I != E; ++I) {
+ const MachineInstr &Inst = *I;
+
+ // TODO: If the ptest flags are unused, we could still remove it.
+ if (Inst.modifiesRegister(AArch64::NZCV, TRI))
+ return false;
+ }
+
+ // If we pass all the checks, it's safe to remove the PTEST and use the flags
+ // as they are prior to PTEST. Sometimes this requires the tested PTEST
+ // operand to be replaced with an equivalent instruction that also sets the
+ // flags.
+ Pred->setDesc(get(NewOp));
+ PTest->eraseFromParent();
+ if (OpChanged) {
+ bool succeeded = UpdateOperandRegClass(*Pred);
+ (void)succeeded;
+ assert(succeeded && "Operands have incompatible register classes!");
+ Pred->addRegisterDefined(AArch64::NZCV, TRI);
+ }
+
+ // Ensure that the flags def is live.
+ if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
+ unsigned i = 0, e = Pred->getNumOperands();
+ for (; i != e; ++i) {
+ MachineOperand &MO = Pred->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
+ MO.setIsDead(false);
+ break;
+ }
+ }
+ }
+ return true;
+}
+
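A sketch of the first pattern above in SVE terms (hypothetical values; WHILELO_PWW_S is one of the flag-setting "while" opcodes):

    %mask:ppr = PTRUE_S 31                              ; all-active, .s elements
    %p:ppr    = WHILELO_PWW_S %a, %b, implicit-def $nzcv ; sets NZCV as PTEST would
    PTEST_PP %mask, %p                                  ; redundant
    Bcc 1, %bb.2, implicit $nzcv

Because the all-active PTRUE has the same element size as the WHILELO, the PTEST recomputes flags the WHILELO already set, so it can simply be erased.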
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be a true compare
/// instruction
@@ -1246,6 +1455,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return true;
}
+ if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
+ return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
+
// Continue only if we have a "ri" where immediate is zero.
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
// function.
@@ -2062,6 +2274,24 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return true;
}
+Optional<ExtAddrMode>
+AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const {
+ const MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
+ bool OffsetIsScalable;
+ if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
+ return None;
+
+ if (!Base->isReg())
+ return None;
+ ExtAddrMode AM;
+ AM.BaseReg = Base->getReg();
+ AM.Displacement = Offset;
+ AM.ScaledReg = 0;
+ return AM;
+}
+
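For example (hypothetical operands), a scaled unsigned-immediate load such as

    %val:gpr64 = LDRXui %base:gpr64sp, 2

has a byte offset of 2 * 8 = 16, so the returned ExtAddrMode is BaseReg = %base, ScaledReg = 0, Displacement = 16.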
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
@@ -2744,6 +2974,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // Copy a Z register pair by copying the individual sub-registers.
+ if (AArch64::ZPR2RegClass.contains(DestReg) &&
+ AArch64::ZPR2RegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+ return;
+ }
+
+ // Copy a Z register triple by copying the individual sub-registers.
+ if (AArch64::ZPR3RegClass.contains(DestReg) &&
+ AArch64::ZPR3RegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2};
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+ return;
+ }
+
+ // Copy a Z register quad by copying the individual sub-registers.
+ if (AArch64::ZPR4RegClass.contains(DestReg) &&
+ AArch64::ZPR4RegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2, AArch64::zsub3};
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+ return;
+ }
+
if (AArch64::GPR64spRegClass.contains(DestReg) &&
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
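For instance, copying the Z-register pair $z2_z3 into $z0_z1 goes through copyPhysRegTuple and becomes one unpredicated ORR per sub-register (illustrative expansion):

    $z0 = ORR_ZZZ $z2, $z2
    $z1 = ORR_ZZZ $z3, $z3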
@@ -3031,7 +3290,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_PXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 4:
@@ -3075,7 +3334,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -3097,7 +3356,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 48:
@@ -3108,7 +3367,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 64:
@@ -3119,7 +3378,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
}
@@ -3185,7 +3444,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 4:
@@ -3229,7 +3488,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -3251,7 +3510,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 48:
@@ -3262,7 +3521,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 64:
@@ -3273,7 +3532,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
}
@@ -3300,6 +3559,47 @@ bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
});
}
+void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+ const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes is a predicate, which is 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ // VGSized offsets are divided by '2', because the VG register is the
+ // number of 64-bit granules as opposed to 128-bit vector chunks,
+ // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+ // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+ // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+ ByteSized = Offset.getFixed();
+ VGSized = Offset.getScalable() / 2;
+}
+
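A worked example of the decomposition, using the API added in this patch:

    // 16 fixed bytes plus 8 scalable bytes:
    int64_t ByteSized, VGSized;
    AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
        StackOffset::get(/*Fixed=*/16, /*Scalable=*/8), ByteSized, VGSized);
    // ByteSized == 16, VGSized == 4: DWARF describes this as 16 + 4 * VG.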
+/// Decomposes this frame offset into the parts used to describe it,
+/// writing them to the given out-parameters. For non-scalable offsets
+/// this is simply its byte size.
+void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+ const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors) {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes is a predicate, which is 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ NumBytes = Offset.getFixed();
+ NumDataVectors = 0;
+ NumPredicateVectors = Offset.getScalable() / 2;
+ // This method is used to get the offsets to adjust the frame offset.
+ // If the function requires ADDPL to be used and needs more than two ADDPL
+ // instructions, part of the offset is folded into NumDataVectors so that it
+ // uses ADDVL for part of it, reducing the number of ADDPL instructions.
+ if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+ NumPredicateVectors > 62) {
+ NumDataVectors = NumPredicateVectors / 8;
+ NumPredicateVectors -= NumDataVectors * 8;
+ }
+}
+
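And a worked example of the ADDPL-to-ADDVL folding described above:

    // A purely scalable offset of 144 bytes is 72 predicate-vector units:
    int64_t NumBytes, NumPredicateVectors, NumDataVectors;
    AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
        StackOffset::getScalable(144), NumBytes, NumPredicateVectors,
        NumDataVectors);
    // 72 % 8 == 0, so it folds entirely into data vectors:
    // NumBytes == 0, NumDataVectors == 9, NumPredicateVectors == 0.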
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
@@ -3409,12 +3709,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI) {
int64_t Bytes, NumPredicateVectors, NumDataVectors;
- Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
+ AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+ Offset, Bytes, NumPredicateVectors, NumDataVectors);
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
- assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
- "SP increment/decrement not 16-byte aligned");
+ assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
+ "SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;
@@ -3669,7 +3970,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
// Construct the complete offset.
bool IsMulVL = ScaleValue.isScalable();
unsigned Scale = ScaleValue.getKnownMinSize();
- int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
+ int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -3711,11 +4012,9 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
*OutUnscaledOp = *UnscaledOp;
if (IsMulVL)
- SOffset = StackOffset(Offset, MVT::nxv1i8) +
- StackOffset(SOffset.getBytes(), MVT::i8);
+ SOffset = StackOffset::get(SOffset.getFixed(), Offset);
else
- SOffset = StackOffset(Offset, MVT::i8) +
- StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
+ SOffset = StackOffset::get(Offset, SOffset.getScalable());
return AArch64FrameOffsetCanUpdate |
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
@@ -3727,7 +4026,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned ImmIdx = FrameRegIdx + 1;
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
- Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
+ Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
@@ -3832,7 +4131,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
return false;
}
-// FP Opcodes that can be combined with a FMUL
+// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
default:
@@ -3854,8 +4153,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
- return (Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast);
+ // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
+ // the target options or if FADD/FSUB has the contract fast-math flag.
+ return Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Inst.getFlag(MachineInstr::FmContract);
}
return false;
}
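Pulled out as a standalone sketch, the fusion condition after this hunk reads as follows (canFuseWithFMul is hypothetical, for illustration only):

    static bool canFuseWithFMul(const MachineInstr &Inst,
                                const TargetOptions &Options) {
      // Allowed globally (-ffast-math / -ffp-contract=fast) or granted
      // per-instruction by the IR 'contract' fast-math flag.
      return Options.UnsafeFPMath ||
             Options.AllowFPOpFusion == FPOpFusion::Fast ||
             Inst.getFlag(MachineInstr::FmContract);
    }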
@@ -4335,8 +4638,8 @@ bool AArch64InstrInfo::isThroughputPattern(
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
// Integer patterns
if (getMaddPatterns(Root, Patterns))
return true;
@@ -4344,7 +4647,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getFMAPatterns(Root, Patterns))
return true;
- return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
}
enum class FMAInstKind { Default, Indexed, Accumulator };
@@ -4567,7 +4871,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- MachineInstr *MUL;
+ MachineInstr *MUL = nullptr;
const TargetRegisterClass *RC;
unsigned Opc;
switch (Pattern) {
@@ -5388,6 +5692,9 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
+ // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
+ // CodeGen/AArch64/urem-seteq-nonzero.ll.
+ // assert(MUL && "MUL was never set");
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
@@ -5727,84 +6034,20 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
- const Function &Fa = a.getMF()->getFunction();
- const Function &Fb = b.getMF()->getFunction();
-
- // If none of the functions have the "sign-return-address" attribute their
- // signing behaviour is equal
- if (!Fa.hasFnAttribute("sign-return-address") &&
- !Fb.hasFnAttribute("sign-return-address")) {
- return true;
- }
+ const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
- // If both functions have the "sign-return-address" attribute their signing
- // behaviour is equal, if the values of the attributes are equal
- if (Fa.hasFnAttribute("sign-return-address") &&
- Fb.hasFnAttribute("sign-return-address")) {
- StringRef ScopeA =
- Fa.getFnAttribute("sign-return-address").getValueAsString();
- StringRef ScopeB =
- Fb.getFnAttribute("sign-return-address").getValueAsString();
- return ScopeA.equals(ScopeB);
- }
-
- // If function B doesn't have the "sign-return-address" attribute but A does,
- // the functions' signing behaviour is equal if A's value for
- // "sign-return-address" is "none" and vice versa.
- if (Fa.hasFnAttribute("sign-return-address")) {
- StringRef ScopeA =
- Fa.getFnAttribute("sign-return-address").getValueAsString();
- return ScopeA.equals("none");
- }
-
- if (Fb.hasFnAttribute("sign-return-address")) {
- StringRef ScopeB =
- Fb.getFnAttribute("sign-return-address").getValueAsString();
- return ScopeB.equals("none");
- }
-
- llvm_unreachable("Unkown combination of sign-return-address attributes");
+ return MFIa->shouldSignReturnAddress(false) ==
+            MFIb->shouldSignReturnAddress(false) &&
+        MFIa->shouldSignReturnAddress(true) ==
+            MFIb->shouldSignReturnAddress(true);
}
static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
- const Function &Fa = a.getMF()->getFunction();
- const Function &Fb = b.getMF()->getFunction();
+ const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
- // If none of the functions have the "sign-return-address-key" attribute
- // their keys are equal
- if (!Fa.hasFnAttribute("sign-return-address-key") &&
- !Fb.hasFnAttribute("sign-return-address-key")) {
- return true;
- }
-
- // If both functions have the "sign-return-address-key" attribute their
- // keys are equal if the values of "sign-return-address-key" are equal
- if (Fa.hasFnAttribute("sign-return-address-key") &&
- Fb.hasFnAttribute("sign-return-address-key")) {
- StringRef KeyA =
- Fa.getFnAttribute("sign-return-address-key").getValueAsString();
- StringRef KeyB =
- Fb.getFnAttribute("sign-return-address-key").getValueAsString();
- return KeyA.equals(KeyB);
- }
-
- // If B doesn't have the "sign-return-address-key" attribute, both keys are
- // equal, if function a has the default key (a_key)
- if (Fa.hasFnAttribute("sign-return-address-key")) {
- StringRef KeyA =
- Fa.getFnAttribute("sign-return-address-key").getValueAsString();
- return KeyA.equals_lower("a_key");
- }
-
- if (Fb.hasFnAttribute("sign-return-address-key")) {
- StringRef KeyB =
- Fb.getFnAttribute("sign-return-address-key").getValueAsString();
- return KeyB.equals_lower("a_key");
- }
-
- llvm_unreachable("Unkown combination of sign-return-address-key attributes");
+ return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
}
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
@@ -5860,9 +6103,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
// necessary. However, at this point we don't know if the outlined function
// will have a RET instruction so we assume the worst.
- const Function &FCF = FirstCand.getMF()->getFunction();
const TargetRegisterInfo &TRI = getRegisterInfo();
- if (FCF.hasFnAttribute("sign-return-address")) {
+ if (FirstCand.getMF()
+ ->getInfo<AArch64FunctionInfo>()
+ ->shouldSignReturnAddress(true)) {
// One PAC and one AUT instructions
NumBytesToCreateFrame += 8;
@@ -5919,10 +6163,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
return false;
};
// Remove candidates with illegal stack modifying instructions
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- hasIllegalSPModification),
- RepeatedSequenceLocs.end());
+ llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -5965,10 +6206,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// Erase every candidate that violates the restrictions above. (It could be
// true that we have viable candidates, so it's not worth bailing out in
// the case that, say, 1 out of 20 candidates violate the restructions.)
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- CantGuaranteeValueAcrossCall),
- RepeatedSequenceLocs.end());
+ llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -5991,7 +6229,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
NumBytesToCreateFrame += 4;
bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
- return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
+ return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
});
// We check to see if CFI Instructions are present, and if they are
@@ -6160,6 +6398,60 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
FrameID = MachineOutlinerNoLRSave;
} else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
+
+ // Bugzilla ID: 46767
+ // TODO: Check if fixing up the stack more than once is safe so we can
+ // outline these.
+ //
+ // An outline resulting in a caller that requires stack fixups at the
+ // callsite to a callee that also requires stack fixups can happen when
+ // there are no available registers at the candidate callsite for a
+ // candidate that itself also has calls.
+ //
+ // In other words if function_containing_sequence in the following pseudo
+ // assembly requires that we save LR at the point of the call, but there
+ // are no available registers: in this case we save using SP and as a
+ // result the SP offsets require stack fixups by multiples of 16.
+ //
+ // function_containing_sequence:
+ // ...
+ // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+ // call OUTLINED_FUNCTION_N
+ // restore LR from SP
+ // ...
+ //
+ // OUTLINED_FUNCTION_N:
+ // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+ // ...
+ // bl foo
+ // restore LR from SP
+ // ret
+ //
+ // Because the code to handle more than one stack fixup does not
+ // currently have the proper checks for legality, these cases will assert
+ // in the AArch64 MachineOutliner. This is because the code to do this
+ // needs more hardening, testing, better checks that generated code is
+ // legal, etc., and because it is only verified to handle a single pass of
+ // stack fixup.
+ //
+ // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
+ // these cases until they are known to be handled. Bugzilla 46767 is
+ // referenced in comments at the assert site.
+ //
+ // To avoid asserting (or generating non-legal code on noassert builds)
+ // we remove all candidates which would need more than one stack fixup by
+ // pruning the cases where the candidate has calls while also having no
+ // available LR and having no available general purpose registers to copy
+ // LR to (i.e. one extra stack save/restore).
+ //
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
+ return (std::any_of(
+ C.front(), std::next(C.back()),
+ [](const MachineInstr &MI) { return MI.isCall(); })) &&
+ (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
+ });
+ }
}
// If we dropped all of the candidates, bail out here.
@@ -6528,7 +6820,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
// If v8.3a features are available we can replace a RET instruction by
// RETAA or RETAB and omit the AUT instructions
- if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
+ if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
@@ -6580,9 +6872,12 @@ void AArch64InstrInfo::buildOutlinedFrame(
return MI.isCall() && !MI.isReturn();
};
- if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
+ if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
+
+ // Bugzilla ID: 46767
+ // TODO: Check if fixing up twice is safe so we can outline these.
assert(OF.FrameConstructionID != MachineOutlinerDefault &&
"Can only fix up stack references once");
fixupPostOutline(MBB);
@@ -6639,27 +6934,11 @@ void AArch64InstrInfo::buildOutlinedFrame(
// If a bunch of candidates reach this point they must agree on their return
// address signing. It is therefore enough to just consider the signing
// behaviour of one of them
- const Function &CF = OF.Candidates.front().getMF()->getFunction();
- bool ShouldSignReturnAddr = false;
- if (CF.hasFnAttribute("sign-return-address")) {
- StringRef Scope =
- CF.getFnAttribute("sign-return-address").getValueAsString();
- if (Scope.equals("all"))
- ShouldSignReturnAddr = true;
- else if (Scope.equals("non-leaf") && !IsLeafFunction)
- ShouldSignReturnAddr = true;
- }
+ const auto &MFI =
+     *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
+ bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
// a_key is the default
- bool ShouldSignReturnAddrWithAKey = true;
- if (CF.hasFnAttribute("sign-return-address-key")) {
- const StringRef Key =
- CF.getFnAttribute("sign-return-address-key").getValueAsString();
- // Key can either be a_key or b_key
- assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
- "Return address signing key must be either a_key or b_key");
- ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
- }
+ bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
// If this is a tail call outlined function, then there's already a return.
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
@@ -6818,10 +7097,9 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
!MI.getOperand(2).isImm())
return None;
- Offset = MI.getOperand(2).getImm() * Sign;
int Shift = MI.getOperand(3).getImm();
assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
- Offset = Offset << Shift;
+ Offset = Sign * (MI.getOperand(2).getImm() << Shift);
}
}
return RegImmPair{MI.getOperand(1).getReg(), Offset};
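A worked example of the corrected shift handling (illustrative):

    // add x0, x1, #3, lsl #12  becomes  ADDXri $x0, $x1, 3, 12
    // Offset = Sign * (3 << 12) = 12288, so the result is {$x1, 12288}.
    // Folding the sign in after the shift also avoids left-shifting an
    // already-negated value in the SUBXri case.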
@@ -6897,6 +7175,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
+bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
+}
+
+bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
+}
+
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
return AArch64::BLRNoIP;