Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 327
1 file changed, 316 insertions, 11 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 3b8f8a19fe49c..9e65ad2e18f95 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -68,6 +68,8 @@ private:
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
+ bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
unsigned ExtendImm, unsigned ZeroReg,
@@ -78,6 +80,9 @@ private:
bool expandSetTagLoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandSVESpillFill(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned Opc,
+ unsigned N);
};
} // end anonymous namespace
@@ -344,27 +349,225 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
return true;
}
+/// \brief Expand Pseudos to Instructions with destructive operands.
+///
+/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
+/// or for fixing relaxed register allocation conditions to comply with
+/// the instruction's register constraints. The latter case may be cheaper
+/// than setting the register constraints in the register allocator,
+/// since that would insert regular MOV instructions rather than MOVPRFX.
+///
+/// Example (after register allocation):
+///
+/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
+///
+/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
+/// * We cannot map directly to FSUB_ZPmZ_B because the register
+/// constraints of the instruction are not met.
+/// * Also, the _ZERO suffix specifies that the false lanes need to be zeroed.
+///
+/// We first check whether the destructive operand == result operand;
+/// if not, we try to swap the operands, e.g.
+///
+/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// But because FSUB_ZPmZ is not commutative, this is semantically
+/// different, so we need a reverse instruction:
+///
+/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// Then we implement the zeroing of the false lanes of Z0 by adding
+/// a zeroing MOVPRFX instruction:
+///
+/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
+/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// Note that this can only be done for _ZERO or _UNDEF variants where
+/// we can guarantee that the false lanes are zeroed (by implementing this)
+/// or that they are undef (don't care / not used); otherwise swapping
+/// the operands is illegal because the operation is not
+/// (or cannot be emulated to be) fully commutative.
+bool AArch64ExpandPseudo::expand_DestructiveOp(
+ MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
+ uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
+ uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
+ bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+
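+ // For a plain DestructiveBinary the destination may not alias the
+ // non-destructive source: it cannot be fixed up by swapping operands, and a
+ // MOVPRFX'd register must not appear as any other operand.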
+ if (DType == AArch64::DestructiveBinary)
+ assert(DstReg != MI.getOperand(3).getReg());
+
+ bool UseRev = false;
+ unsigned PredIdx, DOPIdx, SrcIdx;
+ switch (DType) {
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ if (DstReg == MI.getOperand(3).getReg()) {
+ // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
+ std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
+ UseRev = true;
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case AArch64::DestructiveBinary:
+ case AArch64::DestructiveBinaryImm:
+ std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
+ break;
+ default:
+ llvm_unreachable("Unsupported Destructive Operand type");
+ }
+
+#ifndef NDEBUG
+ // MOVPRFX can only be used when the destination register is used solely
+ // as the destructive operand and not as any other operand, so the
+ // destructive operand must be unique.
+ bool DOPRegIsUnique = false;
+ switch (DType) {
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ DOPRegIsUnique =
+ DstReg != MI.getOperand(DOPIdx).getReg() ||
+ MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
+ break;
+ case AArch64::DestructiveBinaryImm:
+ DOPRegIsUnique = true;
+ break;
+ }
+#endif
+
+ // Resolve the reverse opcode
+ if (UseRev) {
+ int NewOpcode;
+ // e.g. DIV -> DIVR
+ if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
+ Opcode = NewOpcode;
+ // e.g. DIVR -> DIV
+ else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
+ Opcode = NewOpcode;
+ }
+
+ // Get the right MOVPRFX
+ uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
+ unsigned MovPrfx, MovPrfxZero;
+ switch (ElementSize) {
+ case AArch64::ElementSizeNone:
+ case AArch64::ElementSizeB:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
+ break;
+ case AArch64::ElementSizeH:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
+ break;
+ case AArch64::ElementSizeS:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
+ break;
+ case AArch64::ElementSizeD:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
+ break;
+ default:
+ llvm_unreachable("Unsupported ElementSize");
+ }
+
+ //
+ // Create a MOVPRFX to set up the destructive operand (if required)
+ //
+ MachineInstrBuilder PRFX, DOP;
+ if (FalseZero) {
+#ifndef NDEBUG
+ assert(DOPRegIsUnique && "The destructive operand should be unique");
+#endif
+ assert(ElementSize != AArch64::ElementSizeNone &&
+ "This instruction is unpredicated");
+
+ // Copy the destructive source into the destination register, zeroing the false lanes
+ PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
+ .addReg(DstReg, RegState::Define)
+ .addReg(MI.getOperand(PredIdx).getReg())
+ .addReg(MI.getOperand(DOPIdx).getReg());
+
+ // After the movprfx, the destructive operand is the same as Dst
+ DOPIdx = 0;
+ } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
+#ifndef NDEBUG
+ assert(DOPRegIsUnique && "The destructive operand should be unique");
+#endif
+ PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
+ .addReg(DstReg, RegState::Define)
+ .addReg(MI.getOperand(DOPIdx).getReg());
+ DOPIdx = 0;
+ }
+
+ //
+ // Create the destructive operation
+ //
+ DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+
+ switch (DType) {
+ case AArch64::DestructiveBinaryImm:
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ DOP.add(MI.getOperand(PredIdx))
+ .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+ .add(MI.getOperand(SrcIdx));
+ break;
+ }
+
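+ // If a MOVPRFX was emitted, bundle it with the destructive operation so the
+ // two instructions cannot be separated by later passes.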
+ if (PRFX) {
+ finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
+ transferImpOps(MI, PRFX, DOP);
+ } else
+ transferImpOps(MI, DOP, DOP);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64ExpandPseudo::expandSetTagLoop(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- Register SizeReg = MI.getOperand(2).getReg();
- Register AddressReg = MI.getOperand(3).getReg();
+ Register SizeReg = MI.getOperand(0).getReg();
+ Register AddressReg = MI.getOperand(1).getReg();
MachineFunction *MF = MBB.getParent();
- bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
- const unsigned OpCode =
+ bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
+ const unsigned OpCode1 =
+ ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
+ const unsigned OpCode2 =
ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
+ unsigned Size = MI.getOperand(2).getImm();
+ assert(Size > 0 && Size % 16 == 0);
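+ // If the size is not a multiple of 32 bytes, peel off one post-indexed
+ // (ST|STZ)G so the loop below can use the paired (ST|STZ)2G store.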
+ if (Size % (16 * 2) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
+ .addReg(AddressReg)
+ .addReg(AddressReg)
+ .addImm(1);
+ Size -= 16;
+ }
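+ // Materialize the remaining size into SizeReg; the tag-store loop below
+ // counts it down to zero.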
+ MachineBasicBlock::iterator I =
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
+ .addImm(Size);
+ expandMOVImm(MBB, I, 64);
+
auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
MF->insert(++MBB.getIterator(), LoopBB);
MF->insert(++LoopBB->getIterator(), DoneBB);
- BuildMI(LoopBB, DL, TII->get(OpCode))
+ BuildMI(LoopBB, DL, TII->get(OpCode2))
.addDef(AddressReg)
.addReg(AddressReg)
.addReg(AddressReg)
@@ -402,6 +605,28 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
return true;
}
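+/// Expand a multi-vector SVE spill/fill pseudo (e.g. STR_ZZXI, LDR_ZZZZXI)
+/// into N consecutive single-vector STR_ZXI/LDR_ZXI instructions operating on
+/// the zsub0..zsubN-1 sub-registers at consecutive immediate offsets.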
+bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Opc, unsigned N) {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ MachineInstr &MI = *MBBI;
+ for (unsigned Offset = 0; Offset < N; ++Offset) {
+ int ImmOffset = MI.getOperand(2).getImm() + Offset;
+ bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
+ assert(ImmOffset >= -256 && ImmOffset < 256 &&
+ "Immediate spill offset out of range");
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+ .addReg(
+ TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
+ Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
+ .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
+ .addImm(ImmOffset);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -409,10 +634,76 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
+
+ // Check if we can expand the destructive op
+ int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
+ if (OrigInstr != -1) {
+ auto &Orig = TII->get(OrigInstr);
+ if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
+ != AArch64::NotDestructive) {
+ return expand_DestructiveOp(MI, MBB, MBBI);
+ }
+ }
+
switch (Opcode) {
default:
break;
+ case AArch64::BSPv8i8:
+ case AArch64::BSPv16i8: {
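+ // BSP has no hardware encoding; BSL, BIT and BIF are each destructive in a
+ // different source operand, so pick the variant whose destructive operand
+ // was allocated to the same register as the destination.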
+ Register DstReg = MI.getOperand(0).getReg();
+ if (DstReg == MI.getOperand(3).getReg()) {
+ // Expand to BIT
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
+ : AArch64::BITv16i8))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(1));
+ } else if (DstReg == MI.getOperand(2).getReg()) {
+ // Expand to BIF
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
+ : AArch64::BIFv16i8))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(1));
+ } else {
+ // Expand to BSL, use additional move if required
+ if (DstReg == MI.getOperand(1).getReg()) {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
+ : AArch64::BSLv16i8))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ } else {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
+ : AArch64::ORRv16i8))
+ .addReg(DstReg,
+ RegState::Define |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(1));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
+ : AArch64::BSLv16i8))
+ .add(MI.getOperand(0))
+ .addReg(DstReg,
+ RegState::Kill |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ }
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
case AArch64::ADDWrr:
case AArch64::SUBWrr:
case AArch64::ADDXrr:
@@ -599,10 +890,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
Register DstReg = MI.getOperand(0).getReg();
auto SysReg = AArch64SysReg::TPIDR_EL0;
MachineFunction *MF = MBB.getParent();
- if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
- MF->getTarget().getCodeModel() == CodeModel::Kernel)
- SysReg = AArch64SysReg::TPIDR_EL1;
- else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
+ if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
SysReg = AArch64SysReg::TPIDR_EL3;
else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
SysReg = AArch64SysReg::TPIDR_EL2;
@@ -676,7 +964,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
// almost always point to SP-after-prologue; if not, emit a longer
// instruction sequence.
int BaseOffset = -AFI->getTaggedBasePointerOffset();
- unsigned FrameReg;
+ Register FrameReg;
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
/*PreferFP=*/false,
@@ -706,9 +994,26 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case AArch64::STGloop_wback:
+ case AArch64::STZGloop_wback:
+ return expandSetTagLoop(MBB, MBBI, NextMBBI);
case AArch64::STGloop:
case AArch64::STZGloop:
- return expandSetTagLoop(MBB, MBBI, NextMBBI);
+ report_fatal_error(
+ "Non-writeback variants of STGloop / STZGloop should not "
+ "survive past PrologEpilogInserter.");
+ case AArch64::STR_ZZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
+ case AArch64::STR_ZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
+ case AArch64::STR_ZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
+ case AArch64::LDR_ZZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
+ case AArch64::LDR_ZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
+ case AArch64::LDR_ZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
}
return false;
}