diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-08-02 17:32:43 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-08-02 17:32:43 +0000 |
commit | b7eb8e35e481a74962664b63dfb09483b200209a (patch) | |
tree | 1937fb4a348458ce2d02ade03ac3bb0aa18d2fcd /lib/Target/AArch64 | |
parent | eb11fae6d08f479c0799db45860a98af528fa6e7 (diff) |
Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r-- | lib/Target/AArch64/AArch64FastISel.cpp | 2 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrFormats.td | 15 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrInfo.cpp | 254 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrInfo.h | 31 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstructionSelector.cpp | 96 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64LegalizerInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64MachineFunctionInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64RegisterInfo.td | 69 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64SVEInstrInfo.td | 106 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64TargetMachine.cpp | 3 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 161 | ||||
-rw-r--r-- | lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 14 | ||||
-rw-r--r-- | lib/Target/AArch64/SVEInstrFormats.td | 365 |
16 files changed, 921 insertions, 216 deletions
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 43a3ae77a170..572d1c22feea 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -3774,7 +3774,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c72f2ebee18..de762a7bb1d4 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8580,7 +8580,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, SDValue AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector<SDNode *> *Created) const { + SmallVectorImpl<SDNode *> &Created) const { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV @@ -8603,11 +8603,9 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp); - if (Created) { - Created->push_back(Cmp.getNode()); - Created->push_back(Add.getNode()); - Created->push_back(CSel.getNode()); - } + Created.push_back(Cmp.getNode()); + Created.push_back(Add.getNode()); + Created.push_back(CSel.getNode()); // Divide by pow2. SDValue SRA = @@ -8618,8 +8616,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, if (Divisor.isNonNegative()) return SRA; - if (Created) - Created->push_back(SRA.getNode()); + Created.push_back(SRA.getNode()); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 592845640a44..d783c8a6048c 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -644,7 +644,7 @@ private: SelectionDAG &DAG) const; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector<SDNode *> *Created) const override; + SmallVectorImpl<SDNode *> &Created) const override; SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps, bool &UseOneConst, bool Reciprocal) const override; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 1060c64f7b5d..15d61cd1ad26 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -57,6 +57,14 @@ class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> { let Size = 4; } +// Enum describing whether an instruction is +// destructive in its first source operand. +class DestructiveInstTypeEnum<bits<1> val> { + bits<1> Value = val; +} +def NotDestructive : DestructiveInstTypeEnum<0>; +def Destructive : DestructiveInstTypeEnum<1>; + // Normal instructions class I<dag oops, dag iops, string asm, string operands, string cstr, list<dag> pattern> @@ -64,6 +72,13 @@ class I<dag oops, dag iops, string asm, string operands, string cstr, dag OutOperandList = oops; dag InOperandList = iops; let AsmString = !strconcat(asm, operands); + + // Destructive operations (SVE) + DestructiveInstTypeEnum DestructiveInstType = NotDestructive; + ElementSizeEnum ElementSize = ElementSizeB; + + let TSFlags{3} = DestructiveInstType.Value; + let TSFlags{2-0} = ElementSize.Value; } class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 230480cf1cea..032d53d19620 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4851,75 +4851,92 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { return makeArrayRef(TargetFlags); } - /// Constants defining how certain sequences should be outlined. - /// This encompasses how an outlined function should be called, and what kind of - /// frame should be emitted for that outlined function. - /// - /// \p MachineOutlinerDefault implies that the function should be called with - /// a save and restore of LR to the stack. - /// - /// That is, - /// - /// I1 Save LR OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// I3 Restore LR I2 - /// I3 - /// RET - /// - /// * Call construction overhead: 3 (save + BL + restore) - /// * Frame construction overhead: 1 (ret) - /// * Requires stack fixups? Yes - /// - /// \p MachineOutlinerTailCall implies that the function is being created from - /// a sequence of instructions ending in a return. - /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> B OUTLINED_FUNCTION I1 - /// RET I2 - /// RET - /// - /// * Call construction overhead: 1 (B) - /// * Frame construction overhead: 0 (Return included in sequence) - /// * Requires stack fixups? No - /// - /// \p MachineOutlinerNoLRSave implies that the function should be called using - /// a BL instruction, but doesn't require LR to be saved and restored. This - /// happens when LR is known to be dead. - /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// I3 I2 - /// I3 - /// RET - /// - /// * Call construction overhead: 1 (BL) - /// * Frame construction overhead: 1 (RET) - /// * Requires stack fixups? No - /// - /// \p MachineOutlinerThunk implies that the function is being created from - /// a sequence of instructions ending in a call. The outlined function is - /// called with a BL instruction, and the outlined function tail-calls the - /// original call destination. - /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// BL f I2 - /// B f - /// * Call construction overhead: 1 (BL) - /// * Frame construction overhead: 0 - /// * Requires stack fixups? No - /// +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? Yes +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// RET I2 +/// RET +/// +/// * Call construction overhead: 1 (B) +/// * Frame construction overhead: 0 (Return included in sequence) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 1 (RET) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 0 +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? No enum MachineOutlinerClass { MachineOutlinerDefault, /// Emit a save, restore, call, and return. MachineOutlinerTailCall, /// Only emit a branch. MachineOutlinerNoLRSave, /// Emit a call and return. MachineOutlinerThunk, /// Emit a call and tail-call. + MachineOutlinerRegSave /// Same as default, but save to a register. }; enum MachineOutlinerMBBFlags { @@ -4927,6 +4944,27 @@ enum MachineOutlinerMBBFlags { HasCalls = 0x4 }; +unsigned +AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + MachineFunction *MF = C.getMF(); + const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : AArch64::GPR64RegClass) { + if (!ARI->isReservedReg(*MF, Reg) && + Reg != AArch64::LR && // LR is not reserved, but don't use it. + Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. + Reg != AArch64::X17 && // Ditto for X17. + C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. + return 0u; +} + outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { @@ -5015,11 +5053,27 @@ AArch64InstrInfo::getOutliningCandidateInfo( SetCandidateCallInfo(MachineOutlinerNoLRSave, 4); } - // LR is live, so we need to save it to the stack. + // LR is live, so we need to save it. Decide whether it should be saved to + // the stack, or if it can be saved to a register. else { - FrameID = MachineOutlinerDefault; - NumBytesToCreateFrame = 4; - SetCandidateCallInfo(MachineOutlinerDefault, 12); + if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [this](outliner::Candidate &C) { + return findRegisterToSaveLRTo(C); + })) { + // Every candidate has an available callee-saved register for the save. + // We can save LR to a register. + FrameID = MachineOutlinerRegSave; + NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerRegSave, 12); + } + + else { + // At least one candidate does not have an available callee-saved + // register. We must save LR to the stack. + FrameID = MachineOutlinerDefault; + NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerDefault, 12); + } } // Check if the range contains a call. These require a save + restore of the @@ -5088,7 +5142,7 @@ AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const { MBB.rend(), [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); - if (!LRU.available(AArch64::LR)) + if (!LRU.available(AArch64::LR)) Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; return Flags; @@ -5114,14 +5168,14 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, // ahead and skip over them. if (MI.isKill()) return outliner::InstrType::Invisible; - + // Is this a terminator for a basic block? if (MI.isTerminator()) { // Is this the end of a function? if (MI.getParent()->succ_empty()) return outliner::InstrType::Legal; - + // It's not, so don't outline it. return outliner::InstrType::Illegal; } @@ -5424,7 +5478,7 @@ void AArch64InstrInfo::buildOutlinedFrame( MBB.insert(MBB.end(), ret); // Did we have to modify the stack by saving the link register? - if (OF.FrameConstructionID == MachineOutlinerNoLRSave) + if (OF.FrameConstructionID != MachineOutlinerDefault) return; // We modified the stack. @@ -5457,13 +5511,41 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( // We want to return the spot where we inserted the call. MachineBasicBlock::iterator CallPt; - // We have a default call. Save the link register. - MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) - .addReg(AArch64::SP, RegState::Define) - .addReg(AArch64::LR) - .addReg(AArch64::SP) - .addImm(-16); - It = MBB.insert(It, STRXpre); + // Instructions for saving and restoring LR around the call instruction we're + // going to insert. + MachineInstr *Save; + MachineInstr *Restore; + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + // FIXME: This logic should be sunk into a target-specific interface so that + // we don't have to recompute the register. + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. + Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) + .addReg(AArch64::XZR) + .addReg(AArch64::LR) + .addImm(0); + Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) + .addReg(AArch64::XZR) + .addReg(Reg) + .addImm(0); + } else { + // We have the default case. Save and restore from SP. + Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::SP) + .addImm(-16); + Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); + } + + It = MBB.insert(It, Save); It++; // Insert the call. @@ -5472,13 +5554,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( CallPt = It; It++; - // Restore the link register. - MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) - .addReg(AArch64::SP, RegState::Define) - .addReg(AArch64::LR, RegState::Define) - .addReg(AArch64::SP) - .addImm(16); - It = MBB.insert(It, LDRXpost); - + It = MBB.insert(It, Restore); return CallPt; } + +bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( + MachineFunction &MF) const { + return MF.getFunction().optForMinSize(); +} diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 0e5953f6216d..11882e238b70 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -249,6 +249,7 @@ public: insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const override; + bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; /// Returns true if the instruction sets to an immediate value that can be /// executed more efficiently. bool isExynosResetFast(const MachineInstr &MI) const; @@ -271,6 +272,10 @@ private: ArrayRef<MachineOperand> Cond) const; bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg, const MachineRegisterInfo *MRI) const; + + /// Returns an unused general-purpose register which can be used for + /// constructing an outlined call if one exists. Returns 0 otherwise. + unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; }; /// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg @@ -339,6 +344,32 @@ static inline bool isIndirectBranchOpcode(int Opc) { return Opc == AArch64::BR; } +// struct TSFlags { +#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits +#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit +// } + +namespace AArch64 { + +enum ElementSizeType { + ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7), + ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0), + ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1), + ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2), + ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3), + ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4), +}; + +enum DestructiveInstType { + DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1), + NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0), + Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1), +}; + +#undef TSFLAG_ELEMENT_SIZE_TYPE +#undef TSFLAG_DESTRUCTIVE_INST_TYPE +} + } // end namespace llvm #endif diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 4d7ca2349ed1..b2b500320b5c 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -21,6 +21,7 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -94,6 +95,10 @@ private: void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const; + // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. + void materializeLargeCMVal(MachineInstr &I, const Value *V, + unsigned char OpFlags) const; + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; @@ -655,6 +660,45 @@ bool AArch64InstructionSelector::selectVaStartDarwin( return true; } +void AArch64InstructionSelector::materializeLargeCMVal( + MachineInstr &I, const Value *V, unsigned char OpFlags) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineIRBuilder MIB(I); + + auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass); + MovZ->addOperand(MF, I.getOperand(1)); + MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | + AArch64II::MO_NC); + MovZ->addOperand(MF, MachineOperand::CreateImm(0)); + constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); + + auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset, + unsigned ForceDstReg) { + unsigned DstReg = ForceDstReg + ? ForceDstReg + : MRI.createVirtualRegister(&AArch64::GPR64RegClass); + auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); + if (auto *GV = dyn_cast<GlobalValue>(V)) { + MovI->addOperand(MF, MachineOperand::CreateGA( + GV, MovZ->getOperand(1).getOffset(), Flags)); + } else { + MovI->addOperand( + MF, MachineOperand::CreateBA(cast<BlockAddress>(V), + MovZ->getOperand(1).getOffset(), Flags)); + } + MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); + constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); + return DstReg; + }; + unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), + AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); + DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); + BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); + return; +} + bool AArch64InstructionSelector::select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const { assert(I.getParent() && "Instruction should be in a basic block!"); @@ -936,36 +980,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, I.getOperand(1).setTargetFlags(OpFlags); } else if (TM.getCodeModel() == CodeModel::Large) { // Materialize the global using movz/movk instructions. - unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); - auto InsertPt = std::next(I.getIterator()); - auto MovZ = - BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi)) - .addDef(MovZDstReg); - MovZ->addOperand(MF, I.getOperand(1)); - MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | - AArch64II::MO_NC); - MovZ->addOperand(MF, MachineOperand::CreateImm(0)); - constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); - - auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, - unsigned Offset, unsigned ForceDstReg) { - unsigned DstReg = - ForceDstReg ? ForceDstReg - : MRI.createVirtualRegister(&AArch64::GPR64RegClass); - auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(), - TII.get(AArch64::MOVKXi)) - .addDef(DstReg) - .addReg(SrcReg); - MovI->addOperand(MF, MachineOperand::CreateGA( - GV, MovZ->getOperand(1).getOffset(), Flags)); - MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); - constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); - return DstReg; - }; - unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), - AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); - DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); - BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); + materializeLargeCMVal(I, GV, OpFlags); I.eraseFromParent(); return true; } else { @@ -1482,7 +1497,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, .addImm(1); I.eraseFromParent(); return true; - case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_IMPLICIT_DEF: { I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); const unsigned DstReg = I.getOperand(0).getReg(); @@ -1492,6 +1507,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I, RBI.constrainGenericRegister(DstReg, *DstRC, MRI); return true; } + case TargetOpcode::G_BLOCK_ADDR: { + if (TM.getCodeModel() == CodeModel::Large) { + materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); + I.eraseFromParent(); + return true; + } else { + I.setDesc(TII.get(AArch64::MOVaddrBA)); + auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), + I.getOperand(0).getReg()) + .addBlockAddress(I.getOperand(1).getBlockAddress(), + /* Offset */ 0, AArch64II::MO_PAGE) + .addBlockAddress( + I.getOperand(1).getBlockAddress(), /* Offset */ 0, + AArch64II::MO_NC | AArch64II::MO_PAGEOFF); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); + } + } + } return false; } diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 9b8c0a34efba..327c758a7f8e 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -293,6 +293,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic))); } + getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); + // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 798340f8fed8..e42214d15699 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -146,7 +146,7 @@ public: Optional<bool> hasRedZone() const { return HasRedZone; } void setHasRedZone(bool s) { HasRedZone = s; } - + int getVarArgsStackIndex() const { return VarArgsStackIndex; } void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; } diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index 7a653e117fd1..bbf401b474ca 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -764,18 +764,35 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>; def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>; } +// Enum descibing the element size for destructive +// operations. +class ElementSizeEnum<bits<3> val> { + bits<3> Value = val; +} + +def ElementSizeNone : ElementSizeEnum<0>; +def ElementSizeB : ElementSizeEnum<1>; +def ElementSizeH : ElementSizeEnum<2>; +def ElementSizeS : ElementSizeEnum<3>; +def ElementSizeD : ElementSizeEnum<4>; +def ElementSizeQ : ElementSizeEnum<5>; // Unused + class SVERegOp <string Suffix, AsmOperandClass C, + ElementSizeEnum Size, RegisterClass RC> : RegisterOperand<RC> { + ElementSizeEnum ElementSize; + + let ElementSize = Size; let PrintMethod = !if(!eq(Suffix, ""), "printSVERegOp<>", "printSVERegOp<'" # Suffix # "'>"); let ParserMatchClass = C; } -class PPRRegOp <string Suffix, AsmOperandClass C, - RegisterClass RC> : SVERegOp<Suffix, C, RC> {} -class ZPRRegOp <string Suffix, AsmOperandClass C, - RegisterClass RC> : SVERegOp<Suffix, C, RC> {} +class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size, + RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {} +class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size, + RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {} //****************************************************************************** @@ -805,11 +822,11 @@ def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; -def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>; -def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>; -def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>; -def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>; -def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>; +def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>; +def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>; +def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>; +def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>; +def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>; def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>; def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>; @@ -817,11 +834,11 @@ def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>; def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>; def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>; -def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>; -def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>; -def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>; -def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>; -def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>; +def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>; +def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>; +def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>; +def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>; +def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>; //****************************************************************************** @@ -874,28 +891,28 @@ def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>; def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>; def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>; -def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>; -def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>; -def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>; -def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>; -def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>; -def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>; +def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>; +def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>; +def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>; +def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>; +def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>; +def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>; def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">; def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">; def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">; -def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ZPR_3b>; -def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ZPR_3b>; -def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ZPR_3b>; +def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>; +def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>; +def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>; def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">; def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">; def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">; -def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ZPR_4b>; -def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ZPR_4b>; -def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ZPR_4b>; +def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>; +def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>; +def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>; class FPRasZPR<int Width> : AsmOperandClass{ let Name = "FPR" # Width # "asZPR"; diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td index 16e6ddda6398..0fde68011e86 100644 --- a/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -220,10 +220,33 @@ let Predicates = [HasSVE] in { def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">; def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">; + defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; + defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; + def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>; def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>; def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>; + def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">; + def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">; + def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">; + def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">; + + def BRKN_PPzP : sve_int_brkn<0b0, "brkn">; + def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">; + + defm BRKA_PPzP : sve_int_break_z<0b000, "brka">; + defm BRKA_PPmP : sve_int_break_m<0b001, "brka">; + defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">; + defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">; + defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">; + defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">; + + def PTEST_PP : sve_int_ptest<0b010000, "ptest">; + def PFALSE : sve_int_pfalse<0b000000, "pfalse">; + defm PFIRST : sve_int_pfirst<0b00000, "pfirst">; + defm PNEXT : sve_int_pnext<0b00110, "pnext">; + def AND_PPzPP : sve_int_pred_log<0b0000, "and">; def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">; def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">; @@ -731,6 +754,21 @@ let Predicates = [HasSVE] in { defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">; defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">; + defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">; + defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">; + defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">; + defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">; + + defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">; + defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">; + defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">; + defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">; + + def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; + def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>; + def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>; + def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>; + def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">; def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; @@ -854,40 +892,40 @@ let Predicates = [HasSVE] in { defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; - def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16>; - def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32>; - def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16>; - def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32>; - def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32>; - def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16>; - def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16>; - def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32>; - def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16>; - def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32>; - def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16>; - def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64>; - def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32>; - def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64>; - def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64>; - def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64>; - def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16>; - def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32>; - def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16>; - def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16>; - def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32>; - def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16>; - def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64>; - def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64>; - def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32>; - def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32>; - def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64>; - def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32>; - def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64>; - def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32>; - def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64>; - def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64>; - def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64>; - def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64>; + def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>; + def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>; + def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>; + def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>; + def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>; + def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>; + def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>; + def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>; + def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>; + def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>; + def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>; + def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>; + def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>; + def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>; + def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>; + def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>; + def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>; + def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>; + def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">; diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 01a997e5aed7..120d71381c67 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -255,6 +255,9 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, // AArch64 supports the MachineOutliner. setMachineOutliner(true); + + // AArch64 supports default outlining behaviour. + setSupportsDefaultOutlining(true); } AArch64TargetMachine::~AArch64TargetMachine() = default; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index d75fef7b0171..96e751e86971 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -577,7 +577,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, unsigned NumVectorInstToHideOverhead = 10; int MaxMergeDistance = 64; - if (Ty->isVectorTy() && SE && + if (Ty->isVectorTy() && SE && !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) return NumVectorInstToHideOverhead; diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index a51c41d70915..30a9a08f2346 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "MCTargetDesc/AArch64TargetStreamer.h" +#include "AArch64InstrInfo.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -79,6 +80,67 @@ private: // Map of register aliases registers via the .req directive. StringMap<std::pair<RegKind, unsigned>> RegisterReqs; + class PrefixInfo { + public: + static PrefixInfo CreateFromInst(const MCInst &Inst, uint64_t TSFlags) { + PrefixInfo Prefix; + switch (Inst.getOpcode()) { + case AArch64::MOVPRFX_ZZ: + Prefix.Active = true; + Prefix.Dst = Inst.getOperand(0).getReg(); + break; + case AArch64::MOVPRFX_ZPmZ_B: + case AArch64::MOVPRFX_ZPmZ_H: + case AArch64::MOVPRFX_ZPmZ_S: + case AArch64::MOVPRFX_ZPmZ_D: + Prefix.Active = true; + Prefix.Predicated = true; + Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask; + assert(Prefix.ElementSize != AArch64::ElementSizeNone && + "No destructive element size set for movprfx"); + Prefix.Dst = Inst.getOperand(0).getReg(); + Prefix.Pg = Inst.getOperand(2).getReg(); + break; + case AArch64::MOVPRFX_ZPzZ_B: + case AArch64::MOVPRFX_ZPzZ_H: + case AArch64::MOVPRFX_ZPzZ_S: + case AArch64::MOVPRFX_ZPzZ_D: + Prefix.Active = true; + Prefix.Predicated = true; + Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask; + assert(Prefix.ElementSize != AArch64::ElementSizeNone && + "No destructive element size set for movprfx"); + Prefix.Dst = Inst.getOperand(0).getReg(); + Prefix.Pg = Inst.getOperand(1).getReg(); + break; + default: + break; + } + + return Prefix; + } + + PrefixInfo() : Active(false), Predicated(false) {} + bool isActive() const { return Active; } + bool isPredicated() const { return Predicated; } + unsigned getElementSize() const { + assert(Predicated); + return ElementSize; + } + unsigned getDstReg() const { return Dst; } + unsigned getPgReg() const { + assert(Predicated); + return Pg; + } + + private: + bool Active; + bool Predicated; + unsigned ElementSize; + unsigned Dst; + unsigned Pg; + } NextPrefix; + AArch64TargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<AArch64TargetStreamer &>(TS); @@ -113,7 +175,8 @@ private: bool parseDirectiveReq(StringRef Name, SMLoc L); bool parseDirectiveUnreq(SMLoc L); - bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc); + bool validateInstruction(MCInst &Inst, SMLoc &IDLoc, + SmallVectorImpl<SMLoc> &Loc); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -3665,12 +3728,89 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, return false; } +static inline bool isMatchingOrAlias(unsigned ZReg, unsigned Reg) { + assert((ZReg >= AArch64::Z0) && (ZReg <= AArch64::Z31)); + return (ZReg == ((Reg - AArch64::B0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::H0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::S0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::D0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::Q0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::Z0) + AArch64::Z0)); +} + // FIXME: This entire function is a giant hack to provide us with decent // operand range validation/diagnostics until TableGen/MC can be extended // to support autogeneration of this kind of validation. -bool AArch64AsmParser::validateInstruction(MCInst &Inst, - SmallVectorImpl<SMLoc> &Loc) { +bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc, + SmallVectorImpl<SMLoc> &Loc) { const MCRegisterInfo *RI = getContext().getRegisterInfo(); + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + + // A prefix only applies to the instruction following it. Here we extract + // prefix information for the next instruction before validating the current + // one so that in the case of failure we don't erronously continue using the + // current prefix. + PrefixInfo Prefix = NextPrefix; + NextPrefix = PrefixInfo::CreateFromInst(Inst, MCID.TSFlags); + + // Before validating the instruction in isolation we run through the rules + // applicable when it follows a prefix instruction. + // NOTE: brk & hlt can be prefixed but require no additional validation. + if (Prefix.isActive() && + (Inst.getOpcode() != AArch64::BRK) && + (Inst.getOpcode() != AArch64::HLT)) { + + // Prefixed intructions must have a destructive operand. + if ((MCID.TSFlags & AArch64::DestructiveInstTypeMask) == + AArch64::NotDestructive) + return Error(IDLoc, "instruction is unpredictable when following a" + " movprfx, suggest replacing movprfx with mov"); + + // Destination operands must match. + if (Inst.getOperand(0).getReg() != Prefix.getDstReg()) + return Error(Loc[0], "instruction is unpredictable when following a" + " movprfx writing to a different destination"); + + // Destination operand must not be used in any other location. + for (unsigned i = 1; i < Inst.getNumOperands(); ++i) { + if (Inst.getOperand(i).isReg() && + (MCID.getOperandConstraint(i, MCOI::TIED_TO) == -1) && + isMatchingOrAlias(Prefix.getDstReg(), Inst.getOperand(i).getReg())) + return Error(Loc[0], "instruction is unpredictable when following a" + " movprfx and destination also used as non-destructive" + " source"); + } + + auto PPRRegClass = AArch64MCRegisterClasses[AArch64::PPRRegClassID]; + if (Prefix.isPredicated()) { + int PgIdx = -1; + + // Find the instructions general predicate. + for (unsigned i = 1; i < Inst.getNumOperands(); ++i) + if (Inst.getOperand(i).isReg() && + PPRRegClass.contains(Inst.getOperand(i).getReg())) { + PgIdx = i; + break; + } + + // Instruction must be predicated if the movprfx is predicated. + if (PgIdx == -1 || + (MCID.TSFlags & AArch64::ElementSizeMask) == AArch64::ElementSizeNone) + return Error(IDLoc, "instruction is unpredictable when following a" + " predicated movprfx, suggest using unpredicated movprfx"); + + // Instruction must use same general predicate as the movprfx. + if (Inst.getOperand(PgIdx).getReg() != Prefix.getPgReg()) + return Error(IDLoc, "instruction is unpredictable when following a" + " predicated movprfx using a different general predicate"); + + // Instruction element type must match the movprfx. + if ((MCID.TSFlags & AArch64::ElementSizeMask) != Prefix.getElementSize()) + return Error(IDLoc, "instruction is unpredictable when following a" + " predicated movprfx with a different element size"); + } + } + // Check for indexed addressing modes w/ the base register being the // same as a destination/source register or pair load where // the Rt == Rt2. All of those are undefined behaviour. @@ -4516,7 +4656,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, NumOperands = Operands.size(); for (unsigned i = 1; i < NumOperands; ++i) OperandLocs.push_back(Operands[i]->getStartLoc()); - if (validateInstruction(Inst, OperandLocs)) + if (validateInstruction(Inst, IDLoc, OperandLocs)) return true; Inst.setLoc(IDLoc); @@ -4719,7 +4859,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { const MCObjectFileInfo::Environment Format = getContext().getObjectFileInfo()->getObjectFileType(); bool IsMachO = Format == MCObjectFileInfo::IsMachO; - bool IsCOFF = Format == MCObjectFileInfo::IsCOFF; StringRef IDVal = DirectiveID.getIdentifier(); SMLoc Loc = DirectiveID.getLoc(); @@ -4733,14 +4872,14 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveLtorg(Loc); else if (IDVal == ".unreq") parseDirectiveUnreq(Loc); - else if (!IsMachO && !IsCOFF) { - if (IDVal == ".inst") - parseDirectiveInst(Loc); + else if (IDVal == ".inst") + parseDirectiveInst(Loc); + else if (IsMachO) { + if (IDVal == MCLOHDirectiveName()) + parseDirectiveLOH(IDVal, Loc); else return true; - } else if (IDVal == MCLOHDirectiveName()) - parseDirectiveLOH(IDVal, Loc); - else + } else return true; return false; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index 1b949b54590c..dee964df2635 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -39,4 +39,16 @@ void AArch64TargetStreamer::emitCurrentConstantPool() { // finish() - write out any non-empty assembler constant pools. void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); } -void AArch64TargetStreamer::emitInst(uint32_t Inst) {} +void AArch64TargetStreamer::emitInst(uint32_t Inst) { + char Buffer[4]; + + // We can't just use EmitIntValue here, as that will swap the + // endianness on big-endian systems (instructions are always + // little-endian). + for (unsigned I = 0; I < 4; ++I) { + Buffer[I] = uint8_t(Inst); + Inst >>= 8; + } + + getStreamer().EmitBytes(StringRef(Buffer, 4)); +} diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td index 17b3f6041279..7a8dd8bc5aee 100644 --- a/lib/Target/AArch64/SVEInstrFormats.td +++ b/lib/Target/AArch64/SVEInstrFormats.td @@ -282,6 +282,79 @@ let Predicates = [HasSVE] in { //===----------------------------------------------------------------------===// +// SVE Predicate Misc Group +//===----------------------------------------------------------------------===// + +class sve_int_pfalse<bits<6> opc, string asm> +: I<(outs PPR8:$Pd), (ins), + asm, "\t$Pd", + "", + []>, Sched<[]> { + bits<4> Pd; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{3-1}; + let Inst{15-10} = 0b111001; + let Inst{9} = opc{0}; + let Inst{8-4} = 0b00000; + let Inst{3-0} = Pd; +} + +class sve_int_ptest<bits<6> opc, string asm> +: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn), + asm, "\t$Pg, $Pn", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b010; + let Inst{18-16} = opc{3-1}; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9} = opc{0}; + let Inst{8-5} = Pn; + let Inst{4-0} = 0b00000; + + let Defs = [NZCV]; +} + +class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm, + PPRRegOp pprty> +: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn), + asm, "\t$Pdn, $Pg, $_Pdn", + "", + []>, Sched<[]> { + bits<4> Pdn; + bits<4> Pg; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b11000; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4} = 0; + let Inst{3-0} = Pdn; + + let Constraints = "$Pdn = $_Pdn"; + let Defs = [NZCV]; +} + +multiclass sve_int_pfirst<bits<5> opc, string asm> { + def : sve_int_pfirst_next<0b01, opc, asm, PPR8>; +} + +multiclass sve_int_pnext<bits<5> opc, string asm> { + def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>; + def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>; + def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>; + def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>; +} + +//===----------------------------------------------------------------------===// // SVE Predicate Count Group //===----------------------------------------------------------------------===// @@ -348,6 +421,8 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_count_v<bits<5> opc, string asm> { @@ -433,6 +508,8 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> { @@ -738,6 +815,8 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_insrs<string asm> { @@ -762,6 +841,8 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_insrv<string asm> { @@ -790,6 +871,8 @@ class sve_int_perm_extract_i<string asm> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } //===----------------------------------------------------------------------===// @@ -883,6 +966,8 @@ class sve_int_log_imm<bits<2> opc, string asm> let Constraints = "$Zdn = $_Zdn"; let DecoderMethod = "DecodeSVELogicalImmInstruction"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> { @@ -993,6 +1078,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> { @@ -1020,6 +1107,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> { @@ -1045,6 +1134,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_fp_ftmad<string asm> { @@ -1106,6 +1197,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty> let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> { @@ -1135,6 +1228,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> { @@ -1163,6 +1258,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> { @@ -1253,6 +1350,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty> let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_fcmla<string asm> { @@ -1284,6 +1383,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_fp_fcmla_by_indexed_elem<string asm> { @@ -1325,6 +1426,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_fcadd<string asm> { @@ -1405,7 +1508,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> { //===----------------------------------------------------------------------===// class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, - RegisterOperand o_zprtype> + RegisterOperand o_zprtype, ElementSizeEnum size> : I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", @@ -1423,12 +1526,14 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = size; } multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> { - def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16>; - def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32>; - def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64>; + def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; + def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; + def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>; } //===----------------------------------------------------------------------===// @@ -1480,6 +1585,8 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_bin_pred_log<bits<3> opc, string asm> { @@ -1541,6 +1648,8 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> { @@ -1571,6 +1680,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> { @@ -1601,6 +1712,8 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty1.ElementSize; } multiclass sve_intx_dot<bit opc, string asm> { @@ -1629,6 +1742,8 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> { @@ -1670,6 +1785,8 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> { @@ -1800,6 +1917,8 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_arith_imm0<bits<3> opc, string asm> { @@ -1825,6 +1944,8 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> { @@ -1885,6 +2006,8 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_dup_fpimm_pred<string asm> { @@ -1917,6 +2040,9 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm, let Inst{13} = imm{8}; // sh let Inst{12-5} = imm{7-0}; // imm8 let Inst{4-0} = Zd; + + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_dup_imm_pred_merge<string asm> { @@ -2083,6 +2209,65 @@ multiclass sve_int_ucmp_vi<bits<2> opc, string asm> { //===----------------------------------------------------------------------===// +// SVE Integer Compare - Scalars Group +//===----------------------------------------------------------------------===// + +class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt> +: I<(outs), (ins rt:$Rn, rt:$Rm), + asm, "\t$Rn, $Rm", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Rn; + let Inst{31-23} = 0b001001011; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = opc; + let Inst{3-0} = 0b0000; + + let Defs = [NZCV]; +} + +class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm, + RegisterClass gprty, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm), + asm, "\t$Pd, $Rn, $Rm", + "", []>, Sched<[]> { + bits<4> Pd; + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc{3-1}; + let Inst{9-5} = Rn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_while4_rr<bits<3> opc, string asm> { + def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; + def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; + def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; + def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; +} + +multiclass sve_int_while8_rr<bits<3> opc, string asm> { + def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; + def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; + def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>; + def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; +} + + +//===----------------------------------------------------------------------===// // SVE Floating Point Fast Reduction Group //===----------------------------------------------------------------------===// @@ -2312,9 +2497,9 @@ multiclass sve_int_index_rr<string asm> { //===----------------------------------------------------------------------===// // SVE Bitwise Shift - Predicated Group //===----------------------------------------------------------------------===// - class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm, - ZPRRegOp zprty, Operand immtype> + ZPRRegOp zprty, Operand immtype, + ElementSizeEnum size> : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), asm, "\t$Zdn, $Pg/m, $_Zdn, $imm", "", @@ -2333,31 +2518,41 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = size; } multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16, + ElementSizeH> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32, + ElementSizeS> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64, + ElementSizeD> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } } multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, + ElementSizeH> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32, + ElementSizeS> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64, + ElementSizeD> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } @@ -2383,6 +2578,8 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> { @@ -3017,6 +3214,8 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_clast_zz<bit ab, string asm> { @@ -3094,6 +3293,8 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_splice<string asm> { @@ -3122,6 +3323,8 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_perm_rev_rbit<string asm> { @@ -3163,6 +3366,8 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_perm_cpy_r<string asm> { @@ -3198,6 +3403,8 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_perm_cpy_v<string asm> { @@ -4117,3 +4324,133 @@ multiclass sve_int_reduce_2<bits<3> opc, string asm> { def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>; def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>; } + +class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm, + ZPRRegOp zprty, string pg_suffix, dag iops> +: I<(outs zprty:$Zd), iops, + asm, "\t$Zd, $Pg"#pg_suffix#", $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_32; + let Inst{21-19} = 0b010; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> { +let Constraints = "$Zd = $_Zd" in { + def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m", + (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>; + def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m", + (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>; + def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m", + (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>; + def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m", + (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>; +} +} + +multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> { + def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z", + (ins PPR3bAny:$Pg, ZPR8:$Zn)>; + def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z", + (ins PPR3bAny:$Pg, ZPR16:$Zn)>; + def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z", + (ins PPR3bAny:$Pg, ZPR32:$Zn)>; + def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z", + (ins PPR3bAny:$Pg, ZPR64:$Zn)>; +} + +//===----------------------------------------------------------------------===// +// SVE Propagate Break Group +//===----------------------------------------------------------------------===// + +class sve_int_brkp<bits<2> opc, string asm> +: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm), + asm, "\t$Pd, $Pg/z, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23} = 0b0; + let Inst{22} = opc{1}; + let Inst{21-20} = 0b00; + let Inst{19-16} = Pm; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (opc{1}, 1), [NZCV], []); +} + + +//===----------------------------------------------------------------------===// +// SVE Partition Break Group +//===----------------------------------------------------------------------===// + +class sve_int_brkn<bit S, string asm> +: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm), + asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm", + "", + []>, Sched<[]> { + bits<4> Pdm; + bits<4> Pg; + bits<4> Pn; + let Inst{31-23} = 0b001001010; + let Inst{22} = S; + let Inst{21-14} = 0b01100001; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pdm; + + let Constraints = "$Pdm = $_Pdm"; + let Defs = !if(!eq (S, 0b1), [NZCV], []); +} + +class sve_int_break<bits<3> opc, string asm, string suffix, dag iops> +: I<(outs PPR8:$Pd), iops, + asm, "\t$Pd, $Pg"#suffix#", $Pn", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{2-1}; + let Inst{21-14} = 0b01000001; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", ""); + let Defs = !if(!eq (opc{1}, 1), [NZCV], []); + +} + +multiclass sve_int_break_m<bits<3> opc, string asm> { + def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>; +} + +multiclass sve_int_break_z<bits<3> opc, string asm> { + def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>; +} + |