Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r--   llvm/lib/Target/AArch64/AArch64InstrInfo.cpp   556
1 file changed, 421 insertions, 135 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5139ae5ccaf1..6b38e216a854 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -107,6 +107,13 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     break;
+  case TargetOpcode::STATEPOINT:
+    NumBytes = StatepointOpers(&MI).getNumPatchBytes();
+    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+    // No patch bytes means a normal call inst is emitted
+    if (NumBytes == 0)
+      NumBytes = 4;
+    break;
   case AArch64::TLSDESC_CALLSEQ:
     // This gets lowered to an instruction sequence which takes 16 bytes
     NumBytes = 16;
@@ -287,6 +294,31 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     }
   }
 
+  // If we're allowed to modify and the block ends in an unconditional branch
+  // which could simply fallthrough, remove the branch. (Note: This case only
+  // matters when we can't understand the whole sequence, otherwise it's also
+  // handled by BranchFolding.cpp.)
+  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
+      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
+    LastInst->eraseFromParent();
+    LastInst = SecondLastInst;
+    LastOpc = LastInst->getOpcode();
+    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+      assert(!isUncondBranchOpcode(LastOpc) &&
+             "unreachable unconditional branches removed above");
+
+      if (isCondBranchOpcode(LastOpc)) {
+        // Block ends with fall-through condbranch.
+        parseCondBranch(LastInst, TBB, Cond);
+        return false;
+      }
+      return true; // Can't handle indirect branch.
+    } else {
+      SecondLastInst = &*I;
+      SecondLastOpc = SecondLastInst->getOpcode();
+    }
+  }
+
   // If there are three terminators, we don't know what sort of block this is.
   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
     return true;
@@ -321,6 +353,56 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
+bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
+                                              MachineBranchPredicate &MBP,
+                                              bool AllowModify) const {
+  // For the moment, handle only a block which ends with a cb(n)zx followed by
+  // a fallthrough. Why this? Because it is a common form.
+  // TODO: Should we handle b.cc?
+
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return true;
+
+  // Skip over SpeculationBarrierEndBB terminators
+  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+    --I;
+  }
+
+  if (!isUnpredicatedTerminator(*I))
+    return true;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = &*I;
+  unsigned LastOpc = LastInst->getOpcode();
+  if (!isCondBranchOpcode(LastOpc))
+    return true;
+
+  switch (LastOpc) {
+  default:
+    return true;
+  case AArch64::CBZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZW:
+  case AArch64::CBNZX:
+    break;
+  };
+
+  MBP.TrueDest = LastInst->getOperand(1).getMBB();
+  assert(MBP.TrueDest && "expected!");
+  MBP.FalseDest = MBB.getNextNode();
+
+  MBP.ConditionDef = nullptr;
+  MBP.SingleUseCondition = false;
+
+  MBP.LHS = LastInst->getOperand(0);
+  MBP.RHS = MachineOperand::CreateImm(0);
+  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
+                                            : MachineBranchPredicate::PRED_EQ;
+  return false;
+}
+
 bool AArch64InstrInfo::reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const {
   if (Cond[0].getImm() != -1) {
@@ -1037,6 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   switch (MI.getOpcode()) {
   default:
     break;
+  case AArch64::PTEST_PP:
+    SrcReg = MI.getOperand(0).getReg();
+    SrcReg2 = MI.getOperand(1).getReg();
+    // Not sure about the mask and value for now...
+    CmpMask = ~0;
+    CmpValue = 0;
+    return true;
   case AArch64::SUBSWrr:
   case AArch64::SUBSWrs:
   case AArch64::SUBSWrx:
@@ -1192,10 +1281,9 @@ static bool areCFlagsAccessedBetweenInstrs(
     return true;
 
   // From must be above To.
-  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
-                      [From](MachineInstr &MI) {
-                        return MI.getIterator() == From;
-                      }) != To->getParent()->rend());
+  assert(std::any_of(
+      ++To.getReverse(), To->getParent()->rend(),
+      [From](MachineInstr &MI) { return MI.getIterator() == From; }));
 
   // We iterate backward starting at \p To until we hit \p From.
   for (const MachineInstr &Instr :
@@ -1208,6 +1296,127 @@ static bool areCFlagsAccessedBetweenInstrs(
   return false;
 }
 
+/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
+/// operation which could set the flags in an identical manner
+bool AArch64InstrInfo::optimizePTestInstr(
+    MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
+    const MachineRegisterInfo *MRI) const {
+  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
+  auto *Pred = MRI->getUniqueVRegDef(PredReg);
+  auto NewOp = Pred->getOpcode();
+  bool OpChanged = false;
+
+  unsigned MaskOpcode = Mask->getOpcode();
+  unsigned PredOpcode = Pred->getOpcode();
+  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
+  bool PredIsWhileLike = isWhileOpcode(PredOpcode);
+
+  if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
+    // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
+    // deactivate any lanes OTHER_INST might set.
+    uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
+    uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
+
+    // Must be an all active predicate of matching element size.
+    if ((PredElementSize != MaskElementSize) ||
+        (Mask->getOperand(1).getImm() != 31))
+      return false;
+
+    // Fallthrough to simply remove the PTEST.
+  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
+    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
+    // instruction that sets the flags as PTEST would.
+
+    // Fallthrough to simply remove the PTEST.
+  } else if (PredIsPTestLike) {
+    // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
+    // instructions use the same predicate.
+    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+    if (Mask != PTestLikeMask)
+      return false;
+
+    // Fallthrough to simply remove the PTEST.
+  } else {
+    switch (Pred->getOpcode()) {
+    case AArch64::BRKB_PPzP:
+    case AArch64::BRKPB_PPzPP: {
+      // Op 0 is chain, 1 is the mask, 2 the previous predicate to
+      // propagate, 3 the new predicate.
+
+      // Check to see if our mask is the same as the brkpb's. If
+      // not the resulting flag bits may be different and we
+      // can't remove the ptest.
+      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+      if (Mask != PredMask)
+        return false;
+
+      // Switch to the new opcode
+      NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
+                                                      : AArch64::BRKPBS_PPzPP;
+      OpChanged = true;
+      break;
+    }
+    case AArch64::BRKN_PPzP: {
+      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+      if (Mask != PredMask)
+        return false;
+
+      NewOp = AArch64::BRKNS_PPzP;
+      OpChanged = true;
+      break;
+    }
+    default:
+      // Bail out if we don't recognize the input
+      return false;
+    }
+  }
+
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+  // If the predicate is in a different block (possibly because it's been
+  // hoisted out), then assume the flags are set in between statements.
+  if (Pred->getParent() != PTest->getParent())
+    return false;
+
+  // If another instruction between the propagation and test sets the
+  // flags, don't remove the ptest.
+  MachineBasicBlock::iterator I = Pred, E = PTest;
+  ++I; // Skip past the predicate op itself.
+  for (; I != E; ++I) {
+    const MachineInstr &Inst = *I;
+
+    // TODO: If the ptest flags are unused, we could still remove it.
+    if (Inst.modifiesRegister(AArch64::NZCV, TRI))
+      return false;
+  }
+
+  // If we pass all the checks, it's safe to remove the PTEST and use the flags
+  // as they are prior to PTEST. Sometimes this requires the tested PTEST
+  // operand to be replaced with an equivalent instruction that also sets the
+  // flags.
+  Pred->setDesc(get(NewOp));
+  PTest->eraseFromParent();
+  if (OpChanged) {
+    bool succeeded = UpdateOperandRegClass(*Pred);
+    (void)succeeded;
+    assert(succeeded && "Operands have incompatible register classes!");
+    Pred->addRegisterDefined(AArch64::NZCV, TRI);
+  }
+
+  // Ensure that the flags def is live.
+  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
+    unsigned i = 0, e = Pred->getNumOperands();
+    for (; i != e; ++i) {
+      MachineOperand &MO = Pred->getOperand(i);
+      if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
+        MO.setIsDead(false);
+        break;
+      }
+    }
+  }
+  return true;
+}
+
 /// Try to optimize a compare instruction. A compare instruction is an
 /// instruction which produces AArch64::NZCV. It can be truly compare
 /// instruction
@@ -1246,6 +1455,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
     return true;
   }
 
+  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
+    return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
+
   // Continue only if we have a "ri" where immediate is zero.
   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
   // function.
@@ -2062,6 +2274,24 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
   return true;
 }
 
+Optional<ExtAddrMode>
+AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
+                                          const TargetRegisterInfo *TRI) const {
+  const MachineOperand *Base; // Filled with the base operand of MI.
+  int64_t Offset;             // Filled with the offset of MI.
+  bool OffsetIsScalable;
+  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
+    return None;
+
+  if (!Base->isReg())
+    return None;
+  ExtAddrMode AM;
+  AM.BaseReg = Base->getReg();
+  AM.Displacement = Offset;
+  AM.ScaledReg = 0;
+  return AM;
+}
+
 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
     bool &OffsetIsScalable, unsigned &Width,
@@ -2744,6 +2974,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  // Copy a Z register pair by copying the individual sub-registers.
+  if (AArch64::ZPR2RegClass.contains(DestReg) &&
+      AArch64::ZPR2RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy a Z register triple by copying the individual sub-registers.
+  if (AArch64::ZPR3RegClass.contains(DestReg) &&
+      AArch64::ZPR3RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy a Z register quad by copying the individual sub-registers.
+  if (AArch64::ZPR4RegClass.contains(DestReg) &&
+      AArch64::ZPR4RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2, AArch64::zsub3};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
   if (AArch64::GPR64spRegClass.contains(DestReg) &&
       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
@@ -3031,7 +3290,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
       Opc = AArch64::STR_PXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 4:
@@ -3075,7 +3334,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
       Opc = AArch64::STR_ZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 24:
@@ -3097,7 +3356,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
       Opc = AArch64::STR_ZZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 48:
@@ -3108,7 +3367,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
       Opc = AArch64::STR_ZZZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 64:
@@ -3119,7 +3378,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
       Opc = AArch64::STR_ZZZZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   }
@@ -3185,7 +3444,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
       Opc = AArch64::LDR_PXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 4:
@@ -3229,7 +3488,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
       Opc = AArch64::LDR_ZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 24:
@@ -3251,7 +3510,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
       Opc = AArch64::LDR_ZZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 48:
@@ -3262,7 +3521,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
       Opc = AArch64::LDR_ZZZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   case 64:
@@ -3273,7 +3532,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
       Opc = AArch64::LDR_ZZZZXI;
-      StackID = TargetStackID::SVEVector;
+      StackID = TargetStackID::ScalableVector;
     }
     break;
   }
@@ -3300,6 +3559,47 @@ bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
   });
 }
 
+void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+    const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
+  // The smallest scalable element supported by scaled SVE addressing
+  // modes are predicates, which are 2 scalable bytes in size. So the scalable
+  // byte offset must always be a multiple of 2.
+  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+  // VGSized offsets are divided by '2', because the VG register is the
+  // number of 64bit granules as opposed to 128bit vector chunks,
+  // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+  // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+  ByteSized = Offset.getFixed();
+  VGSized = Offset.getScalable() / 2;
+}
+
+/// Returns the offset in parts to which this frame offset can be
+/// decomposed for the purpose of describing a frame offset.
+/// For non-scalable offsets this is simply its byte size.
+void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+    const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
+    int64_t &NumDataVectors) {
+  // The smallest scalable element supported by scaled SVE addressing
+  // modes are predicates, which are 2 scalable bytes in size. So the scalable
+  // byte offset must always be a multiple of 2.
+  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+  NumBytes = Offset.getFixed();
+  NumDataVectors = 0;
+  NumPredicateVectors = Offset.getScalable() / 2;
+  // This method is used to get the offsets to adjust the frame offset.
+  // If the function requires ADDPL to be used and needs more than two ADDPL
+  // instructions, part of the offset is folded into NumDataVectors so that it
+  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
+  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+      NumPredicateVectors > 62) {
+    NumDataVectors = NumPredicateVectors / 8;
+    NumPredicateVectors -= NumDataVectors * 8;
+  }
+}
+
 // Helper function to emit a frame offset adjustment from a given
 // pointer (SrcReg), stored into DestReg. This function is explicit
 // in that it requires the opcode.
@@ -3409,12 +3709,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                            MachineInstr::MIFlag Flag, bool SetNZCV,
                            bool NeedsWinCFI, bool *HasWinCFI) {
   int64_t Bytes, NumPredicateVectors, NumDataVectors;
-  Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
+  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+      Offset, Bytes, NumPredicateVectors, NumDataVectors);
 
   // First emit non-scalable frame offsets, or a simple 'mov'.
   if (Bytes || (!Offset && SrcReg != DestReg)) {
-    assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
-           "SP increment/decrement not 16-byte aligned");
+    assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
+           "SP increment/decrement not 8-byte aligned");
     unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
     if (Bytes < 0) {
       Bytes = -Bytes;
@@ -3669,7 +3970,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
   // Construct the complete offset.
   bool IsMulVL = ScaleValue.isScalable();
   unsigned Scale = ScaleValue.getKnownMinSize();
-  int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
+  int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
 
   const MachineOperand &ImmOpnd =
       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -3711,11 +4012,9 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
     *OutUnscaledOp = *UnscaledOp;
 
   if (IsMulVL)
-    SOffset = StackOffset(Offset, MVT::nxv1i8) +
-              StackOffset(SOffset.getBytes(), MVT::i8);
+    SOffset = StackOffset::get(SOffset.getFixed(), Offset);
   else
-    SOffset = StackOffset(Offset, MVT::i8) +
-              StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
+    SOffset = StackOffset::get(Offset, SOffset.getScalable());
   return AArch64FrameOffsetCanUpdate |
          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
 }
@@ -3727,7 +4026,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   unsigned ImmIdx = FrameRegIdx + 1;
 
   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
-    Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
+    Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
@@ -3832,7 +4131,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
   return false;
 }
 
-// FP Opcodes that can be combined with a FMUL
+// FP Opcodes that can be combined with a FMUL.
 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   switch (Inst.getOpcode()) {
   default:
@@ -3854,8 +4153,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   case AArch64::FSUBv2f64:
   case AArch64::FSUBv4f32:
     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
-    return (Options.UnsafeFPMath ||
-            Options.AllowFPOpFusion == FPOpFusion::Fast);
+    // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
+    // the target options or if FADD/FSUB has the contract fast-math flag.
+    return Options.UnsafeFPMath ||
+           Options.AllowFPOpFusion == FPOpFusion::Fast ||
+           Inst.getFlag(MachineInstr::FmContract);
+    return true;
   }
   return false;
 }
@@ -4335,8 +4638,8 @@ bool AArch64InstrInfo::isThroughputPattern(
 /// pattern evaluator stops checking as soon as it finds a faster sequence.
 bool AArch64InstrInfo::getMachineCombinerPatterns(
-    MachineInstr &Root,
-    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+    bool DoRegPressureReduce) const {
   // Integer patterns
   if (getMaddPatterns(Root, Patterns))
     return true;
@@ -4344,7 +4647,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
   if (getFMAPatterns(Root, Patterns))
     return true;
 
-  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+                                                     DoRegPressureReduce);
 }
 
 enum class FMAInstKind { Default, Indexed, Accumulator };
@@ -4567,7 +4871,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
 
-  MachineInstr *MUL;
+  MachineInstr *MUL = nullptr;
   const TargetRegisterClass *RC;
   unsigned Opc;
   switch (Pattern) {
@@ -5388,6 +5692,9 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
   } // end switch (Pattern)
   // Record MUL and ADD/SUB for deletion
+  // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
+  // CodeGen/AArch64/urem-seteq-nonzero.ll.
+  // assert(MUL && "MUL was never set");
   DelInstrs.push_back(MUL);
   DelInstrs.push_back(&Root);
 }
@@ -5727,84 +6034,20 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
 static bool
 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
                                          const outliner::Candidate &b) {
-  const Function &Fa = a.getMF()->getFunction();
-  const Function &Fb = b.getMF()->getFunction();
-
-  // If none of the functions have the "sign-return-address" attribute their
-  // signing behaviour is equal
-  if (!Fa.hasFnAttribute("sign-return-address") &&
-      !Fb.hasFnAttribute("sign-return-address")) {
-    return true;
-  }
+  const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+  const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
 
-  // If both functions have the "sign-return-address" attribute their signing
-  // behaviour is equal, if the values of the attributes are equal
-  if (Fa.hasFnAttribute("sign-return-address") &&
-      Fb.hasFnAttribute("sign-return-address")) {
-    StringRef ScopeA =
-        Fa.getFnAttribute("sign-return-address").getValueAsString();
-    StringRef ScopeB =
-        Fb.getFnAttribute("sign-return-address").getValueAsString();
-    return ScopeA.equals(ScopeB);
-  }
-
-  // If function B doesn't have the "sign-return-address" attribute but A does,
-  // the functions' signing behaviour is equal if A's value for
-  // "sign-return-address" is "none" and vice versa.
-  if (Fa.hasFnAttribute("sign-return-address")) {
-    StringRef ScopeA =
-        Fa.getFnAttribute("sign-return-address").getValueAsString();
-    return ScopeA.equals("none");
-  }
-
-  if (Fb.hasFnAttribute("sign-return-address")) {
-    StringRef ScopeB =
-        Fb.getFnAttribute("sign-return-address").getValueAsString();
-    return ScopeB.equals("none");
-  }
-
-  llvm_unreachable("Unkown combination of sign-return-address attributes");
+  return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
+         MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
 }
 
 static bool
 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
                                        const outliner::Candidate &b) {
-  const Function &Fa = a.getMF()->getFunction();
-  const Function &Fb = b.getMF()->getFunction();
+  const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+  const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
 
-  // If none of the functions have the "sign-return-address-key" attribute
-  // their keys are equal
-  if (!Fa.hasFnAttribute("sign-return-address-key") &&
-      !Fb.hasFnAttribute("sign-return-address-key")) {
-    return true;
-  }
-
-  // If both functions have the "sign-return-address-key" attribute their
-  // keys are equal if the values of "sign-return-address-key" are equal
-  if (Fa.hasFnAttribute("sign-return-address-key") &&
-      Fb.hasFnAttribute("sign-return-address-key")) {
-    StringRef KeyA =
-        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
-    StringRef KeyB =
-        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
-    return KeyA.equals(KeyB);
-  }
-
-  // If B doesn't have the "sign-return-address-key" attribute, both keys are
-  // equal, if function a has the default key (a_key)
-  if (Fa.hasFnAttribute("sign-return-address-key")) {
-    StringRef KeyA =
-        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
-    return KeyA.equals_lower("a_key");
-  }
-
-  if (Fb.hasFnAttribute("sign-return-address-key")) {
-    StringRef KeyB =
-        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
-    return KeyB.equals_lower("a_key");
-  }
-
-  llvm_unreachable("Unkown combination of sign-return-address-key attributes");
+  return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
 }
 
 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
@@ -5860,9 +6103,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
   // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
   // necessary. However, at this point we don't know if the outlined function
   // will have a RET instruction so we assume the worst.
-  const Function &FCF = FirstCand.getMF()->getFunction();
   const TargetRegisterInfo &TRI = getRegisterInfo();
-  if (FCF.hasFnAttribute("sign-return-address")) {
+  if (FirstCand.getMF()
+          ->getInfo<AArch64FunctionInfo>()
+          ->shouldSignReturnAddress(true)) {
     // One PAC and one AUT instructions
     NumBytesToCreateFrame += 8;
@@ -5919,10 +6163,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
       return false;
     };
   // Remove candidates with illegal stack modifying instructions
-  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
-                                            RepeatedSequenceLocs.end(),
-                                            hasIllegalSPModification),
-                             RepeatedSequenceLocs.end());
+  llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
 
   // If the sequence doesn't have enough candidates left, then we're done.
   if (RepeatedSequenceLocs.size() < 2)
@@ -5965,10 +6206,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
   // Erase every candidate that violates the restrictions above. (It could be
   // true that we have viable candidates, so it's not worth bailing out in
   // the case that, say, 1 out of 20 candidates violate the restructions.)
-  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
-                                            RepeatedSequenceLocs.end(),
-                                            CantGuaranteeValueAcrossCall),
-                             RepeatedSequenceLocs.end());
+  llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
 
   // If the sequence doesn't have enough candidates left, then we're done.
   if (RepeatedSequenceLocs.size() < 2)
@@ -5991,7 +6229,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
     NumBytesToCreateFrame += 4;
 
   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
-    return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
+    return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
   });
 
   // We check to see if CFI Instructions are present, and if they are
@@ -6160,6 +6398,60 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
       FrameID = MachineOutlinerNoLRSave;
   } else {
     SetCandidateCallInfo(MachineOutlinerDefault, 12);
+
+    // Bugzilla ID: 46767
+    // TODO: Check if fixing up the stack more than once is safe so we can
+    // outline these.
+    //
+    // An outline resulting in a caller that requires stack fixups at the
+    // callsite to a callee that also requires stack fixups can happen when
+    // there are no available registers at the candidate callsite for a
+    // candidate that itself also has calls.
+    //
+    // In other words if function_containing_sequence in the following pseudo
+    // assembly requires that we save LR at the point of the call, but there
+    // are no available registers: in this case we save using SP and as a
+    // result the SP offsets require stack fixups by multiples of 16.
+    //
+    // function_containing_sequence:
+    //   ...
+    //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+    //   call OUTLINED_FUNCTION_N
+    //   restore LR from SP
+    //   ...
+    //
+    // OUTLINED_FUNCTION_N:
+    //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+    //   ...
+    //   bl foo
+    //   restore LR from SP
+    //   ret
+    //
+    // Because the code to handle more than one stack fixup does not
+    // currently have the proper checks for legality, these cases will assert
+    // in the AArch64 MachineOutliner. This is because the code to do this
+    // needs more hardening, testing, better checks that generated code is
+    // legal, etc and because it is only verified to handle a single pass of
+    // stack fixup.
+    //
+    // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
+    // these cases until they are known to be handled. Bugzilla 46767 is
+    // referenced in comments at the assert site.
+    //
+    // To avoid asserting (or generating non-legal code on noassert builds)
+    // we remove all candidates which would need more than one stack fixup by
+    // pruning the cases where the candidate has calls while also having no
+    // available LR and having no available general purpose registers to copy
+    // LR to (i.e. one extra stack save/restore).
+    //
+    if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+      erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
+        return (std::any_of(
+                   C.front(), std::next(C.back()),
+                   [](const MachineInstr &MI) { return MI.isCall(); })) &&
+               (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
+      });
+    }
   }
 
   // If we dropped all of the candidates, bail out here.
@@ -6528,7 +6820,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
 
   // If v8.3a features are available we can replace a RET instruction by
   // RETAA or RETAB and omit the AUT instructions
-  if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
+  if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
       MBBAUT->getOpcode() == AArch64::RET) {
     BuildMI(MBB, MBBAUT, DL,
             TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
@@ -6580,9 +6872,12 @@ void AArch64InstrInfo::buildOutlinedFrame(
     return MI.isCall() && !MI.isReturn();
   };
 
-  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
+  if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
     // Fix up the instructions in the range, since we're going to modify the
     // stack.
+
+    // Bugzilla ID: 46767
+    // TODO: Check if fixing up twice is safe so we can outline these.
     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
            "Can only fix up stack references once");
     fixupPostOutline(MBB);
@@ -6639,27 +6934,11 @@ void AArch64InstrInfo::buildOutlinedFrame(
   // If a bunch of candidates reach this point they must agree on their return
   // address signing. It is therefore enough to just consider the signing
   // behaviour of one of them
-  const Function &CF = OF.Candidates.front().getMF()->getFunction();
-  bool ShouldSignReturnAddr = false;
-  if (CF.hasFnAttribute("sign-return-address")) {
-    StringRef Scope =
-        CF.getFnAttribute("sign-return-address").getValueAsString();
-    if (Scope.equals("all"))
-      ShouldSignReturnAddr = true;
-    else if (Scope.equals("non-leaf") && !IsLeafFunction)
-      ShouldSignReturnAddr = true;
-  }
+  const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
+  bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
 
   // a_key is the default
-  bool ShouldSignReturnAddrWithAKey = true;
-  if (CF.hasFnAttribute("sign-return-address-key")) {
-    const StringRef Key =
-        CF.getFnAttribute("sign-return-address-key").getValueAsString();
-    // Key can either be a_key or b_key
-    assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
-           "Return address signing key must be either a_key or b_key");
-    ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
-  }
+  bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
 
   // If this is a tail call outlined function, then there's already a return.
   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
@@ -6818,10 +7097,9 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
     if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
         !MI.getOperand(2).isImm())
       return None;
-    Offset = MI.getOperand(2).getImm() * Sign;
     int Shift = MI.getOperand(3).getImm();
     assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
-    Offset = Offset << Shift;
+    Offset = Sign * (MI.getOperand(2).getImm() << Shift);
   }
   }
   return RegImmPair{MI.getOperand(1).getReg(), Offset};
@@ -6897,6 +7175,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
   return get(Opc).TSFlags & AArch64::ElementSizeMask;
 }
 
+bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
+  return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
+}
+
+bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
+  return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
+}
+
 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
   if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
     return AArch64::BLRNoIP;
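
The STATEPOINT sizing added in the first hunk is self-contained enough to restate: a statepoint may reserve an explicit patch area, and when it reserves none it lowers to a single 4-byte call instruction. A minimal standalone sketch of that rule (the helper name and driver are illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Sketch of the STATEPOINT case in getInstSizeInBytes: statepoints may carry
// an explicit patch-byte count (a multiple of 4 on AArch64, where every
// instruction is 4 bytes wide); zero patch bytes means a plain BL is emitted.
uint64_t statepointSizeInBytes(uint64_t NumPatchBytes) {
  assert(NumPatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
  return NumPatchBytes == 0 ? 4 : NumPatchBytes;
}

int main() {
  assert(statepointSizeInBytes(0) == 4);   // no patch area: one BL
  assert(statepointSizeInBytes(16) == 16); // reserved patch area dominates
  return 0;
}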
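The new decomposeStackOffsetForFrameOffsets deserves a worked example, since the constants are easy to misread: scalable offsets are materialized with ADDPL (one predicate-vector granule, 2 scalable bytes, immediate range -32..31) and ADDVL (one data-vector granule, 16 scalable bytes, i.e. 8 predicate granules). A self-contained sketch of the same arithmetic with an illustrative driver (names are mine, not LLVM's):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Restates the split above: a scalable byte offset becomes NumPredicateVectors
// (PL granules, 2 scalable bytes each) plus NumDataVectors (VL granules, 8 PL
// each). Folding into VL happens when the PL count is an exact multiple of 8
// or would need more than two ADDPL instructions (each covers -32..31).
void decomposeScalable(int64_t ScalableBytes, int64_t &NumPredicateVectors,
                       int64_t &NumDataVectors) {
  assert(ScalableBytes % 2 == 0 && "predicates are 2 scalable bytes");
  NumDataVectors = 0;
  NumPredicateVectors = ScalableBytes / 2;
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}

int main() {
  int64_t PL, VL;
  decomposeScalable(144, PL, VL); // 72 PL, a multiple of 8 -> 9 VL + 0 PL
  std::printf("VL=%lld PL=%lld\n", (long long)VL, (long long)PL);
  decomposeScalable(130, PL, VL); // 65 PL, beyond two ADDPLs -> 8 VL + 1 PL
  std::printf("VL=%lld PL=%lld\n", (long long)VL, (long long)PL);
  return 0;
}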
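Several outliner hunks replace the erase(remove_if(...)) dance with llvm::erase_if, a purely mechanical readability change. The two spellings are equivalent, as this small illustration on a plain std::vector shows:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Candidates{1, 2, 3, 4, 5, 6};
  auto Pred = [](int C) { return C % 2 != 0; };

  // The pre-change spelling: the classic erase-remove idiom.
  Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(), Pred),
                   Candidates.end());
  assert((Candidates == std::vector<int>{2, 4, 6}));

  // llvm::erase_if(Candidates, Pred) wraps exactly this pattern; C++20 added
  // the equivalent std::erase_if(Candidates, Pred) to the standard library.
  return 0;
}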
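Finally, the isAddImmediate hunk near the end does more than tidy the code: the old version applied the sign before shifting, so a SUB with a shifted immediate left-shifted a negative value, which is undefined behaviour in C++ before C++20. A minimal illustration of the reordered arithmetic (names are illustrative):

#include <cassert>
#include <cstdint>

// ADD/SUB Xd, Xn, #imm{, lsl #12} carries an unsigned 12-bit immediate; SUB is
// modelled with Sign = -1. Shifting the raw immediate first and applying the
// sign afterwards never left-shifts a negative value.
int64_t addImmOffset(int64_t Imm, int64_t Sign, int Shift) {
  return Sign * (Imm << Shift); // rather than (Imm * Sign) << Shift
}

int main() {
  assert(addImmOffset(3, 1, 0) == 3);        // plain ADD
  assert(addImmOffset(3, -1, 12) == -12288); // SUB with lsl #12
  return 0;
}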
