Diffstat (limited to 'llvm/lib/Target/ARM/Thumb2SizeReduction.cpp')
| -rw-r--r-- | llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 1151 |
1 file changed, 1151 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp new file mode 100644 index 000000000000..c5a62aa33990 --- /dev/null +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -0,0 +1,1151 @@ +//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "t2-reduce-size" +#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass" + +STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); +STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); +STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); + +static cl::opt<int> ReduceLimit("t2-reduce-limit", + cl::init(-1), cl::Hidden); +static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", + cl::init(-1), cl::Hidden); +static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", + cl::init(-1), cl::Hidden); + +namespace { + + /// ReduceTable - A static table with information on mapping from wide + /// opcodes to narrow + struct ReduceEntry { + uint16_t WideOpc; // Wide opcode + uint16_t NarrowOpc1; // Narrow opcode to transform to + uint16_t NarrowOpc2; // Narrow opcode when it's two-address + uint8_t Imm1Limit; // Limit of immediate field (bits) + uint8_t Imm2Limit; // Limit of immediate field when it's two-address + unsigned LowRegs1 : 1; // Only possible if low-registers are used + unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) + unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa. + // 1 - No cc field. + // 2 - Always set CPSR. 
+ unsigned PredCC2 : 2; + unsigned PartFlag : 1; // 16-bit instruction does partial flag update + unsigned Special : 1; // Needs to be dealt with specially + unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift) + }; + + static const ReduceEntry ReduceTable[] = { + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM + { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }, + { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 }, + { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 }, + { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 }, + //FIXME: Disable CMN, as CCodes are backwards from compare expectations + //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 }, + { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 }, + // FIXME: adr.n immediate offset must be multiple of 4. + //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 }, + // FIXME: Do we need the 16-bit 'S' variant? + { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 }, + { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 }, + { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 }, + { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 }, + { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + + // FIXME: Clean this up after splitting each Thumb load / store opcode + // into multiple ones. 
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 }, + + { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, + // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent. + // tSTMIA_UPD is a change in semantics which can only be used if the base + // register is killed. This difference is correctly handled elsewhere. + { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } + }; + + class Thumb2SizeReduce : public MachineFunctionPass { + public: + static char ID; + + const Thumb2InstrInfo *TII; + const ARMSubtarget *STI; + + Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr); + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return THUMB2_SIZE_REDUCE_NAME; + } + + private: + /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. + DenseMap<unsigned, unsigned> ReduceOpcodeMap; + + bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop); + + bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, + bool is2Addr, ARMCC::CondCodes Pred, + bool LiveCPSR, bool &HasCC, bool &CCDead); + + bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry); + + bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop); + + /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address + /// instruction. + bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, bool LiveCPSR, + bool IsSelfLoop); + + /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit + /// non-two-address instruction. + bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, bool LiveCPSR, + bool IsSelfLoop); + + /// ReduceMI - Attempt to reduce MI, return true on success. + bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, + bool LiveCPSR, bool IsSelfLoop); + + /// ReduceMBB - Reduce width of instructions in the specified basic block. + bool ReduceMBB(MachineBasicBlock &MBB); + + bool OptimizeSize; + bool MinimizeSize; + + // Last instruction to define CPSR in the current block. 
+ MachineInstr *CPSRDef; + // Was CPSR last defined by a high latency instruction? + // When CPSRDef is null, this refers to CPSR defs in predecessors. + bool HighLatencyCPSR; + + struct MBBInfo { + // The flags leaving this block have high latency. + bool HighLatencyCPSR = false; + // Has this block been visited yet? + bool Visited = false; + + MBBInfo() = default; + }; + + SmallVector<MBBInfo, 8> BlockInfo; + + std::function<bool(const Function &)> PredicateFtor; + }; + + char Thumb2SizeReduce::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false, + false) + +Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) + : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { + OptimizeSize = MinimizeSize = false; + for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { + unsigned FromOpc = ReduceTable[i].WideOpc; + if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) + llvm_unreachable("Duplicated entries?"); + } +} + +static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { + for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) + if (*Regs == ARM::CPSR) + return true; + return false; +} + +// Check for a likely high-latency flag def. +static bool isHighLatencyCPSR(MachineInstr *Def) { + switch(Def->getOpcode()) { + case ARM::FMSTAT: + case ARM::tMUL: + return true; + } + return false; +} + +/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations, +/// the 's' 16-bit instruction partially update CPSR. Abort the +/// transformation to avoid adding false dependency on last CPSR setting +/// instruction which hurts the ability for out-of-order execution engine +/// to do register renaming magic. +/// This function checks if there is a read-of-write dependency between the +/// last instruction that defines the CPSR and the current instruction. If there +/// is, then there is no harm done since the instruction cannot be retired +/// before the CPSR setting instruction anyway. +/// Note, we are not doing full dependency analysis here for the sake of compile +/// time. We're not looking for cases like: +/// r0 = muls ... +/// r1 = add.w r0, ... +/// ... +/// = mul.w r1 +/// In this case it would have been ok to narrow the mul.w to muls since there +/// are indirect RAW dependency between the muls and the mul.w +bool +Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { + // Disable the check for -Oz (aka OptimizeForSizeHarder). + if (MinimizeSize || !STI->avoidCPSRPartialUpdate()) + return false; + + if (!CPSRDef) + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. + return HighLatencyCPSR || FirstInSelfLoop; + + SmallSet<unsigned, 2> Defs; + for (const MachineOperand &MO : CPSRDef->operands()) { + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + Register Reg = MO.getReg(); + if (Reg == 0 || Reg == ARM::CPSR) + continue; + Defs.insert(Reg); + } + + for (const MachineOperand &MO : Use->operands()) { + if (!MO.isReg() || MO.isUndef() || MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (Defs.count(Reg)) + return false; + } + + // If the current CPSR has high latency, try to avoid the false dependency. + if (HighLatencyCPSR) + return true; + + // tMOVi8 usually doesn't start long dependency chains, and there are a lot + // of them, so always shrink them when CPSR doesn't have high latency. 
+ if (Use->getOpcode() == ARM::t2MOVi || + Use->getOpcode() == ARM::t2MOVi16) + return false; + + // No read-after-write dependency. The narrowing will add false dependency. + return true; +} + +bool +Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, + bool is2Addr, ARMCC::CondCodes Pred, + bool LiveCPSR, bool &HasCC, bool &CCDead) { + if ((is2Addr && Entry.PredCC2 == 0) || + (!is2Addr && Entry.PredCC1 == 0)) { + if (Pred == ARMCC::AL) { + // Not predicated, must set CPSR. + if (!HasCC) { + // Original instruction was not setting CPSR, but CPSR is not + // currently live anyway. It's ok to set it. The CPSR def is + // dead though. + if (!LiveCPSR) { + HasCC = true; + CCDead = true; + return true; + } + return false; + } + } else { + // Predicated, must not set CPSR. + if (HasCC) + return false; + } + } else if ((is2Addr && Entry.PredCC2 == 2) || + (!is2Addr && Entry.PredCC1 == 2)) { + /// Old opcode has an optional def of CPSR. + if (HasCC) + return true; + // If old opcode does not implicitly define CPSR, then it's not ok since + // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP. + if (!HasImplicitCPSRDef(MI->getDesc())) + return false; + HasCC = true; + } else { + // 16-bit instruction does not set CPSR. + if (HasCC) + return false; + } + + return true; +} + +static bool VerifyLowRegs(MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD); + bool isLROk = (Opc == ARM::t2STMDB_UPD); + bool isSPOk = isPCOk || isLROk; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + Register Reg = MO.getReg(); + if (Reg == 0 || Reg == ARM::CPSR) + continue; + if (isPCOk && Reg == ARM::PC) + continue; + if (isLROk && Reg == ARM::LR) + continue; + if (Reg == ARM::SP) { + if (isSPOk) + continue; + if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) + // Special case for these ldr / str with sp as base register. + continue; + } + if (!isARMLowRegister(Reg)) + return false; + } + return true; +} + +bool +Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry) { + if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) + return false; + + unsigned Scale = 1; + bool HasImmOffset = false; + bool HasShift = false; + bool HasOffReg = true; + bool isLdStMul = false; + unsigned Opc = Entry.NarrowOpc1; + unsigned OpNum = 3; // First 'rest' of operands. 
+ uint8_t ImmLimit = Entry.Imm1Limit; + + switch (Entry.WideOpc) { + default: + llvm_unreachable("Unexpected Thumb2 load / store opcode!"); + case ARM::t2LDRi12: + case ARM::t2STRi12: + if (MI->getOperand(1).getReg() == ARM::SP) { + Opc = Entry.NarrowOpc2; + ImmLimit = Entry.Imm2Limit; + } + + Scale = 4; + HasImmOffset = true; + HasOffReg = false; + break; + case ARM::t2LDRBi12: + case ARM::t2STRBi12: + HasImmOffset = true; + HasOffReg = false; + break; + case ARM::t2LDRHi12: + case ARM::t2STRHi12: + Scale = 2; + HasImmOffset = true; + HasOffReg = false; + break; + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSBs: + case ARM::t2LDRSHs: + case ARM::t2STRs: + case ARM::t2STRBs: + case ARM::t2STRHs: + HasShift = true; + OpNum = 4; + break; + case ARM::t2LDR_POST: + case ARM::t2STR_POST: { + if (!MinimizeSize) + return false; + + if (!MI->hasOneMemOperand() || + (*MI->memoperands_begin())->getAlignment() < 4) + return false; + + // We're creating a completely different type of load/store - LDM from LDR. + // For this reason we can't reuse the logic at the end of this function; we + // have to implement the MI building here. + bool IsStore = Entry.WideOpc == ARM::t2STR_POST; + Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg(); + Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg(); + unsigned Offset = MI->getOperand(3).getImm(); + unsigned PredImm = MI->getOperand(4).getImm(); + Register PredReg = MI->getOperand(5).getReg(); + assert(isARMLowRegister(Rt)); + assert(isARMLowRegister(Rn)); + + if (Offset != 4) + return false; + + // Add the 16-bit load / store instruction. + DebugLoc dl = MI->getDebugLoc(); + auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1)) + .addReg(Rn, RegState::Define) + .addReg(Rn) + .addImm(PredImm) + .addReg(PredReg) + .addReg(Rt, IsStore ? 0 : RegState::Define); + + // Transfer memoperands. + MIB.setMemRefs(MI->memoperands()); + + // Transfer MI flags. + MIB.setMIFlags(MI->getFlags()); + + // Kill the old instruction. + MI->eraseFromBundle(); + ++NumLdSts; + return true; + } + case ARM::t2LDMIA: { + Register BaseReg = MI->getOperand(0).getReg(); + assert(isARMLowRegister(BaseReg)); + + // For the non-writeback version (this one), the base register must be + // one of the registers being loaded. + bool isOK = false; + for (unsigned i = 3; i < MI->getNumOperands(); ++i) { + if (MI->getOperand(i).getReg() == BaseReg) { + isOK = true; + break; + } + } + + if (!isOK) + return false; + + OpNum = 0; + isLdStMul = true; + break; + } + case ARM::t2STMIA: + // If the base register is killed, we don't care what its value is after the + // instruction, so we can use an updating STMIA. 
+ if (!MI->getOperand(0).isKill()) + return false; + + break; + case ARM::t2LDMIA_RET: { + Register BaseReg = MI->getOperand(1).getReg(); + if (BaseReg != ARM::SP) + return false; + Opc = Entry.NarrowOpc2; // tPOP_RET + OpNum = 2; + isLdStMul = true; + break; + } + case ARM::t2LDMIA_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: { + OpNum = 0; + + Register BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == ARM::SP && + (Entry.WideOpc == ARM::t2LDMIA_UPD || + Entry.WideOpc == ARM::t2STMDB_UPD)) { + Opc = Entry.NarrowOpc2; // tPOP or tPUSH + OpNum = 2; + } else if (!isARMLowRegister(BaseReg) || + (Entry.WideOpc != ARM::t2LDMIA_UPD && + Entry.WideOpc != ARM::t2STMIA_UPD)) { + return false; + } + + isLdStMul = true; + break; + } + } + + unsigned OffsetReg = 0; + bool OffsetKill = false; + bool OffsetInternal = false; + if (HasShift) { + OffsetReg = MI->getOperand(2).getReg(); + OffsetKill = MI->getOperand(2).isKill(); + OffsetInternal = MI->getOperand(2).isInternalRead(); + + if (MI->getOperand(3).getImm()) + // Thumb1 addressing mode doesn't support shift. + return false; + } + + unsigned OffsetImm = 0; + if (HasImmOffset) { + OffsetImm = MI->getOperand(2).getImm(); + unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; + + if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) + // Make sure the immediate field fits. + return false; + } + + // Add the 16-bit load / store instruction. + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); + + // tSTMIA_UPD takes a defining register operand. We've already checked that + // the register is killed, so mark it as dead here. + if (Entry.WideOpc == ARM::t2STMIA) + MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); + + if (!isLdStMul) { + MIB.add(MI->getOperand(0)); + MIB.add(MI->getOperand(1)); + + if (HasImmOffset) + MIB.addImm(OffsetImm / Scale); + + assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); + + if (HasOffReg) + MIB.addReg(OffsetReg, getKillRegState(OffsetKill) | + getInternalReadRegState(OffsetInternal)); + } + + // Transfer the rest of operands. + for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) + MIB.add(MI->getOperand(OpNum)); + + // Transfer memoperands. + MIB.setMemRefs(MI->memoperands()); + + // Transfer MI flags. + MIB.setMIFlags(MI->getFlags()); + + LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + << " to 16-bit: " << *MIB); + + MBB.erase_instr(MI); + ++NumLdSts; + return true; +} + +bool +Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR, bool IsSelfLoop) { + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::t2ADDri) { + // If the source register is SP, try to reduce to tADDrSPi, otherwise + // it's a normal reduce. + if (MI->getOperand(1).getReg() != ARM::SP) { + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) + return true; + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); + } + // Try to reduce to tADDrSPi. + unsigned Imm = MI->getOperand(2).getImm(); + // The immediate must be in range, the destination register must be a low + // reg, the predicate must be "always" and the condition flags must not + // be being set. 
+ if (Imm & 3 || Imm > 1020) + return false; + if (!isARMLowRegister(MI->getOperand(0).getReg())) + return false; + if (MI->getOperand(3).getImm() != ARMCC::AL) + return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.hasOptionalDef() && + MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) + return false; + + MachineInstrBuilder MIB = + BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(ARM::tADDrSPi)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(Imm / 4) // The tADDrSPi has an implied scale by four. + .add(predOps(ARMCC::AL)); + + // Transfer MI flags. + MIB.setMIFlags(MI->getFlags()); + + LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + << " to 16-bit: " << *MIB); + + MBB.erase_instr(MI); + ++NumNarrows; + return true; + } + + if (Entry.LowRegs1 && !VerifyLowRegs(MI)) + return false; + + if (MI->mayLoadOrStore()) + return ReduceLoadStore(MBB, MI, Entry); + + switch (Opc) { + default: break; + case ARM::t2ADDSri: + case ARM::t2ADDSrr: { + unsigned PredReg = 0; + if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) { + switch (Opc) { + default: break; + case ARM::t2ADDSri: + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) + return true; + LLVM_FALLTHROUGH; + case ARM::t2ADDSrr: + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); + } + } + break; + } + case ARM::t2RSBri: + case ARM::t2RSBSri: + case ARM::t2SXTB: + case ARM::t2SXTH: + case ARM::t2UXTB: + case ARM::t2UXTH: + if (MI->getOperand(2).getImm() == 0) + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); + break; + case ARM::t2MOVi16: + // Can convert only 'pure' immediate operands, not immediates obtained as + // globals' addresses. + if (MI->getOperand(1).isImm()) + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); + break; + case ARM::t2CMPrr: { + // Try to reduce to the lo-reg only version first. Why there are two + // versions of the instruction is a mystery. + // It would be nice to just have two entries in the master table that + // are prioritized, but the table assumes a unique entry for each + // source insn opcode. So for now, we hack a local entry record to use. + static const ReduceEntry NarrowEntry = + { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; + if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop)) + return true; + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); + } + case ARM::t2TEQrr: { + unsigned PredReg = 0; + // Can only convert to eors if we're not in an IT block. + if (getInstrPredicate(*MI, PredReg) != ARMCC::AL) + break; + // TODO if Operand 0 is not killed but Operand 1 is, then we could write + // to Op1 instead. + if (MI->getOperand(0).isKill()) + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); + } + } + return false; +} + +bool +Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR, bool IsSelfLoop) { + if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) + return false; + + if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) + // Don't issue movs with shifter operand for some CPUs unless we + // are optimizing for size. + return false; + + Register Reg0 = MI->getOperand(0).getReg(); + Register Reg1 = MI->getOperand(1).getReg(); + // t2MUL is "special". The tied source operand is second, not first. + if (MI->getOpcode() == ARM::t2MUL) { + Register Reg2 = MI->getOperand(2).getReg(); + // Early exit if the regs aren't all low regs. 
+ if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) + || !isARMLowRegister(Reg2)) + return false; + if (Reg0 != Reg2) { + // If the other operand also isn't the same as the destination, we + // can't reduce. + if (Reg1 != Reg0) + return false; + // Try to commute the operands to make it a 2-address instruction. + MachineInstr *CommutedMI = TII->commuteInstruction(*MI); + if (!CommutedMI) + return false; + } + } else if (Reg0 != Reg1) { + // Try to commute the operands to make it a 2-address instruction. + unsigned CommOpIdx1 = 1; + unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; + if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) || + MI->getOperand(CommOpIdx2).getReg() != Reg0) + return false; + MachineInstr *CommutedMI = + TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2); + if (!CommutedMI) + return false; + } + if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) + return false; + if (Entry.Imm2Limit) { + unsigned Imm = MI->getOperand(2).getImm(); + unsigned Limit = (1 << Entry.Imm2Limit) - 1; + if (Imm > Limit) + return false; + } else { + Register Reg2 = MI->getOperand(2).getReg(); + if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) + return false; + } + + // Check if it's possible / necessary to transfer the predicate. + const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); + bool SkipPred = false; + if (Pred != ARMCC::AL) { + if (!NewMCID.isPredicable()) + // Can't transfer predicate, fail. + return false; + } else { + SkipPred = !NewMCID.isPredicable(); + } + + bool HasCC = false; + bool CCDead = false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.hasOptionalDef()) { + unsigned NumOps = MCID.getNumOperands(); + HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); + if (HasCC && MI->getOperand(NumOps-1).isDead()) + CCDead = true; + } + if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) + return false; + + // Avoid adding a false dependency on partial flag update by some 16-bit + // instructions which has the 's' bit set. + if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && + canAddPseudoFlagDep(MI, IsSelfLoop)) + return false; + + // Add the 16-bit instruction. + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); + MIB.add(MI->getOperand(0)); + if (NewMCID.hasOptionalDef()) + MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); + + // Transfer the rest of operands. + unsigned NumOps = MCID.getNumOperands(); + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) + continue; + if (SkipPred && MCID.OpInfo[i].isPredicate()) + continue; + MIB.add(MI->getOperand(i)); + } + + // Transfer MI flags. + MIB.setMIFlags(MI->getFlags()); + + LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + << " to 16-bit: " << *MIB); + + MBB.erase_instr(MI); + ++Num2Addrs; + return true; +} + +bool +Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR, bool IsSelfLoop) { + if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) + return false; + + if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) + // Don't issue movs with shifter operand for some CPUs unless we + // are optimizing for size. 
+ return false; + + unsigned Limit = ~0U; + if (Entry.Imm1Limit) + Limit = (1 << Entry.Imm1Limit) - 1; + + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { + if (MCID.OpInfo[i].isPredicate()) + continue; + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + Register Reg = MO.getReg(); + if (!Reg || Reg == ARM::CPSR) + continue; + if (Entry.LowRegs1 && !isARMLowRegister(Reg)) + return false; + } else if (MO.isImm() && + !MCID.OpInfo[i].isPredicate()) { + if (((unsigned)MO.getImm()) > Limit) + return false; + } + } + + // Check if it's possible / necessary to transfer the predicate. + const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); + bool SkipPred = false; + if (Pred != ARMCC::AL) { + if (!NewMCID.isPredicable()) + // Can't transfer predicate, fail. + return false; + } else { + SkipPred = !NewMCID.isPredicable(); + } + + bool HasCC = false; + bool CCDead = false; + if (MCID.hasOptionalDef()) { + unsigned NumOps = MCID.getNumOperands(); + HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); + if (HasCC && MI->getOperand(NumOps-1).isDead()) + CCDead = true; + } + if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) + return false; + + // Avoid adding a false dependency on partial flag update by some 16-bit + // instructions which has the 's' bit set. + if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && + canAddPseudoFlagDep(MI, IsSelfLoop)) + return false; + + // Add the 16-bit instruction. + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); + + // TEQ is special in that it doesn't define a register but we're converting + // it into an EOR which does. So add the first operand as a def and then + // again as a use. + if (MCID.getOpcode() == ARM::t2TEQrr) { + MIB.add(MI->getOperand(0)); + MIB->getOperand(0).setIsKill(false); + MIB->getOperand(0).setIsDef(true); + MIB->getOperand(0).setIsDead(true); + + if (NewMCID.hasOptionalDef()) + MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); + MIB.add(MI->getOperand(0)); + } else { + MIB.add(MI->getOperand(0)); + if (NewMCID.hasOptionalDef()) + MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); + } + + // Transfer the rest of operands. + unsigned NumOps = MCID.getNumOperands(); + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) + continue; + if ((MCID.getOpcode() == ARM::t2RSBSri || + MCID.getOpcode() == ARM::t2RSBri || + MCID.getOpcode() == ARM::t2SXTB || + MCID.getOpcode() == ARM::t2SXTH || + MCID.getOpcode() == ARM::t2UXTB || + MCID.getOpcode() == ARM::t2UXTH) && i == 2) + // Skip the zero immediate operand, it's now implicit. + continue; + bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); + if (SkipPred && isPred) + continue; + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) + // Skip implicit def of CPSR. Either it's modeled as an optional + // def now or it's already an implicit def on the new instruction. + continue; + MIB.add(MO); + } + if (!MCID.isPredicable() && NewMCID.isPredicable()) + MIB.add(predOps(ARMCC::AL)); + + // Transfer MI flags. 
+ MIB.setMIFlags(MI->getFlags()); + + LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + << " to 16-bit: " << *MIB); + + MBB.erase_instr(MI); + ++NumNarrows; + return true; +} + +static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) { + bool HasDef = false; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + if (MO.getReg() != ARM::CPSR) + continue; + + DefCPSR = true; + if (!MO.isDead()) + HasDef = true; + } + + return HasDef || LiveCPSR; +} + +static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.isUndef() || MO.isDef()) + continue; + if (MO.getReg() != ARM::CPSR) + continue; + assert(LiveCPSR && "CPSR liveness tracking is wrong!"); + if (MO.isKill()) { + LiveCPSR = false; + break; + } + } + + return LiveCPSR; +} + +bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, + bool LiveCPSR, bool IsSelfLoop) { + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); + if (OPI == ReduceOpcodeMap.end()) + return false; + const ReduceEntry &Entry = ReduceTable[OPI->second]; + + // Don't attempt normal reductions on "special" cases for now. + if (Entry.Special) + return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop); + + // Try to transform to a 16-bit two-address instruction. + if (Entry.NarrowOpc2 && + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) + return true; + + // Try to transform to a 16-bit non-two-address instruction. + if (Entry.NarrowOpc1 && + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) + return true; + + return false; +} + +bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + // Yes, CPSR could be livein. + bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); + MachineInstr *BundleMI = nullptr; + + CPSRDef = nullptr; + HighLatencyCPSR = false; + + // Check predecessors for the latest CPSRDef. + for (auto *Pred : MBB.predecessors()) { + const MBBInfo &PInfo = BlockInfo[Pred->getNumber()]; + if (!PInfo.Visited) { + // Since blocks are visited in RPO, this must be a back-edge. + continue; + } + if (PInfo.HighLatencyCPSR) { + HighLatencyCPSR = true; + break; + } + } + + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. + bool IsSelfLoop = MBB.isSuccessor(&MBB); + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); + MachineBasicBlock::instr_iterator NextMII; + for (; MII != E; MII = NextMII) { + NextMII = std::next(MII); + + MachineInstr *MI = &*MII; + if (MI->isBundle()) { + BundleMI = MI; + continue; + } + if (MI->isDebugInstr()) + continue; + + LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); + + // Does NextMII belong to the same bundle as MI? + bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); + + if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) { + Modified = true; + MachineBasicBlock::instr_iterator I = std::prev(NextMII); + MI = &*I; + // Removing and reinserting the first instruction in a bundle will break + // up the bundle. Fix the bundling if it was broken. + if (NextInSameBundle && !NextMII->isBundledWithPred()) + NextMII->bundleWithPred(); + } + + if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) { + // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill + // marker is only on the BUNDLE instruction. 
Process the BUNDLE + // instruction as we finish with the bundled instruction to work around + // the inconsistency. + if (BundleMI->killsRegister(ARM::CPSR)) + LiveCPSR = false; + MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); + if (MO && !MO->isDead()) + LiveCPSR = true; + MO = BundleMI->findRegisterUseOperand(ARM::CPSR); + if (MO && !MO->isKill()) + LiveCPSR = true; + } + + bool DefCPSR = false; + LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); + if (MI->isCall()) { + // Calls don't really set CPSR. + CPSRDef = nullptr; + HighLatencyCPSR = false; + IsSelfLoop = false; + } else if (DefCPSR) { + // This is the last CPSR defining instruction. + CPSRDef = MI; + HighLatencyCPSR = isHighLatencyCPSR(CPSRDef); + IsSelfLoop = false; + } + } + + MBBInfo &Info = BlockInfo[MBB.getNumber()]; + Info.HighLatencyCPSR = HighLatencyCPSR; + Info.Visited = true; + return Modified; +} + +bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { + if (PredicateFtor && !PredicateFtor(MF.getFunction())) + return false; + + STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); + if (STI->isThumb1Only() || STI->prefers32BitThumb()) + return false; + + TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo()); + + // Optimizing / minimizing size? Minimizing size implies optimizing for size. + OptimizeSize = MF.getFunction().hasOptSize(); + MinimizeSize = STI->hasMinSize(); + + BlockInfo.clear(); + BlockInfo.resize(MF.getNumBlockIDs()); + + // Visit blocks in reverse post-order so LastCPSRDef is known for all + // predecessors. + ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); + bool Modified = false; + for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator + I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + Modified |= ReduceMBB(**I); + return Modified; +} + +/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size +/// reduction pass. +FunctionPass *llvm::createThumb2SizeReductionPass( + std::function<bool(const Function &)> Ftor) { + return new Thumb2SizeReduce(std::move(Ftor)); +} |
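
The factory defined at the end of the new file, createThumb2SizeReductionPass, accepts an optional std::function<bool(const Function &)> predicate, and runOnMachineFunction returns immediately when that predicate rejects a function. The sketch below is illustrative only and is not part of this commit: the helper name and the optnone-based predicate are assumptions; only the factory signature and the early-exit behaviour come from the code above.

// Illustrative sketch (not from this commit). The helper and its predicate are
// hypothetical; createThumb2SizeReductionPass and its
// std::function<bool(const Function &)> parameter are taken from this file.
#include <functional>

#include "ARM.h"                    // declares llvm::createThumb2SizeReductionPass
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

// Create the pass so that it skips functions marked 'optnone'. When the
// predicate returns false, runOnMachineFunction bails out before doing any work.
llvm::FunctionPass *createGatedThumb2SizeReduction() {
  auto ShouldRun = [](const llvm::Function &F) {
    return !F.hasFnAttribute(llvm::Attribute::OptimizeNone);
  };
  return llvm::createThumb2SizeReductionPass(std::move(ShouldRun));
}

In-tree, the ARM backend's pass configuration (ARMTargetMachine.cpp) is the usual caller of this factory; the sketch above only illustrates how the per-function predicate hook is wired through.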
