diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 222 |
1 files changed, 106 insertions, 116 deletions
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 8a29456430b9..4a19ecd69103 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -98,8 +98,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { const TargetRegisterInfo *TRI; const AArch64Subtarget *Subtarget; - // Track which registers have been modified and used. - BitVector ModifiedRegs, UsedRegs; + // Track which register units have been modified and used. + LiveRegUnits ModifiedRegUnits, UsedRegUnits; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AAResultsWrapperPass>(); @@ -702,16 +702,17 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR) .add(BaseRegOp) .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*MergeMI)); + .setMemRefs(I->mergeMemRefsWith(*MergeMI)) + .setMIFlags(I->mergeFlagsWith(*MergeMI)); (void)MIB; - DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(MergeMI->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); + LLVM_DEBUG(I->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(MergeMI->print(dbgs())); + LLVM_DEBUG(dbgs() << " with instruction:\n "); + LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); + LLVM_DEBUG(dbgs() << "\n"); // Erase the old instructions. I->eraseFromParent(); @@ -818,15 +819,17 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, .add(RegOp1) .add(BaseRegOp) .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*Paired)); + .setMemRefs(I->mergeMemRefsWith(*Paired)) + .setMIFlags(I->mergeFlagsWith(*Paired)); (void)MIB; - DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Paired->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); + LLVM_DEBUG( + dbgs() << "Creating pair load/store. Replacing instructions:\n "); + LLVM_DEBUG(I->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(Paired->print(dbgs())); + LLVM_DEBUG(dbgs() << " with instruction:\n "); if (SExtIdx != -1) { // Generate the sign extension for the proper result of the ldp. // I.e., with X1, that would be: @@ -840,8 +843,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); // Update the result of LDP to use the W instead of the X variant. DstMO.setReg(DstRegW); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); + LLVM_DEBUG(dbgs() << "\n"); // Make the machine verifier happy by providing a definition for // the X register. // Insert this definition right after the generated LDP, i.e., before @@ -858,12 +861,12 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, .addImm(0) .addImm(31); (void)MIBSXTW; - DEBUG(dbgs() << " Extend operand:\n "); - DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); + LLVM_DEBUG(dbgs() << " Extend operand:\n "); + LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); } else { - DEBUG(((MachineInstr *)MIB)->print(dbgs())); + LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); } - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); // Erase the old instructions. I->eraseFromParent(); @@ -901,9 +904,9 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, break; } } - DEBUG(dbgs() << "Remove load instruction:\n "); - DEBUG(LoadI->print(dbgs())); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Remove load instruction:\n "); + LLVM_DEBUG(LoadI->print(dbgs())); + LLVM_DEBUG(dbgs() << "\n"); LoadI->eraseFromParent(); return NextI; } @@ -913,7 +916,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) .add(StMO) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) + .setMIFlags(LoadI->getFlags()); } else { // FIXME: Currently we disable this transformation in big-endian targets as // performance and correctness are verified only in little-endian. @@ -954,7 +958,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), DestReg) .add(StMO) - .addImm(AndMaskEncoded); + .addImm(AndMaskEncoded) + .setMIFlags(LoadI->getFlags()); } else { BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), @@ -962,7 +967,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, DestReg) .add(StMO) .addImm(Immr) - .addImm(Imms); + .addImm(Imms) + .setMIFlags(LoadI->getFlags()); } } @@ -974,48 +980,21 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, break; } - DEBUG(dbgs() << "Promoting load by replacing :\n "); - DEBUG(StoreI->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(LoadI->print(dbgs())); - DEBUG(dbgs() << " with instructions:\n "); - DEBUG(StoreI->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG((BitExtMI)->print(dbgs())); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n "); + LLVM_DEBUG(StoreI->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(LoadI->print(dbgs())); + LLVM_DEBUG(dbgs() << " with instructions:\n "); + LLVM_DEBUG(StoreI->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG((BitExtMI)->print(dbgs())); + LLVM_DEBUG(dbgs() << "\n"); // Erase the old instructions. LoadI->eraseFromParent(); return NextI; } -/// trackRegDefsUses - Remember what registers the specified instruction uses -/// and modifies. -static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, - BitVector &UsedRegs, - const TargetRegisterInfo *TRI) { - for (const MachineOperand &MO : MI.operands()) { - if (MO.isRegMask()) - ModifiedRegs.setBitsNotInMask(MO.getRegMask()); - - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (MO.isDef()) { - // WZR/XZR are not modified even when used as a destination register. - if (Reg != AArch64::WZR && Reg != AArch64::XZR) - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - ModifiedRegs.set(*AI); - } else { - assert(MO.isUse() && "Reg operand not a def and not a use?!?"); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedRegs.set(*AI); - } - } -} - static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { // Convert the byte-offset used by unscaled into an "element" offset used // by the scaled pair load/store instructions. @@ -1073,10 +1052,10 @@ bool AArch64LoadStoreOpt::findMatchingStore( if (MBBI == B) return false; - // Track which registers have been modified and used between the first insn - // and the second insn. - ModifiedRegs.reset(); - UsedRegs.reset(); + // Track which register units have been modified and used between the first + // insn and the second insn. + ModifiedRegUnits.clear(); + UsedRegUnits.clear(); unsigned Count = 0; do { @@ -1095,7 +1074,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( if (MI.mayStore() && isMatchingStore(LoadMI, MI) && BaseReg == getLdStBaseOp(MI).getReg() && isLdOffsetInRangeOfSt(LoadMI, MI, TII) && - !ModifiedRegs[getLdStRegOp(MI).getReg()]) { + ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) { StoreI = MBBI; return true; } @@ -1103,12 +1082,12 @@ bool AArch64LoadStoreOpt::findMatchingStore( if (MI.isCall()) return false; - // Update modified / uses register lists. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + // Update modified / uses register units. + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. - if (ModifiedRegs[BaseReg]) + if (!ModifiedRegUnits.available(BaseReg)) return false; // If we encounter a store aliased with the load, return early. @@ -1186,10 +1165,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - ModifiedRegs.reset(); - UsedRegs.reset(); + // Track which register units have been modified and used between the first + // insn (inclusive) and the second insn. + ModifiedRegUnits.clear(); + UsedRegUnits.clear(); // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; @@ -1224,7 +1203,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the unscaled offset isn't a multiple of the MemSize, we can't // pair the operations together: bail and keep looking. if (MIOffset % MemSize) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, + UsedRegUnits, TRI); MemInsns.push_back(&MI); continue; } @@ -1244,7 +1224,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // the stored value is the same (i.e., WZR). if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) || (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, + UsedRegUnits, TRI); MemInsns.push_back(&MI); continue; } @@ -1254,7 +1235,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // immediate offset of merging these instructions is out of range for // a pairwise instruction, bail and keep looking. if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, + UsedRegUnits, TRI); MemInsns.push_back(&MI); continue; } @@ -1262,7 +1244,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // can't express the offset of the unscaled input, bail and keep // looking. if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, + UsedRegUnits, TRI); MemInsns.push_back(&MI); continue; } @@ -1271,7 +1254,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, + TRI); MemInsns.push_back(&MI); continue; } @@ -1280,8 +1264,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the // first. - if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && - !(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && + if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) && + !(MI.mayLoad() && + !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && !mayAlias(MI, MemInsns, AA)) { Flags.setMergeForward(false); return MBBI; @@ -1291,8 +1276,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && - !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && + if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) && + !(MayLoad && + !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && !mayAlias(FirstMI, MemInsns, AA)) { Flags.setMergeForward(true); return MBBI; @@ -1307,12 +1293,12 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, if (MI.isCall()) return E; - // Update modified / uses register lists. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + // Update modified / uses register units. + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. - if (ModifiedRegs[BaseReg]) + if (!ModifiedRegUnits.available(BaseReg)) return E; // Update list of instructions that read/write memory. @@ -1352,7 +1338,8 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, .add(getLdStRegOp(*I)) .add(getLdStBaseOp(*I)) .addImm(Value) - .setMemRefs(I->memoperands_begin(), I->memoperands_end()); + .setMemRefs(I->memoperands_begin(), I->memoperands_end()) + .setMIFlags(I->mergeFlagsWith(*Update)); } else { // Paired instruction. int Scale = getMemScale(*I); @@ -1362,24 +1349,25 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, .add(getLdStRegOp(*I, 1)) .add(getLdStBaseOp(*I)) .addImm(Value / Scale) - .setMemRefs(I->memoperands_begin(), I->memoperands_end()); + .setMemRefs(I->memoperands_begin(), I->memoperands_end()) + .setMIFlags(I->mergeFlagsWith(*Update)); } (void)MIB; if (IsPreIdx) { ++NumPreFolded; - DEBUG(dbgs() << "Creating pre-indexed load/store."); + LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store."); } else { ++NumPostFolded; - DEBUG(dbgs() << "Creating post-indexed load/store."); + LLVM_DEBUG(dbgs() << "Creating post-indexed load/store."); } - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); + LLVM_DEBUG(I->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(Update->print(dbgs())); + LLVM_DEBUG(dbgs() << " with instruction:\n "); + LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); + LLVM_DEBUG(dbgs() << "\n"); // Erase the old instructions for the block. I->eraseFromParent(); @@ -1466,10 +1454,10 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( return E; } - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - ModifiedRegs.reset(); - UsedRegs.reset(); + // Track which register units have been modified and used between the first + // insn (inclusive) and the second insn. + ModifiedRegUnits.clear(); + UsedRegUnits.clear(); ++MBBI; for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; @@ -1484,11 +1472,12 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( return MBBI; // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is used or modified, we have no match, so // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) + if (!ModifiedRegUnits.available(BaseReg) || + !UsedRegUnits.available(BaseReg)) return E; } return E; @@ -1517,10 +1506,10 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - ModifiedRegs.reset(); - UsedRegs.reset(); + // Track which register units have been modified and used between the first + // insn (inclusive) and the second insn. + ModifiedRegUnits.clear(); + UsedRegUnits.clear(); unsigned Count = 0; do { --MBBI; @@ -1536,11 +1525,12 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return MBBI; // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is used or modified, we have no match, so // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) + if (!ModifiedRegUnits.available(BaseReg) || + !UsedRegUnits.available(BaseReg)) return E; } while (MBBI != B && Count < Limit); return E; @@ -1767,11 +1757,11 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { TRI = Subtarget->getRegisterInfo(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - // Resize the modified and used register bitfield trackers. We do this once - // per function and then clear the bitfield each time we optimize a load or - // store. - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); + // Resize the modified and used register unit trackers. We do this once + // per function and then clear the register units each time we optimize a load + // or store. + ModifiedRegUnits.init(*TRI); + UsedRegUnits.init(*TRI); bool Modified = false; bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign(); |