diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
commit | 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch) | |
tree | 06099edc18d30894081a822b756f117cbe0b8207 /lib/Target/ARM/ARMLoadStoreOptimizer.cpp | |
parent | 482e7bddf617ae804dc47133cb07eb4aa81e45de (diff) | |
download | src-test2-f7d92b5cb9ddb3fef35c44a9b066c5f16b6a40c8.tar.gz src-test2-f7d92b5cb9ddb3fef35c44a9b066c5f16b6a40c8.zip |
Diffstat (limited to 'lib/Target/ARM/ARMLoadStoreOptimizer.cpp')
-rw-r--r-- | lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 200 |
1 files changed, 175 insertions, 25 deletions
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0185289f3bd8..b7ac5d57c362 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -18,8 +18,12 @@ #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -27,19 +31,15 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); @@ -87,6 +87,53 @@ namespace { MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; + class UnitRegsMap { + public: + UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {} + const SmallVector<unsigned, 4>& operator[](unsigned Reg) { + DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found = + Cache.find(Reg); + if (found != Cache.end()) + return found->second; + else + return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg))) + .first->second; + } + private: + SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) { + SmallVector<unsigned, 4> Res; + + const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg); + if (TRC == &ARM::QPRRegClass) { + if (Reg > ARM::Q7) { + Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0)); + Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1)); + return Res; + } + + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3)); + + return Res; + } + + if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) { + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); + + return Res; + } + + Res.push_back(Reg); + + return Res; + + } + const TargetRegisterInfo* TRI; + DenseMap<unsigned, SmallVector<unsigned, 4> > Cache; + }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -128,6 +175,11 @@ namespace { MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I); + unsigned AddMemOp(MemOpQueue& MemOps, + const MemOpQueueEntry newEntry, + UnitRegsMap& UnitRegsInfo, + SmallSet<unsigned, 4>& UsedUnitRegs, + unsigned At = -1U); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -865,7 +917,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. - if (isLd && MI->getOperand(0).getReg() == Base) + if (MI->getOperand(0).getReg() == Base) return false; unsigned PredReg = 0; @@ -1188,7 +1240,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddDeadKill = true; } // Never kill the base register in the first instruction. - // <rdar://problem/11101911> if (EvenReg == BaseReg) EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, @@ -1214,12 +1265,103 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } +/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti. +/// It adds store mem ops with simple push_back/insert method, +/// without any additional logic. +/// For load operation it does the next: +/// 1. Adds new load operation into MemOp collection at "At" position. +/// 2. Removes any "load" operations from MemOps, that changes "Reg" register +/// contents, prior to "At". +/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector > +/// UsedUnitRegs - set of unit-registers currently in use. +/// At - position at which it would added, and prior which the clean-up +/// should be made (for load operation). +/// FIXME: The clean-up also should be made for store operations, +/// but the memory address should be analyzed instead of unit registers. +unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps, + const MemOpQueueEntry NewEntry, + UnitRegsMap& UnitRegsInfo, + SmallSet<unsigned, 4>& UsedUnitRegs, + unsigned At) { + unsigned Cleaned = 0; + + if (At == -1U) { + At = MemOps.size(); + MemOps.push_back(NewEntry); + } else + MemOps.insert(&MemOps[At], NewEntry); + + // FIXME: + // If operation is not load, leave it as is by now, + // So 0 overridden ops would cleaned in this case. + if (!NewEntry.MBBI->mayLoad()) + return 0; + + const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg]; + + bool FoundOverriddenLoads = false; + + for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i) + if (UsedUnitRegs.count(NewEntryUnitRegs[i])) { + FoundOverriddenLoads = true; + break; + } + + // If we detect that this register is used by load operations that are + // predecessors for the new one, remove them from MemOps then. + if (FoundOverriddenLoads) { + MemOpQueue UpdatedMemOps; + + // Scan through MemOps entries. + for (unsigned i = 0; i != At; ++i) { + MemOpQueueEntry& MemOpEntry = MemOps[i]; + + // FIXME: Skip non-load operations by now. + if (!MemOpEntry.MBBI->mayLoad()) + continue; + + const SmallVector<unsigned, 4>& MemOpUnitRegs = + UnitRegsInfo[MemOpEntry.Reg]; + + // Lookup entry that loads contents into register used by new entry. + bool ReleaseThisEntry = false; + for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) { + if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(), + MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) { + ReleaseThisEntry = true; + ++Cleaned; + break; + } + } + + if (ReleaseThisEntry) { + const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg]; + for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r) + UsedUnitRegs.erase(RelesedRegs[r]); + } else + UpdatedMemOps.push_back(MemOpEntry); + } + + // Keep anything without changes after At position. + for (unsigned i = At, e = MemOps.size(); i != e; ++i) + UpdatedMemOps.push_back(MemOps[i]); + + MemOps.swap(UpdatedMemOps); + } + + UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end()); + + return Cleaned; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; MemOpQueue MemOps; + UnitRegsMap UnitRegsInfo(TRI); + SmallSet<unsigned, 4> UsedRegUnits; unsigned CurrBase = 0; int CurrOpc = -1; unsigned CurrSize = 0; @@ -1266,8 +1408,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; + const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg]; + UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end()); Advance = true; } else { if (Clobber) { @@ -1279,20 +1424,24 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; + unsigned OverridesCleaned = + AddMemOp(MemOps, + MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI), + UnitRegsInfo, UsedRegUnits) != 0; + NumMemOps += 1 - OverridesCleaned; Advance = true; } else { - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); - I != E; ++I) { - if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; + for (unsigned I = 0; I != NumMemOps; ++I) { + if (Offset < MemOps[I].Offset) { + MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI); + unsigned OverridesCleaned = + AddMemOp(MemOps, entry, UnitRegsInfo, + UsedRegUnits, I) != 0; + NumMemOps += 1 - OverridesCleaned; + Advance = true; break; - } else if (Offset == I->Offset) { + } else if (Offset == MemOps[I].Offset) { // Collision! This can't be merged! break; } @@ -1363,6 +1512,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrPredReg = 0; if (NumMemOps) { MemOps.clear(); + UsedRegUnits.clear(); NumMemOps = 0; } @@ -1408,7 +1558,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(&*MBBI); + PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); MBB.erase(MBBI); return true; } |