Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp  14
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp  6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp  68
-rw-r--r--  lib/CodeGen/CMakeLists.txt  1
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp  54
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp  9
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp  12
-rw-r--r--  lib/CodeGen/GlobalISel/Legalizer.cpp  139
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp  3
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp  34
-rw-r--r--  lib/CodeGen/GlobalISel/Utils.cpp  28
-rw-r--r--  lib/CodeGen/IfConversion.cpp  13
-rw-r--r--  lib/CodeGen/ImplicitNullChecks.cpp  39
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp  2
-rw-r--r--  lib/CodeGen/LiveDebugVariables.h  2
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp  6
-rw-r--r--  lib/CodeGen/MachineInstr.cpp  18
-rw-r--r--  lib/CodeGen/MachineModuleInfoImpls.cpp  1
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp  172
-rw-r--r--  lib/CodeGen/MacroFusion.cpp  150
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp  428
-rw-r--r--  lib/CodeGen/RegisterUsageInfo.cpp  4
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt  1
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp  256
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  29
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  78
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp  33
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp  95
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  15
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp  24
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp  20
-rw-r--r--  lib/CodeGen/TargetSubtargetInfo.cpp  14
33 files changed, 1147 insertions, 624 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index ad348d723bae..c48fcaa7b0d1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2801,26 +2801,24 @@ void AsmPrinter::emitXRayTable() {
}
// Before we switch over, we force a reference to a label inside the
- // xray_instr_map and xray_fn_idx sections. Since this function is always
- // called just before the function's end, we assume that this is happening
- // after the last return instruction. We also use the synthetic label in the
- // xray_inster_map as a delimeter for the range of sleds for this function in
- // the index.
+ // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept
+ // live by the linker if the function is not garbage-collected. Since this
+ // function is always called just before the function's end, we assume that
+ // this is happening after the last return instruction.
auto WordSizeBytes = MAI->getCodePointerSize();
- MCSymbol *SledsStart = OutContext.createTempSymbol("xray_synthetic_", true);
MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true);
OutStreamer->EmitCodeAlignment(16);
- OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false);
// Now we switch to the instrumentation map section. Because this is done
// per-function, we are able to create an index entry that will represent the
// range of sleds associated with a function.
+ MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
OutStreamer->SwitchSection(InstMap);
OutStreamer->EmitLabel(SledsStart);
for (const auto &Sled : Sleds)
Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
- MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_synthetic_end", true);
+ MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
OutStreamer->EmitLabel(SledsEnd);
// We then emit a single entry in the index per function. We use the symbols
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 30bfd7c94e68..886e6e264b3e 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -105,7 +105,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
}
LLVM_DUMP_METHOD
-void DIEAbbrev::print(raw_ostream &O) {
+void DIEAbbrev::print(raw_ostream &O) const {
O << "Abbreviation @"
<< format("0x%lx", (long)(intptr_t)this)
<< " "
@@ -128,7 +128,7 @@ void DIEAbbrev::print(raw_ostream &O) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DIEAbbrev::dump() {
+LLVM_DUMP_METHOD void DIEAbbrev::dump() const {
print(dbgs());
}
#endif
@@ -268,7 +268,7 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DIE::dump() {
+LLVM_DUMP_METHOD void DIE::dump() const {
print(dbgs());
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 75eb355bfb54..f1b4d9f20ca9 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -972,16 +972,62 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
return ConcreteVariables.back().get();
}
-// Determine whether this DBG_VALUE is valid at the beginning of the function.
-static bool validAtEntry(const MachineInstr *MInsn) {
- auto MBB = MInsn->getParent();
- // Is it in the entry basic block?
- if (!MBB->pred_empty())
+/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
+/// enclosing lexical scope. The check ensures there are no other instructions
+/// in the same lexical scope preceding the DBG_VALUE and that its range is
+/// either open or otherwise rolls off the end of the scope.
+static bool validThroughout(LexicalScopes &LScopes,
+ const MachineInstr *DbgValue,
+ const MachineInstr *RangeEnd) {
+ assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
+ auto MBB = DbgValue->getParent();
+ auto DL = DbgValue->getDebugLoc();
+ auto *LScope = LScopes.findLexicalScope(DL);
+ // Scope doesn't exist; this is a dead DBG_VALUE.
+ if (!LScope)
return false;
- for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I)
- if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup)))
+ auto &LSRange = LScope->getRanges();
+ if (LSRange.size() == 0)
+ return false;
+
+ // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
+ const MachineInstr *LScopeBegin = LSRange.front().first;
+ // Early exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
return false;
- return true;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
+ return false;
+ }
+
+ // If the range of the DBG_VALUE is open-ended, report success.
+ if (!RangeEnd)
+ return true;
+
+ // Fail if there are instructions belonging to our scope in another block.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (LScopeEnd->getParent() != MBB)
+ return false;
+
+ // Single, constant DBG_VALUEs in the prologue are promoted to be live
+ // throughout the function. This is a hack, presumably for DWARF v2 and not
+ // necessarily correct. It would be much better to use a dbg.declare instead
+ // if we know the constant is live throughout the scope.
+ if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
+ return true;
+
+ return false;
}
// Find variables for each lexical scope.
@@ -1016,11 +1062,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- // Check if there is a single DBG_VALUE, valid throughout the function.
- // A single constant is also considered valid for the entire function.
+ // Check if there is a single DBG_VALUE, valid throughout the var's scope.
if (Ranges.size() == 1 &&
- (MInsn->getOperand(0).isImm() ||
- (validAtEntry(MInsn) && Ranges.front().second == nullptr))) {
+ validThroughout(LScopes, MInsn, Ranges.front().second)) {
RegVar->initializeDbgValue(MInsn);
continue;
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 55a27e2fb79e..7f3c6da91268 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_library(LLVMCodeGen
PatchableFunction.cpp
MIRPrinter.cpp
MIRPrintingPass.cpp
+ MacroFusion.cpp
OptimizePHIs.cpp
ParallelCG.cpp
PeepholeOptimizer.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 37e176099ea7..cb31c21293f4 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1663,17 +1663,18 @@ class MemCmpExpansion {
bool IsUsedForZeroCmp;
const DataLayout &DL;
- int calculateNumBlocks(unsigned Size);
+ unsigned calculateNumBlocks(unsigned Size);
void createLoadCmpBlocks();
void createResultBlock();
void setupResultBlockPHINodes();
void setupEndBlockPHINodes();
- void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex);
+ void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
+ unsigned GEPIndex);
Value *getCompareLoadPairs(unsigned Index, unsigned Size,
unsigned &NumBytesProcessed, IRBuilder<> &Builder);
void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
unsigned &NumBytesProcessed);
- void emitLoadCompareByteBlock(unsigned Index, int GEPIndex);
+ void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase(unsigned Size);
Value *getMemCmpEqZeroOneBlock(unsigned Size);
@@ -1751,7 +1752,8 @@ void MemCmpExpansion::createResultBlock() {
// It loads 1 byte from each source of the memcmp parameters with the given
// GEPIndex. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.
-void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) {
+void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
+ unsigned GEPIndex) {
IRBuilder<> Builder(CI->getContext());
Value *Source1 = CI->getArgOperand(0);
@@ -1833,6 +1835,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(LoadSize <= MaxLoadSize && "Unexpected load type");
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -1851,18 +1854,28 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
ConstantInt::get(LoadSizeType, GEPIndex));
}
- // Load LoadSizeType from the base address.
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+ // Get a constant or load a value for each source address.
+ Value *LoadSrc1 = nullptr;
+ if (auto *Source1C = dyn_cast<Constant>(Source1))
+ LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
+ if (!LoadSrc1)
+ LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+
+ Value *LoadSrc2 = nullptr;
+ if (auto *Source2C = dyn_cast<Constant>(Source2))
+ LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
+ if (!LoadSrc2)
+ LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
if (NumLoads != 1) {
if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}
// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or.
Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
- Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
+ Diff = Builder.CreateZExt(Diff, MaxLoadType);
XorList.push_back(Diff);
} else {
// If there's only one load per block, we just compare the loaded values.
@@ -1926,8 +1939,8 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
// a special case through emitLoadCompareByteBlock. The special handling can
// simply subtract the loaded values and add it to the result phi node.
-void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
- int GEPIndex) {
+void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
+ unsigned GEPIndex) {
if (LoadSize == 1) {
MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
return;
@@ -1937,6 +1950,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(LoadSize <= MaxLoadSize && "Unexpected load type");
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -1970,8 +1984,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
}
if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}
// Add the loaded values to the phi nodes for calculating memcmp result only
@@ -2034,8 +2048,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
PhiRes->addIncoming(Res, ResBlock.BB);
}
-int MemCmpExpansion::calculateNumBlocks(unsigned Size) {
- int NumBlocks = 0;
+unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
+ unsigned NumBlocks = 0;
bool HaveOneByteLoad = false;
unsigned RemainingSize = Size;
unsigned LoadSize = MaxLoadSize;
@@ -2104,13 +2118,13 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
// memcmp sources. It starts with loading using the maximum load size set by
// the target. It processes any remaining bytes using a load size which is the
// next smallest power of 2.
- int LoadSize = MaxLoadSize;
- int NumBytesToBeProcessed = Size;
+ unsigned LoadSize = MaxLoadSize;
+ unsigned NumBytesToBeProcessed = Size;
unsigned Index = 0;
while (NumBytesToBeProcessed) {
// Calculate how many blocks we can create with the current load size.
- int NumBlocks = NumBytesToBeProcessed / LoadSize;
- int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
+ unsigned NumBlocks = NumBytesToBeProcessed / LoadSize;
+ unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
// For each NumBlocks, populate the instruction sequence for loading and
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index dccd8e0706ca..239bad2f5355 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -582,7 +582,7 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MIB.addUse(Zero);
}
- MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width);
+ MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});
return true;
}
@@ -686,6 +686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
+ case Intrinsic::fma:
+ MIRBuilder.buildInstr(TargetOpcode::G_FMA)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
+ return true;
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 4c0b06dffd21..5466efd7e90f 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -25,6 +25,18 @@ using namespace llvm;
InstructionSelector::InstructionSelector() {}
+bool InstructionSelector::constrainOperandRegToRegClass(
+ MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ return llvm::constrainRegToClass(MRI, TII, RBI, I,
+ I.getOperand(OpIdx).getReg(), RC);
+}
+
bool InstructionSelector::constrainSelectedInstRegOperands(
MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) const {
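For illustration only (not part of this patch), a target's InstructionSelector subclass could use the new constrainOperandRegToRegClass helper roughly as follows; the selector context and the MyTarget::GPR32RegClass name are assumptions for the sketch:

    // Inside a hypothetical MyTargetInstructionSelector::select(MachineInstr &I),
    // after rewriting I to a target opcode, pin operand 0 to a concrete register
    // class. Per the helper above, a COPY is inserted when the generic vreg
    // cannot be constrained to the requested class directly.
    constrainOperandRegToRegClass(I, /*OpIdx=*/0, MyTarget::GPR32RegClass,
                                  TII, TRI, RBI);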
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1b50489deeba..b699156c568b 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -50,72 +50,9 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
void Legalizer::init(MachineFunction &MF) {
}
-bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII) {
- bool Changed = false;
- if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
- return Changed;
-
- unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
- unsigned SrcReg = MI.getOperand(NumDefs).getReg();
- MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
- if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
- return Changed;
-
- unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
- bool AllDefsReplaced = true;
-
- // Try to match each register extracted with a corresponding insertion formed
- // by the G_SEQUENCE.
- for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
- MachineOperand &ExtractMO = MI.getOperand(Idx);
- assert(ExtractMO.isReg() && ExtractMO.isDef() &&
- "unexpected extract operand");
-
- unsigned ExtractReg = ExtractMO.getReg();
- unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
-
- while (SeqIdx < NumSeqSrcs &&
- SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
- ++SeqIdx;
-
- if (SeqIdx == NumSeqSrcs) {
- AllDefsReplaced = false;
- continue;
- }
-
- unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
- if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
- MRI.getType(OrigReg) != MRI.getType(ExtractReg)) {
- AllDefsReplaced = false;
- continue;
- }
-
- assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
- "unexpected physical register in G_SEQUENCE");
-
- // Finally we can replace the uses.
- MRI.replaceRegWith(ExtractReg, OrigReg);
- }
-
- if (AllDefsReplaced) {
- // If SeqI was the next instruction in the BB and we removed it, we'd break
- // the outer iteration.
- assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
- "G_SEQUENCE does not dominate G_EXTRACT");
-
- MI.eraseFromParent();
-
- if (MRI.use_empty(SrcReg))
- SeqI.eraseFromParent();
- Changed = true;
- }
-
- return Changed;
-}
-
bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII) {
+ const TargetInstrInfo &TII,
+ MachineIRBuilder &MIRBuilder) {
if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
return false;
@@ -125,18 +62,62 @@ bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
return false;
- if (MergeI.getNumOperands() - 1 != NumDefs)
- return false;
+ const unsigned NumMergeRegs = MergeI.getNumOperands() - 1;
- // FIXME: is a COPY appropriate if the types mismatch? We know both registers
- // are allocatable by now.
- if (MRI.getType(MI.getOperand(0).getReg()) !=
- MRI.getType(MergeI.getOperand(1).getReg()))
- return false;
+ if (NumMergeRegs < NumDefs) {
+ if (NumDefs % NumMergeRegs != 0)
+ return false;
+
+ MIRBuilder.setInstr(MI);
+ // Transform to UNMERGEs, for example
+ // %1 = G_MERGE_VALUES %4, %5
+ // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
+ // to
+ // %9, %10 = G_UNMERGE_VALUES %4
+ // %11, %12 = G_UNMERGE_VALUES %5
+
+ const unsigned NewNumDefs = NumDefs / NumMergeRegs;
+ for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
+ SmallVector<unsigned, 2> DstRegs;
+ for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
+ ++j, ++DefIdx)
+ DstRegs.push_back(MI.getOperand(DefIdx).getReg());
+
+ MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg());
+ }
+
+ } else if (NumMergeRegs > NumDefs) {
+ if (NumMergeRegs % NumDefs != 0)
+ return false;
+
+ MIRBuilder.setInstr(MI);
+ // Transform to MERGEs
+ // %6 = G_MERGE_VALUES %17, %18, %19, %20
+ // %7, %8 = G_UNMERGE_VALUES %6
+ // to
+ // %7 = G_MERGE_VALUES %17, %18
+ // %8 = G_MERGE_VALUES %19, %20
+
+ const unsigned NumRegs = NumMergeRegs / NumDefs;
+ for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
+ SmallVector<unsigned, 2> Regs;
+ for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx)
+ Regs.push_back(MergeI.getOperand(Idx).getReg());
+
+ MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
+ }
- for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
- MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
- MergeI.getOperand(Idx + 1).getReg());
+ } else {
+ // FIXME: is a COPY appropriate if the types mismatch? We know both
+ // registers are allocatable by now.
+ if (MRI.getType(MI.getOperand(0).getReg()) !=
+ MRI.getType(MergeI.getOperand(1).getReg()))
+ return false;
+
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
+ MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
+ MergeI.getOperand(Idx + 1).getReg());
+ }
MI.eraseFromParent();
if (MRI.use_empty(MergeI.getOperand(0).getReg()))
@@ -226,13 +207,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Get the next Instruction before we try to legalize, because there's a
// good chance MI will be deleted.
NextMI = std::next(MI);
-
- // combineExtracts erases MI.
- if (combineExtracts(*MI, MRI, TII)) {
- Changed = true;
- continue;
- }
- Changed |= combineMerges(*MI, MRI, TII);
+ Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder);
}
}
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 4d4591042296..595802f2228b 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -75,8 +75,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
- if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
- Aspect.Opcode == TargetOpcode::G_EXTRACT ||
+ if (Aspect.Opcode == TargetOpcode::G_EXTRACT ||
Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 79d312fb52ca..3c70013ea296 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -425,10 +425,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
.addImm(Index);
}
-MachineInstrBuilder
-MachineIRBuilder::buildSequence(unsigned Res,
- ArrayRef<unsigned> Ops,
- ArrayRef<uint64_t> Indices) {
+void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
@@ -440,13 +438,31 @@ MachineIRBuilder::buildSequence(unsigned Res,
assert(MRI->getType(Op).isValid() && "invalid operand type");
#endif
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE);
- MIB.addDef(Res);
+ LLT ResTy = MRI->getType(Res);
+ LLT OpTy = MRI->getType(Ops[0]);
+ unsigned OpSize = OpTy.getSizeInBits();
+ bool MaybeMerge = true;
for (unsigned i = 0; i < Ops.size(); ++i) {
- MIB.addUse(Ops[i]);
- MIB.addImm(Indices[i]);
+ if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {
+ MaybeMerge = false;
+ break;
+ }
+ }
+
+ if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) {
+ buildMerge(Res, Ops);
+ return;
+ }
+
+ unsigned ResIn = MRI->createGenericVirtualRegister(ResTy);
+ buildUndef(ResIn);
+
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ unsigned ResOut =
+ i + 1 == Ops.size() ? Res : MRI->createGenericVirtualRegister(ResTy);
+ buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
+ ResIn = ResOut;
}
- return MIB;
}
MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
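As a sketch only (not part of this patch), with this change a buildSequence call whose operands exactly tile the result becomes a single G_MERGE_VALUES, while any other layout falls back to an implicit def followed by a chain of G_INSERTs; the MIRBuilder instance and the virtual registers below are hypothetical:

    // Res is s64, Lo and Hi are s32: the operands tile the result at bit
    // offsets 0 and 32, so this lowers to Res = G_MERGE_VALUES Lo, Hi.
    MIRBuilder.buildSequence(Res, {Lo, Hi}, {0, 32});

    // Part is s16 inserted at bit offset 8 of an s64 result: not a full tiling,
    // so this lowers to an implicit def of Res2 followed by a G_INSERT at 8.
    MIRBuilder.buildSequence(Res2, {Part}, {8});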
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 254bdf10d804..5ecaf5c563f8 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -26,6 +26,23 @@
using namespace llvm;
+unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI,
+ MachineInstr &InsertPt, unsigned Reg,
+ const TargetRegisterClass &RegClass) {
+ if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) {
+ unsigned NewReg = MRI.createVirtualRegister(&RegClass);
+ BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewReg)
+ .addReg(Reg);
+ return NewReg;
+ }
+
+ return Reg;
+}
+
+
unsigned llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
@@ -36,16 +53,7 @@ unsigned llvm::constrainOperandRegClass(
"PhysReg not implemented");
const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
-
- if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) {
- unsigned NewReg = MRI.createVirtualRegister(RegClass);
- BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
- TII.get(TargetOpcode::COPY), NewReg)
- .addReg(Reg);
- return NewReg;
- }
-
- return Reg;
+ return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
}
bool llvm::isTriviallyDead(const MachineInstr &MI,
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c98c9b68ac0e..ff8405366173 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1474,8 +1474,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
DontKill.addLiveIns(NextMBB);
}
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
if (CvtMBB.pred_size() > 1) {
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
@@ -1484,11 +1487,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// explicitly remove CvtBBI as a successor.
BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
+ // Predicate the instructions in the true block.
RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI);
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Merge converted block into entry block.
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI);
}
@@ -1588,8 +1591,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
}
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
if (CvtMBB.pred_size() > 1) {
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
@@ -1603,7 +1609,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Now merge the entry of the triangle with the true block.
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI, false);
}
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index b831ddfa601a..e308f49ec4e8 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -359,30 +359,15 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
Offset < PageSize))
return SR_Unsuitable;
- // Finally, we need to make sure that the access instruction actually is
- // accessing from PointerReg, and there isn't some re-definition of PointerReg
- // between the compare and the memory access.
- // If PointerReg has been redefined before then there is no sense to continue
- // lookup due to this condition will fail for any further instruction.
- SuitabilityResult Suitable = SR_Suitable;
- for (auto *PrevMI : PrevInsts)
- for (auto &PrevMO : PrevMI->operands()) {
- if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() &&
- TRI->regsOverlap(PrevMO.getReg(), PointerReg))
- return SR_Impossible;
-
- // Check whether the current memory access aliases with previous one.
- // If we already found that it aliases then no need to continue.
- // But we continue base pointer check as it can result in SR_Impossible.
- if (Suitable == SR_Suitable) {
- AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
- if (AR == AR_WillAliasEverything)
- return SR_Impossible;
- if (AR == AR_MayAlias)
- Suitable = SR_Unsuitable;
- }
- }
- return Suitable;
+ // Finally, check whether the current memory access aliases with a previous one.
+ for (auto *PrevMI : PrevInsts) {
+ AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
+ if (AR == AR_WillAliasEverything)
+ return SR_Impossible;
+ if (AR == AR_MayAlias)
+ return SR_Unsuitable;
+ }
+ return SR_Suitable;
}
bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
@@ -569,6 +554,12 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return true;
}
+ // If MI re-defines the PointerReg then we cannot move further.
+ if (any_of(MI.operands(), [&](MachineOperand &MO) {
+ return MO.isReg() && MO.getReg() && MO.isDef() &&
+ TRI->regsOverlap(MO.getReg(), PointerReg);
+ }))
+ return false;
InstsSeenSoFar.push_back(&MI);
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index bbd783367c9e..0c76478af551 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -1006,7 +1006,7 @@ bool LiveDebugVariables::doInitialization(Module &M) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
+LLVM_DUMP_METHOD void LiveDebugVariables::dump() const {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->print(dbgs());
}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index afe87a52544d..1d7e3d4371a2 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -59,7 +59,7 @@ public:
void emitDebugValues(VirtRegMap *VRM);
/// dump - Print data structures to dbgs().
- void dump();
+ void dump() const;
private:
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 590acc01008a..81597afe6b02 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -228,6 +228,12 @@ LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
}
#endif
+bool MachineBasicBlock::isLegalToHoistInto() const {
+ if (isReturnBlock() || hasEHPadSuccessor())
+ return false;
+ return true;
+}
+
StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
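A minimal usage sketch (not part of this patch) of the new hook; the surrounding pass variables are hypothetical:

    // Skip candidate blocks that return or have an EH pad successor, which is
    // exactly what MachineBasicBlock::isLegalToHoistInto() checks.
    for (MachineBasicBlock *SuccMBB : CurMBB->successors()) {
      if (!SuccMBB->isLegalToHoistInto())
        continue;
      // ... consider hoisting into SuccMBB ...
    }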
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 2a6cb07dbd2d..81c6dace92e0 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const {
return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
}
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const {
+ if (!V.is<const Value*>())
+ return false;
+
+ const Value *BasePtr = V.get<const Value*>();
+ if (BasePtr == nullptr)
+ return false;
+
+ return isDereferenceableAndAlignedPointer(BasePtr, 1,
+ APInt(DL.getPointerSize(),
+ Offset + Size),
+ DL);
+}
+
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
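A sketch of the kind of query the new MachinePointerInfo::isDereferenceable enables (not part of this patch; the MMO, Ctx, and MF variables are assumed to be in scope):

    // True only when the underlying IR value is known to be dereferenceable
    // for Offset + Size bytes, per isDereferenceableAndAlignedPointer above.
    const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
    bool KnownDereferenceable =
        PtrInfo.isDereferenceable(MMO->getSize(), Ctx, MF.getDataLayout());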
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 4c81fd91cb82..22d519e5d88f 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -23,7 +23,6 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
-void MachineModuleInfoWasm::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 01a2286b8d66..eaba9a58557c 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -542,10 +542,10 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void ReadyQueue::dump() {
+LLVM_DUMP_METHOD void ReadyQueue::dump() const {
dbgs() << "Queue " << Name << ": ";
- for (unsigned i = 0, e = Queue.size(); i < e; ++i)
- dbgs() << Queue[i]->NodeNum << " ";
+ for (const SUnit *SU : Queue)
+ dbgs() << SU->NodeNum << " ";
dbgs() << "\n";
}
#endif
@@ -609,10 +609,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- releaseSucc(SU, &*I);
- }
+ for (SDep &Succ : SU->Succs)
+ releaseSucc(SU, &Succ);
}
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
@@ -648,10 +646,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- releasePred(SU, &*I);
- }
+ for (SDep &Pred : SU->Preds)
+ releasePred(SU, &Pred);
}
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
@@ -724,8 +720,8 @@ void ScheduleDAGMI::schedule() {
DEBUG(
if (EntrySU.getInstr() != nullptr)
EntrySU.dumpAll(this);
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this);
+ for (const SUnit &SU : SUnits)
+ SU.dumpAll(this);
if (ExitSU.getInstr() != nullptr)
ExitSU.dumpAll(this);
);
@@ -786,28 +782,25 @@ void ScheduleDAGMI::schedule() {
/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
- for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
- Mutations[i]->apply(this);
- }
+ for (auto &m : Mutations)
+ m->apply(this);
}
void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
SmallVectorImpl<SUnit*> &BotRoots) {
- for (std::vector<SUnit>::iterator
- I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
- SUnit *SU = &(*I);
- assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+ for (SUnit &SU : SUnits) {
+ assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
// Order predecessors so DFSResult follows the critical path.
- SU->biasCriticalPath();
+ SU.biasCriticalPath();
// A SUnit is ready to top schedule if it has no predecessors.
- if (!I->NumPredsLeft)
- TopRoots.push_back(SU);
+ if (!SU.NumPredsLeft)
+ TopRoots.push_back(&SU);
// A SUnit is ready to bottom schedule if it has no successors.
- if (!I->NumSuccsLeft)
- BotRoots.push_back(SU);
+ if (!SU.NumSuccsLeft)
+ BotRoots.push_back(&SU);
}
ExitSU.biasCriticalPath();
}
@@ -822,10 +815,9 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
//
// Nodes with unreleased weak edges can still be roots.
// Release top roots in forward order.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
- SchedImpl->releaseTopNode(*I);
- }
+ for (SUnit *SU : TopRoots)
+ SchedImpl->releaseTopNode(SU);
+
// Release bottom roots in reverse order so the higher priority nodes appear
// first. This is more natural and slightly more efficient.
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
@@ -1029,9 +1021,9 @@ void ScheduleDAGMILive::initRegPressure() {
}
}
DEBUG(dbgs() << "Excess PSets: ";
- for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ for (const PressureChange &RCPS : RegionCriticalPSets)
dbgs() << TRI->getRegPressureSetName(
- RegionCriticalPSets[i].getPSet()) << " ";
+ RCPS.getPSet()) << " ";
dbgs() << "\n");
}
@@ -1040,11 +1032,10 @@ updateScheduledPressure(const SUnit *SU,
const std::vector<unsigned> &NewMaxPressure) {
const PressureDiff &PDiff = getPressureDiff(SU);
unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
- for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
- I != E; ++I) {
- if (!I->isValid())
+ for (const PressureChange &PC : PDiff) {
+ if (!PC.isValid())
break;
- unsigned ID = I->getPSet();
+ unsigned ID = PC.getPSet();
while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
++CritIdx;
if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
@@ -1508,8 +1499,7 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
- for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
- SUnit *SU = MemOps[Idx];
+ for (SUnit *SU : MemOps) {
unsigned BaseReg;
int64_t Offset;
if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
@@ -1537,12 +1527,11 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
// dependent on SUa can prevent load combining due to register reuse.
// Predecessor edges do not need to be copied from SUb to SUa since nearby
// loads should have effectively the same inputs.
- for (SUnit::const_succ_iterator
- SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
- if (SI->getSUnit() == SUb)
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
continue;
- DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
- DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
} else
@@ -1559,17 +1548,15 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
DenseMap<unsigned, unsigned> StoreChainIDs;
// Map each store chain to a set of dependent MemOps.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
- for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
- SUnit *SU = &DAG->SUnits[Idx];
- if ((IsLoad && !SU->getInstr()->mayLoad()) ||
- (!IsLoad && !SU->getInstr()->mayStore()))
+ for (SUnit &SU : DAG->SUnits) {
+ if ((IsLoad && !SU.getInstr()->mayLoad()) ||
+ (!IsLoad && !SU.getInstr()->mayStore()))
continue;
unsigned ChainPredID = DAG->SUnits.size();
- for (SUnit::const_pred_iterator
- PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
- if (PI->isCtrl()) {
- ChainPredID = PI->getSUnit()->NodeNum;
+ for (const SDep &Pred : SU.Preds) {
+ if (Pred.isCtrl()) {
+ ChainPredID = Pred.getSUnit()->NodeNum;
break;
}
}
@@ -1580,12 +1567,12 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
if (Result.second)
StoreChainDependents.resize(NumChains + 1);
- StoreChainDependents[Result.first->second].push_back(SU);
+ StoreChainDependents[Result.first->second].push_back(&SU);
}
// Iterate over the store chains.
- for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
- clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
+ for (auto &SCD : StoreChainDependents)
+ clusterNeighboringMemOps(SCD, DAG);
}
//===----------------------------------------------------------------------===//
@@ -1728,16 +1715,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
- for (SUnit::const_succ_iterator
- I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
- I != E; ++I) {
- if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+ for (const SDep &Succ : LastLocalSU->Succs) {
+ if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
continue;
- if (I->getSUnit() == GlobalSU)
+ if (Succ.getSUnit() == GlobalSU)
continue;
- if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+ if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
return;
- LocalUses.push_back(I->getSUnit());
+ LocalUses.push_back(Succ.getSUnit());
}
// Open the top of the GlobalLI hole by constraining any earlier global uses
// to precede the start of LocalLI.
@@ -1745,15 +1730,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
MachineInstr *FirstLocalDef =
LIS->getInstructionFromIndex(LocalLI->beginIndex());
SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
- for (SUnit::const_pred_iterator
- I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
- if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+ for (const SDep &Pred : GlobalSU->Preds) {
+ if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
continue;
- if (I->getSUnit() == FirstLocalSU)
+ if (Pred.getSUnit() == FirstLocalSU)
continue;
- if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+ if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
return;
- GlobalUses.push_back(I->getSUnit());
+ GlobalUses.push_back(Pred.getSUnit());
}
DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
// Add the weak edges.
@@ -1784,12 +1768,11 @@ void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
RegionEndIdx = DAG->getLIS()->getInstructionIndex(
*priorNonDebug(DAG->end(), DAG->begin()));
- for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
- SUnit *SU = &DAG->SUnits[Idx];
- if (!SU->getInstr()->isCopy())
+ for (SUnit &SU : DAG->SUnits) {
+ if (!SU.getInstr()->isCopy())
continue;
- constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
+ constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
}
}
@@ -1840,10 +1823,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
if (!SchedModel->hasInstrSchedModel())
return;
RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
- for (std::vector<SUnit>::iterator
- I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
- const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
- RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
+ for (SUnit &SU : DAG->SUnits) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
+ RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
* SchedModel->getMicroOpFactor();
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
@@ -1957,12 +1939,11 @@ unsigned SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
SUnit *LateSU = nullptr;
unsigned RemLatency = 0;
- for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
- I != E; ++I) {
- unsigned L = getUnscheduledLatency(*I);
+ for (SUnit *SU : ReadySUs) {
+ unsigned L = getUnscheduledLatency(SU);
if (L > RemLatency) {
RemLatency = L;
- LateSU = *I;
+ LateSU = SU;
}
}
if (LateSU) {
@@ -2328,7 +2309,7 @@ SUnit *SchedBoundary::pickOnlyChoice() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
-LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() {
+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
unsigned ResFactor;
unsigned ResCount;
if (ZoneCritResIdx) {
@@ -2667,7 +2648,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
}
}
-void GenericScheduler::dumpPolicy() {
+void GenericScheduler::dumpPolicy() const {
// Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "GenericScheduler RegionPolicy: "
@@ -2719,10 +2700,9 @@ void GenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
- for (std::vector<SUnit*>::const_iterator
- I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
- if ((*I)->getDepth() > Rem.CriticalPath)
- Rem.CriticalPath = (*I)->getDepth();
+ for (const SUnit *SU : Bot.Available) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
}
DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
@@ -2969,10 +2949,10 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
ReadyQueue &Q = Zone.Available;
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
- initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryCandidate(Cand, TryCand, ZoneArg);
@@ -3118,18 +3098,17 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
// Find already scheduled copies with a single physreg dependence and move
// them just above the scheduled instruction.
- for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
- I != E; ++I) {
- if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+ for (SDep &Dep : Deps) {
+ if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
continue;
- SUnit *DepSU = I->getSUnit();
+ SUnit *DepSU = Dep.getSUnit();
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
continue;
MachineInstr *Copy = DepSU->getInstr();
if (!Copy->isCopy())
continue;
DEBUG(dbgs() << " Rescheduling physreg copy ";
- I->getSUnit()->dump(DAG));
+ Dep.getSUnit()->dump(DAG));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3204,10 +3183,9 @@ void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
- if ((*I)->getDepth() > Rem.CriticalPath)
- Rem.CriticalPath = (*I)->getDepth();
+ for (const SUnit *SU : BotRoots) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
}
DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
@@ -3260,9 +3238,9 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
ReadyQueue &Q = Top.Available;
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ for (SUnit *SU : Q) {
SchedCandidate TryCand(Cand.Policy);
- TryCand.SU = *I;
+ TryCand.SU = SU;
TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
tryCandidate(Cand, TryCand);
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
new file mode 100644
index 000000000000..45ea0e4c39ab
--- /dev/null
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -0,0 +1,150 @@
+//===- MacroFusion.cpp - Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the implementation of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+namespace {
+
+static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+ SUnit &SecondSU) {
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+
+ // Adjust the latency between the anchor instr and its
+ // predecessors.
+ for (SDep &IDep : SecondSU.Preds)
+ if (IDep.getSUnit() == &FirstSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between the dependent instr and its
+ // predecessors.
+ for (SDep &IDep : FirstSU.Succs)
+ if (IDep.getSUnit() == &SecondSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
+ DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; );
+
+ if (&SecondSU != &DAG.ExitSU)
+ // Make instructions dependent on FirstSU also dependent on SecondSU to
+ // prevent them from being scheduled between FirstSU and SecondSU.
+ for (const SDep &SI : FirstSU.Succs) {
+ if (SI.getSUnit() == &SecondSU)
+ continue;
+ DEBUG(dbgs() << " Copy Succ ";
+ SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';);
+ DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial));
+ }
+
+ ++NumFused;
+}
+
+
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ ShouldSchedulePredTy shouldScheduleAdjacent;
+ bool FuseBlock;
+ bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU);
+
+public:
+ MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
+ : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ if (FuseBlock)
+ // For each of the SUnits in the scheduling block, try to fuse the instr in
+ // it with one in its predecessors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(*DAG, ISU);
+
+ if (DAG->ExitSU.getInstr())
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(*DAG, DAG->ExitSU);
+}
+
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
+ const MachineInstr &AnchorMI = *AnchorSU.getInstr();
+ const TargetInstrInfo &TII = *DAG.TII;
+ const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI))
+ return false;
+
+ // Explore fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorSU.Preds) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output ||
+ Dep.isWeak())
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ if (DepSU.isBoundaryNode())
+ continue;
+
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ continue;
+
+ fuseInstructionPair(DAG, DepSU, AnchorSU);
+ return true;
+ }
+
+ return false;
+}
+
+} // end anonymous namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+ return nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+ return nullptr;
+}
+
+} // end namespace llvm
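As a usage sketch only (not part of this patch), a target would register the new mutation from its scheduler hooks with a predicate of its own; the compare-and-branch predicate below is purely hypothetical:

    // Hypothetical target predicate: fuse a compare with the conditional branch
    // that consumes it. FirstMI may be nullptr when only the second instr is
    // known yet, matching how the mutation above queries the predicate.
    static bool shouldScheduleCmpBr(const TargetInstrInfo &TII,
                                    const TargetSubtargetInfo &STI,
                                    const MachineInstr *FirstMI,
                                    const MachineInstr &SecondMI) {
      if (!SecondMI.isConditionalBranch())
        return false;
      return !FirstMI || FirstMI->isCompare();
    }

    // In the target's createMachineScheduler hook:
    //   DAG->addMutation(createMacroFusionDAGMutation(shouldScheduleCmpBr));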
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 1aed58c36e17..05e641d9489d 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -35,6 +35,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
@@ -260,6 +261,14 @@ void RegScavenger::backward() {
const MachineInstr &MI = *MBBI;
LiveUnits.stepBackward(MI);
+ // Expire scavenge spill frameindex uses.
+ for (ScavengedInfo &I : Scavenged) {
+ if (I.Restore == &MI) {
+ I.Reg = 0;
+ I.Restore = nullptr;
+ }
+ }
+
if (MBBI == MBB->begin()) {
MBBI = MachineBasicBlock::iterator(nullptr);
Tracking = false;
@@ -356,6 +365,80 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
return Survivor;
}
+/// Given the bitvector \p Available of free register units at position
+/// \p From, search backwards to find a register that is part of \p
+/// Candidates and not used/clobbered until the point \p To. If there are
+/// multiple candidates, continue searching and pick the one that is not
+/// used/clobbered for the longest time.
+/// Returns the register and the earliest position we know it to be free or
+/// the position MBB.end() if no register is available.
+static std::pair<MCPhysReg, MachineBasicBlock::iterator>
+findSurvivorBackwards(const MachineRegisterInfo &MRI,
+ MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
+ const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) {
+ bool FoundTo = false;
+ MCPhysReg Survivor = 0;
+ MachineBasicBlock::iterator Pos;
+ MachineBasicBlock &MBB = *From->getParent();
+ unsigned InstrLimit = 25;
+ unsigned InstrCountDown = InstrLimit;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LiveRegUnits Used(TRI);
+
+ for (MachineBasicBlock::iterator I = From;; --I) {
+ const MachineInstr &MI = *I;
+
+ Used.accumulateBackward(MI);
+
+ if (I == To) {
+ // See if one of the registers in RC wasn't used so far.
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg) &&
+ LiveOut.available(Reg))
+ return std::make_pair(Reg, MBB.end());
+ }
+ // Otherwise we will continue up to InstrLimit instructions to find
+ // the register which is not defined/used for the longest time.
+ FoundTo = true;
+ Pos = To;
+ }
+ if (FoundTo) {
+ if (Survivor == 0 || !Used.available(Survivor)) {
+ MCPhysReg AvailableReg = 0;
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg)) {
+ AvailableReg = Reg;
+ break;
+ }
+ }
+ if (AvailableReg == 0)
+ break;
+ Survivor = AvailableReg;
+ }
+ if (--InstrCountDown == 0)
+ break;
+
+ // Keep searching when we find a vreg, since the spilled register will
+ // be useful for this other vreg as well later.
+ bool FoundVReg = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ FoundVReg = true;
+ break;
+ }
+ }
+ if (FoundVReg) {
+ InstrCountDown = InstrLimit;
+ Pos = I;
+ }
+ if (I == MBB.begin())
+ break;
+ }
+ }
+
+ return std::make_pair(Survivor, Pos);
+}
+
static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -365,44 +448,16 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
return i;
}
-unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I,
- int SPAdj) {
- MachineInstr &MI = *I;
- const MachineFunction &MF = *MI.getParent()->getParent();
- // Consider all allocatable registers in the register class initially
- BitVector Candidates = TRI->getAllocatableSet(MF, RC);
-
- // Exclude all the registers being used by the instruction.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
-
- // Try to find a register that's unused if there is one, as then we won't
- // have to spill.
- BitVector Available = getRegsAvailable(RC);
- Available &= Candidates;
- if (Available.any())
- Candidates = Available;
-
- // Find the register whose use is furthest away.
- MachineBasicBlock::iterator UseMI;
- unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
-
- // If we found an unused register there is no reason to spill it.
- if (!isRegUsed(SReg)) {
- DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
- return SReg;
- }
-
+RegScavenger::ScavengedInfo &
+RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+ MachineBasicBlock::iterator Before,
+ MachineBasicBlock::iterator &UseMI) {
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
+ const MachineFunction &MF = *Before->getParent()->getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned NeedSize = TRI->getSpillSize(*RC);
- unsigned NeedAlign = TRI->getSpillAlignment(*RC);
+ unsigned NeedSize = TRI->getSpillSize(RC);
+ unsigned NeedAlign = TRI->getSpillAlignment(RC);
unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
@@ -437,39 +492,72 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
}
// Avoid infinite regress
- Scavenged[SI].Reg = SReg;
+ Scavenged[SI].Reg = Reg;
// If the target knows how to save/restore the register, let it do so;
// otherwise, use the emergency stack spill slot.
- if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
- // Spill the scavenged register before I.
+ if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) {
+ // Spill the scavenged register before \p Before.
int FI = Scavenged[SI].FrameIndex;
if (FI < FIB || FI >= FIE) {
std::string Msg = std::string("Error while trying to spill ") +
- TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) +
+ TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) +
": Cannot scavenge register without an emergency spill slot!";
report_fatal_error(Msg.c_str());
}
- TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
- RC, TRI);
- MachineBasicBlock::iterator II = std::prev(I);
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex,
+ &RC, TRI);
+ MachineBasicBlock::iterator II = std::prev(Before);
unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
- RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex,
+ &RC, TRI);
II = std::prev(UseMI);
FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
+ return Scavenged[SI];
+}
- Scavenged[SI].Restore = &*std::prev(UseMI);
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ MachineInstr &MI = *I;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates = TRI->getAllocatableSet(MF, RC);
- // Doing this here leads to infinite regress.
- // Scavenged[SI].Reg = SReg;
+ // Exclude all the registers being used by the instruction.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill.
+ BitVector Available = getRegsAvailable(RC);
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isRegUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
+ Scavenged.Restore = &*std::prev(UseMI);
DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
"\n");
@@ -477,85 +565,195 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
-void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
- // FIXME: Iterating over the instruction stream is unnecessary. We can simply
- // iterate over the vreg use list, which at this point only contains machine
- // operands for which eliminateFrameIndex need a new scratch reg.
+unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
+ MachineBasicBlock::iterator To,
+ bool RestoreAfter, int SPAdj) {
+ const MachineBasicBlock &MBB = *To->getParent();
+ const MachineFunction &MF = *MBB.getParent();
- // Run through the instructions and find any virtual registers.
- MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF) {
- RS.enterBasicBlock(MBB);
-
- int SPAdj = 0;
-
- // The instruction stream may change in the loop, so check MBB.end()
- // directly.
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- // We might end up here again with a NULL iterator if we scavenged a
- // register for which we inserted spill code for definition by what was
- // originally the first instruction in MBB.
- if (I == MachineBasicBlock::iterator(nullptr))
- I = MBB.begin();
-
- const MachineInstr &MI = *I;
- MachineBasicBlock::iterator J = std::next(I);
- MachineBasicBlock::iterator P =
- I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
- : std::prev(I);
-
- // RS should process this instruction before we might scavenge at this
- // location. This is because we might be replacing a virtual register
- // defined by this instruction, and if so, registers killed by this
- // instruction are available, and defined registers are not.
- RS.forward(I);
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
+ std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+ findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder);
+ MCPhysReg Reg = P.first;
+ MachineBasicBlock::iterator SpillBefore = P.second;
+ assert(Reg != 0 && "No register left to scavenge!");
+ // Found an available register?
+ if (SpillBefore != MBB.end()) {
+ MachineBasicBlock::iterator ReloadAfter =
+ RestoreAfter ? std::next(MBBI) : MBBI;
+ MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+ Scavenged.Restore = &*std::prev(SpillBefore);
+ LiveUnits.removeReg(Reg);
+ DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI)
+ << " until " << *SpillBefore);
+ } else {
+ DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n');
+ }
+ return Reg;
+}
- for (const MachineOperand &MO : MI.operands()) {
+/// Allocate a register for the virtual register \p VReg. The last use of
+/// \p VReg is around the current position of the register scavenger \p RS.
+/// \p ReserveAfter controls whether the scavenged register needs to be
+/// reserved after the current instruction; otherwise it will only be reserved
+/// before the current instruction.
+static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
+ unsigned VReg, bool ReserveAfter) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+#ifndef NDEBUG
+ // Verify that all definitions and uses are in the same basic block.
+ const MachineBasicBlock *CommonMBB = nullptr;
+ // Real definition for the reg, re-definitions are not considered.
+ const MachineInstr *RealDef = nullptr;
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) {
+ MachineBasicBlock *MBB = MO.getParent()->getParent();
+ if (CommonMBB == nullptr)
+ CommonMBB = MBB;
+ assert(MBB == CommonMBB && "All defs+uses must be in the same basic block");
+ if (MO.isDef()) {
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.readsRegister(VReg, &TRI)) {
+ assert((!RealDef || RealDef == &MI) &&
+ "Can have at most one definition which is not a redefinition");
+ RealDef = &MI;
+ }
+ }
+ }
+ assert(RealDef != nullptr && "Must have at least 1 Def");
+#endif
+
+ // We should only have one definition of the register. However, to
+ // accommodate the requirements of two-address code we also allow definitions
+ // in subsequent instructions provided they also read the register. That way
+ // we get a single contiguous lifetime.
+ //
+ // Definitions in MRI.def_begin() are unordered; search for the first.
+ MachineRegisterInfo::def_iterator FirstDef =
+ std::find_if(MRI.def_begin(VReg), MRI.def_end(),
+ [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
+ assert(FirstDef != MRI.def_end() &&
+ "Must have one definition that does not redefine vreg");
+ MachineInstr &DefMI = *FirstDef->getParent();
+
+ // The register scavenger will report a free register, inserting an emergency
+ // spill/reload if necessary.
+ int SPAdj = 0;
+ const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
+ unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ ReserveAfter, SPAdj);
+ MRI.replaceRegWith(VReg, SReg);
+ ++NumScavengedRegs;
+ return SReg;
+}
+
+/// Allocate (scavenge) vregs inside a single basic block.
+/// Returns true if the target spill callback created new vregs and a 2nd pass
+/// is necessary.
+static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
+ RegScavenger &RS,
+ MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ RS.enterBasicBlockEnd(MBB);
+
+ unsigned InitialNumVirtRegs = MRI.getNumVirtRegs();
+ bool NextInstructionReadsVReg = false;
+ for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
+ --I;
+ // Move RegScavenger to the position between *I and *std::next(I).
+ RS.backward(I);
+
+ // Look for unassigned vregs in the uses of *std::next(I).
+ if (NextInstructionReadsVReg) {
+ MachineBasicBlock::iterator N = std::next(I);
+ const MachineInstr &NMI = *N;
+ for (const MachineOperand &MO : NMI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ // We only care about virtual registers and ignore virtual registers
+ // created by the target callbacks in the process (those will be handled
+ // in a second scavenging round).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ if (!MO.readsReg())
continue;
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MO.isDef() && "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- unsigned ScratchReg = RS.scavengeRegister(RC, J, SPAdj);
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, true);
+ N->addRegisterKilled(SReg, &TRI, false);
+ RS.setRegUsed(SReg);
+ }
+ }
- ++NumScavengedRegs;
+ // Look for unassigned vregs in the defs of *I.
+ NextInstructionReadsVReg = false;
+ const MachineInstr &MI = *I;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Only vregs, no newly created vregs (see above).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ // We have to look at all operands anyway so we can precalculate here
+ // whether there is a reading operand. This allows us to skip the use
+ // step in the next iteration if there was none.
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ if (MO.readsReg()) {
+ NextInstructionReadsVReg = true;
+ }
+ if (MO.isDef()) {
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, false);
+ I->addRegisterDead(SReg, &TRI, false);
+ }
+ }
+ }
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MBB.front().operands()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ assert(!MO.readsReg() && "Vreg use in first instruction not allowed");
+ }
+#endif
- // Replace this reference to the virtual register with the
- // scratch register.
- assert(ScratchReg && "Missing scratch register!");
- MRI.replaceRegWith(Reg, ScratchReg);
+ return MRI.getNumVirtRegs() != InitialNumVirtRegs;
+}
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS.setRegUsed(ScratchReg);
- }
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+ // operands for which eliminateFrameIndex need a new scratch reg.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Shortcut.
+ if (MRI.getNumVirtRegs() == 0) {
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ return;
+ }
+
+ // Run through the instructions and find any virtual registers.
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
- // If the scavenger needed to use one of its spill slots, the
- // spill code will have been inserted in between I and J. This is a
- // problem because we need the spill code before I: Move I to just
- // prior to J.
- if (I != std::prev(J)) {
- MBB.splice(J, &MBB, I);
-
- // Before we move I, we need to prepare the RS to visit I again.
- // Specifically, RS will assert if it sees uses of registers that
- // it believes are undefined. Because we have already processed
- // register kills in I, when it visits I again, it will believe that
- // those registers are undefined. To avoid this situation, unprocess
- // the instruction I.
- assert(RS.getCurrentPosition() == I &&
- "The register scavenger has an unexpected position");
- I = P;
- RS.unprocess(P);
- } else
- ++I;
+ bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ if (Again) {
+ DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+ << MBB.getName() << '\n');
+ Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ // The target required a 2nd run (because it created new vregs while
+ // spilling). Refuse to do another pass to keep compile time in check.
+ if (Again)
+ report_fatal_error("Incomplete scavenging after 2nd pass");
}
}
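Reader's aid (not part of the patch): a minimal sketch of how the backwards scavenging API added above is meant to be driven for a single frame-index vreg, condensed from the scavengeVReg() helper in this change. The function name and parameters below are illustrative only.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Replace one frame-index vreg with a scavenged physreg. RS is assumed to be
// positioned via enterBasicBlockEnd()/backward() as in the block driver above.
static unsigned assignScavengedReg(MachineRegisterInfo &MRI, RegScavenger &RS,
                                   unsigned VReg, MachineInstr &DefMI) {
  const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
  // Picks a register that is free over the vreg's range, or inserts an
  // emergency spill/reload around it via spill() when none is free.
  unsigned PhysReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
                                                  /*RestoreAfter=*/false,
                                                  /*SPAdj=*/0);
  MRI.replaceRegWith(VReg, PhysReg);
  return PhysReg;
}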
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index d7a3ac080823..30757f070cad 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -1,4 +1,4 @@
-//===- RegisterUsageInfo.cpp - Register Usage Informartion Storage --------===//
+//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -38,7 +38,7 @@ static cl::opt<bool> DumpRegUsage(
cl::desc("print register usage details collected for analysis."));
INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
- "Register Usage Informartion Stroage", false, true)
+ "Register Usage Information Storage", false, true)
char PhysicalRegisterUsageInfo::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index a668ddb7389f..ae9c5adb0397 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_library(LLVMSelectionDAG
ScheduleDAGVLIW.cpp
SelectionDAGBuilder.cpp
SelectionDAG.cpp
+ SelectionDAGAddressAnalysis.cpp
SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d4422d94a17..d02dcb6f4439 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -469,7 +470,8 @@ namespace {
/// \return True if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ bool IsConstantSrc, bool UseVector,
+ bool UseTrunc);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
@@ -2549,14 +2551,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1 == 0)
+ if (N1IsConst && ConstValue1.isNullValue())
return N1;
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
+ if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -3685,7 +3687,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
- if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -4694,110 +4696,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
}
namespace {
-/// Helper struct to parse and store a memory address as base + index + offset.
-/// We ignore sign extensions when it is safe to do so.
-/// The following two expressions are not equivalent. To differentiate we need
-/// to store whether there was a sign extension involved in the index
-/// computation.
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (add (i8 load %index)
-/// (i8 1))))
-/// vs
-///
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
-/// (i32 1)))))
-struct BaseIndexOffset {
- SDValue Base;
- SDValue Index;
- int64_t Offset;
- bool IsIndexSignExt;
-
- BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
-
- BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
- bool IsIndexSignExt) :
- Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
-
- bool equalBaseIndex(const BaseIndexOffset &Other) {
- return Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt;
- }
-
- /// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
- int64_t PartialOffset = 0) {
- bool IsIndexSignExt = false;
-
- // Split up a folded GlobalAddress+Offset into its component parts.
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
- if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
- return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
- SDLoc(GA),
- GA->getValueType(0),
- /*Offset=*/PartialOffset,
- /*isTargetGA=*/false,
- GA->getTargetFlags()),
- SDValue(),
- GA->getOffset(),
- IsIndexSignExt);
- }
-
- // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
- // instruction, then it could be just the BASE or everything else we don't
- // know how to handle. Just use Ptr as BASE and give up.
- if (Ptr->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // We know that we have at least an ADD instruction. Try to pattern match
- // the simple case of BASE + OFFSET.
- if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
- }
-
- // Inside a loop the current BASE pointer is calculated using an ADD and a
- // MUL instruction. In this case Ptr is the actual BASE pointer.
- // (i64 add (i64 %array_ptr)
- // (i64 mul (i64 %induction_var)
- // (i64 %element_size)))
- if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // Look at Base + Index + Offset cases.
- SDValue Base = Ptr->getOperand(0);
- SDValue IndexOffset = Ptr->getOperand(1);
-
- // Skip signextends.
- if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
- IndexOffset = IndexOffset->getOperand(0);
- IsIndexSignExt = true;
- }
-
- // Either the case of Base + Index (no offset) or something else.
- if (IndexOffset->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
-
- // Now we have the case of Base + Index + offset.
- SDValue Index = IndexOffset->getOperand(0);
- SDValue Offset = IndexOffset->getOperand(1);
-
- if (!isa<ConstantSDNode>(Offset))
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // Ignore signextends.
- if (Index->getOpcode() == ISD::SIGN_EXTEND) {
- Index = Index->getOperand(0);
- IsIndexSignExt = true;
- } else IsIndexSignExt = false;
-
- int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
- return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
- }
-};
-} // namespace
-
-namespace {
/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
@@ -5017,14 +4915,15 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr());
+ int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
- else if (!Base->equalBaseIndex(Ptr))
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
// Calculate the offset of the current byte from the base address
- int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
+ ByteOffsetFromBase += MemoryByteOffset(*P);
ByteOffsets[i] = ByteOffsetFromBase;
// Remember the first byte load
@@ -12378,8 +12277,8 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
- SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
- unsigned NumStores, bool IsConstantSrc, bool UseVector) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector, bool UseTrunc) {
// Make sure we have something to merge.
if (NumStores < 2)
return false;
@@ -12464,7 +12363,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
- if (TLI.isTypeLegal(StoredVal.getValueType())) {
+ if (UseVector || !UseTrunc) {
NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
@@ -12495,15 +12394,15 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
- if (!BasePtr.Base.getNode())
+ if (!BasePtr.getBase().getNode())
return;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.isUndef())
+ if (BasePtr.getBase().isUndef())
return;
bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
@@ -12515,10 +12414,11 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
if (IsLoadSrc)
- LBasePtr = BaseIndexOffset::match(
- cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
+ LBasePtr =
+ BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr());
- auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
+ int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
// We can merge constant floats to equivalent integers
@@ -12529,8 +12429,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsLoadSrc) {
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
- if (!(LBasePtr.equalBaseIndex(LPtr)))
+ auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr());
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
return false;
@@ -12543,8 +12443,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
return false;
- Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
- return (Ptr.equalBaseIndex(BasePtr));
+ Ptr = BaseIndexOffset::match(Other->getBasePtr());
+ return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
// We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
@@ -12572,16 +12472,18 @@ void DAGCombiner::getStoreMergeCandidates(
if (I2.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
BaseIndexOffset Ptr;
- if (CandidateMatch(OtherST, Ptr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
if (I.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
- if (CandidateMatch(OtherST, Ptr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
}
@@ -12721,8 +12623,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned LastLegalType = 0;
- unsigned LastLegalVectorType = 0;
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
bool NonZero = false;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -12747,6 +12650,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
+ LastIntegerTrunc = false;
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
@@ -12758,6 +12662,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
FirstStoreAS, FirstStoreAlign, &IsFast) &&
IsFast) {
+ LastIntegerTrunc = true;
LastLegalType = i + 1;
}
}
@@ -12787,8 +12692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
- bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
- true, UseVector);
+ bool Merged = MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
if (!Merged) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
@@ -12836,7 +12741,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
bool Merged = MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumStoresToMerge, false, true);
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
if (!Merged) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
@@ -12881,11 +12786,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getMemoryVT() != MemVT)
break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
// If this is not the first ptr that we check.
- if (LdBasePtr.Base.getNode()) {
+ int64_t LdOffset = 0;
+ if (LdBasePtr.getBase().getNode()) {
// The base ptr must be the same.
- if (!LdPtr.equalBaseIndex(LdBasePtr))
+ if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
break;
} else {
// Check that all other base pointers are the same as this one.
@@ -12893,7 +12799,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
+ LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
if (LoadNodes.size() < 2) {
@@ -12919,10 +12825,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Scan the memory operations on the chain and find the first
// non-consecutive load memory address. These variables hold the index in
// the store node array.
- unsigned LastConsecutiveLoad = 0;
+ unsigned LastConsecutiveLoad = 1;
// This variable refers to the size and not index in the array.
- unsigned LastLegalVectorType = 0;
- unsigned LastLegalIntegerType = 0;
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool DoIntegerTruncate = false;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
@@ -12958,11 +12865,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
- IsFastLd)
+ IsFastLd) {
LastLegalIntegerType = i + 1;
- // Or check whether a truncstore and extload is legal.
- else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
@@ -12976,8 +12884,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
- IsFastLd)
+ IsFastLd) {
LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = true;
+ }
}
}
@@ -13012,17 +12922,31 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// The merged loads are required to have the same incoming chain, so
// using the first's chain is acceptable.
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign);
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
-
AddToWorklist(NewStoreChain.getNode());
- SDValue NewStore = DAG.getStore(
- NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign);
+ NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
+ } else { // This must be the truncstore/extload case
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad =
+ DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+ JointMemOpVT, FirstLoadAlign);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
@@ -13285,7 +13209,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
- if (!LegalTypes) {
+ if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
+ : !LegalTypes) {
for (;;) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
@@ -14035,6 +13960,11 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// when we start sorting the vectors by type.
return SDValue();
}
+ } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
+ InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+ ConcatOps[0] = VecIn2;
+ VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
@@ -16610,11 +16540,11 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
// Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
- if (BasePtr0.equalBaseIndex(BasePtr1))
- return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) ||
- (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset));
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr());
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr());
+ int64_t PtrDiff;
+ if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+ return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
// FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
// should be modified to use BaseIndexOffset.
@@ -16821,14 +16751,14 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
- if (!BasePtr.Base.getNode())
+ if (!BasePtr.getBase().getNode())
return false;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.isUndef())
+ if (BasePtr.getBase().isUndef())
return false;
SmallVector<StoreSDNode *, 8> ChainedStores;
@@ -16847,10 +16777,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
// Check that the base pointer is the same as the original one.
- if (!Ptr.equalBaseIndex(BasePtr))
+ if (!BasePtr.equalBaseIndex(Ptr, DAG))
break;
// Walk up the chain to find the next store node, ignoring any
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 15e87b7af18d..873b2bd48f1e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3530,17 +3530,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
- // The high part is obtained by SRA'ing all but one of the bits of low
- // part.
- unsigned LoSize = VT.getSizeInBits();
- SDValue HiLHS =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
- SDValue HiRHS =
- DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (isSigned) {
+ // The high part is obtained by SRA'ing all but one of the bits of low
+ // part.
+ unsigned LoSize = VT.getSizeInBits();
+ HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
// Here we're passing the 2 arguments explicitly as 4 arguments that are
// pre-lowered to the correct types. This all depends upon WideVT not
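Side note on the hunk above (standalone C++, not from the patch): for the signed multiply-overflow libcall the high half of each widened operand is just the sign bit replicated, i.e. an arithmetic shift right by one less than the bit width, while for the unsigned case it is simply zero. A minimal check of that identity for a 32-to-64-bit widening:

#include <cassert>
#include <cstdint>

int main() {
  int32_t S = -7;                              // signed operand
  uint32_t U = 7;                              // unsigned operand
  int32_t HiS = S >> 31;                       // sign bit replicated (SRA)
  uint32_t HiU = 0;                            // unsigned high half is zero
  assert(HiS == (int32_t)(int64_t(S) >> 32));  // matches the widened value
  assert(HiU == (uint32_t)(uint64_t(U) >> 32));
  return 0;
}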
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a3ba52a148ee..75fec7bd1d48 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -615,9 +615,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS,
N->getOperand(2));
- assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
// Convert to the expected type.
- return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+ return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 593efc5121f9..70b1fa77a099 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1861,28 +1861,68 @@ static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
- unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
- if (SethiUllmanNumber != 0)
- return SethiUllmanNumber;
-
- unsigned Extra = 0;
- for (const SDep &Pred : SU->Preds) {
- if (Pred.isCtrl()) continue; // ignore chain preds
- SUnit *PredSU = Pred.getSUnit();
- unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
- if (PredSethiUllman > SethiUllmanNumber) {
- SethiUllmanNumber = PredSethiUllman;
- Extra = 0;
- } else if (PredSethiUllman == SethiUllmanNumber)
- ++Extra;
- }
+ if (SUNumbers[SU->NodeNum] != 0)
+ return SUNumbers[SU->NodeNum];
+
+ // Use WorkList to avoid stack overflow on excessively large IRs.
+ struct WorkState {
+ WorkState(const SUnit *SU) : SU(SU) {}
+ const SUnit *SU;
+ unsigned PredsProcessed = 0;
+ };
- SethiUllmanNumber += Extra;
+ SmallVector<WorkState, 16> WorkList;
+ WorkList.push_back(SU);
+ while (!WorkList.empty()) {
+ auto &Temp = WorkList.back();
+ auto *TempSU = Temp.SU;
+ bool AllPredsKnown = true;
+ // Try to find a non-evaluated pred and push it into the processing stack.
+ for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
+ auto &Pred = TempSU->Preds[P];
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (SUNumbers[PredSU->NodeNum] == 0) {
+#ifndef NDEBUG
+ // In debug mode, check that we don't have such an element on the stack.
+ for (auto It : WorkList)
+ assert(It.SU != PredSU && "Trying to push an element twice?");
+#endif
+ // Next time, start processing this one from the next pred.
+ Temp.PredsProcessed = P + 1;
+ WorkList.push_back(PredSU);
+ AllPredsKnown = false;
+ break;
+ }
+ }
- if (SethiUllmanNumber == 0)
- SethiUllmanNumber = 1;
+ if (!AllPredsKnown)
+ continue;
- return SethiUllmanNumber;
+ // Once all preds are known, we can calculate the answer for this one.
+ unsigned SethiUllmanNumber = 0;
+ unsigned Extra = 0;
+ for (const SDep &Pred : TempSU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
+ assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+ SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
+ WorkList.pop_back();
+ }
+
+ assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
+ return SUNumbers[SU->NodeNum];
}
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
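Illustrative aside (generic C++, not LLVM code): the rewrite above replaces recursion with an explicit work stack so that very deep dependence graphs cannot overflow the call stack. The same pattern in self-contained form, computing a bottom-up value over a DAG with a "0 means not yet evaluated" memo table, as in SUNumbers:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Node { std::vector<const Node *> Preds; };

// Post-order evaluation with an explicit stack instead of recursion.
static unsigned eval(const Node *Root, const std::vector<Node> &All,
                     std::vector<unsigned> &Memo) {
  struct Frame { const Node *N; size_t NextPred; };
  auto Idx = [&](const Node *N) { return size_t(N - All.data()); };
  std::vector<Frame> Stack;
  Stack.push_back({Root, 0});
  while (!Stack.empty()) {
    Frame &F = Stack.back();
    if (F.NextPred < F.N->Preds.size()) {
      const Node *P = F.N->Preds[F.NextPred++];
      if (Memo[Idx(P)] == 0)
        Stack.push_back({P, 0});       // evaluate the unknown pred first
      continue;
    }
    unsigned Val = 1;                  // all preds known: combine their values
    for (const Node *P : F.N->Preds)
      Val = std::max(Val, Memo[Idx(P)] + 1);
    Memo[Idx(F.N)] = Val;
    Stack.pop_back();
  }
  return Memo[Idx(Root)];
}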
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7abdc76cb004..98553152117d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// TODO: In the AlwaysInline case, if the size is big then generate a loop
// rather than maybe a humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
- DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
+ DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
@@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME does the case above also need this?
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ EVT NVT = TLI.getTypeToTransformTo(C, VT);
assert(NVT.bitsGE(VT));
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
- MinAlign(SrcAlign, SrcOff), MMOFlags);
+ MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
OutChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
@@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value =
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
new file mode 100644
index 000000000000..d2e0dbbf88ec
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -0,0 +1,95 @@
+//===- SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
+ const SelectionDAG &DAG, int64_t &Off) {
+ // Obvious equivalent
+ Off = Other.Offset - Offset;
+ if (Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt)
+ return true;
+
+ // Match GlobalAddresses
+ if (Index == Other.Index)
+ if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if (A->getGlobal() == B->getGlobal()) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+
+ // TODO: we should be able to add FrameIndex analysis improvements here.
+
+ return false;
+}
+
+/// Parses the tree in \p Ptr for base, index, and offset addresses.
+BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) {
+ // (((B + I*M) + c)) + c ...
+ SDValue Base = Ptr;
+ SDValue Index = SDValue();
+ int64_t Offset = 0;
+ bool IsIndexSignExt = false;
+
+ // Consume constant adds
+ while (Base->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Base->getOperand(1))) {
+ int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue();
+ Offset += POffset;
+ Base = Base->getOperand(0);
+ }
+
+ if (Base->getOpcode() == ISD::ADD) {
+ // TODO: The following code appears to be needless as it just
+ // bails on some Ptrs early, reducing the cases where we
+ // find equivalence. We should be able to remove this.
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Base is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Base->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ Index = Base->getOperand(1);
+ SDValue PotentialBase = Base->getOperand(0);
+
+ // Skip signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Check for an (index + offset) pattern.
+ if (Index->getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Index->getOperand(1)))
+ return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
+
+ Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue();
+ Index = Index->getOperand(0);
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else
+ IsIndexSignExt = false;
+ Base = PotentialBase;
+ }
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+}
+} // end namespace llvm
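A usage sketch (assumed, mirroring the DAGCombiner changes elsewhere in this patch): decompose two addresses and, when they share a base and index, use the returned byte delta for an overlap check. The helper name below is hypothetical.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
using namespace llvm;

// Returns false only when the two accesses provably do not overlap.
static bool mayOverlap(SDValue Ptr0, int64_t Bytes0, SDValue Ptr1,
                       int64_t Bytes1, const SelectionDAG &DAG) {
  BaseIndexOffset B0 = BaseIndexOffset::match(Ptr0);
  BaseIndexOffset B1 = BaseIndexOffset::match(Ptr1);
  int64_t PtrDiff;   // byte offset of Ptr1 relative to Ptr0
  if (B0.equalBaseIndex(B1, DAG, PtrDiff))
    return !((Bytes0 <= PtrDiff) || (PtrDiff + Bytes1 <= 0));
  return true;       // different/unknown bases: conservatively may overlap
}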
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index dcccd17bb98e..f711ca71f79f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -337,12 +337,13 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
/// may trap on it. In this case we have to split the edge so that the path
/// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction. If available, we pass a
-/// dominator tree to be updated when we split critical edges. This is because
-/// SelectionDAGISel preserves the DominatorTree.
+/// execute the possibly trapping instruction. If available, we pass domtree
+/// and loop info to be updated when we split critical edges. This is because
+/// SelectionDAGISel preserves these analyses.
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) {
+static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
+ LoopInfo *LI) {
// Loop for blocks with phi nodes.
for (BasicBlock &BB : Fn) {
PHINode *PN = dyn_cast<PHINode>(BB.begin());
@@ -368,7 +369,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) {
// Okay, we have to split this edge.
SplitCriticalEdge(
Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
- CriticalEdgeSplittingOptions(DT).setMergeIdenticalEdges());
+ CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -406,10 +407,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT);
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cfda0fffd031..8652df7bbd70 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -365,10 +365,10 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
- if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue())
+ if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
return false;
- if (C.intersects(~Demanded)) {
+ if (!C.isSubsetOf(Demanded)) {
EVT VT = Op.getValueType();
SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
@@ -919,7 +919,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (NewMask == 1)
+ if (NewMask.isOneValue())
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -1349,7 +1349,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
- return CVal == 1;
+ return CVal.isOneValue();
case ZeroOrNegativeOneBooleanContent:
return CVal.isAllOnesValue();
}
@@ -1506,7 +1506,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
- if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
const APInt &ShAmt
@@ -1666,7 +1666,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
- if ((newMask & Mask) == Mask) {
+ if (Mask.isSubsetOf(newMask)) {
if (DAG.getDataLayout().isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
else
@@ -1785,12 +1785,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ExtSrcTyBits),
dl, ExtDstTy),
Cond);
- } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ } else if ((N1C->isNullValue() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
- bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
@@ -1807,7 +1807,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
// can only do this if the top bits are known zero.
unsigned BitWidth = N0.getValueSizeInBits();
@@ -1830,7 +1830,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
- } else if (N1C->getAPIntValue() == 1 &&
+ } else if (N1C->isOne() &&
(VT == MVT::i1 ||
getBooleanContents(N0->getValueType(0)) ==
ZeroOrOneBooleanContent)) {
@@ -1848,7 +1848,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (Op0.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -2482,7 +2482,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
- Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
SDLoc(C), MVT::i64));
}
return;
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index c8537ad2f313..eeb00a784b0d 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,17 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <utility>
#define DEBUG_TYPE "target-reg-info"
@@ -38,7 +48,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
CoveringLanes(SRICoveringLanes) {
}
-TargetRegisterInfo::~TargetRegisterInfo() {}
+TargetRegisterInfo::~TargetRegisterInfo() = default;
void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg)
const {
@@ -126,7 +136,7 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-} // End of llvm namespace
+} // end namespace llvm
/// getAllocatableClass - Return the maximal subclass of the given register
/// class that is alloctable, or NULL.
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index 82e85bab1474..f6d5bc80ddff 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <string>
+
using namespace llvm;
-//---------------------------------------------------------------------------
-// TargetSubtargetInfo Class
-//
TargetSubtargetInfo::TargetSubtargetInfo(
const Triple &TT, StringRef CPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
@@ -29,7 +31,7 @@ TargetSubtargetInfo::TargetSubtargetInfo(
: MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
}
-TargetSubtargetInfo::~TargetSubtargetInfo() {}
+TargetSubtargetInfo::~TargetSubtargetInfo() = default;
bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;