author     Dimitry Andric <dim@FreeBSD.org>   2017-05-02 18:30:13 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-05-02 18:30:13 +0000
commit     a303c417bbdb53703c2c17398b08486bde78f1f6
tree       98366d6b93d863cefdc53f16c66c0c5ae7fb2261 /lib/CodeGen
parent     12f3ca4cdb95b193af905a00e722a4dcb40b3de3
Diffstat (limited to 'lib/CodeGen')
32 files changed, 1359 insertions, 1150 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d99065b1b67a..b11e30c359b3 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -820,7 +820,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { const DILocalVariable *V = MI->getDebugVariable(); if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { - StringRef Name = SP->getDisplayName(); + StringRef Name = SP->getName(); if (!Name.empty()) OS << Name << ":"; } diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 2571f6869651..786b11618d75 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -17,6 +17,7 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" @@ -237,7 +238,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { // The display name includes function template arguments. Drop them to match // MSVC. - StringRef DisplayName = SP->getDisplayName().split('<').first; + StringRef DisplayName = SP->getName().split('<').first; const DIScope *Scope = SP->getScope().resolve(); TypeIndex TI; @@ -392,7 +393,7 @@ void CodeViewDebug::endModule() { // subprograms. switchToDebugSectionForSymbol(nullptr); - MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *CompilerInfo = beginCVSubsection(ModuleDebugFragmentKind::Symbols); emitCompilerInformation(); endCVSubsection(CompilerInfo); @@ -416,7 +417,7 @@ void CodeViewDebug::endModule() { // Emit UDT records for any types used by global variables. if (!GlobalUDTs.empty()) { - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols); emitDebugInfoForUDTs(GlobalUDTs); endCVSubsection(SymbolsEnd); } @@ -644,7 +645,8 @@ void CodeViewDebug::emitInlineeLinesSubsection() { return; OS.AddComment("Inlinee lines subsection"); - MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines); + MCSymbol *InlineEnd = + beginCVSubsection(ModuleDebugFragmentKind::InlineeLines); // We don't provide any extra file info. // FIXME: Find out if debuggers use this info. @@ -657,7 +659,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() { OS.AddBlankLine(); unsigned FileId = maybeRecordFile(SP->getFile()); - OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " + + OS.AddComment("Inlined function " + SP->getName() + " starts at " + SP->getFilename() + Twine(':') + Twine(SP->getLine())); OS.AddBlankLine(); // The filechecksum table uses 8 byte entries for now, and file ids start at @@ -759,9 +761,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // If we have a display name, build the fully qualified name by walking the // chain of scopes. - if (!SP->getDisplayName().empty()) + if (!SP->getName().empty()) FuncName = - getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName()); + getFullyQualifiedName(SP->getScope().resolve(), SP->getName()); // If our DISubprogram name is empty, use the mangled name. 
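A note on the getFuncIdForSubprogram hunk earlier in this file: template arguments are dropped from the function name with StringRef::split, which returns the (before, after) pair around the first occurrence of the separator. A minimal standalone sketch of that idiom, with an illustrative helper name that is not part of the patch:

    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    // split('<') yields {text before the first '<', text after it};
    // keeping .first strips template arguments, e.g. "foo<int>" -> "foo".
    // If there is no '<', .first is the whole string.
    StringRef dropTemplateArgs(StringRef Name) {
      return Name.split('<').first;
    }
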
if (FuncName.empty()) @@ -769,7 +771,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols); { MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(), *ProcRecordEnd = MMI->getContext().createTempSymbol(); @@ -2114,7 +2116,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { maybeRecordLocation(DL, Asm->MF); } -MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) { +MCSymbol *CodeViewDebug::beginCVSubsection(ModuleDebugFragmentKind Kind) { MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), *EndLabel = MMI->getContext().createTempSymbol(); OS.EmitIntValue(unsigned(Kind), 4); @@ -2174,7 +2176,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() { if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { if (!EndLabel) { OS.AddComment("Symbol subsection for globals"); - EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols); } // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV)); @@ -2192,7 +2194,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() { OS.AddComment("Symbol subsection for " + Twine(GlobalValue::getRealLinkageName(GV->getName()))); switchToDebugSectionForSymbol(GVSym); - EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols); // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym); endCVSubsection(EndLabel); diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 343384c51772..46b2daa1e007 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -216,7 +216,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is /// finished. - MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind); + MCSymbol *beginCVSubsection(codeview::ModuleDebugFragmentKind Kind); void endCVSubsection(MCSymbol *EndLabel); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 16fb20dd7e20..8d25def7772c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -375,10 +375,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } -void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { - addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); -} - /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName either the struct, or a pointer to the struct, as @@ -662,6 +658,14 @@ void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { } } +/// Add thrown types. 
+void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) { + for (const auto *Ty : ThrownTypes) { + DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die); + addType(TT, cast<DIType>(Ty)); + } +} + DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { if (!Context || isa<DIFile>(Context)) return &getUnitDie(); @@ -1077,7 +1081,6 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { Name = "(anonymous namespace)"; DD->addAccelNamespace(Name, NDie); addGlobalName(Name, NDie, NS->getScope()); - addSourceLine(NDie, NS); if (NS->getExportSymbols()) addFlag(NDie, dwarf::DW_AT_export_symbols); return &NDie; @@ -1249,6 +1252,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, constructSubprogramArguments(SPDie, Args); } + addThrownTypes(SPDie, SP->getThrownTypes()); + if (SP->isArtificial()) addFlag(SPDie, dwarf::DW_AT_artificial); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index e84df4650882..8fc841703e23 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -210,7 +210,6 @@ public: void addSourceLine(DIE &Die, const DIGlobalVariable *G); void addSourceLine(DIE &Die, const DISubprogram *SP); void addSourceLine(DIE &Die, const DIType *Ty); - void addSourceLine(DIE &Die, const DINamespace *NS); void addSourceLine(DIE &Die, const DIObjCProperty *Ty); /// Add constant value entry in variable DIE. @@ -230,6 +229,9 @@ public: /// Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DINodeArray TParams); + /// Add thrown types. + void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); + // FIXME: Should be reformulated in terms of addComplexAddress. /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual Block variable (navigating diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 0912d9f68aff..26da748fa244 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -65,6 +65,7 @@ add_llvm_library(LLVMCodeGen MachineCSE.cpp MachineDominanceFrontier.cpp MachineDominators.cpp + MachineFrameInfo.cpp MachineFunction.cpp MachineFunctionPass.cpp MachineFunctionPrinterPass.cpp diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c862cfd28add..c6c93811a0f9 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -2226,10 +2226,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { ConstantInt *RetVal = lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); // Substituting this can cause recursive simplifications, which can - // invalidate our iterator. Use a WeakVH to hold onto it in case this + // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case + // this // happens. Value *CurValue = &*CurInstIterator; - WeakVH IterHandle(CurValue); + WeakTrackingVH IterHandle(CurValue); replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); @@ -4442,9 +4443,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // using it. if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. - // Use a WeakVH to hold onto it in case this happens. + // Use a WeakTrackingVH to hold onto it in case this happens. 
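The WeakVH -> WeakTrackingVH rename used throughout this patch names the handle for what it does: it follows its value across replaceAllUsesWith and nulls out if the value is deleted, which is exactly what the iterator-protection comments here depend on. A minimal sketch of the pattern, assuming an LLVM transform context; runSimplification is a hypothetical stand-in for the simplifying call:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/ValueHandle.h"
    using namespace llvm;

    void runSimplification();  // hypothetical; may RAUW or erase instructions

    void guardedSimplify(BasicBlock *BB, BasicBlock::iterator &CurInstIterator) {
      // Guard the iterator across a call that may recursively simplify,
      // and thereby replace or erase, the pointed-to instruction.
      Value *CurValue = &*CurInstIterator;
      WeakTrackingVH IterHandle(CurValue);

      runSimplification();

      // If the handle no longer matches, the instruction was replaced or
      // erased; recompute the iterator instead of dereferencing it.
      if (IterHandle != CurValue)
        CurInstIterator = BB->begin();
    }
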
Value *CurValue = &*CurInstIterator; - WeakVH IterHandle(CurValue); + WeakTrackingVH IterHandle(CurValue); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); @@ -5959,7 +5960,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) { + if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 7b1b2d64fccc..65f58e5686e0 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -213,10 +213,8 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, VLIWPacketizerList::~VLIWPacketizerList() { - if (VLIWScheduler) - delete VLIWScheduler; - if (ResourceTracker) - delete ResourceTracker; + delete VLIWScheduler; + delete ResourceTracker; } diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index 035a2ac78ed9..ebfe6cb3b733 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -83,8 +83,8 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; - if (FuncInfo.getParamAlignment(OpIdx)) - FrameAlign = FuncInfo.getParamAlignment(OpIdx); + if (FuncInfo.getParamAlignment(OpIdx - 1)) + FrameAlign = FuncInfo.getParamAlignment(OpIdx - 1); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); Arg.Flags.setByValAlign(FrameAlign); diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 5fb8dfc95d3f..75be7a55bd2a 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1199,9 +1199,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { finishPendingPhis(); - // Now that the MachineFrameInfo has been configured, no further changes to - // the reserved registers are possible. - MRI->freezeReservedRegs(*MF); + auto &TLI = *MF->getSubtarget().getTargetLowering(); + TLI.finalizeLowering(*MF); // Merge the argument lowering and constants block with its single // successor, the LLVM-IR entry block. We want the basic block to diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 942680b6fff3..c67da8629a3b 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -58,10 +58,11 @@ bool InstructionSelector::constrainSelectedInstRegOperands( MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), Reg, OpI)); - // Tie uses to defs as indicated in MCInstrDesc. + // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been + // done. 
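Separately, the VLIWPacketizerList destructor change a few hunks up rests on a core C++ guarantee: applying delete to a null pointer is a no-op, so wrapping each delete in an if check added nothing. A self-contained illustration:

    struct Holder {
      int *P = nullptr;
      ~Holder() { delete P; }  // well-defined even when P is null
    };

    int main() {
      Holder Empty;            // destructor runs with P == nullptr: no-op
      Holder Full;
      Full.P = new int(42);    // destructor frees the allocation
    }
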
if (MO.isUse()) { int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); - if (DefIdx != -1) + if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) I.tieOperands(DefIdx, OpI); } } diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index a2773cccc5db..bd04acd049db 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -541,7 +541,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); MFI.setAdjustsStack(YamlMFI.AdjustsStack); MFI.setHasCalls(YamlMFI.HasCalls); - MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + if (YamlMFI.MaxCallFrameSize != ~0u) + MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index b6624b88fe23..d017b21f0a59 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -286,7 +286,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.MaxAlignment = MFI.getMaxAlignment(); YamlMFI.AdjustsStack = MFI.adjustsStack(); YamlMFI.HasCalls = MFI.hasCalls(); - YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize(); + YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed() + ? MFI.getMaxCallFrameSize() : ~0u; YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp new file mode 100644 index 000000000000..7de8434df806 --- /dev/null +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -0,0 +1,218 @@ +//===-- MachineFrameInfo.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Implements MachineFrameInfo that manages the stack frame. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFrameInfo.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> + +#define DEBUG_TYPE "codegen" + +using namespace llvm; + +void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { + if (!StackRealignable) + assert(Align <= StackAlignment && + "For targets without stack realignment, Align is out of limit!"); + if (MaxAlignment < Align) MaxAlignment = Align; +} + +/// Clamp the alignment if requested and emit a warning. 
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, + unsigned StackAlign) { + if (!ShouldClamp || Align <= StackAlign) + return Align; + DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); + return StackAlign; +} + +int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSS, const AllocaInst *Alloca) { + assert(Size != 0 && "Cannot allocate zero size stack objects!"); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, + !isSS)); + int Index = (int)Objects.size() - NumFixedObjects - 1; + assert(Index >= 0 && "Bad frame index!"); + ensureMaxAlignment(Alignment); + return Index; +} + +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, + unsigned Alignment) { + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + CreateStackObject(Size, Alignment, true); + int Index = (int)Objects.size() - NumFixedObjects - 1; + ensureMaxAlignment(Alignment); + return Index; +} + +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, + const AllocaInst *Alloca) { + HasVarSizedObjects = true; + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); + ensureMaxAlignment(Alignment); + return (int)Objects.size()-NumFixedObjects-1; +} + +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable, bool isAliased) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. Note that unlike the non-fixed case, if the + // stack needs realignment, we can't assume that the stack will in fact be + // aligned. + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ false, + /*Alloca*/ nullptr, isAliased)); + return -++NumFixedObjects; +} + +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset, + bool Immutable) { + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ true, + /*Alloca*/ nullptr, + /*isAliased*/ false)); + return -++NumFixedObjects; +} + +BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; + ++CSR) + BV.set(*CSR); + + // Saved CSRs are not pristine. 
+ for (auto &I : getCalleeSavedInfo()) + for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) + BV.reset(*S); + + return BV; +} + +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ + if (Objects.empty()) return; + + const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); + int ValOffset = (FI ? 
FI->getOffsetOfLocalArea() : 0); + + OS << "Frame Objects:\n"; + + for (unsigned i = 0, e = Objects.size(); i != e; ++i) { + const StackObject &SO = Objects[i]; + OS << " fi#" << (int)(i-NumFixedObjects) << ": "; + if (SO.Size == ~0ULL) { + OS << "dead\n"; + continue; + } + if (SO.Size == 0) + OS << "variable sized"; + else + OS << "size=" << SO.Size; + OS << ", align=" << SO.Alignment; + + if (i < NumFixedObjects) + OS << ", fixed"; + if (i < NumFixedObjects || SO.SPOffset != -1) { + int64_t Off = SO.SPOffset - ValOffset; + OS << ", at location [SP"; + if (Off > 0) + OS << "+" << Off; + else if (Off < 0) + OS << Off; + OS << "]"; + } + OS << "\n"; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const { + print(MF, dbgs()); +} +#endif diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index c9767a25e908..ac4ccb81b884 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -757,214 +757,6 @@ void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) { /// \} //===----------------------------------------------------------------------===// -// MachineFrameInfo implementation -//===----------------------------------------------------------------------===// - -/// Make sure the function is at least Align bytes aligned. -void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!StackRealignable) - assert(Align <= StackAlignment && - "For targets without stack realignment, Align is out of limit!"); - if (MaxAlignment < Align) MaxAlignment = Align; -} - -/// Clamp the alignment if requested and emit a warning. -static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, - unsigned StackAlign) { - if (!ShouldClamp || Align <= StackAlign) - return Align; - DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign - << " when stack realignment is off" << '\n'); - return StackAlign; -} - -/// Create a new statically sized stack object, returning a nonnegative -/// identifier to represent it. -int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, const AllocaInst *Alloca) { - assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, - !isSS)); - int Index = (int)Objects.size() - NumFixedObjects - 1; - assert(Index >= 0 && "Bad frame index!"); - ensureMaxAlignment(Alignment); - return Index; -} - -/// Create a new statically sized stack object that represents a spill slot, -/// returning a nonnegative identifier to represent it. -int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, - unsigned Alignment) { - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - CreateStackObject(Size, Alignment, true); - int Index = (int)Objects.size() - NumFixedObjects - 1; - ensureMaxAlignment(Alignment); - return Index; -} - -/// Notify the MachineFrameInfo object that a variable sized object has been -/// created. This must be created whenever a variable sized object is created, -/// whether or not the index returned is actually used. 
-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, - const AllocaInst *Alloca) { - HasVarSizedObjects = true; - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); - ensureMaxAlignment(Alignment); - return (int)Objects.size()-NumFixedObjects-1; -} - -/// Create a new object at a fixed location on the stack. -/// All fixed objects should be created before other objects are created for -/// efficiency. By default, fixed objects are immutable. This returns an -/// index with a negative value. -int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isAliased) { - assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. Note that unlike the non-fixed case, if the - // stack needs realignment, we can't assume that the stack will in fact be - // aligned. - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ false, - /*Alloca*/ nullptr, isAliased)); - return -++NumFixedObjects; -} - -/// Create a spill slot at a fixed location on the stack. -/// Returns an index with a negative value. -int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, - int64_t SPOffset, - bool Immutable) { - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ true, - /*Alloca*/ nullptr, - /*isAliased*/ false)); - return -++NumFixedObjects; -} - -BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - BitVector BV(TRI->getNumRegs()); - - // Before CSI is calculated, no registers are considered pristine. They can be - // freely used and PEI will make sure they are saved. - if (!isCalleeSavedInfoValid()) - return BV; - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; - ++CSR) - BV.set(*CSR); - - // Saved CSRs are not pristine. - for (auto &I : getCalleeSavedInfo()) - for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) - BV.reset(*S); - - return BV; -} - -unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - unsigned MaxAlign = getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. 
- - for (int i = getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { - if (isDeadObjectIndex(i)) - continue; - Offset += getObjectSize(i); - unsigned Align = getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (adjustsStack() || hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - -void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ - if (Objects.empty()) return; - - const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); - int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); - - OS << "Frame Objects:\n"; - - for (unsigned i = 0, e = Objects.size(); i != e; ++i) { - const StackObject &SO = Objects[i]; - OS << " fi#" << (int)(i-NumFixedObjects) << ": "; - if (SO.Size == ~0ULL) { - OS << "dead\n"; - continue; - } - if (SO.Size == 0) - OS << "variable sized"; - else - OS << "size=" << SO.Size; - OS << ", align=" << SO.Alignment; - - if (i < NumFixedObjects) - OS << ", fixed"; - if (i < NumFixedObjects || SO.SPOffset != -1) { - int64_t Off = SO.SPOffset - ValOffset; - OS << ", at location [SP"; - if (Off > 0) - OS << "+" << Off; - else if (Off < 0) - OS << Off; - OS << "]"; - } - OS << "\n"; - } -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const { - print(MF, dbgs()); -} -#endif - -//===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 1faf6292a9c1..d665201a5d17 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -2350,7 +2350,7 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, const MachineInstr &Orig, int FrameIndex) { const MDNode *Var = Orig.getDebugVariable(); - auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression()); + const auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression()); bool IsIndirect = Orig.isIndirectDebugValue(); uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0; DebugLoc DL = Orig.getDebugLoc(); @@ -2359,13 +2359,8 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, // If the DBG_VALUE already was a memory location, add an extra // DW_OP_deref. Otherwise just turning this from a register into a // memory/indirect location is sufficient. 
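DIExpression::prepend(Expr, DIExpression::WithDeref) replaces the hand-rolled element copy the old code performed: it builds a new expression whose element list starts with DW_OP_deref followed by the original elements. Modeling just that list manipulation with a plain vector (DW_OP_deref is 0x06 in DWARF; the helper is illustrative, not LLVM API):

    #include <cstdint>
    #include <vector>

    std::vector<uint64_t> prependDeref(const std::vector<uint64_t> &Elts) {
      std::vector<uint64_t> Ops;
      Ops.push_back(0x06);                             // DW_OP_deref
      Ops.insert(Ops.end(), Elts.begin(), Elts.end()); // old elements follow
      return Ops;
    }
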
- if (IsIndirect) { - SmallVector<uint64_t, 8> Ops; - Ops.push_back(dwarf::DW_OP_deref); - if (Expr) - Ops.append(Expr->elements_begin(), Expr->elements_end()); - Expr = DIExpression::get(Expr->getContext(), Ops); - } + if (IsIndirect) + Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); return BuildMI(BB, I, DL, Orig.getDesc()) .addFrameIndex(FrameIndex) .addImm(Offset) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1251ae6262b8..dc0276d57667 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" @@ -236,10 +237,13 @@ namespace { SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); SDValue visitUADDO(SDNode *N); + SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitSUBC(SDNode *N); SDValue visitUSUBO(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitADDCARRY(SDNode *N); SDValue visitSUBE(SDNode *N); + SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); @@ -369,14 +373,14 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); - SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal); + SDNodeFlags Flags, bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal); + SDNodeFlags Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -396,6 +400,7 @@ namespace { SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); + SDValue matchVSelectOpSizesWithSetCC(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -644,7 +649,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
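The signed-zero guard exists because the rewrite is wrong at A == B: under IEEE arithmetic A - B is +0.0 there, so -(A - B) is -0.0, while B - A is +0.0. The two results compare equal but carry different sign bits, which is observable:

    #include <cstdio>

    int main() {
      double A = 1.0, B = 1.0;
      double Neg = -(A - B);                          // -(+0.0) == -0.0
      double Swap = B - A;                            // +0.0
      std::printf("%g %g\n", 1.0 / Neg, 1.0 / Swap);  // prints: -inf inf
    }

Hence the fold fires only when signed zeros need not be honored, now queried through the by-value N->getFlags() accessor.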
if (!Options->NoSignedZerosFPMath && - !Op.getNode()->getFlags()->hasNoSignedZeros()) + !Op.getNode()->getFlags().hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -682,7 +687,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - const SDNodeFlags *Flags = Op.getNode()->getFlags(); + const SDNodeFlags Flags = Op.getNode()->getFlags(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); @@ -965,8 +970,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + KnownBits Known; + if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO)) return false; // Revisit the node. @@ -1412,7 +1417,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SUBC: return visitSUBC(N); case ISD::USUBO: return visitUSUBO(N); case ISD::ADDE: return visitADDE(N); + case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); + case ISD::SUBCARRY: return visitSUBCARRY(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1866,14 +1873,31 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (isNullConstant(N1)) return N0; - // fold ((c1-A)+c2) -> (c1+c2)-A if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { - if (N0.getOpcode() == ISD::SUB) - if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), - N0.getOperand(1)); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { + // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic. + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), + N0.getOperand(1)); + } + + // add (sext i1 X), 1 -> zext (not i1 X) + // We don't transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // because most (?) targets generate better code for the zext form. 
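The comment above is an exhaustive two-case identity: an i1 value X is 0 or 1, sext X is therefore 0 or -1, and (sext X) + 1 lands on 1 or 0, which is exactly zext (not X). A quick standalone check of both cases:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int X : {0, 1}) {                // the two possible i1 values
        int32_t Sext = X ? -1 : 0;          // sext i1 X to i32
        int32_t NotZext = X ^ 1;            // zext (not i1 X) to i32
        assert(Sext + 1 == NotZext);        // add (sext X), 1 == zext (not X)
      }
    }
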
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + isOneConstantOrOneSplatConstant(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -1992,6 +2016,11 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) } } + // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) + return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), + N0, N1.getOperand(0), N1.getOperand(2)); + return SDValue(); } @@ -2055,6 +2084,26 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); + if (SDValue Combined = visitUADDOLike(N0, N1, N)) + return Combined; + + if (SDValue Combined = visitUADDOLike(N1, N0, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { + // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) + // If Y + 1 cannot overflow. + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { + SDValue Y = N1.getOperand(0); + SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType()); + if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y, + N1.getOperand(2)); + } + return SDValue(); } @@ -2077,6 +2126,25 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitADDCARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), + N1, N0, CarryIn); + + // fold (addcarry x, y, false) -> (uaddo x, y) + if (isNullConstant(CarryIn)) + return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N0, N1); + + return SDValue(); +} + // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, @@ -2143,13 +2211,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // 0 - X --> 0 if the sub is NUW. - if (N->getFlags()->hasNoUnsignedWrap()) + if (N->getFlags().hasNoUnsignedWrap()) return N0; if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { // N1 is either 0 or the minimum signed value. If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. - if (N->getFlags()->hasNoSignedWrap()) + if (N->getFlags().hasNoSignedWrap()) return N0; // 0 - X --> X if X is 0 or the minimum signed value. 
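Both visitSUB early-outs follow from the wrap flags. With NUW, 0 - X cannot borrow, which forces X == 0, so returning the zero operand is exact. When X is known to be either 0 or the minimum signed value, NSW rules out the latter, because negating the minimum value overflows; and even without NSW, 0 - X equals X for both candidates, since negating the minimum value wraps back to itself in two's complement:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Min = 0x80000000u;    // bit pattern of INT32_MIN
      uint32_t Neg = 0u - Min;       // two's-complement 0 - X
      std::printf("%#x\n", Neg);     // 0x80000000: 0 - INT_MIN == INT_MIN
    }
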
@@ -2309,6 +2377,18 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (subcarry x, y, false) -> (usubo x, y) + if (isNullConstant(CarryIn)) + return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2589,9 +2669,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && - (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; @@ -3766,7 +3845,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) @@ -3880,8 +3959,15 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { SDValue N0 = N.getOperand(0); unsigned Opc0 = N0.getOpcode(); + if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) + return false; - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + ConstantSDNode *N1C = nullptr; + // SHL or SRL: look upstream for AND mask operand + if (Opc == ISD::AND) + N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + else if (Opc0 == ISD::AND) + N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (!N1C) return false; @@ -3952,7 +4038,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Look for either @@ -3967,18 +4053,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) + if (!isBSwapHWordElement(N00, Parts)) return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + if (!isBSwapHWordElement(N01, Parts)) return SDValue(); - SDValue N010 = N01.getOperand(0); - if (!isBSwapHWordElement(N010, Parts)) + SDValue N10 = N1.getOperand(0); + if (!isBSwapHWordElement(N10, Parts)) return SDValue(); - SDValue N011 = N01.getOperand(1); - if (!isBSwapHWordElement(N011, Parts)) + SDValue N11 = N1.getOperand(1); + if (!isBSwapHWordElement(N11, Parts)) return SDValue(); } else { // (or (or (or (and), (and)), (and)), (and)) @@ -5322,7 +5406,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && - 
cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) { + N0->getFlags().hasExact()) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t C1 = N0C1->getZExtValue(); uint64_t C2 = N1C->getZExtValue(); @@ -5347,7 +5431,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); SDValue Shift; if (c2 > c1) { - Mask = Mask.shl(c2 - c1); + Mask <<= c2 - c1; SDLoc DL(N); Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), DAG.getConstant(c2 - c1, DL, N1.getValueType())); @@ -5680,20 +5764,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(N0.getOperand(0), Known); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); + if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero; + APInt UnknownBits = ~Known.Zero; if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. - if ((UnknownBits & (UnknownBits - 1)) == 0) { + if (UnknownBits.isPowerOf2()) { // Okay, we know that only that the single bit specified by UnknownBits // could be set on input to the CTLZ node. If this bit is set, the SRL // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair @@ -6889,6 +6973,51 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } +/// If we're narrowing or widening the result of a vector select and the final +/// size is the same size as a setcc (compare) feeding the select, then try to +/// apply the cast operation to the select's operands because matching vector +/// sizes for a select condition and other operands should be more efficient. +SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { + unsigned CastOpcode = Cast->getOpcode(); + assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || + CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || + CastOpcode == ISD::FP_ROUND) && + "Unexpected opcode for vector select narrowing/widening"); + + // We only do this transform before legal ops because the pattern may be + // obfuscated by target-specific operations after legalization. Do not create + // an illegal select op, however, because that may be difficult to lower. + EVT VT = Cast->getValueType(0); + if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + SDValue VSel = Cast->getOperand(0); + if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || + VSel.getOperand(0).getOpcode() != ISD::SETCC) + return SDValue(); + + // Does the setcc have the same vector size as the casted select? 
+ SDValue SetCC = VSel.getOperand(0); + EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); + if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) + SDValue A = VSel.getOperand(1); + SDValue B = VSel.getOperand(2); + SDValue CastA, CastB; + SDLoc DL(Cast); + if (CastOpcode == ISD::FP_ROUND) { + // FP_ROUND (fptrunc) has an extra flag operand to pass along. + CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); + CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); + } else { + CastA = DAG.getNode(CastOpcode, DL, VT, A); + CastB = DAG.getNode(CastOpcode, DL, VT, B); + } + return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7112,19 +7241,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } // isTruncateOf - If N is a truncate of some other value, return true, record -// the value being truncated in Op and which of Op's bits are zero in KnownZero. -// This function computes KnownZero to avoid a duplicated call to +// the value being truncated in Op and which of Op's bits are zero/one in Known. +// This function computes KnownBits to avoid a duplicated call to // computeKnownBits in the caller. static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, - APInt &KnownZero) { - APInt KnownOne; + KnownBits &Known) { if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); return true; } @@ -7143,9 +7274,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, else return false; - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); - if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + if (!(Known.Zero | 1).isAllOnesValue()) return false; return true; @@ -7170,8 +7301,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // This is valid when the truncated bits of x are already zero. // FIXME: We should extend this to work for vectors too. SDValue Op; - APInt KnownZero; - if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + KnownBits Known; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) { APInt TruncatedBits = (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 
APInt(Op.getValueSizeInBits(), 0) : @@ -7179,7 +7310,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueSizeInBits(), std::min(Op.getValueSizeInBits(), VT.getSizeInBits())); - if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (TruncatedBits.isSubsetOf(Known.Zero)) { if (VT.bitsGT(Op.getValueType())) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); if (VT.bitsLT(Op.getValueType())) @@ -7446,6 +7577,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ShAmt); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -7802,7 +7936,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, DL, PtrType), - &Flags); + Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -8228,17 +8362,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return SDValue(N, 0); // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) + // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) // When the adde's carry is not used. - if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() && - !N0.getNode()->hasAnyUseOfValue(1) && - (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) { + if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && + N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && + (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) { SDLoc SL(N); auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); - return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue), - X, Y, N0.getOperand(2)); + auto VTs = DAG.getVTList(VT, N0->getValueType(1)); + return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -8701,7 +8839,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } static bool isContractable(SDNode *N) { - SDNodeFlags F = cast<BinaryWithFlagsSDNode>(N)->Flags; + SDNodeFlags F = N->getFlags(); return F.hasAllowContract() || F.hasUnsafeAlgebra(); } @@ -9287,7 +9425,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9318,7 +9456,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { GetNegatedExpression(N0, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { // fold (fadd A, 0) -> A if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) if (N1C->isZero()) @@ -9441,7 +9579,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9461,7 +9599,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { GetNegatedExpression(N1, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. 
- if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -9512,7 +9650,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) { @@ -9656,7 +9794,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), - &Flags), &Flags); + Flags), Flags); } // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) @@ -9666,7 +9804,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), - &Flags), + Flags), N2); } } @@ -9692,16 +9830,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(1.0, DL, VT), &Flags), - &Flags); + DAG.getConstantFP(1.0, DL, VT), Flags), + Flags); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(-1.0, DL, VT), &Flags), - &Flags); + DAG.getConstantFP(-1.0, DL, VT), Flags), + Flags); } } @@ -9717,8 +9855,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; - const SDNodeFlags *Flags = N->getFlags(); - if (!UnsafeMath && !Flags->hasAllowReciprocal()) + const SDNodeFlags Flags = N->getFlags(); + if (!UnsafeMath && !Flags.hasAllowReciprocal()) return SDValue(); // Skip if current node is a reciprocal. @@ -9741,7 +9879,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { // This division is eligible for optimization only if global unsafe math // is enabled or if this division allows reciprocal formation. - if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) + if (UnsafeMath || U->getFlags().hasAllowReciprocal()) Users.insert(U); } } @@ -9780,7 +9918,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9894,8 +10032,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, - &cast<BinaryWithFlagsSDNode>(N)->Flags); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags()); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -9915,7 +10052,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // For now, create a Flags object for use with all unsafe math transforms. 
SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - return buildSqrtEstimate(N0, &Flags); + return buildSqrtEstimate(N0, Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -10190,6 +10327,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { Tmp, N0.getOperand(1)); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -10256,6 +10396,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -10341,10 +10484,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (Level >= AfterLegalizeDAG && (TLI.isFPImmLegal(CVal, VT) || TLI.isOperationLegal(ISD::ConstantFP, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N0.getOperand(1)), - &cast<BinaryWithFlagsSDNode>(N0)->Flags); + return DAG.getNode( + ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), + N0->getFlags()); } } @@ -15832,7 +15975,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -15887,7 +16030,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal) { + SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); @@ -15931,7 +16074,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal) { + SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -15976,7 +16119,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. 
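// A minimal sketch of the Newton-Raphson refinement the estimate helpers above
// implement, in plain float arithmetic rather than DAG nodes (Est approximates
// 1/sqrt(A)); the "two constant" form (-0.5*X)*(A*X*X - 3.0) is the same
// expression, factored differently. The function name is illustrative only.
static float refineRsqrt(float A, float Est, unsigned Iterations) {
  for (unsigned I = 0; I != Iterations; ++I)
    Est = Est * (1.5f - 0.5f * A * Est * Est); // X_{i+1} = X_i*(1.5 - A*X_i^2/2)
  return Est;
}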
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -16029,11 +16172,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, false); } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 377a5237f15a..a0135dc40b87 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -400,10 +400,10 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (!LOI->IsValid) return nullptr; - if (BitWidth > LOI->KnownZero.getBitWidth()) { + if (BitWidth > LOI->Known.getBitWidth()) { LOI->NumSignBits = 1; - LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth); - LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth); + LOI->Known.Zero = LOI->Known.Zero.zextOrTrunc(BitWidth); + LOI->Known.One = LOI->Known.One.zextOrTrunc(BitWidth); } return LOI; @@ -436,17 +436,15 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { Value *V = PN->getIncomingValue(0); if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { DestLOI.NumSignBits = 1; - APInt Zero(BitWidth, 0); - DestLOI.KnownZero = Zero; - DestLOI.KnownOne = Zero; + DestLOI.Known = KnownBits(BitWidth); return; } if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt Val = CI->getValue().zextOrTrunc(BitWidth); DestLOI.NumSignBits = Val.getNumSignBits(); - DestLOI.KnownZero = ~Val; - DestLOI.KnownOne = Val; + DestLOI.Known.Zero = ~Val; + DestLOI.Known.One = Val; } else { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); @@ -463,25 +461,23 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { DestLOI = *SrcLOI; } - assert(DestLOI.KnownZero.getBitWidth() == BitWidth && - DestLOI.KnownOne.getBitWidth() == BitWidth && + assert(DestLOI.Known.Zero.getBitWidth() == BitWidth && + DestLOI.Known.One.getBitWidth() == BitWidth && "Masks should have the same bit width as the type."); for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { DestLOI.NumSignBits = 1; - APInt Zero(BitWidth, 0); - DestLOI.KnownZero = Zero; - DestLOI.KnownOne = Zero; + DestLOI.Known = KnownBits(BitWidth); return; } if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt Val = CI->getValue().zextOrTrunc(BitWidth); DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits()); - DestLOI.KnownZero &= ~Val; - DestLOI.KnownOne &= Val; + DestLOI.Known.Zero &= ~Val; + DestLOI.Known.One &= Val; continue; } @@ -498,8 +494,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { return; } DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); - DestLOI.KnownZero &= SrcLOI->KnownZero; - DestLOI.KnownOne &= SrcLOI->KnownOne; + DestLOI.Known.Zero &= SrcLOI->Known.Zero; + DestLOI.Known.One &= SrcLOI->Known.One; } } diff --git 
a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fdebb8bd00db..2654b3ad7a62 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2589,7 +2589,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); APInt Shift(Sz, 1); - Shift = Shift.shl(J); + Shift <<= J; Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); } @@ -3253,7 +3253,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags; + const SDNodeFlags Flags = Node->getFlags(); Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9ed70c9b4db9..92b0d2ae4015 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -21,6 +21,7 @@ #include "LegalizeTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -134,6 +135,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULO: case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break; + case ISD::ATOMIC_LOAD: Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; @@ -510,9 +514,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); EVT ValueVTs[] = { N->getValueType(0), NVT }; - SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) }; + unsigned NumOps = N->getNumOperands(); + assert(NumOps <= 3 && "Too many operands"); + if (NumOps == 3) + Ops[2] = N->getOperand(2); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), - DAG.getVTList(ValueVTs), Ops); + DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps)); // Modified the sum result - switch anything that used the old sum to use // the new one. @@ -762,6 +771,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + llvm_unreachable("Not implemented"); +} + SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { // Promote the overflow bit trivially. if (ResNo == 1) @@ -924,6 +939,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. 
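// A standalone sketch (local enum, not the LLVM BooleanContent type) of the
// boolean-widening rule the new PromoteIntOp_ADDSUBCARRY hook in the next hunk
// applies to the incoming carry operand:
#include <cstdint>
enum class BooleanContent { Undefined, ZeroOrOne, ZeroOrNegativeOne };
static uint64_t widenCarry(uint64_t CarryBit /* 0 or 1 */, BooleanContent BC) {
  switch (BC) {
  case BooleanContent::Undefined:         return CarryBit;             // any-extend
  case BooleanContent::ZeroOrOne:         return CarryBit & 1;         // zero-extend
  case BooleanContent::ZeroOrNegativeOne: return CarryBit ? ~0ULL : 0; // sign-extend
  }
  return 0;
}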
@@ -1276,6 +1294,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { N->getOperand(0).getValueType().getScalarType()); } +SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDLoc DL(N); + + auto VT = getSetCCResultType(LHS.getValueType()); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT); + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT); + break; + case TargetLoweringBase::ZeroOrOneBooleanContent: + Carry = DAG.getZExtOrTrunc(Carry, DL, VT); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Carry = DAG.getSExtOrTrunc(Carry, DL, VT); + break; + } + + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); +} //===----------------------------------------------------------------------===// // Integer Result Expansion @@ -1395,6 +1437,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ADDE: case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + case ISD::ADDCARRY: + case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; @@ -1525,11 +1570,11 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); - APInt KnownZero, KnownOne; - DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(N->getOperand(1), Known); // If we don't know anything about the high bits, exit. - if (((KnownZero|KnownOne) & HighBitMask) == 0) + if (((Known.Zero|Known.One) & HighBitMask) == 0) return false; // Get the incoming operand to be shifted. @@ -1538,7 +1583,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // If we know that any of the high bits of the shift amount are one, then we // can do this as a couple of simple shifts. - if (KnownOne.intersects(HighBitMask)) { + if (Known.One.intersects(HighBitMask)) { // Mask out the high bit, which we know is set. Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, DAG.getConstant(~HighBitMask, dl, ShTy)); @@ -1563,7 +1608,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. - if ((KnownZero & HighBitMask) == HighBitMask) { + if (HighBitMask.isSubsetOf(Known.Zero)) { // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined // shift if x is zero. We can use XOR here because x is known to be smaller // than 32. @@ -1738,6 +1783,23 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; + bool HasOpCarry = TLI.isOperationLegalOrCustom( + N->getOpcode() == ISD::ADD ? 
ISD::ADDCARRY : ISD::SUBCARRY, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasOpCarry) { + SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); + } else { + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); + } + return; + } + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support // them. TODO: Teach operation legalization how to expand unsupported // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate @@ -1767,7 +1829,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasOVF) { - SDVTList VTList = DAG.getVTList(NVT, NVT); + EVT OvfVT = getSetCCResultType(NVT); + SDVTList VTList = DAG.getVTList(NVT, OvfVT); TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { @@ -1783,12 +1846,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, switch (BoolType) { case TargetLoweringBase::UndefinedBooleanContent: - OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF); LLVM_FALLTHROUGH; case TargetLoweringBase::ZeroOrOneBooleanContent: + OVF = DAG.getZExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); break; case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + OVF = DAG.getSExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); } return; @@ -1866,6 +1931,71 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDLoc dl(N); + + SDValue Ovf; + + bool HasOpCarry = TLI.isOperationLegalOrCustom( + N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY, + TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + + if (HasOpCarry) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(Opc, dl, VTList, HiOps); + + Ovf = Hi.getValue(1); + } else { + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. 
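// A minimal sketch of the double-width expansion the ADDCARRY path above
// performs, with uint64_t halves standing in for the legalized NVT values:
// add the low halves with UADDO (producing a carry-out), then the high halves
// with ADDCARRY (consuming it). Names here are illustrative only.
#include <cstdint>
struct WideSum { uint64_t Lo, Hi; };
static WideSum expandAdd128(uint64_t LHSL, uint64_t LHSH,
                            uint64_t RHSL, uint64_t RHSH) {
  WideSum R;
  R.Lo = LHSL + RHSL;                   // ISD::UADDO on the low halves...
  uint64_t Carry = R.Lo < LHSL ? 1 : 0; // ...with its unsigned carry-out
  R.Hi = LHSH + RHSH + Carry;           // ISD::ADDCARRY on the high halves
  return R;
}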
+ ReplaceValueWith(SDValue(N, 1), Ovf); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH, SDValue() }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2532,29 +2662,6 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } -void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, - SDValue &Lo, SDValue &Hi) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDLoc dl(N); - - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? - ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); - SplitInteger(Sum, Lo, Hi); - - // Calculate the overflow: addition overflows iff a + b < a, and subtraction - // overflows iff a - b > a. - SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, - N->getOpcode () == ISD::UADDO ? - ISD::SETULT : ISD::SETUGT); - - // Use the calculated overflow everywhere. 
- ReplaceValueWith(SDValue(N, 1), Ofl); -} - void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index af55a22972a6..cde4331cc42d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -279,6 +279,7 @@ private: SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); @@ -311,6 +312,7 @@ private: SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -350,6 +352,7 @@ private: void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4a3160297d64..97a7fab6efd0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -523,16 +523,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); } -/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
-/// so just return the element, ignoring the index.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
-  EVT VT = N->getValueType(0);
-  SDValue Res = GetScalarizedVector(N->getOperand(0));
-  if (Res.getValueType() != VT)
-    Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
-  return Res;
-}
-
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>, +/// so just return the element, ignoring the index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != VT) + Res = VT.isFloatingPoint() + ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res) + : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res); + return Res; +} /// If the input condition is a vector that needs to be scalarized, it must be /// <1 x i1>, so just convert to a normal ISD::SELECT @@ -730,7 +731,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); @@ -2219,7 +2220,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2367,7 +2368,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 439f67f1e155..9d949a2bbfa6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -811,8 +811,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -832,8 +831,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -852,8 +850,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -901,29 +898,6 @@ void SelectionDAG::allnodes_clear() { #endif } -SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL, - SDVTList VTs, SDValue N1, SDValue N2, - const SDNodeFlags *Flags) { - SDValue Ops[] = {N1, N2}; - - if (isBinOpWithFlags(Opcode)) { - // If no flags were passed in, use a default flags object. 
- SDNodeFlags F; - if (Flags == nullptr) - Flags = &F; - - auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, *Flags); - createOperands(FN, Ops); - - return FN; - } - - auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); - createOperands(N, Ops); - return N; -} - SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); @@ -985,6 +959,12 @@ void SelectionDAG::clear() { DbgInfo->clear(); } +SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) + ? getNode(ISD::FP_EXTEND, DL, VT, Op) + : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); +} + SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : @@ -1967,9 +1947,9 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// for bits that V cannot have. bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { - APInt KnownZero, KnownOne; - computeKnownBits(Op, KnownZero, KnownOne, Depth); - return (KnownZero & Mask) == Mask; + KnownBits Known; + computeKnownBits(Op, Known, Depth); + return Mask.isSubsetOf(Known.Zero); } /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that @@ -1985,31 +1965,30 @@ static const APInt *getValidShiftAmountConstant(SDValue V) { } /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are -/// those that are shared by every vector element. -void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, unsigned Depth) const { +/// them in Known. For vectors, the known bits are those that are shared by +/// every vector element. +void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, + unsigned Depth) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); - computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); + computeKnownBits(Op, Known, DemandedElts, Depth); } /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows -/// us to only collect the known bits that are shared by the requested vector -/// elements. -void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, const APInt &DemandedElts, +/// them in Known. The DemandedElts argument allows us to only collect the known +/// bits that are shared by the requested vector elements. +void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, + const APInt &DemandedElts, unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + Known = KnownBits(BitWidth); // Don't know anything. if (Depth == 6) return; // Limit search depth. - APInt KnownZero2, KnownOne2; + KnownBits Known2; unsigned NumElts = DemandedElts.getBitWidth(); if (!DemandedElts) @@ -2019,35 +1998,35 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, switch (Opcode) { case ISD::Constant: // We know all of the bits for a constant! 
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); - KnownZero = ~KnownOne; + Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); + Known.Zero = ~Known.One; break; case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. assert(NumElts == Op.getValueType().getVectorNumElements() && "Unexpected vector size"); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { if (!DemandedElts[i]) continue; SDValue SrcOp = Op.getOperand(i); - computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1); + computeKnownBits(SrcOp, Known2, Depth + 1); // BUILD_VECTOR can implicitly truncate sources, we must handle this. if (SrcOp.getValueSizeInBits() != BitWidth) { assert(SrcOp.getValueSizeInBits() > BitWidth && "Expected BUILD_VECTOR implicit truncation"); - KnownOne2 = KnownOne2.trunc(BitWidth); - KnownZero2 = KnownZero2.trunc(BitWidth); + Known2.One = Known2.One.trunc(BitWidth); + Known2.Zero = Known2.Zero.trunc(BitWidth); } // Known bits are the values that are shared by every demanded element. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } break; @@ -2055,7 +2034,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Collect the known bits that are shared by every vector element referenced // by the shuffle. APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); for (unsigned i = 0; i != NumElts; ++i) { @@ -2066,8 +2045,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (M < 0) { // For UNDEF elements, we don't know anything about the common state of // the shuffle result. - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); DemandedLHS.clearAllBits(); DemandedRHS.clearAllBits(); break; @@ -2081,24 +2060,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Known bits are the values that are shared by every demanded element. if (!!DemandedLHS) { SDValue LHS = Op.getOperand(0); - computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); - computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } break; } case ISD::CONCAT_VECTORS: { // Split DemandedElts and test each of the demanded subvectors. 
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); EVT SubVectorVT = Op.getOperand(0).getValueType(); unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); unsigned NumSubVectors = Op.getNumOperands(); @@ -2107,12 +2086,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, DemandedSub = DemandedSub.trunc(NumSubVectorElts); if (!!DemandedSub) { SDValue Sub = Op.getOperand(i); - computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(Sub, Known2, DemandedSub, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } break; @@ -2127,9 +2106,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); - computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1); + computeKnownBits(Src, Known, DemandedSrc, Depth + 1); } else { - computeKnownBits(Src, KnownZero, KnownOne, Depth + 1); + computeKnownBits(Src, Known, Depth + 1); } break; } @@ -2143,7 +2122,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Fast handling of 'identity' bitcasts. if (BitWidth == SubBitWidth) { - computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1); + computeKnownBits(N0, Known, DemandedElts, Depth + 1); break; } @@ -2167,10 +2146,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, SubDemandedElts.setBit(i * SubScale); for (unsigned i = 0; i != SubScale; ++i) { - computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i), + computeKnownBits(N0, Known2, SubDemandedElts.shl(i), Depth + 1); - KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i); - KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i); + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); } } @@ -2187,16 +2166,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (DemandedElts[i]) SubDemandedElts.setBit(i / SubScale); - computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1); + computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % SubScale) * BitWidth; - KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth); - KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth); + Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); + Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } } @@ -2204,101 +2183,91 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. 
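// A standalone sketch (local struct, not llvm::KnownBits) of the merge rules
// the AND/OR/XOR cases below encode; Zero marks bits known 0, One bits known 1:
#include <cstdint>
struct MiniKnownBits { uint64_t Zero, One; };
static MiniKnownBits knownAnd(MiniKnownBits A, MiniKnownBits B) {
  return {A.Zero | B.Zero,  // a known 0 in either input forces a 0 out
          A.One & B.One};   // a known 1 out needs a known 1 in both inputs
}
static MiniKnownBits knownOr(MiniKnownBits A, MiniKnownBits B) {
  return {A.Zero & B.Zero, A.One | B.One};
}
static MiniKnownBits knownXor(MiniKnownBits A, MiniKnownBits B) {
  return {(A.Zero & B.Zero) | (A.One & B.One),   // equal known bits give 0
          (A.Zero & B.One) | (A.One & B.Zero)};  // opposite known bits give 1
}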
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; break; case ISD::OR: - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; case ISD::XOR: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; break; } case ISD::MUL: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If low bits are zero in either operand, output low known-0 bits. // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), + Known.One.clearAllBits(); + unsigned TrailZ = Known.Zero.countTrailingOnes() + + Known2.Zero.countTrailingOnes(); + unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() + + Known2.Zero.countLeadingOnes(), BitWidth) - BitWidth; - KnownZero.clearAllBits(); - KnownZero.setLowBits(std::min(TrailZ, BitWidth)); - KnownZero.setHighBits(std::min(LeadZ, BitWidth)); + Known.Zero.clearAllBits(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); break; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned LeadZ = KnownZero2.countLeadingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned LeadZ = Known2.Zero.countLeadingOnes(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero.setHighBits(LeadZ); + Known.Zero.setHighBits(LeadZ); break; } case ISD::SELECT: - computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(2), Known, Depth+1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(3), Known, Depth+1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(2), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SMULO: case ISD::UMULO: @@ -2311,49 +2280,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero << *ShAmt; - KnownOne = KnownOne << *ShAmt; + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero <<= *ShAmt; + Known.One <<= *ShAmt; // Low bits are known zero. - KnownZero.setLowBits(ShAmt->getZExtValue()); + Known.Zero.setLowBits(ShAmt->getZExtValue()); } break; case ISD::SRL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero.lshrInPlace(*ShAmt); - KnownOne.lshrInPlace(*ShAmt); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero.lshrInPlace(*ShAmt); + Known.One.lshrInPlace(*ShAmt); // High bits are known zero. 
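// A standalone sketch of the SRL rule just stated, with 64-bit masks standing
// in for APInt: the known bits shift right along with the value, and the
// vacated high bits become known zero. Names here are illustrative only.
#include <cstdint>
struct ShiftedKnown { uint64_t Zero, One; };
static ShiftedKnown knownLshr(ShiftedKnown In, unsigned ShAmt /* < 64 */) {
  ShiftedKnown Out{In.Zero >> ShAmt, In.One >> ShAmt};
  if (ShAmt)
    Out.Zero |= ~0ULL << (64 - ShAmt); // the top ShAmt bits are now known 0
  return Out;
}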
- KnownZero.setHighBits(ShAmt->getZExtValue()); + Known.Zero.setHighBits(ShAmt->getZExtValue()); } break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero.lshrInPlace(*ShAmt); - KnownOne.lshrInPlace(*ShAmt); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero.lshrInPlace(*ShAmt); + Known.One.lshrInPlace(*ShAmt); // If we know the value of the sign bit, then we know it is copied across // the high bits by the shift amount. APInt SignMask = APInt::getSignMask(BitWidth); SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. - if (KnownZero.intersects(SignMask)) { - KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (KnownOne.intersects(SignMask)) { - KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one. + if (Known.Zero.intersects(SignMask)) { + Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. + } else if (Known.One.intersects(SignMask)) { + Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one. } } break; @@ -2374,31 +2340,44 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (NewBits.getBoolValue()) InputDemandedBits |= InSignMask; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownOne &= InputDemandedBits; - KnownZero &= InputDemandedBits; + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.One &= InputDemandedBits; + Known.Zero &= InputDemandedBits; // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero.intersects(InSignMask)) { // Input sign bit known clear - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if (KnownOne.intersects(InSignMask)) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; + if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear + Known.Zero |= NewBits; + Known.One &= ~NewBits; + } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set + Known.One |= NewBits; + Known.Zero &= ~NewBits; } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; + Known.Zero &= ~NewBits; + Known.One &= ~NewBits; } break; } case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: { + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.One.countTrailingZeros(); + unsigned LowBits = Log2_32(PossibleTZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ_ZERO_UNDEF: { + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. 
+ unsigned PossibleLZ = Known2.One.countLeadingZeros(); + unsigned LowBits = Log2_32(PossibleLZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } case ISD::CTPOP: { - KnownZero.setBitsFrom(Log2_32(BitWidth)+1); + Known.Zero.setBitsFrom(Log2_32(BitWidth)+1); break; } case ISD::LOAD: { @@ -2407,36 +2386,35 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero.setBitsFrom(MemBits); + Known.Zero.setBitsFrom(MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { if (LD->getExtensionType() == ISD::NON_EXTLOAD) - computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*Ranges, Known); } break; } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts.zext(InVT.getVectorNumElements()), Depth + 1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero.setBitsFrom(InBits); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + Known.Zero.setBitsFrom(InBits); break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero.setBitsFrom(InBits); + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + Known.Zero.setBitsFrom(InBits); break; } // TODO ISD::SIGN_EXTEND_VECTOR_INREG @@ -2444,49 +2422,47 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. 
- KnownZero = KnownZero.sext(BitWidth); - KnownOne = KnownOne.sext(BitWidth); + Known.Zero = Known.Zero.sext(BitWidth); + Known.One = Known.One.sext(BitWidth); break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); break; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.zext(InBits); - KnownOne = KnownOne.zext(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known.Zero = Known.Zero.zext(InBits); + Known.One = Known.One.zext(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); break; } case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero |= (~InMask); - KnownOne &= (~KnownZero); + computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known.Zero |= (~InMask); + Known.One &= (~Known.Zero); break; } case ISD::FGETSIGN: // All bits are zero except the low bit. - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::USUBO: case ISD::SSUBO: @@ -2495,7 +2471,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } LLVM_FALLTHROUGH; @@ -2509,16 +2485,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is // from [0-C]. - if ((KnownZero2 & MaskV) == MaskV) { + if ((Known2.Zero & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. - KnownZero.setHighBits(NLZ2); + Known.Zero.setHighBits(NLZ2); } } } @@ -2526,27 +2502,26 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If low bits are know to be zero in both operands, then we know they are // going to be 0 in the result. Both addition and complement operations // preserve the low zero bits. 
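// A small worked instance of the rule just stated, with plain integers standing
// in for the DAG values: both addends (8 = 0b1000 and 4 = 0b0100) have their
// low two bits known zero, so no carry can reach bit 0 or bit 1 and the sum
// keeps those bits zero as well.
static_assert((8u + 4u) % 4 == 0, "low zero bits survive addition");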
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned KnownZeroLow = Known2.Zero.countTrailingOnes(); if (KnownZeroLow == 0) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); KnownZeroLow = std::min(KnownZeroLow, - KnownZero2.countTrailingOnes()); - KnownZero.setBits(0, KnownZeroLow); + Known2.Zero.countTrailingOnes()); + Known.Zero.setLowBits(KnownZeroLow); break; } case ISD::UADDO: case ISD::SADDO: + case ISD::ADDCARRY: if (Op.getResNo() == 1) { // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } LLVM_FALLTHROUGH; @@ -2560,31 +2535,30 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // known to be clear. For example, if one input has the top 10 bits clear // and the other has the top 8 bits clear, we know the top 7 bits of the // output must be clear. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); - unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned KnownZeroHigh = Known2.Zero.countLeadingOnes(); + unsigned KnownZeroLow = Known2.Zero.countTrailingOnes(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); KnownZeroHigh = std::min(KnownZeroHigh, - KnownZero2.countLeadingOnes()); + Known2.Zero.countLeadingOnes()); KnownZeroLow = std::min(KnownZeroLow, - KnownZero2.countTrailingOnes()); + Known2.Zero.countTrailingOnes()); - if (Opcode == ISD::ADDE) { - // With ADDE, a carry bit may be added in, so we can only use this - // information if we know (at least) that the low two bits are clear. - // We then return to the caller that the low bit is unknown but that - // other bits are known zero. + if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) { + // With ADDE and ADDCARRY, a carry bit may be added in, so we can only + // use this information if we know (at least) that the low two bits are + // clear. We then return to the caller that the low bit is unknown but + // that other bits are known zero. if (KnownZeroLow >= 2) - KnownZero.setBits(1, KnownZeroLow); + Known.Zero.setBits(1, KnownZeroLow); break; } - KnownZero.setLowBits(KnownZeroLow); + Known.Zero.setLowBits(KnownZeroLow); if (KnownZeroHigh > 1) - KnownZero.setHighBits(KnownZeroHigh - 1); + Known.Zero.setHighBits(KnownZeroHigh - 1); break; } case ISD::SREM: @@ -2592,23 +2566,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // The low bits of the first operand are unchanged by the srem. 
- KnownZero = KnownZero2 & LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero & LowBits; + Known.One = Known2.One & LowBits; // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) - KnownOne |= ~LowBits; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + Known.One |= ~LowBits; + assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } } break; @@ -2617,42 +2590,39 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // The upper bits are all zero, the lower ones are unchanged. - KnownZero = KnownZero2 | ~LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero | ~LowBits; + Known.One = Known2.One & LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - - uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); - KnownZero.setHighBits(Leaders); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + + uint32_t Leaders = std::max(Known.Zero.countLeadingOnes(), + Known2.Zero.countLeadingOnes()); + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); + Known.Zero.setHighBits(Leaders); break; } case ISD::EXTRACT_ELEMENT: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), Known, Depth+1); const unsigned Index = Op.getConstantOperandVal(1); const unsigned BitWidth = Op.getValueSizeInBits(); // Remove low part of known bits mask - KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); - KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth); + Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth); // Remove high part of known bit mask - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); break; } case ISD::EXTRACT_VECTOR_ELT: { @@ -2665,22 +2635,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know // anything about the extended bits. 
if (BitWidth > EltBitWidth) { - KnownZero = KnownZero.trunc(EltBitWidth); - KnownOne = KnownOne.trunc(EltBitWidth); + Known.Zero = Known.Zero.trunc(EltBitWidth); + Known.One = Known.One.trunc(EltBitWidth); } ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { // If we know the element index, just demand that vector element. unsigned Idx = ConstEltNo->getZExtValue(); APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); - computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1); + computeKnownBits(InVec, Known, DemandedElt, Depth + 1); } else { // Unknown element index, so ignore DemandedElts and demand them all. - computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); + computeKnownBits(InVec, Known, Depth + 1); } if (BitWidth > EltBitWidth) { - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); } break; } @@ -2693,117 +2663,110 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { // If we know the element index, split the demand between the // source vector and the inserted element. - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth); unsigned EltIdx = CEltNo->getZExtValue(); // If we demand the inserted element then add its common known bits. if (DemandedElts[EltIdx]) { - computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); - KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); - KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + computeKnownBits(InVal, Known2, Depth + 1); + Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());; } // If we demand the source vector then add its common known bits, ensuring // that we don't demand the inserted element. APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); if (!!VectorElts) { - computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(InVec, Known2, VectorElts, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } } else { // Unknown element index, so ignore DemandedElts and demand them all. 
- computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
- computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
- KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
- KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());;
+ computeKnownBits(InVec, Known, Depth + 1);
+ computeKnownBits(InVal, Known2, Depth + 1);
+ Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
+ Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
break;
}
case ISD::BITREVERSE: {
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
- KnownZero = KnownZero2.reverseBits();
- KnownOne = KnownOne2.reverseBits();
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known.Zero = Known2.Zero.reverseBits();
+ Known.One = Known2.One.reverseBits();
break;
}
case ISD::BSWAP: {
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
- KnownZero = KnownZero2.byteSwap();
- KnownOne = KnownOne2.byteSwap();
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known.Zero = Known2.Zero.byteSwap();
+ Known.One = Known2.One.byteSwap();
break;
}
case ISD::ABS: {
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If the source's MSB is zero then we know the rest of the bits already.
- if (KnownZero2[BitWidth - 1]) {
- KnownZero = KnownZero2;
- KnownOne = KnownOne2;
+ if (Known2.isNonNegative()) {
+ Known.Zero = Known2.Zero;
+ Known.One = Known2.One;
break;
}
// We only know that the absolute value's MSB will be zero iff there is
// a set bit that isn't the sign bit (otherwise it could be INT_MIN).
- KnownOne2.clearBit(BitWidth - 1);
- if (KnownOne2.getBoolValue()) {
- KnownZero = APInt::getSignMask(BitWidth);
+ Known2.One.clearSignBit();
+ if (Known2.One.getBoolValue()) {
+ Known.Zero = APInt::getSignMask(BitWidth);
break;
}
break;
}
case ISD::UMIN: {
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
// UMIN - we know that the result will have the maximum of the
// known zero leading bits of the inputs.
- unsigned LeadZero = KnownZero.countLeadingOnes();
- LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes());
+ unsigned LeadZero = Known.Zero.countLeadingOnes();
+ LeadZero = std::max(LeadZero, Known2.Zero.countLeadingOnes());

- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
- KnownZero.setHighBits(LeadZero);
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
+ Known.Zero.setHighBits(LeadZero);
break;
}
case ISD::UMAX: {
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
// UMAX - we know that the result will have the maximum of the
// known one leading bits of the inputs.
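A standalone sanity check, in plain C++ with made-up constants, of the UMAX rule stated just above: an input with its top K bits known one is at least the mask of those K bits, and umax is no smaller than either input, so the result keeps those leading ones (the std::max over the two counts coded below).

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t TopTwo = 0b11000000; // two known leading one bits
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t WithOnes = uint8_t(A) | TopTwo; // force them on one input
      uint8_t M = std::max<uint8_t>(WithOnes, uint8_t(B));
      assert((M & TopTwo) == TopTwo); // umax preserves the leading ones
    }
}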
- unsigned LeadOne = KnownOne.countLeadingOnes(); - LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes()); + unsigned LeadOne = Known.One.countLeadingOnes(); + LeadOne = std::max(LeadOne, Known2.One.countLeadingOnes()); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - KnownOne.setHighBits(LeadOne); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + Known.One.setHighBits(LeadOne); break; } case ISD::SMIN: case ISD::SMAX: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; break; } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { // The low bits are known zero if the pointer is aligned. - KnownZero.setLowBits(Log2_32(Align)); + Known.Zero.setLowBits(Log2_32(Align)); break; } break; @@ -2816,12 +2779,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, DemandedElts, - *this, Depth); + TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); break; } - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, @@ -2830,28 +2792,28 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, if (isNullConstant(N1)) return OFK_Never; - APInt N1Zero, N1One; - computeKnownBits(N1, N1Zero, N1One); - if (N1Zero.getBoolValue()) { - APInt N0Zero, N0One; - computeKnownBits(N0, N0Zero, N0One); + KnownBits N1Known; + computeKnownBits(N1, N1Known); + if (N1Known.Zero.getBoolValue()) { + KnownBits N0Known; + computeKnownBits(N0, N0Known); bool overflow; - (void)(~N0Zero).uadd_ov(~N1Zero, overflow); + (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); if (!overflow) return OFK_Never; } // mulhi + 1 never overflow if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && - (~N1Zero & 0x01) == ~N1Zero) + (~N1Known.Zero & 0x01) == ~N1Known.Zero) return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { - APInt N0Zero, N0One; - computeKnownBits(N0, N0Zero, N0One); + KnownBits N0Known; + computeKnownBits(N0, N0Known); - if ((~N0Zero & 0x01) == ~N0Zero) + if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) return OFK_Never; } @@ -2895,10 +2857,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // to handle some common cases. // Fall back to computeKnownBits to catch other known cases. 
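The fallback coded just below reads as: a value is provably a power of two once BitWidth - 1 bits are known zero and the remaining bit is known one, leaving a single possible set bit. A standalone restatement in plain C++ (the helper name is illustrative, and a 64-bit width is assumed):

#include <bit>     // std::popcount, C++20
#include <cassert>
#include <cstdint>

bool knownPowerOfTwo(uint64_t KnownZero, uint64_t KnownOne) {
  // 63 bits pinned to zero plus one bit pinned to one: only one value shape.
  return std::popcount(KnownZero) == 63 && std::popcount(KnownOne) == 1;
}

int main() {
  // Bit 4 known one, every other bit known zero: the value must be 16.
  assert(knownPowerOfTwo(~(1ULL << 4), 1ULL << 4));
  // Two unknown low bits: the value could be 3, so no conclusion.
  assert(!knownPowerOfTwo(~3ULL, 0));
}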
- APInt KnownZero, KnownOne; - computeKnownBits(Val, KnownZero, KnownOne); - return (KnownZero.countPopulation() == BitWidth - 1) && - (KnownOne.countPopulation() == 1); + KnownBits Known; + computeKnownBits(Val, Known); + return (Known.Zero.countPopulation() == BitWidth - 1) && + (Known.One.countPopulation() == 1); } unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { @@ -2971,7 +2933,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(Tmp, Tmp2); case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); // SRA X, C -> adds C sign bits. if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { APInt ShiftVal = C->getAPIntValue(); @@ -3068,17 +3030,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Special case decrementing a value (ADD X, -1): if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { - APInt KnownZero, KnownOne; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownBits Known; + computeKnownBits(Op.getOperand(0), Known, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry // out of the result. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp; } @@ -3093,16 +3055,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Handle NEG. if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) if (CLHS->isNullValue()) { - APInt KnownZero, KnownOne; - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + KnownBits Known; + computeKnownBits(Op.getOperand(1), Known, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -3134,6 +3096,44 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + + ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { + // If we know the element index, split the demand between the + // source vector and the inserted element. + unsigned EltIdx = CEltNo->getZExtValue(); + + // If we demand the inserted element then get its sign bits. + Tmp = UINT_MAX; + if (DemandedElts[EltIdx]) { + // TODO - handle implicit truncation of inserted elements. + if (InVal.getScalarValueSizeInBits() != VTBits) + break; + Tmp = ComputeNumSignBits(InVal, Depth + 1); + } + + // If we demand the source vector then get its sign bits, and determine + // the minimum. 
+ APInt VectorElts = DemandedElts; + VectorElts.clearBit(EltIdx); + if (!!VectorElts) { + Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + } else { + // Unknown element index, so ignore DemandedElts and demand them all. + Tmp = ComputeNumSignBits(InVec, Depth + 1); + Tmp2 = ComputeNumSignBits(InVal, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } case ISD::EXTRACT_VECTOR_ELT: { SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); @@ -3199,14 +3199,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. - APInt KnownZero, KnownOne; - computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); + KnownBits Known; + computeKnownBits(Op, Known, DemandedElts, Depth); APInt Mask; - if (KnownZero.isNegative()) { // sign bit is 0 - Mask = KnownZero; - } else if (KnownOne.isNegative()) { // sign bit is 1; - Mask = KnownOne; + if (Known.isNonNegative()) { // sign bit is 0 + Mask = Known.Zero; + } else if (Known.isNegative()) { // sign bit is 1; + Mask = Known.One; } else { // Nothing known. return FirstAnswer; @@ -3239,8 +3239,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { if (getTarget().Options.NoNaNsFPMath) return true; - if (const BinaryWithFlagsSDNode *BF = dyn_cast<BinaryWithFlagsSDNode>(Op)) - return BF->Flags.hasNoNaNs(); + if (Op->getFlags().hasNoNaNs()) + return true; // If the value is a constant, we can obviously see if it is a NaN or not. if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) @@ -3284,11 +3284,10 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); - APInt AZero, AOne; - APInt BZero, BOne; - computeKnownBits(A, AZero, AOne); - computeKnownBits(B, BZero, BOne); - return (AZero | BZero).isAllOnesValue(); + KnownBits AKnown, BKnown; + computeKnownBits(A, AKnown); + computeKnownBits(B, BKnown); + return (AKnown.Zero | BKnown.Zero).isAllOnesValue(); } static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, @@ -3357,7 +3356,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue Operand) { + SDValue Operand, const SDNodeFlags Flags) { // Constant fold unary operations with an integer constant operand. Even // opaque constant will be folded, because the folding of unary operations // doesn't create new constants with different values. Nevertheless, the @@ -3683,8 +3682,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? 
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0), - &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags); + Operand.getNode()->getOperand(0), + Operand.getNode()->getFlags()); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getNode()->getOperand(0); break; @@ -3701,10 +3700,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { @@ -3883,7 +3885,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, - const SDNodeFlags *Flags) { + const SDNodeFlags Flags) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. @@ -3975,8 +3977,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue N1, SDValue N2, - const SDNodeFlags *Flags) { + SDValue N1, SDValue N2, const SDNodeFlags Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -4161,7 +4162,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) { unsigned FromBits = EVT.getScalarSizeInBits(); Val <<= Val.getBitWidth() - FromBits; - Val = Val.ashr(Val.getBitWidth() - FromBits); + Val.ashrInPlace(Val.getBitWidth() - FromBits); return getConstant(Val, DL, ConstantVT); }; @@ -4443,21 +4444,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Memoize this node if possible. SDNode *N; SDVTList VTs = getVTList(VT); + SDValue Ops[] = {N1, N2}; if (VT != MVT::Glue) { - SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { - if (Flags) - E->intersectFlagsWith(Flags); + E->intersectFlagsWith(Flags); return SDValue(E, 0); } - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); @@ -5979,7 +5982,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { + ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -6641,14 +6644,13 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// else return NULL. 
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, - const SDNodeFlags *Flags) { + const SDNodeFlags Flags) { if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { - if (Flags) - E->intersectFlagsWith(Flags); + E->intersectFlagsWith(Flags); return E; } } @@ -7392,15 +7394,8 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -const SDNodeFlags *SDNode::getFlags() const { - if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) - return &FlagsNode->Flags; - return nullptr; -} - -void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) { - if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) - FlagsNode->Flags.intersectWith(Flags); +void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { + this->Flags.intersectWith(Flags); } SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6a737ed84ea4..ba9e11798f15 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -83,20 +83,6 @@ LimitFPPrecision("limit-float-precision", "for some float libcalls"), cl::location(LimitFloatPrecision), cl::init(0)); - -/// Minimum jump table density for normal functions. -static cl::opt<unsigned> -JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "a normal function")); - -/// Minimum jump table density for -Os or -Oz functions. -static cl::opt<unsigned> -OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "an optsize function")); - - // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -364,7 +350,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT ValueSVT = ValueVT.getVectorElementType(); if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) - Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); return DAG.getBuildVector(ValueVT, DL, Val); } @@ -557,10 +544,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - - Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } + assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); Parts[0] = Val; return; } @@ -675,7 +661,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; - unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + unsigned NumZeroBits = LOI->Known.Zero.countLeadingOnes(); if (NumZeroBits == RegSize) { // The current value is a zero. 
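The NumZeroBits computation above is easy to misread: it counts leading ones of the Zero mask, because a set bit in Known.Zero means that bit of the value is proven zero. A standalone restatement (plain C++20; the sample mask is made up):

#include <bit>     // std::countl_one, C++20
#include <cassert>
#include <cstdint>

int main() {
  // Suppose analysis proved the top 24 bits of a 64-bit register are zero:
  uint64_t KnownZeroMask = ~0ULL << 40; // bits 40..63 known zero
  int NumZeroBits = std::countl_one(KnownZeroMask);
  assert(NumZeroBits == 24); // the value provably fits in 40 bits
}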
@@ -1349,7 +1335,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), - &Flags); + Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. @@ -2589,7 +2575,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, &Flags); + Op1, Op2, Flags); setValue(&I, BinNodeValue); } @@ -2642,7 +2628,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - &Flags); + Flags); setValue(&I, Res); } @@ -2654,7 +2640,7 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { Flags.setExact(isa<PossiblyExactOperator>(&I) && cast<PossiblyExactOperator>(&I)->isExact()); setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, - Op2, &Flags)); + Op2, Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { @@ -3266,7 +3252,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType()), &Flags); + DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { MVT PtrTy = @@ -3296,7 +3282,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) Flags.setNoUnsignedWrap(true); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); continue; } @@ -3374,7 +3360,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); + DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3478,7 +3464,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), - &Flags); + Flags); auto MMOFlags = MachineMemOperand::MONone; if (isVolatile) MMOFlags |= MachineMemOperand::MOVolatile; @@ -3633,7 +3619,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); + DAG.getConstant(Offsets[i], dl, PtrVT), Flags); SDValue St = DAG.getStore( Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); @@ -7897,7 +7883,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT), &Flags); + PtrVT), Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), @@ -8187,15 +8173,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); // Set up the incoming argument description vector. 
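Context for the hunks that follow: the old code carried a manual Idx counter because attribute lists are 1-based, with index 0 describing the return value, while Arg.getArgNo() is 0-based; the new per-argument queries hide that off-by-one. A tiny mock of the two numbering schemes (plain C++; the classes are illustrative, not LLVM's):

#include <cassert>
#include <vector>

// Old style: one flag table indexed 1..N with slot 0 reserved for the return
// value. New style: ask the argument itself, numbered 0..N-1.
struct MockFunction {
  std::vector<bool> ByValAt; // [0] = return slot, [1..N] = arguments
  bool argHasByVal(unsigned ArgNo) const { return ByValAt[ArgNo + 1]; }
};

int main() {
  MockFunction F{{false, true, false}}; // two arguments; arg #0 is byval
  // Old convention: the first argument lives at attribute index 1.
  assert(F.ByValAt[1] == F.argHasByVal(/*ArgNo=*/0));
}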
- unsigned Idx = 0; for (const Argument &Arg : F.args()) { - ++Idx; + unsigned ArgNo = Arg.getArgNo(); SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; Type *FinalType = Arg.getType(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) FinalType = cast<PointerType>(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); @@ -8206,11 +8191,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) Flags.setSExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) { + if (Arg.hasAttribute(Attribute::InReg)) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (F.getCallingConv() == CallingConv::X86_VectorCall && @@ -8223,15 +8208,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set InReg Flag Flags.setInReg(); } - if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) + if (Arg.hasAttribute(Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) + if (Arg.hasAttribute(Attribute::SwiftSelf)) Flags.setSwiftSelf(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) + if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -8242,7 +8227,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. - if (Idx == 1) + if (ArgNo == 0) Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { @@ -8252,13 +8237,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. 
unsigned FrameAlign; - if (F.getParamAlignment(Idx)) - FrameAlign = F.getParamAlignment(Idx); + if (Arg.getParamAlignment()) + FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } - if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); @@ -8270,7 +8255,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - Idx-1, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -8311,7 +8296,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set up the argument values. unsigned i = 0; - Idx = 0; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. @@ -8333,14 +8317,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. - // Idx indexes LLVM arguments. Don't touch it. ++i; } SmallVector<SDValue, 4> Chains; DenseMap<int, int> ArgCopyElisionFrameIndexMap; for (const Argument &Arg : F.args()) { - ++Idx; SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); @@ -8362,7 +8344,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // debugging information. bool isSwiftErrorArg = TLI->supportSwiftError() && - F.getAttributes().hasAttribute(Idx, Attribute::SwiftError); + Arg.hasAttribute(Attribute::SwiftError); if (!ArgHasUses && !isSwiftErrorArg) { SDB->setUnusedArgValue(&Arg, InVals[i]); @@ -8382,9 +8364,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // function. if (ArgHasUses || isSwiftErrorArg) { Optional<ISD::NodeType> AssertOp; - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; - else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + else if (Arg.hasAttribute(Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, @@ -8589,13 +8571,10 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, - const SmallVectorImpl<unsigned> &TotalCases, - unsigned First, unsigned Last, - unsigned Density) const { +uint64_t +SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters, + unsigned First, unsigned Last) const { assert(Last >= First); - assert(TotalCases[Last] >= TotalCases[First]); - const APInt &LowCase = Clusters[First].Low->getValue(); const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); @@ -8604,26 +8583,17 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, // comparison to lower. We should discriminate against such consecutive ranges // in jump tables. 
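For context on the refactoring around here: the density rule that used to live in isDense (its removal appears below) accepts a span when NumCases * 100 >= Range * MinDensity, i.e. when at least MinDensity percent of the table slots would hold real cases; 10 and 40 percent are the defaults the deleted cl::opts earlier in this file used for normal and optsize functions. A standalone version of that arithmetic in plain C++, with made-up sample numbers:

#include <cassert>
#include <cstdint>

// Would a jump table covering `Range` slots, of which `NumCases` are real
// switch cases, be at least `MinDensity` percent full?
bool denseEnough(uint64_t NumCases, uint64_t Range, unsigned MinDensity) {
  return NumCases * 100 >= Range * MinDensity;
}

int main() {
  // 40 cases spread over the values 0..99: the table would be 40% full.
  assert(denseEnough(40, 100, 40));  // just dense enough for optsize (40%)
  assert(!denseEnough(40, 100, 41));
  assert(denseEnough(40, 100, 10));  // easily dense enough at -O2 (10%)
}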
- uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); - uint64_t Range = Diff + 1; + return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; +} +uint64_t SelectionDAGBuilder::getJumpTableNumCases( + const SmallVectorImpl<unsigned> &TotalCases, unsigned First, + unsigned Last) const { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); uint64_t NumCases = TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); - - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - - return NumCases * 100 >= Range * Density; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI, - const SwitchInst *SI) { - const Function *Fn = SI->getParent()->getParent(); - if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") - return false; - - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return NumCases; } bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, @@ -8662,10 +8632,11 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumDests = JTProbs.size(); - if (isSuitableForBitTests(NumDests, NumCmps, - Clusters[First].Low->getValue(), - Clusters[Last].High->getValue())) { + if (TLI.isSuitableForBitTests( + NumDests, NumCmps, Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), DAG.getDataLayout())) { // Clusters[First..Last] should be lowered as bit tests instead. return false; } @@ -8686,7 +8657,6 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, } JumpTableMBB->normalizeSuccProbs(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) ->createJumpTableIndex(Table); @@ -8715,17 +8685,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI, SI)) + if (!TLI.areJTsAllowed(SI->getParent()->getParent())) return; - const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); - const int64_t N = Clusters.size(); const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; - const unsigned MaxJumpTableSize = - OptForSize || TLI.getMaximumJumpTableSize() == 0 - ? UINT_MAX : TLI.getMaximumJumpTableSize(); if (N < 2 || N < MinJumpTableEntries) return; @@ -8740,15 +8705,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - const unsigned MinDensity = - OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; - // Cheap case: the whole range may be suitable for jump table. 
- unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
- Clusters[0].Low->getValue())
- .getLimitedValue(UINT_MAX - 1) + 1;
- if (JumpTableSize <= MaxJumpTableSize &&
- isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
+ uint64_t Range = getJumpTableRange(Clusters, 0, N - 1);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+ if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
@@ -8801,11 +8763,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
// Search for a solution that results in fewer partitions.
for (int64_t j = N - 1; j > i; j--) {
// Try building a partition from Clusters[i..j].
- JumpTableSize = (Clusters[j].High->getValue() -
- Clusters[i].Low->getValue())
- .getLimitedValue(UINT_MAX - 1) + 1;
- if (JumpTableSize <= MaxJumpTableSize &&
- isDense(Clusters, TotalCases, i, j, MinDensity)) {
+ uint64_t Range = getJumpTableRange(Clusters, i, j);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+ if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
int64_t NumEntries = j - i + 1;
@@ -8849,36 +8811,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
Clusters.resize(DstIndex);
}
-bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
- // FIXME: Using the pointer type doesn't seem ideal.
- uint64_t BW = DAG.getDataLayout().getPointerSizeInBits();
- uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
- return Range <= BW;
-}
-
-bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
- unsigned NumCmps,
- const APInt &Low,
- const APInt &High) {
- // FIXME: I don't think NumCmps is the correct metric: a single case and a
- // range of cases both require only one branch to lower. Just looking at the
- // number of clusters and destinations should be enough to decide whether to
- // build bit tests.
-
- // To lower a range with bit tests, the range must fit the bitwidth of a
- // machine word.
- if (!rangeFitsInWord(Low, High))
- return false;
-
- // Decide whether it's profitable to lower this range with bit tests. Each
- // destination requires a bit test and branch, and there is an overall range
- // check branch. For a small number of clusters, separate comparisons might be
- // cheaper, and for many destinations, splitting the range might be better.
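The removed predicate just below (its logic now sits behind TargetLowering::isSuitableForBitTests) is self-contained enough to restate: the whole cluster range must fit in a machine word, and the branches saved must outweigh the per-destination bit test. A plain C++ sketch assuming a 64-bit word and ignoring the clamping the real code applies:

#include <cassert>
#include <cstdint>

bool rangeFitsInWord(uint64_t Low, uint64_t High) {
  return High - Low + 1 <= 64; // one test mask per destination must cover it
}

bool suitableForBitTests(unsigned NumDests, unsigned NumCmps, uint64_t Low,
                         uint64_t High) {
  if (!rangeFitsInWord(Low, High))
    return false;
  // Few destinations, enough comparisons saved (the thresholds below).
  return (NumDests == 1 && NumCmps >= 3) ||
         (NumDests == 2 && NumCmps >= 5) ||
         (NumDests == 3 && NumCmps >= 6);
}

int main() {
  // Cases 2, 5 and 9 all branching to one block: 3 comparisons, 1 destination.
  assert(suitableForBitTests(1, 3, 2, 9));
  // A range wider than a word cannot be encoded in a single test mask.
  assert(!suitableForBitTests(1, 3, 0, 100));
}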
- return (NumDests == 1 && NumCmps >= 3) || - (NumDests == 2 && NumCmps >= 5) || - (NumDests == 3 && NumCmps >= 6); -} - bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, @@ -8900,16 +8832,17 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, APInt High = Clusters[Last].High->getValue(); assert(Low.slt(High)); - if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL)) return false; APInt LowBound; APInt CmpRange; - const int BitWidth = DAG.getTargetLoweringInfo() - .getPointerTy(DAG.getDataLayout()) - .getSizeInBits(); - assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); + const int BitWidth = TLI.getPointerTy(DL).getSizeInBits(); + assert(TLI.rangeFitsInWord(Low, High, DL) && + "Case range must fit in bit mask!"); // Check if the clusters cover a contiguous range such that no value in the // range will jump to the default statement. @@ -8999,7 +8932,9 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); + const DataLayout &DL = DAG.getDataLayout(); + + EVT PTy = TLI.getPointerTy(DL); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; @@ -9030,8 +8965,8 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // Try building a partition from Clusters[i..j]. // Check the range. - if (!rangeFitsInWord(Clusters[i].Low->getValue(), - Clusters[j].High->getValue())) + if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue(), DL)) continue; // Check nbr of destinations and cluster types. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9e34590cc39c..9e9989058ae5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -304,10 +304,13 @@ private: BranchProbability DefaultProb; }; - /// Check whether a range of clusters is dense enough for a jump table. - bool isDense(const CaseClusterVector &Clusters, - const SmallVectorImpl<unsigned> &TotalCases, - unsigned First, unsigned Last, unsigned MinDensity) const; + /// Return the range of value in [First..Last]. + uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, + unsigned Last) const; + + /// Return the number of cases in [First..Last]. + uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases, + unsigned First, unsigned Last) const; /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. @@ -319,14 +322,6 @@ private: void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, MachineBasicBlock *DefaultMBB); - /// Check whether the range [Low,High] fits in a machine word. - bool rangeFitsInWord(const APInt &Low, const APInt &High); - - /// Check whether these clusters are suitable for lowering with bit tests based - /// on the number of destinations, comparison metric, and range. - bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, - const APInt &Low, const APInt &High); - /// Build a bit test cluster from Clusters[First..Last]. 
Returns false if it /// decides it's not a good idea. bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, @@ -777,6 +772,11 @@ public: bool VarArgDisallowed, bool ForceVoidReturnTy); + /// Returns the type of FrameIndex and TargetFrameIndex nodes. + MVT getFrameIndexTy() { + return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()); + } + private: // Terminator instructions. void visitRet(const ReturnInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 488c60a28ffb..26dd45ef933f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -227,6 +227,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; + case ISD::ADDCARRY: return "addcarry"; case ISD::SADDO: return "saddo"; case ISD::UADDO: return "uaddo"; case ISD::SSUBO: return "ssubo"; @@ -235,6 +236,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULO: return "umulo"; case ISD::SUBC: return "subc"; case ISD::SUBE: return "sube"; + case ISD::SUBCARRY: return "subcarry"; case ISD::SHL_PARTS: return "shl_parts"; case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e21204dbb966..3aabdaeaa094 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -73,6 +73,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -592,13 +593,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } - if (TLI->hasCopyImplyingStackAdjustment(MF)) - MFI.setHasCopyImplyingStackAdjustment(true); - - // Freeze the set of reserved registers now that MachineFrameInfo has been - // set up. All the information required by getReservedRegs() should be - // available now. - MRI.freezeReservedRegs(*MF); + TLI->finalizeLowering(*MF); // Release function-specific state. SDB and CurDAG are already cleared // at this point. @@ -650,8 +645,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt KnownZero; - APInt KnownOne; + KnownBits Known; do { SDNode *N = Worklist.pop_back_val(); @@ -680,8 +674,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - CurDAG->computeKnownBits(Src, KnownZero, KnownOne); - FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); + CurDAG->computeKnownBits(Src, Known); + FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known); } while (!Worklist.empty()); } @@ -1930,11 +1924,11 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, // either already zero or is not demanded. Check for known zero input bits. APInt NeededMask = DesiredMask & ~ActualMask; - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); + KnownBits Known; + CurDAG->computeKnownBits(LHS, Known); // If all the missing bits in the or are already known to be set, match! 
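The rewrite just below is purely cosmetic: for bit masks, (Needed & Have) == Needed and (Needed & ~Have) == 0 both say that every needed bit is available, which is what APInt::isSubsetOf now spells out by name. A standalone check of the equivalence in plain C++:

#include <cassert>
#include <cstdint>

// Two spellings of "every set bit of A is also set in B".
bool viaMaskEq(uint64_t A, uint64_t B) { return (A & B) == A; } // old form
bool viaNoExtraBits(uint64_t A, uint64_t B) { return (A & ~B) == 0; }

int main() {
  const uint64_t Samples[] = {0x0, 0x5, 0xF0, 0xDEADBEEF, ~0ULL};
  for (uint64_t A : Samples)
    for (uint64_t B : Samples)
      assert(viaMaskEq(A, B) == viaNoExtraBits(A, B));
}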
- if ((NeededMask & KnownOne) == NeededMask) + if (NeededMask.isSubsetOf(Known.One)) return true; // TODO: check to see if missing bits are just not demanded. diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index d27e2455978d..c0a5041b1395 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -242,7 +242,8 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, // Cache this slot so we find it when going through the normal // assignment loop. - SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType()); + SDValue Loc = + Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy()); Builder.StatepointLowering.setLocation(Incoming, Loc); } @@ -343,7 +344,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, Builder); int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); // We use TargetFrameIndex so that isel will not select it into LEA - Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); // TODO: We can create TokenFactor node instead of // chaining stores one after another, this may allow @@ -391,8 +392,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, // This handles allocas as arguments to the statepoint (this is only // really meaningful for a deopt value. For GC, we'd be trying to // relocate the address of the alloca itself?) + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Incoming.getValueType())); + Builder.getFrameIndexTy())); } else if (LiveInOnly) { // If this value is live in (not live-on-return, or live-through), we can // treat it the same way patchpoint treats it's "live in" values. 
We'll @@ -527,8 +530,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SDValue Incoming = Builder.getValue(V); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { // This handles allocas as arguments to the statepoint + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Incoming.getValueType())); + Builder.getFrameIndexTy())); } } @@ -949,8 +954,8 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, - SD.getValueType()); + SDValue SpillSlot = + DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, @@ -958,7 +963,9 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()), + getCurSDLoc(), Chain, SpillSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), *DerivedPtrLocation)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 136dec873cb8..2d39ecd9779b 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -109,8 +110,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS, IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned); IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError); - // FIXME: getParamAlignment is off by one from argument index. - Alignment = CS->getParamAlignment(ArgIdx + 1); + Alignment = CS->getParamAlignment(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -437,10 +437,9 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const { SDValue Op = User->getOperand(OpIdx); - APInt KnownZero, KnownOne; + KnownBits Known; - if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, - TLO, 0, true)) + if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true)) return false; @@ -488,10 +487,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, SelectionDAG &DAG = DCI.DAG; TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); - APInt KnownZero, KnownOne; + KnownBits Known; - bool Simplified = SimplifyDemandedBits(Op, DemandedMask, KnownZero, KnownOne, - TLO); + bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO); if (Simplified) DCI.CommitTargetLoweringOpt(TLO); return Simplified; @@ -501,13 +499,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the /// original and new nodes in Old and New. 
Otherwise, analyze the expression and
-/// return a mask of KnownOne and KnownZero bits for the expression (used to
-/// simplify the caller). The KnownZero/One bits may only be accurate for those
-/// bits in the DemandedMask.
+/// return a mask of Known bits for the expression (used to simplify the
+/// caller). The Known bits may only be accurate for those bits in the
+/// DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
const APInt &DemandedMask,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
bool AssumeSingleUse) const {
@@ -519,14 +516,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
auto &DL = TLO.DAG.getDataLayout();

// Don't know anything.
- KnownZero = KnownOne = APInt(BitWidth, 0);
+ Known = KnownBits(BitWidth);

// Other users may use these bits.
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
- // If not at the root, Just compute the KnownZero/KnownOne bits to
+ // If not at the root, just compute the Known bits to
// simplify things downstream.
- TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ TLO.DAG.computeKnownBits(Op, Known, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -541,38 +538,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return false;
}

- APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
- KnownZero = ~KnownOne;
+ Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+ Known.Zero = ~Known.One;
return false; // Don't fall through, will infinitely loop.
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every constant vector element.
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
for (SDValue SrcOp : Op->ops()) {
if (!isa<ConstantSDNode>(SrcOp)) {
// We can only handle all constant values - bail out with no known bits.
- KnownZero = KnownOne = APInt(BitWidth, 0);
+ Known = KnownBits(BitWidth);
return false;
}
- KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
- KnownZero2 = ~KnownOne2;
+ Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
+ Known2.Zero = ~Known2.One;
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
- if (KnownOne2.getBitWidth() != BitWidth) {
- assert(KnownOne2.getBitWidth() > BitWidth &&
- KnownZero2.getBitWidth() > BitWidth &&
+ if (Known2.One.getBitWidth() != BitWidth) {
+ assert(Known2.getBitWidth() > BitWidth &&
"Expected BUILD_VECTOR implicit truncation");
- KnownOne2 = KnownOne2.trunc(BitWidth);
- KnownZero2 = KnownZero2.trunc(BitWidth);
+ Known2.One = Known2.One.trunc(BitWidth);
+ Known2.Zero = Known2.Zero.trunc(BitWidth);
}
// Known bits are the values that are shared by every element.
// TODO: support per-element known bits.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
return false; // Don't fall through, will infinitely loop.
case ISD::AND:
@@ -582,16 +578,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// the RHS.
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
SDValue Op0 = Op.getOperand(0);
- APInt LHSZero, LHSOne;
+ KnownBits LHSKnown;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); // If the LHS already has zeros where RHSC does, this and is dead. - if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op0); // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO)) + if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -600,64 +596,56 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the xor. For example, for a 32-bit X: // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 if (isBitwiseNot(Op0) && Op0.hasOneUse() && - LHSOne == ~RHSC->getAPIntValue()) { + LHSKnown.One == ~RHSC->getAPIntValue()) { SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), Op0.getOperand(0), Op.getOperand(1)); return TLO.CombineTo(Op, Xor); } } - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, - KnownZero2, KnownOne2, TLO, Depth+1)) + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask, + Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + if (NewMask.isSubsetOf(Known2.Zero | Known.One)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + if (NewMask.isSubsetOf(Known.Zero | Known2.One)) return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. - if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO)) + if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. 
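The AND combination rules the comments around here restate, in isolation: a result bit is known one only when it is one on both sides, and known zero when it is zero on either side. A standalone illustration in plain C++ (MockKnownBits is an illustrative stand-in):

#include <cassert>
#include <cstdint>

struct MockKnownBits {
  uint64_t Zero;
  uint64_t One;
};

MockKnownBits andKnownBits(MockKnownBits A, MockKnownBits B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

int main() {
  MockKnownBits X{0b1010, 0b0001}; // bits 1,3 proven zero; bit 0 proven one
  MockKnownBits Y{0b0010, 0b0101}; // bit 1 proven zero; bits 0,2 proven one
  MockKnownBits R = andKnownBits(X, Y);
  assert(R.One == 0b0001);  // only bit 0 is one on both sides
  assert(R.Zero == 0b1010); // bits 1 and 3 are zero on at least one side
  assert((R.Zero & R.One) == 0);
}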
- KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; break; case ISD::OR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, - KnownZero2, KnownOne2, TLO, Depth+1)) + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask, + Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + if (NewMask.isSubsetOf(Known2.One | Known.Zero)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(1)); - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + if (NewMask.isSubsetOf(Known.One | Known2.Zero)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(Op, NewMask, TLO)) @@ -667,25 +655,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return true; // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; case ISD::XOR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((KnownZero & NewMask) == NewMask) + if (NewMask.isSubsetOf(Known.Zero)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((KnownZero2 & NewMask) == NewMask) + if (NewMask.isSubsetOf(Known2.Zero)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) @@ -694,25 +680,25 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If all of the unknown bits are known to be zero on one side or the other // (but not both) turn this into an *inclusive* or. // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); // If all of the demanded bits on one side are known, and all of the set // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side - if (KnownOne == KnownOne2) { // set bits are the same on both sides + if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side + if (Known.One == Known2.One) { // set bits are the same on both sides EVT VT = Op.getValueType(); - SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); + SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } @@ -738,44 +724,39 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - KnownZero = std::move(KnownZeroOut); - KnownOne = std::move(KnownOneOut); + Known = std::move(KnownOut); break; case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1)) return true; - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. 
if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SETCC: { SDValue Op0 = Op.getOperand(0); @@ -801,7 +782,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (getBooleanContents(Op0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } case ISD::SHL: @@ -835,8 +816,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1)) return true; // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits @@ -885,10 +865,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - KnownZero <<= SA->getZExtValue(); - KnownOne <<= SA->getZExtValue(); + Known.Zero <<= SA->getZExtValue(); + Known.One <<= SA->getZExtValue(); // low bits known zero. - KnownZero.setLowBits(SA->getZExtValue()); + Known.Zero.setLowBits(SA->getZExtValue()); } break; case ISD::SRL: @@ -906,7 +886,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the shift is exact, then it does demand the low bits (and knows that // they are zero). - if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) + if (Op->getFlags().hasExact()) InDemandedMask.setLowBits(ShAmt); // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a @@ -931,14 +911,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // Compute the new bits that are at the top now. - if (SimplifyDemandedBits(InOp, InDemandedMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.lshrInPlace(ShAmt); - KnownOne.lshrInPlace(ShAmt); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); - KnownZero.setHighBits(ShAmt); // High bits known zero. + Known.Zero.setHighBits(ShAmt); // High bits known zero. } break; case ISD::SRA: @@ -963,33 +942,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the shift is exact, then it does demand the low bits (and knows that // they are zero). - if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) + if (Op->getFlags().hasExact()) InDemandedMask.setLowBits(ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignMask(VT.getScalarSizeInBits()); + if (NewMask.countLeadingZeros() < ShAmt) + InDemandedMask.setSignBit(); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO, + Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.lshrInPlace(ShAmt); - KnownOne.lshrInPlace(ShAmt); - - // Handle the sign bit, adjusted to where it is now in the mask. 
- APInt SignMask = APInt::getSignMask(BitWidth).lshr(ShAmt); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignMask) || (HighBits & ~NewMask) == HighBits) { + if (Known.Zero[BitWidth - ShAmt - 1] || + NewMask.countLeadingZeros() >= ShAmt) { SDNodeFlags Flags; - Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()); + Flags.setExact(Op->getFlags().hasExact()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), - Op.getOperand(1), &Flags)); + Op.getOperand(1), Flags)); } int Log2 = NewMask.exactLogBase2(); @@ -1002,9 +978,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0), NewSA)); } - if (KnownOne.intersects(SignMask)) + if (Known.One[BitWidth - ShAmt - 1]) // New bits are known one. - KnownOne |= HighBits; + Known.One.setHighBits(ShAmt); } break; case ISD::SIGN_EXTEND_INREG: { @@ -1057,24 +1033,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InputDemandedBits |= InSignBit; if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, TLO, Depth+1)) + Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. // If the input sign bit is known zero, convert this into a zero extension. - if (KnownZero.intersects(InSignBit)) + if (Known.Zero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( Op.getOperand(0), dl, ExVT.getScalarType())); - if (KnownOne.intersects(InSignBit)) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; + if (Known.One.intersects(InSignBit)) { // Input sign bit known set + Known.One |= NewBits; + Known.Zero &= ~NewBits; } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; + Known.Zero &= ~NewBits; + Known.One &= ~NewBits; } break; } @@ -1085,22 +1061,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); - APInt KnownZeroLo, KnownOneLo; - APInt KnownZeroHi, KnownOneHi; + KnownBits KnownLo, KnownHi; - if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, - KnownOneLo, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, - KnownOneHi, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1)) return true; - KnownZero = KnownZeroLo.zext(BitWidth) | - KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); + Known.Zero = KnownLo.Zero.zext(BitWidth) | + KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); - KnownOne = KnownOneLo.zext(BitWidth) | - KnownOneHi.zext(BitWidth).shl(HalfBitWidth); + Known.One = KnownLo.One.zext(BitWidth) | + KnownHi.One.zext(BitWidth).shl(HalfBitWidth); break; } case ISD::ZERO_EXTEND: { @@ -1115,13 +1088,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getValueType(), Op.getOperand(0))); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, - KnownZero, KnownOne, TLO, Depth+1)) + if 
(SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero |= NewBits; + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + Known.Zero |= NewBits; break; } case ISD::SIGN_EXTEND: { @@ -1143,37 +1115,36 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InDemandedBits |= InSignBit; InDemandedBits = InDemandedBits.trunc(InBits); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, + Depth+1)) return true; - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. - if (KnownZero.intersects(InSignBit)) + if (Known.Zero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); // If the sign bit is known one, the top bits match. - if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - assert((KnownZero & NewBits) == 0); + if (Known.One.intersects(InSignBit)) { + Known.One |= NewBits; + assert((Known.Zero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - assert((KnownOne & NewBits) == 0); - assert((KnownZero & NewBits) == 0); + assert((Known.One & NewBits) == 0); + assert((Known.Zero & NewBits) == 0); } break; } case ISD::ANY_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt InMask = NewMask.trunc(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); break; } case ISD::TRUNCATE: { @@ -1181,11 +1152,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // zero/one bits live out. unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt TruncMask = NewMask.zext(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1)) return true; - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. 
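As an aside on the refactor running through the TargetLowering.cpp hunks above: each (KnownZero, KnownOne) APInt pair collapses into a single KnownBits value, and subset tests previously spelled as mask identities, e.g. (NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask), become NewMask.isSubsetOf(Known2.Zero | Known.One). The two forms are equivalent: a demanded mask M satisfies (M & ~Z & O) == (M & ~Z) exactly when every demanded bit lies in Z or O. A minimal standalone sketch of that equivalence, using plain 64-bit masks instead of llvm::APInt (the KnownBits64 name and the sample values are invented for illustration):

#include <cassert>
#include <cstdint>

struct KnownBits64 {            // stand-in for llvm::KnownBits
  uint64_t Zero = 0, One = 0;   // invariant: no bit is both zero and one
};

// M is a subset of S iff no bit of M falls outside S.
static bool isSubsetOf(uint64_t M, uint64_t S) { return (M & ~S) == 0; }

int main() {
  KnownBits64 K{0xF0, 0x0F};    // high nibble known zero, low nibble known one
  uint64_t Mask = 0x3C;         // demanded bits straddle both halves
  bool OldForm = (Mask & ~K.Zero & K.One) == (~K.Zero & Mask);
  bool NewForm = isSubsetOf(Mask, K.Zero | K.One);
  assert(OldForm == NewForm);   // identical predicate, tidier spelling
  return 0;
}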
@@ -1233,7 +1203,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1243,11 +1213,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, - KnownZero, KnownOne, TLO, Depth+1)) + Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - KnownZero |= ~InMask; + Known.Zero |= ~InMask; break; } case ISD::BITCAST: @@ -1285,22 +1255,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // of the highest bit demanded of them. APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMask.countLeadingZeros()); - if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1) || - SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1) || + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) || + SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { - const SDNodeFlags *Flags = Op.getNode()->getFlags(); - if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. - SDNodeFlags NewFlags = *Flags; - NewFlags.setNoSignedWrap(false); - NewFlags.setNoUnsignedWrap(false); + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1), - &NewFlags); + Flags); return TLO.CombineTo(Op, NewOp); } return true; @@ -1309,13 +1276,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } default: // Just use computeKnownBits to compute output bits. - TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, Known, Depth); break; } // If we know the value of all of the demanded bits, return this as a // constant. - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { + if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved. const SDNode *N = Op.getNode(); for (SDNodeIterator I = SDNodeIterator::begin(N), @@ -1326,17 +1293,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } return TLO.CombineTo(Op, - TLO.DAG.getConstant(KnownOne, dl, Op.getValueType())); + TLO.DAG.getConstant(Known.One, dl, Op.getValueType())); } return false; } /// Determine which of the bits specified in Mask are known to be either zero or -/// one and return them in the KnownZero/KnownOne bitsets. +/// one and return them in the Known. 
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { @@ -1346,7 +1312,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); + Known.Zero.clearAllBits(); Known.One.clearAllBits(); } /// This method can be implemented by targets that want to expose additional @@ -1721,7 +1687,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, bestWidth = width; break; } - newMask = newMask << width; + newMask <<= width; } } } @@ -2986,9 +2952,9 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags); Created.push_back(Op1.getNode()); - d = d.ashr(ShAmt); + d.ashrInPlace(ShAmt); } // Calculate the multiplicative inverse, using Newton's method. @@ -3030,7 +2996,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, return SDValue(); // If the sdiv has an 'exact' bit we can use a simpler lowering. - if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact()) + if (N->getFlags().hasExact()) return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created); APInt::ms magics = Divisor.magic(); diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 315b059c5ac9..916b6f08c1b9 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -41,8 +41,8 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" static cl::opt<int> StackMapVersion( - "stackmap-version", cl::init(2), - cl::desc("Specify the stackmap encoding version (default = 2)")); + "stackmap-version", cl::init(3), + cl::desc("Specify the stackmap encoding version (default = 3)")); const char *StackMaps::WSMP = "Stack Maps: "; @@ -85,7 +85,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { } StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { - if (StackMapVersion != 2) + if (StackMapVersion != 3) llvm_unreachable("Unsupported stackmap version!"); } @@ -221,8 +221,9 @@ void StackMaps::print(raw_ostream &OS) { OS << "Constant Index " << Loc.Offset; break; } - OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size - << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + OS << "\t[encoding: .byte " << Loc.Type << ", .byte 0" + << ", .short " << Loc.Size << ", .short " << Loc.Reg << ", .short 0" + << ", .int " << Loc.Offset << "]\n"; Idx++; } @@ -521,11 +522,16 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { for (const auto &Loc : CSLocs) { OS.EmitIntValue(Loc.Type, 1); - OS.EmitIntValue(Loc.Size, 1); + OS.EmitIntValue(0, 1); // Reserved + OS.EmitIntValue(Loc.Size, 2); OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(0, 2); // Reserved OS.EmitIntValue(Loc.Offset, 4); } + // Emit alignment to 8 byte. + OS.EmitValueToAlignment(8); + // Num live-out registers and padding to align to 4 byte. 
OS.EmitIntValue(0, 2); OS.EmitIntValue(LiveOuts.size(), 2); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index e579922bb69e..39aa946fa840 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -53,6 +54,18 @@ static cl::opt<unsigned> MaximumJumpTableSize ("max-jump-table-size", cl::init(0), cl::Hidden, cl::desc("Set maximum size of jump tables; zero for no limit.")); +/// Minimum jump table density for normal functions. +static cl::opt<unsigned> + JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "a normal function")); + +/// Minimum jump table density for -Os or -Oz functions. +static cl::opt<unsigned> OptsizeJumpTableDensity( + "optsize-jump-table-density", cl::init(40), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "an optsize function")); + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. Therefore, the default value N should be @@ -910,6 +923,10 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); + // ADDCARRY operations default to expand + setOperationAction(ISD::ADDCARRY, VT, Expand); + setOperationAction(ISD::SUBCARRY, VT, Expand); + // These default to Expand so they will be expanded to CTLZ/CTTZ by default. setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); @@ -1901,6 +1918,10 @@ void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { MinimumJumpTableEntries = Val; } +unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { + return OptForSize ? 
OptsizeJumpTableDensity : JumpTableDensity;
+}
+
 unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
   return MaximumJumpTableSize;
 }
@@ -2092,3 +2113,7 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT,
                                               MachineFunction &MF) const {
   return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
 }
+
+void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
+  MF.getRegInfo().freezeReservedRegs(MF);
+}
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index c2db56a7657c..f085132b6a94 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -195,18 +196,30 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
       }
 
       if (phi->getNumOperands() == 3) {
-        unsigned Input = phi->getOperand(1).getReg();
-        unsigned Output = phi->getOperand(0).getReg();
-
-        phi++->eraseFromParent();
+        const MachineOperand &Input = phi->getOperand(1);
+        const MachineOperand &Output = phi->getOperand(0);
+        unsigned InputReg = Input.getReg();
+        unsigned OutputReg = Output.getReg();
+        assert(Output.getSubReg() == 0 && "Cannot have output subregister");
         ModifiedPHI = true;
 
-        if (Input != Output) {
+        if (InputReg != OutputReg) {
           MachineRegisterInfo &MRI = F.getRegInfo();
-          MRI.constrainRegClass(Input, MRI.getRegClass(Output));
-          MRI.replaceRegWith(Output, Input);
+          unsigned InputSub = Input.getSubReg();
+          if (InputSub == 0) {
+            MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg));
+            MRI.replaceRegWith(OutputReg, InputReg);
+          } else {
+            // The input register to the PHI has a subregister:
+            // insert a COPY instead of simply replacing the output
+            // with the input.
+            const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
+            BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
+                    TII->get(TargetOpcode::COPY), OutputReg)
+                .addReg(InputReg, getRegState(Input), InputSub);
+          }
+          phi++->eraseFromParent();
         }
-        continue;
       }
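The UnreachableBlockElim.cpp hunk above is the subtle one: renaming the PHI's output vreg to its input (replaceRegWith) is only sound when the input is read whole. When the input operand carries a subregister index, the PHI reads just a slice of the input, so the fix materializes that slice with an explicit COPY. A toy model of the distinction, with invented names and widths (ordinary C++, not MachineIR):

#include <cassert>
#include <cstdint>

// Pretend %in is a 64-bit vreg whose "sub32" index names its low half.
static uint32_t readSub32(uint64_t In) { return static_cast<uint32_t>(In); }

int main() {
  uint64_t In = 0xDEADBEEF00000042ULL;  // PHI input vreg
  // A bare rename of %out to %in hands later uses the whole 64 bits:
  uint64_t RenamedOut = In;
  // "%out = COPY %in.sub32" extracts only the slice the PHI read:
  uint32_t CopiedOut = readSub32(In);
  assert(CopiedOut == 0x42u);
  // The low bits agree, but the rename smuggles in 32 extra bits and
  // the wrong register width, which is why the patch inserts a COPY.
  assert(static_cast<uint32_t>(RenamedOut) == CopiedOut);
  return 0;
}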
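Similarly, the StackMaps.cpp hunks further up fully define the new version-3 location record: the byte-sized size field becomes a half word, two reserved fields appear, and the records are followed by 8-byte alignment. A hypothetical packed struct mirroring the bytes the updated EmitIntValue calls produce (the struct and field names are invented; only the layout comes from the patch):

#include <cstdint>

#pragma pack(push, 1)
struct StackMapV3Location {  // 12 bytes, vs. 8 in version 2
  uint8_t  Type;             // OS.EmitIntValue(Loc.Type, 1)
  uint8_t  Reserved0;        // OS.EmitIntValue(0, 1)        -- new in v3
  uint16_t Size;             // OS.EmitIntValue(Loc.Size, 2) -- widened from 1 byte
  uint16_t Reg;              // OS.EmitIntValue(Loc.Reg, 2)
  uint16_t Reserved1;        // OS.EmitIntValue(0, 2)        -- new in v3
  int32_t  Offset;           // OS.EmitIntValue(Loc.Offset, 4)
};
#pragma pack(pop)

static_assert(sizeof(StackMapV3Location) == 12,
              "matches the record emitted by emitCallsiteEntries");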