Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/Analysis.cpp | 29
-rw-r--r-- lib/CodeGen/AsmPrinter/AccelTable.cpp | 17
-rw-r--r-- lib/CodeGen/AsmPrinter/AddressPool.cpp | 18
-rw-r--r-- lib/CodeGen/AsmPrinter/AddressPool.h | 6
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 128
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 3
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 74
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 115
-rw-r--r-- lib/CodeGen/AsmPrinter/CMakeLists.txt | 3
-rw-r--r-- lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 850
-rw-r--r-- lib/CodeGen/AsmPrinter/CodeViewDebug.h | 104
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.cpp | 9
-rw-r--r-- lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp (renamed from lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp) | 90
-rw-r--r-- lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 67
-rw-r--r-- lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 29
-rw-r--r-- lib/CodeGen/AsmPrinter/DebugHandlerBase.h | 131
-rw-r--r-- lib/CodeGen/AsmPrinter/DebugLocEntry.h | 2
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 202
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 50
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 806
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.h | 180
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 33
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfExpression.h | 5
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfFile.cpp | 30
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfFile.h | 69
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 49
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfStringPool.h | 10
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 194
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfUnit.h | 35
-rw-r--r-- lib/CodeGen/AsmPrinter/EHStreamer.cpp | 39
-rw-r--r-- lib/CodeGen/AsmPrinter/EHStreamer.h | 13
-rw-r--r-- lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2
-rw-r--r-- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2
-rw-r--r-- lib/CodeGen/AsmPrinter/WasmException.cpp | 97
-rw-r--r-- lib/CodeGen/AsmPrinter/WasmException.h | 42
-rw-r--r-- lib/CodeGen/AsmPrinter/WinCFGuard.h | 2
-rw-r--r-- lib/CodeGen/AsmPrinter/WinException.cpp | 28
-rw-r--r-- lib/CodeGen/AsmPrinter/WinException.h | 7
-rw-r--r-- lib/CodeGen/AtomicExpandPass.cpp | 158
-rw-r--r-- lib/CodeGen/BranchFolding.cpp | 12
-rw-r--r-- lib/CodeGen/BreakFalseDeps.cpp | 2
-rw-r--r-- lib/CodeGen/BuiltinGCs.cpp | 29
-rw-r--r-- lib/CodeGen/CFIInstrInserter.cpp | 5
-rw-r--r-- lib/CodeGen/CMakeLists.txt | 2
-rw-r--r-- lib/CodeGen/CalcSpillWeights.cpp | 41
-rw-r--r-- lib/CodeGen/CodeGen.cpp | 1
-rw-r--r-- lib/CodeGen/CodeGenPrepare.cpp | 540
-rw-r--r-- lib/CodeGen/DFAPacketizer.cpp | 3
-rw-r--r-- lib/CodeGen/EarlyIfConversion.cpp | 7
-rw-r--r-- lib/CodeGen/ExpandMemCmp.cpp | 233
-rw-r--r-- lib/CodeGen/ExpandPostRAPseudos.cpp | 2
-rw-r--r-- lib/CodeGen/GCMetadata.cpp | 12
-rw-r--r-- lib/CodeGen/GCRootLowering.cpp | 133
-rw-r--r-- lib/CodeGen/GlobalISel/CMakeLists.txt | 3
-rw-r--r-- lib/CodeGen/GlobalISel/CSEInfo.cpp | 370
-rw-r--r-- lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 231
-rw-r--r-- lib/CodeGen/GlobalISel/CallLowering.cpp | 5
-rw-r--r-- lib/CodeGen/GlobalISel/Combiner.cpp | 82
-rw-r--r-- lib/CodeGen/GlobalISel/CombinerHelper.cpp | 306
-rw-r--r-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp | 40
-rw-r--r-- lib/CodeGen/GlobalISel/IRTranslator.cpp | 413
-rw-r--r-- lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r-- lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 4
-rw-r--r-- lib/CodeGen/GlobalISel/Legalizer.cpp | 113
-rw-r--r-- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 479
-rw-r--r-- lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 13
-rw-r--r-- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 797
-rw-r--r-- lib/CodeGen/GlobalISel/RegBankSelect.cpp | 36
-rw-r--r-- lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 2
-rw-r--r-- lib/CodeGen/GlobalISel/Utils.cpp | 53
-rw-r--r-- lib/CodeGen/GlobalMerge.cpp | 25
-rw-r--r-- lib/CodeGen/IfConversion.cpp | 24
-rw-r--r-- lib/CodeGen/ImplicitNullChecks.cpp | 20
-rw-r--r-- lib/CodeGen/InterleavedLoadCombinePass.cpp | 1359
-rw-r--r-- lib/CodeGen/LLVMTargetMachine.cpp | 56
-rw-r--r-- lib/CodeGen/LatencyPriorityQueue.cpp | 4
-rw-r--r-- lib/CodeGen/LiveDebugValues.cpp | 79
-rw-r--r-- lib/CodeGen/LiveDebugVariables.cpp | 227
-rw-r--r-- lib/CodeGen/LiveDebugVariables.h | 7
-rw-r--r-- lib/CodeGen/LiveInterval.cpp | 22
-rw-r--r-- lib/CodeGen/LivePhysRegs.cpp | 10
-rw-r--r-- lib/CodeGen/LiveRangeCalc.cpp | 2
-rw-r--r-- lib/CodeGen/LocalStackSlotAllocation.cpp | 2
-rw-r--r-- lib/CodeGen/MIRCanonicalizerPass.cpp | 11
-rw-r--r-- lib/CodeGen/MIRParser/MILexer.cpp | 57
-rw-r--r-- lib/CodeGen/MIRParser/MILexer.h | 9
-rw-r--r-- lib/CodeGen/MIRParser/MIParser.cpp | 227
-rw-r--r-- lib/CodeGen/MIRParser/MIRParser.cpp | 2
-rw-r--r-- lib/CodeGen/MIRPrinter.cpp | 53
-rw-r--r-- lib/CodeGen/MachineBasicBlock.cpp | 78
-rw-r--r-- lib/CodeGen/MachineBlockPlacement.cpp | 5
-rw-r--r-- lib/CodeGen/MachineCSE.cpp | 21
-rw-r--r-- lib/CodeGen/MachineCombiner.cpp | 2
-rw-r--r-- lib/CodeGen/MachineCopyPropagation.cpp | 296
-rw-r--r-- lib/CodeGen/MachineFunction.cpp | 208
-rw-r--r-- lib/CodeGen/MachineFunctionPass.cpp | 36
-rw-r--r-- lib/CodeGen/MachineFunctionPrinterPass.cpp | 2
-rw-r--r-- lib/CodeGen/MachineInstr.cpp | 350
-rw-r--r-- lib/CodeGen/MachineInstrBundle.cpp | 27
-rw-r--r-- lib/CodeGen/MachineLICM.cpp | 12
-rw-r--r-- lib/CodeGen/MachineModuleInfo.cpp | 5
-rw-r--r-- lib/CodeGen/MachineModuleInfoImpls.cpp | 1
-rw-r--r-- lib/CodeGen/MachineOperand.cpp | 19
-rw-r--r-- lib/CodeGen/MachineOutliner.cpp | 1101
-rw-r--r-- lib/CodeGen/MachinePassRegistry.cpp | 55
-rw-r--r-- lib/CodeGen/MachinePipeliner.cpp | 818
-rw-r--r-- lib/CodeGen/MachineRegisterInfo.cpp | 65
-rw-r--r-- lib/CodeGen/MachineScheduler.cpp | 268
-rw-r--r-- lib/CodeGen/MachineSink.cpp | 92
-rw-r--r-- lib/CodeGen/MachineTraceMetrics.cpp | 3
-rw-r--r-- lib/CodeGen/MachineVerifier.cpp | 164
-rw-r--r-- lib/CodeGen/MacroFusion.cpp | 12
-rw-r--r-- lib/CodeGen/OptimizePHIs.cpp | 17
-rw-r--r-- lib/CodeGen/PHIElimination.cpp | 34
-rw-r--r-- lib/CodeGen/PostRASchedulerList.cpp | 12
-rw-r--r-- lib/CodeGen/PreISelIntrinsicLowering.cpp | 124
-rw-r--r-- lib/CodeGen/PrologEpilogInserter.cpp | 30
-rw-r--r-- lib/CodeGen/PseudoSourceValue.cpp | 4
-rw-r--r-- lib/CodeGen/README.txt | 4
-rw-r--r-- lib/CodeGen/ReachingDefAnalysis.cpp | 2
-rw-r--r-- lib/CodeGen/RegAllocFast.cpp | 767
-rw-r--r-- lib/CodeGen/RegAllocGreedy.cpp | 69
-rw-r--r-- lib/CodeGen/RegUsageInfoCollector.cpp | 47
-rw-r--r-- lib/CodeGen/RegisterCoalescer.cpp | 246
-rw-r--r-- lib/CodeGen/RegisterPressure.cpp | 3
-rw-r--r-- lib/CodeGen/RegisterUsageInfo.cpp | 4
-rw-r--r-- lib/CodeGen/SafeStack.cpp | 21
-rw-r--r-- lib/CodeGen/SafeStackColoring.cpp | 9
-rw-r--r-- lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 279
-rw-r--r-- lib/CodeGen/ScheduleDAG.cpp | 100
-rw-r--r-- lib/CodeGen/ScheduleDAGInstrs.cpp | 40
-rw-r--r-- lib/CodeGen/ScoreboardHazardRecognizer.cpp | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3143
-rw-r--r-- lib/CodeGen/SelectionDAG/FastISel.cpp | 48
-rw-r--r-- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 13
-rw-r--r-- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 42
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 453
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 34
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 416
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 32
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 2
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 221
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 717
-rw-r--r-- lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 13
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 15
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 12
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 50
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 4
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 4
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1050
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 13
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 631
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 40
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 167
-rw-r--r-- lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 11
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1926
-rw-r--r-- lib/CodeGen/SlotIndexes.cpp | 2
-rw-r--r-- lib/CodeGen/SplitKit.h | 17
-rw-r--r-- lib/CodeGen/StackColoring.cpp | 14
-rw-r--r-- lib/CodeGen/StackMaps.cpp | 9
-rw-r--r-- lib/CodeGen/StackProtector.cpp | 41
-rw-r--r-- lib/CodeGen/StackSlotColoring.cpp | 2
-rw-r--r-- lib/CodeGen/TargetFrameLoweringImpl.cpp | 6
-rw-r--r-- lib/CodeGen/TargetInstrInfo.cpp | 51
-rw-r--r-- lib/CodeGen/TargetLoweringBase.cpp | 22
-rw-r--r-- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 274
-rw-r--r-- lib/CodeGen/TargetOptionsImpl.cpp | 31
-rw-r--r-- lib/CodeGen/TargetPassConfig.cpp | 147
-rw-r--r-- lib/CodeGen/TwoAddressInstructionPass.cpp | 40
-rw-r--r-- lib/CodeGen/VirtRegMap.cpp | 2
-rw-r--r-- lib/CodeGen/WasmEHPrepare.cpp | 64
-rw-r--r-- lib/CodeGen/WinEHPrepare.cpp | 9
173 files changed, 16941 insertions(+), 8200 deletions(-)
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 79f11def38f7..797f05ee5cf3 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -471,7 +471,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
- const TerminatorInst *Term = ExitBB->getTerminator();
+ const Instruction *Term = ExitBB->getTerminator();
const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
// The block must end in a return statement or unreachable.
@@ -496,6 +496,10 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// Debug info intrinsics do not get in the way of tail call optimization.
if (isa<DbgInfoIntrinsic>(BBI))
continue;
+ // A lifetime end intrinsic should not stop tail call optimization.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!isSafeToSpeculativelyExecute(&*BBI))
return false;
@@ -519,10 +523,12 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
AttributeList::ReturnIndex);
- // Noalias is completely benign as far as calling convention goes, it
- // shouldn't affect whether the call is a tail call.
+ // NoAlias and NonNull are completely benign as far as calling convention
+ // goes, they shouldn't affect whether the call is a tail call.
CallerAttrs.removeAttribute(Attribute::NoAlias);
CalleeAttrs.removeAttribute(Attribute::NoAlias);
+ CallerAttrs.removeAttribute(Attribute::NonNull);
+ CalleeAttrs.removeAttribute(Attribute::NonNull);
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
@@ -540,6 +546,21 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
CalleeAttrs.removeAttribute(Attribute::SExt);
}
+ // Drop sext and zext return attributes if the result is not used.
+ // This enables tail calls for code like:
+ //
+ // define void @caller() {
+ // entry:
+ // %unused_result = tail call zeroext i1 @callee()
+ // br label %retlabel
+ // retlabel:
+ // ret void
+ // }
+ if (I->use_empty()) {
+ CalleeAttrs.removeAttribute(Attribute::SExt);
+ CalleeAttrs.removeAttribute(Attribute::ZExt);
+ }
+
// If they're still different, there's some facet we don't understand
// (currently only "inreg", but in future who knows). It may be OK but the
// only safe option is to reject the tail call.
@@ -650,7 +671,7 @@ static void collectEHScopeMembers(
// Returns are boundaries where scope transfer can occur, don't follow
// successors.
- if (Visiting->isReturnBlock())
+ if (Visiting->isEHScopeReturnBlock())
continue;
for (const MachineBasicBlock *Succ : Visiting->successors())
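Taken together, the two tail-call changes above admit IR like the following sketch (function names and sizes are illustrative): the lifetime-end intrinsic between the call and the return no longer blocks isInTailCallPosition(), and the unused zeroext result no longer fails attributesPermitTailCall().

    declare zeroext i1 @callee()
    declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

    define void @caller(i8* %buf) {
    entry:
      ; Unused zeroext result: the callee's zext attribute is now dropped.
      %unused = tail call zeroext i1 @callee()
      ; A lifetime-end marker no longer breaks the tail-call position check.
      call void @llvm.lifetime.end.p0i8(i64 16, i8* %buf)
      ret void
    }
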
diff --git a/lib/CodeGen/AsmPrinter/AccelTable.cpp b/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 20b0b8d3feab..95875ccb8a0b 100644
--- a/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -553,19 +554,31 @@ void llvm::emitDWARF5AccelTable(
AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents,
const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
std::vector<MCSymbol *> CompUnits;
+ SmallVector<unsigned, 1> CUIndex(CUs.size());
+ int Count = 0;
for (const auto &CU : enumerate(CUs)) {
+ if (CU.value()->getCUNode()->getNameTableKind() ==
+ DICompileUnit::DebugNameTableKind::None)
+ continue;
+ CUIndex[CU.index()] = Count++;
assert(CU.index() == CU.value()->getUniqueID());
const DwarfCompileUnit *MainCU =
DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
CompUnits.push_back(MainCU->getLabelBegin());
}
+ if (CompUnits.empty())
+ return;
+
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugNamesSection());
+
Contents.finalize(Asm, "names");
Dwarf5AccelTableWriter<DWARF5AccelTableData>(
Asm, Contents, CompUnits,
- [&DD](const DWARF5AccelTableData &Entry) {
+ [&](const DWARF5AccelTableData &Entry) {
const DIE *CUDie = Entry.getDie().getUnitDie();
- return DD.lookupCU(CUDie)->getUniqueID();
+ return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
})
.emit();
}
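For context, the compile units skipped above are those whose DICompileUnit opts out of the name table. A minimal metadata sketch (fields beyond the relevant ones are illustrative):

    !llvm.dbg.cu = !{!0}
    !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1,
                                 emissionKind: FullDebug, nameTableKind: None)
    !1 = !DIFile(filename: "a.cpp", directory: "/tmp")

Such a unit gets no .debug_names contribution, and the new CUIndex array renumbers the remaining units so the emitted CU indices stay dense.
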
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index c8305ad9c547..042243b79259 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -27,29 +27,35 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
- Asm.OutStreamer->SwitchSection(Section);
-
uint64_t Length = sizeof(uint16_t) // version
+ sizeof(uint8_t) // address_size
+ sizeof(uint8_t) // segment_selector_size
+ AddrSize * Pool.size(); // entries
+ Asm.OutStreamer->AddComment("Length of contribution");
Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+ Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.OutStreamer->AddComment("Address size");
Asm.emitInt8(AddrSize);
+ Asm.OutStreamer->AddComment("Segment selector size");
Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
}
// Emit addresses into the section given.
void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
- if (Asm.getDwarfVersion() >= 5)
- emitHeader(Asm, AddrSection);
-
- if (Pool.empty())
+ if (isEmpty())
return;
// Start the dwarf addr section.
Asm.OutStreamer->SwitchSection(AddrSection);
+ if (Asm.getDwarfVersion() >= 5)
+ emitHeader(Asm, AddrSection);
+
+ // Define the symbol that marks the start of the contribution.
+ // It is referenced via DW_AT_addr_base.
+ Asm.OutStreamer->EmitLabel(AddressTableBaseSym);
+
// Order the address pool entries by ID
SmallVector<const MCExpr *, 64> Entries(Pool.size());
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index d5008fab5563..2209c7eb50ed 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -51,8 +51,14 @@ public:
void resetUsedFlag() { HasBeenUsed = false; }
+ MCSymbol *getLabel() { return AddressTableBaseSym; }
+ void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
+
private:
void emitHeader(AsmPrinter &Asm, MCSection *Section);
+
+ /// Symbol designates the start of the contribution to the address table.
+ MCSymbol *AddressTableBaseSym = nullptr;
};
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9bbc77b3056b..7070451e3330 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AsmPrinter.h"
-#include "AsmPrinterHandler.h"
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
#include "llvm/ADT/APFloat.h"
@@ -32,8 +32,10 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -52,6 +54,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -260,7 +263,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
const Triple &Target = TM.getTargetTriple();
- OutStreamer->EmitVersionForTarget(Target);
+ OutStreamer->EmitVersionForTarget(Target, M.getSDKVersion());
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
@@ -355,7 +358,7 @@ bool AsmPrinter::doInitialization(Module &M) {
}
break;
case ExceptionHandling::Wasm:
- // TODO to prevent warning
+ ES = new WasmException(this);
break;
}
if (ES)
@@ -363,7 +366,7 @@ bool AsmPrinter::doInitialization(Module &M) {
DWARFGroupName, DWARFGroupDescription));
if (mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("cfguard")))
+ MMI->getModule()->getModuleFlag("cfguardtable")))
Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription));
@@ -627,8 +630,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
-void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
- unsigned Size) const {
+void AsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
OutStreamer->EmitValue(Value, Size);
}
@@ -749,18 +751,30 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
const MachineFrameInfo &MFI = MF->getFrameInfo();
bool Commented = false;
+ auto getSize =
+ [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) {
+ unsigned Size = 0;
+ for (auto A : Accesses)
+ if (MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex()))
+ Size += A->getSize();
+ return Size;
+ };
+
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand *, 2> Accesses;
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload";
Commented = true;
}
- } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- CommentOS << MMO->getSize() << "-byte Folded Reload";
+ } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
+ if (auto Size = getSize(Accesses)) {
+ CommentOS << Size << "-byte Folded Reload";
Commented = true;
}
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
@@ -769,9 +783,9 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
CommentOS << MMO->getSize() << "-byte Spill";
Commented = true;
}
- } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- CommentOS << MMO->getSize() << "-byte Folded Spill";
+ } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
+ if (auto Size = getSize(Accesses)) {
+ CommentOS << Size << "-byte Folded Spill";
Commented = true;
}
}
@@ -1066,6 +1080,10 @@ void AsmPrinter::EmitFunctionBody() {
++NumInstsInFunction;
}
+ // If there is a pre-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPreInstrSymbol())
+ OutStreamer->EmitLabel(S);
+
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -1117,6 +1135,10 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
+ // If there is a post-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPostInstrSymbol())
+ OutStreamer->EmitLabel(S);
+
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -1394,6 +1416,33 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ MachineModuleInfoCOFF &MMICOFF =
+ MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoCOFF::SymbolListTy Stubs = MMICOFF.GetGVStubList();
+ if (!Stubs.empty()) {
+ const DataLayout &DL = M.getDataLayout();
+
+ for (const auto &Stub : Stubs) {
+ SmallString<256> SectionName = StringRef(".rdata$");
+ SectionName += Stub.first->getName();
+ OutStreamer->SwitchSection(OutContext.getCOFFSection(
+ SectionName,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT,
+ SectionKind::getReadOnly(), Stub.first->getName(),
+ COFF::IMAGE_COMDAT_SELECT_ANY));
+ EmitAlignment(Log2_32(DL.getPointerSize()));
+ OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global);
+ OutStreamer->EmitLabel(Stub.first);
+ OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
+ DL.getPointerSize());
+ }
+ }
+ }
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -1450,6 +1499,9 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit llvm.ident metadata in an '.ident' directive.
EmitModuleIdents(M);
+ // Emit bytes for llvm.commandline metadata.
+ EmitModuleCommandLines(M);
+
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
unsigned Align = 1;
@@ -1534,7 +1586,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit address-significance attributes for all globals.
OutStreamer->EmitAddrsig();
for (const GlobalValue &GV : M.global_values())
- if (!GV.isThreadLocal() && !GV.getName().startswith("llvm.") &&
+ if (!GV.use_empty() && !GV.isThreadLocal() &&
+ !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
!GV.hasAtLeastLocalUnnamedAddr())
OutStreamer->EmitAddrsigSym(getSymbol(&GV));
}
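Under the tightened filter, address-significance symbols are no longer emitted for globals like these (an illustrative sketch) — the first because it is unreferenced, the second because it has DLL-import storage:

    @unused = global i32 0
    @imported = external dllimport global i32
    ; Neither global receives an .addrsig_sym entry.
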
@@ -1958,6 +2011,29 @@ void AsmPrinter::EmitModuleIdents(Module &M) {
}
}
+void AsmPrinter::EmitModuleCommandLines(Module &M) {
+ MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines();
+ if (!CommandLine)
+ return;
+
+ const NamedMDNode *NMD = M.getNamedMetadata("llvm.commandline");
+ if (!NMD || !NMD->getNumOperands())
+ return;
+
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(CommandLine);
+ OutStreamer->EmitZeros(1);
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.commandline metadata entry can have only one operand");
+ const MDString *S = cast<MDString>(N->getOperand(0));
+ OutStreamer->EmitBytes(S->getString());
+ OutStreamer->EmitZeros(1);
+ }
+ OutStreamer->PopSection();
+}
+
//===--------------------------------------------------------------------===//
// Emission and print routines
//
@@ -2927,11 +3003,6 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
- assert(!S.useStatepoints() && "statepoints do not currently support custom"
- " stackmap formats, please see the documentation for a description of"
- " the default format. If you really need a custom serialized format,"
- " please file a bug");
-
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
gcp_map_type::iterator GCPI = GCMap.find(&S);
if (GCPI != GCMap.end())
@@ -2952,6 +3023,27 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
}
+void AsmPrinter::emitStackMaps(StackMaps &SM) {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ bool NeedsDefault = false;
+ if (MI->begin() == MI->end())
+ // No GC strategy, use the default format.
+ NeedsDefault = true;
+ else
+ for (auto &I : *MI) {
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ if (MP->emitStackMaps(SM, *this))
+ continue;
+ // The strategy doesn't have printer or doesn't emit custom stack maps.
+ // Use the default format.
+ NeedsDefault = true;
+ }
+
+ if (NeedsDefault)
+ SM.serializeToStackMapSection();
+}
+
/// Pin vtable to this file.
AsmPrinterHandler::~AsmPrinterHandler() = default;
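The new EmitModuleCommandLines hook reads the llvm.commandline named metadata and writes each entry NUL-separated into the section returned by getSectionForCommandLines() (on ELF targets this is the .GCC.command.line section). A minimal sketch of the metadata it consumes (the recorded string is illustrative):

    !llvm.commandline = !{!0}
    !0 = !{!"clang -O2 -c a.c"}
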
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 605588470670..afce3ad3133b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -212,6 +212,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpWindowSave:
OutStreamer->EmitCFIWindowSave();
break;
+ case MCCFIInstruction::OpNegateRAState:
+ OutStreamer->EmitCFINegateRAState();
+ break;
case MCCFIInstruction::OpSameValue:
OutStreamer->EmitCFISameValue(Inst.getRegister());
break;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
deleted file mode 100644
index f5ac95a20b10..000000000000
--- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a generic interface for AsmPrinter handlers,
-// like debug and EH info emitters.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class AsmPrinter;
-class MachineBasicBlock;
-class MachineFunction;
-class MachineInstr;
-class MCSymbol;
-
-typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm);
-
-/// Collects and handles AsmPrinter objects required to build debug
-/// or EH information.
-class AsmPrinterHandler {
-public:
- virtual ~AsmPrinterHandler();
-
- /// For symbols that have a size designated (e.g. common symbols),
- /// this tracks that size.
- virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;
-
- /// Emit all sections that should come after the content.
- virtual void endModule() = 0;
-
- /// Gather pre-function debug information.
- /// Every beginFunction(MF) call should be followed by an endFunction(MF)
- /// call.
- virtual void beginFunction(const MachineFunction *MF) = 0;
-
- // Emit any of function marker (like .cfi_endproc). This is called
- // before endFunction and cannot switch sections.
- virtual void markFunctionEnd();
-
- /// Gather post-function debug information.
- /// Please note that some AsmPrinter implementations may not call
- /// beginFunction at all.
- virtual void endFunction(const MachineFunction *MF) = 0;
-
- virtual void beginFragment(const MachineBasicBlock *MBB,
- ExceptionSymbolProvider ESP) {}
- virtual void endFragment() {}
-
- /// Emit target-specific EH funclet machinery.
- virtual void beginFunclet(const MachineBasicBlock &MBB,
- MCSymbol *Sym = nullptr) {}
- virtual void endFunclet() {}
-
- /// Process beginning of an instruction.
- virtual void beginInstruction(const MachineInstr *MI) = 0;
-
- /// Process end of an instruction.
- virtual void endInstruction() = 0;
-};
-} // End of namespace llvm
-
-#endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4159eb19423a..62103e3107c0 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -71,6 +71,42 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
}
+unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
+ const MDNode *LocMDNode) const {
+ if (!DiagInfo) {
+ DiagInfo = make_unique<SrcMgrDiagInfo>();
+
+ MCContext &Context = MMI->getContext();
+ Context.setInlineSourceManager(&DiagInfo->SrcMgr);
+
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
+ DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
+ }
+ }
+
+ SourceMgr &SrcMgr = DiagInfo->SrcMgr;
+
+ std::unique_ptr<MemoryBuffer> Buffer;
+ // The inline asm source manager will outlive AsmStr, so make a copy of the
+ // string for SourceMgr to own.
+ Buffer = MemoryBuffer::getMemBufferCopy(AsmStr, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+
+ // Store LocMDNode in DiagInfo, using BufNum as an identifier.
+ if (LocMDNode) {
+ DiagInfo->LocInfos.resize(BufNum);
+ DiagInfo->LocInfos[BufNum - 1] = LocMDNode;
+ }
+
+ return BufNum;
+}
+
+
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
const MCTargetOptions &MCOptions,
@@ -98,39 +134,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
return;
}
- if (!DiagInfo) {
- DiagInfo = make_unique<SrcMgrDiagInfo>();
+ unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
+ DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
- MCContext &Context = MMI->getContext();
- Context.setInlineSourceManager(&DiagInfo->SrcMgr);
-
- LLVMContext &LLVMCtx = MMI->getModule()->getContext();
- if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
- DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
- DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
- DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
- }
- }
-
- SourceMgr &SrcMgr = DiagInfo->SrcMgr;
- SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
-
- std::unique_ptr<MemoryBuffer> Buffer;
- // The inline asm source manager will outlive Str, so make a copy of the
- // string for SourceMgr to own.
- Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
-
- // Tell SrcMgr about this buffer, it takes ownership of the buffer.
- unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
-
- // Store LocMDNode in DiagInfo, using BufNum as an identifier.
- if (LocMDNode) {
- DiagInfo->LocInfos.resize(BufNum);
- DiagInfo->LocInfos[BufNum-1] = LocMDNode;
- }
-
- std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
+ std::unique_ptr<MCAsmParser> Parser(createMCAsmParser(
+ DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
// Do not use assembler-level information for parsing inline assembly.
OutStreamer->setUseAssemblerInfoForParsing(false);
@@ -148,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
+ // Enable lexing Masm binary and hex integer literals in intel inline
+ // assembly.
if (Dialect == InlineAsm::AD_Intel)
- // We need this flag to be able to parse numbers like "0bH"
- Parser->setParsingInlineAsm(true);
+ Parser->getLexer().setLexMasmIntegers(true);
if (MF) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
@@ -519,6 +528,44 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
MCOptions.SanitizeAddress =
MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
+ // Emit warnings if we use reserved registers on the clobber list, as
+ // that might give surprising results.
+ std::vector<std::string> RestrRegs;
+ // Start with the first operand descriptor, and iterate over them.
+ for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
+ I < NumOps; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (MO.isImm()) {
+ unsigned Flags = MO.getImm();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber &&
+ !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) {
+ RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg()));
+ }
+ // Skip to one before the next operand descriptor, if it exists.
+ I += InlineAsm::getNumOperandRegisters(Flags);
+ }
+ }
+
+ if (!RestrRegs.empty()) {
+ unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD);
+ auto &SrcMgr = DiagInfo->SrcMgr;
+ SMLoc Loc = SMLoc::getFromPointer(
+ SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin());
+
+ std::string Msg = "inline asm clobber list contains reserved registers: ";
+ for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) {
+ if(I != RestrRegs.begin())
+ Msg += ", ";
+ Msg += *I;
+ }
+ std::string Note = "Reserved registers on the clobber list may not be "
+ "preserved across the asm statement, and clobbering them may "
+ "lead to undefined behaviour.";
+ SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
+ SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
+ }
+
EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
MI->getInlineAsmDialect());
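As an illustrative sketch (assuming an AArch64 target, where the stack pointer is not clobberable), IR along these lines would now trigger the new warning plus note at the location of the asm blob:

    define void @f() {
    entry:
      ; warning: inline asm clobber list contains reserved registers: SP
      ; note: Reserved registers on the clobber list may not be preserved ...
      call void asm sideeffect "nop", "~{sp}"()
      ret void
    }
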
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 0f8c24158ee2..3fb088ab6f0d 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMAsmPrinter
AsmPrinter.cpp
AsmPrinterDwarf.cpp
AsmPrinterInlineAsm.cpp
- DbgValueHistoryCalculator.cpp
+ DbgEntityHistoryCalculator.cpp
DebugHandlerBase.cpp
DebugLocStream.cpp
DIE.cpp
@@ -23,6 +23,7 @@ add_llvm_library(LLVMAsmPrinter
WinCFGuard.cpp
WinException.cpp
CodeViewDebug.cpp
+ WasmException.cpp
DEPENDS
intrinsics_gen
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8c5c5478d01a..8cabad4ad312 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -31,6 +31,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -43,6 +44,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
@@ -72,6 +74,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -90,8 +93,20 @@
using namespace llvm;
using namespace llvm::codeview;
-static cl::opt<bool> EmitDebugGlobalHashes("emit-codeview-ghash-section",
- cl::ReallyHidden, cl::init(false));
+static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
+ switch (Type) {
+ case Triple::ArchType::x86:
+ return CPUType::Pentium3;
+ case Triple::ArchType::x86_64:
+ return CPUType::X64;
+ case Triple::ArchType::thumb:
+ return CPUType::Thumb;
+ case Triple::ArchType::aarch64:
+ return CPUType::ARM64;
+ default:
+ report_fatal_error("target architecture doesn't map to a CodeView CPUType");
+ }
+}
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
: DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
@@ -100,11 +115,21 @@ CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
!AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
+ MMI->setDebugInfoAvailability(false);
return;
}
-
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
+
+ TheCPU =
+ mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
+
+ collectGlobalVariableInfo();
+
+ // Check if we should emit type record hashes.
+ ConstantInt *GH = mdconst::extract_or_null<ConstantInt>(
+ MMI->getModule()->getModuleFlag("CodeViewGHash"));
+ EmitDebugGlobalHashes = GH && !GH->isZero();
}
StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
@@ -116,7 +141,9 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
// If this is a Unix-style path, just use it as is. Don't try to canonicalize
// it textually because one of the path components could be a symlink.
- if (!Dir.empty() && Dir[0] == '/') {
+ if (Dir.startswith("/") || Filename.startswith("/")) {
+ if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix))
+ return Filename;
Filepath = Dir;
if (Dir.back() != '/')
Filepath += '/';
@@ -337,6 +364,36 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
return recordTypeIndexForDINode(SP, TI);
}
+static bool isTrivial(const DICompositeType *DCTy) {
+ return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial);
+}
+
+static FunctionOptions
+getFunctionOptions(const DISubroutineType *Ty,
+ const DICompositeType *ClassTy = nullptr,
+ StringRef SPName = StringRef("")) {
+ FunctionOptions FO = FunctionOptions::None;
+ const DIType *ReturnTy = nullptr;
+ if (auto TypeArray = Ty->getTypeArray()) {
+ if (TypeArray.size())
+ ReturnTy = TypeArray[0].resolve();
+ }
+
+ if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) {
+ if (!isTrivial(ReturnDCTy))
+ FO |= FunctionOptions::CxxReturnUdt;
+ }
+
+ // DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison.
+ if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) {
+ FO |= FunctionOptions::Constructor;
+
+ // TODO: put the FunctionOptions::ConstructorWithVirtualBases flag.
+
+ }
+ return FO;
+}
+
TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
const DICompositeType *Class) {
// Always use the method declaration as the key for the function type. The
@@ -356,8 +413,10 @@ TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
// member function type.
TypeLoweringScope S(*this);
const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0;
+
+ FunctionOptions FO = getFunctionOptions(SP->getType(), Class, SP->getName());
TypeIndex TI = lowerTypeMemberFunction(
- SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod);
+ SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod, FO);
return recordTypeIndexForDINode(SP, TI, Class);
}
@@ -508,6 +567,11 @@ void CodeViewDebug::endModule() {
OS.AddComment("String table");
OS.EmitCVStringTableDirective();
+ // Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol
+ // subsection in the generic .debug$S section at the end. There is no
+ // particular reason for this ordering other than to match MSVC.
+ emitBuildInfo();
+
// Emit type information and hashes last, so that any types we translate while
// emitting function info are included.
emitTypeInformation();
@@ -669,30 +733,8 @@ static Version parseVersion(StringRef Name) {
return V;
}
-static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
- switch (Type) {
- case Triple::ArchType::x86:
- return CPUType::Pentium3;
- case Triple::ArchType::x86_64:
- return CPUType::X64;
- case Triple::ArchType::thumb:
- return CPUType::Thumb;
- case Triple::ArchType::aarch64:
- return CPUType::ARM64;
- default:
- report_fatal_error("target architecture doesn't map to a CodeView CPUType");
- }
-}
-
void CodeViewDebug::emitCompilerInformation() {
- MCContext &Context = MMI->getContext();
- MCSymbol *CompilerBegin = Context.createTempSymbol(),
- *CompilerEnd = Context.createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2);
- OS.EmitLabel(CompilerBegin);
- OS.AddComment("Record kind: S_COMPILE3");
- OS.EmitIntValue(SymbolKind::S_COMPILE3, 2);
+ MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
uint32_t Flags = 0;
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
@@ -707,9 +749,7 @@ void CodeViewDebug::emitCompilerInformation() {
OS.EmitIntValue(Flags, 4);
OS.AddComment("CPUType");
- CPUType CPU =
- mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
- OS.EmitIntValue(static_cast<uint64_t>(CPU), 2);
+ OS.EmitIntValue(static_cast<uint64_t>(TheCPU), 2);
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
@@ -733,7 +773,48 @@ void CodeViewDebug::emitCompilerInformation() {
OS.AddComment("Null-terminated compiler version string");
emitNullTerminatedSymbolName(OS, CompilerVersion);
- OS.EmitLabel(CompilerEnd);
+ endSymbolRecord(CompilerEnd);
+}
+
+static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable,
+ StringRef S) {
+ StringIdRecord SIR(TypeIndex(0x0), S);
+ return TypeTable.writeLeafType(SIR);
+}
+
+void CodeViewDebug::emitBuildInfo() {
+ // First, make LF_BUILDINFO. It's a sequence of strings with various bits of
+ // build info. The known prefix is:
+ // - Absolute path of current directory
+ // - Compiler path
+ // - Main source file path, relative to CWD or absolute
+ // - Type server PDB file
+ // - Canonical compiler command line
+ // If frontend and backend compilation are separated (think llc or LTO), it's
+ // not clear if the compiler path should refer to the executable for the
+ // frontend or the backend. Leave it blank for now.
+ TypeIndex BuildInfoArgs[BuildInfoRecord::MaxArgs] = {};
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin(); // FIXME: Multiple CUs.
+ const auto *CU = cast<DICompileUnit>(Node);
+ const DIFile *MainSourceFile = CU->getFile();
+ BuildInfoArgs[BuildInfoRecord::CurrentDirectory] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory());
+ BuildInfoArgs[BuildInfoRecord::SourceFile] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename());
+ // FIXME: Path to compiler and command line. PDB is intentionally blank unless
+ // we implement /Zi type servers.
+ BuildInfoRecord BIR(BuildInfoArgs);
+ TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR);
+
+ // Make a new .debug$S subsection for the S_BUILDINFO record, which points
+ // from the module symbols into the type stream.
+ MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+ MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO);
+ OS.AddComment("LF_BUILDINFO index");
+ OS.EmitIntValue(BuildInfoIndex.getIndex(), 4);
+ endSymbolRecord(BIEnd);
+ endCVSubsection(BISubsecEnd);
}
void CodeViewDebug::emitInlineeLinesSubsection() {
@@ -773,18 +854,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
const DILocation *InlinedAt,
const InlineSite &Site) {
- MCSymbol *InlineBegin = MMI->getContext().createTempSymbol(),
- *InlineEnd = MMI->getContext().createTempSymbol();
-
assert(TypeIndices.count({Site.Inlinee, nullptr}));
TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}];
// SymbolRecord
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 2); // RecordLength
- OS.EmitLabel(InlineBegin);
- OS.AddComment("Record kind: S_INLINESITE");
- OS.EmitIntValue(SymbolKind::S_INLINESITE, 2); // RecordKind
+ MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4);
@@ -799,9 +873,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
FI.Begin, FI.End);
- OS.EmitLabel(InlineEnd);
+ endSymbolRecord(InlineEnd);
- emitLocalVariableList(Site.InlinedLocals);
+ emitLocalVariableList(FI, Site.InlinedLocals);
// Recurse on child inlined call sites before closing the scope.
for (const DILocation *ChildSite : Site.ChildSites) {
@@ -812,10 +886,7 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
}
// Close the scope.
- OS.AddComment("Record length");
- OS.EmitIntValue(2, 2); // RecordLength
- OS.AddComment("Record kind: S_INLINESITE_END");
- OS.EmitIntValue(SymbolKind::S_INLINESITE_END, 2); // RecordKind
+ emitEndSymbolRecord(SymbolKind::S_INLINESITE_END);
}
void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
@@ -850,13 +921,7 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
// Emit S_THUNK32
- MCSymbol *ThunkRecordBegin = MMI->getContext().createTempSymbol(),
- *ThunkRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(ThunkRecordEnd, ThunkRecordBegin, 2);
- OS.EmitLabel(ThunkRecordBegin);
- OS.AddComment("Record kind: S_THUNK32");
- OS.EmitIntValue(unsigned(SymbolKind::S_THUNK32), 2);
+ MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4);
OS.AddComment("PtrEnd");
@@ -874,17 +939,13 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
OS.AddComment("Function name");
emitNullTerminatedSymbolName(OS, FuncName);
// Additional fields specific to the thunk ordinal would go here.
- OS.EmitLabel(ThunkRecordEnd);
+ endSymbolRecord(ThunkRecordEnd);
// Local variables/inlined routines are purposely omitted here. The point of
// marking this as a thunk is so Visual Studio will NOT stop in this routine.
// Emit S_PROC_ID_END
- const unsigned RecordLengthForSymbolEnd = 2;
- OS.AddComment("Record length");
- OS.EmitIntValue(RecordLengthForSymbolEnd, 2);
- OS.AddComment("Record kind: S_PROC_ID_END");
- OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
endCVSubsection(SymbolsEnd);
}
@@ -927,19 +988,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Symbol subsection for " + Twine(FuncName));
MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
{
- MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
- *ProcRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
- OS.EmitLabel(ProcRecordBegin);
-
- if (GV->hasLocalLinkage()) {
- OS.AddComment("Record kind: S_LPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
- } else {
- OS.AddComment("Record kind: S_GPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
- }
+ SymbolKind ProcKind = GV->hasLocalLinkage() ? SymbolKind::S_LPROC32_ID
+ : SymbolKind::S_GPROC32_ID;
+ MCSymbol *ProcRecordEnd = beginSymbolRecord(ProcKind);
// These fields are filled in by tools like CVPACK which run after the fact.
OS.AddComment("PtrParent");
@@ -968,9 +1019,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function name");
// Truncate the name so we won't overflow the record length field.
emitNullTerminatedSymbolName(OS, FuncName);
- OS.EmitLabel(ProcRecordEnd);
+ endSymbolRecord(ProcRecordEnd);
- emitLocalVariableList(FI.Locals);
+ MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC);
+ // Subtract out the CSR size since MSVC excludes that and we include it.
+ OS.AddComment("FrameSize");
+ OS.EmitIntValue(FI.FrameSize - FI.CSRSize, 4);
+ OS.AddComment("Padding");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Offset of padding");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Bytes of callee saved registers");
+ OS.EmitIntValue(FI.CSRSize, 4);
+ OS.AddComment("Exception handler offset");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Exception handler section");
+ OS.EmitIntValue(0, 2);
+ OS.AddComment("Flags (defines frame register)");
+ OS.EmitIntValue(uint32_t(FI.FrameProcOpts), 4);
+ endSymbolRecord(FrameProcEnd);
+
+ emitLocalVariableList(FI, FI.Locals);
+ emitGlobalVariableList(FI.Globals);
emitLexicalBlockList(FI.ChildBlocks, FI);
// Emit inlined call site information. Only emit functions inlined directly
@@ -986,13 +1056,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
for (auto Annot : FI.Annotations) {
MCSymbol *Label = Annot.first;
MDTuple *Strs = cast<MDTuple>(Annot.second);
- MCSymbol *AnnotBegin = MMI->getContext().createTempSymbol(),
- *AnnotEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(AnnotEnd, AnnotBegin, 2);
- OS.EmitLabel(AnnotBegin);
- OS.AddComment("Record kind: S_ANNOTATION");
- OS.EmitIntValue(SymbolKind::S_ANNOTATION, 2);
+ MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION);
OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
// FIXME: Make sure we don't overflow the max record size.
OS.EmitCOFFSectionIndex(Label);
@@ -1004,17 +1068,14 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString");
OS.EmitBytes(StringRef(Str.data(), Str.size() + 1));
}
- OS.EmitLabel(AnnotEnd);
+ endSymbolRecord(AnnotEnd);
}
if (SP != nullptr)
emitDebugInfoForUDTs(LocalUDTs);
// We're done with this function.
- OS.AddComment("Record length");
- OS.EmitIntValue(0x0002, 2);
- OS.AddComment("Record kind: S_PROC_ID_END");
- OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
}
endCVSubsection(SymbolsEnd);
@@ -1034,21 +1095,8 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
return DR;
}
-CodeViewDebug::LocalVarDefRange
-CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory,
- int Offset, bool IsSubfield,
- uint16_t StructOffset) {
- LocalVarDefRange DR;
- DR.InMemory = InMemory;
- DR.DataOffset = Offset;
- DR.IsSubfield = IsSubfield;
- DR.StructOffset = StructOffset;
- DR.CVRegister = CVRegister;
- return DR;
-}
-
void CodeViewDebug::collectVariableInfoFromMFTable(
- DenseSet<InlinedVariable> &Processed) {
+ DenseSet<InlinedEntity> &Processed) {
const MachineFunction &MF = *Asm->MF;
const TargetSubtargetInfo &TSI = MF.getSubtarget();
const TargetFrameLowering *TFI = TSI.getFrameLowering();
@@ -1060,7 +1108,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
- Processed.insert(InlinedVariable(VI.Var, VI.Loc->getInlinedAt()));
+ Processed.insert(InlinedEntity(VI.Var, VI.Loc->getInlinedAt()));
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
@@ -1196,15 +1244,15 @@ void CodeViewDebug::calculateRanges(
}
void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
- DenseSet<InlinedVariable> Processed;
+ DenseSet<InlinedEntity> Processed;
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(Processed);
for (const auto &I : DbgValues) {
- InlinedVariable IV = I.first;
+ InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
- const DILocalVariable *DIVar = IV.first;
+ const DILocalVariable *DIVar = cast<DILocalVariable>(IV.first);
const DILocation *InlinedAt = IV.second;
// Instruction ranges, specifying where IV is accessible.
@@ -1228,6 +1276,9 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
}
void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
+ const TargetSubtargetInfo &TSI = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
const Function &GV = MF->getFunction();
auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()});
assert(Insertion.second && "function already has info");
@@ -1235,6 +1286,66 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn->FuncId = NextFuncId++;
CurFn->Begin = Asm->getFunctionBegin();
+ // The S_FRAMEPROC record reports the stack size, and how many bytes of
+ // callee-saved registers were used. For targets that don't use a PUSH
+ // instruction (AArch64), this will be zero.
+ CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters();
+ CurFn->FrameSize = MFI.getStackSize();
+ CurFn->OffsetAdjustment = MFI.getOffsetAdjustment();
+ CurFn->HasStackRealignment = TRI->needsStackRealignment(*MF);
+
+ // For this function S_FRAMEPROC record, figure out which codeview register
+ // will be the frame pointer.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::None; // None.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::None; // None.
+ if (CurFn->FrameSize > 0) {
+ if (!TSI.getFrameLowering()->hasFP(*MF)) {
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ // If there is an FP, parameters are always relative to it.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr;
+ if (CurFn->HasStackRealignment) {
+ // If the stack needs realignment, locals are relative to SP or VFRAME.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ // Otherwise, locals are relative to EBP, and we probably have VLAs or
+ // other stack adjustments.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::FramePtr;
+ }
+ }
+ }
+
+ // Compute other frame procedure options.
+ FrameProcedureOptions FPO = FrameProcedureOptions::None;
+ if (MFI.hasVarSizedObjects())
+ FPO |= FrameProcedureOptions::HasAlloca;
+ if (MF->exposesReturnsTwice())
+ FPO |= FrameProcedureOptions::HasSetJmp;
+ // FIXME: Set HasLongJmp if we ever track that info.
+ if (MF->hasInlineAsm())
+ FPO |= FrameProcedureOptions::HasInlineAssembly;
+ if (GV.hasPersonalityFn()) {
+ if (isAsynchronousEHPersonality(
+ classifyEHPersonality(GV.getPersonalityFn())))
+ FPO |= FrameProcedureOptions::HasStructuredExceptionHandling;
+ else
+ FPO |= FrameProcedureOptions::HasExceptionHandling;
+ }
+ if (GV.hasFnAttribute(Attribute::InlineHint))
+ FPO |= FrameProcedureOptions::MarkedInline;
+ if (GV.hasFnAttribute(Attribute::Naked))
+ FPO |= FrameProcedureOptions::Naked;
+ if (MFI.hasStackProtectorIndex())
+ FPO |= FrameProcedureOptions::SecurityChecks;
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
+ if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() &&
+ !GV.hasFnAttribute(Attribute::OptimizeNone))
+ FPO |= FrameProcedureOptions::OptimizedForSpeed;
+ // FIXME: Set GuardCfg when it is implemented.
+ CurFn->FrameProcOpts = FPO;
+
OS.EmitCVFuncIdDirective(CurFn->FuncId);
// Find the end of the function prolog. First known non-DBG_VALUE and
@@ -1358,6 +1469,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
case dwarf::DW_TAG_union_type:
return lowerTypeUnion(cast<DICompositeType>(Ty));
case dwarf::DW_TAG_unspecified_type:
+ if (Ty->getName() == "decltype(nullptr)")
+ return TypeIndex::NullptrT();
return TypeIndex::None();
default:
// Use the null type index.
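Clang describes std::nullptr_t with an unspecified-type node whose name is matched literally above, e.g. (illustrative):

    !13 = !DIBasicType(tag: DW_TAG_unspecified_type, name: "decltype(nullptr)")

which now lowers to TypeIndex::NullptrT() instead of TypeIndex::None().
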
@@ -1552,6 +1665,9 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty,
break;
}
+ if (Ty->isObjectPointer())
+ PO |= PointerOptions::Const;
+
PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
return TypeTable.writeLeafType(PR);
}
@@ -1702,49 +1818,54 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
- ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
- ArgTypeIndices.size(), ArgListIndex);
+ FunctionOptions FO = getFunctionOptions(Ty);
+ ProcedureRecord Procedure(ReturnTypeIndex, CC, FO, ArgTypeIndices.size(),
+ ArgListIndex);
return TypeTable.writeLeafType(Procedure);
}
TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
const DIType *ClassTy,
int ThisAdjustment,
- bool IsStaticMethod) {
+ bool IsStaticMethod,
+ FunctionOptions FO) {
// Lower the containing class type.
TypeIndex ClassType = getTypeIndex(ClassTy);
- SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
- for (DITypeRef ArgTypeRef : Ty->getTypeArray())
- ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+ DITypeRefArray ReturnAndArgs = Ty->getTypeArray();
- // MSVC uses type none for variadic argument.
- if (ReturnAndArgTypeIndices.size() > 1 &&
- ReturnAndArgTypeIndices.back() == TypeIndex::Void()) {
- ReturnAndArgTypeIndices.back() = TypeIndex::None();
- }
- TypeIndex ReturnTypeIndex = TypeIndex::Void();
- ArrayRef<TypeIndex> ArgTypeIndices = None;
- if (!ReturnAndArgTypeIndices.empty()) {
- auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
- ReturnTypeIndex = ReturnAndArgTypesRef.front();
- ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
- }
+ unsigned Index = 0;
+ SmallVector<TypeIndex, 8> ArgTypeIndices;
+ TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+
+ // If the first argument is a pointer type and this isn't a static method,
+ // treat it as the special 'this' parameter, which is encoded separately from
+ // the arguments.
TypeIndex ThisTypeIndex;
- if (!IsStaticMethod && !ArgTypeIndices.empty()) {
- ThisTypeIndex = ArgTypeIndices.front();
- ArgTypeIndices = ArgTypeIndices.drop_front();
+ if (!IsStaticMethod && ReturnAndArgs.size() > Index) {
+ if (const DIDerivedType *PtrTy =
+ dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) {
+ if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) {
+ ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty);
+ Index++;
+ }
+ }
}
+ while (Index < ReturnAndArgs.size())
+ ArgTypeIndices.push_back(getTypeIndex(ReturnAndArgs[Index++]));
+
+ // MSVC uses type none for variadic arguments.
+ if (!ArgTypeIndices.empty() && ArgTypeIndices.back() == TypeIndex::Void())
+ ArgTypeIndices.back() = TypeIndex::None();
+
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
- // TODO: Need to use the correct values for FunctionOptions.
- MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC,
- FunctionOptions::None, ArgTypeIndices.size(),
- ArgListIndex, ThisAdjustment);
+ MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FO,
+ ArgTypeIndices.size(), ArgListIndex, ThisAdjustment);
return TypeTable.writeLeafType(MFR);
}
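The rewritten loop walks the subroutine's type array as {return, this, args...}. A hedged standalone sketch of the split, using strings as stand-ins for type indices and a trailing '*' as a stand-in for the DW_TAG_pointer_type check:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical type array for "int Foo::bar(char)": {return, this, args...}.
      std::vector<std::string> ReturnAndArgs = {"int", "Foo*", "char"};
      size_t Index = 0;
      std::string Ret = ReturnAndArgs[Index++];   // return type comes first
      std::string This;                           // empty == no 'this' pointer
      bool IsStaticMethod = false;
      if (!IsStaticMethod && ReturnAndArgs.size() > Index &&
          ReturnAndArgs[Index].back() == '*')     // stand-in for the pointer check
        This = ReturnAndArgs[Index++];
      std::vector<std::string> Args(ReturnAndArgs.begin() + Index,
                                    ReturnAndArgs.end());
      std::cout << Ret << " (this: " << This << ", " << Args.size() << " arg)\n";
    }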
@@ -1825,12 +1946,20 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
CO |= ClassOptions::Nested;
- // Put the Scoped flag on function-local types.
- for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
- Scope = Scope->getScope().resolve()) {
- if (isa<DISubprogram>(Scope)) {
+ // Put the Scoped flag on function-local types. MSVC sets this flag only on
+ // enums with an immediate function scope. Clang never puts enums inside
+ // DILexicalBlock scopes; enum types, as generated by clang, are always in
+ // function, class, or file scope.
+ if (Ty->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (ImmediateScope && isa<DISubprogram>(ImmediateScope))
CO |= ClassOptions::Scoped;
- break;
+ } else {
+ for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
+ Scope = Scope->getScope().resolve()) {
+ if (isa<DISubprogram>(Scope)) {
+ CO |= ClassOptions::Scoped;
+ break;
+ }
}
}
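A hypothetical translation unit illustrating the new rule: only the enum with an immediate function scope gets ClassOptions::Scoped, while non-enum local types still take the scope-walk path.

    enum FileE { A };                  // file scope: not Scoped
    struct S { enum ClassE { B }; };   // class scope: Nested, not Scoped
    void f() {
      enum LocalE { C };               // immediate function scope: Scoped
      struct LocalS { LocalE E; };     // local struct: Scoped via the scope walk
    }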
@@ -1930,6 +2059,7 @@ void CodeViewDebug::clear() {
GlobalUDTs.clear();
TypeIndices.clear();
CompleteTypeIndices.clear();
+ ScopeGlobals.clear();
}
void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
@@ -2275,6 +2405,32 @@ TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
return recordTypeIndexForDINode(Ty, TI, ClassTy);
}
+codeview::TypeIndex
+CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy) {
+ assert(PtrTy->getTag() == dwarf::DW_TAG_pointer_type &&
+ "this type must be a pointer type");
+
+ PointerOptions Options = PointerOptions::None;
+ if (SubroutineTy->getFlags() & DINode::DIFlags::FlagLValueReference)
+ Options = PointerOptions::LValueRefThisPointer;
+ else if (SubroutineTy->getFlags() & DINode::DIFlags::FlagRValueReference)
+ Options = PointerOptions::RValueRefThisPointer;
+
+ // Check if we've already translated this type. If there is no ref qualifier
+ // on the function then we look up this pointer type with no associated class
+ // so that the TypeIndex for the this pointer can be shared with the type
+ // index for other pointers to this class type. If there is a ref qualifier
+ // then we look up the pointer using the subroutine as the parent type.
+ auto I = TypeIndices.find({PtrTy, SubroutineTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerTypePointer(PtrTy, Options);
+ return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy);
+}
+
TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
DIType *Ty = TypeRef.resolve();
PointerRecord PR(getTypeIndex(Ty),
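A hypothetical class showing why the 'this' pointer's type index can depend on the subroutine type: all three methods receive a Foo*, but the ref-qualified ones need distinct pointer records carrying the new PointerOptions, so they are keyed on (PtrTy, SubroutineTy) rather than sharing the plain class pointer entry.

    struct Foo {
      void plain();   // shares the ordinary Foo* pointer record
      void lref() &;  // PointerOptions::LValueRefThisPointer
      void rref() &&; // PointerOptions::RValueRefThisPointer
    };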
@@ -2292,6 +2448,14 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
if (!Ty)
return TypeIndex::Void();
+ // Look through typedefs when getting the complete type index. Call
+ // getTypeIndex on the typedef to ensure that any UDTs are accumulated and
+ // emitted only once.
+ if (Ty->getTag() == dwarf::DW_TAG_typedef)
+ (void)getTypeIndex(Ty);
+ while (Ty->getTag() == dwarf::DW_TAG_typedef)
+ Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+
// If this is a non-record type, the complete type index is the same as the
// normal type index. Just call getTypeIndex.
switch (Ty->getTag()) {
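A hypothetical input for the typedef look-through: the variable should get the complete record index for Foo, while the extra getTypeIndex call still accumulates exactly one S_UDT for the alias.

    struct Foo { int X; };
    typedef Foo Alias;
    Alias G;   // complete type index of Foo, plus a single UDT record for Alias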
@@ -2360,35 +2524,40 @@ void CodeViewDebug::emitDeferredCompleteTypes() {
}
}
-void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {
+void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals) {
// Get the sorted list of parameters and emit them first.
SmallVector<const LocalVariable *, 6> Params;
for (const LocalVariable &L : Locals)
if (L.DIVar->isParameter())
Params.push_back(&L);
- llvm::sort(Params.begin(), Params.end(),
- [](const LocalVariable *L, const LocalVariable *R) {
- return L->DIVar->getArg() < R->DIVar->getArg();
- });
+ llvm::sort(Params, [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
for (const LocalVariable *L : Params)
- emitLocalVariable(*L);
+ emitLocalVariable(FI, *L);
// Next emit all non-parameters in the order that we found them.
for (const LocalVariable &L : Locals)
if (!L.DIVar->isParameter())
- emitLocalVariable(L);
+ emitLocalVariable(FI, L);
}
-void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
- // LocalSym record, see SymbolRecord.h for more info.
- MCSymbol *LocalBegin = MMI->getContext().createTempSymbol(),
- *LocalEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(LocalEnd, LocalBegin, 2);
- OS.EmitLabel(LocalBegin);
+/// Only call this on endian-specific types like ulittle16_t and little32_t, or
+/// structs composed of them.
+template <typename T>
+static void copyBytesForDefRange(SmallString<20> &BytePrefix,
+ SymbolKind SymKind, const T &DefRangeHeader) {
+ BytePrefix.resize(2 + sizeof(T));
+ ulittle16_t SymKindLE = ulittle16_t(SymKind);
+ memcpy(&BytePrefix[0], &SymKindLE, 2);
+ memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T));
+}
- OS.AddComment("Record kind: S_LOCAL");
- OS.EmitIntValue(unsigned(SymbolKind::S_LOCAL), 2);
+void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
+ const LocalVariable &Var) {
+ // LocalSym record, see SymbolRecord.h for more info.
+ MCSymbol *LocalEnd = beginSymbolRecord(SymbolKind::S_LOCAL);
LocalSymFlags Flags = LocalSymFlags::None;
if (Var.DIVar->isParameter())
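A standalone sketch of what copyBytesForDefRange produces: two little-endian kind bytes followed by the raw header bytes. Plain integer types stand in for llvm::support's ulittle16_t/little32_t, so this assumes a little-endian host; 0x1142 is the CodeView value of S_DEFRANGE_FRAMEPOINTER_REL.

    #include <cstdint>
    #include <cstring>
    #include <string>

    template <typename T>
    static void copyBytesForDefRange(std::string &BytePrefix, uint16_t SymKind,
                                     const T &Hdr) {
      BytePrefix.resize(2 + sizeof(T));
      std::memcpy(&BytePrefix[0], &SymKind, 2);     // record kind, little-endian
      std::memcpy(&BytePrefix[2], &Hdr, sizeof(T)); // header bytes, as laid out
    }

    int main() {
      std::string Prefix;
      int32_t FPOffset = -8;                          // FP-relative offset
      copyBytesForDefRange(Prefix, 0x1142, FPOffset); // S_DEFRANGE_FRAMEPOINTER_REL
      return Prefix.size() == 6 ? 0 : 1;              // 2 kind bytes + 4 offset bytes
    }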
@@ -2405,7 +2574,7 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
// Truncate the name so we won't overflow the record length field.
emitNullTerminatedSymbolName(OS, Var.DIVar->getName());
- OS.EmitLabel(LocalEnd);
+ endSymbolRecord(LocalEnd);
// Calculate the on disk prefix of the appropriate def range record. The
// records and on disk formats are described in SymbolRecords.h. BytePrefix
@@ -2414,45 +2583,53 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
for (const LocalVarDefRange &DefRange : Var.DefRanges) {
BytePrefix.clear();
if (DefRange.InMemory) {
- uint16_t RegRelFlags = 0;
- if (DefRange.IsSubfield) {
- RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
- (DefRange.StructOffset
- << DefRangeRegisterRelSym::OffsetInParentShift);
+ int Offset = DefRange.DataOffset;
+ unsigned Reg = DefRange.CVRegister;
+
+ // 32-bit x86 call sequences often use PUSH instructions, which disrupt
+ // ESP-relative offsets. Use the virtual frame pointer, VFRAME or $T0,
+ // instead. In frames without stack realignment, $T0 will be the CFA.
+ if (RegisterId(Reg) == RegisterId::ESP) {
+ Reg = unsigned(RegisterId::VFRAME);
+ Offset += FI.OffsetAdjustment;
+ }
+
+ // If we can use the chosen frame pointer for the frame and this isn't a
+ // sliced aggregate, use the smaller S_DEFRANGE_FRAMEPOINTER_REL record.
+ // Otherwise, use S_DEFRANGE_REGISTER_REL.
+ EncodedFramePtrReg EncFP = encodeFramePtrReg(RegisterId(Reg), TheCPU);
+ if (!DefRange.IsSubfield && EncFP != EncodedFramePtrReg::None &&
+ (bool(Flags & LocalSymFlags::IsParameter)
+ ? (EncFP == FI.EncodedParamFramePtrReg)
+ : (EncFP == FI.EncodedLocalFramePtrReg))) {
+ little32_t FPOffset = little32_t(Offset);
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset);
+ } else {
+ uint16_t RegRelFlags = 0;
+ if (DefRange.IsSubfield) {
+ RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
+ (DefRange.StructOffset
+ << DefRangeRegisterRelSym::OffsetInParentShift);
+ }
+ DefRangeRegisterRelSym::Header DRHdr;
+ DRHdr.Register = Reg;
+ DRHdr.Flags = RegRelFlags;
+ DRHdr.BasePointerOffset = Offset;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr);
}
- DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.Flags = RegRelFlags;
- Sym.Hdr.BasePointerOffset = DefRange.DataOffset;
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL);
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr));
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
if (DefRange.IsSubfield) {
- // Unclear what matters here.
- DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.MayHaveNoName = 0;
- Sym.Hdr.OffsetInParent = DefRange.StructOffset;
-
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER);
- BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
- sizeof(SymKind));
- BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
- sizeof(Sym.Hdr));
+ DefRangeSubfieldRegisterSym::Header DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ DRHdr.OffsetInParent = DefRange.StructOffset;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr);
} else {
- // Unclear what matters here.
- DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.MayHaveNoName = 0;
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
- BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
- sizeof(SymKind));
- BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
- sizeof(Sym.Hdr));
+ DefRangeRegisterSym::Header DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr);
}
}
OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
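A worked example of the ESP-to-VFRAME rewrite, with made-up numbers: a local at [ESP+4] in a frame whose OffsetAdjustment is 20 is described as VFRAME+24, which stays valid even while PUSHes in call sequences move ESP.

    #include <cassert>

    int main() {
      int DataOffset = 4;        // DefRange.DataOffset, ESP-relative (hypothetical)
      int OffsetAdjustment = 20; // FI.OffsetAdjustment (hypothetical)
      int VFrameOffset = DataOffset + OffsetAdjustment;
      assert(VFrameOffset == 24); // emitted as VFRAME+24
      return 0;
    }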
@@ -2469,15 +2646,7 @@ void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
/// lexical block scope.
void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
const FunctionInfo& FI) {
- MCSymbol *RecordBegin = MMI->getContext().createTempSymbol(),
- *RecordEnd = MMI->getContext().createTempSymbol();
-
- // Lexical block symbol record.
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(RecordEnd, RecordBegin, 2); // Record Length
- OS.EmitLabel(RecordBegin);
- OS.AddComment("Record kind: S_BLOCK32");
- OS.EmitIntValue(SymbolKind::S_BLOCK32, 2); // Record Kind
+ MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4); // PtrParent
OS.AddComment("PtrEnd");
@@ -2490,19 +2659,17 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol
OS.AddComment("Lexical block name");
emitNullTerminatedSymbolName(OS, Block.Name); // Name
- OS.EmitLabel(RecordEnd);
+ endSymbolRecord(RecordEnd);
// Emit variables local to this lexical block.
- emitLocalVariableList(Block.Locals);
+ emitLocalVariableList(FI, Block.Locals);
+ emitGlobalVariableList(Block.Globals);
// Emit lexical blocks contained within this block.
emitLexicalBlockList(Block.Children, FI);
// Close the lexical block scope.
- OS.AddComment("Record length");
- OS.EmitIntValue(2, 2); // Record Length
- OS.AddComment("Record kind: S_END");
- OS.EmitIntValue(SymbolKind::S_END, 2); // Record Kind
+ emitEndSymbolRecord(SymbolKind::S_END);
}
/// Convenience routine for collecting lexical block information for a list
@@ -2510,9 +2677,10 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
void CodeViewDebug::collectLexicalBlockInfo(
SmallVectorImpl<LexicalScope *> &Scopes,
SmallVectorImpl<LexicalBlock *> &Blocks,
- SmallVectorImpl<LocalVariable> &Locals) {
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals) {
for (LexicalScope *Scope : Scopes)
- collectLexicalBlockInfo(*Scope, Blocks, Locals);
+ collectLexicalBlockInfo(*Scope, Blocks, Locals, Globals);
}
/// Populate the lexical blocks and local variable lists of the parent with
@@ -2520,45 +2688,58 @@ void CodeViewDebug::collectLexicalBlockInfo(
void CodeViewDebug::collectLexicalBlockInfo(
LexicalScope &Scope,
SmallVectorImpl<LexicalBlock *> &ParentBlocks,
- SmallVectorImpl<LocalVariable> &ParentLocals) {
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals) {
if (Scope.isAbstractScope())
return;
- auto LocalsIter = ScopeVariables.find(&Scope);
- if (LocalsIter == ScopeVariables.end()) {
- // This scope does not contain variables and can be eliminated.
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
- return;
- }
- SmallVectorImpl<LocalVariable> &Locals = LocalsIter->second;
-
+ // Gather information about the lexical scope including local variables,
+ // global variables, and address ranges.
+ bool IgnoreScope = false;
+ auto LI = ScopeVariables.find(&Scope);
+ SmallVectorImpl<LocalVariable> *Locals =
+ LI != ScopeVariables.end() ? &LI->second : nullptr;
+ auto GI = ScopeGlobals.find(Scope.getScopeNode());
+ SmallVectorImpl<CVGlobalVariable> *Globals =
+ GI != ScopeGlobals.end() ? GI->second.get() : nullptr;
const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode());
- if (!DILB) {
- // This scope is not a lexical block and can be eliminated, but keep any
- // local variables it contains.
- ParentLocals.append(Locals.begin(), Locals.end());
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
- return;
- }
-
const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges();
- if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) {
- // This lexical block scope has too many address ranges to represent in the
- // current CodeView format or does not have a valid address range.
- // Eliminate this lexical scope and promote any locals it contains to the
- // parent scope.
- //
- // For lexical scopes with multiple address ranges you may be tempted to
- // construct a single range covering every instruction where the block is
- // live and everything in between. Unfortunately, Visual Studio only
- // displays variables from the first matching lexical block scope. If the
- // first lexical block contains exception handling code or cold code which
- // is moved to the bottom of the routine creating a single range covering
- // nearly the entire routine, then it will hide all other lexical blocks
- // and the variables they contain.
- //
- ParentLocals.append(Locals.begin(), Locals.end());
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+
+ // Ignore lexical scopes which do not contain variables.
+ if (!Locals && !Globals)
+ IgnoreScope = true;
+
+ // Ignore lexical scopes which are not lexical blocks.
+ if (!DILB)
+ IgnoreScope = true;
+
+ // Ignore scopes which have too many address ranges to represent in the
+ // current CodeView format or do not have a valid address range.
+ //
+ // For lexical scopes with multiple address ranges you may be tempted to
+ // construct a single range covering every instruction where the block is
+ // live and everything in between. Unfortunately, Visual Studio only
+ // displays variables from the first matching lexical block scope. If the
+ // first lexical block contains exception handling code or cold code which
+ // is moved to the bottom of the routine creating a single range covering
+ // nearly the entire routine, then it will hide all other lexical blocks
+ // and the variables they contain.
+ if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second))
+ IgnoreScope = true;
+
+ if (IgnoreScope) {
+ // This scope can be safely ignored and eliminating it will reduce the
+ // size of the debug information. Be sure to collect any variable and scope
+ // information from this scope or any of its children and collapse them
+ // into the parent scope.
+ if (Locals)
+ ParentLocals.append(Locals->begin(), Locals->end());
+ if (Globals)
+ ParentGlobals.append(Globals->begin(), Globals->end());
+ collectLexicalBlockInfo(Scope.getChildren(),
+ ParentBlocks,
+ ParentLocals,
+ ParentGlobals);
return;
}
@@ -2569,8 +2750,8 @@ void CodeViewDebug::collectLexicalBlockInfo(
if (!BlockInsertion.second)
return;
- // Create a lexical block containing the local variables and collect the
- // the lexical block information for the children.
+ // Create a lexical block containing the variables and collect the
+ // lexical block information for the children.
const InsnRange &Range = Ranges.front();
assert(Range.first && Range.second);
LexicalBlock &Block = BlockInsertion.first->second;
@@ -2579,9 +2760,15 @@ void CodeViewDebug::collectLexicalBlockInfo(
assert(Block.Begin && "missing label for scope begin");
assert(Block.End && "missing label for scope end");
Block.Name = DILB->getName();
- Block.Locals = std::move(Locals);
+ if (Locals)
+ Block.Locals = std::move(*Locals);
+ if (Globals)
+ Block.Globals = std::move(*Globals);
ParentBlocks.push_back(&Block);
- collectLexicalBlockInfo(Scope.getChildren(), Block.Children, Block.Locals);
+ collectLexicalBlockInfo(Scope.getChildren(),
+ Block.Children,
+ Block.Locals,
+ Block.Globals);
}
void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
@@ -2593,7 +2780,10 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
// Build the lexical block structure to emit for this routine.
if (LexicalScope *CFS = LScopes.getCurrentFunctionScope())
- collectLexicalBlockInfo(*CFS, CurFn->ChildBlocks, CurFn->Locals);
+ collectLexicalBlockInfo(*CFS,
+ CurFn->ChildBlocks,
+ CurFn->Locals,
+ CurFn->Globals);
// Clear the scope and variable information from the map which will not be
// valid after we have finished processing this routine. This also prepares
@@ -2660,30 +2850,57 @@ void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
OS.EmitValueToAlignment(4);
}
+static StringRef getSymbolName(SymbolKind SymKind) {
+ for (const EnumEntry<SymbolKind> &EE : getSymbolTypeNames())
+ if (EE.Value == SymKind)
+ return EE.Name;
+ return "";
+}
+
+MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) {
+ MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
+ *EndLabel = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+ OS.EmitLabel(BeginLabel);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(SymKind));
+ OS.EmitIntValue(unsigned(SymKind), 2);
+ return EndLabel;
+}
+
+void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) {
+ // MSVC does not pad out symbol records to four bytes, but LLVM does to avoid
+ // an extra copy of every symbol record in LLD. This increases object file
+ // size by less than 1% in the clang build, and is compatible with the Visual
+ // C++ linker.
+ OS.EmitValueToAlignment(4);
+ OS.EmitLabel(SymEnd);
+}
+
+void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) {
+ OS.AddComment("Record length");
+ OS.EmitIntValue(2, 2);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(EndKind));
+ OS.EmitIntValue(unsigned(EndKind), 2); // Record Kind
+}
+
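A standalone sketch of the frame these helpers emit around every symbol record: a 2-byte length that excludes the length field itself, a 2-byte kind, the payload, and the 4-byte alignment padding added by endSymbolRecord (so the padding is counted in the length).

    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> frameRecord(uint16_t Kind,
                                            const std::vector<uint8_t> &Payload) {
      // Pad so the whole record (length + kind + payload) is a multiple of 4.
      size_t Pad = (4 - Payload.size() % 4) % 4;
      uint16_t Len = uint16_t(2 + Payload.size() + Pad); // kind + payload + pad
      std::vector<uint8_t> Rec;
      Rec.push_back(Len & 0xff);  Rec.push_back(Len >> 8);  // record length
      Rec.push_back(Kind & 0xff); Rec.push_back(Kind >> 8); // record kind
      Rec.insert(Rec.end(), Payload.begin(), Payload.end());
      Rec.insert(Rec.end(), Pad, 0);                        // alignment padding
      return Rec;
    }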
void CodeViewDebug::emitDebugInfoForUDTs(
ArrayRef<std::pair<std::string, const DIType *>> UDTs) {
for (const auto &UDT : UDTs) {
const DIType *T = UDT.second;
assert(shouldEmitUdt(T));
- MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(),
- *UDTRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(UDTRecordEnd, UDTRecordBegin, 2);
- OS.EmitLabel(UDTRecordBegin);
-
- OS.AddComment("Record kind: S_UDT");
- OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2);
-
+ MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT);
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4);
-
emitNullTerminatedSymbolName(OS, UDT.first);
- OS.EmitLabel(UDTRecordEnd);
+ endSymbolRecord(UDTRecordEnd);
}
}
-void CodeViewDebug::emitDebugInfoForGlobals() {
+void CodeViewDebug::collectGlobalVariableInfo() {
DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *>
GlobalMap;
for (const GlobalVariable &GV : MMI->getModule()->globals()) {
@@ -2696,42 +2913,56 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
for (const MDNode *Node : CUs->operands()) {
const auto *CU = cast<DICompileUnit>(Node);
-
- // First, emit all globals that are not in a comdat in a single symbol
- // substream. MSVC doesn't like it if the substream is empty, so only open
- // it if we have at least one global to emit.
- switchToDebugSectionForSymbol(nullptr);
- MCSymbol *EndLabel = nullptr;
for (const auto *GVE : CU->getGlobalVariables()) {
- if (const auto *GV = GlobalMap.lookup(GVE))
- if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
- if (!EndLabel) {
- OS.AddComment("Symbol subsection for globals");
- EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
- }
- // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
- }
+ const auto *GV = GlobalMap.lookup(GVE);
+ if (!GV || GV->isDeclarationForLinker())
+ continue;
+ const DIGlobalVariable *DIGV = GVE->getVariable();
+ DIScope *Scope = DIGV->getScope();
+ SmallVector<CVGlobalVariable, 1> *VariableList;
+ if (Scope && isa<DILocalScope>(Scope)) {
+ // Locate a global variable list for this scope, creating one if
+ // necessary.
+ auto Insertion = ScopeGlobals.insert(
+ {Scope, std::unique_ptr<GlobalVariableList>()});
+ if (Insertion.second)
+ Insertion.first->second = llvm::make_unique<GlobalVariableList>();
+ VariableList = Insertion.first->second.get();
+ } else if (GV->hasComdat())
+ // Emit this global variable into a COMDAT section.
+ VariableList = &ComdatVariables;
+ else
+ // Emit this global variable in a single global symbol section.
+ VariableList = &GlobalVariables;
+ CVGlobalVariable CVGV = {DIGV, GV};
+ VariableList->emplace_back(std::move(CVGV));
}
- if (EndLabel)
- endCVSubsection(EndLabel);
+ }
+}
- // Second, emit each global that is in a comdat into its own .debug$S
- // section along with its own symbol substream.
- for (const auto *GVE : CU->getGlobalVariables()) {
- if (const auto *GV = GlobalMap.lookup(GVE)) {
- if (GV->hasComdat()) {
- MCSymbol *GVSym = Asm->getSymbol(GV);
- OS.AddComment("Symbol subsection for " +
- Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
- switchToDebugSectionForSymbol(GVSym);
- EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
- // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
- endCVSubsection(EndLabel);
- }
- }
- }
+void CodeViewDebug::emitDebugInfoForGlobals() {
+ // First, emit all globals that are not in a comdat in a single symbol
+ // substream. MSVC doesn't like it if the substream is empty, so only open
+ // it if we have at least one global to emit.
+ switchToDebugSectionForSymbol(nullptr);
+ if (!GlobalVariables.empty()) {
+ OS.AddComment("Symbol subsection for globals");
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitGlobalVariableList(GlobalVariables);
+ endCVSubsection(EndLabel);
+ }
+
+ // Second, emit each global that is in a comdat into its own .debug$S
+ // section along with its own symbol substream.
+ for (const CVGlobalVariable &CVGV : ComdatVariables) {
+ MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ OS.AddComment("Symbol subsection for " +
+ Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName())));
+ switchToDebugSectionForSymbol(GVSym);
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ endCVSubsection(EndLabel);
}
}
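Hypothetical globals and the bucket each lands in under the new collection logic; the inline-variable case assumes the usual COFF comdat emission for C++17 inline variables.

    int PublicG;                        // global scope, no comdat -> GlobalVariables
    inline int InlineG = 0;             // comdat                  -> ComdatVariables
    void f() { static int LocalG = 0; } // DILocalScope            -> ScopeGlobals[f]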
@@ -2747,34 +2978,26 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
}
}
+// Emit each global variable in the specified array.
+void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
+ for (const CVGlobalVariable &CVGV : Globals) {
+ MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ }
+}
+
void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
const GlobalVariable *GV,
MCSymbol *GVSym) {
- // DataSym record, see SymbolRecord.h for more info.
- // FIXME: Thread local data, etc
- MCSymbol *DataBegin = MMI->getContext().createTempSymbol(),
- *DataEnd = MMI->getContext().createTempSymbol();
- const unsigned FixedLengthOfThisRecord = 12;
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
- OS.EmitLabel(DataBegin);
- if (DIGV->isLocalToUnit()) {
- if (GV->isThreadLocal()) {
- OS.AddComment("Record kind: S_LTHREAD32");
- OS.EmitIntValue(unsigned(SymbolKind::S_LTHREAD32), 2);
- } else {
- OS.AddComment("Record kind: S_LDATA32");
- OS.EmitIntValue(unsigned(SymbolKind::S_LDATA32), 2);
- }
- } else {
- if (GV->isThreadLocal()) {
- OS.AddComment("Record kind: S_GTHREAD32");
- OS.EmitIntValue(unsigned(SymbolKind::S_GTHREAD32), 2);
- } else {
- OS.AddComment("Record kind: S_GDATA32");
- OS.EmitIntValue(unsigned(SymbolKind::S_GDATA32), 2);
- }
- }
+ // DataSym record, see SymbolRecord.h for more info. Thread local data
+ // happens to have the same format as global data.
+ SymbolKind DataSym = GV->isThreadLocal()
+ ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
+ : SymbolKind::S_GTHREAD32)
+ : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
+ : SymbolKind::S_GDATA32);
+ MCSymbol *DataEnd = beginSymbolRecord(DataSym);
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
OS.AddComment("DataOffset");
@@ -2782,6 +3005,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
- emitNullTerminatedSymbolName(OS, DIGV->getName(), FixedLengthOfThisRecord);
- OS.EmitLabel(DataEnd);
+ const unsigned LengthOfDataRecord = 12;
+ emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
+ endSymbolRecord(DataEnd);
}
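Hypothetical globals and the record kind each receives under the four-way selection above:

    int PublicData;                  // S_GDATA32
    static int UnitData;             // S_LDATA32   (local to unit)
    thread_local int PublicTls;      // S_GTHREAD32
    static thread_local int UnitTls; // S_LTHREAD32 (local to unit, TLS)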
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 6a0da5f993d0..21557ed1be35 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -14,14 +14,14 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
-#include "DbgValueHistoryCalculator.h"
-#include "DebugHandlerBase.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
@@ -54,6 +54,12 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
BumpPtrAllocator Allocator;
codeview::GlobalTypeTableBuilder TypeTable;
+ /// Whether to emit type record hashes into .debug$H.
+ bool EmitDebugGlobalHashes = false;
+
+ /// The codeview CPU type used by the translation unit.
+ codeview::CPUType TheCPU;
+
/// Represents the most general definition range.
struct LocalVarDefRange {
/// Indicates that variable data is stored in memory relative to the
@@ -85,10 +91,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
};
static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
- static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister,
- bool InMemory, int Offset,
- bool IsSubfield,
- uint16_t StructOffset);
/// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable {
@@ -97,6 +99,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
bool UseReferenceType = false;
};
+ struct CVGlobalVariable {
+ const DIGlobalVariable *DIGV;
+ const GlobalVariable *GV;
+ };
+
struct InlineSite {
SmallVector<LocalVariable, 1> InlinedLocals;
SmallVector<const DILocation *, 1> ChildSites;
@@ -110,6 +117,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// Combines information from DILexicalBlock and LexicalScope.
struct LexicalBlock {
SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
SmallVector<LexicalBlock *, 1> Children;
const MCSymbol *Begin;
const MCSymbol *End;
@@ -132,6 +140,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<const DILocation *, 1> ChildSites;
SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks;
@@ -144,6 +153,33 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
unsigned LastFileId = 0;
+
+ /// Number of bytes allocated in the prologue for all local stack objects.
+ unsigned FrameSize = 0;
+
+ /// Number of bytes of parameters on the stack.
+ unsigned ParamSize = 0;
+
+ /// Number of bytes pushed to save CSRs.
+ unsigned CSRSize = 0;
+
+ /// Adjustment to apply on x86 when using the VFRAME frame pointer.
+ int OffsetAdjustment = 0;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for local variables. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedLocalFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for stack parameters. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedParamFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ codeview::FrameProcedureOptions FrameProcOpts;
+
+ bool HasStackRealignment = false;
+
bool HaveLineInfo = false;
};
FunctionInfo *CurFn = nullptr;
@@ -154,6 +190,17 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// and LexicalBlocks.
DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables;
+ // Map to separate global variables according to the lexical scope they
+ // belong in. A null local scope represents the global scope.
+ typedef SmallVector<CVGlobalVariable, 1> GlobalVariableList;
+ DenseMap<const DIScope*, std::unique_ptr<GlobalVariableList> > ScopeGlobals;
+
+ // Array of global variables which need to be emitted into a COMDAT section.
+ SmallVector<CVGlobalVariable, 1> ComdatVariables;
+
+ // Array of non-COMDAT global variables.
+ SmallVector<CVGlobalVariable, 1> GlobalVariables;
+
/// The set of comdat .debug$S sections that we've seen so far. Each section
/// must start with a magic version number that must only be emitted once.
/// This set tracks which sections we've already opened.
@@ -249,6 +296,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitCompilerInformation();
+ void emitBuildInfo();
+
void emitInlineeLinesSubsection();
void emitDebugInfoForThunk(const Function *GV,
@@ -257,13 +306,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
- void emitDebugInfoForGlobals();
-
void emitDebugInfoForRetainedTypes();
void
emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs);
+ void emitDebugInfoForGlobals();
+ void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
const GlobalVariable *GV, MCSymbol *GVSym);
@@ -271,36 +320,49 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Returns an end label for use with endCVSubsection when the subsection is
/// finished.
MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind);
-
void endCVSubsection(MCSymbol *EndLabel);
+ /// Opens a symbol record of the given kind. Returns an end label for use with
+ /// endSymbolRecord.
+ MCSymbol *beginSymbolRecord(codeview::SymbolKind Kind);
+ void endSymbolRecord(MCSymbol *SymEnd);
+
+ /// Emits an S_END, S_INLINESITE_END, or S_PROC_ID_END record. These records
+ /// are empty, so we emit them with a simpler assembly sequence that doesn't
+ /// involve labels.
+ void emitEndSymbolRecord(codeview::SymbolKind EndKind);
+
void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
const InlineSite &Site);
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+ void collectGlobalVariableInfo();
void collectVariableInfo(const DISubprogram *SP);
- void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed);
+ void collectVariableInfoFromMFTable(DenseSet<InlinedEntity> &Processed);
// Construct the lexical block tree for a routine, pruning empty lexical
// scopes, and populate it with local variables.
void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes,
SmallVectorImpl<LexicalBlock *> &Blocks,
- SmallVectorImpl<LocalVariable> &Locals);
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals);
void collectLexicalBlockInfo(LexicalScope &Scope,
SmallVectorImpl<LexicalBlock *> &ParentBlocks,
- SmallVectorImpl<LocalVariable> &ParentLocals);
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals);
/// Records information about a local variable in the appropriate scope. In
/// particular, locals from inlined code live inside the inlining site.
void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS);
/// Emits local variables in the appropriate order.
- void emitLocalVariableList(ArrayRef<LocalVariable> Locals);
+ void emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals);
/// Emits an S_LOCAL record and its associated defined ranges.
- void emitLocalVariable(const LocalVariable &Var);
+ void emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var);
/// Emits a sequence of lexical block scopes and their children.
void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
@@ -314,6 +376,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
DITypeRef ClassTyRef = DITypeRef());
+ codeview::TypeIndex
+ getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy);
+
codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
@@ -340,10 +406,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
- codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
- const DIType *ClassTy,
- int ThisAdjustment,
- bool IsStaticMethod);
+ codeview::TypeIndex lowerTypeMemberFunction(
+ const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment,
+ bool IsStaticMethod,
+ codeview::FunctionOptions FO = codeview::FunctionOptions::None);
codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 570424a79c81..e27659494f08 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -414,6 +414,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_udata:
Asm->EmitULEB128(Integer);
return;
@@ -440,6 +442,8 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_udata:
return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata:
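The index forms added above (DW_FORM_addrx, DW_FORM_rnglistx) carry ULEB128-encoded indices, which is why they route through EmitULEB128 and getULEB128Size. A minimal encoder showing the wire format:

    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f; // low seven bits per byte
        Value >>= 7;
        if (Value)
          Byte |= 0x80;              // high bit set: more bytes follow
        Out.push_back(Byte);
      } while (Value);
      return Out;                    // encodeULEB128(300) == {0xAC, 0x02}
    }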
@@ -461,7 +465,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitDebugThreadLocal(Expr, SizeOf(AP, Form));
+ AP->EmitDebugValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
@@ -585,8 +589,7 @@ void DIEString::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_string) {
- for (char ch : S)
- AP->emitInt8(ch);
+ AP->OutStreamer->EmitBytes(S);
AP->emitInt8(0);
return;
}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 25518a339c61..09867822c30a 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp --------------===//
+//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DbgValueHistoryCalculator.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -42,7 +42,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
-void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
+void DbgValueHistoryMap::startInstrRange(InlinedEntity Var,
const MachineInstr &MI) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
@@ -57,7 +57,7 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
Ranges.push_back(std::make_pair(&MI, nullptr));
}
-void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
+void DbgValueHistoryMap::endInstrRange(InlinedEntity Var,
const MachineInstr &MI) {
auto &Ranges = VarInstrRanges[Var];
// Verify that the current instruction range is not yet closed.
@@ -68,7 +68,7 @@ void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
Ranges.back().second = &MI;
}
-unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
+unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const {
const auto &I = VarInstrRanges.find(Var);
if (I == VarInstrRanges.end())
return 0;
@@ -78,17 +78,22 @@ unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
return isDescribedByReg(*Ranges.back().first);
}
+void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
+ assert(MI.isDebugLabel() && "not a DBG_LABEL");
+ LabelInstr[Label] = &MI;
+}
+
namespace {
// Maps physreg numbers to the variables they describe.
-using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
-using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>;
+using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>;
} // end anonymous namespace
// Claim that @Var is not described by @RegNo anymore.
static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
- InlinedVariable Var) {
+ InlinedEntity Var) {
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
@@ -102,7 +107,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
// Claim that @Var is now described by @RegNo.
static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
- InlinedVariable Var) {
+ InlinedEntity Var) {
assert(RegNo != 0U);
auto &VarSet = RegVars[RegNo];
assert(!is_contained(VarSet, Var));
@@ -187,9 +192,10 @@ static void collectChangingRegs(const MachineFunction *MF,
}
}
-void llvm::calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result) {
+void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &DbgValues,
+ DbgLabelInstrMap &DbgLabels) {
BitVector ChangingRegs(TRI->getNumRegs());
collectChangingRegs(MF, TRI, ChangingRegs);
@@ -210,14 +216,14 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
if (TRI->isVirtualRegister(MO.getReg()))
- clobberRegisterUses(RegVars, MO.getReg(), Result, MI);
+ clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI);
// If this is a register def operand, it may end a debug value
// range.
else {
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
++AI)
if (ChangingRegs.test(*AI))
- clobberRegisterUses(RegVars, *AI, Result, MI);
+ clobberRegisterUses(RegVars, *AI, DbgValues, MI);
}
} else if (MO.isRegMask()) {
// If this is a register mask operand, clobber all debug values in
@@ -226,7 +232,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// Don't consider SP to be clobbered by register masks.
if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
MO.clobbersPhysReg(I)) {
- clobberRegisterUses(RegVars, I, Result, MI);
+ clobberRegisterUses(RegVars, I, DbgValues, MI);
}
}
}
@@ -234,26 +240,34 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
continue;
}
- // Skip DBG_LABEL instructions.
- if (MI.isDebugLabel())
- continue;
-
- assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
- // Use the base variable (without any DW_OP_piece expressions)
- // as index into History. The full variables including the
- // piece expressions are attached to the MI.
- const DILocalVariable *RawVar = MI.getDebugVariable();
- assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
- "Expected inlined-at fields to agree");
- InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
-
- if (unsigned PrevReg = Result.getRegisterForVar(Var))
- dropRegDescribedVar(RegVars, PrevReg, Var);
-
- Result.startInstrRange(Var, MI);
-
- if (unsigned NewReg = isDescribedByReg(MI))
- addRegDescribedVar(RegVars, NewReg, Var);
+ if (MI.isDebugValue()) {
+ assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
+ // Use the base variable (without any DW_OP_piece expressions)
+ // as index into History. The full variables including the
+ // piece expressions are attached to the MI.
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt());
+
+ if (unsigned PrevReg = DbgValues.getRegisterForVar(Var))
+ dropRegDescribedVar(RegVars, PrevReg, Var);
+
+ DbgValues.startInstrRange(Var, MI);
+
+ if (unsigned NewReg = isDescribedByReg(MI))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ } else if (MI.isDebugLabel()) {
+ assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!");
+ const DILabel *RawLabel = MI.getDebugLabel();
+ assert(RawLabel->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ // When collecting debug information for labels, no MCSymbol has been
+ // generated for them yet, so we keep the MachineInstr in DbgLabels in
+ // order to query the MCSymbol afterward.
+ InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt());
+ DbgLabels.addInstr(L, MI);
+ }
}
// Make sure locations for register-described variables are valid only
@@ -264,7 +278,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
auto CurElem = I++; // CurElem can be erased below.
if (TRI->isVirtualRegister(CurElem->first) ||
ChangingRegs.test(CurElem->first))
- clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
+ clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back());
}
}
}
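A hypothetical source fragment that exercises the new path: the label 'retry' becomes a DILabel, then an llvm.dbg.label intrinsic, and finally a one-operand DBG_LABEL machine instruction, which is what DbgLabels.addInstr records.

    int wait_for(volatile int *Flag) {
    retry:
      if (!*Flag)
        goto retry; // 'retry' survives into debug info as a DILabel
      return *Flag;
    }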
@@ -274,10 +288,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
dbgs() << "DbgValueHistoryMap:\n";
for (const auto &VarRangePair : *this) {
- const InlinedVariable &Var = VarRangePair.first;
+ const InlinedEntity &Var = VarRangePair.first;
const InstrRanges &Ranges = VarRangePair.second;
- const DILocalVariable *LocalVar = Var.first;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first);
const DILocation *Location = Var.second;
dbgs() << " - " << LocalVar->getName() << " at ";
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
deleted file mode 100644
index a262cb38b175..000000000000
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
-
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include <utility>
-
-namespace llvm {
-
-class DILocalVariable;
-class MachineFunction;
-class MachineInstr;
-class TargetRegisterInfo;
-
-// For each user variable, keep a list of instruction ranges where this variable
-// is accessible. The variables are listed in order of appearance.
-class DbgValueHistoryMap {
- // Each instruction range starts with a DBG_VALUE instruction, specifying the
- // location of a variable, which is assumed to be valid until the end of the
- // range. If end is not specified, location is valid until the start
- // instruction of the next instruction range, or until the end of the
- // function.
-public:
- using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>;
- using InstrRanges = SmallVector<InstrRange, 4>;
- using InlinedVariable =
- std::pair<const DILocalVariable *, const DILocation *>;
- using InstrRangesMap = MapVector<InlinedVariable, InstrRanges>;
-
-private:
- InstrRangesMap VarInstrRanges;
-
-public:
- void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
- void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
-
- // Returns register currently describing @Var. If @Var is currently
- // unaccessible or is not described by a register, returns 0.
- unsigned getRegisterForVar(InlinedVariable Var) const;
-
- bool empty() const { return VarInstrRanges.empty(); }
- void clear() { VarInstrRanges.clear(); }
- InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
- InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const;
-#endif
-};
-
-void calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result);
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 82e14dc13cb1..551cd36d1984 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DebugHandlerBase.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -125,6 +125,21 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
+// Return the function-local offset of an instruction.
+const MCExpr *
+DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) {
+ MCContext &MC = Asm->OutContext;
+
+ MCSymbol *Start = Asm->getFunctionBegin();
+ const auto *StartRef = MCSymbolRefExpr::create(Start, MC);
+
+ MCSymbol *AfterInsn = getLabelAfterInsn(MI);
+ assert(AfterInsn && "Expected label after instruction");
+ const auto *AfterRef = MCSymbolRefExpr::create(AfterInsn, MC);
+
+ return MCBinaryExpr::createSub(AfterRef, StartRef, MC);
+}
+
/// If this type is derived from a base type then return base type size.
uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
DIType *Ty = TyRef.resolve();
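The expression built above folds to "label after MI" minus "function begin", e.g. (.Ltmp3 - .Lfunc_begin0). A tiny sketch with hypothetical addresses:

    #include <cassert>

    int main() {
      unsigned FuncBegin = 0x1000; // address bound to Asm->getFunctionBegin()
      unsigned AfterInsn = 0x1024; // address bound to getLabelAfterInsn(MI)
      assert(AfterInsn - FuncBegin == 0x24); // the function-local offset
      return 0;
    }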
@@ -190,8 +205,9 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// Calculate history for local variables.
assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
- calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
- DbgValues);
+ assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!");
+ calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
+ DbgValues, DbgLabels);
LLVM_DEBUG(DbgValues.dump());
// Request labels for the full history.
@@ -229,6 +245,12 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
}
}
+ // Ensure there is a symbol before DBG_LABEL.
+ for (const auto &I : DbgLabels) {
+ const MachineInstr *MI = I.second;
+ requestLabelBeforeInsn(MI);
+ }
+
PrevInstLoc = DebugLoc();
PrevLabel = Asm->getFunctionBegin();
beginFunctionImpl(MF);
@@ -296,6 +318,7 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) {
if (hasDebugInfo(MMI, MF))
endFunctionImpl(MF);
DbgValues.clear();
+ DbgLabels.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
}
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
deleted file mode 100644
index 1ccefe32be75..000000000000
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ /dev/null
@@ -1,131 +0,0 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h --------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Common functionality for different debug information format backends.
-// LLVM currently supports DWARF and CodeView.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
-
-#include "AsmPrinterHandler.h"
-#include "DbgValueHistoryCalculator.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-
-namespace llvm {
-
-class AsmPrinter;
-class MachineInstr;
-class MachineModuleInfo;
-
-/// Represents the location at which a variable is stored.
-struct DbgVariableLocation {
- /// Base register.
- unsigned Register;
-
- /// Chain of offsetted loads necessary to load the value if it lives in
- /// memory. Every load except for the last is pointer-sized.
- SmallVector<int64_t, 1> LoadChain;
-
- /// Present if the location is part of a larger variable.
- llvm::Optional<llvm::DIExpression::FragmentInfo> FragmentInfo;
-
- /// Extract a VariableLocation from a MachineInstr.
- /// This will only work if Instruction is a debug value instruction
- /// and the associated DIExpression is in one of the supported forms.
- /// If these requirements are not met, the returned Optional will not
- /// have a value.
- static Optional<DbgVariableLocation>
- extractFromMachineInstruction(const MachineInstr &Instruction);
-};
-
-/// Base class for debug information backends. Common functionality related to
-/// tracking which variables and scopes are alive at a given PC live here.
-class DebugHandlerBase : public AsmPrinterHandler {
-protected:
- DebugHandlerBase(AsmPrinter *A);
-
- /// Target of debug info emission.
- AsmPrinter *Asm;
-
- /// Collected machine module information.
- MachineModuleInfo *MMI;
-
- /// Previous instruction's location information. This is used to
- /// determine label location to indicate scope boundaries in debug info.
- /// We track the previous instruction's source location (if not line 0),
- /// whether it was a label, and its parent BB.
- DebugLoc PrevInstLoc;
- MCSymbol *PrevLabel = nullptr;
- const MachineBasicBlock *PrevInstBB = nullptr;
-
- /// This location indicates end of function prologue and beginning of
- /// function body.
- DebugLoc PrologEndLoc;
-
- /// If nonnull, stores the current machine instruction we're processing.
- const MachineInstr *CurMI = nullptr;
-
- LexicalScopes LScopes;
-
- /// History of DBG_VALUE and clobber instructions for each user
- /// variable. Variables are listed in order of appearance.
- DbgValueHistoryMap DbgValues;
-
- /// Maps instruction with label emitted before instruction.
- /// FIXME: Make this private from DwarfDebug, we have the necessary accessors
- /// for it.
- DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
-
- /// Maps instruction with label emitted after instruction.
- DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
-
- /// Indentify instructions that are marking the beginning of or
- /// ending of a scope.
- void identifyScopeMarkers();
-
- /// Ensure that a label will be emitted before MI.
- void requestLabelBeforeInsn(const MachineInstr *MI) {
- LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
- }
-
- /// Ensure that a label will be emitted after MI.
- void requestLabelAfterInsn(const MachineInstr *MI) {
- LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
- }
-
- virtual void beginFunctionImpl(const MachineFunction *MF) = 0;
- virtual void endFunctionImpl(const MachineFunction *MF) = 0;
- virtual void skippedNonDebugFunction() {}
-
- // AsmPrinterHandler overrides.
-public:
- void beginInstruction(const MachineInstr *MI) override;
- void endInstruction() override;
-
- void beginFunction(const MachineFunction *MF) override;
- void endFunction(const MachineFunction *MF) override;
-
- /// Return Label preceding the instruction.
- MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
-
- /// Return Label immediately following the instruction.
- MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
-
- /// If this type is derived from a base type then return base type size.
- static uint64_t getBaseTypeSize(const DITypeRef TyRef);
-};
-
-}
-
-#endif
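The LoadChain encoding documented in the removed header above is compact but easy to misread: the location starts from the base register's value, each chain element adds an offset, and every element but the last also performs a pointer-sized load. A rough sketch of a consumer walking such a chain, in plain C++ with a hypothetical flat memory model (Memory, readPointer, and resolveLoadChain are illustrative stand-ins, not LLVM APIs):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Hypothetical flat memory model standing in for a debuggee's address space.
static std::unordered_map<uint64_t, uint64_t> Memory;

static uint64_t readPointer(uint64_t Addr) { return Memory[Addr]; }

// Walk a DbgVariableLocation-style chain: start from the base register's
// value, apply each offset, and perform a pointer-sized load for every
// element except the last. The result is the variable's address.
uint64_t resolveLoadChain(uint64_t BaseRegValue,
                          const std::vector<int64_t> &LoadChain) {
  uint64_t Addr = BaseRegValue;
  for (size_t I = 0; I < LoadChain.size(); ++I) {
    Addr += LoadChain[I];
    if (I + 1 != LoadChain.size())
      Addr = readPointer(Addr); // intermediate, pointer-sized load
  }
  return Addr;
}

int main() {
  Memory[0x1000] = 0x2000; // *(reg + 0) == 0x2000
  return resolveLoadChain(0x1000, {0, 8}) == 0x2008 ? 0 : 1;
}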
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index ac49657b68fa..befa4b941c8d 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -139,7 +139,7 @@ public:
// Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
Values.erase(
std::unique(
Values.begin(), Values.end(), [](const Value &A, const Value &B) {
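The hunk above only switches to the range-based llvm::sort overload; the underlying behavior is the classic sort-then-unique deduplication, which works because std::unique collapses adjacent duplicates only. A standalone illustration of the same idiom with the standard library (plain C++, no LLVM dependencies; the key/value pairs are made up for the example):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<std::pair<int, char>> Values = {
      {4, 'a'}, {1, 'b'}, {4, 'c'}, {1, 'd'}};
  // Sort by key so equal keys become adjacent.
  std::sort(Values.begin(), Values.end(),
            [](const auto &A, const auto &B) { return A.first < B.first; });
  // std::unique only collapses *adjacent* duplicates, which is why the
  // sort must run first; keep the first entry per key, drop the rest.
  Values.erase(std::unique(Values.begin(), Values.end(),
                           [](const auto &A, const auto &B) {
                             return A.first == B.first;
                           }),
               Values.end());
  assert(Values.size() == 2); // one entry per distinct key
}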
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 32271a0ef24a..1dca3f0fce5b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -69,14 +69,16 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
// pool from the skeleton - maybe even in non-fission (possibly fewer
// relocations by sharing them in the pool, but we have other ideas about how
// to reduce the number of relocations as well/instead).
- if (!DD->useSplitDwarf() || !Skeleton)
+ if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
unsigned idx = DD->getAddressPool().getIndex(Label);
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index,
+ Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
+ : dwarf::DW_FORM_GNU_addr_index,
DIEInteger(idx));
}
@@ -160,6 +162,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
+ if (MDTuple *TP = GV->getTemplateParams())
+ addTemplateParams(*VariableDIE, DINodeArray(TP));
+
// Add location.
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
@@ -186,6 +191,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
if (!Global && (!Expr || !Expr->isConstant()))
continue;
+ if (Global && Global->isThreadLocal() &&
+ !Asm->getObjFileLowering().supportDebugThreadLocalLocation())
+ continue;
+
if (!Loc) {
addToAccelTable = true;
Loc = new (DIEValueAllocator) DIELoc;
@@ -245,13 +254,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
- DD->addAccelName(GV->getName(), *VariableDIE);
+ DD->addAccelName(*CUNode, GV->getName(), *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
DD->useAllLinkageNames())
- DD->addAccelName(GV->getLinkageName(), *VariableDIE);
+ DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
}
return VariableDIE;
@@ -268,6 +277,7 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
(&CURanges.back().getEnd()->getSection() !=
&Range.getEnd()->getSection())) {
CURanges.push_back(Range);
+ DD->addSectionLabel(Range.getStart());
return;
}
@@ -275,6 +285,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
}
void DwarfCompileUnit::initStmtList() {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -341,7 +354,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// Add name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
- DD->addSubprogramNames(SP, *SPDie);
+ DD->addSubprogramNames(*CUNode, SP, *SPDie);
return *SPDie;
}
@@ -412,24 +425,29 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
? TLOF.getDwarfRnglistsSection()->getBeginSymbol()
: TLOF.getDwarfRangesSection()->getBeginSymbol();
- RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
+ HasRangeLists = true;
+
+ // Add the range list to the set of ranges to be emitted.
+ auto IndexAndList =
+ (DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU)
+ ->addRange(*(Skeleton ? Skeleton : this), std::move(Range));
+
+ uint32_t Index = IndexAndList.first;
+ auto &List = *IndexAndList.second;
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
// FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under
// fission until we support the forms using the .debug_addr section
// (DW_RLE_startx_endx etc.).
- if (isDwoUnit()) {
- if (DD->getDwarfVersion() < 5)
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
- } else {
+ if (DD->getDwarfVersion() >= 5)
+ addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
+ else if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
RangeSectionSym);
- }
-
- // Add the range list to the set of ranges to be emitted.
- (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
}
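The reworked addScopeRangeList above no longer stores range lists on the unit itself; for DWARF v5 it records an index into a shared table and emits it with DW_FORM_rnglistx. A small model of that index-returning table, in plain C++ (RangeList and RangeTable are stand-ins for the LLVM classes, not the real API):

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

struct RangeList { std::string Label; };

// In the spirit of the addRange call above: append a list and hand back
// (index, reference) so the DIE can record the index as DW_FORM_rnglistx
// while the emitter later walks the table in order.
class RangeTable {
  std::vector<RangeList> Lists;
public:
  std::pair<uint32_t, RangeList &> addRange(RangeList L) {
    Lists.push_back(std::move(L));
    return {static_cast<uint32_t>(Lists.size() - 1), Lists.back()};
  }
};

int main() {
  RangeTable DU;
  auto IndexAndList = DU.addRange(RangeList{"debug_ranges0"});
  uint32_t Index = IndexAndList.first; // value emitted as DW_FORM_rnglistx
  return Index == 0 ? 0 : 1;
}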
void DwarfCompileUnit::attachRangesOrLowHighPC(
@@ -479,7 +497,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
// Add name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
- DD->addSubprogramNames(InlinedSP, *ScopeDIE);
+ DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE);
return ScopeDIE;
}
@@ -506,6 +524,18 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
return D;
}
+DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
+ const LexicalScope &Scope) {
+ auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
+ insertDIE(DL.getLabel(), LabelDie);
+ DL.setDIE(*LabelDie);
+
+ if (Scope.isAbstractScope())
+ applyLabelAttributes(DL, *LabelDie);
+
+ return LabelDie;
+}
+
DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
// Define variable debug information entry.
@@ -699,13 +729,17 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
if (HasNonScopeChildren)
*HasNonScopeChildren = !Children.empty();
+ for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
+ Children.push_back(constructLabelDIE(*DL, *Scope));
+
for (LexicalScope *LS : Scope->getChildren())
constructScopeDIE(LS, Children);
return ObjectPointer;
}
-void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) {
+DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
if (Scope) {
@@ -728,6 +762,8 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, Lexi
!includeMinimalInlineScopes())
ScopeDIE.addChild(
DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
+
+ return ScopeDIE;
}
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
@@ -782,6 +818,32 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
+DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
+ const DISubprogram &CalleeSP,
+ bool IsTail,
+ const MCExpr *PCOffset) {
+ // Insert a call site entry DIE within ScopeDIE.
+ DIE &CallSiteDIE =
+ createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr);
+
+ // For the purposes of showing tail call frames in backtraces, a key piece of
+ // information is DW_AT_call_origin, a pointer to the callee DIE.
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE);
+
+ if (IsTail) {
+ // Attach DW_AT_call_tail_call to tail calls for standards compliance.
+ addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call);
+ } else {
+ // Attach the return PC to allow the debugger to disambiguate call paths
+ // from one function to another.
+ assert(PCOffset && "Missing return PC information for a call");
+ addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset);
+ }
+ return CallSiteDIE;
+}
+
DIE *DwarfCompileUnit::constructImportedEntityDIE(
const DIImportedEntity *Module) {
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
@@ -824,40 +886,51 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
}
}
-void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) {
- DbgVariable *AbsVar = getExistingAbstractVariable(
- InlinedVariable(Var.getVariable(), Var.getInlinedAt()));
- auto *VariableDie = Var.getDIE();
- if (AbsVar && AbsVar->getDIE()) {
- addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
- *AbsVar->getDIE());
- } else
- applyVariableAttributes(Var, *VariableDie);
-}
+void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
+ DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity());
-DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) {
- const DILocalVariable *Cleansed;
- return getExistingAbstractVariable(IV, Cleansed);
+ auto *Die = Entity->getDIE();
+ // Label may be used to generate DW_AT_low_pc, so declare it outside the
+ // if/else block.
+ const DbgLabel *Label = nullptr;
+ if (AbsEntity && AbsEntity->getDIE()) {
+ addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE());
+ Label = dyn_cast<const DbgLabel>(Entity);
+ } else {
+ if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity))
+ applyVariableAttributes(*Var, *Die);
+ else if ((Label = dyn_cast<const DbgLabel>(Entity)))
+ applyLabelAttributes(*Label, *Die);
+ else
+ llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel.");
+ }
+
+ if (Label)
+ if (const auto *Sym = Label->getSymbol())
+ addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
}
-// Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(
- InlinedVariable IV, const DILocalVariable *&Cleansed) {
- // More than one inlined variable corresponds to one abstract variable.
- Cleansed = IV.first;
- auto &AbstractVariables = getAbstractVariables();
- auto I = AbstractVariables.find(Cleansed);
- if (I != AbstractVariables.end())
+DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
+ auto &AbstractEntities = getAbstractEntities();
+ auto I = AbstractEntities.find(Node);
+ if (I != AbstractEntities.end())
return I->second.get();
return nullptr;
}
-void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
- LexicalScope *Scope) {
+void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
+ LexicalScope *Scope) {
assert(Scope && Scope->isAbstractScope());
- auto AbsDbgVariable = llvm::make_unique<DbgVariable>(Var, /* IA */ nullptr);
- DU->addScopeVariable(Scope, AbsDbgVariable.get());
- getAbstractVariables()[Var] = std::move(AbsDbgVariable);
+ auto &Entity = getAbstractEntities()[Node];
+ if (isa<const DILocalVariable>(Node)) {
+ Entity = llvm::make_unique<DbgVariable>(
+ cast<const DILocalVariable>(Node), nullptr /* IA */);
+ DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
+ } else if (isa<const DILabel>(Node)) {
+ Entity = llvm::make_unique<DbgLabel>(
+ cast<const DILabel>(Node), nullptr /* IA */);
+ DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
+ }
}
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
@@ -876,13 +949,18 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
}
bool DwarfCompileUnit::hasDwarfPubSections() const {
- // Opting in to GNU Pubnames/types overrides the default to ensure these are
- // generated for things like Gold's gdb_index generation.
- if (CUNode->getGnuPubnames())
+ switch (CUNode->getNameTableKind()) {
+ case DICompileUnit::DebugNameTableKind::None:
+ return false;
+ // Opting in to GNU Pubnames/types overrides the default to ensure these are
+ // generated for things like Gold's gdb_index generation.
+ case DICompileUnit::DebugNameTableKind::GNU:
return true;
-
- return DD->tuneForGDB() && DD->usePubSections() &&
- !includeMinimalInlineScopes();
+ case DICompileUnit::DebugNameTableKind::Default:
+ return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
+ !CUNode->isDebugDirectivesOnly();
+ }
+ llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
}
/// addGlobalName - Add a new global name to the compile unit.
@@ -939,8 +1017,6 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
"block byref variable without a complex expression");
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
- else if (DV.isBlockByrefVariable())
- addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
else
addAddress(Die, dwarf::DW_AT_location, Location);
}
@@ -1012,12 +1088,27 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
addFlag(VariableDie, dwarf::DW_AT_artificial);
}
+void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label,
+ DIE &LabelDie) {
+ StringRef Name = Label.getName();
+ if (!Name.empty())
+ addString(LabelDie, dwarf::DW_AT_name, Name);
+ const auto *DILabel = Label.getLabel();
+ addSourceLine(LabelDie, DILabel);
+}
+
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
const MCExpr *Expr) {
Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}
+void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute,
+ const MCExpr *Expr) {
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
+ DIEExpr(Expr));
+}
+
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
@@ -1034,3 +1125,12 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
+
+void DwarfCompileUnit::addAddrTableBase() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ MCSymbol *Label = DD->getAddressPool().getLabel();
+ addSectionLabel(getUnitDie(),
+ getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
+ : dwarf::DW_AT_GNU_addr_base,
+ Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
+}
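addAddrTableBase points the unit at the shared .debug_addr table; DW_FORM_addrx entries (and the DW_LLE_startx_length forms later in this patch) then refer to addresses by pool index. A minimal deduplicating pool in the spirit of AddressPool::getIndex, in plain C++ (symbols are plain strings here; this is a sketch, not the LLVM class):

#include <cstdint>
#include <string>
#include <unordered_map>

// Deduplicating address pool: the first request for a symbol appends it,
// later requests return the same index, so DW_FORM_addrx references stay
// stable and the table carries each address exactly once.
class AddressPoolModel {
  std::unordered_map<std::string, uint64_t> Indices;
public:
  uint64_t getIndex(const std::string &Sym) {
    auto It = Indices.emplace(Sym, Indices.size());
    return It.first->second;
  }
};

int main() {
  AddressPoolModel Pool;
  uint64_t A = Pool.getIndex(".Lfunc_begin0");
  uint64_t B = Pool.getIndex(".Lfunc_begin0"); // same symbol, same index
  return A == B ? 0 : 1;
}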
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 51e1558fe4a3..9ec22f68c12f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
-#include "DbgValueHistoryCalculator.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -23,6 +22,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -44,6 +44,7 @@ class MDNode;
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
+ bool HasRangeLists = false;
/// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
/// the need to search for it in applyStmtList.
@@ -69,10 +70,6 @@ class DwarfCompileUnit final : public DwarfUnit {
/// GlobalTypes - A map of globally visible types for this unit.
StringMap<const DIE *> GlobalTypes;
- // List of range lists for a given compile unit, separate from the ranges for
- // the CU itself.
- SmallVector<RangeSpanList, 1> CURangeLists;
-
// List of ranges for a given compile unit.
SmallVector<RangeSpan, 2> CURanges;
@@ -81,7 +78,7 @@ class DwarfCompileUnit final : public DwarfUnit {
const MCSymbol *BaseAddress = nullptr;
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// DWO ID for correlating skeleton and split units.
uint64_t DWOId = 0;
@@ -98,16 +95,17 @@ class DwarfCompileUnit final : public DwarfUnit {
return DU->getAbstractSPDies();
}
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
- return AbstractVariables;
- return DU->getAbstractVariables();
+ return AbstractEntities;
+ return DU->getAbstractEntities();
}
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
+ bool hasRangeLists() const { return HasRangeLists; }
unsigned getUniqueID() const { return UniqueID; }
DwarfCompileUnit *getSkeleton() const {
@@ -194,30 +192,39 @@ public:
DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
DIE *&ObjectPointer);
+ /// Construct a DIE for the given DbgLabel.
+ DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
+
/// A helper function to create children of a Scope DIE.
DIE *createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
/// Construct a DIE for this subprogram scope.
- void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);
+ DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ /// Construct a call site entry DIE describing a call within \p Scope to a
+ /// callee described by \p CalleeSP. \p IsTail specifies whether the call is
+ /// a tail call. For non-tail calls, \p PCOffset must be the function-local
+ /// offset to the PC value after the call instruction.
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP,
+ bool IsTail, const MCExpr *PCOffset);
+
/// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
- void finishVariableDefinition(const DbgVariable &Var);
+ void finishEntityDefinition(const DbgEntity *Entity);
/// Find abstract variable associated with Var.
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
- const DILocalVariable *&Cleansed);
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
- void createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope);
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+ DbgEntity *getExistingAbstractEntity(const DINode *Node);
+ void createAbstractEntity(const DINode *Node, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
@@ -236,6 +243,9 @@ public:
void emitHeader(bool UseOffsets) override;
+ /// Add the DW_AT_addr_base attribute to the unit DIE.
+ void addAddrTableBase();
+
MCSymbol *getLabelBegin() const {
assert(getSection());
return LabelBegin;
@@ -285,13 +295,13 @@ public:
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+ /// Add an attribute containing an address expression to \p Die.
+ void addAddressExpr(DIE &Die, dwarf::Attribute Attribute, const MCExpr *Expr);
+
void applySubprogramAttributesToDefinition(const DISubprogram *SP,
DIE &SPDie);
- /// getRangeLists - Get the vector of range lists.
- const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
- return (Skeleton ? Skeleton : this)->CURangeLists;
- }
+ void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie);
/// getRanges - Get the list of ranges for this unit.
const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 500e7a00196f..1de2ffb6cfa1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
@@ -130,11 +131,6 @@ DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden,
cl::init(Default));
static cl::opt<bool>
- NoDwarfPubSections("no-dwarf-pub-sections", cl::Hidden,
- cl::desc("Disable emission of DWARF pub sections."),
- cl::init(false));
-
-static cl::opt<bool>
NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden,
cl::desc("Disable emission .debug_ranges section."),
cl::init(false));
@@ -188,12 +184,12 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
}
bool DbgVariable::isBlockByrefVariable() const {
- assert(Var && "Invalid complex DbgVariable!");
- return Var->getType().resolve()->isBlockByrefStruct();
+ assert(getVariable() && "Invalid complex DbgVariable!");
+ return getVariable()->getType().resolve()->isBlockByrefStruct();
}
const DIType *DbgVariable::getType() const {
- DIType *Ty = Var->getType().resolve();
+ DIType *Ty = getVariable()->getType().resolve();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (Ty->isBlockByrefStruct()) {
@@ -246,7 +242,7 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
return A.Expr->isFragment();
}) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
- llvm::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
+ llvm::sort(FrameIndexExprs,
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
B.Expr->getFragmentInfo()->OffsetInBits;
@@ -258,8 +254,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
void DbgVariable::addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
- assert(V.Var == Var && "conflicting variable");
- assert(V.IA == IA && "conflicting inlined-at location");
+ assert(V.getVariable() == getVariable() && "conflicting variable");
+ assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
@@ -355,7 +351,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
- UsePubSections = !NoDwarfPubSections && !TT.isNVPTX();
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
// Use sections as references. Force for NVPTX.
@@ -421,30 +416,35 @@ static StringRef getObjCMethodName(StringRef In) {
}
// Add the various names to the Dwarf accelerator table names.
-void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
+void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
+ const DISubprogram *SP, DIE &Die) {
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ return;
+
if (!SP->isDefinition())
return;
if (SP->getName() != "")
- addAccelName(SP->getName(), Die);
+ addAccelName(CU, SP->getName(), Die);
// If the linkage name is different than the name, go ahead and output that as
// well into the name table. Only do that if we are going to actually emit
// that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
(useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
- addAccelName(SP->getLinkageName(), Die);
+ addAccelName(CU, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP->getName())) {
StringRef Class, Category;
getObjCClassCategory(SP->getName(), Class, Category);
- addAccelObjC(Class, Die);
+ addAccelObjC(CU, Class, Die);
if (Category != "")
- addAccelObjC(Category, Die);
+ addAccelObjC(CU, Category, Die);
// Also add the base method name to the name table.
- addAccelName(getObjCMethodName(SP->getName()), Die);
+ addAccelName(CU, getObjCMethodName(SP->getName()), Die);
}
}
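getObjCClassCategory and getObjCMethodName split a selector such as "-[NSString(MyCategory) reversedString]" into the pieces that receive separate accelerator entries above. A rough standalone reimplementation of that splitting, assuming the usual "-[Class(Category) method]" shape (plain C++; splitObjCName is illustrative, not the LLVM helpers themselves):

#include <cassert>
#include <string>

struct ObjCParts { std::string Class, Category, Method; };

// Split "-[Class(Category) method]" (or "+[Class method]") into its parts.
// Returns empty parts if the string is not an ObjC-style selector.
ObjCParts splitObjCName(const std::string &In) {
  ObjCParts P;
  if (In.size() < 4 || (In[0] != '-' && In[0] != '+') || In[1] != '[')
    return P;
  size_t Space = In.find(' ');
  size_t Close = In.rfind(']');
  if (Space == std::string::npos || Close == std::string::npos)
    return P;
  std::string ClassPart = In.substr(2, Space - 2);
  size_t Paren = ClassPart.find('(');
  if (Paren != std::string::npos) {
    P.Class = ClassPart.substr(0, Paren);
    P.Category =
        ClassPart.substr(Paren + 1, ClassPart.size() - Paren - 2); // drop ')'
  } else {
    P.Class = ClassPart;
  }
  P.Method = In.substr(Space + 1, Close - Space - 1);
  return P;
}

int main() {
  ObjCParts P = splitObjCName("-[NSString(MyCategory) reversedString]");
  assert(P.Class == "NSString" && P.Category == "MyCategory" &&
         P.Method == "reversedString");
}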
@@ -503,6 +503,64 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
+ DwarfCompileUnit &CU, DIE &ScopeDIE,
+ const MachineFunction &MF) {
+ // Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if
+ // the subprogram is required to have one.
+ if (!SP.areAllCallsDescribed() || !SP.isDefinition())
+ return;
+
+ // Use DW_AT_call_all_calls to express that call site entries are present
+ // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
+ // because one of its requirements is not met: call site entries for
+ // optimized-out calls are elided.
+ CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "TargetInstrInfo not found: cannot label tail calls");
+
+ // Emit call site entries for each call or tail call in the function.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB.instrs()) {
+ // Skip instructions which aren't calls. Both calls and tail-calling jump
+ // instructions (e.g. TAILJMPd64) are classified correctly here.
+ if (!MI.isCall())
+ continue;
+
+ // TODO: Add support for targets with delay slots (see: beginInstruction).
+ if (MI.hasDelaySlot())
+ return;
+
+ // If this is a direct call, find the callee's subprogram.
+ const MachineOperand &CalleeOp = MI.getOperand(0);
+ if (!CalleeOp.isGlobal())
+ continue;
+ const Function *CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
+ if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ continue;
+
+ // TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
+ // TODO: Add support for indirect calls.
+
+ bool IsTail = TII->isTailCall(MI);
+
+ // For tail calls, no return PC information is needed. For regular calls,
+ // the return PC is needed to disambiguate paths in the call graph which
+ // could lead to some target function.
+ const MCExpr *PCOffset =
+ IsTail ? nullptr : getFunctionLocalOffsetAfterInsn(&MI);
+
+ assert((IsTail || PCOffset) && "Call without return PC information");
+ LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
+ << CalleeDecl->getName() << (IsTail ? " [tail]" : "")
+ << "\n");
+ CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(),
+ IsTail, PCOffset);
+ }
+ }
+}
+
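constructCallSiteEntryDIEs above scans every machine instruction, keeps direct calls whose callee has a subprogram, and records either a tail-call flag or a return-PC offset. A simplified model of that classification loop, in plain C++ (Inst and CallSite are illustrative stand-ins for the MachineInstr/DIE machinery, not LLVM types):

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

// Stand-in for the interesting bits of a MachineInstr.
struct Inst {
  bool IsCall = false;
  bool IsTailCall = false;
  bool HasDelaySlot = false;
  std::optional<std::string> DirectCallee; // absent for indirect calls
  uint64_t ReturnPCOffset = 0;             // function-local offset after call
};

struct CallSite {
  std::string Callee;
  bool IsTail;
  std::optional<uint64_t> ReturnPC; // absent for tail calls
};

std::vector<CallSite> collectCallSites(const std::vector<Inst> &Body) {
  std::vector<CallSite> Sites;
  for (const Inst &MI : Body) {
    if (!MI.IsCall)
      continue;
    if (MI.HasDelaySlot)
      return {}; // targets with delay slots are unsupported; bail out
    if (!MI.DirectCallee)
      continue; // indirect calls are not described yet
    CallSite S{*MI.DirectCallee, MI.IsTailCall, std::nullopt};
    if (!MI.IsTailCall)
      S.ReturnPC = MI.ReturnPCOffset; // disambiguates call paths in backtraces
    Sites.push_back(std::move(S));
  }
  return Sites;
}

int main() {
  std::vector<Inst> Body{{true, false, false, std::string("callee"), 42}};
  return collectCallSites(Body).size() == 1 ? 0 : 1;
}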
void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
if (!U.hasDwarfPubSections())
return;
@@ -510,41 +568,14 @@ void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}
-// Create new DwarfCompileUnit for the given metadata node with tag
-// DW_TAG_compile_unit.
-DwarfCompileUnit &
-DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
- if (auto *CU = CUMap.lookup(DIUnit))
- return *CU;
- StringRef FN = DIUnit->getFilename();
- CompilationDir = DIUnit->getDirectory();
-
- auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
- InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
- DwarfCompileUnit &NewCU = *OwnedUnit;
+void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU) {
DIE &Die = NewCU.getUnitDie();
- InfoHolder.addUnit(std::move(OwnedUnit));
- if (useSplitDwarf()) {
- NewCU.setSkeleton(constructSkeletonCU(NewCU));
- NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
- Asm->TM.Options.MCOptions.SplitDwarfFile);
- }
-
- for (auto *IE : DIUnit->getImportedEntities())
- NewCU.addImportedEntity(IE);
-
- // LTO with assembly output shares a single line table amongst multiple CUs.
- // To avoid the compilation directory being ambiguous, let the line table
- // explicitly describe the directory of all files, never relying on the
- // compilation directory.
- if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
- Asm->OutStreamer->emitDwarfFile0Directive(
- CompilationDir, FN, NewCU.getMD5AsBytes(DIUnit->getFile()),
- DIUnit->getSource(), NewCU.getUniqueID());
+ StringRef FN = DIUnit->getFilename();
StringRef Producer = DIUnit->getProducer();
StringRef Flags = DIUnit->getFlags();
- if (!Flags.empty()) {
+ if (!Flags.empty() && !useAppleExtensionAttributes()) {
std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
} else
@@ -582,11 +613,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
dwarf::DW_FORM_data1, RVer);
}
- if (useSplitDwarf())
- NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
- else
- NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
-
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
@@ -596,9 +622,44 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
DIUnit->getSplitDebugFilename());
}
+}
+// Create new DwarfCompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
+DwarfCompileUnit &
+DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ if (auto *CU = CUMap.lookup(DIUnit))
+ return *CU;
+
+ CompilationDir = DIUnit->getDirectory();
+
+ auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
+ InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ InfoHolder.addUnit(std::move(OwnedUnit));
+
+ for (auto *IE : DIUnit->getImportedEntities())
+ NewCU.addImportedEntity(IE);
+
+ // LTO with assembly output shares a single line table amongst multiple CUs.
+ // To avoid the compilation directory being ambiguous, let the line table
+ // explicitly describe the directory of all files, never relying on the
+ // compilation directory.
+ if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
+ Asm->OutStreamer->emitDwarfFile0Directive(
+ CompilationDir, DIUnit->getFilename(),
+ NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(),
+ NewCU.getUniqueID());
+
+ if (useSplitDwarf()) {
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+ } else {
+ finishUnitAttributes(DIUnit, NewCU);
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ }
CUMap.insert({DIUnit, &NewCU});
- CUDieMap.insert({&Die, &NewCU});
+ CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
}
@@ -613,22 +674,21 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
- llvm::sort(GVEs.begin(), GVEs.end(),
- [](DwarfCompileUnit::GlobalExpr A,
- DwarfCompileUnit::GlobalExpr B) {
- // Sort order: first null exprs, then exprs without fragment
- // info, then sort by fragment offset in bits.
- // FIXME: Come up with a more comprehensive comparator so
- // the sorting isn't non-deterministic, and so the following
- // std::unique call works correctly.
- if (!A.Expr || !B.Expr)
- return !!B.Expr;
- auto FragmentA = A.Expr->getFragmentInfo();
- auto FragmentB = B.Expr->getFragmentInfo();
- if (!FragmentA || !FragmentB)
- return !!FragmentB;
- return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
- });
+ llvm::sort(
+ GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
+ // Sort order: first null exprs, then exprs without fragment
+ // info, then sort by fragment offset in bits.
+ // FIXME: Come up with a more comprehensive comparator so
+ // the sorting isn't non-deterministic, and so the following
+ // std::unique call works correctly.
+ if (!A.Expr || !B.Expr)
+ return !!B.Expr;
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (!FragmentA || !FragmentB)
+ return !!FragmentB;
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
+ });
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
DwarfCompileUnit::GlobalExpr B) {
@@ -644,15 +704,18 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
void DwarfDebug::beginModule() {
NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
DWARFGroupDescription, TimePassesIsEnabled);
- if (DisableDebugInfoPrinting)
+ if (DisableDebugInfoPrinting) {
+ MMI->setDebugInfoAvailability(false);
return;
+ }
const Module *M = MMI->getModule();
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
// Tell MMI whether we have debug info.
- MMI->setDebugInfoAvailability(NumDebugCUs > 0);
+ assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) &&
+ "DebugInfoAvailabilty initialized unexpectedly");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
@@ -670,11 +733,24 @@ void DwarfDebug::beginModule() {
(useSplitDwarf() ? SkeletonHolder : InfoHolder)
.setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
- // Create the symbol that designates the start of the DWARF v5 range list
- // table. It is located past the header and before the offsets table.
- if (getDwarfVersion() >= 5)
- (useSplitDwarf() ? SkeletonHolder : InfoHolder)
- .setRnglistsTableBaseSym(Asm->createTempSymbol("rnglists_table_base"));
+
+ // Create the symbols that designate the start of the DWARF v5 range list
+ // and locations list tables. They are located past the table headers.
+ if (getDwarfVersion() >= 5) {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_table_base"));
+ Holder.setLoclistsTableBaseSym(
+ Asm->createTempSymbol("loclists_table_base"));
+
+ if (useSplitDwarf())
+ InfoHolder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_dwo_table_base"));
+ }
+
+ // Create the symbol that points to the first entry following the debug
+ // address table (.debug_addr) header.
+ AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
@@ -728,16 +804,16 @@ void DwarfDebug::beginModule() {
}
}
-void DwarfDebug::finishVariableDefinitions() {
- for (const auto &Var : ConcreteVariables) {
- DIE *VariableDie = Var->getDIE();
- assert(VariableDie);
+void DwarfDebug::finishEntityDefinitions() {
+ for (const auto &Entity : ConcreteEntities) {
+ DIE *Die = Entity->getDIE();
+ assert(Die);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
- // in the ConcreteVariables list, rather than looking it up again here.
+ // in the ConcreteEntities list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
- DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
+ DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie());
assert(Unit);
- Unit->finishVariableDefinition(*Var);
+ Unit->finishEntityDefinition(Entity.get());
}
}
@@ -755,7 +831,7 @@ void DwarfDebug::finalizeModuleInfo() {
finishSubprogramDefinitions();
- finishVariableDefinitions();
+ finishEntityDefinitions();
// Include the DWO file name in the hash if there's more than one CU.
// This handles ThinLTO's situation where imported CUs may very easily be
@@ -768,6 +844,8 @@ void DwarfDebug::finalizeModuleInfo() {
// all other generation.
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
+ if (TheCU.getCUNode()->isDebugDirectivesOnly())
+ continue;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
TheCU.constructContainingTypeDIEs();
@@ -776,7 +854,12 @@ void DwarfDebug::finalizeModuleInfo() {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
- if (useSplitDwarf()) {
+ if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) {
+ finishUnitAttributes(TheCU.getCUNode(), TheCU);
+ TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
+ SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
@@ -789,18 +872,14 @@ void DwarfDebug::finalizeModuleInfo() {
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
}
- // We don't keep track of which addresses are used in which CU so this
- // is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty()) {
- const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
- SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
- Sym, Sym);
- }
- if (getDwarfVersion() < 5 && !SkCU->getRangeLists().empty()) {
+
+ if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) {
const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
Sym, Sym);
}
+ } else if (SkCU) {
+ finishUnitAttributes(SkCU->getCUNode(), *SkCU);
}
// If we have code split among multiple sections or non-contiguous
@@ -810,6 +889,14 @@ void DwarfDebug::finalizeModuleInfo() {
// .subsections_via_symbols in mach-o. This would mean turning on
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty() &&
+ (getDwarfVersion() >= 5 ||
+ (SkCU && !empty(TheCU.getUnitDie().children()))))
+ U.addAddrTableBase();
+
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
@@ -822,9 +909,13 @@ void DwarfDebug::finalizeModuleInfo() {
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
- if (getDwarfVersion() >= 5 && !useSplitDwarf() &&
- !U.getRangeLists().empty())
- U.addRnglistsBase();
+ if (getDwarfVersion() >= 5) {
+ if (U.hasRangeLists())
+ U.addRnglistsBase();
+
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf())
+ U.addLoclistsBase();
+ }
auto *CUNode = cast<DICompileUnit>(P.first);
// If compile Unit has macros, emit "DW_AT_macro_info" attribute.
@@ -888,9 +979,11 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- emitDebugAddr();
+ emitDebugRangesDWO();
}
+ emitDebugAddr();
+
// Emit info into the dwarf accelerator table sections.
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
@@ -915,38 +1008,37 @@ void DwarfDebug::endModule() {
// FIXME: AbstractVariables.clear();
}
-void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *ScopeNode) {
- const DILocalVariable *Cleansed = nullptr;
- if (CU.getExistingAbstractVariable(IV, Cleansed))
+void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *ScopeNode) {
+ if (CU.getExistingAbstractEntity(Node))
return;
- CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
-void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU,
- InlinedVariable IV, const MDNode *ScopeNode) {
- const DILocalVariable *Cleansed = nullptr;
- if (CU.getExistingAbstractVariable(IV, Cleansed))
+void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node, const MDNode *ScopeNode) {
+ if (CU.getExistingAbstractEntity(Node))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
- CU.createAbstractVariable(Cleansed, Scope);
+ CU.createAbstractEntity(Node, Scope);
}
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
- DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) {
- SmallDenseMap<InlinedVariable, DbgVariable *> MFVars;
+ DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
+ SmallDenseMap<InlinedEntity, DbgVariable *> MFVars;
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
- InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
+ InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt());
Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
@@ -954,14 +1046,15 @@ void DwarfDebug::collectVariableInfoFromMFTable(
if (!Scope)
continue;
- ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode());
- auto RegVar = llvm::make_unique<DbgVariable>(Var.first, Var.second);
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
+ auto RegVar = llvm::make_unique<DbgVariable>(
+ cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
MFVars.insert({Var, RegVar.get()});
- ConcreteVariables.push_back(std::move(RegVar));
+ ConcreteEntities.push_back(std::move(RegVar));
}
}
}
@@ -1087,6 +1180,18 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
auto Value = getDebugLocValue(Begin);
+
+ // Omit entries with empty ranges as they do not have any effect in DWARF.
+ if (StartLabel == EndLabel) {
+ // If this is a fragment, we must still add the value to the list of
+ // open ranges, since it may describe non-overlapping parts of the
+ // variable.
+ if (DIExpr->isFragment())
+ OpenRanges.push_back(Value);
+ LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
+ continue;
+ }
+
DebugLocEntry Loc(StartLabel, EndLabel, Value);
bool couldMerge = false;
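The added early-continue above drops zero-length ranges (while still pushing fragment values onto OpenRanges). The overall shape of buildLocationList, skipping empty ranges and extending the previous entry when the location value is unchanged, can be sketched standalone; the real merge logic is richer, so treat this as an approximation (plain C++; Entry is a placeholder type):

#include <string>
#include <vector>

struct Entry {
  int Begin, End;    // label positions standing in for MCSymbols
  std::string Value; // placeholder for the DBG_VALUE's location
};

// Build a location list: drop entries whose range is empty, and merge an
// entry into its predecessor when the location value is unchanged.
std::vector<Entry> buildList(const std::vector<Entry> &Raw) {
  std::vector<Entry> List;
  for (const Entry &E : Raw) {
    if (E.Begin == E.End)
      continue; // empty ranges have no effect in DWARF
    if (!List.empty() && List.back().End == E.Begin &&
        List.back().Value == E.Value) {
      List.back().End = E.End; // coalesce with the previous entry
      continue;
    }
    List.push_back(E);
  }
  return List;
}

int main() {
  std::vector<Entry> L = buildList(
      {{0, 0, "reg0"}, {0, 4, "reg0"}, {4, 8, "reg0"}, {8, 12, "reg1"}});
  return L.size() == 2 ? 0 : 1; // [0,8)=reg0, [8,12)=reg1
}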
@@ -1126,14 +1231,26 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
}
}
-DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
- LexicalScope &Scope,
- InlinedVariable IV) {
- ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode());
- ConcreteVariables.push_back(
- llvm::make_unique<DbgVariable>(IV.first, IV.second));
- InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
- return ConcreteVariables.back().get();
+DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym) {
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
+ if (isa<const DILocalVariable>(Node)) {
+ ConcreteEntities.push_back(
+ llvm::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ Location));
+ InfoHolder.addScopeVariable(&Scope,
+ cast<DbgVariable>(ConcreteEntities.back().get()));
+ } else if (isa<const DILabel>(Node)) {
+ ConcreteEntities.push_back(
+ llvm::make_unique<DbgLabel>(cast<const DILabel>(Node),
+ Location, Sym));
+ InfoHolder.addScopeLabel(&Scope,
+ cast<DbgLabel>(ConcreteEntities.back().get()));
+ }
+ return ConcreteEntities.back().get();
}
/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
@@ -1195,14 +1312,14 @@ static bool validThroughout(LexicalScopes &LScopes,
}
// Find variables for each lexical scope.
-void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
- const DISubprogram *SP,
- DenseSet<InlinedVariable> &Processed) {
+void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
+ const DISubprogram *SP,
+ DenseSet<InlinedEntity> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
for (const auto &I : DbgValues) {
- InlinedVariable IV = I.first;
+ InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
@@ -1212,16 +1329,18 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
continue;
LexicalScope *Scope = nullptr;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first);
if (const DILocation *IA = IV.second)
- Scope = LScopes.findInlinedScope(IV.first->getScope(), IA);
+ Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA);
else
- Scope = LScopes.findLexicalScope(IV.first->getScope());
+ Scope = LScopes.findLexicalScope(LocalVar->getScope());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
Processed.insert(IV);
- DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV);
+ DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
+ *Scope, LocalVar, IV.second));
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
@@ -1247,20 +1366,53 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
// unique identifiers, so don't bother resolving the type with the
// identifier map.
const DIBasicType *BT = dyn_cast<DIBasicType>(
- static_cast<const Metadata *>(IV.first->getType()));
+ static_cast<const Metadata *>(LocalVar->getType()));
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : Entries)
Entry.finalize(*Asm, List, BT);
}
- // Collect info for variables that were optimized out.
+ // For each InlinedEntity collected from DBG_LABEL instructions, convert to
+ // DWARF-related DbgLabel.
+ for (const auto &I : DbgLabels) {
+ InlinedEntity IL = I.first;
+ const MachineInstr *MI = I.second;
+ if (MI == nullptr)
+ continue;
+
+ LexicalScope *Scope = nullptr;
+ const DILabel *Label = cast<DILabel>(IL.first);
+ // Get the inlined DILocation if this is an inlined label.
+ if (const DILocation *IA = IL.second)
+ Scope = LScopes.findInlinedScope(Label->getScope(), IA);
+ else
+ Scope = LScopes.findLexicalScope(Label->getScope());
+ // If label scope is not found then skip this label.
+ if (!Scope)
+ continue;
+
+ Processed.insert(IL);
+ // At this point, the temporary label has been created. Save it in the
+ // DbgLabel entity so the actual address is available when generating the
+ // DWARF DIE.
+ MCSymbol *Sym = getLabelBeforeInsn(MI);
+ createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
+ }
+
+ // Collect info for variables/labels that were optimized out.
for (const DINode *DN : SP->getRetainedNodes()) {
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+ LexicalScope *Scope = nullptr;
if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- if (Processed.insert(InlinedVariable(DV, nullptr)).second)
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
- createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
+ Scope = LScopes.findLexicalScope(DV->getScope());
+ } else if (auto *DL = dyn_cast<DILabel>(DN)) {
+ Scope = LScopes.findLexicalScope(DL->getScope());
}
+
+ if (Scope)
+ createConcreteEntity(TheCU, *Scope, DN, nullptr);
}
}
@@ -1284,6 +1436,11 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
+ // Request a label after the call in order to emit AT_return_pc information
+ // in call site entries. TODO: Add support for targets with delay slots.
+ if (SP->areAllCallsDescribed() && MI->isCall() && !MI->hasDelaySlot())
+ requestLabelAfterInsn(MI);
+
if (DL == PrevInstLoc) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
@@ -1416,9 +1573,14 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
+ if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ return;
+ }
- DenseSet<InlinedVariable> ProcessedVars;
- collectVariableInfo(TheCU, SP, ProcessedVars);
+ DenseSet<InlinedEntity> Processed;
+ collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
@@ -1442,31 +1604,41 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
- if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- // Collect info for variables that were optimized out.
- if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
- continue;
- ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr),
- DV->getScope());
- assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
- && "ensureAbstractVariableIsCreated inserted abstract scopes");
- }
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+
+ const MDNode *Scope = nullptr;
+ if (auto *DV = dyn_cast<DILocalVariable>(DN))
+ Scope = DV->getScope();
+ else if (auto *DL = dyn_cast<DILabel>(DN))
+ Scope = DL->getScope();
+ else
+ llvm_unreachable("Unexpected DI type!");
+
+ // Collect info for variables/labels that were optimized out.
+ ensureAbstractEntityIsCreated(TheCU, DN, Scope);
+ assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
+ && "ensureAbstractEntityIsCreated inserted abstract scopes");
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
ProcessedSPNodes.insert(SP);
- TheCU.constructSubprogramScopeDIE(SP, FnScope);
+ DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
if (!LScopes.getAbstractScopesList().empty() &&
TheCU.getCUNode()->getSplitDebugInlining())
SkelCU->constructSubprogramScopeDIE(SP, FnScope);
+ // Construct call site entries.
+ constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
+
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
+ InfoHolder.getScopeLabels().clear();
PrevLabel = nullptr;
CurFn = nullptr;
}
@@ -1530,8 +1702,6 @@ void DwarfDebug::emitAccelDebugNames() {
if (getUnits().empty())
return;
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfDebugNamesSection());
emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());
}
@@ -1636,7 +1806,8 @@ void DwarfDebug::emitDebugPubSections() {
if (!TheU->hasDwarfPubSections())
continue;
- bool GnuStyle = TheU->getCUNode()->getGnuPubnames();
+ bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
+ DICompileUnit::DebugNameTableKind::GNU;
Asm->OutStreamer->SwitchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
@@ -1692,8 +1863,8 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
Asm->OutStreamer->AddComment(
- Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
- dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) +
+ ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
Asm->emitInt8(Desc.toBits());
}
@@ -1759,6 +1930,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
const DIBasicType *BT) {
+ assert(Begin != End && "unexpected location list entry with empty range");
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
@@ -1791,25 +1963,119 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
emitDebugLocEntry(Streamer, Entry);
}
-// Emit locations into the debug loc section.
+// Emit the common part of the DWARF 5 range/locations list tables header.
+static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder,
+ MCSymbol *TableStart,
+ MCSymbol *TableEnd) {
+ // Build the table header, which starts with the length field.
+ Asm->OutStreamer->AddComment("Length");
+ Asm->EmitLabelDifference(TableEnd, TableStart, 4);
+ Asm->OutStreamer->EmitLabel(TableStart);
+ // Version number (DWARF v5 and later).
+ Asm->OutStreamer->AddComment("Version");
+ Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
+ // Address size.
+ Asm->OutStreamer->AddComment("Address size");
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
+ // Segment selector size.
+ Asm->OutStreamer->AddComment("Segment selector size");
+ Asm->emitInt8(0);
+}
+
+// Emit the header of a DWARF 5 range list table. Returns the symbol that
+// designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
+ const DwarfFile &Holder) {
+ MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
+ MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
+ emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(Holder.getRangeLists().size());
+ Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym());
+
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(),
+ 4);
+
+ return TableEnd;
+}
+
+// Emit the header of a DWARF 5 locations list table. Returns the symbol that
+// designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
+ const DwarfFile &Holder) {
+ MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start");
+ MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end");
+ emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+
+ // FIXME: Generate the offsets table and use DW_FORM_loclistx with the
+ // DW_AT_loclists_base attribute. Until then set the number of offsets to 0.
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(0);
+ Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym());
+
+ return TableEnd;
+}
+
+// Emit locations into the .debug_loc/.debug_rnglists section.
void DwarfDebug::emitDebugLoc() {
if (DebugLocs.getLists().empty())
return;
- // Start the dwarf loc section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
+ bool IsLocLists = getDwarfVersion() >= 5;
+ MCSymbol *TableEnd = nullptr;
+ if (IsLocLists) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLoclistsSection());
+ TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
+ : InfoHolder);
+ } else {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ }
+
unsigned char Size = Asm->MAI->getCodePointerSize();
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
+
const DwarfCompileUnit *CU = List.CU;
+ const MCSymbol *Base = CU->getBaseAddress();
for (const auto &Entry : DebugLocs.getEntries(List)) {
- // Set up the range. This range is relative to the entry point of the
- // compile unit. This is a hard coded 0 for low_pc when we're emitting
- // ranges, or the DW_AT_low_pc on the compile unit otherwise.
- if (auto *Base = CU->getBaseAddress()) {
- Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
- Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ if (Base) {
+ // Set up the range. This range is relative to the entry point of the
+ // compile unit. This is a hard coded 0 for low_pc when we're emitting
+ // ranges, or the DW_AT_low_pc on the compile unit otherwise.
+ if (IsLocLists) {
+ Asm->OutStreamer->AddComment("DW_LLE_offset_pair");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1);
+ Asm->OutStreamer->AddComment(" starting offset");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base);
+ Asm->OutStreamer->AddComment(" ending offset");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base);
+ } else {
+ Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
+ Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ }
+
+ emitDebugLocEntryLocation(Entry);
+ continue;
+ }
+
+ // We have no base address.
+ if (IsLocLists) {
+ // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or
+ // DW_LLE_startx_length if there is only a single range, which should
+ // reduce the size of the emitted debug data. For now just use
+ // DW_LLE_startx_length in all cases.
+ Asm->OutStreamer->AddComment("DW_LLE_startx_length");
+ Asm->emitInt8(dwarf::DW_LLE_startx_length);
+ Asm->OutStreamer->AddComment(" start idx");
+ Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym));
+ Asm->OutStreamer->AddComment(" length");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym);
} else {
Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
@@ -1817,9 +2083,20 @@ void DwarfDebug::emitDebugLoc() {
emitDebugLocEntryLocation(Entry);
}
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
+
+ if (IsLocLists) {
+ // .debug_loclists section ends with DW_LLE_end_of_list.
+ Asm->OutStreamer->AddComment("DW_LLE_end_of_list");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1);
+ } else {
+ // Terminate the .debug_loc list with two 0 values.
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
}
+
+ if (TableEnd)
+ Asm->OutStreamer->EmitLabel(TableEnd);
}
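[Editor's note: the DW_LLE operands above are ULEB128-encoded via EmitULEB128 and EmitLabelDifferenceAsULEB128. For reference, a minimal standalone encoder showing the standard LEB128 scheme those helpers produce; a sketch, not code from this change:]

    #include <cstdint>
    #include <vector>

    void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;   // take the low 7 bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;                // high bit set: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
    }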
void DwarfDebug::emitDebugLocDWO() {
@@ -1828,10 +2105,13 @@ void DwarfDebug::emitDebugLocDWO() {
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
- // Just always use start_length for now - at least that's one address
- // rather than two. We could get fancier and try to, say, reuse an
- // address we know we've emitted elsewhere (the start of the function?
- // The start of the CU or CU subrange that encloses this range?)
+ // GDB only supports startx_length in pre-standard split-DWARF.
+ // (In standard v5 loclists it currently* supports /only/ base_address +
+ // offset_pair, so the two implementations can't share much, since they
+ // need different representations.)
+ // * as of October 2018, at least
+ // Ideally, in v5, this could use SectionLabels to reuse existing
+ // addresses in the address pool and minimize object size/relocations.
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
@@ -1939,10 +2219,9 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the CU list (again, to ensure consistent output order).
- llvm::sort(CUs.begin(), CUs.end(),
- [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
- return A->getUniqueID() < B->getUniqueID();
- });
+ llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
+ return A->getUniqueID() < B->getUniqueID();
+ });
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
@@ -2006,10 +2285,10 @@ void DwarfDebug::emitDebugARanges() {
}
/// Emit a single range list. We handle both DWARF v5 and earlier.
-static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
+static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
- auto DwarfVersion = CU->getDwarfVersion();
+ auto DwarfVersion = DD.getDwarfVersion();
// Emit our symbol so we can find the beginning of the range.
Asm->OutStreamer->EmitLabel(List.getSym());
// Gather all the ranges that apply to the same section so they can share
@@ -2021,7 +2300,8 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
for (const RangeSpan &Range : List.getRanges())
SectionRanges[&Range.getStart()->getSection()].push_back(&Range);
- auto *CUBase = CU->getBaseAddress();
+ const DwarfCompileUnit &CU = List.getCU();
+ const MCSymbol *CUBase = CU.getBaseAddress();
bool BaseIsSet = false;
for (const auto &P : SectionRanges) {
// Don't bother with a base address entry if there's only one range in
@@ -2031,19 +2311,23 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
// or optnone where there may be holes in a single CU's section
// contributions.
auto *Base = CUBase;
- if (!Base && P.second.size() > 1 &&
- (UseDwarfRangesBaseAddressSpecifier || DwarfVersion >= 5)) {
+ if (!Base && (P.second.size() > 1 || DwarfVersion < 5) &&
+ (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) {
BaseIsSet = true;
// FIXME/use care: This may not be a useful base address if it's not
// the lowest address/range in this object.
Base = P.second.front()->getStart();
if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_base_address");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_address, 1);
- } else
+ Base = DD.getSectionLabel(&Base->getSection());
+ Asm->OutStreamer->AddComment("DW_RLE_base_addressx");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1);
+ Asm->OutStreamer->AddComment(" base address index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
+ } else {
Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->AddComment(" base address");
- Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ Asm->OutStreamer->AddComment(" base address");
+ Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ }
} else if (BaseIsSet && DwarfVersion < 5) {
BaseIsSet = false;
assert(!Base);
@@ -2070,10 +2354,10 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
Asm->EmitLabelDifference(End, Base, Size);
}
} else if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_start_length");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_start_length, 1);
- Asm->OutStreamer->AddComment(" start");
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->AddComment("DW_RLE_startx_length");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1);
+ Asm->OutStreamer->AddComment(" start index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
Asm->EmitLabelDifferenceAsULEB128(End, Begin);
} else {
@@ -2092,31 +2376,13 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
}
}
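[Editor's note: an illustration of the DW_RLE_startx_length switch above; the index value is hypothetical:]

    // One emitted range, assuming AddrPool.getIndex(Begin) == 3:
    //   0x03                    DW_RLE_startx_length
    //   ULEB128(3)              start index into .debug_addr
    //   ULEB128(End - Begin)    length
    // The consumer resolves the start address from .debug_addr slot 3, so
    // the relocation lives in the address pool, not in .debug_rnglists.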
-// Emit the header of a DWARF 5 range list table. Returns the symbol that
-// designates the end of the table for the caller to emit when the table is
-// complete.
-static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, DwarfFile &Holder) {
- // The length is described by a starting label right after the length field
- // and an end label.
- MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
- MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
- // Build the range table header, which starts with the length field.
- Asm->EmitLabelDifference(TableEnd, TableStart, 4);
- Asm->OutStreamer->EmitLabel(TableStart);
- // Version number (DWARF v5 and later).
- Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
- // Address size.
- Asm->emitInt8(Asm->MAI->getCodePointerSize());
- // Segment selector size.
- Asm->emitInt8(0);
-
- MCSymbol *RnglistTableBaseSym = Holder.getRnglistsTableBaseSym();
+static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
+ const DwarfFile &Holder, MCSymbol *TableEnd) {
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ emitRangeList(DD, Asm, List);
- // FIXME: Generate the offsets table and use DW_FORM_rnglistx with the
- // DW_AT_ranges attribute. Until then set the number of offsets to 0.
- Asm->emitInt32(0);
- Asm->OutStreamer->EmitLabel(RnglistTableBaseSym);
- return TableEnd;
+ if (TableEnd)
+ Asm->OutStreamer->EmitLabel(TableEnd);
}
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
@@ -2125,46 +2391,52 @@ void DwarfDebug::emitDebugRanges() {
if (CUMap.empty())
return;
- auto NoRangesPresent = [this]() {
- return llvm::all_of(
- CUMap, [](const decltype(CUMap)::const_iterator::value_type &Pair) {
- return Pair.second->getRangeLists().empty();
- });
- };
+ const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- if (!useRangesSection()) {
- assert(NoRangesPresent() && "No debug ranges expected.");
+ if (Holder.getRangeLists().empty())
return;
- }
- if (NoRangesPresent())
- return;
+ assert(useRangesSection());
+ assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
// Start the dwarf ranges section.
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRnglistsSection());
- TableEnd = emitRnglistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
- : InfoHolder);
+ TableEnd = emitRnglistsTableHeader(Asm, Holder);
} else
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
- // Grab the specific ranges for the compile units in the module.
- for (const auto &I : CUMap) {
- DwarfCompileUnit *TheCU = I.second;
+ emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
+}
- if (auto *Skel = TheCU->getSkeleton())
- TheCU = Skel;
+void DwarfDebug::emitDebugRangesDWO() {
+ assert(useSplitDwarf());
- // Iterate over the misc ranges for the compile units in the module.
- for (const RangeSpanList &List : TheCU->getRangeLists())
- emitRangeList(Asm, TheCU, List);
- }
+ if (CUMap.empty())
+ return;
- if (TableEnd)
- Asm->OutStreamer->EmitLabel(TableEnd);
+ const auto &Holder = InfoHolder;
+
+ if (Holder.getRangeLists().empty())
+ return;
+
+ assert(getDwarfVersion() >= 5);
+ assert(useRangesSection());
+ assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
+
+ // Start the dwarf ranges section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
+ MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder);
+
+ emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -2206,12 +2478,19 @@ void DwarfDebug::emitDebugMacinfo() {
if (CUMap.empty())
return;
+ if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }))
+ return;
+
// Start the dwarf macinfo section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfMacinfoSection());
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
+ if (TheCU.getCUNode()->isDebugDirectivesOnly())
+ continue;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
@@ -2229,8 +2508,6 @@ void DwarfDebug::emitDebugMacinfo() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
- NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
- Asm->TM.Options.MCOptions.SplitDwarfFile);
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -2298,9 +2575,8 @@ void DwarfDebug::emitDebugStrDWO() {
OffSec, /* UseRelativeOffsets = */ false);
}
-// Emit DWO addresses.
+// Emit address pool.
void DwarfDebug::emitDebugAddr() {
- assert(useSplitDwarf() && "No split dwarf?");
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
}
@@ -2356,10 +2632,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
NewTU.setTypeSignature(Signature);
Ins.first->second = Signature;
- if (useSplitDwarf())
- NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
- else {
- NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ if (useSplitDwarf()) {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesDWOSection()
+ : Asm->getObjFileLowering().getDwarfInfoDWOSection();
+ NewTU.setSection(Section);
+ } else {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesSection(Signature)
+ : Asm->getObjFileLowering().getDwarfInfoSection(Signature);
+ NewTU.setSection(Section);
// Non-split type units reuse the compile unit's line table.
CU.applyStmtList(UnitDie);
}
@@ -2408,14 +2692,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// AccelTableKind::Apple, we use the table we got as an argument). If
// accelerator tables are disabled, this function does nothing.
template <typename DataT>
-void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
+void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
+ AccelTable<DataT> &AppleAccel, StringRef Name,
const DIE &Die) {
if (getAccelTableKind() == AccelTableKind::None)
return;
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ return;
+
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- DwarfStringPoolEntryRef Ref =
- Holder.getStringPool().getEntry(*Asm, Name);
+ DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name);
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
@@ -2431,24 +2719,36 @@ void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
}
}
-void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
- addAccelNameImpl(AccelNames, Name, Die);
+void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNames, Name, Die);
}
-void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
+void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
// ObjC names go only into the Apple accelerator tables.
if (getAccelTableKind() == AccelTableKind::Apple)
- addAccelNameImpl(AccelObjC, Name, Die);
+ addAccelNameImpl(CU, AccelObjC, Name, Die);
}
-void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
- addAccelNameImpl(AccelNamespace, Name, Die);
+void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNamespace, Name, Die);
}
-void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
- addAccelNameImpl(AccelTypes, Name, Die);
+void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die, char Flags) {
+ addAccelNameImpl(CU, AccelTypes, Name, Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
+
+void DwarfDebug::addSectionLabel(const MCSymbol *Sym) {
+ SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym));
+}
+
+const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
+ return SectionLabels.find(S)->second;
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index abf2e43b1312..8a31e989b289 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,8 +15,6 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "AddressPool.h"
-#include "DbgValueHistoryCalculator.h"
-#include "DebugHandlerBase.h"
#include "DebugLocStream.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
@@ -31,6 +29,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AccelTable.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -62,6 +62,47 @@ class MDNode;
class Module;
//===----------------------------------------------------------------------===//
+/// This class is the common parent of DbgVariable and DbgLabel; it leverages
+/// polymorphism to share code between the two.
+class DbgEntity {
+ const DINode *Entity;
+ const DILocation *InlinedAt;
+ DIE *TheDIE = nullptr;
+ unsigned SubclassID;
+
+public:
+ enum DbgEntityKind {
+ DbgVariableKind,
+ DbgLabelKind
+ };
+
+ DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
+ : Entity(N), InlinedAt(IA), SubclassID(ID) {}
+ virtual ~DbgEntity() {}
+
+ /// Accessors.
+ /// @{
+ const DINode *getEntity() const { return Entity; }
+ const DILocation *getInlinedAt() const { return InlinedAt; }
+ DIE *getDIE() const { return TheDIE; }
+ unsigned getDbgEntityID() const { return SubclassID; }
+ /// @}
+
+ void setDIE(DIE &D) { TheDIE = &D; }
+
+ static bool classof(const DbgEntity *N) {
+ switch (N->getDbgEntityID()) {
+ default:
+ return false;
+ case DbgVariableKind:
+ case DbgLabelKind:
+ return true;
+ }
+ }
+};
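[Editor's note: a usage sketch with a hypothetical helper, not part of this change. The classof hook above, combined with the per-subclass classof implementations below, enables LLVM-style RTTI over entities:]

    void visitEntity(DbgEntity *E) {
      if (auto *Var = dyn_cast<DbgVariable>(E))
        (void)Var->getVariable();   // DILocalVariable-backed entity
      else if (auto *Label = dyn_cast<DbgLabel>(E))
        (void)Label->getLabel();    // DILabel-backed entity
    }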
+
+//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
///
/// Variables can be created from allocas, in which case they're generated from
@@ -73,10 +114,7 @@ class Module;
/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
///
/// Variables that have been optimized out use none of these fields.
-class DbgVariable {
- const DILocalVariable *Var; /// Variable Descriptor.
- const DILocation *IA; /// Inlined at location.
- DIE *TheDIE = nullptr; /// Variable DIE.
+class DbgVariable : public DbgEntity {
unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
@@ -93,7 +131,7 @@ public:
/// Creates a variable without any DW_AT_location. Call \a initializeMMI()
/// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
DbgVariable(const DILocalVariable *V, const DILocation *IA)
- : Var(V), IA(IA) {}
+ : DbgEntity(V, IA, DbgVariableKind) {}
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
@@ -111,8 +149,9 @@ public:
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!MInsn && "Already initialized?");
- assert(Var == DbgValue->getDebugVariable() && "Wrong variable");
- assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at");
+ assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
+ assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
+ "Wrong inlined-at");
MInsn = DbgValue;
if (auto *E = DbgValue->getDebugExpression())
@@ -121,19 +160,18 @@ public:
}
// Accessors.
- const DILocalVariable *getVariable() const { return Var; }
- const DILocation *getInlinedAt() const { return IA; }
+ const DILocalVariable *getVariable() const {
+ return cast<DILocalVariable>(getEntity());
+ }
const DIExpression *getSingleExpression() const {
assert(MInsn && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
- void setDIE(DIE &D) { TheDIE = &D; }
- DIE *getDIE() const { return TheDIE; }
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
- StringRef getName() const { return Var->getName(); }
+ StringRef getName() const { return getVariable()->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
@@ -143,7 +181,7 @@ public:
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
// FIXME: Why don't we just infer this tag and store it all along?
- if (Var->isParameter())
+ if (getVariable()->isParameter())
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
@@ -151,7 +189,7 @@ public:
/// Return true if DbgVariable is artificial.
bool isArtificial() const {
- if (Var->isArtificial())
+ if (getVariable()->isArtificial())
return true;
if (getType()->isArtificial())
return true;
@@ -159,7 +197,7 @@ public:
}
bool isObjectPointer() const {
- if (Var->isObjectPointer())
+ if (getVariable()->isObjectPointer())
return true;
if (getType()->isObjectPointer())
return true;
@@ -178,6 +216,45 @@ public:
bool isBlockByrefVariable() const;
const DIType *getType() const;
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgVariableKind;
+ }
+
+private:
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
+ return Ref.resolve();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// This class is used to track label information.
+///
+/// Labels are collected from \c DBG_LABEL instructions.
+class DbgLabel : public DbgEntity {
+ const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction.
+
+public:
+ /// We need MCSymbol information to generate DW_AT_low_pc.
+ DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr)
+ : DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {}
+
+ /// Accessors.
+ /// @{
+ const DILabel *getLabel() const { return cast<DILabel>(getEntity()); }
+ const MCSymbol *getSymbol() const { return Sym; }
+
+ StringRef getName() const { return getLabel()->getName(); }
+ /// @}
+
+ /// Translate tag to proper Dwarf tag.
+ dwarf::Tag getTag() const {
+ return dwarf::DW_TAG_label;
+ }
+
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgLabelKind;
+ }
+
private:
template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
return Ref.resolve();
@@ -217,8 +294,8 @@ class DwarfDebug : public DebugHandlerBase {
/// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
- /// Collection of abstract variables.
- SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
+ /// Collection of abstract variables/labels.
+ SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities;
/// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
/// can refer to them in spite of insertions into this list.
@@ -250,6 +327,8 @@ class DwarfDebug : public DebugHandlerBase {
/// used to keep track of which types we have emitted type units for.
DenseMap<const MDNode *, uint64_t> TypeSignatures;
+ DenseMap<const MCSection *, const MCSymbol *> SectionLabels;
+
SmallVector<
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
@@ -266,9 +345,6 @@ class DwarfDebug : public DebugHandlerBase {
/// Use inlined strings.
bool UseInlineStrings = false;
- /// Whether to emit DWARF pub sections or not.
- bool UsePubSections = true;
-
/// Allow emission of .debug_ranges section.
bool UseRangesSection = true;
@@ -332,24 +408,33 @@ class DwarfDebug : public DebugHandlerBase {
return InfoHolder.getUnits();
}
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
- void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *Scope);
+ void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *Scope);
+ void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *Scope);
- DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU,
- LexicalScope &Scope, InlinedVariable IV);
+ DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym = nullptr);
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+ /// Construct DIEs for call site entries describing the calls in \p MF.
+ void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
+ DIE &ScopeDIE, const MachineFunction &MF);
+
template <typename DataT>
- void addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
- const DIE &Die);
+ void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel,
+ StringRef Name, const DIE &Die);
- void finishVariableDefinitions();
+ void finishEntityDefinitions();
void finishSubprogramDefinitions();
@@ -407,9 +492,7 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
-
- /// Emit range lists into a DWARF v5 debug rnglists section.
- void emitDebugRnglists();
+ void emitDebugRangesDWO();
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
@@ -457,6 +540,8 @@ class DwarfDebug : public DebugHandlerBase {
/// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
+ void finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU);
/// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
@@ -469,8 +554,8 @@ class DwarfDebug : public DebugHandlerBase {
unsigned Flags);
/// Populate LexicalScope entries with variables' info.
- void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
- DenseSet<InlinedVariable> &ProcessedVars);
+ void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
+ DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
/// function that describe the same variable.
@@ -479,7 +564,7 @@ class DwarfDebug : public DebugHandlerBase {
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
- DenseSet<InlinedVariable> &P);
+ DenseSet<InlinedEntity> &P);
/// Emit the reference to the section.
void emitSectionReference(const DwarfCompileUnit &CU);
@@ -543,9 +628,6 @@ public:
/// Returns whether to use inline strings.
bool useInlineStrings() const { return UseInlineStrings; }
- /// Returns whether GNU pub sections should be emitted.
- bool usePubSections() const { return UsePubSections; }
-
/// Returns whether ranges section should be emitted.
bool useRangesSection() const { return UseRangesSection; }
@@ -608,17 +690,20 @@ public:
return Ref.resolve();
}
- void addSubprogramNames(const DISubprogram *SP, DIE &Die);
+ void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
+ DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
- void addAccelName(StringRef Name, const DIE &Die);
+ void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die);
- void addAccelObjC(StringRef Name, const DIE &Die);
+ void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die);
- void addAccelNamespace(StringRef Name, const DIE &Die);
+ void addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die);
- void addAccelType(StringRef Name, const DIE &Die, char Flags);
+ void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die,
+ char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
@@ -640,6 +725,9 @@ public:
bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
/// @}
+
+ void addSectionLabel(const MCSymbol *Sym);
+ const MCSymbol *getSectionLabel(const MCSection *S);
};
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index d8d1a5e8f841..19c350afbf17 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -24,6 +24,20 @@
using namespace llvm;
+void DwarfExpression::emitConstu(uint64_t Value) {
+ if (Value < 32)
+ emitOp(dwarf::DW_OP_lit0 + Value);
+ else if (Value == std::numeric_limits<uint64_t>::max()) {
+ // Only do this for 64-bit values as the DWARF expression stack uses
+ // target-address-size values.
+ emitOp(dwarf::DW_OP_lit0);
+ emitOp(dwarf::DW_OP_not);
+ } else {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Value);
+ }
+}
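[Editor's note: why the normalization pays off, with illustrative byte counts assuming standard DWARF opcode sizes:]

    // Value = 5:
    //   emitConstu: DW_OP_lit5                   -> 1 byte
    //   before:     DW_OP_constu ULEB128(5)      -> 2 bytes
    // Value = UINT64_MAX:
    //   emitConstu: DW_OP_lit0, DW_OP_not        -> 2 bytes
    //   before:     DW_OP_constu ULEB128(~0ULL)  -> 11 bytes (1 + 10)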
+
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
assert((LocationKind == Unknown || LocationKind == Register) &&
@@ -72,14 +86,12 @@ void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
}
void DwarfExpression::addShr(unsigned ShiftBy) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(ShiftBy);
+ emitConstu(ShiftBy);
emitOp(dwarf::DW_OP_shr);
}
void DwarfExpression::addAnd(unsigned Mask) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Mask);
+ emitConstu(Mask);
emitOp(dwarf::DW_OP_and);
}
@@ -181,8 +193,7 @@ void DwarfExpression::addSignedConstant(int64_t Value) {
void DwarfExpression::addUnsignedConstant(uint64_t Value) {
assert(LocationKind == Implicit || LocationKind == Unknown);
LocationKind = Implicit;
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Value);
+ emitConstu(Value);
}
void DwarfExpression::addUnsignedConstant(const APInt &Value) {
@@ -243,10 +254,9 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// Don't emit locations that cannot be expressed without DW_OP_stack_value.
if (DwarfVersion < 4)
- if (std::any_of(ExprCursor.begin(), ExprCursor.end(),
- [](DIExpression::ExprOperand Op) -> bool {
- return Op.getOp() == dwarf::DW_OP_stack_value;
- })) {
+ if (any_of(ExprCursor, [](DIExpression::ExprOperand Op) -> bool {
+ return Op.getOp() == dwarf::DW_OP_stack_value;
+ })) {
DwarfRegs.clear();
LocationKind = Unknown;
return false;
@@ -373,8 +383,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
break;
case dwarf::DW_OP_constu:
assert(LocationKind != Register);
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Op->getArg(0));
+ emitConstu(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
LocationKind = Implicit;
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 0637d952eba4..91568ba6d107 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -138,6 +138,9 @@ protected:
/// Emit a raw unsigned value.
virtual void emitUnsigned(uint64_t Value) = 0;
+ /// Emit a normalized unsigned constant.
+ void emitConstu(uint64_t Value);
+
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
@@ -187,7 +190,7 @@ protected:
/// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available
/// until DWARF 4, so we will continue to generate DW_OP_constu <const> for
/// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const
- /// <const> actually describes a value at a constant addess, not a constant
+ /// <const> actually describes a value at a constant address, not a constant
/// value. However, in the past there was no better way to describe a
/// constant value, so the producers and consumers started to rely on
/// heuristics to disambiguate the value vs. location status of the
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 049f349b009a..78ccad481411 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -36,13 +36,20 @@ void DwarfFile::emitUnits(bool UseOffsets) {
}
void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
- DIE &Die = TheU->getUnitDie();
- MCSection *USection = TheU->getSection();
- Asm->OutStreamer->SwitchSection(USection);
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ return;
+ MCSection *S = TheU->getSection();
+
+ if (!S)
+ return;
+
+ Asm->OutStreamer->SwitchSection(S);
TheU->emitHeader(UseOffsets);
+ Asm->emitDwarfDIE(TheU->getUnitDie());
- Asm->emitDwarfDIE(Die);
+ if (MCSymbol *EndLabel = TheU->getEndLabel())
+ Asm->OutStreamer->EmitLabel(EndLabel);
}
// Compute the size and offset for each DIE.
@@ -53,6 +60,9 @@ void DwarfFile::computeSizeAndOffsets() {
// Iterate over each compile unit and set the size and offsets for each
// DIE within each compile unit. All offsets are CU relative.
for (const auto &TheU : CUs) {
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ continue;
+
TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
@@ -98,3 +108,15 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
}
return true;
}
+
+void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
+ SmallVectorImpl<DbgLabel *> &Labels = ScopeLabels[LS];
+ Labels.push_back(Label);
+}
+
+std::pair<uint32_t, RangeSpanList *>
+DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) {
+ CURangeLists.push_back(
+ RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R)));
+ return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back());
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 8dfbc4e1c434..51acca8c1e53 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -24,12 +24,44 @@
namespace llvm {
class AsmPrinter;
+class DbgEntity;
class DbgVariable;
+class DbgLabel;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
class MCSection;
+// Data structure to hold a range for range lists.
+class RangeSpan {
+public:
+ RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
+ const MCSymbol *getStart() const { return Start; }
+ const MCSymbol *getEnd() const { return End; }
+ void setEnd(const MCSymbol *E) { End = E; }
+
+private:
+ const MCSymbol *Start, *End;
+};
+
+class RangeSpanList {
+private:
+ // Symbol used to locate this particular span within the debug_range section.
+ MCSymbol *RangeSym;
+ const DwarfCompileUnit *CU;
+ // List of ranges.
+ SmallVector<RangeSpan, 2> Ranges;
+
+public:
+ RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU,
+ SmallVector<RangeSpan, 2> Ranges)
+ : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {}
+ MCSymbol *getSym() const { return RangeSym; }
+ const DwarfCompileUnit &getCU() const { return *CU; }
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
+ void addRange(RangeSpan Range) { Ranges.push_back(Range); }
+};
+
class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
@@ -44,6 +76,10 @@ class DwarfFile {
DwarfStringPool StrPool;
+ // List of range lists for a given compile unit, separate from the ranges for
+ // the CU itself.
+ SmallVector<RangeSpanList, 1> CURangeLists;
+
/// DWARF v5: The symbol that designates the start of the contribution to
/// the string offsets table. The contribution is shared by all units.
MCSymbol *StringOffsetsStartSym = nullptr;
@@ -52,6 +88,10 @@ class DwarfFile {
/// The table is shared by all units.
MCSymbol *RnglistsTableBaseSym = nullptr;
+ /// DWARF v5: The symbol that designates the base of the locations list table.
+ /// The table is shared by all units.
+ MCSymbol *LoclistsTableBaseSym = nullptr;
+
/// The variables of a lexical scope.
struct ScopeVars {
/// We need to sort Args by ArgNo and check for duplicates. This could also
@@ -62,9 +102,13 @@ class DwarfFile {
/// Collection of DbgVariables of each lexical scope.
DenseMap<LexicalScope *, ScopeVars> ScopeVariables;
+ /// Collection of DbgLabels of each lexical scope.
+ using LabelList = SmallVector<DbgLabel *, 4>;
+ DenseMap<LexicalScope *, LabelList> ScopeLabels;
+
// Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
/// be shared across CUs, that is why we keep the map here instead
@@ -78,6 +122,14 @@ public:
return CUs;
}
+ std::pair<uint32_t, RangeSpanList *> addRange(const DwarfCompileUnit &CU,
+ SmallVector<RangeSpan, 2> R);
+
+ /// Get the vector of range lists.
+ const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+ return CURangeLists;
+ }
+
/// Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
@@ -112,26 +164,33 @@ public:
DwarfStringPool &getStringPool() { return StrPool; }
MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; }
-
void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; }
MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
-
void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
+ MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; }
+ void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; }
+
/// \returns false if the variable was merged with a previous one.
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+ void addScopeLabel(LexicalScope *LS, DbgLabel *Label);
+
DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() {
return ScopeVariables;
}
+ DenseMap<LexicalScope *, LabelList> &getScopeLabels() {
+ return ScopeLabels;
+ }
+
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
return AbstractSPDies;
}
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
- return AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
+ return AbstractEntities;
}
void insertDIE(const MDNode *TypeMD, DIE *Die) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index a61fa83cfb03..02016534a774 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -24,25 +24,39 @@ DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm,
: Pool(A), Prefix(Prefix),
ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {}
-DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
- StringRef Str) {
+StringMapEntry<DwarfStringPool::EntryTy> &
+DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
auto I = Pool.insert(std::make_pair(Str, EntryTy()));
+ auto &Entry = I.first->second;
if (I.second) {
- auto &Entry = I.first->second;
- Entry.Index = Pool.size() - 1;
+ Entry.Index = EntryTy::NotIndexed;
Entry.Offset = NumBytes;
Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
NumBytes += Str.size() + 1;
assert(NumBytes > Entry.Offset && "Unexpected overflow");
}
- return EntryRef(*I.first);
+ return *I.first;
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ return EntryRef(MapEntry, false);
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ if (!MapEntry.getValue().isIndexed())
+ MapEntry.getValue().Index = NumIndexedStrings++;
+ return EntryRef(MapEntry, true);
}
void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
MCSection *Section,
MCSymbol *StartSym) {
- if (empty())
+ if (getNumIndexedStrings() == 0)
return;
Asm.OutStreamer->SwitchSection(Section);
unsigned EntrySize = 4;
@@ -51,7 +65,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
// table. The header consists of an entry with the contribution's
// size (not including the size of the length field), the DWARF version and
// 2 bytes of padding.
- Asm.emitInt32(size() * EntrySize + 4);
+ Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4);
Asm.emitInt16(Asm.getDwarfVersion());
Asm.emitInt16(0);
// Define the symbol that marks the start of the contribution. It is
@@ -69,12 +83,17 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
// Start the dwarf str section.
Asm.OutStreamer->SwitchSection(StrSection);
- // Get all of the string pool entries and put them in an array by their ID so
- // we can sort them.
- SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size());
+ // Get all of the string pool entries and sort them by their offset.
+ SmallVector<const StringMapEntry<EntryTy> *, 64> Entries;
+ Entries.reserve(Pool.size());
for (const auto &E : Pool)
- Entries[E.getValue().Index] = &E;
+ Entries.push_back(&E);
+
+ llvm::sort(Entries, [](const StringMapEntry<EntryTy> *A,
+ const StringMapEntry<EntryTy> *B) {
+ return A->getValue().Offset < B->getValue().Offset;
+ });
for (const auto &Entry : Entries) {
assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) &&
@@ -93,6 +112,14 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
// If we've got an offset section go ahead and emit that now as well.
if (OffsetSection) {
+ // Now only take the indexed entries and put them in an array by their ID so
+ // we can emit them in order.
+ Entries.resize(NumIndexedStrings);
+ for (const auto &Entry : Pool) {
+ if (Entry.getValue().isIndexed())
+ Entries[Entry.getValue().Index] = &Entry;
+ }
+
Asm.OutStreamer->SwitchSection(OffsetSection);
unsigned size = 4; // FIXME: DWARF64 is 8.
for (const auto &Entry : Entries)
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 6e6988ea4ad4..f484540d8d37 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -30,8 +30,11 @@ class DwarfStringPool {
StringMap<EntryTy, BumpPtrAllocator &> Pool;
StringRef Prefix;
unsigned NumBytes = 0;
+ unsigned NumIndexedStrings = 0;
bool ShouldCreateSymbols;
+ StringMapEntry<EntryTy> &getEntryImpl(AsmPrinter &Asm, StringRef Str);
+
public:
using EntryRef = DwarfStringPoolEntryRef;
@@ -48,8 +51,15 @@ public:
unsigned size() const { return Pool.size(); }
+ unsigned getNumIndexedStrings() const { return NumIndexedStrings; }
+
/// Get a reference to an entry in the string pool.
EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
+
+ /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a
+ /// unique ID of this entry (e.g., for use in indexed forms like
+ /// DW_FORM_strx).
+ EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str);
};
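[Editor's note: a call-site sketch, hypothetical but using the Asm/Holder names from the surrounding code, showing the intended split between plain and indexed entries:]

    DwarfStringPool &SP = Holder.getStringPool();
    // Indexed: receives a dense ID (0, 1, 2, ...) for DW_FORM_strx* forms
    // and is counted by getNumIndexedStrings().
    auto Strx = SP.getIndexedEntry(*Asm, "main");
    // Plain: offset/symbol only; never assigned an index.
    auto Strp = SP.getEntry(*Asm, "helper");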
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 600f4a78fda0..80b365f1aa43 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -234,15 +234,23 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
if (DD->useInlineStrings()) {
Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string,
new (DIEValueAllocator)
DIEInlineString(String, DIEValueAllocator));
return;
}
- auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String);
dwarf::Form IxForm =
isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
+
+ auto StringPoolEntry =
+ useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index
+ ? DU->getStringPool().getIndexedEntry(*Asm, String)
+ : DU->getStringPool().getEntry(*Asm, String);
+
// For DWARF v5 and beyond, use the smallest strx? form possible.
if (useSegmentedStringOffsetsTable()) {
IxForm = dwarf::DW_FORM_strx1;
@@ -307,14 +315,21 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
- if (!DD->useSplitDwarf()) {
- addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Die, dwarf::DW_FORM_udata, Sym);
- } else {
+ if (DD->getDwarfVersion() >= 5) {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
+ addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym));
+ return;
+ }
+
+ if (DD->useSplitDwarf()) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
DD->getAddressPool().getIndex(Sym));
+ return;
}
+
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, dwarf::DW_FORM_udata, Sym);
}
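[Editor's note: the three encodings addOpAddress now selects between, summarized for reference:]

    //   DWARF v5:           DW_OP_addrx  ULEB128(AddrPool index of Sym)
    //   split DWARF, < v5:  DW_OP_GNU_addr_index  ULEB128(AddrPool index)
    //   otherwise:          DW_OP_addr  <pointer-sized address of Sym>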
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
@@ -401,6 +416,12 @@ void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
addSourceLine(Die, SP->getLine(), SP->getFile());
}
+void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) {
+ assert(L);
+
+ addSourceLine(Die, L->getLine(), L->getFile());
+}
+
void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
assert(Ty);
@@ -413,138 +434,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
-/* Byref variables, in Blocks, are declared by the programmer as "SomeType
- VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
- gives the variable VarName either the struct, or a pointer to the struct, as
- its type. This is necessary for various behind-the-scenes things the
- compiler needs to do with by-reference variables in Blocks.
-
- However, as far as the original *programmer* is concerned, the variable
- should still have type 'SomeType', as originally declared.
-
- The function getBlockByrefType dives into the __Block_byref_x_VarName
- struct to find the original type of the variable, which is then assigned to
- the variable's Debug Information Entry as its real type. So far, so good.
- However now the debugger will expect the variable VarName to have the type
- SomeType. So we need the location attribute for the variable to be an
- expression that explains to the debugger how to navigate through the
- pointers and struct to find the actual variable of type SomeType.
-
- The following function does just that. We start by getting
- the "normal" location for the variable. This will be the location
- of either the struct __Block_byref_x_VarName or the pointer to the
- struct __Block_byref_x_VarName.
-
- The struct will look something like:
-
- struct __Block_byref_x_VarName {
- ... <various fields>
- struct __Block_byref_x_VarName *forwarding;
- ... <various other fields>
- SomeType VarName;
- ... <maybe more fields>
- };
-
- If we are given the struct directly (as our starting point) we
- need to tell the debugger to:
-
- 1). Add the offset of the forwarding field.
-
- 2). Follow that pointer to get the real __Block_byref_x_VarName
- struct to use (the real one may have been copied onto the heap).
-
- 3). Add the offset for the field VarName, to find the actual variable.
-
- If we started with a pointer to the struct, then we need to
- dereference that pointer first, before the other steps.
- Translating this into DWARF ops, we will need to append the following
- to the current location description for the variable:
-
- DW_OP_deref -- optional, if we start with a pointer
- DW_OP_plus_uconst <forward_fld_offset>
- DW_OP_deref
- DW_OP_plus_uconst <varName_fld_offset>
-
- That is what this function does. */
-
-void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location) {
- const DIType *Ty = DV.getType();
- const DIType *TmpTy = Ty;
- uint16_t Tag = Ty->getTag();
- bool isPointer = false;
-
- StringRef varName = DV.getName();
-
- if (Tag == dwarf::DW_TAG_pointer_type) {
- auto *DTy = cast<DIDerivedType>(Ty);
- TmpTy = resolve(DTy->getBaseType());
- isPointer = true;
- }
-
- // Find the __forwarding field and the variable field in the __Block_byref
- // struct.
- DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
- const DIDerivedType *varField = nullptr;
- const DIDerivedType *forwardingField = nullptr;
-
- for (unsigned i = 0, N = Fields.size(); i < N; ++i) {
- auto *DT = cast<DIDerivedType>(Fields[i]);
- StringRef fieldName = DT->getName();
- if (fieldName == "__forwarding")
- forwardingField = DT;
- else if (fieldName == varName)
- varField = DT;
- }
-
- // Get the offsets for the forwarding field and the variable field.
- unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3;
- unsigned varFieldOffset = varField->getOffsetInBits() >> 2;
-
- // Decode the original location, and use that as the start of the byref
- // variable's location.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- if (Location.isIndirect())
- DwarfExpr.setMemoryLocationKind();
-
- SmallVector<uint64_t, 6> Ops;
- // If we started with a pointer to the __Block_byref... struct, then
- // the first thing we need to do is dereference the pointer (DW_OP_deref).
- if (isPointer)
- Ops.push_back(dwarf::DW_OP_deref);
-
- // Next add the offset for the '__forwarding' field:
- // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
- // adding the offset if it's 0.
- if (forwardingFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(forwardingFieldOffset);
- }
-
- // Now dereference the __forwarding field to get to the real __Block_byref
- // struct: DW_OP_deref.
- Ops.push_back(dwarf::DW_OP_deref);
-
- // Now that we've got the real __Block_byref... struct, add the offset
- // for the variable's field to get to the location of the actual variable:
- // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
- if (varFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(varFieldOffset);
- }
-
- DIExpressionCursor Cursor(Ops);
- const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
- return;
- DwarfExpr.addExpression(std::move(Cursor));
-
- // Now attach the location information to the DIE.
- addBlock(Die, Attribute, DwarfExpr.finalize());
-}
-
/// Return true if type encoding is unsigned.
static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
@@ -787,7 +676,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- DD->addAccelType(Ty->getName(), TyDIE, Flags);
+ DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context))
@@ -851,6 +740,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+
+ if (BTy->isBigEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big);
+ else if (BTy->isLittleEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
}
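[Editor's note: the resulting DIE shape once endianness is recorded, for a hypothetical type; at most one of the two endianity attributes is added:]

    // DW_TAG_base_type
    //   DW_AT_name        "int"
    //   DW_AT_byte_size   4
    //   DW_AT_endianity   DW_END_big    (or DW_END_little)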
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
@@ -1155,7 +1049,7 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
- DD->addAccelNamespace(Name, NDie);
+ DD->addAccelNamespace(*CUNode, Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
if (NS->getExportSymbols())
addFlag(NDie, dwarf::DW_AT_export_symbols);
@@ -1404,7 +1298,7 @@ DIE *DwarfUnit::getIndexTyDie() {
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_unsigned);
- DD->addAccelType(Name, *IndexTyDie, /*Flags*/ 0);
+ DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
@@ -1467,7 +1361,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (DTy) {
if (DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
- if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagFixedEnum))
+ if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass))
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
@@ -1659,7 +1553,14 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
- Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
+ if (!DD->useSectionsAsReferences()) {
+ StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_";
+ MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start");
+ EndLabel = Asm->createTempSymbol(Prefix + "end");
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->OutStreamer->EmitLabel(BeginLabel);
+ } else
+ Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
@@ -1761,3 +1662,12 @@ void DwarfUnit::addRnglistsBase() {
DU->getRnglistsTableBaseSym(),
TLOF.getDwarfRnglistsSection()->getBeginSymbol());
}
+
+void DwarfUnit::addLoclistsBase() {
+ assert(DD->getDwarfVersion() >= 5 &&
+ "DW_AT_loclists_base requires DWARF version 5 or later");
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base,
+ DU->getLoclistsTableBaseSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 69696f626536..a59ebb7c1465 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -35,33 +35,6 @@ class ConstantFP;
class DbgVariable;
class DwarfCompileUnit;
-// Data structure to hold a range for range lists.
-class RangeSpan {
-public:
- RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
- const MCSymbol *getStart() const { return Start; }
- const MCSymbol *getEnd() const { return End; }
- void setEnd(const MCSymbol *E) { End = E; }
-
-private:
- const MCSymbol *Start, *End;
-};
-
-class RangeSpanList {
-private:
- // Index for locating within the debug_range section this particular span.
- MCSymbol *RangeSym;
- // List of ranges.
- SmallVector<RangeSpan, 2> Ranges;
-
-public:
- RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges)
- : RangeSym(Sym), Ranges(std::move(Ranges)) {}
- MCSymbol *getSym() const { return RangeSym; }
- const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
- void addRange(RangeSpan Range) { Ranges.push_back(Range); }
-};
-
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
/// source file.
@@ -76,6 +49,9 @@ protected:
/// Target of Dwarf emission.
AsmPrinter *Asm;
+ /// Emitted at the end of the CU and used to compute the CU Length field.
+ MCSymbol *EndLabel = nullptr;
+
// Holders for some common dwarf information.
DwarfDebug *DD;
DwarfFile *DU;
@@ -109,6 +85,7 @@ protected:
public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
+ MCSymbol *getEndLabel() const { return EndLabel; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
@@ -213,6 +190,7 @@ public:
void addSourceLine(DIE &Die, const DILocalVariable *V);
void addSourceLine(DIE &Die, const DIGlobalVariable *G);
void addSourceLine(DIE &Die, const DISubprogram *SP);
+ void addSourceLine(DIE &Die, const DILabel *L);
void addSourceLine(DIE &Die, const DIType *Ty);
void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
@@ -298,6 +276,9 @@ public:
/// Add the DW_AT_rnglists_base attribute to the unit DIE.
void addRnglistsBase();
+ /// Add the DW_AT_loclists_base attribute to the unit DIE.
+ void addLoclistsBase();
+
virtual DwarfCompileUnit &getCU() = 0;
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 65de9d7e65a4..7599121de2b0 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -99,7 +99,7 @@ void EHStreamer::computeActionsTable(
FirstActions.reserve(LandingPads.size());
int FirstAction = 0;
- unsigned SizeActions = 0;
+ unsigned SizeActions = 0; // Total size of all action entries for a function
const LandingPadInfo *PrevLPI = nullptr;
for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
@@ -107,23 +107,24 @@ void EHStreamer::computeActionsTable(
const LandingPadInfo *LPI = *I;
const std::vector<int> &TypeIds = LPI->TypeIds;
unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
- unsigned SizeSiteActions = 0;
+ unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad
if (NumShared < TypeIds.size()) {
- unsigned SizeAction = 0;
+ // Size of one action entry (typeid + next action)
+ unsigned SizeActionEntry = 0;
unsigned PrevAction = (unsigned)-1;
if (NumShared) {
unsigned SizePrevIds = PrevLPI->TypeIds.size();
assert(Actions.size());
PrevAction = Actions.size() - 1;
- SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) +
- getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeActionEntry = getSLEB128Size(Actions[PrevAction].NextAction) +
+ getSLEB128Size(Actions[PrevAction].ValueForTypeID);
for (unsigned j = NumShared; j != SizePrevIds; ++j) {
assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
- SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
- SizeAction += -Actions[PrevAction].NextAction;
+ SizeActionEntry -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeActionEntry += -Actions[PrevAction].NextAction;
PrevAction = Actions[PrevAction].Previous;
}
}
@@ -136,9 +137,9 @@ void EHStreamer::computeActionsTable(
isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
- int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + getSLEB128Size(NextAction);
- SizeSiteActions += SizeAction;
+ int NextAction = SizeActionEntry ? -(SizeActionEntry + SizeTypeID) : 0;
+ SizeActionEntry = SizeTypeID + getSLEB128Size(NextAction);
+ SizeSiteActions += SizeActionEntry;
ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
Actions.push_back(Action);
@@ -146,7 +147,7 @@ void EHStreamer::computeActionsTable(
}
// Record the first action of the landing pad site.
- FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ FirstAction = SizeActions + SizeSiteActions - SizeActionEntry + 1;
} // else identical - re-use previous FirstAction
// Information used when creating the call-site table. The action record
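The size bookkeeping above exists because both the type id and the self-relative
NextAction offset are SLEB128-encoded, so an entry's byte count varies with the
value's magnitude and sign. A minimal sketch of signed-LEB128 sizing, mirroring
llvm::getSLEB128Size from llvm/Support/LEB128.h (the helper these hunks call):

    #include <cassert>
    #include <cstdint>

    // Number of bytes needed to encode Value as SLEB128: emit 7 bits per
    // byte until the remaining bits are pure sign extension.
    static unsigned sleb128Size(int64_t Value) {
      unsigned Size = 0;
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7; // arithmetic shift, so the sign survives
        More = !((Value == 0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        ++Size;
      } while (More);
      return Size;
    }

    int main() {
      assert(sleb128Size(0) == 1);
      assert(sleb128Size(63) == 1 && sleb128Size(64) == 2);   // bit 6 spills
      assert(sleb128Size(-64) == 1 && sleb128Size(-65) == 2);
      return 0;
    }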
@@ -344,7 +345,9 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// unwound and handling continues.
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
-void EHStreamer::emitExceptionTable() {
+///
+/// Returns the starting symbol of an exception table.
+MCSymbol *EHStreamer::emitExceptionTable() {
const MachineFunction *MF = Asm->MF;
const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
const std::vector<unsigned> &FilterIds = MF->getFilterIds();
@@ -359,9 +362,9 @@ void EHStreamer::emitExceptionTable() {
LandingPads.push_back(&PadInfos[i]);
// Order landing pads lexicographically by type id.
- llvm::sort(LandingPads.begin(), LandingPads.end(),
- [](const LandingPadInfo *L,
- const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });
+ llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
+ return L->TypeIds < R->TypeIds;
+ });
// Compute the actions table and gather the first action index for each
// landing pad site.
@@ -374,6 +377,7 @@ void EHStreamer::emitExceptionTable() {
computeCallSiteTable(CallSites, LandingPads, FirstActions);
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
unsigned CallSiteEncoding =
IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
@@ -456,8 +460,8 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
Asm->OutStreamer->EmitLabel(CstBeginLabel);
- // SjLj Exception handling
- if (IsSJLJ) {
+ // SjLj / Wasm Exception handling
+ if (IsSJLJ || IsWasm) {
unsigned idx = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
@@ -603,6 +607,7 @@ void EHStreamer::emitExceptionTable() {
}
Asm->EmitAlignment(2);
+ return GCCETSym;
}
void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index b89421a1e067..ce912d032c6d 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
-#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -85,9 +85,10 @@ protected:
/// zero for the landing pad and the action. Calls marked 'nounwind' have
/// no entry and must not be contained in the try-range of any entry - they
/// form gaps in the table. Entries must be ordered by try-range address.
- void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- const SmallVectorImpl<unsigned> &FirstActions);
+ virtual void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions);
/// Emit landing pads and actions.
///
@@ -108,7 +109,9 @@ protected:
/// found the frame is unwound and handling continues.
/// 3. Type id table contains references to all the C++ typeinfo for all
/// found the frame is unwound and handling continues.
/// 3. Type id table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
- void emitExceptionTable();
+ ///
+ /// Returns the starting symbol of an exception table.
+ MCSymbol *emitExceptionTable();
virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel);
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 49cc376fcc98..34677ecc9e69 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -15,10 +15,10 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 59a57ed30d10..3479a00def23 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -15,9 +15,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
diff --git a/lib/CodeGen/AsmPrinter/WasmException.cpp b/lib/CodeGen/AsmPrinter/WasmException.cpp
new file mode 100644
index 000000000000..527e5ae50146
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -0,0 +1,97 @@
+//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WasmException.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+void WasmException::endModule() {
+ // This is the symbol used in 'throw' and 'if_except' instructions to denote
+ // this is a C++ exception. This symbol has to be emitted somewhere once in
+ // the module. Check if the symbol has already been created, i.e., we have at
+ // least one 'throw' or 'if_except' instruction in the module, and emit the
+ // symbol only if so.
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
+ if (Asm->OutContext.lookupSymbol(NameStr)) {
+ MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception");
+ Asm->OutStreamer->EmitLabel(ExceptionSym);
+ }
+}
+
+void WasmException::markFunctionEnd() {
+ // Get rid of any dead landing pads.
+ if (!Asm->MF->getLandingPads().empty()) {
+ auto *NonConstMF = const_cast<MachineFunction *>(Asm->MF);
+ // Wasm does not set BeginLabel and EndLabel information for landing pads,
+ // so we should set the second argument to false.
+ NonConstMF->tidyLandingPads(nullptr, /* TidyIfNoBeginLabels */ false);
+ }
+}
+
+void WasmException::endFunction(const MachineFunction *MF) {
+ bool ShouldEmitExceptionTable = false;
+ for (const LandingPadInfo &Info : MF->getLandingPads()) {
+ if (MF->hasWasmLandingPadIndex(Info.LandingPadBlock)) {
+ ShouldEmitExceptionTable = true;
+ break;
+ }
+ }
+ if (!ShouldEmitExceptionTable)
+ return;
+ MCSymbol *LSDALabel = emitExceptionTable();
+ assert(LSDALabel && ".GCC_except_table has not been emitted!");
+
+ // Wasm requires every data section symbol to have a .size set. So we emit an
+ // end marker and set the size as the difference between the start and the end
+ // marker.
+ MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end");
+ Asm->OutStreamer->EmitLabel(LSDAEndLabel);
+ MCContext &OutContext = Asm->OutStreamer->getContext();
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(LSDAEndLabel, OutContext),
+ MCSymbolRefExpr::create(LSDALabel, OutContext), OutContext);
+ Asm->OutStreamer->emitELFSize(LSDALabel, SizeExp);
+}
+
+// Compute the call-site table for wasm EH. Even though we use the same function
+// name to share the common routines, a call site entry in the table corresponds
+// not to a call site of a possibly-throwing function but to a landing pad. In
+// wasm EH the VM is responsible for stack unwinding. After an exception occurs
+// and the stack is unwound, control is transferred to the wasm 'catch'
+// instruction by the VM, after which the personality function is called from
+// the compiler-generated code. Refer to the WasmEHPrepare pass for more
+// information.
+void WasmException::computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ MachineFunction &MF = *Asm->MF;
+ for (unsigned I = 0, N = LandingPads.size(); I < N; ++I) {
+ const LandingPadInfo *Info = LandingPads[I];
+ MachineBasicBlock *LPad = Info->LandingPadBlock;
+ // We don't emit LSDA for single catch (...).
+ if (!MF.hasWasmLandingPadIndex(LPad))
+ continue;
+ // Wasm EH must maintain the EH pads in the order assigned to them by the
+ // WasmEHPrepare pass.
+ unsigned LPadIndex = MF.getWasmLandingPadIndex(LPad);
+ CallSiteEntry Site = {nullptr, nullptr, Info, FirstActions[I]};
+ if (CallSites.size() < LPadIndex + 1)
+ CallSites.resize(LPadIndex + 1);
+ CallSites[LPadIndex] = Site;
+ }
+}
diff --git a/lib/CodeGen/AsmPrinter/WasmException.h b/lib/CodeGen/AsmPrinter/WasmException.h
new file mode 100644
index 000000000000..cbdb42457cf8
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/WasmException.h
@@ -0,0 +1,42 @@
+//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+
+#include "EHStreamer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer {
+public:
+ WasmException(AsmPrinter *A) : EHStreamer(A) {}
+
+ void endModule() override;
+ void beginFunction(const MachineFunction *MF) override {}
+ void markFunctionEnd() override;
+ void endFunction(const MachineFunction *MF) override;
+
+protected:
+ // Compute the call site table for wasm EH.
+ void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) override;
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.h b/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 124e8f04bfad..28f119e35966 100644
--- a/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
-#include "AsmPrinterHandler.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index eff73a58d8d2..cf8e8c69bc2a 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -42,6 +42,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
// MSVC's EH tables are always composed of 32-bit words. All known 64-bit
// platforms use an imagerel32 relocation to refer to symbols.
useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
+ isAArch64 = Asm->TM.getTargetTriple().isAArch64();
}
WinException::~WinException() {}
@@ -242,6 +243,17 @@ void WinException::endFunclet() {
if (F.hasPersonalityFn())
Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
+ // On funclet exit, we emit a fake "function" end marker, so that the call
+ // to EmitWinEHHandlerData below can calculate the size of the funclet or
+ // function.
+ if (isAArch64) {
+ Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
+ Asm->OutStreamer->getCurrentSectionOnly());
+ Asm->OutStreamer->SwitchSection(XData);
+ }
+
// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer->EmitWinEHHandlerData();
@@ -286,7 +298,10 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
return create32bitRef(Asm->getSymbol(GV));
}
-const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+const MCExpr *WinException::getLabel(const MCSymbol *Label) {
+ if (isAArch64)
+ return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
return MCBinaryExpr::createAdd(create32bitRef(Label),
MCConstantExpr::create(1, Asm->OutContext),
Asm->OutContext);
@@ -531,7 +546,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
};
// Emit a label assignment with the SEH frame offset so we can use it for
- // llvm.x86.seh.recoverfp.
+ // llvm.eh.recoverfp.
StringRef FLinkageName =
GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
MCSymbol *ParentFrameOffset =
@@ -588,7 +603,6 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
const MCSymbol *EndLabel, int State) {
auto &OS = *Asm->OutStreamer;
MCContext &Ctx = Asm->OutContext;
-
bool VerboseAsm = OS.isVerboseAsm();
auto AddComment = [&](const Twine &Comment) {
if (VerboseAsm)
@@ -613,9 +627,9 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
}
AddComment("LabelStart");
- OS.EmitValue(getLabelPlusOne(BeginLabel), 4);
+ OS.EmitValue(getLabel(BeginLabel), 4);
AddComment("LabelEnd");
- OS.EmitValue(getLabelPlusOne(EndLabel), 4);
+ OS.EmitValue(getLabel(EndLabel), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
: "CatchAll");
OS.EmitValue(FilterOrFinally, 4);
@@ -799,7 +813,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// TypeDescriptor *Type;
// int32_t CatchObjOffset;
// void (*Handler)();
- // int32_t ParentFrameOffset; // x64 only
+ // int32_t ParentFrameOffset; // x64 and AArch64 only
// };
OS.EmitLabel(HandlerMapXData);
for (const WinEHHandlerType &HT : TBME.HandlerArray) {
@@ -901,7 +915,7 @@ void WinException::computeIP2StateTable(
ChangeLabel = StateChange.PreviousEndLabel;
// Emit an entry indicating that PCs after 'Label' have this EH state.
IPToStateTable.push_back(
- std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState));
+ std::make_pair(getLabel(ChangeLabel), StateChange.NewState));
// FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h
index eed3c4453ffc..37c796f89765 100644
--- a/lib/CodeGen/AsmPrinter/WinException.h
+++ b/lib/CodeGen/AsmPrinter/WinException.h
@@ -38,6 +38,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if this is a 64-bit target and we should use image relative offsets.
bool useImageRel32 = false;
+ /// True if we are generating exception handling on Windows for ARM64.
+ bool isAArch64 = false;
+
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
@@ -65,14 +68,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable);
- /// Emits the label used with llvm.x86.seh.recoverfp, which is used by
+ /// Emits the label used with llvm.eh.recoverfp, which is used by
/// outlined funclets.
void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
StringRef FLinkageName);
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
- const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+ const MCExpr *getLabel(const MCSymbol *Label);
const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
const MCSymbol *OffsetFrom);
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index e28fc6fb9d4f..95581c09dd1c 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -88,7 +88,10 @@ namespace {
void expandPartwordAtomicRMW(
AtomicRMWInst *I,
TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
+ void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
static Value *insertRMWCmpXchgLoop(
@@ -96,6 +99,7 @@ namespace {
AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
CreateCmpXchgInstFun CreateCmpXchg);
+ bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *RMWI);
@@ -258,7 +262,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
isAcquireOrStronger(RMWI->getOrdering()))) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(AtomicOrdering::Monotonic);
- } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+ } else if (CASI &&
+ TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+ TargetLoweringBase::AtomicExpansionKind::None &&
(isReleaseOrStronger(CASI->getSuccessOrdering()) ||
isAcquireOrStronger(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -306,6 +312,16 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
MadeChange = true;
} else {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(RMWI);
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ if (ValueSize < MinCASSize &&
+ (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And)) {
+ RMWI = widenPartwordAtomicRMW(RMWI);
+ MadeChange = true;
+ }
+
MadeChange |= tryExpandAtomicRMW(RMWI);
}
} else if (CASI) {
@@ -322,16 +338,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
}
- unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
- unsigned ValueSize = getAtomicOpSize(CASI);
- if (ValueSize < MinCASSize) {
- assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
- "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
- expandPartwordCmpXchg(CASI);
- } else {
- if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
- MadeChange |= expandAtomicCmpXchg(CASI);
- }
+ MadeChange |= tryExpandAtomicCmpXchg(CASI);
}
}
return MadeChange;
@@ -400,8 +407,9 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
return expandAtomicLoadToLL(LI);
case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
- llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -563,6 +571,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
}
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
+ expandAtomicRMWToMaskedIntrinsic(AI);
+ return true;
+ }
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
@@ -651,6 +663,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
IRBuilder<> &Builder, Value *Loaded,
Value *Shifted_Inc, Value *Inc,
const PartwordMaskValues &PMV) {
+ // TODO: update to use
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
+ // to merge bits from two values without requiring PMV.Inv_Mask.
switch (Op) {
case AtomicRMWInst::Xchg: {
Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
@@ -659,12 +674,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
}
case AtomicRMWInst::Or:
case AtomicRMWInst::Xor:
- // Or/Xor won't affect any other bits, so can just be done
- // directly.
- return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ case AtomicRMWInst::And:
+ llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
- case AtomicRMWInst::And:
case AtomicRMWInst::Nand: {
// The other arithmetic ops need to be masked into place.
Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
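The TODO above refers to the classic masked-merge bit hack. A standalone sketch
on plain integers (illustrative only; the pass would emit the equivalent IR): it
takes bits from A where Mask is set and from B elsewhere, without materializing
the inverted mask that PMV.Inv_Mask currently provides:

    #include <cassert>
    #include <cstdint>

    // Masked merge: bits of A where Mask is 1, bits of B where Mask is 0.
    // Only Mask is needed; ~Mask never has to be computed.
    static uint32_t maskedMerge(uint32_t A, uint32_t B, uint32_t Mask) {
      return B ^ ((A ^ B) & Mask);
    }

    int main() {
      // Replace bits 8..15 of B with the corresponding bits of A.
      assert(maskedMerge(0xAABBCCDD, 0x11223344, 0x0000FF00) == 0x1122CC44);
      return 0;
    }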
@@ -733,6 +746,41 @@ void AtomicExpand::expandPartwordAtomicRMW(
AI->eraseFromParent();
}
+// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
+AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
+ IRBuilder<> Builder(AI);
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+
+ assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And) &&
+ "Unable to widen operation");
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+
+ Value *NewOperand;
+
+ if (Op == AtomicRMWInst::And)
+ NewOperand =
+ Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
+ else
+ NewOperand = ValOperand_Shifted;
+
+ AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
+ NewOperand, AI->getOrdering());
+
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+ return NewAI;
+}
+
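The OR-with-Inv_Mask trick in widenPartwordAtomicRMW is easiest to check with
concrete numbers. A plain-integer model (not the IR the pass emits) of an i8
'and' widened to an i32 atomicrmw: setting every bit outside the target byte in
the operand makes the word-wide AND a no-op on the neighboring bytes:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Word = 0xAABBCCDD;        // memory word containing the i8
      unsigned ShiftAmt = 8;             // the i8 lives in bits 8..15
      uint32_t Mask = 0xFFu << ShiftAmt;
      uint32_t InvMask = ~Mask;
      uint8_t Val = 0x0F;                // i8 operand of the atomicrmw and

      uint32_t NewOperand = InvMask | (uint32_t(Val) << ShiftAmt);
      uint32_t NewWord = Word & NewOperand; // the widened atomic and

      assert(NewWord == 0xAABB0CDD);     // only bits 8..15 changed
      uint8_t Old = (Word >> ShiftAmt) & 0xFF; // value the original op returns
      assert(Old == 0xCC);
      return 0;
    }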
void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// The basic idea here is that we're expanding a cmpxchg of a
// smaller memory size up to a word-sized cmpxchg. To do this, we
@@ -870,6 +918,62 @@ void AtomicExpand::expandAtomicOpToLLSC(
I->eraseFromParent();
}
+void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
+ IRBuilder<> Builder(AI);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ // The value operand must be sign-extended for signed min/max so that the
+ // target's signed comparison instructions can be used. Otherwise, just
+ // zero-ext.
+ Instruction::CastOps CastOp = Instruction::ZExt;
+ AtomicRMWInst::BinOp RMWOp = AI->getOperation();
+ if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
+ CastOp = Instruction::SExt;
+
+ Value *ValOperand_Shifted = Builder.CreateShl(
+ Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+ Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
+ Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
+ AI->getOrdering());
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
+void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
+ IRBuilder<> Builder(CI);
+
+ PartwordMaskValues PMV = createMaskInstrs(
+ Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *CmpVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
+ "CmpVal_Shifted");
+ Value *NewVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
+ "NewVal_Shifted");
+ Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
+ Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
+ CI->getSuccessOrdering());
+ Value *FinalOldVal = Builder.CreateTrunc(
+ Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+ Value *Success = Builder.CreateICmpEQ(
+ CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+}
+
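One subtlety in the cmpxchg expansion above: the masked intrinsic returns the
entire memory word, so the success flag must compare only the lane covered by
Mask rather than the whole word. A small plain-integer model of that check,
under the same assumptions as the sketch above:

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned ShiftAmt = 8;
      uint32_t Mask = 0xFFu << ShiftAmt;
      uint8_t Cmp = 0xCC;                 // expected i8 value
      uint32_t CmpShifted = uint32_t(Cmp) << ShiftAmt;

      uint32_t OldVal = 0xAABBCCDD;       // word returned by the intrinsic
      bool Success = (OldVal & Mask) == CmpShifted;
      assert(Success);                    // bits 8..15 are 0xCC: a match

      uint8_t FinalOld = (OldVal >> ShiftAmt) & 0xFF; // first result field
      assert(FinalOld == 0xCC);
      return 0;
    }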
Value *AtomicExpand::insertRMWLLSCLoop(
IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
AtomicOrdering MemOpOrder,
@@ -1275,6 +1379,28 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
return NewLoaded;
}
+bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(CI);
+
+ switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ if (ValueSize < MinCASSize)
+ expandPartwordCmpXchg(CI);
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ assert(ValueSize >= MinCASSize &&
+ "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+ return expandAtomicCmpXchg(CI);
+ }
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
+ expandAtomicCmpXchgToMaskedIntrinsic(CI);
+ return true;
+ }
+}
+
// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index c7a0c6457164..efbfd5f4ab2c 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -298,7 +298,7 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
/// Whether MI should be counted as an instruction when calculating common tail.
static bool countsAsInstruction(const MachineInstr &MI) {
- return !(MI.isDebugValue() || MI.isCFIInstruction());
+ return !(MI.isDebugInstr() || MI.isCFIInstruction());
}
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
@@ -865,7 +865,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
// Merge MMOs from memory operations in the common block.
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
- MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
+ MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});
// Drop undef flags if they aren't present in all merged instructions.
for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
MachineOperand &MO = MBBICommon->getOperand(I);
@@ -1363,9 +1363,9 @@ static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII,
MachineBasicBlock &PredMBB) {
auto InsertBefore = PredMBB.getFirstTerminator();
for (MachineInstr &MI : MBB.instrs())
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
TII->duplicate(PredMBB, InsertBefore, MI);
- LLVM_DEBUG(dbgs() << "Copied debug value from empty block to pred: "
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to pred: "
<< MI);
}
}
@@ -1375,9 +1375,9 @@ static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII,
MachineBasicBlock &SuccMBB) {
auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin());
for (MachineInstr &MI : MBB.instrs())
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
TII->duplicate(SuccMBB, InsertBefore, MI);
- LLVM_DEBUG(dbgs() << "Copied debug value from empty block to succ: "
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to succ: "
<< MI);
}
}
diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp
index 7f098cb71657..210699cbf239 100644
--- a/lib/CodeGen/BreakFalseDeps.cpp
+++ b/lib/CodeGen/BreakFalseDeps.cpp
@@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
}
bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+ unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
unsigned Clearance = RDA->getClearance(MI, reg);
LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index 3a9b20aa661d..93939e573b7b 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
@@ -28,10 +28,8 @@ namespace {
class ErlangGC : public GCStrategy {
public:
ErlangGC() {
- InitRoots = false;
- NeededSafePoints = 1 << GC::PostCall;
+ NeededSafePoints = true;
UsesMetadata = true;
- CustomRoots = false;
}
};
@@ -41,7 +39,7 @@ public:
class OcamlGC : public GCStrategy {
public:
OcamlGC() {
- NeededSafePoints = 1 << GC::PostCall;
+ NeededSafePoints = true;
UsesMetadata = true;
}
};
@@ -56,10 +54,7 @@ public:
/// while introducing only minor runtime overhead.
class ShadowStackGC : public GCStrategy {
public:
- ShadowStackGC() {
- InitRoots = true;
- CustomRoots = true;
- }
+ ShadowStackGC() {}
};
/// A GCStrategy which serves as an example for the usage of a statepoint based
@@ -74,10 +69,8 @@ public:
UseStatepoints = true;
// These options are all gc.root specific, we specify them so that the
// gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
+ NeededSafePoints = false;
UsesMetadata = false;
- CustomRoots = false;
}
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
@@ -108,10 +101,8 @@ public:
UseStatepoints = true;
// These options are all gc.root specific, we specify them so that the
// gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
+ NeededSafePoints = false;
UsesMetadata = false;
- CustomRoots = false;
}
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
@@ -136,9 +127,5 @@ static GCRegistry::Add<StatepointGC> D("statepoint-example",
"an example strategy for statepoint");
static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC");
-// Provide hooks to ensure the containing library is fully loaded.
-void llvm::linkErlangGC() {}
-void llvm::linkOcamlGC() {}
-void llvm::linkShadowStackGC() {}
-void llvm::linkStatepointExampleGC() {}
-void llvm::linkCoreCLRGC() {}
+// Provide a hook to ensure the containing library is fully loaded.
+void llvm::linkAllBuiltinGCs() {}
diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp
index 00ebf63fc174..c4799855a2b3 100644
--- a/lib/CodeGen/CFIInstrInserter.cpp
+++ b/lib/CodeGen/CFIInstrInserter.cpp
@@ -207,6 +207,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
case MCCFIInstruction::OpUndefined:
case MCCFIInstruction::OpRegister:
case MCCFIInstruction::OpWindowSave:
+ case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpGnuArgsSize:
break;
}
@@ -317,6 +318,10 @@ unsigned CFIInstrInserter::verify(MachineFunction &MF) {
// outgoing offset and register values of CurrMBB
if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset ||
SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) {
+ // Inconsistent offsets/registers are ok for 'noreturn' blocks because
+ // we don't generate epilogues inside such blocks.
+ if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock())
+ continue;
report(CurrMBBInfo, SuccMBBInfo);
ErrorNum++;
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index fbdc511eea7f..e76f9f8ed4e7 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
InterleavedAccessPass.cpp
+ InterleavedLoadCombinePass.cpp
IntrinsicLowering.cpp
LatencyPriorityQueue.cpp
LazyMachineBlockFrequencyInfo.cpp
@@ -83,7 +84,6 @@ add_llvm_library(LLVMCodeGen
MachineOperand.cpp
MachineOptimizationRemarkEmitter.cpp
MachineOutliner.cpp
- MachinePassRegistry.cpp
MachinePipeliner.cpp
MachinePostDominators.cpp
MachineRegionInfo.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 57541182cab2..02347b9f0b5c 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -70,15 +70,6 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
return sub == hsub ? hreg : 0;
const TargetRegisterClass *rc = mri.getRegClass(reg);
- if (!tri.enableMultipleCopyHints()) {
- // Only allow physreg hints in rc.
- if (sub == 0)
- return rc->contains(hreg) ? hreg : 0;
-
- // reg:sub should match the physreg hreg.
- return tri.getMatchingSuperReg(hreg, sub, rc);
- }
-
unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
if (rc->contains(CopiedPReg))
return CopiedPReg;
@@ -199,31 +190,19 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
unsigned Reg;
float Weight;
bool IsPhys;
- unsigned HintOrder;
- CopyHint(unsigned R, float W, bool P, unsigned HR) :
- Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {}
+ CopyHint(unsigned R, float W, bool P) :
+ Reg(R), Weight(W), IsPhys(P) {}
bool operator<(const CopyHint &rhs) const {
// Always prefer any physreg hint.
if (IsPhys != rhs.IsPhys)
return (IsPhys && !rhs.IsPhys);
if (Weight != rhs.Weight)
return (Weight > rhs.Weight);
-
- // This is just a temporary way to achive NFC for targets that don't
- // enable multiple copy hints. HintOrder should be removed when all
- // targets return true in enableMultipleCopyHints().
- return (HintOrder < rhs.HintOrder);
-
-#if 0 // Should replace the HintOrder check, see above.
- // (just for the purpose of maintaining the set)
- return Reg < rhs.Reg;
-#endif
+ return Reg < rhs.Reg; // Tie-breaker.
}
};
std::set<CopyHint> CopyHints;
- // Temporary: see comment for HintOrder above.
- unsigned CopyHintOrder = 0;
for (MachineRegisterInfo::reg_instr_iterator
I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
I != E; ) {
@@ -263,8 +242,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
}
// Get allocation hints from copies.
- if (!mi->isCopy() ||
- (TargetHint.first != 0 && !tri.enableMultipleCopyHints()))
+ if (!mi->isCopy())
continue;
unsigned hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
@@ -275,8 +253,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// FIXME: we probably shouldn't use floats at all.
volatile float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint))
- CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint),
- (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++)));
+ CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
}
Hint.clear();
@@ -287,13 +264,13 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
if (TargetHint.first == 0 && TargetHint.second)
mri.clearSimpleHint(li.reg);
+ std::set<unsigned> HintedRegs;
for (auto &Hint : CopyHints) {
- if (TargetHint.first != 0 && Hint.Reg == TargetHint.second)
- // Don't add again the target-type hint.
+ if (!HintedRegs.insert(Hint.Reg).second ||
+ (TargetHint.first != 0 && Hint.Reg == TargetHint.second))
+ // Don't add the same reg twice or the target-type hint again.
continue;
mri.addRegAllocationHint(li.reg, Hint.Reg);
- if (!tri.enableMultipleCopyHints())
- break;
}
// Weakly boost the spill weight of hinted registers.
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 2f845354c570..66166482c78b 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -42,6 +42,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
+ initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c41beb094604..c35f8666fa3c 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -223,8 +223,17 @@ static cl::opt<bool>
namespace {
+enum ExtType {
+ ZeroExtension, // Zero extension has been seen.
+ SignExtension, // Sign extension has been seen.
+ BothExtension // This extension type is used if we saw sext after
+ // ZeroExtension had been set, or if we saw zext after
+ // SignExtension had been set. It makes the type
+ // information of a promoted instruction invalid.
+};
+
using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
-using TypeIsSExt = PointerIntPair<Type *, 1, bool>;
+using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
using SExts = SmallVector<Instruction *, 16>;
using ValueToSExts = DenseMap<Value *, SExts>;
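A compact model of how the three-state cache is meant to behave, together with
the bit-level reason a zext/sext mismatch poisons an entry (illustrative; the
pass keeps the state in the PointerIntPair rather than in a helper like this):

    #include <cassert>
    #include <cstdint>

    enum ExtType { ZeroExtension, SignExtension, BothExtension };

    // Merge a newly seen extension kind into the cached one: once a value
    // has been seen under both zext and sext, the cached promoted type can
    // no longer be trusted for either kind of reuse.
    static ExtType mergeExtType(ExtType Cached, ExtType Seen) {
      return Cached == Seen ? Cached : BothExtension;
    }

    int main() {
      // The same i8 yields different i32 bit patterns under the two
      // extensions, so a cached promotion is reusable only when the
      // extension kind matches.
      int8_t V = -1;
      assert(uint32_t(uint8_t(V)) == 0x000000FFu); // zext: high bits clear
      assert(int32_t(V) == -1);                    // sext: high bits set
      assert(mergeExtType(ZeroExtension, SignExtension) == BothExtension);
      return 0;
    }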
@@ -269,7 +278,7 @@ class TypePromotionTransaction;
/// Keep track of GEPs accessing the same data structures such as structs or
/// arrays that are candidates to be split later because of their large
/// size.
- DenseMap<
+ MapVector<
AssertingVH<Value>,
SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
LargeOffsetGEPMap;
@@ -312,6 +321,24 @@ class TypePromotionTransaction;
}
private:
+ template <typename F>
+ void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
+ // Substituting can cause recursive simplifications, which can invalidate
+ // our iterator. Use a WeakTrackingVH to hold onto it in case this
+ // happens.
+ Value *CurValue = &*CurInstIterator;
+ WeakTrackingVH IterHandle(CurValue);
+
+ f();
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurValue) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ }
+
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -389,7 +416,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
OptSize = F.optForSize();
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
@@ -417,11 +444,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- EverMadeChange |= placeDbgValues(F);
-
if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
@@ -432,11 +454,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
- SeenChainsForSExt.clear();
- ValToSExtendedUses.clear();
- RemovedInsts.clear();
- LargeOffsetGEPMap.clear();
- LargeOffsetGEPID.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -456,6 +473,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
I->deleteValue();
EverMadeChange |= MadeChange;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
+ LargeOffsetGEPMap.clear();
+ LargeOffsetGEPID.clear();
}
SunkAddrs.clear();
@@ -509,6 +531,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
+ // Do this last to clean up use-before-def scenarios introduced by other
+ // preparatory transforms.
+ EverMadeChange |= placeDbgValues(F);
+
return EverMadeChange;
}
@@ -642,7 +668,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
isa<IndirectBrInst>(Pred->getTerminator())))
return true;
- if (BB->getTerminator() != BB->getFirstNonPHI())
+ if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
return true;
// We use a simple cost heuristic which determine skipping merging is
@@ -1156,11 +1182,15 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+ DebugLoc Loc = CI->getDebugLoc();
auto *UAddWithOverflow =
CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ UAddWithOverflow->setDebugLoc(Loc);
auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ UAdd->setDebugLoc(Loc);
auto *Overflow =
ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+ Overflow->setDebugLoc(Loc);
CI->replaceAllUsesWith(Overflow);
AddI->replaceAllUsesWith(UAdd);
@@ -1393,6 +1423,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
else
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
"", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
// Sink the trunc
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
@@ -1401,6 +1432,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
TruncI->getType(), "", &*TruncInsertPt);
+ InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
MadeChange = true;
@@ -1492,6 +1524,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
else
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
"", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
MadeChange = true;
}
@@ -1501,8 +1534,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
}
// If we removed all uses, nuke the shift.
- if (ShiftI->use_empty())
+ if (ShiftI->use_empty()) {
+ salvageDebugInfo(*ShiftI);
ShiftI->eraseFromParent();
+ }
return MadeChange;
}
@@ -1673,21 +1708,18 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Lower all uses of llvm.objectsize.*
ConstantInt *RetVal =
lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
- // Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case
- // this
- // happens.
- Value *CurValue = &*CurInstIterator;
- WeakTrackingVH IterHandle(CurValue);
-
- replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurValue) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
+ return true;
+ }
+ case Intrinsic::is_constant: {
+ // If is_constant hasn't folded away yet, lower it to false now.
+ Constant *RetVal = ConstantInt::get(II->getType(), 0);
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
return true;
}
case Intrinsic::aarch64_stlxr:
@@ -1704,11 +1736,22 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
return true;
}
case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group:
- II->replaceAllUsesWith(II->getArgOperand(0));
+ case Intrinsic::strip_invariant_group: {
+ Value *ArgVal = II->getArgOperand(0);
+ auto it = LargeOffsetGEPMap.find(II);
+ if (it != LargeOffsetGEPMap.end()) {
+ // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
+ // Make sure not to have to deal with iterator invalidation
+ // after possibly adding ArgVal to LargeOffsetGEPMap.
+ auto GEPs = std::move(it->second);
+ LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
+ LargeOffsetGEPMap.erase(II);
+ }
+
+ II->replaceAllUsesWith(ArgVal);
II->eraseFromParent();
return true;
-
+ }
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
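The std::move in the invariant-group hunk above is the load-bearing detail: the
GEP vector is detached before operator[] can grow the map and invalidate the
iterator. The same discipline in miniature, with std::map standing in for the
MapVector (hypothetical keys, illustration only; std::map itself keeps
iterators stable, but LLVM's DenseMap-backed containers do not):

    #include <cassert>
    #include <map>
    #include <utility>
    #include <vector>

    // Re-key Map[OldKey] to Map[NewKey], appending when NewKey already has
    // entries. The vector is moved out *before* Map[NewKey] runs, so the
    // find() iterator never has to survive a possibly-growing insertion.
    static void mergeOnRAUW(std::map<int, std::vector<int>> &Map, int OldKey,
                            int NewKey) {
      auto It = Map.find(OldKey);
      if (It == Map.end())
        return;
      auto Entries = std::move(It->second); // detach before mutating the map
      auto &Dest = Map[NewKey];             // may create a new slot
      Dest.insert(Dest.end(), Entries.begin(), Entries.end());
      Map.erase(OldKey);
    }

    int main() {
      std::map<int, std::vector<int>> M{{1, {10, 20}}, {2, {30}}};
      mergeOnRAUW(M, /*OldKey=*/1, /*NewKey=*/2);
      assert(M.count(1) == 0 && M[2].size() == 3);
      return 0;
    }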
@@ -1854,15 +1897,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
CallInst *CI = TailCalls[i];
CallSite CS(CI);
- // Conservatively require the attributes of the call to match those of the
- // return. Ignore noalias because it doesn't affect the call sequence.
- AttributeList CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
- .removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
- .removeAttribute(Attribute::NoAlias))
- continue;
-
// Make sure the call instruction is followed by an unconditional branch to
// the return block.
BasicBlock *CallBB = CI->getParent();
@@ -2328,6 +2362,8 @@ class TypePromotionTransaction {
/// Keep track of the original uses (pair Instruction, Index).
SmallVector<InstructionAndIdx, 4> OriginalUses;
+ /// Keep track of the debug users.
+ SmallVector<DbgValueInst *, 1> DbgValues;
using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
@@ -2341,6 +2377,10 @@ class TypePromotionTransaction {
Instruction *UserI = cast<Instruction>(U.getUser());
OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
}
+ // Record the debug uses separately. They are not in the instruction's
+ // use list, but they are replaced by RAUW.
+ findDbgValues(DbgValues, Inst);
+
// Now, we can replace the uses.
Inst->replaceAllUsesWith(New);
}
@@ -2353,6 +2393,15 @@ class TypePromotionTransaction {
UseIt != EndIt; ++UseIt) {
UseIt->Inst->setOperand(UseIt->Idx, Inst);
}
+ // RAUW has replaced all original uses with references to the new value,
+ // including the debug uses. Since we are undoing the replacements,
+ // the original debug uses must also be reinstated to maintain the
+ // correctness and utility of debug value instructions.
+ for (auto *DVI : DbgValues) {
+ LLVMContext &Ctx = Inst->getType()->getContext();
+ auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
+ DVI->setOperand(0, MV);
+ }
}
};
@@ -2623,15 +2672,159 @@ private:
Value *PromotedOperand) const;
};
+class PhiNodeSet;
+
+/// An iterator for PhiNodeSet.
+class PhiNodeSetIterator {
+ PhiNodeSet *const Set;
+ size_t CurrentIndex = 0;
+
+public:
+ /// The constructor. Start should point to either a valid element, or be equal
+ /// to the size of the underlying SmallVector of the PhiNodeSet.
+ PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
+ PHINode *operator*() const;
+ PhiNodeSetIterator &operator++();
+ bool operator==(const PhiNodeSetIterator &RHS) const;
+ bool operator!=(const PhiNodeSetIterator &RHS) const;
+};
+
+/// Keeps a set of PHINodes.
+///
+/// This is a minimal set implementation for a specific use case:
+/// It is very fast when there are very few elements, but also provides good
+/// performance when there are many. It is similar to SmallPtrSet, but also
+/// provides iteration by insertion order, which is deterministic and stable
+/// across runs. It is also similar to SmallSetVector, but supports removing
+/// elements in O(1) time. This is achieved by not actually removing the element
+/// from the underlying vector, which comes at the cost of using more memory,
+/// but that is fine, since PhiNodeSets are used as short-lived objects.
+class PhiNodeSet {
+ friend class PhiNodeSetIterator;
+
+ using MapType = SmallDenseMap<PHINode *, size_t, 32>;
+ using iterator = PhiNodeSetIterator;
+
+ /// Keeps the elements in the order of their insertion in the underlying
+ /// vector. To achieve constant time removal, it never deletes any element.
+ SmallVector<PHINode *, 32> NodeList;
+
+ /// Keeps the elements in the underlying set implementation. This (and not the
+ /// NodeList defined above) is the source of truth on whether an element
+ /// is actually in the collection.
+ MapType NodeMap;
+
+ /// Points to the first valid (not deleted) element when the set is not empty
+ /// and the value is not zero. Equals the size of the underlying vector
+ /// when the set is empty. When the value is 0, as in the beginning, the
+ /// first element may or may not be valid.
+ size_t FirstValidElement = 0;
+
+public:
+ /// Inserts a new element to the collection.
+ /// \returns true if the element is actually added, i.e. was not in the
+ /// collection before the operation.
+ bool insert(PHINode *Ptr) {
+ if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
+ NodeList.push_back(Ptr);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes the element from the collection.
+ /// \returns whether the element is actually removed, i.e. was in the
+ /// collection before the operation.
+ bool erase(PHINode *Ptr) {
+ auto it = NodeMap.find(Ptr);
+ if (it != NodeMap.end()) {
+ NodeMap.erase(Ptr);
+ SkipRemovedElements(FirstValidElement);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes all elements and clears the collection.
+ void clear() {
+ NodeMap.clear();
+ NodeList.clear();
+ FirstValidElement = 0;
+ }
+
+ /// \returns an iterator that will iterate the elements in the order of
+ /// insertion.
+ iterator begin() {
+ if (FirstValidElement == 0)
+ SkipRemovedElements(FirstValidElement);
+ return PhiNodeSetIterator(this, FirstValidElement);
+ }
+
+ /// \returns an iterator that points to the end of the collection.
+ iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
+
+ /// Returns the number of elements in the collection.
+ size_t size() const {
+ return NodeMap.size();
+ }
+
+ /// \returns 1 if the given element is in the collection, and 0 otherwise.
+ size_t count(PHINode *Ptr) const {
+ return NodeMap.count(Ptr);
+ }
+
+private:
+ /// Updates the CurrentIndex so that it will point to a valid element.
+ ///
+ /// If the element of NodeList at CurrentIndex is valid, it does not
+ /// change it. If there are no more valid elements, it updates CurrentIndex
+ /// to point to the end of the NodeList.
+ void SkipRemovedElements(size_t &CurrentIndex) {
+ while (CurrentIndex < NodeList.size()) {
+ auto it = NodeMap.find(NodeList[CurrentIndex]);
+ // If the element has been deleted and added again later, NodeMap will
+ // point to a different index, so CurrentIndex will still be invalid.
+ if (it != NodeMap.end() && it->second == CurrentIndex)
+ break;
+ ++CurrentIndex;
+ }
+ }
+};
+
+PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
+ : Set(Set), CurrentIndex(Start) {}
+
+PHINode *PhiNodeSetIterator::operator*() const {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ return Set->NodeList[CurrentIndex];
+}
+
+PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ ++CurrentIndex;
+ Set->SkipRemovedElements(CurrentIndex);
+ return *this;
+}
+
+bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
+ return CurrentIndex == RHS.CurrentIndex;
+}
+
+bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
+ return !((*this) == RHS);
+}
+
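A generic model of PhiNodeSet's tombstone scheme (plain ints in place of
PHINode*, illustrative only): erase removes only the map entry, and iteration
skips vector slots whose map entry is gone or points at a different index,
which is what happens when an element is erased and later re-inserted:

    #include <cassert>
    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    class OrderedTombstoneSet {
      std::vector<int> List;                 // insertion order, never shrunk
      std::unordered_map<int, size_t> Map;   // source of truth for membership

    public:
      bool insert(int V) {
        if (!Map.emplace(V, List.size()).second)
          return false;
        List.push_back(V);
        return true;
      }
      bool erase(int V) { return Map.erase(V) != 0; } // O(1), leaves a hole

      template <typename Fn> void forEach(Fn F) const {
        for (size_t I = 0; I < List.size(); ++I) {
          auto It = Map.find(List[I]);
          if (It != Map.end() && It->second == I) // skip tombstoned slots
            F(List[I]);
        }
      }
    };

    int main() {
      OrderedTombstoneSet S;
      S.insert(3);
      S.insert(1);
      S.insert(2);
      S.erase(1);
      std::vector<int> Seen;
      S.forEach([&](int V) { Seen.push_back(V); });
      assert((Seen == std::vector<int>{3, 2})); // order kept, hole skipped
      return 0;
    }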
/// Keep track of simplification of Phi nodes.
/// Accept the set of all phi nodes and erase phi node from this set
/// if it is simplified.
class SimplificationTracker {
DenseMap<Value *, Value *> Storage;
const SimplifyQuery &SQ;
- // Tracks newly created Phi nodes. We use a SetVector to get deterministic
- // order when iterating over the set in MatchPhiSet.
- SmallSetVector<PHINode *, 32> AllPhiNodes;
+ // Tracks newly created Phi nodes. The elements are iterated by insertion
+ // order.
+ PhiNodeSet AllPhiNodes;
// Tracks newly created Select nodes.
SmallPtrSet<SelectInst *, 32> AllSelectNodes;
@@ -2663,7 +2856,7 @@ public:
Put(PI, V);
PI->replaceAllUsesWith(V);
if (auto *PHI = dyn_cast<PHINode>(PI))
- AllPhiNodes.remove(PHI);
+ AllPhiNodes.erase(PHI);
if (auto *Select = dyn_cast<SelectInst>(PI))
AllSelectNodes.erase(Select);
PI->eraseFromParent();
@@ -2686,11 +2879,11 @@ public:
assert(Get(To) == To && "Replacement PHI node is already replaced.");
Put(From, To);
From->replaceAllUsesWith(To);
- AllPhiNodes.remove(From);
+ AllPhiNodes.erase(From);
From->eraseFromParent();
}
- SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; }
+ PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
@@ -2718,8 +2911,7 @@ public:
/// A helper class for combining addressing modes.
class AddressingModeCombiner {
- typedef std::pair<Value *, BasicBlock *> ValueInBB;
- typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping;
+ typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
typedef std::pair<PHINode *, PHINode *> PHIPair;
private:
@@ -2739,10 +2931,10 @@ private:
const SimplifyQuery &SQ;
/// Original Address.
- ValueInBB Original;
+ Value *Original;
public:
- AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
+ AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
: CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
/// Get the combined AddrMode
@@ -2838,46 +3030,40 @@ public:
}
private:
- /// Initialize Map with anchor values. For address seen in some BB
+ /// Initialize Map with anchor values. For each address seen,
/// we set the value of the differing field seen in this address.
- /// If address is not an instruction than basic block is set to null.
/// At the same time we find a common type for different field we will
/// use to create new Phi/Select nodes. Keep it in CommonType field.
/// Return false if there is no common type found.
bool initializeMap(FoldAddrToValueMapping &Map) {
// Keep track of keys where the value is null. We will need to replace it
// with constant null when we know the common type.
- SmallVector<ValueInBB, 2> NullValue;
+ SmallVector<Value *, 2> NullValue;
Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
for (auto &AM : AddrModes) {
- BasicBlock *BB = nullptr;
- if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue))
- BB = I->getParent();
-
Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
if (DV) {
auto *Type = DV->getType();
if (CommonType && CommonType != Type)
return false;
CommonType = Type;
- Map[{ AM.OriginalValue, BB }] = DV;
+ Map[AM.OriginalValue] = DV;
} else {
- NullValue.push_back({ AM.OriginalValue, BB });
+ NullValue.push_back(AM.OriginalValue);
}
}
assert(CommonType && "At least one non-null value must be!");
- for (auto VIBB : NullValue)
- Map[VIBB] = Constant::getNullValue(CommonType);
+ for (auto *V : NullValue)
+ Map[V] = Constant::getNullValue(CommonType);
return true;
}
- /// We have mapping between value A and basic block where value A
- /// seen to other value B where B was a field in addressing mode represented
- /// by A. Also we have an original value C representing an address in some
- /// basic block. Traversing from C through phi and selects we ended up with
- /// A's in a map. This utility function tries to find a value V which is a
- /// field in addressing mode C and traversing through phi nodes and selects
- /// we will end up in corresponded values B in a map.
+ /// We have a mapping from each value A to another value B, where B was a
+ /// field in the addressing mode represented by A. We also have an original
+ /// value C representing the address we start from. Traversing from C through
+ /// phis and selects, we ended up with the A's in the map. This utility
+ /// function tries to find a value V which is a field in addressing mode C,
+ /// such that traversing through phis and selects leads to the B's in the map.
/// The utility will create a new Phi/Selects if needed.
// The simple example looks as follows:
// BB1:
@@ -2890,22 +3076,24 @@ private:
// p = phi [p1, BB1], [p2, BB2]
// v = load p
// Map is
- // <p1, BB1> -> b1
- // <p2, BB2> -> b2
+ // p1 -> b1
+ // p2 -> b2
// Request is
- // <p, BB3> -> ?
- // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3
+ // p -> ?
+ // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
Value *findCommon(FoldAddrToValueMapping &Map) {
// Tracks the simplification of newly created phi nodes. The reason we use
// this mapping is that we will add newly created Phi nodes to AddrToBase.
// Simplification of Phi nodes is recursive, so some Phi node may
- // be simplified after we added it to AddrToBase.
+ // be simplified after we added it to AddrToBase. In practice, this
+ // simplification is possible only if the original phis/selects have not
+ // been simplified yet.
// Using this mapping we can find the current value in AddrToBase.
SimplificationTracker ST(SQ);
// First step, DFS to create PHI nodes for all intermediate blocks.
// Also fill traverse order for the second step.
- SmallVector<ValueInBB, 32> TraverseOrder;
+ SmallVector<Value *, 32> TraverseOrder;
InsertPlaceholders(Map, TraverseOrder, ST);
// Second step: fill the new nodes with merged values and simplify if possible.
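
To make the p1 -> b1, p2 -> b2 example above concrete, here is a sketch of how the requested phi for p can be built once its incoming values are anchored in the map. The helper name is hypothetical; the real code reaches the same result through its placeholder mechanism:

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: build 'sunk_phi = phi [b1, BB1], [b2, BB2]' for a phi address P
// whose incoming values already have entries in Map.
static PHINode *buildMergedBase(PHINode *P, DenseMap<Value *, Value *> &Map,
                                Type *CommonType) {
  PHINode *NewPhi = PHINode::Create(CommonType, P->getNumIncomingValues(),
                                    "sunk_phi", P);
  for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; ++I)
    NewPhi->addIncoming(Map[P->getIncomingValue(I)], P->getIncomingBlock(I));
  return NewPhi;
}
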
@@ -2935,7 +3123,7 @@ private:
/// Matcher tracks the matched Phi nodes.
bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
SmallSetVector<PHIPair, 8> &Matcher,
- SmallSetVector<PHINode *, 32> &PhiNodesToMatch) {
+ PhiNodeSet &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
Matcher.insert({ PHI, Candidate });
WorkList.push_back({ PHI, Candidate });
@@ -2984,11 +3172,12 @@ private:
/// Returns false if this matching fails and creation of new Phi is disabled.
bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
unsigned &PhiNotMatchedCount) {
- // Use a SetVector for Matched to make sure we do replacements (ReplacePhi)
- // in a deterministic order below.
+ // Matched and PhiNodesToMatch iterate their elements in a deterministic
+ // order, so the replacements (ReplacePhi) are also done in a deterministic
+ // order.
SmallSetVector<PHIPair, 8> Matched;
SmallPtrSet<PHINode *, 8> WillNotMatch;
- SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes();
+ PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
while (PhiNodesToMatch.size()) {
PHINode *PHI = *PhiNodesToMatch.begin();
@@ -3023,129 +3212,86 @@ private:
// Just remove all seen values in matcher. They will not match anything.
PhiNotMatchedCount += WillNotMatch.size();
for (auto *P : WillNotMatch)
- PhiNodesToMatch.remove(P);
+ PhiNodesToMatch.erase(P);
}
return true;
}
- /// Fill the placeholder with values from predecessors and simplify it.
+ /// Fill the placeholders with values from predecessors and simplify them.
void FillPlaceholders(FoldAddrToValueMapping &Map,
- SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SmallVectorImpl<Value *> &TraverseOrder,
SimplificationTracker &ST) {
while (!TraverseOrder.empty()) {
- auto Current = TraverseOrder.pop_back_val();
+ Value *Current = TraverseOrder.pop_back_val();
assert(Map.find(Current) != Map.end() && "No node to fill!!!");
- Value *CurrentValue = Current.first;
- BasicBlock *CurrentBlock = Current.second;
Value *V = Map[Current];
if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
// Current must also be a Select.
- auto *CurrentSelect = cast<SelectInst>(CurrentValue);
+ auto *CurrentSelect = cast<SelectInst>(Current);
auto *TrueValue = CurrentSelect->getTrueValue();
- ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue)
- ? CurrentBlock
- : nullptr };
- assert(Map.find(TrueItem) != Map.end() && "No True Value!");
- Select->setTrueValue(ST.Get(Map[TrueItem]));
+ assert(Map.find(TrueValue) != Map.end() && "No True Value!");
+ Select->setTrueValue(ST.Get(Map[TrueValue]));
auto *FalseValue = CurrentSelect->getFalseValue();
- ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue)
- ? CurrentBlock
- : nullptr };
- assert(Map.find(FalseItem) != Map.end() && "No False Value!");
- Select->setFalseValue(ST.Get(Map[FalseItem]));
+ assert(Map.find(FalseValue) != Map.end() && "No False Value!");
+ Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
PHINode *PHI = cast<PHINode>(V);
+ auto *CurrentPhi = dyn_cast<PHINode>(Current);
// Fill the Phi node with values from predecessors.
- bool IsDefinedInThisBB =
- cast<Instruction>(CurrentValue)->getParent() == CurrentBlock;
- auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue);
- for (auto B : predecessors(CurrentBlock)) {
- Value *PV = IsDefinedInThisBB
- ? CurrentPhi->getIncomingValueForBlock(B)
- : CurrentValue;
- ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr };
- assert(Map.find(item) != Map.end() && "No predecessor Value!");
- PHI->addIncoming(ST.Get(Map[item]), B);
+ for (auto B : predecessors(PHI->getParent())) {
+ Value *PV = CurrentPhi->getIncomingValueForBlock(B);
+ assert(Map.find(PV) != Map.end() && "No predecessor Value!");
+ PHI->addIncoming(ST.Get(Map[PV]), B);
}
}
- // Simplify if possible.
Map[Current] = ST.Simplify(V);
}
}
- /// Starting from value recursively iterates over predecessors up to known
- /// ending values represented in a map. For each traversed block inserts
- /// a placeholder Phi or Select.
+ /// Starting from the original value, recursively iterates over the def-use
+ /// chain up to known ending values represented in a map. For each traversed
+ /// phi/select, inserts a placeholder Phi or Select.
 /// Reports all newly created Phi/Select nodes by adding them to the set.
- /// Also reports and order in what basic blocks have been traversed.
+ /// Also reports the order in which values have been traversed.
void InsertPlaceholders(FoldAddrToValueMapping &Map,
- SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SmallVectorImpl<Value *> &TraverseOrder,
SimplificationTracker &ST) {
- SmallVector<ValueInBB, 32> Worklist;
- assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
+ SmallVector<Value *, 32> Worklist;
+ assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
"Address must be a Phi or Select node");
auto *Dummy = UndefValue::get(CommonType);
Worklist.push_back(Original);
while (!Worklist.empty()) {
- auto Current = Worklist.pop_back_val();
- // If value is not an instruction it is something global, constant,
- // parameter and we can say that this value is observable in any block.
- // Set block to null to denote it.
- // Also please take into account that it is how we build anchors.
- if (!isa<Instruction>(Current.first))
- Current.second = nullptr;
+ Value *Current = Worklist.pop_back_val();
// If it is already visited or is an ending value, skip it.
if (Map.find(Current) != Map.end())
continue;
TraverseOrder.push_back(Current);
- Value *CurrentValue = Current.first;
- BasicBlock *CurrentBlock = Current.second;
// Current must be a Phi node or a select. All others must be covered
// by anchors.
- Instruction *CurrentI = cast<Instruction>(CurrentValue);
- bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;
-
- unsigned PredCount = pred_size(CurrentBlock);
- // if Current Value is not defined in this basic block we are interested
- // in values in predecessors.
- if (!IsDefinedInThisBB) {
- assert(PredCount && "Unreachable block?!");
- PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
- &CurrentBlock->front());
- Map[Current] = PHI;
- ST.insertNewPhi(PHI);
- // Add all predecessors in work list.
- for (auto B : predecessors(CurrentBlock))
- Worklist.push_back({ CurrentValue, B });
- continue;
- }
- // Value is defined in this basic block.
- if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) {
+ if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
// Is it OK to get metadata from CurrentSelect?!
// Create a Select placeholder with dummy value.
- SelectInst *Select =
- SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
- OrigSelect->getName(), OrigSelect, OrigSelect);
+ SelectInst *Select = SelectInst::Create(
+ CurrentSelect->getCondition(), Dummy, Dummy,
+ CurrentSelect->getName(), CurrentSelect, CurrentSelect);
Map[Current] = Select;
ST.insertNewSelect(Select);
- // We are interested in True and False value in this basic block.
- Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
- Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
+ // We are interested in True and False values.
+ Worklist.push_back(CurrentSelect->getTrueValue());
+ Worklist.push_back(CurrentSelect->getFalseValue());
} else {
// It must be a Phi node then.
- auto *CurrentPhi = cast<PHINode>(CurrentI);
- // Create new Phi node for merge of bases.
- assert(PredCount && "Unreachable block?!");
- PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
- &CurrentBlock->front());
+ PHINode *CurrentPhi = cast<PHINode>(Current);
+ unsigned PredCount = CurrentPhi->getNumIncomingValues();
+ PHINode *PHI =
+ PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
Map[Current] = PHI;
ST.insertNewPhi(PHI);
-
- // Add all predecessors in work list.
- for (auto B : predecessors(CurrentBlock))
- Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B });
+ for (Value *P : CurrentPhi->incoming_values())
+ Worklist.push_back(P);
}
}
}
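
The two functions above follow a common two-phase pattern: a DFS that creates empty placeholders and records the traversal order, then a reverse pass that fills each placeholder once its operands are present in the map. A condensed sketch of the pattern, assuming the usual LLVM ADT/IR headers; the Select case and the actual placeholder creation are elided:

// Condensed sketch of the create-then-fill placeholder pattern.
void twoPhase(Value *Root, DenseMap<Value *, Value *> &Map) {
  SmallVector<Value *, 32> Order, Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {              // phase 1: DFS over def-use chain
    Value *V = Worklist.pop_back_val();
    if (Map.count(V))                      // anchor or already visited
      continue;
    Order.push_back(V);
    Map[V] = nullptr;                      // placeholder; real node elided
    if (auto *Phi = dyn_cast<PHINode>(V))
      for (Value *In : Phi->incoming_values())
        Worklist.push_back(In);
  }
  while (!Order.empty()) {                 // phase 2: fill in reverse order
    Value *V = Order.pop_back_val();
    // Fill Map[V] from the now-populated entries of its operands.
    (void)V;
  }
}
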
@@ -3277,6 +3423,41 @@ namespace {
/// Helper class to perform type promotion.
class TypePromotionHelper {
+ /// Utility function to add a promoted instruction \p ExtOpnd to
+ /// \p PromotedInsts and record the type of extension we have seen.
+ static void addPromotedInst(InstrToOrigTy &PromotedInsts,
+ Instruction *ExtOpnd,
+ bool IsSExt) {
+ ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
+ InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
+ if (It != PromotedInsts.end()) {
+ // If the new extension is the same as the original, the information in
+ // PromotedInsts[ExtOpnd] is still correct.
+ if (It->second.getInt() == ExtTy)
+ return;
+
+ // The new extension differs from the old one, so invalidate the type
+ // information by setting the extension type to BothExtension.
+ ExtTy = BothExtension;
+ }
+ PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
+ }
+
+ /// Utility function to query the original type of instruction \p Opnd
+ /// with a matched extension type. If the extension doesn't match, we
+ /// cannot use the information we had on the original type.
+ /// BothExtension doesn't match any extension type.
+ static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
+ Instruction *Opnd,
+ bool IsSExt) {
+ ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
+ InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
+ if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
+ return It->second.getPointer();
+ return nullptr;
+ }
+
/// Utility function to check whether or not a sign or zero extension
/// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
/// either using the operands of \p Inst or promoting \p Inst.
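
The two helpers above implement a small three-state lattice: the first promotion records the extension kind, and any conflicting kind degrades the entry to BothExtension, after which the cached type is never returned. A self-contained sketch of the same logic, with hypothetical names and an int standing in for the instruction key:

#include <map>

enum ExtKind { ZeroExt, SignExt, BothExt };
static std::map<int, ExtKind> Seen; // instruction id -> recorded kind

void record(int Inst, bool IsSExt) {
  ExtKind K = IsSExt ? SignExt : ZeroExt;
  auto It = Seen.find(Inst);
  if (It != Seen.end() && It->second != K)
    K = BothExt; // conflicting extensions: the cached type is unusable
  Seen[Inst] = K;
}

bool query(int Inst, bool IsSExt) {
  auto It = Seen.find(Inst);
  return It != Seen.end() && It->second == (IsSExt ? SignExt : ZeroExt);
}
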
@@ -3465,10 +3646,9 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// I.e., check that trunc just drops extended bits of the same kind of
// the extension.
// #1 get the type of the operand and check the kind of the extended bits.
- const Type *OpndType;
- InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
- if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
- OpndType = It->second.getPointer();
+ const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
+ if (OpndType)
+ ; // The original type was already recorded by an earlier promotion.
else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
OpndType = Opnd->getOperand(0)->getType();
else
@@ -3596,8 +3776,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Remember the original type of the instruction before promotion.
// This is useful to know that the high bits are sign extended bits.
- PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
- ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
+ addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
// Step #1.
TPT.mutateType(ExtOpnd, Ext->getType());
// Step #2.
@@ -3801,8 +3980,13 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
} else {
uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue() * TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
+ const APInt &CVal = CI->getValue();
+ if (CVal.getMinSignedBits() <= 64) {
+ ConstantOffset += CVal.getSExtValue() * TypeSize;
+ continue;
+ }
+ }
+ if (TypeSize) { // Scales of zero don't do anything.
// We only allow one variable index at the moment.
if (VariableOperand != -1)
return false;
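
The new guard matters because APInt::getSExtValue() asserts when the value does not fit in 64 bits. A standalone illustration of the precondition the code above now checks:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

// getSExtValue() may only be called on values that fit in 64 bits; wider
// GEP indices must take the variable-index path instead.
void illustrate() {
  APInt Huge = APInt::getSignedMaxValue(128); // needs 128 bits
  assert(Huge.getMinSignedBits() > 64);
  // Huge.getSExtValue() would assert here, which is why the code checks
  // getMinSignedBits() <= 64 before folding the constant offset.
}
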
@@ -4326,7 +4510,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool PhiOrSelectSeen = false;
SmallVector<Instruction*, 16> AddrModeInsts;
const SimplifyQuery SQ(*DL, TLInfo);
- AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() });
+ AddressingModeCombiner AddrModes(SQ, Addr);
TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4943,8 +5127,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
};
// Sorting all the GEPs of the same data structures based on the offsets.
- llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(),
- compareGEPOffset);
+ llvm::sort(LargeOffsetGEPs, compareGEPOffset);
LargeOffsetGEPs.erase(
std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
LargeOffsetGEPs.end());
@@ -4977,11 +5160,11 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
}
// Generate a new GEP to replace the current one.
- IRBuilder<> Builder(GEP);
+ LLVMContext &Ctx = GEP->getContext();
Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
Type *I8PtrTy =
- Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
- Type *I8Ty = Builder.getInt8Ty();
+ Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Type::getInt8Ty(Ctx);
if (!NewBaseGEP) {
// Create a new base if we don't have one yet. Find the insertion
@@ -5017,6 +5200,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewGEPBases.insert(NewBaseGEP);
}
+ IRBuilder<> Builder(GEP);
Value *NewGEP = NewBaseGEP;
if (Offset == BaseOffset) {
if (GEP->getType() != I8PtrTy)
@@ -5545,6 +5729,10 @@ static Value *getTrueOrFalseValue(
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
+ // If branch conversion isn't desirable, exit early.
+ if (DisableSelectToBranch || OptSize || !TLI)
+ return false;
+
// Find all consecutive select instructions that share the same condition.
SmallVector<SelectInst *, 2> ASI;
ASI.push_back(SI);
@@ -5566,8 +5754,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
- if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
- SI->getMetadata(LLVMContext::MD_unpredictable))
+ if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
return false;
TargetLowering::SelectSupportKind SelectKind;
@@ -5630,6 +5817,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
EndBlock->getParent(), EndBlock);
TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ TrueBranch->setDebugLoc(SI->getDebugLoc());
}
auto *TrueInst = cast<Instruction>(SI->getTrueValue());
TrueInst->moveBefore(TrueBranch);
@@ -5639,6 +5827,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
EndBlock->getParent(), EndBlock);
FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
}
auto *FalseInst = cast<Instruction>(SI->getFalseValue());
FalseInst->moveBefore(FalseBranch);
@@ -5653,7 +5842,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
EndBlock->getParent(), EndBlock);
- BranchInst::Create(EndBlock, FalseBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
}
// Insert the real conditional branch based on the original condition.
@@ -5688,6 +5878,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
PN->takeName(SI);
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+ PN->setDebugLoc(SI->getDebugLoc());
SI->replaceAllUsesWith(PN);
SI->eraseFromParent();
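
The recurring change in these hunks is propagating the select's debug location onto every piece of newly created control flow. A condensed sketch of the lowering diamond with the locations attached; this is a fragment (only the true side is shown, and SI is the original select), not the full transformation:

// New branches and the merge phi inherit SI's location so the lowered
// control flow remains attributable to the original source line.
BasicBlock *StartBlock = SI->getParent();
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SI->getIterator(),
                                                   "select.end");
BasicBlock *TrueBlock = BasicBlock::Create(SI->getContext(), "select.true",
                                           EndBlock->getParent(), EndBlock);
BranchInst *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
TrueBranch->setDebugLoc(SI->getDebugLoc());
PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
PN->setDebugLoc(SI->getDebugLoc());
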
@@ -5799,6 +5990,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
+ ExtInst->setDebugLoc(SI->getDebugLoc());
SI->setCondition(ExtInst);
for (auto Case : SI->cases()) {
APInt NarrowConst = Case.getCaseValue()->getValue();
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index cd302e78cc3e..68034afe98d5 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -250,8 +250,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
LLVM_DEBUG({
dbgs() << "Scheduling DAG of the packetize region\n";
- for (SUnit &SU : VLIWScheduler->SUnits)
- SU.dumpAll(VLIWScheduler);
+ VLIWScheduler->dump();
});
// Generate MI -> SU map.
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 098afd885f2f..364e1f030942 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -398,6 +398,13 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
return false;
}
+ // Make sure the analyzed branch is conditional; one of the successors
+ // could be a landing pad. (Empty landing pads can be generated on Windows.)
+ if (Cond.empty()) {
+ LLVM_DEBUG(dbgs() << "AnalyzeBranch found an unconditional branch.\n");
+ return false;
+ }
+
// AnalyzeBranch doesn't set FBB on a fall-through branch.
// Make sure it is always set.
FBB = TBB == Succ0 ? Succ1 : Succ0;
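
For context, analyzeBranch reports an unconditional branch by leaving the condition operands empty, which is exactly what the added guard checks. A fragment-level sketch of the contract, assuming TII and MBB from the surrounding pass:

// Cond stays empty for an unconditional branch, so there is nothing to
// if-convert in that case.
SmallVector<MachineOperand, 4> Cond;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
if (TII->analyzeBranch(*MBB, TBB, FBB, Cond))
  return false; // branch could not be analyzed
if (Cond.empty())
  return false; // unconditional, e.g. the other successor is a landing pad
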
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
index d7562cbf1e90..ee7683adbcdd 100644
--- a/lib/CodeGen/ExpandMemCmp.cpp
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -66,23 +66,18 @@ class MemCmpExpansion {
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
// 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
- // TODO(courbet): Involve the target more in this computation. On X86, 7
- // bytes can be done more efficiently with two overlaping 4-byte loads than
- // covering the interval with [{4, 0},{2, 4},{1, 6}}.
struct LoadEntry {
LoadEntry(unsigned LoadSize, uint64_t Offset)
: LoadSize(LoadSize), Offset(Offset) {
- assert(Offset % LoadSize == 0 && "invalid load entry");
}
- uint64_t getGEPIndex() const { return Offset / LoadSize; }
-
// The size of the load for this block, in bytes.
- const unsigned LoadSize;
- // The offset of this load WRT the base pointer, in bytes.
- const uint64_t Offset;
+ unsigned LoadSize;
+ // The offset of this load from the base pointer, in bytes.
+ uint64_t Offset;
};
- SmallVector<LoadEntry, 8> LoadSequence;
+ using LoadEntryVector = SmallVector<LoadEntry, 8>;
+ LoadEntryVector LoadSequence;
void createLoadCmpBlocks();
void createResultBlock();
@@ -92,13 +87,23 @@ class MemCmpExpansion {
void emitLoadCompareBlock(unsigned BlockIndex);
void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
unsigned &LoadIndex);
- void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex);
+ void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase();
Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();
+ Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,
+ uint64_t OffsetBytes);
+
+ static LoadEntryVector
+ computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
+ static LoadEntryVector
+ computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
+ unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte);
- public:
+public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
@@ -110,6 +115,76 @@ class MemCmpExpansion {
Value *getMemCmpExpansion();
};
+MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
+ uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
+ NumLoadsNonOneByte = 0;
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ while (Size && !LoadSizes.empty()) {
+ const unsigned LoadSize = LoadSizes.front();
+ const uint64_t NumLoadsForThisSize = Size / LoadSize;
+ if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+ // Do not expand if the total number of loads is larger than what the
+ // target allows. Note that it's important that we exit before completing
+ // the expansion to avoid using a ton of memory to store the expansion for
+ // large sizes.
+ return {};
+ }
+ if (NumLoadsForThisSize > 0) {
+ for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+ LoadSequence.push_back({LoadSize, Offset});
+ Offset += LoadSize;
+ }
+ if (LoadSize > 1)
+ ++NumLoadsNonOneByte;
+ Size = Size % LoadSize;
+ }
+ LoadSizes = LoadSizes.drop_front();
+ }
+ return LoadSequence;
+}
+
+MemCmpExpansion::LoadEntryVector
+MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
+ const unsigned MaxLoadSize,
+ const unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte) {
+ // These are already handled by the greedy approach.
+ if (Size < 2 || MaxLoadSize < 2)
+ return {};
+
+ // We try to do as many non-overlapping loads as possible starting from the
+ // beginning.
+ const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
+ assert(NumNonOverlappingLoads && "there must be at least one load");
+ // There remain 0 to (MaxLoadSize - 1) bytes to load; this will be done with
+ // an overlapping load.
+ Size = Size - NumNonOverlappingLoads * MaxLoadSize;
+ // Bail if we do not need an overlapping load; this is already handled by
+ // the greedy approach.
+ if (Size == 0)
+ return {};
+ // Bail if the number of loads (non-overlapping + potential overlapping one)
+ // is larger than the max allowed.
+ if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
+ return {};
+
+ // Add non-overlapping loads.
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
+ LoadSequence.push_back({MaxLoadSize, Offset});
+ Offset += MaxLoadSize;
+ }
+
+ // Add the last overlapping load.
+ assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
+ LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
+ NumLoadsNonOneByte = 1;
+ return LoadSequence;
+}
+
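
A worked example of the two strategies, assuming Size = 7 and LoadSizes = {4, 2, 1}: the greedy sequence is [{4,0}, {2,4}, {1,6}] (three loads), while the overlapping sequence is [{4,0}, {4,3}] (two loads, with bytes 3..6 read twice, which is harmless for memcmp). A standalone check of the overlap arithmetic used above:

#include <cassert>

int main() {
  unsigned Size = 7, MaxLoadSize = 4;
  unsigned NumFull = Size / MaxLoadSize;       // 1 non-overlapping load
  unsigned Rem = Size - NumFull * MaxLoadSize; // 3 trailing bytes
  // The final load starts at Offset - (MaxLoadSize - Rem), as in the code.
  unsigned LastOffset = NumFull * MaxLoadSize - (MaxLoadSize - Rem);
  assert(LastOffset == 3);                     // sequence: {4,0}, {4,3}
  return 0;
}
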
// Initialize the basic block structure required for expansion of memcmp call
// with given maximum load size and memcmp size parameter.
// This structure includes:
@@ -133,38 +208,31 @@ MemCmpExpansion::MemCmpExpansion(
Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
- size_t LoadSizeIndex = 0;
- while (LoadSizeIndex < Options.LoadSizes.size() &&
- Options.LoadSizes[LoadSizeIndex] > Size) {
- ++LoadSizeIndex;
+ llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
+ while (!LoadSizes.empty() && LoadSizes.front() > Size) {
+ LoadSizes = LoadSizes.drop_front();
}
- this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
+ assert(!LoadSizes.empty() && "cannot load Size bytes");
+ MaxLoadSize = LoadSizes.front();
// Compute the decomposition.
- uint64_t CurSize = Size;
- uint64_t Offset = 0;
- while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
- const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
- assert(LoadSize > 0 && "zero load size");
- const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
- if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
- // Do not expand if the total number of loads is larger than what the
- // target allows. Note that it's important that we exit before completing
- // the expansion to avoid using a ton of memory to store the expansion for
- // large sizes.
- LoadSequence.clear();
- return;
- }
- if (NumLoadsForThisSize > 0) {
- for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
- LoadSequence.push_back({LoadSize, Offset});
- Offset += LoadSize;
- }
- if (LoadSize > 1) {
- ++NumLoadsNonOneByte;
- }
- CurSize = CurSize % LoadSize;
+ unsigned GreedyNumLoadsNonOneByte = 0;
+ LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+ GreedyNumLoadsNonOneByte);
+ NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
+ assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ // If we allow overlapping loads and the load sequence is not already optimal,
+ // use overlapping loads.
+ if (Options.AllowOverlappingLoads &&
+ (LoadSequence.empty() || LoadSequence.size() > 2)) {
+ unsigned OverlappingNumLoadsNonOneByte = 0;
+ auto OverlappingLoads = computeOverlappingLoadSequence(
+ Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+ if (!OverlappingLoads.empty() &&
+ (LoadSequence.empty() ||
+ OverlappingLoads.size() < LoadSequence.size())) {
+ LoadSequence = OverlappingLoads;
+ NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
}
- ++LoadSizeIndex;
}
assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
}
@@ -189,30 +257,32 @@ void MemCmpExpansion::createResultBlock() {
EndBlock->getParent(), EndBlock);
}
+/// Return a pointer to an element of type `LoadSizeType` at offset
+/// `OffsetBytes`.
+Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
+ Type *LoadSizeType,
+ uint64_t OffsetBytes) {
+ if (OffsetBytes > 0) {
+ auto *ByteType = Type::getInt8Ty(CI->getContext());
+ Source = Builder.CreateGEP(
+ ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
+ ConstantInt::get(ByteType, OffsetBytes));
+ }
+ return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
+}
+
// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters at the given
// byte offset. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
- unsigned GEPIndex) {
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
+ unsigned OffsetBytes) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using the GEPIndex.
- if (GEPIndex != 0) {
- Source1 = Builder.CreateGEP(LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, GEPIndex));
- Source2 = Builder.CreateGEP(LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, GEPIndex));
- }
+ Value *Source1 =
+ getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);
+ Value *Source2 =
+ getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
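
The switch from getGEPIndex() to raw byte offsets is what makes the overlapping loads expressible at all: an overlapping load's offset need not be a multiple of its load size. A standalone illustration:

#include <cassert>

int main() {
  // The overlapping entry {4, 3} from the earlier example: a 4-byte load
  // at byte offset 3 has no integral element index, so the removed
  // assert(Offset % LoadSize == 0) could never have held for it.
  unsigned LoadSize = 4, OffsetBytes = 3;
  assert(OffsetBytes % LoadSize != 0);
  return 0;
}
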
@@ -270,24 +340,10 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
IntegerType *LoadSizeType =
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using a GEP.
- if (CurLoadEntry.Offset != 0) {
- Source1 = Builder.CreateGEP(
- LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- Source2 = Builder.CreateGEP(
- LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- }
+ Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+ CurLoadEntry.Offset);
+ Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+ CurLoadEntry.Offset);
// Get a constant or load a value for each source address.
Value *LoadSrc1 = nullptr;
@@ -378,8 +434,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
if (CurLoadEntry.LoadSize == 1) {
- MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex,
- CurLoadEntry.getGEPIndex());
+ MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
return;
}
@@ -388,25 +443,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
- // Get the base address using a GEP.
- if (CurLoadEntry.Offset != 0) {
- Source1 = Builder.CreateGEP(
- LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- Source2 = Builder.CreateGEP(
- LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- }
+ Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+ CurLoadEntry.Offset);
+ Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+ CurLoadEntry.Offset);
// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
@@ -694,7 +736,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
if (SizeVal == 0) {
return false;
}
-
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index bc747fc610f8..f2a2bcbb94b1 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -97,6 +97,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
if (MI->allDefsAreDead()) {
MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
return true;
}
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index fe3d29657942..1c80556dfef5 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -103,16 +103,6 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<GCModuleInfo>();
}
-static const char *DescKind(GC::PointKind Kind) {
- switch (Kind) {
- case GC::PreCall:
- return "pre-call";
- case GC::PostCall:
- return "post-call";
- }
- llvm_unreachable("Invalid point kind");
-}
-
bool Printer::runOnFunction(Function &F) {
if (F.hasGC())
return false;
@@ -129,7 +119,7 @@ bool Printer::runOnFunction(Function &F) {
for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
++PI) {
- OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind)
+ OS << "\t" << PI->Label->getName() << ": " << "post-call"
<< ", live = {";
for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index 31ddeadbd97a..e8ccd84b0b93 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -38,7 +38,7 @@ namespace {
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
class LowerIntrinsics : public FunctionPass {
- bool PerformDefaultLowering(Function &F, GCStrategy &S);
+ bool DoLowering(Function &F, GCStrategy &S);
public:
static char ID;
@@ -102,13 +102,6 @@ void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTreeWrapperPass>();
}
-static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
- // Default lowering is necessary only if read or write barriers have a default
- // action. The default for roots is no action.
- return !C.customWriteBarrier() || !C.customReadBarrier() ||
- C.initializeRoots();
-}
-
/// doInitialization - If this module uses the GC intrinsics, find them now.
bool LowerIntrinsics::doInitialization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -148,8 +141,7 @@ static bool CouldBecomeSafePoint(Instruction *I) {
return true;
}
-static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
- unsigned Count) {
+static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) {
// Scroll past alloca instructions.
BasicBlock::iterator IP = F.getEntryBlock().begin();
while (isa<AllocaInst>(IP))
@@ -166,12 +158,12 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
// Add root initializers.
bool MadeChange = false;
- for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
- if (!InitedRoots.count(*I)) {
+ for (AllocaInst *Root : Roots)
+ if (!InitedRoots.count(Root)) {
StoreInst *SI = new StoreInst(
- ConstantPointerNull::get(cast<PointerType>((*I)->getAllocatedType())),
- *I);
- SI->insertAfter(*I);
+ ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())),
+ Root);
+ SI->insertAfter(Root);
MadeChange = true;
}
@@ -188,64 +180,59 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
GCStrategy &S = FI.getStrategy();
- bool MadeChange = false;
-
- if (NeedsDefaultLoweringPass(S))
- MadeChange |= PerformDefaultLowering(F, S);
-
- return MadeChange;
+ return DoLowering(F, S);
}
-bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
- bool LowerWr = !S.customWriteBarrier();
- bool LowerRd = !S.customReadBarrier();
- bool InitRoots = S.initializeRoots();
-
+/// Lower barriers out of existence (if the associated GCStrategy hasn't
+/// already done so...), and insert initializing stores to roots as a defensive
+/// measure. Given we're going to report all roots live at all safepoints, we
+/// need to be able to ensure each root has been initialized by the point the
+/// first safepoint is reached. This really should have been done by the
+/// frontend, but the old API made this non-obvious, so we do a potentially
+/// redundant store just in case.
+bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
SmallVector<AllocaInst *, 32> Roots;
bool MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
- Function *F = CI->getCalledFunction();
- switch (F->getIntrinsicID()) {
- case Intrinsic::gcwrite:
- if (LowerWr) {
- // Replace a write barrier with a simple store.
- Value *St =
- new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
- CI->replaceAllUsesWith(St);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcread:
- if (LowerRd) {
- // Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
- Ld->takeName(CI);
- CI->replaceAllUsesWith(Ld);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcroot:
- if (InitRoots) {
- // Initialize the GC root, but do not delete the intrinsic. The
- // backend needs the intrinsic to flag the stack slot.
- Roots.push_back(
- cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
- }
- break;
- default:
- continue;
- }
-
+ for (BasicBlock &BB : F)
+ for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) {
+ IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++);
+ if (!CI)
+ continue;
+
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::gcwrite: {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcread: {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcroot: {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ break;
+ }
}
}
- }
if (Roots.size())
- MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+ MadeChange |= InsertRootInitializers(F, Roots);
return MadeChange;
}
@@ -276,26 +263,18 @@ MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
}
void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
- // Find the return address (next instruction), too, so as to bracket the call
- // instruction.
+ // Find the return address (next instruction), since that's what will be on
+ // the stack when the call is suspended and we need to inspect the stack.
MachineBasicBlock::iterator RAI = CI;
++RAI;
- if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
- MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
- FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
- }
-
- if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
- MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
- FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
- }
+ MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(Label, CI->getDebugLoc());
}
void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
- for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
- ++BBI)
- for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end();
MI != ME; ++MI)
if (MI->isCall()) {
// Do not treat tail or sibling call sites as safe points. This is
diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt
index 4c1da3756b18..da2fd3b239a2 100644
--- a/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -1,8 +1,11 @@
add_llvm_library(LLVMGlobalISel
+ CSEInfo.cpp
+ CSEMIRBuilder.cpp
CallLowering.cpp
GlobalISel.cpp
Combiner.cpp
CombinerHelper.cpp
+ GISelChangeObserver.cpp
IRTranslator.cpp
InstructionSelect.cpp
InstructionSelector.cpp
diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp
new file mode 100644
index 000000000000..89c525c5ba15
--- /dev/null
+++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -0,0 +1,370 @@
+//===- CSEInfo.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "cseinfo"
+
+using namespace llvm;
+char llvm::GISelCSEAnalysisWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+
+/// -------- UniqueMachineInstr -------------//
+
+void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) {
+ GISelInstProfileBuilder(ID, MI->getMF()->getRegInfo()).addNodeID(MI);
+}
+/// -----------------------------------------
+
+/// --------- CSEConfig ---------- ///
+bool CSEConfig::shouldCSEOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_TRUNC:
+ return true;
+ }
+ return false;
+}
+
+bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
+ return Opc == TargetOpcode::G_CONSTANT;
+}
+/// -----------------------------------------
+
+/// -------- GISelCSEInfo -------------//
+void GISelCSEInfo::setMF(MachineFunction &MF) {
+ this->MF = &MF;
+ this->MRI = &MF.getRegInfo();
+}
+
+GISelCSEInfo::~GISelCSEInfo() {}
+
+bool GISelCSEInfo::isUniqueMachineInstValid(
+ const UniqueMachineInstr &UMI) const {
+ // Should we check here and assert that the instruction has been fully
+ // constructed?
+ // FIXME: Any other checks required to be done here? Remove this method if
+ // none.
+ return true;
+}
+
+void GISelCSEInfo::invalidateUniqueMachineInstr(UniqueMachineInstr *UMI) {
+ bool Removed = CSEMap.RemoveNode(UMI);
+ (void)Removed;
+ assert(Removed && "Invalidation called on invalid UMI");
+ // FIXME: Should UMI be deallocated/destroyed?
+}
+
+UniqueMachineInstr *GISelCSEInfo::getNodeIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ auto *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (Node) {
+ if (!isUniqueMachineInstValid(*Node)) {
+ invalidateUniqueMachineInstr(Node);
+ return nullptr;
+ }
+
+ if (Node->MI->getParent() != MBB)
+ return nullptr;
+ }
+ return Node;
+}
+
+void GISelCSEInfo::insertNode(UniqueMachineInstr *UMI, void *InsertPos) {
+ handleRecordedInsts();
+ assert(UMI);
+ UniqueMachineInstr *MaybeNewNode = UMI;
+ if (InsertPos)
+ CSEMap.InsertNode(UMI, InsertPos);
+ else
+ MaybeNewNode = CSEMap.GetOrInsertNode(UMI);
+ if (MaybeNewNode != UMI) {
+ // A similar node exists in the folding set. Let's ignore this one.
+ return;
+ }
+ assert(InstrMapping.count(UMI->MI) == 0 &&
+ "This instruction should not be in the map");
+ InstrMapping[UMI->MI] = MaybeNewNode;
+}
+
+UniqueMachineInstr *GISelCSEInfo::getUniqueInstrForMI(const MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Trying to CSE an unsupported Node");
+ auto *Node = new (UniqueInstrAllocator) UniqueMachineInstr(MI);
+ return Node;
+}
+
+void GISelCSEInfo::insertInstr(MachineInstr *MI, void *InsertPos) {
+ assert(MI);
+ // If it exists in temporary insts, remove it.
+ TemporaryInsts.remove(MI);
+ auto *Node = getUniqueInstrForMI(MI);
+ insertNode(Node, InsertPos);
+}
+
+MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ handleRecordedInsts();
+ if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) {
+ LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";);
+ return const_cast<MachineInstr *>(Inst->MI);
+ }
+ return nullptr;
+}
+
+void GISelCSEInfo::countOpcodeHit(unsigned Opc) {
+#ifndef NDEBUG
+ if (OpcodeHitTable.count(Opc))
+ OpcodeHitTable[Opc] += 1;
+ else
+ OpcodeHitTable[Opc] = 1;
+#endif
+ // Else do nothing.
+}
+
+void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) {
+ if (shouldCSE(MI->getOpcode())) {
+ TemporaryInsts.insert(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";);
+ }
+}
+
+void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE");
+ auto *UMI = InstrMapping.lookup(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";);
+ if (UMI) {
+ // Invalidate this MI.
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ /// Now insert the new instruction.
+ if (UMI) {
+ /// We'll reuse the same UniqueMachineInstr to avoid the new
+ /// allocation.
+ *UMI = UniqueMachineInstr(MI);
+ insertNode(UMI, nullptr);
+ } else {
+ /// This is a new instruction. Allocate a new UniqueMachineInstr and
+ /// Insert.
+ insertInstr(MI);
+ }
+}
+
+void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) {
+ if (auto *UMI = InstrMapping.lookup(MI)) {
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ TemporaryInsts.remove(MI);
+}
+
+void GISelCSEInfo::handleRecordedInsts() {
+ while (!TemporaryInsts.empty()) {
+ auto *MI = TemporaryInsts.pop_back_val();
+ handleRecordedInst(MI);
+ }
+}
+
+bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
+ // Only GISel opcodes are CSEable
+ if (!isPreISelGenericOpcode(Opc))
+ return false;
+ assert(CSEOpt.get() && "CSEConfig not set");
+ return CSEOpt->shouldCSEOpc(Opc);
+}
+
+void GISelCSEInfo::erasingInstr(MachineInstr &MI) { handleRemoveInst(&MI); }
+void GISelCSEInfo::createdInstr(MachineInstr &MI) { recordNewInstruction(&MI); }
+void GISelCSEInfo::changingInstr(MachineInstr &MI) {
+ // For now, perform erase, followed by insert.
+ erasingInstr(MI);
+ createdInstr(MI);
+}
+void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); }
+
+void GISelCSEInfo::analyze(MachineFunction &MF) {
+ setMF(MF);
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ for (MachineInstr &MI : MBB) {
+ if (!shouldCSE(MI.getOpcode()))
+ continue;
+ LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";);
+ insertInstr(&MI);
+ }
+ }
+}
+
+void GISelCSEInfo::releaseMemory() {
+ // print();
+ CSEMap.clear();
+ InstrMapping.clear();
+ UniqueInstrAllocator.Reset();
+ TemporaryInsts.clear();
+ CSEOpt.reset();
+ MRI = nullptr;
+ MF = nullptr;
+#ifndef NDEBUG
+ OpcodeHitTable.clear();
+#endif
+}
+
+void GISelCSEInfo::print() {
+#ifndef NDEBUG
+ for (auto &It : OpcodeHitTable) {
+ dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n";
+ };
+#endif
+}
+/// -----------------------------------------
+// ---- Profiling methods for FoldingSetNode --- //
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const {
+ addNodeIDMBB(MI->getParent());
+ addNodeIDOpcode(MI->getOpcode());
+ for (auto &Op : MI->operands())
+ addNodeIDMachineOperand(Op);
+ addNodeIDFlag(MI->getFlags());
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDOpcode(unsigned Opc) const {
+ ID.AddInteger(Opc);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const LLT &Ty) const {
+ uint64_t Val = Ty.getUniqueRAWLLTData();
+ ID.AddInteger(Val);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const TargetRegisterClass *RC) const {
+ ID.AddPointer(RC);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const RegisterBank *RB) const {
+ ID.AddPointer(RB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const {
+ ID.AddInteger(Imm);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegNum(unsigned Reg) const {
+ ID.AddInteger(Reg);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const unsigned Reg) const {
+ addNodeIDMachineOperand(MachineOperand::CreateReg(Reg, false));
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDMBB(const MachineBasicBlock *MBB) const {
+ ID.AddPointer(MBB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
+ if (Flag)
+ ID.AddInteger(Flag);
+ return *this;
+}
+
+const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
+ const MachineOperand &MO) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!MO.isDef())
+ addNodeIDRegNum(Reg);
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ addNodeIDRegType(Ty);
+ auto *RB = MRI.getRegBankOrNull(Reg);
+ if (RB)
+ addNodeIDRegType(RB);
+ auto *RC = MRI.getRegClassOrNull(Reg);
+ if (RC)
+ addNodeIDRegType(RC);
+ assert(!MO.isImplicit() && "Unhandled case");
+ } else if (MO.isImm())
+ ID.AddInteger(MO.getImm());
+ else if (MO.isCImm())
+ ID.AddPointer(MO.getCImm());
+ else if (MO.isFPImm())
+ ID.AddPointer(MO.getFPImm());
+ else if (MO.isPredicate())
+ ID.AddInteger(MO.getPredicate());
+ else
+ llvm_unreachable("Unhandled operand type");
+ // Handle other types
+ return *this;
+}
+
+GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt,
+ bool Recompute) {
+ if (!AlreadyComputed || Recompute) {
+ Info.setCSEConfig(std::move(CSEOpt));
+ Info.analyze(*MF);
+ AlreadyComputed = true;
+ }
+ return Info;
+}
+void GISelCSEAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool GISelCSEAnalysisWrapperPass::runOnMachineFunction(MachineFunction &MF) {
+ releaseMemory();
+ Wrapper.setMF(MF);
+ return false;
+}
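
A sketch of how a machine function pass might consume this analysis, based on the wrapper classes defined in this file; the getCSEWrapper() accessor and the pass shape are assumptions (the pass would also need to addRequired<GISelCSEAnalysisWrapperPass>() in its getAnalysisUsage):

// Hypothetical consumer of the CSE analysis.
bool MyGISelPass::runOnMachineFunction(MachineFunction &MF) {
  auto &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  // get() computes the info on first use and caches it afterwards.
  GISelCSEInfo &Info =
      Wrapper.get(llvm::make_unique<CSEConfig>(), /*Recompute=*/false);
  (void)Info; // hand Info to a CSEMIRBuilder or the Combiner
  return false;
}
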
diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
new file mode 100644
index 000000000000..863efe0c3e34
--- /dev/null
+++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -0,0 +1,231 @@
+//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the CSEMIRBuilder class which CSEs as it builds
+/// instructions.
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+
+using namespace llvm;
+
+bool CSEMIRBuilder::dominates(MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) const {
+ auto MBBEnd = getMBB().end();
+ if (B == MBBEnd)
+ return true;
+ assert(A->getParent() == B->getParent() &&
+ "Iterators should be in same block");
+ const MachineBasicBlock *BBA = A->getParent();
+ MachineBasicBlock::const_iterator I = BBA->begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+ return &*I == A;
+}
+
+MachineInstrBuilder
+CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
+ void *&NodeInsertPos) {
+ GISelCSEInfo *CSEInfo = getCSEInfo();
+ assert(CSEInfo && "Can't get here without setting CSEInfo");
+ MachineBasicBlock *CurMBB = &getMBB();
+ MachineInstr *MI =
+ CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos);
+ if (MI) {
+ auto CurrPos = getInsertPt();
+ if (!dominates(MI, CurrPos))
+ CurMBB->splice(CurrPos, CurMBB, MI);
+ return MachineInstrBuilder(getMF(), MI);
+ }
+ return MachineInstrBuilder();
+}
+
+bool CSEMIRBuilder::canPerformCSEForOpc(unsigned Opc) const {
+ const GISelCSEInfo *CSEInfo = getCSEInfo();
+ if (!CSEInfo || !CSEInfo->shouldCSE(Opc))
+ return false;
+ return true;
+}
+
+void CSEMIRBuilder::profileDstOp(const DstOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getDstOpKind()) {
+ case DstOp::DstType::Ty_RC:
+ B.addNodeIDRegType(Op.getRegClass());
+ break;
+ default:
+ B.addNodeIDRegType(Op.getLLTTy(*getMRI()));
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getSrcOpKind()) {
+ case SrcOp::SrcType::Ty_Predicate:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
+ break;
+ default:
+ B.addNodeIDRegType(Op.getReg());
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileMBBOpcode(GISelInstProfileBuilder &B,
+ unsigned Opc) const {
+ // First add the MBB (Local CSE).
+ B.addNodeIDMBB(&getMBB());
+ // Then add the opcode.
+ B.addNodeIDOpcode(Opc);
+}
+
+void CSEMIRBuilder::profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags,
+ GISelInstProfileBuilder &B) const {
+
+ profileMBBOpcode(B, Opc);
+ // Then add the DstOps.
+ profileDstOps(DstOps, B);
+ // Then add the SrcOps.
+ profileSrcOps(SrcOps, B);
+ // Add Flags if passed in.
+ if (Flags)
+ B.addNodeIDFlag(*Flags);
+}
+
+MachineInstrBuilder CSEMIRBuilder::memoizeMI(MachineInstrBuilder MIB,
+ void *NodeInsertPos) {
+ assert(canPerformCSEForOpc(MIB->getOpcode()) &&
+ "Attempting to CSE illegal op");
+ MachineInstr *MIBInstr = MIB;
+ getCSEInfo()->insertInstr(MIBInstr, NodeInsertPos);
+ return MIB;
+}
+
+bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) {
+ if (DstOps.size() == 1)
+ return true; // always possible to emit copy to just 1 vreg.
+
+ return std::all_of(DstOps.begin(), DstOps.end(), [](const DstOp &Op) {
+ DstOp::DstType DT = Op.getDstOpKind();
+ return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC;
+ });
+}
+
+MachineInstrBuilder
+CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
+ MachineInstrBuilder &MIB) {
+ assert(checkCopyToDefsPossible(DstOps) &&
+ "Impossible return a single MIB with copies to multiple defs");
+ if (DstOps.size() == 1) {
+ const DstOp &Op = DstOps[0];
+ if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
+ return buildCopy(Op.getReg(), MIB->getOperand(0).getReg());
+ }
+ return MIB;
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
+ ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flag) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 2 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
+ SrcOps[1].getReg(), *getMRI()))
+ return buildConstant(DstOps[0], Cst->getSExtValue());
+ break;
+ }
+ }
+ bool CanCopy = checkCopyToDefsPossible(DstOps);
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ // If we can CSE this instruction, but doing so involves generating copies
+ // to multiple regs, give up. This frequently happens to UNMERGEs.
+ if (!CanCopy) {
+ auto MIB = MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ // CSEInfo would have tracked this instruction. Remove it from the temporary
+ // insts.
+ getCSEInfo()->handleRemoveInst(&*MIB);
+ return MIB;
+ }
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileEverything(Opc, DstOps, SrcOps, Flag, ProfBuilder);
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired(DstOps, MIB);
+ }
+ // This instruction does not exist in the CSEInfo. Build it and CSE it.
+ MachineInstrBuilder NewMIB =
+ MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_CONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildConstant(Res, Val);
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateCImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_FCONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildFConstant(Res, Val);
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateFPImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildFConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
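
For reference, a minimal standalone sketch of the memoization pattern buildInstr and memoizeMI rely on: profile a key from (opcode, operands), look it up before building, and record anything newly built. This is plain C++ with illustrative names (TinyCSEBuilder, Node); it stands in for the FoldingSetNodeID machinery and omits the dominance check done by getDominatingInstrForID.

#include <map>
#include <memory>
#include <utility>
#include <vector>

struct Node {
  unsigned Opcode;
  std::vector<unsigned> Ops; // stand-ins for virtual-register operands
};

class TinyCSEBuilder {
  // The key plays the role of the FoldingSetNodeID built by profileEverything.
  std::map<std::pair<unsigned, std::vector<unsigned>>, Node *> CSEMap;
  std::vector<std::unique_ptr<Node>> Pool;

public:
  Node *buildOrReuse(unsigned Opcode, std::vector<unsigned> Ops) {
    auto Key = std::make_pair(Opcode, Ops);
    auto It = CSEMap.find(Key);
    if (It != CSEMap.end())
      return It->second; // an equivalent node already exists; reuse it
    Pool.push_back(std::make_unique<Node>(Node{Opcode, std::move(Ops)}));
    CSEMap.emplace(std::move(Key), Pool.back().get());
    return Pool.back().get();
  }
};

int main() {
  TinyCSEBuilder B;
  Node *N1 = B.buildOrReuse(/*opcode*/ 1, {10, 11});
  Node *N2 = B.buildOrReuse(/*opcode*/ 1, {10, 11});
  return N1 == N2 ? 0 : 1; // identical requests yield the same node
}
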
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index 07de31bec660..724ecedf3b3f 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -23,6 +23,8 @@
using namespace llvm;
+void CallLowering::anchor() {}
+
bool CallLowering::lowerCall(
MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
@@ -164,7 +166,6 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
// nop in big-endian situations.
return ValReg;
case CCValAssign::AExt: {
- assert(!VA.getLocVT().isVector() && "unexpected vector extend");
auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
return MIB->getOperand(0).getReg();
}
@@ -181,3 +182,5 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
}
llvm_unreachable("unable to extend register");
}
+
+void CallLowering::ValueHandler::anchor() {}
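
The anchor() definitions added above are key functions: defining one virtual member out of line pins the class's vtable to this translation unit instead of re-emitting it in every user. A minimal sketch of the idiom, with an illustrative class name:

// Header: declare one out-of-line virtual method.
struct Interface {
  virtual ~Interface() = default;
  virtual void anchor(); // intentionally not defined inline
};

// Exactly one .cpp file: define it, anchoring the vtable there.
void Interface::anchor() {}
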
diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp
index 0bc5b87de150..45b0e36fd7d9 100644
--- a/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -1,4 +1,4 @@
-//===-- lib/CodeGen/GlobalISel/GICombiner.cpp -----------------------===//
+//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
-#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -25,20 +28,76 @@
using namespace llvm;
+namespace {
+/// This class acts as the glue that joins the CombinerHelper to the overall
+/// Combine algorithm. The CombinerHelper is intended to report the
+/// modifications it makes to the MIR to the GISelChangeObserver, and the
+/// observer subclass will act on these events. In this case, instruction
+/// erasure will cancel any future visits to the erased instruction and
+/// instruction creation will schedule that instruction for a future visit.
+/// Other Combiner implementations may require more complex behaviour from
+/// their GISelChangeObserver subclass.
+class WorkListMaintainer : public GISelChangeObserver {
+ using WorkListTy = GISelWorkList<512>;
+ WorkListTy &WorkList;
+ /// The instructions that have been created but that we only want to report
+ /// once their operands have been added. This is only maintained if debug
+ /// output is requested.
+ SmallPtrSet<const MachineInstr *, 4> CreatedInstrs;
+
+public:
+ WorkListMaintainer(WorkListTy &WorkList)
+ : GISelChangeObserver(), WorkList(WorkList) {}
+ virtual ~WorkListMaintainer() {
+ }
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n");
+ WorkList.remove(&MI);
+ }
+ void createdInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n");
+ WorkList.insert(&MI);
+ LLVM_DEBUG(CreatedInstrs.insert(&MI));
+ }
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+ void changedInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+
+ void reportFullyCreatedInstrs() {
+ LLVM_DEBUG(for (const auto *MI
+ : CreatedInstrs) {
+ dbgs() << "Created: ";
+ MI->print(dbgs());
+ });
+ LLVM_DEBUG(CreatedInstrs.clear());
+ }
+};
+}
+
Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
: CInfo(Info), TPC(TPC) {
(void)this->TPC; // FIXME: Remove when used.
}
-bool Combiner::combineMachineInstrs(MachineFunction &MF) {
+bool Combiner::combineMachineInstrs(MachineFunction &MF,
+ GISelCSEInfo *CSEInfo) {
// If the ISel pipeline failed, do not bother running this pass.
// FIXME: Should this be here or in individual combiner passes.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
+ Builder =
+ CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>();
MRI = &MF.getRegInfo();
- Builder.setMF(MF);
+ Builder->setMF(MF);
+ if (CSEInfo)
+ Builder->setCSEInfo(CSEInfo);
LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
@@ -46,6 +105,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
bool MFChanged = false;
bool Changed;
+ MachineIRBuilder &B = *Builder.get();
do {
// Collect all instructions. Do a post order traversal for basic blocks and
@@ -53,6 +113,11 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
// down RPOT.
Changed = false;
GISelWorkList<512> WorkList;
+ WorkListMaintainer Observer(WorkList);
+ GISelObserverWrapper WrapperObserver(&Observer);
+ if (CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
continue;
@@ -71,8 +136,9 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
// Main Loop. Process the instructions here.
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
- LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";);
- Changed |= CInfo.combine(*CurrInst, Builder);
+ LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
+ Changed |= CInfo.combine(WrapperObserver, *CurrInst, B);
+ Observer.reportFullyCreatedInstrs();
}
MFChanged |= Changed;
} while (Changed);
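
A minimal standalone sketch of the observer-driven worklist idea WorkListMaintainer implements, with stand-in types (Instr, ChangeObserver) rather than the LLVM classes: creation schedules a visit, erasure cancels any pending one.

#include <iostream>
#include <set>
#include <string>

struct Instr { std::string Name; };

class ChangeObserver {
public:
  virtual ~ChangeObserver() = default;
  virtual void createdInstr(Instr &I) = 0;
  virtual void erasingInstr(Instr &I) = 0;
};

// Creation schedules a visit; erasure cancels any pending visit.
class TinyWorkListMaintainer : public ChangeObserver {
  std::set<Instr *> &WorkList;

public:
  explicit TinyWorkListMaintainer(std::set<Instr *> &WL) : WorkList(WL) {}
  void createdInstr(Instr &I) override { WorkList.insert(&I); }
  void erasingInstr(Instr &I) override { WorkList.erase(&I); }
};

int main() {
  std::set<Instr *> WorkList;
  TinyWorkListMaintainer Observer(WorkList);
  Instr A{"G_ADD"}, B{"G_TRUNC"};
  Observer.createdInstr(A);
  Observer.createdInstr(B);
  Observer.erasingInstr(A); // A is no longer visited
  std::cout << "pending visits: " << WorkList.size() << "\n"; // prints 1
  return 0;
}
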
diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 44e904a6391b..b1c5670a6dec 100644
--- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1,4 +1,4 @@
-//== ---lib/CodeGen/GlobalISel/GICombinerHelper.cpp --------------------- == //
+//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,44 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
-#define DEBUG_TYPE "gi-combine"
+#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
-CombinerHelper::CombinerHelper(MachineIRBuilder &B) :
- Builder(B), MRI(Builder.getMF().getRegInfo()) {}
+CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
+ MachineIRBuilder &B)
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
+
+void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
+ unsigned ToReg) const {
+ Observer.changingAllUsesOfReg(MRI, FromReg);
+
+ if (MRI.constrainRegAttrs(ToReg, FromReg))
+ MRI.replaceRegWith(FromReg, ToReg);
+ else
+ Builder.buildCopy(ToReg, FromReg);
+
+ Observer.finishedChangingAllUsesOfReg();
+}
+
+void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
+ MachineOperand &FromRegOp,
+ unsigned ToReg) const {
+ assert(FromRegOp.getParent() && "Expected an operand in an MI");
+ Observer.changingInstr(*FromRegOp.getParent());
+
+ FromRegOp.setReg(ToReg);
+
+ Observer.changedInstr(*FromRegOp.getParent());
+}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
@@ -30,12 +57,279 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
// a(sx) = COPY b(sx) -> Replace all uses of a with b.
if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
MI.eraseFromParent();
- MRI.replaceRegWith(DstReg, SrcReg);
+ replaceRegWith(MRI, DstReg, SrcReg);
return true;
}
return false;
}
+namespace {
+struct PreferredTuple {
+ LLT Ty; // The result type of the extend.
+ unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
+ MachineInstr *MI;
+};
+
+/// Select a preference between two uses. CurrentUse is the current preference
+/// while the *ForCandidate arguments describe the candidate under
+/// consideration.
+PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+ const LLT &TyForCandidate,
+ unsigned OpcodeForCandidate,
+ MachineInstr *MIForCandidate) {
+ if (!CurrentUse.Ty.isValid()) {
+ if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
+ CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ return CurrentUse;
+ }
+
+ // We permit the extend to hoist through basic blocks but this is only
+ // sensible if the target has extending loads. If you end up lowering back
+ // into a load and extend during the legalizer then the end result is
+ // hoisting the extend up to the load.
+
+ // Prefer defined extensions to undefined extensions as these are more
+ // likely to reduce the number of instructions.
+ if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
+ CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
+ OpcodeForCandidate != TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+
+ // Prefer sign extensions to zero extensions as sign-extensions tend to be
+ // more expensive, so there is more to gain from folding them into the load.
+ if (CurrentUse.Ty == TyForCandidate) {
+ if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
+ OpcodeForCandidate == TargetOpcode::G_ZEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
+ OpcodeForCandidate == TargetOpcode::G_SEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+
+ // This is potentially target specific. We've chosen the largest type
+ // because G_TRUNC is usually free. One potential catch with this is that
+ // some targets have a reduced number of larger registers than smaller
+ // registers and this choice potentially increases the live-range for the
+ // larger value.
+ if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+ return CurrentUse;
+}
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHI's is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+ MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+ std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)>
+ Inserter) {
+ MachineInstr &UseMI = *UseMO.getParent();
+
+ MachineBasicBlock *InsertBB = UseMI.getParent();
+
+ // If the use is a PHI then we want the predecessor block instead.
+ if (UseMI.isPHI()) {
+ MachineOperand *PredBB = std::next(&UseMO);
+ InsertBB = PredBB->getMBB();
+ }
+
+ // If the block is the same block as the def then we want to insert just after
+ // the def instead of at the start of the block.
+ if (InsertBB == DefMI.getParent()) {
+ MachineBasicBlock::iterator InsertPt = &DefMI;
+ Inserter(InsertBB, std::next(InsertPt));
+ return;
+ }
+
+ // Otherwise we want the start of the BB
+ Inserter(InsertBB, InsertBB->getFirstNonPHI());
+}
+} // end anonymous namespace
+
+bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
+ struct InsertionPoint {
+ MachineOperand *UseMO;
+ MachineBasicBlock *InsertIntoBB;
+ MachineBasicBlock::iterator InsertBefore;
+ InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore)
+ : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) {
+ }
+ };
+
+ // We match the loads and follow the uses to the extend instead of matching
+ // the extends and following the def to the load. This is because the load
+ // must remain in the same position for correctness (unless we also add code
+ // to find a safe place to sink it) whereas the extend is freely movable.
+ // It also prevents us from duplicating the load for the volatile case or just
+ // for performance.
+
+ if (MI.getOpcode() != TargetOpcode::G_LOAD &&
+ MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
+ MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
+ return false;
+
+ auto &LoadValue = MI.getOperand(0);
+ assert(LoadValue.isReg() && "Result wasn't a register?");
+
+ LLT LoadValueTy = MRI.getType(LoadValue.getReg());
+ if (!LoadValueTy.isScalar())
+ return false;
+
+ // Find the preferred type aside from the any-extends (unless it's the only
+ // one) and non-extending ops. We'll emit an extending load to that type and
+ // emit a variant of (extend (trunc X)) for the others according to the
+ // relative type sizes. At the same time, pick an extend to use based on the
+ // extend involved in the chosen type.
+ unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
+ ? TargetOpcode::G_ANYEXT
+ : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
+ PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
+ for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
+ if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
+ UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
+ UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
+ Preferred = ChoosePreferredUse(Preferred,
+ MRI.getType(UseMI.getOperand(0).getReg()),
+ UseMI.getOpcode(), &UseMI);
+ }
+ }
+
+ // There were no extends.
+ if (!Preferred.MI)
+ return false;
+ // It should be impossible to choose an extend without selecting a different
+ // type since by definition the result of an extend is larger.
+ assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
+
+ LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
+
+ // Rewrite the load to the chosen extending load.
+ unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+ Observer.changingInstr(MI);
+ MI.setDesc(
+ Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
+ ? TargetOpcode::G_SEXTLOAD
+ : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
+ ? TargetOpcode::G_ZEXTLOAD
+ : TargetOpcode::G_LOAD));
+
+ // Rewrite all the uses to fix up the types.
+ SmallVector<MachineInstr *, 1> ScheduleForErase;
+ SmallVector<InsertionPoint, 4> ScheduleForInsert;
+ for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
+ MachineInstr *UseMI = UseMO.getParent();
+
+ // If the extend is compatible with the preferred extend then we should fix
+ // up the type and extend so that it uses the preferred use.
+ if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
+ UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
+ unsigned UseDstReg = UseMI->getOperand(0).getReg();
+ MachineOperand &UseSrcMO = UseMI->getOperand(1);
+ const LLT &UseDstTy = MRI.getType(UseDstReg);
+ if (UseDstReg != ChosenDstReg) {
+ if (Preferred.Ty == UseDstTy) {
+ // If the use has the same type as the preferred use, then merge
+ // the vregs and erase the extend. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ANYEXT %1(s8)
+ // ... = ... %3(s32)
+ // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // ... = ... %2(s32)
+ replaceRegWith(MRI, UseDstReg, ChosenDstReg);
+ ScheduleForErase.push_back(UseMO.getParent());
+ } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
+ // If the preferred size is smaller, then keep the extend but extend
+ // from the result of the extending load. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s64) = G_ANYEXT %1(s8)
+ // ... = ... %3(s64)
+ // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // %3:_(s64) = G_ANYEXT %2:_(s32)
+ // ... = ... %3(s64)
+ replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
+ } else {
+ // If the preferred size is larger, then insert a truncate. For
+ // example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s64) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ZEXT %1(s8)
+ // ... = ... %3(s32)
+ // rewrites to:
+ // %2:_(s64) = G_SEXTLOAD ...
+ // %4:_(s8) = G_TRUNC %2:_(s64)
+ // %3:_(s32) = G_ZEXT %4:_(s8)
+ // ... = ... %3(s32)
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO,
+ [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
+ });
+ }
+ continue;
+ }
+ // The use is (one of) the uses of the preferred use we chose earlier.
+ // We're going to update the load to def this value later so just erase
+ // the old extend.
+ ScheduleForErase.push_back(UseMO.getParent());
+ continue;
+ }
+
+ // The use isn't an extend. Truncate back to the type we originally loaded.
+ // This is free on many targets.
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO,
+ [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
+ });
+ }
+
+ DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+ for (auto &InsertionInfo : ScheduleForInsert) {
+ MachineOperand *UseMO = InsertionInfo.UseMO;
+ MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
+ MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
+
+ MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+ if (PreviouslyEmitted) {
+ Observer.changingInstr(*UseMO->getParent());
+ UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+ Observer.changedInstr(*UseMO->getParent());
+ continue;
+ }
+
+ Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+ unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+ EmittedInsns[InsertIntoBB] = NewMI;
+ replaceRegOpWith(MRI, *UseMO, NewDstReg);
+ }
+ for (auto &EraseMI : ScheduleForErase) {
+ Observer.erasingInstr(*EraseMI);
+ EraseMI->eraseFromParent();
+ }
+ MI.getOperand(0).setReg(ChosenDstReg);
+ Observer.changedInstr(MI);
+
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
- return tryCombineCopy(MI);
+ if (tryCombineCopy(MI))
+ return true;
+ return tryCombineExtendingLoads(MI);
}
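
A toy model of the ordering ChoosePreferredUse implements, using a local enum rather than the real opcodes: defined extends beat G_ANYEXT, sext beats zext at equal width, and otherwise the widest result type wins.

#include <cassert>

enum Ext { AnyExt, ZExt, SExt }; // local stand-ins for the G_*EXT opcodes

struct UseInfo {
  unsigned Bits;
  Ext Kind;
};

// Returns true if Candidate should replace Current as the preferred use.
bool prefer(const UseInfo &Current, const UseInfo &Candidate) {
  if (Current.Kind == AnyExt && Candidate.Kind != AnyExt)
    return true; // defined extends beat undefined ones
  if (Candidate.Kind == AnyExt && Current.Kind != AnyExt)
    return false;
  if (Current.Bits == Candidate.Bits)
    return Current.Kind == ZExt && Candidate.Kind == SExt; // sext wins
  return Candidate.Bits > Current.Bits; // otherwise take the widest type
}

int main() {
  assert(prefer({32, ZExt}, {32, SExt}));    // sext preferred at equal width
  assert(!prefer({32, SExt}, {64, AnyExt})); // anyext never displaces sext
  assert(prefer({32, SExt}, {64, ZExt}));    // wider defined extend wins
  return 0;
}
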
diff --git a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
new file mode 100644
index 000000000000..c693acbbf10b
--- /dev/null
+++ b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -0,0 +1,40 @@
+//===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common code for observing and reacting to changes made
+// to machine IR at the generic level.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void GISelChangeObserver::changingAllUsesOfReg(
+ const MachineRegisterInfo &MRI, unsigned Reg) {
+ for (auto &ChangingMI : MRI.use_instructions(Reg)) {
+ changingInstr(ChangingMI);
+ ChangingAllUsesOfReg.insert(&ChangingMI);
+ }
+}
+
+void GISelChangeObserver::finishedChangingAllUsesOfReg() {
+ for (auto *ChangedMI : ChangingAllUsesOfReg)
+ changedInstr(*ChangedMI);
+}
+
+RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
+ MachineFunction::Delegate *Del)
+ : MF(MF), Delegate(Del) {
+ // Register this as the delegate for handling insertions and deletions of
+ // instructions.
+ MF.setDelegate(Del);
+}
+
+RAIIDelegateInstaller::~RAIIDelegateInstaller() { MF.resetDelegate(Delegate); }
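
A minimal sketch of the install/reset pattern RAIIDelegateInstaller follows, with stand-in Function and Delegate types rather than the MachineFunction API:

#include <cassert>

struct Delegate { /* change callbacks would live here */ };

struct Function {
  Delegate *Del = nullptr;
  void setDelegate(Delegate *D) { Del = D; }
  void resetDelegate(Delegate *D) {
    if (Del == D)
      Del = nullptr;
  }
};

class RAIIInstaller {
  Function &F;
  Delegate *Del;

public:
  RAIIInstaller(Function &Fn, Delegate *D) : F(Fn), Del(D) { F.setDelegate(D); }
  ~RAIIInstaller() { F.resetDelegate(Del); }
};

int main() {
  Function F;
  Delegate D;
  {
    RAIIInstaller Install(F, &D);
    assert(F.Del == &D); // delegate active for the scope's duration
  }
  assert(F.Del == nullptr); // uninstalled automatically on scope exit
  return 0;
}
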
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 80da50562d32..95f6274aa068 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -75,11 +76,16 @@
using namespace llvm;
+static cl::opt<bool>
+ EnableCSEInIRTranslator("enable-cse-in-irtranslator",
+ cl::desc("Should enable CSE in irtranslator"),
+ cl::Optional, cl::init(false));
char IRTranslator::ID = 0;
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
@@ -104,9 +110,44 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
+#ifndef NDEBUG
+namespace {
+/// Verify that every instruction created has the same DILocation as the
+/// instruction being translated.
+class DILocationVerifier : public GISelChangeObserver {
+ const Instruction *CurrInst = nullptr;
+
+public:
+ DILocationVerifier() = default;
+ ~DILocationVerifier() = default;
+
+ const Instruction *getCurrentInst() const { return CurrInst; }
+ void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
+
+ void erasingInstr(MachineInstr &MI) override {}
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
+
+ void createdInstr(MachineInstr &MI) override {
+ assert(getCurrentInst() && "Inserted instruction without a current MI");
+
+ // Only print the check message if we're actually checking it.
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
+ << " was copied to " << MI);
+#endif
+ assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
+ "Line info was not transferred to all instructions");
+ }
+};
+} // namespace
+#endif // ifndef NDEBUG
+
+
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -185,7 +226,7 @@ ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
unsigned Idx = 0;
while (auto Elt = C.getAggregateElement(Idx++)) {
auto EltRegs = getOrCreateVRegs(*Elt);
- std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs));
+ llvm::copy(EltRegs, std::back_inserter(*VRegs));
}
} else {
assert(SplitTys.size() == 1 && "unexpectedly split LLT");
@@ -279,7 +320,12 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ if (isa<Instruction>(U)) {
+ MachineInstr *FBinOpMI = FBinOp.getInstr();
+ const Instruction &I = cast<Instruction>(U);
+ FBinOpMI->copyIRFlags(I);
+ }
return true;
}
@@ -295,6 +341,13 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
}
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(1)));
+ return true;
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
@@ -312,8 +365,10 @@ bool IRTranslator::translateCompare(const User &U,
else if (Pred == CmpInst::FCMP_TRUE)
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
- else
- MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+ else {
+ auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+ FCmp->copyIRFlags(*CI);
+ }
return true;
}
@@ -323,14 +378,16 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
const Value *Ret = RI.getReturnValue();
if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
Ret = nullptr;
+
+ ArrayRef<unsigned> VRegs;
+ if (Ret)
+ VRegs = getOrCreateVRegs(*Ret);
+
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
- // FIXME: this interface should simplify when CallLowering gets adapted to
- // multiple VRegs per Value.
- unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
- return CLI->lowerReturn(MIRBuilder, Ret, VReg);
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -353,7 +410,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
MIRBuilder.buildBr(TgtBB);
// Link successors.
- for (const BasicBlock *Succ : BrInst.successors())
+ for (const BasicBlock *Succ : successors(&BrInst))
CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -413,7 +470,7 @@ bool IRTranslator::translateIndirectBr(const User &U,
// Link successors.
MachineBasicBlock &CurBB = MIRBuilder.getMBB();
- for (const BasicBlock *Succ : BrInst.successors())
+ for (const BasicBlock *Succ : successors(&BrInst))
CurBB.addSuccessor(&getMBB(*Succ));
return true;
@@ -544,8 +601,15 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- for (unsigned i = 0; i < ResRegs.size(); ++i)
- MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+ const SelectInst &SI = cast<SelectInst>(U);
+ const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition());
+ for (unsigned i = 0; i < ResRegs.size(); ++i) {
+ auto Select =
+ MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+ if (Cmp && isa<FPMathOperator>(Cmp)) {
+ Select->copyIRFlags(*Cmp);
+ }
+ }
return true;
}
@@ -704,29 +768,22 @@ void IRTranslator::getStackGuard(unsigned DstReg,
return;
MachinePointerInfo MPInfo(Global);
- MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
- *MemRefs =
+ MachineMemOperand *MemRef =
MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
DL->getPointerABIAlignment(0));
- MIB.setMemRefs(MemRefs, MemRefs + 1);
+ MIB.setMemRefs({MemRef});
}
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
- auto MIB = MIRBuilder.buildInstr(Op)
- .addDef(ResRegs[0])
- .addDef(ResRegs[1])
- .addUse(getOrCreateVReg(*CI.getOperand(0)))
- .addUse(getOrCreateVReg(*CI.getOperand(1)));
-
- if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
- unsigned Zero = getOrCreateVReg(
- *Constant::getNullValue(Type::getInt1Ty(CI.getContext())));
- MIB.addUse(Zero);
- }
+ MIRBuilder.buildInstr(Op)
+ .addDef(ResRegs[0])
+ .addDef(ResRegs[1])
+ .addUse(getOrCreateVReg(*CI.getOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getOperand(1)));
return true;
}
@@ -763,9 +820,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// instructions (in fact, they get ignored if they *do* exist).
MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
getOrCreateFrameIndex(*AI), DI.getDebugLoc());
- } else
- MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address),
- DI.getVariable(), DI.getExpression());
+ } else {
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
+ }
+ return true;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
+ assert(DI.getLabel() && "Missing label");
+
+ assert(DI.getLabel()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+
+ MIRBuilder.buildDbgLabel(DI.getLabel());
return true;
}
case Intrinsic::vaend:
@@ -807,55 +878,86 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::uadd_with_overflow:
- return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
case Intrinsic::sadd_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
case Intrinsic::usub_with_overflow:
- return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder);
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
case Intrinsic::ssub_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
case Intrinsic::umul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
- case Intrinsic::pow:
- MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
+ case Intrinsic::pow: {
+ auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ Pow->copyIRFlags(CI);
return true;
- case Intrinsic::exp:
- MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
+ }
+ case Intrinsic::exp: {
+ auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Exp->copyIRFlags(CI);
return true;
- case Intrinsic::exp2:
- MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
+ }
+ case Intrinsic::exp2: {
+ auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Exp2->copyIRFlags(CI);
return true;
- case Intrinsic::log:
- MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
+ }
+ case Intrinsic::log: {
+ auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log->copyIRFlags(CI);
return true;
- case Intrinsic::log2:
- MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
+ }
+ case Intrinsic::log2: {
+ auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log2->copyIRFlags(CI);
return true;
- case Intrinsic::fabs:
- MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+ }
+ case Intrinsic::log10: {
+ auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log10->copyIRFlags(CI);
return true;
- case Intrinsic::fma:
- MIRBuilder.buildInstr(TargetOpcode::G_FMA)
+ }
+ case Intrinsic::fabs: {
+ auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Fabs->copyIRFlags(CI);
+ return true;
+ }
+ case Intrinsic::trunc:
+ MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::round:
+ MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::fma: {
+ auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)))
.addUse(getOrCreateVReg(*CI.getArgOperand(2)));
+ FMA->copyIRFlags(CI);
return true;
+ }
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -867,11 +969,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
- MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2);
+ auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2});
+ FMA->copyIRFlags(CI);
} else {
LLT Ty = getLLTForType(*CI.getType(), *DL);
- auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1);
- MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2);
+ auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1});
+ FMul->copyIRFlags(CI);
+ auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2});
+ FAdd->copyIRFlags(CI);
}
return true;
}
@@ -893,6 +998,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
return true;
}
+ case Intrinsic::is_constant:
+ // If this wasn't constant-folded away by now, then it's not a
+ // constant.
+ MIRBuilder.buildConstant(getOrCreateVReg(CI), 0);
+ return true;
case Intrinsic::stackguard:
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
@@ -902,15 +1012,50 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(GuardVal, MIRBuilder);
AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
+ int FI = getOrCreateFrameIndex(*Slot);
+ MF->getFrameInfo().setStackProtectorIndex(FI);
+
MIRBuilder.buildStore(
GuardVal, getOrCreateVReg(*Slot),
- *MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*MF,
- getOrCreateFrameIndex(*Slot)),
- MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
- PtrTy.getSizeInBits() / 8, 8));
+ *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile,
+ PtrTy.getSizeInBits() / 8, 8));
return true;
}
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz: {
+ ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
+ bool isTrailing = ID == Intrinsic::cttz;
+ unsigned Opcode = isTrailing
+ ? Cst->isZero() ? TargetOpcode::G_CTTZ
+ : TargetOpcode::G_CTTZ_ZERO_UNDEF
+ : Cst->isZero() ? TargetOpcode::G_CTLZ
+ : TargetOpcode::G_CTLZ_ZERO_UNDEF;
+ MIRBuilder.buildInstr(Opcode)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
+ case Intrinsic::ctpop: {
+ MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
+ case Intrinsic::invariant_start: {
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
+ unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildUndef(Undef);
+ return true;
+ }
+ case Intrinsic::invariant_end:
+ return true;
+ case Intrinsic::ceil:
+ MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
}
return false;
}
@@ -1101,7 +1246,6 @@ bool IRTranslator::translateLandingPad(const User &U,
const LandingPadInst &LP = cast<LandingPadInst>(U);
MachineBasicBlock &MBB = MIRBuilder.getMBB();
- addLandingPadInfo(LP, MBB);
MBB.setIsEHPad();
@@ -1279,7 +1423,22 @@ bool IRTranslator::translateExtractElement(const User &U,
}
unsigned Res = getOrCreateVReg(U);
unsigned Val = getOrCreateVReg(*U.getOperand(0));
- unsigned Idx = getOrCreateVReg(*U.getOperand(1));
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
+ unsigned Idx = 0;
+ if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
+ if (CI->getBitWidth() != PreferredVecIdxWidth) {
+ APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
+ auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
+ Idx = getOrCreateVReg(*NewIdxCI);
+ }
+ }
+ if (!Idx)
+ Idx = getOrCreateVReg(*U.getOperand(1));
+ if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
+ const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+ Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg();
+ }
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
}
@@ -1299,7 +1458,7 @@ bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
SmallVector<MachineInstr *, 4> Insts;
for (auto Reg : getOrCreateVRegs(PI)) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
Insts.push_back(MIB.getInstr());
}
@@ -1402,9 +1561,18 @@ bool IRTranslator::translateAtomicRMW(const User &U,
}
void IRTranslator::finishPendingPhis() {
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ GISelObserverWrapper WrapperObserver(&Verifier);
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+#endif // ifndef NDEBUG
for (auto &Phi : PendingPHIs) {
const PHINode *PI = Phi.first;
ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+ EntryBuilder->setDebugLoc(PI->getDebugLoc());
+#ifndef NDEBUG
+ Verifier.setCurrentInst(PI);
+#endif // ifndef NDEBUG
// All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
// won't create extra control flow here, otherwise we need to find the
@@ -1435,15 +1603,19 @@ void IRTranslator::finishPendingPhis() {
bool IRTranslator::valueIsSplit(const Value &V,
SmallVectorImpl<uint64_t> *Offsets) {
SmallVector<LLT, 4> SplitTys;
+ if (Offsets && !Offsets->empty())
+ Offsets->clear();
computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
return SplitTys.size() > 1;
}
bool IRTranslator::translate(const Instruction &Inst) {
- CurBuilder.setDebugLoc(Inst.getDebugLoc());
+ CurBuilder->setDebugLoc(Inst.getDebugLoc());
+ EntryBuilder->setDebugLoc(Inst.getDebugLoc());
switch(Inst.getOpcode()) {
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(Inst, *CurBuilder.get());
#include "llvm/IR/Instruction.def"
default:
return false;
@@ -1452,11 +1624,11 @@ bool IRTranslator::translate(const Instruction &Inst) {
bool IRTranslator::translate(const Constant &C, unsigned Reg) {
if (auto CI = dyn_cast<ConstantInt>(&C))
- EntryBuilder.buildConstant(Reg, *CI);
+ EntryBuilder->buildConstant(Reg, *CI);
else if (auto CF = dyn_cast<ConstantFP>(&C))
- EntryBuilder.buildFConstant(Reg, *CF);
+ EntryBuilder->buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
- EntryBuilder.buildUndef(Reg);
+ EntryBuilder->buildUndef(Reg);
else if (isa<ConstantPointerNull>(C)) {
// As we are trying to build a constant val of 0 into a pointer,
// insert a cast to make them correct with respect to types.
@@ -1464,35 +1636,36 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
- EntryBuilder.buildCast(Reg, ZeroReg);
+ EntryBuilder->buildCast(Reg, ZeroReg);
} else if (auto GV = dyn_cast<GlobalValue>(&C))
- EntryBuilder.buildGlobalValue(Reg, GV);
+ EntryBuilder->buildGlobalValue(Reg, GV);
else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
if (!CAZ->getType()->isVectorTy())
return false;
// Return the scalar if it is a <1 x Ty> vector.
if (CAZ->getNumElements() == 1)
return translate(*CAZ->getElementValue(0u), Reg);
- std::vector<unsigned> Ops;
+ SmallVector<unsigned, 4> Ops;
for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
Constant &Elt = *CAZ->getElementValue(i);
Ops.push_back(getOrCreateVReg(Elt));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
return translate(*CV->getElementAsConstant(0), Reg);
- std::vector<unsigned> Ops;
+ SmallVector<unsigned, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
Ops.push_back(getOrCreateVReg(Elt));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
switch(CE->getOpcode()) {
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(*CE, *EntryBuilder.get());
#include "llvm/IR/Instruction.def"
default:
return false;
@@ -1504,9 +1677,9 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
- EntryBuilder.buildBlockAddress(Reg, BA);
+ EntryBuilder->buildBlockAddress(Reg, BA);
} else
return false;
@@ -1523,8 +1696,8 @@ void IRTranslator::finalizeFunction() {
// MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
// to avoid accessing free’d memory (in runOnMachineFunction) and to avoid
// destroying it twice (in ~IRTranslator() and ~LLVMContext())
- EntryBuilder = MachineIRBuilder();
- CurBuilder = MachineIRBuilder();
+ EntryBuilder.reset();
+ CurBuilder.reset();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -1532,12 +1705,30 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
const Function &F = MF->getFunction();
if (F.empty())
return false;
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ // Set the CSEConfig and run the analysis.
+ GISelCSEInfo *CSEInfo = nullptr;
+ TPC = &getAnalysis<TargetPassConfig>();
+ bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None;
+ // Disable CSE for O0.
+ bool EnableCSE = !IsO0 && EnableCSEInIRTranslator;
+ if (EnableCSE) {
+ EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
+ CSEInfo = &Wrapper.get(std::move(Config));
+ EntryBuilder->setCSEInfo(CSEInfo);
+ CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ CurBuilder->setCSEInfo(CSEInfo);
+ } else {
+ EntryBuilder = make_unique<MachineIRBuilder>();
+ CurBuilder = make_unique<MachineIRBuilder>();
+ }
CLI = MF->getSubtarget().getCallLowering();
- CurBuilder.setMF(*MF);
- EntryBuilder.setMF(*MF);
+ CurBuilder->setMF(*MF);
+ EntryBuilder->setMF(*MF);
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
- TPC = &getAnalysis<TargetPassConfig>();
ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
@@ -1556,7 +1747,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Setup a separate basic-block for the arguments and constants
MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
MF->push_back(EntryBB);
- EntryBuilder.setMBB(*EntryBB);
+ EntryBuilder->setMBB(*EntryBB);
// Create all blocks, in IR order, to preserve the layout.
for (const BasicBlock &BB: F) {
@@ -1593,7 +1784,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
@@ -1610,38 +1801,54 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
assert(VRegs.empty() && "VRegs already populated?");
VRegs.push_back(VArg);
} else {
- unpackRegs(*ArgIt, VArg, EntryBuilder);
+ unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
}
ArgIt++;
}
// Need to visit defs before uses when translating instructions.
- ReversePostOrderTraversal<const Function *> RPOT(&F);
- for (const BasicBlock *BB : RPOT) {
- MachineBasicBlock &MBB = getMBB(*BB);
- // Set the insertion point of all the following translations to
- // the end of this basic block.
- CurBuilder.setMBB(MBB);
-
- for (const Instruction &Inst : *BB) {
- if (translate(Inst))
- continue;
-
- OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- Inst.getDebugLoc(), BB);
- R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
-
- if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
- std::string InstStrStorage;
- raw_string_ostream InstStr(InstStrStorage);
- InstStr << Inst;
+ GISelObserverWrapper WrapperObserver;
+ if (EnableCSE && CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ {
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ WrapperObserver.addObserver(&Verifier);
+#endif // ifndef NDEBUG
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+ for (const BasicBlock *BB : RPOT) {
+ MachineBasicBlock &MBB = getMBB(*BB);
+ // Set the insertion point of all the following translations to
+ // the end of this basic block.
+ CurBuilder->setMBB(MBB);
+
+ for (const Instruction &Inst : *BB) {
+#ifndef NDEBUG
+ Verifier.setCurrentInst(&Inst);
+#endif // ifndef NDEBUG
+ if (translate(Inst))
+ continue;
+
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ Inst.getDebugLoc(), BB);
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
+
+ if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
+
+ R << ": '" << InstStr.str() << "'";
+ }
- R << ": '" << InstStr.str() << "'";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
}
-
- reportTranslationError(*MF, *TPC, *ORE, R);
- return false;
}
+#ifndef NDEBUG
+ WrapperObserver.removeObserver(&Verifier);
+#endif
}
finishPendingPhis();
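
As a side note, a small stand-alone model of the opcode choice made in the cttz/ctlz case above; the enum and helper are local stand-ins, not the generic opcodes themselves:

#include <cassert>

enum Opcode { G_CTTZ, G_CTTZ_ZERO_UNDEF, G_CTLZ, G_CTLZ_ZERO_UNDEF };

// IsZeroUndef mirrors the intrinsic's constant second operand: when it is
// true, a zero input is undefined and the _ZERO_UNDEF variant is legal.
Opcode pickCountZerosOpcode(bool IsTrailing, bool IsZeroUndef) {
  if (IsTrailing)
    return IsZeroUndef ? G_CTTZ_ZERO_UNDEF : G_CTTZ;
  return IsZeroUndef ? G_CTLZ_ZERO_UNDEF : G_CTLZ;
}

int main() {
  assert(pickCountZerosOpcode(true, false) == G_CTTZ);
  assert(pickCountZerosOpcode(false, true) == G_CTLZ_ZERO_UNDEF);
  return 0;
}
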
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5e77fcbb0ed9..38913e4afcba 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -80,5 +80,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
return true;
return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
- MI.implicit_operands().begin() == MI.implicit_operands().end();
+ empty(MI.implicit_operands());
}
diff --git a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 344f573a67f5..94eab9ae00c8 100644
--- a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -45,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
return [=](const LegalityQuery &Query) {
TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
- Query.MMODescrs[MMOIdx].Size};
+ Query.MMODescrs[MMOIdx].SizeInBits};
return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
TypesAndMemSize.end();
};
@@ -82,7 +82,7 @@ LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
- return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */);
+ return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
};
}
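
A quick standalone check of the bits-to-bytes conversion memSizeInBytesNotPow2 performs now that MMO sizes are tracked in bits; the helpers below are local, not the LLVM predicates:

#include <cassert>

bool isPowerOf2(unsigned V) { return V && !(V & (V - 1)); }

// MMO sizes are now carried in bits, so convert to bytes before the test.
bool memSizeInBytesNotPow2(unsigned SizeInBits) {
  return !isPowerOf2(SizeInBits / 8);
}

int main() {
  assert(!memSizeInBytesNotPow2(32)); // 4 bytes: a power of two
  assert(memSizeInBytesNotPow2(24));  // 3 bytes: not a power of two
  return 0;
}
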
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index 9a2aac998a84..84131e59948c 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -16,6 +16,9 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -32,11 +35,17 @@
using namespace llvm;
+static cl::opt<bool>
+ EnableCSEInLegalizer("enable-cse-in-legalizer",
+ cl::desc("Should enable CSE in Legalizer"),
+ cl::Optional, cl::init(false));
+
char Legalizer::ID = 0;
INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
@@ -47,6 +56,8 @@ Legalizer::Legalizer() : MachineFunctionPass(ID) {
void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -64,9 +75,54 @@ static bool isArtifact(const MachineInstr &MI) {
case TargetOpcode::G_SEXT:
case TargetOpcode::G_MERGE_VALUES:
case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
return true;
}
}
+using InstListTy = GISelWorkList<256>;
+using ArtifactListTy = GISelWorkList<128>;
+
+namespace {
+class LegalizerWorkListManager : public GISelChangeObserver {
+ InstListTy &InstList;
+ ArtifactListTy &ArtifactList;
+
+public:
+ LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts)
+ : InstList(Insts), ArtifactList(Arts) {}
+
+ void createdInstr(MachineInstr &MI) override {
+ // Only legalize pre-isel generic instructions.
+ // Legalization process could generate Target specific pseudo
+ // instructions with generic types. Don't record them
+ if (isPreISelGenericOpcode(MI.getOpcode())) {
+ if (isArtifact(MI))
+ ArtifactList.insert(&MI);
+ else
+ InstList.insert(&MI);
+ }
+ LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI);
+ }
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Erasing: " << MI);
+ InstList.remove(&MI);
+ ArtifactList.remove(&MI);
+ }
+
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Changing MI: " << MI);
+ }
+
+ void changedInstr(MachineInstr &MI) override {
+ // When insts change, we want to revisit them to legalize them again.
+ // We'll treat them the same as newly created instructions.
+ LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI);
+ createdInstr(MI);
+ }
+};
+} // namespace
bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
@@ -76,15 +132,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
init(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
- LegalizerHelper Helper(MF);
const size_t NumBlocks = MF.size();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Populate Insts
- GISelWorkList<256> InstList;
- GISelWorkList<128> ArtifactList;
+ InstListTy InstList;
+ ArtifactListTy ArtifactList;
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
// Perform legalization bottom up so we can DCE as we legalize.
// Traverse BB in RPOT and within each basic block, add insts top down,
@@ -103,24 +160,34 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
InstList.insert(&MI);
}
}
- Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
- // Only legalize pre-isel generic instructions.
- // Legalization process could generate Target specific pseudo
- // instructions with generic types. Don't record them
- if (isPreISelGenericOpcode(MI->getOpcode())) {
- if (isArtifact(*MI))
- ArtifactList.insert(MI);
- else
- InstList.insert(MI);
- }
- LLVM_DEBUG(dbgs() << ".. .. New MI: " << *MI;);
- });
+ std::unique_ptr<MachineIRBuilder> MIRBuilder;
+ GISelCSEInfo *CSEInfo = nullptr;
+ bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None;
+ // Disable CSE for O0.
+ bool EnableCSE = !IsO0 && EnableCSEInLegalizer;
+ if (EnableCSE) {
+ MIRBuilder = make_unique<CSEMIRBuilder>();
+ std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
+ CSEInfo = &Wrapper.get(std::move(Config));
+ MIRBuilder->setCSEInfo(CSEInfo);
+ } else
+ MIRBuilder = make_unique<MachineIRBuilder>();
+ // This observer keeps the worklist updated.
+ LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
+ // We want both the WorkListObserver and the CSEInfo to observe all changes.
+ // Use the wrapper observer.
+ GISelObserverWrapper WrapperObserver(&WorkListObserver);
+ if (EnableCSE && CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ // Now install the observer as the delegate to MF.
+ // This will keep all the observers notified about new insertions/deletions.
+ RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+ LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get());
const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
- LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo);
- auto RemoveDeadInstFromLists = [&InstList,
- &ArtifactList](MachineInstr *DeadMI) {
- InstList.remove(DeadMI);
- ArtifactList.remove(DeadMI);
+ LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(),
+ LInfo);
+ auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
+ WrapperObserver.erasingInstr(*DeadMI);
};
bool Changed = false;
do {
@@ -138,7 +205,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Error out if we couldn't legalize this instruction. We may want to
// fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
- Helper.MIRBuilder.stopRecordingInsertions();
+ Helper.MIRBuilder.stopObservingChanges();
reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
"unable to legalize instruction", MI);
return false;
@@ -149,7 +216,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *ArtifactList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ LLVM_DEBUG(dbgs() << MI << "Is dead\n");
RemoveDeadInstFromLists(&MI);
MI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
@@ -157,7 +224,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 4> DeadInstructions;
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);
+ LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
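The hunks above replace the MIRBuilder's single insertion callback with the
GISelChangeObserver interface, fanned out through GISelObserverWrapper and
installed as the MachineFunction delegate. For reference, a client only needs
the four callbacks this patch routes through the wrapper; the sketch below
uses a hypothetical DebugPrintObserver (illustrative, not part of this patch):

    #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
    #include "llvm/Support/Debug.h"
    #define DEBUG_TYPE "legalizer"

    namespace {
    // Minimal observer sketch: print every event the legalizer reports.
    struct DebugPrintObserver : public llvm::GISelChangeObserver {
      void createdInstr(llvm::MachineInstr &MI) override {
        LLVM_DEBUG(llvm::dbgs() << "created: " << MI);
      }
      void erasingInstr(llvm::MachineInstr &MI) override {
        LLVM_DEBUG(llvm::dbgs() << "erasing: " << MI);
      }
      void changingInstr(llvm::MachineInstr &MI) override {}
      void changedInstr(llvm::MachineInstr &MI) override {}
    };
    } // namespace

Such an observer could be chained next to the worklist manager with
WrapperObserver.addObserver(&MyObserver), exactly as CSEInfo is added above.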
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 87086af121b7..b3fc94cdec60 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,24 +15,37 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-
#define DEBUG_TYPE "legalizer"
using namespace llvm;
using namespace LegalizeActions;
-LegalizerHelper::LegalizerHelper(MachineFunction &MF)
- : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
+LegalizerHelper::LegalizerHelper(MachineFunction &MF,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &Builder)
+ : MIRBuilder(Builder), MRI(MF.getRegInfo()),
+ LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
MIRBuilder.setMF(MF);
+ MIRBuilder.setChangeObserver(Observer);
}
+LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &B)
+ : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
+ MIRBuilder.setMF(MF);
+ MIRBuilder.setChangeObserver(Observer);
+}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
@@ -59,8 +72,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
case Custom:
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
- return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
- : UnableToLegalize;
+ return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
+ : UnableToLegalize;
default:
LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
@@ -77,17 +90,20 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::SDIV_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
case TargetOpcode::G_UDIV:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::UDIV_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
case TargetOpcode::G_SREM:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::SREM_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
case TargetOpcode::G_UREM:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
assert(Size == 32 && "Unsupported size");
- return RTLIB::UREM_I32;
+ return RTLIB::CTLZ_I32;
case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
@@ -184,8 +200,9 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
- case TargetOpcode::G_UREM: {
- Type *HLTy = Type::getInt32Ty(Ctx);
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ Type *HLTy = IntegerType::get(Ctx, Size);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
return Status;
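With the two changes above, 64-bit integer division and remainder now map to
the standard runtime libcalls, and the libcall's IR type follows the actual
operand size instead of a hardcoded i32. Illustrative effect, assuming
RTLIB::SDIV_I64 resolves to the conventional compiler-rt/libgcc symbol:

    // What an s64 G_SDIV lowers to on a target taking the libcall path:
    //   %q:_(s64) = G_SDIV %a:_(s64), %b:_(s64)
    // becomes, in effect:
    extern "C" long long __divdi3(long long, long long); // runtime-provided
    long long sdiv64(long long a, long long b) { return __divdi3(a, b); }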
@@ -289,7 +306,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (int i = 0; i < NumParts; ++i)
DstRegs.push_back(
MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
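G_MERGE_VALUES is reserved for scalar results, so every place in narrowScalar
that reassembles parts now checks the destination type and uses G_BUILD_VECTOR
for vectors. The pattern repeats throughout the hunks below; as one helper it
would read as follows (mergeParts is a hypothetical name, not part of this
patch):

    // Sketch of the recurring reassembly step in narrowScalar.
    static void mergeParts(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                           unsigned DstReg, ArrayRef<unsigned> Parts) {
      if (MRI.getType(DstReg).isVector())
        B.buildBuildVector(DstReg, Parts); // vector dst: G_BUILD_VECTOR
      else
        B.buildMerge(DstReg, Parts);       // scalar dst: G_MERGE_VALUES
    }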
@@ -319,7 +341,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
CarryIn = CarryOut;
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -375,7 +400,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(SegReg);
}
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -436,7 +465,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
}
assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -462,12 +495,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
unsigned SrcReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
- MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
- MMO.getOrdering(), MMO.getFailureOrdering());
+ NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
+ MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
@@ -477,7 +510,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -504,12 +540,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (int i = 0; i < NumParts; ++i) {
unsigned DstReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
- MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
- MMO.getOrdering(), MMO.getFailureOrdering());
+ NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
+ MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
@@ -537,11 +573,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_OR: {
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
// Legalize bitwise operation:
// A = BinOp<Ty> B, C
// into:
@@ -580,11 +621,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// Do the operation on each small part.
for (int i = 0; i < NumParts; ++i)
- MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);
+ MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
+ {SrcsReg1[i], SrcsReg2[i]});
// Gather the destination registers into the final destination.
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -594,7 +639,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
unsigned OpIdx, unsigned ExtOpcode) {
MachineOperand &MO = MI.getOperand(OpIdx);
- auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
+ auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
MO.setReg(ExtB->getOperand(0).getReg());
}
@@ -603,7 +648,7 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
MachineOperand &MO = MI.getOperand(OpIdx);
unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
+ MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
MO.setReg(DstExt);
}
@@ -614,6 +659,69 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO: {
+ if (TypeIdx == 1)
+ return UnableToLegalize; // TODO
+ auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+ {MI.getOperand(2).getReg()});
+ auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+ {MI.getOperand(3).getReg()});
+ unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
+ ? TargetOpcode::G_ADD
+ : TargetOpcode::G_SUB;
+ // Do the arithmetic in the larger type.
+ auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
+ LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+ APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
+ auto AndOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_AND, {WideTy},
+ {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
+ // Overflow occurred iff masking changed the result, i.e. AndOp != NewOp.
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
+ AndOp);
+ // Now trunc the NewOp to the original result.
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTPOP: {
+ // First ZEXT the input.
+ auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
+ LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
+ if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+ // The count is the same in the larger type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit =
+ APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
+ MIBSrc = MIRBuilder.buildInstr(
+ TargetOpcode::G_OR, {WideTy},
+ {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
+ }
+ // Perform the operation at the larger size.
+ auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+ // This is already the correct result for CTPOP and the CTTZ variants.
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
+ MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ // The correct result is NewOp - (size of WideTy - size of CurTy).
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+ MIBNewOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_SUB, {WideTy},
+ {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
+ }
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ // Make the original instruction a trunc now, and update its source.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
+ MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
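Concrete numbers make the widenings above easy to check; the sketch below
models an s8 operation widened to s32 in plain C++ (illustrative only):

    #include <cstdint>

    // G_UADDO: add in the wide type, mask back to the original width, and
    // compare; overflow occurred iff masking changed the wide sum.
    bool uaddo8(uint8_t LHS, uint8_t RHS, uint8_t &Res) {
      uint32_t Wide   = uint32_t(LHS) + uint32_t(RHS); // G_ADD in WideTy
      uint32_t Masked = Wide & 0xFF;                   // G_AND with 2^8 - 1
      Res = uint8_t(Wide);                             // G_TRUNC
      return Wide != Masked;                           // G_ICMP ne
    }

    // G_CTLZ: the wide count sees 24 extra leading zeros, hence the G_SUB:
    //   ctlz8(x) == ctlz32(zext(x)) - (32 - 8)
    // G_CTTZ: the bit set just above the original width keeps a zero input
    // at the correct count of 8:
    //   cttz8(x) == cttz32(uint32_t(x) | 0x100)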
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
@@ -624,87 +732,100 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SHL:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ASHR:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_LSHR:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- // Perform operation at larger width (any extension is fine here, high bits
- // don't affect the result) and then truncate the result back to the
- // original type.
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changingInstr(MI);
+ if (TypeIdx == 0) {
+ // Perform operation at larger width (any extension is fine here, high
+ // bits don't affect the result) and then truncate the result back to the
+ // original type.
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ // Explicit extension is required here since high bits affect the result.
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ }
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
if (TypeIdx != 0)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
if (TypeIdx != 0)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_LOAD:
@@ -717,8 +838,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
LLVM_FALLTHROUGH;
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
+ Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_STORE: {
@@ -726,18 +848,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
WideTy != LLT::scalar(8))
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
+ Observer.changingInstr(MI);
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FCONSTANT: {
@@ -755,28 +879,38 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
default:
llvm_unreachable("Unhandled fp widen type");
}
+ Observer.changingInstr(MI);
SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_BRCOND:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FCMP:
+ Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
}
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ICMP:
+ Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
@@ -787,18 +921,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(MI, WideTy, 2, ExtOpcode);
widenScalarSrc(MI, WideTy, 3, ExtOpcode);
}
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_GEP:
assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
assert(TypeIdx == 0 && "Expecting only Idx 0");
+ Observer.changingInstr(MI);
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
@@ -808,9 +944,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ if (TypeIdx != 2)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_FCEIL:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
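The new G_FCEIL case uses the usual FPEXT/FPTRUNC round trip: every s16 value
and its ceiling are exactly representable at s32, so the narrowing truncation
is exact. An equivalent builder sequence, as a sketch (B is a MachineIRBuilder;
SrcReg/DstReg are illustrative; the actual code above mutates MI in place):

    auto Wide = B.buildInstr(TargetOpcode::G_FPEXT, {LLT::scalar(32)}, {SrcReg});
    auto Ceil = B.buildInstr(TargetOpcode::G_FCEIL, {LLT::scalar(32)}, {Wide});
    B.buildInstr(TargetOpcode::G_FPTRUNC, {DstReg}, {Ceil});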
@@ -984,6 +1136,30 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTPOP:
+ return lowerBitCount(MI, TypeIdx, Ty);
+ case TargetOpcode::G_UADDE: {
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned CarryOut = MI.getOperand(1).getReg();
+ unsigned LHS = MI.getOperand(2).getReg();
+ unsigned RHS = MI.getOperand(3).getReg();
+ unsigned CarryIn = MI.getOperand(4).getReg();
+
+ unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
+ unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
+
+ MIRBuilder.buildAdd(TmpRes, LHS, RHS);
+ MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
+ MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
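A scalar model of the G_UADDE lowering just added (illustrative; it mirrors
the exact instruction sequence built above):

    #include <cstdint>

    uint32_t uadde32(uint32_t LHS, uint32_t RHS, bool CarryIn, bool &CarryOut) {
      uint32_t Tmp = LHS + RHS;               // G_ADD
      uint32_t Res = Tmp + (CarryIn ? 1 : 0); // G_ZEXT + G_ADD
      // Mirrors the G_ICMP ult emitted above. Note the check appears to miss
      // the carry when RHS is all-ones and CarryIn is set (Res == LHS then),
      // so read this as a sketch of the emitted code, not a verified
      // reference implementation.
      CarryOut = Res < LHS;
      return Res;
    }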
@@ -993,10 +1169,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
// FIXME: Don't know how to handle secondary types yet.
if (TypeIdx != 0)
return UnableToLegalize;
+
+ MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
- case TargetOpcode::G_ADD: {
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ SmallVector<unsigned, 2> DstRegs;
+
unsigned NarrowSize = NarrowTy.getSizeInBits();
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Size = MRI.getType(DstReg).getSizeInBits();
@@ -1006,7 +1186,29 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
if (Size % NarrowSize != 0)
return UnableToLegalize;
- MIRBuilder.setInstr(MI);
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(TmpReg);
+ DstRegs.push_back(TmpReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_ADD: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ int NumParts = Size / NarrowSize;
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (Size % NarrowSize != 0)
+ return UnableToLegalize;
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
@@ -1018,9 +1220,164 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
DstRegs.push_back(DstReg);
}
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE: {
+ bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+ unsigned ValReg = MI.getOperand(0).getReg();
+ unsigned AddrReg = MI.getOperand(1).getReg();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned Size = MRI.getType(ValReg).getSizeInBits();
+ unsigned NumParts = Size / NarrowSize;
+
+ SmallVector<unsigned, 8> NarrowRegs;
+ if (!IsLoad)
+ extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
+
+ const LLT OffsetTy =
+ LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+ MachineFunction &MF = *MI.getMF();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
+ unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
+ unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
+ unsigned NewAddrReg = 0;
+ MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
+ MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
+ MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
+ NarrowTy.getSizeInBits() / 8, Alignment);
+ if (IsLoad) {
+ unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
+ NarrowRegs.push_back(Dst);
+ MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
+ } else {
+ MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+ }
+ }
+ if (IsLoad) {
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
+ else
+ MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
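For the G_LOAD/G_STORE splitting above, MinAlign keeps each part's alignment
honest: it returns the largest power of two dividing both the original
alignment and the byte offset. Worked numbers for a 128-bit load with 16-byte
alignment split into four s32 parts:

    // part 0: offset 0,  MinAlign(16, 0)  == 16
    // part 1: offset 4,  MinAlign(16, 4)  == 4
    // part 2: offset 8,  MinAlign(16, 8)  == 8
    // part 3: offset 12, MinAlign(16, 12) == 4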
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ unsigned Opc = MI.getOpcode();
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ auto isSupported = [this](const LegalityQuery &Q) {
+ auto QAction = LI.getAction(Q).Action;
+ return QAction == Legal || QAction == Libcall || QAction == Custom;
+ };
+ switch (Opc) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ // This trivially expands to CTLZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTLZ: {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Len = Ty.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
+ // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
+ auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
+ {Ty}, {SrcReg});
+ auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
+ auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
+ auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ SrcReg, MIBZero);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
+ MIBCtlzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we do this:
+ // NewLen = NextPowerOf2(Len);
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >> 16);
+ // x = x | (x >> 32); // for 64-bit input
+ // ... up to a shift of NewLen/2.
+ // return Len - popcount(x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned Op = SrcReg;
+ unsigned NewLen = PowerOf2Ceil(Len);
+ for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
+ auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
+ auto MIBOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_OR, {Ty},
+ {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
+ {Op, MIBShiftAmt})});
+ Op = MIBOp->getOperand(0).getReg();
+ }
+ auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
+ MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
+ {MIRBuilder.buildConstant(Ty, Len), MIBPop});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
+ // This trivially expands to CTTZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ: {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Len = Ty.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
+ // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
+ // zero.
+ auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
+ {Ty}, {SrcReg});
+ auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
+ auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
+ auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ SrcReg, MIBZero);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
+ MIBCttzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
+ auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
+ auto MIBNot =
+ MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
+ auto MIBTmp = MIRBuilder.buildInstr(
+ TargetOpcode::G_AND, {Ty},
+ {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
+ {SrcReg, MIBCstNeg1})});
+ if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
+ auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
+ MIRBuilder.buildInstr(
+ TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
+ {MIBCstLen,
+ MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
+ MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
+ return Legalized;
+ }
}
}
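The two expansions in lowerBitCount follow Hacker's Delight; a runnable 32-bit
model (illustrative, using a naive popcount):

    #include <cstdint>

    static int popcount32(uint32_t X) {
      int N = 0;
      for (; X; X &= X - 1) // clear the lowest set bit each iteration
        ++N;
      return N;
    }

    // G_CTLZ fallback: smear the highest set bit all the way down, so that
    // every position at and below it is one; Len - popcount is the answer.
    int ctlz32(uint32_t X) {
      X |= X >> 1;
      X |= X >> 2;
      X |= X >> 4;
      X |= X >> 8;
      X |= X >> 16;
      return 32 - popcount32(X);
    }

    // G_CTTZ fallback: ~X & (X - 1) sets exactly the bits below the lowest
    // set bit of X (all 32 bits when X == 0), so its popcount is the answer.
    int cttz32(uint32_t X) {
      return popcount32(~X & (X - 1));
    }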
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index ae061b64a38c..fa36ede5b976 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,7 +52,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
OS << Opcode << ", MMOs={";
for (const auto &MMODescr : MMODescrs) {
- OS << MMODescr.Size << ", ";
+ OS << MMODescr.SizeInBits << ", ";
}
OS << "}";
@@ -219,7 +220,7 @@ void LegalizerInfo::computeTables() {
Opcode, TypeIdx, ElementSize,
moreToWiderTypesAndLessToWidest(NumElementsActions));
}
- llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
+ llvm::sort(ElementSizesSeen);
SizeChangeStrategy VectorElementSizeChangeStrategy =
&unsupportedForDifferentSizes;
if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
@@ -298,8 +299,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
std::initializer_list<unsigned> Opcodes) {
unsigned Representative = *Opcodes.begin();
- assert(Opcodes.begin() != Opcodes.end() &&
- Opcodes.begin() + 1 != Opcodes.end() &&
+ assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
"Initializer list must have at least two opcodes");
for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
@@ -376,7 +376,8 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
}
bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
return false;
}
@@ -584,7 +585,7 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB)
if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
- return &MI;
+ return &MI;
}
return nullptr;
}
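Targets that override legalizeCustom must pick up the new GISelChangeObserver
parameter and report in-place mutations through it. A hypothetical override
(class name and body illustrative, not part of this patch):

    bool MyTargetLegalizerInfo::legalizeCustom(
        MachineInstr &MI, MachineRegisterInfo &MRI,
        MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const {
      Observer.changingInstr(MI);
      // ... rewrite MI in place ...
      Observer.changedInstr(MI);
      return true;
    }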
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 3271b54aa830..1f5611061994 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -10,6 +10,7 @@
/// This file implements the MachineIRBuilder class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -22,73 +23,72 @@
using namespace llvm;
-void MachineIRBuilderBase::setMF(MachineFunction &MF) {
+void MachineIRBuilder::setMF(MachineFunction &MF) {
State.MF = &MF;
State.MBB = nullptr;
State.MRI = &MF.getRegInfo();
State.TII = MF.getSubtarget().getInstrInfo();
State.DL = DebugLoc();
State.II = MachineBasicBlock::iterator();
- State.InsertedInstr = nullptr;
+ State.Observer = nullptr;
}
-void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) {
+void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) {
State.MBB = &MBB;
State.II = MBB.end();
assert(&getMF() == MBB.getParent() &&
"Basic block is in a different function");
}
-void MachineIRBuilderBase::setInstr(MachineInstr &MI) {
+void MachineIRBuilder::setInstr(MachineInstr &MI) {
assert(MI.getParent() && "Instruction is not part of a basic block");
setMBB(*MI.getParent());
State.II = MI.getIterator();
}
-void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator II) {
+void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; }
+
+void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II) {
assert(MBB.getParent() == &getMF() &&
"Basic block is in a different function");
State.MBB = &MBB;
State.II = II;
}
-void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const {
- if (State.InsertedInstr)
- State.InsertedInstr(InsertedInstr);
+void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const {
+ if (State.Observer)
+ State.Observer->createdInstr(*InsertedInstr);
}
-void MachineIRBuilderBase::recordInsertions(
- std::function<void(MachineInstr *)> Inserted) {
- State.InsertedInstr = std::move(Inserted);
+void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) {
+ State.Observer = &Observer;
}
-void MachineIRBuilderBase::stopRecordingInsertions() {
- State.InsertedInstr = nullptr;
-}
+void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; }
//------------------------------------------------------------------------------
// Build instruction variants.
//------------------------------------------------------------------------------
-MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) {
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
return insertInstr(buildInstrNoInsert(Opcode));
}
-MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) {
+MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode));
return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) {
+MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
getMBB().insert(getInsertPt(), MIB);
recordInsertion(MIB);
return MIB;
}
MachineInstrBuilder
-MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
- const MDNode *Expr) {
+MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -99,8 +99,9 @@ MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
/*IsIndirect*/ false, Reg, Variable, Expr));
}
-MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue(
- unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -111,9 +112,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue(
/*IsIndirect*/ true, Reg, Variable, Expr));
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable,
- const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
+ const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -126,8 +127,9 @@ MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable,
.addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue(
- const Constant &C, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
+ const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -149,16 +151,24 @@ MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue(
return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res,
- int Idx) {
+MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
+ assert(isa<DILabel>(Label) && "not a label");
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(State.DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstr(TargetOpcode::DBG_LABEL);
+
+ return MIB.addMetadata(Label);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_FRAME_INDEX)
.addDef(Res)
.addFrameIndex(Idx);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) {
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+ const GlobalValue *GV) {
assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
assert(getMRI()->getType(Res).getAddressSpace() ==
GV->getType()->getAddressSpace() &&
@@ -169,17 +179,14 @@ MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) {
.addGlobalAddress(GV);
}
-void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert((getMRI()->getType(Res).isScalar() ||
- getMRI()->getType(Res).isVector()) &&
- "invalid operand type");
- assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
- getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
+void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
+ const LLT &Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0 && Res == Op1) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0,
- unsigned Op1) {
+MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
+ unsigned Op1) {
assert(getMRI()->getType(Res).isPointer() &&
getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");
@@ -191,8 +198,8 @@ MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0,
}
Optional<MachineInstrBuilder>
-MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0,
- const LLT &ValueTy, uint64_t Value) {
+MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+ const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -208,9 +215,8 @@ MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0,
return buildGEP(Res, Op0, TmpReg);
}
-MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res,
- unsigned Op0,
- uint32_t NumBits) {
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+ uint32_t NumBits) {
assert(getMRI()->getType(Res).isPointer() &&
getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
@@ -220,24 +226,23 @@ MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res,
.addImm(NumBits);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) {
+MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) {
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
-MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) {
- assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() ||
- getMRI()->getType(Res) == getMRI()->getType(Op));
- return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::COPY, Res, Op);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) {
- LLT Ty = getMRI()->getType(Res);
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ LLT Ty = Res.getLLTTy(*getMRI());
assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
@@ -246,48 +251,55 @@ MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) {
NewVal = ConstantInt::get(getMF().getFunction().getContext(),
Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
- return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
+ auto MIB = buildInstr(TargetOpcode::G_CONSTANT);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addCImm(NewVal);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res,
- int64_t Val) {
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ int64_t Val) {
auto IntN = IntegerType::get(getMF().getFunction().getContext(),
- getMRI()->getType(Res).getSizeInBits());
+ Res.getLLTTy(*getMRI()).getSizeInBits());
ConstantInt *CI = ConstantInt::get(IntN, Val, true);
return buildConstant(Res, *CI);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) {
- assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
- return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val);
+ auto MIB = buildInstr(TargetOpcode::G_FCONSTANT);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addFPImm(&Val);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res,
- double Val) {
- LLT DstTy = getMRI()->getType(Res);
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ double Val) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
auto &Ctx = getMF().getFunction().getContext();
auto *CFP =
ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits()));
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst,
- MachineBasicBlock &Dest) {
+MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+ MachineBasicBlock &Dest) {
assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+ MachineMemOperand &MMO) {
return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res,
- unsigned Addr, MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
+ unsigned Res,
+ unsigned Addr,
+ MachineMemOperand &MMO) {
assert(getMRI()->getType(Res).isValid() && "invalid operand type");
assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
@@ -297,9 +309,8 @@ MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val,
- unsigned Addr,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+ MachineMemOperand &MMO) {
assert(getMRI()->getType(Val).isValid() && "invalid operand type");
assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
@@ -309,83 +320,73 @@ MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res,
- unsigned CarryOut,
- unsigned Op0, unsigned Op1,
- unsigned CarryIn) {
- assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
- assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
- getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
- assert(getMRI()->getType(CarryOut).isScalar() && "invalid operand type");
- assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) &&
- "type mismatch");
-
- return buildInstr(TargetOpcode::G_UADDE)
- .addDef(Res)
- .addDef(CarryOut)
- .addUse(Op0)
- .addUse(Op1)
- .addUse(CarryIn);
+MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
+ const DstOp &CarryOut,
+ const SrcOp &Op0,
+ const SrcOp &Op1,
+ const SrcOp &CarryIn) {
+ return buildInstr(TargetOpcode::G_UADDE, {Res, CarryOut},
+ {Op0, Op1, CarryIn});
}
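The DstOp/SrcOp refactoring above collapses most builder methods into thin
wrappers over the variadic buildInstr, and a destination may now be either an
existing register or a bare LLT, in which case a fresh virtual register is
created. A short usage sketch (B and MRI are an initialized MachineIRBuilder
and MachineRegisterInfo; names illustrative):

    LLT S32 = LLT::scalar(32);
    unsigned LHS = MRI.createGenericVirtualRegister(S32);
    unsigned RHS = MRI.createGenericVirtualRegister(S32);
    // A DstOp given as an LLT creates the result register on the fly:
    auto Sum = B.buildInstr(TargetOpcode::G_ADD, {S32}, {LHS, RHS});
    // A MachineInstrBuilder feeds straight back in as a SrcOp:
    auto Wide = B.buildZExt(LLT::scalar(64), Sum);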
-MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_SEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc,
- unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
+ const DstOp &Res,
+ const SrcOp &Op) {
assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||
TargetOpcode::G_SEXT == ExtOpc) &&
"Expecting Extending Opc");
- assert(getMRI()->getType(Res).isScalar() ||
- getMRI()->getType(Res).isVector());
- assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar());
+ assert(Res.getLLTTy(*getMRI()).isScalar() ||
+ Res.getLLTTy(*getMRI()).isVector());
+ assert(Res.getLLTTy(*getMRI()).isScalar() ==
+ Op.getLLTTy(*getMRI()).isScalar());
unsigned Opcode = TargetOpcode::COPY;
- if (getMRI()->getType(Res).getSizeInBits() >
- getMRI()->getType(Op).getSizeInBits())
+ if (Res.getLLTTy(*getMRI()).getSizeInBits() >
+ Op.getLLTTy(*getMRI()).getSizeInBits())
Opcode = ExtOpc;
- else if (getMRI()->getType(Res).getSizeInBits() <
- getMRI()->getType(Op).getSizeInBits())
+ else if (Res.getLLTTy(*getMRI()).getSizeInBits() <
+ Op.getLLTTy(*getMRI()).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
else
- assert(getMRI()->getType(Res) == getMRI()->getType(Op));
+ assert(Res.getLLTTy(*getMRI()) == Op.getLLTTy(*getMRI()));
- return buildInstr(Opcode).addDef(Res).addUse(Op);
+ return buildInstr(Opcode, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst,
- unsigned Src) {
- LLT SrcTy = getMRI()->getType(Src);
- LLT DstTy = getMRI()->getType(Dst);
+MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
+ const SrcOp &Src) {
+ LLT SrcTy = Src.getLLTTy(*getMRI());
+ LLT DstTy = Dst.getLLTTy(*getMRI());
if (SrcTy == DstTy)
return buildCopy(Dst, Src);
@@ -399,11 +400,11 @@ MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst,
Opcode = TargetOpcode::G_BITCAST;
}
- return buildInstr(Opcode).addDef(Dst).addUse(Src);
+ return buildInstr(Opcode, Dst, Src);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {
+MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+ uint64_t Index) {
#ifndef NDEBUG
assert(getMRI()->getType(Src).isValid() && "invalid operand type");
assert(getMRI()->getType(Res).isValid() && "invalid operand type");
@@ -424,8 +425,8 @@ MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {
.addImm(Index);
}
-void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
- ArrayRef<uint64_t> Indices) {
+void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
@@ -465,56 +466,67 @@ void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
}
}
-MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) {
- return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
+MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {});
}
-MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res,
- ArrayRef<unsigned> Ops) {
-
-#ifndef NDEBUG
- assert(!Ops.empty() && "invalid trivial sequence");
- LLT Ty = getMRI()->getType(Ops[0]);
- for (auto Reg : Ops)
- assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
- assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() ==
- getMRI()->getType(Res).getSizeInBits() &&
- "input operands do not cover output register");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
+}
- if (Ops.size() == 1)
- return buildCast(Res, Ops[0]);
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
+}
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
- MIB.addDef(Res);
- for (unsigned i = 0; i < Ops.size(); ++i)
- MIB.addUse(Ops[i]);
- return MIB;
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
-#ifndef NDEBUG
- assert(!Res.empty() && "invalid trivial sequence");
- LLT Ty = getMRI()->getType(Res[0]);
- for (auto Reg : Res)
- assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
- assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() ==
- getMRI()->getType(Op).getSizeInBits() &&
- "input operands do not cover output register");
-#endif
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
+}
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (unsigned i = 0; i < Res.size(); ++i)
- MIB.addDef(Res[i]);
- MIB.addUse(Op);
- return MIB;
+MachineInstrBuilder
+MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res,
- unsigned Src, unsigned Op,
- unsigned Index) {
+MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
+ unsigned Op, unsigned Index) {
assert(Index + getMRI()->getType(Op).getSizeInBits() <=
getMRI()->getType(Res).getSizeInBits() &&
"insertion past the end of a register");
@@ -531,9 +543,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res,
.addImm(Index);
}
-MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID,
- unsigned Res,
- bool HasSideEffects) {
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ unsigned Res,
+ bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
@@ -543,133 +555,52 @@ MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID,
return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, false);
- return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, false);
- return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- assert(getMRI()->getType(Op0) == getMRI()->getType(Op0) && "type mismatch");
- assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
- if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer())
- assert(getMRI()->getType(Res).isScalar() && "type mismatch");
- else
- assert(getMRI()->getType(Res).isVector() &&
- getMRI()->getType(Res).getNumElements() ==
- getMRI()->getType(Op0).getNumElements() &&
- "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_ICMP)
- .addDef(Res)
- .addPredicate(Pred)
- .addUse(Op0)
- .addUse(Op1);
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1});
}
-MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- assert((getMRI()->getType(Op0).isScalar() ||
- getMRI()->getType(Op0).isVector()) &&
- "invalid operand type");
- assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch");
- assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
- if (getMRI()->getType(Op0).isScalar())
- assert(getMRI()->getType(Res).isScalar() && "type mismatch");
- else
- assert(getMRI()->getType(Res).isVector() &&
- getMRI()->getType(Res).getNumElements() ==
- getMRI()->getType(Op0).getNumElements() &&
- "type mismatch");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
- return buildInstr(TargetOpcode::G_FCMP)
- .addDef(Res)
- .addPredicate(Pred)
- .addUse(Op0)
- .addUse(Op1);
+ return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1});
}
-MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res,
- unsigned Tst,
- unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
- "invalid operand type");
- assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) &&
- "type mismatch");
- if (ResTy.isScalar() || ResTy.isPointer())
- assert(getMRI()->getType(Tst).isScalar() && "type mismatch");
- else
- assert((getMRI()->getType(Tst).isScalar() ||
- (getMRI()->getType(Tst).isVector() &&
- getMRI()->getType(Tst).getNumElements() ==
- getMRI()->getType(Op0).getNumElements())) &&
- "type mismatch");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
+ const SrcOp &Tst,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
- return buildInstr(TargetOpcode::G_SELECT)
- .addDef(Res)
- .addUse(Tst)
- .addUse(Op0)
- .addUse(Op1);
+ return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1});
}
MachineInstrBuilder
-MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val,
- unsigned Elt, unsigned Idx) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- LLT ValTy = getMRI()->getType(Val);
- LLT EltTy = getMRI()->getType(Elt);
- LLT IdxTy = getMRI()->getType(Idx);
- assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
- assert(IdxTy.isScalar() && "invalid operand type");
- assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
- assert(ResTy.getElementType() == EltTy && "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT)
- .addDef(Res)
- .addUse(Val)
- .addUse(Elt)
- .addUse(Idx);
+MachineIRBuilder::buildInsertVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Elt, const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT, Res, {Val, Elt, Idx});
}
MachineInstrBuilder
-MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val,
- unsigned Idx) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- LLT ValTy = getMRI()->getType(Val);
- LLT IdxTy = getMRI()->getType(Idx);
- assert(ValTy.isVector() && "invalid operand type");
- assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");
- assert(IdxTy.isScalar() && "invalid operand type");
- assert(ValTy.getElementType() == ResTy && "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT)
- .addDef(Res)
- .addUse(Val)
- .addUse(Idx);
+MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT, Res, {Val, Idx});
}
-MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess(
+MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal,
unsigned NewVal, MachineMemOperand &MMO) {
#ifndef NDEBUG
@@ -697,9 +628,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess(
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
- unsigned CmpVal, unsigned NewVal,
- MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
+ unsigned CmpVal, unsigned NewVal,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT AddrTy = getMRI()->getType(Addr);
@@ -721,10 +652,11 @@ MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
.addMemOperand(&MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
- unsigned Addr, unsigned Val,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
+ unsigned OldValRes,
+ unsigned Addr,
+ unsigned Val,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT AddrTy = getMRI()->getType(Addr);
@@ -743,74 +675,75 @@ MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes,
+ unsigned Addr,
+ unsigned Val,
+ MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
#ifndef NDEBUG
assert(getMRI()->getType(Res).isPointer() && "invalid res type");
#endif
@@ -818,12 +751,9 @@ MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA);
}
-void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
- bool IsExtend) {
+void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy,
+ bool IsExtend) {
#ifndef NDEBUG
- LLT SrcTy = getMRI()->getType(Src);
- LLT DstTy = getMRI()->getType(Dst);
-
if (DstTy.isVector()) {
assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
@@ -839,3 +769,236 @@ void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
"invalid widening trunc");
#endif
}
+
+void MachineIRBuilder::validateSelectOp(const LLT &ResTy, const LLT &TstTy,
+ const LLT &Op0Ty, const LLT &Op1Ty) {
+#ifndef NDEBUG
+ assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+ "invalid operand type");
+ assert((ResTy == Op0Ty && ResTy == Op1Ty) && "type mismatch");
+ if (ResTy.isScalar() || ResTy.isPointer())
+ assert(TstTy.isScalar() && "type mismatch");
+ else
+ assert((TstTy.isScalar() ||
+ (TstTy.isVector() &&
+ TstTy.getNumElements() == Op0Ty.getNumElements())) &&
+ "type mismatch");
+#endif
+}
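+
+// For illustration of these rules: a G_SELECT producing <4 x s32> may use
+// either a scalar s1 test or a <4 x s1> vector test, while a scalar or
+// pointer result requires a scalar test; any other combination trips the
+// assertions above.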
+
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
+ ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_SELECT: {
+ assert(DstOps.size() == 1 && "Invalid select");
+ assert(SrcOps.size() == 3 && "Invalid select");
+ validateSelectOp(
+ DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI()));
+ break;
+ }
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM: {
+ // All these are binary ops.
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 2 && "Invalid Srcs");
+ validateBinaryOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()));
+ break;
+ }
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()), true);
+ break;
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_FPTRUNC:
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()), false);
+ break;
+ case TargetOpcode::COPY:
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ assert(DstOps[0].getLLTTy(*getMRI()) == LLT() ||
+ SrcOps[0].getLLTTy(*getMRI()) == LLT() ||
+ DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI()));
+ break;
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ assert(DstOps.size() == 1 && "Invalid Dst Operands");
+ assert(SrcOps.size() == 3 && "Invalid Src Operands");
+ // For F/ICMP, the first src operand is the predicate, followed by
+ // the two comparands.
+ assert(SrcOps[0].getSrcOpKind() == SrcOp::SrcType::Ty_Predicate &&
+ "Expecting predicate");
+ assert([&]() -> bool {
+ CmpInst::Predicate Pred = SrcOps[0].getPredicate();
+ return Opc == TargetOpcode::G_ICMP ? CmpInst::isIntPredicate(Pred)
+ : CmpInst::isFPPredicate(Pred);
+ }() && "Invalid predicate");
+ assert(SrcOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert([&]() -> bool {
+ LLT Op0Ty = SrcOps[1].getLLTTy(*getMRI());
+ LLT DstTy = DstOps[0].getLLTTy(*getMRI());
+ if (Op0Ty.isScalar() || Op0Ty.isPointer())
+ return DstTy.isScalar();
+ else
+ return DstTy.isVector() &&
+ DstTy.getNumElements() == Op0Ty.getNumElements();
+ }() && "Type Mismatch");
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ assert(!DstOps.empty() && "Invalid trivial sequence");
+ assert(SrcOps.size() == 1 && "Invalid src for Unmerge");
+ assert(std::all_of(DstOps.begin(), DstOps.end(),
+ [&, this](const DstOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ DstOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in output list");
+ assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ assert(!SrcOps.empty() && "invalid trivial sequence");
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ if (SrcOps.size() == 1)
+ return buildCast(DstOps[0], SrcOps[0]);
+ if (DstOps[0].getLLTTy(*getMRI()).isVector())
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
+ break;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid Dst size");
+ assert(SrcOps.size() == 2 && "Invalid Src size");
+ assert(SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert((DstOps[0].getLLTTy(*getMRI()).isScalar() ||
+ DstOps[0].getLLTTy(*getMRI()).isPointer()) &&
+ "Invalid operand type");
+ assert(SrcOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand type");
+ assert(SrcOps[0].getLLTTy(*getMRI()).getElementType() ==
+ DstOps[0].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid dst size");
+ assert(SrcOps.size() == 3 && "Invalid src size");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert(DstOps[0].getLLTTy(*getMRI()).getElementType() ==
+ SrcOps[1].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index");
+ assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() ==
+ SrcOps[0].getLLTTy(*getMRI()).getNumElements() &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+ assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input scalars do not exactly cover the outpur vector register");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits())
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps);
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return (Op.getLLTTy(*getMRI()).isVector() &&
+ Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI()));
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input vectors do not exactly cover the outpur vector register");
+ break;
+ }
+ case TargetOpcode::G_UADDE: {
+ assert(DstOps.size() == 2 && "Invalid no of dst operands");
+ assert(SrcOps.size() == 3 && "Invalid no of src operands");
+ assert(DstOps[0].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert((DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI())) &&
+ (DstOps[0].getLLTTy(*getMRI()) == SrcOps[1].getLLTTy(*getMRI())) &&
+ "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "type mismatch");
+ break;
+ }
+ }
+
+ auto MIB = buildInstr(Opc);
+ for (const DstOp &Op : DstOps)
+ Op.addDefToMIB(*getMRI(), MIB);
+ for (const SrcOp &Op : SrcOps)
+ Op.addSrcToMIB(MIB);
+ if (Flags)
+ MIB->setFlags(*Flags);
+ return MIB;
+}
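+// Illustrative usage (register and variable names hypothetical): with the
+// DstOp/SrcOp interface, the specialized builders above all funnel into this
+// single entry point, e.g.
+//
+//   LLT s32 = LLT::scalar(32);
+//   auto Sum = Builder.buildInstr(TargetOpcode::G_ADD, {s32}, {LhsReg, RhsReg});
+//   auto Sel = Builder.buildInstr(TargetOpcode::G_SELECT, {DstReg},
+//                                 {TstReg, Op0Reg, Op1Reg});
+//
+// Passing an LLT as a DstOp asks the builder to create a fresh virtual
+// register of that type; passing a register reuses the existing one.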
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 9e2d48d1dc42..dcc8b7cc23c5 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -115,8 +115,8 @@ bool RegBankSelect::assignmentMatch(
// By default we assume we will have to repair something.
OnlyAssign = false;
// Each part of a break down needs to end up in a different register.
- // In other word, Reg assignement does not match.
- if (ValMapping.NumBreakDowns > 1)
+ // In other words, Reg assignment does not match.
+ if (ValMapping.NumBreakDowns != 1)
return false;
const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
@@ -140,7 +140,7 @@ bool RegBankSelect::repairReg(
return false;
assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
// An empty range of new register means no repairing.
- assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
+ assert(!empty(NewVRegs) && "We should not have to repair");
// Assume we are repairing a use and thus, the original reg will be
// the source of the repairing.
@@ -528,7 +528,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
bool RegBankSelect::applyMapping(
MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
- // OpdMapper will hold all the information needed for the rewritting.
+ // OpdMapper will hold all the information needed for the rewriting.
RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
// First, place the repairing code.
@@ -714,18 +714,23 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
// - Terminators must be the last instructions:
// * Before, move the insert point before the first terminator.
// * After, we have to split the outcoming edges.
- unsigned Reg = MO.getReg();
if (Before) {
// Check whether Reg is defined by any terminator.
- MachineBasicBlock::iterator It = MI;
- for (auto Begin = MI.getParent()->begin();
- --It != Begin && It->isTerminator();)
- if (It->modifiesRegister(Reg, &TRI)) {
- // Insert the repairing code right after the definition.
- addInsertPoint(*It, /*Before*/ false);
- return;
- }
- addInsertPoint(*It, /*Before*/ true);
+ MachineBasicBlock::reverse_iterator It = MI;
+ auto REnd = MI.getParent()->rend();
+
+ for (; It != REnd && It->isTerminator(); ++It) {
+ assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
+ "copy insertion in middle of terminators not handled");
+ }
+
+ if (It == REnd) {
+ addInsertPoint(*MI.getParent()->begin(), true);
+ return;
+ }
+
+ // We are sure to be right before the first terminator.
+ addInsertPoint(*It, /*Before*/ false);
return;
}
// Make sure Reg is not redefined by other terminators, otherwise
@@ -733,7 +738,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
++It != End;)
// The machine verifier should reject this kind of code.
- assert(It->modifiesRegister(Reg, &TRI) && "Do not know where to split");
+ assert(It->modifiesRegister(MO.getReg(), &TRI) &&
+ "Do not know where to split");
// Split each outcoming edges.
MachineBasicBlock &Src = *MI.getParent();
for (auto &Succ : Src.successors())
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index dd15567ef1c1..28404e52d6ea 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -426,7 +426,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
"This mapping is too complex for this function");
iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
- if (NewRegs.begin() == NewRegs.end()) {
+ if (empty(NewRegs)) {
LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 1a5f88743d5f..59cbf93e7cd1 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -137,7 +137,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
// If we can move an instruction, we can remove it. Otherwise, it has
// a side-effect of some sort.
bool SawStore = false;
- if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore))
+ if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore) && !MI.isPHI())
return false;
// Instructions without side-effects are dead iff they only define dead vregs.
@@ -235,6 +235,57 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
return APF;
}
+Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
+ const unsigned Op2,
+ const MachineRegisterInfo &MRI) {
+ auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
+ if (MaybeOp1Cst && MaybeOp2Cst) {
+ LLT Ty = MRI.getType(Op1);
+ APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
+ APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ return C1 + C2;
+ case TargetOpcode::G_AND:
+ return C1 & C2;
+ case TargetOpcode::G_ASHR:
+ return C1.ashr(C2);
+ case TargetOpcode::G_LSHR:
+ return C1.lshr(C2);
+ case TargetOpcode::G_MUL:
+ return C1 * C2;
+ case TargetOpcode::G_OR:
+ return C1 | C2;
+ case TargetOpcode::G_SHL:
+ return C1 << C2;
+ case TargetOpcode::G_SUB:
+ return C1 - C2;
+ case TargetOpcode::G_XOR:
+ return C1 ^ C2;
+ case TargetOpcode::G_UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.udiv(C2);
+ case TargetOpcode::G_SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.sdiv(C2);
+ case TargetOpcode::G_UREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.urem(C2);
+ case TargetOpcode::G_SREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.srem(C2);
+ }
+ }
+ return None;
+}
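+
+// Illustrative usage (register names hypothetical): given
+//
+//   %a:_(s32) = G_CONSTANT i32 4
+//   %b:_(s32) = G_CONSTANT i32 3
+//   %r:_(s32) = G_ADD %a, %b
+//
+// ConstantFoldBinOp(TargetOpcode::G_ADD, RegA, RegB, MRI) returns an Optional
+// holding APInt(32, 7), so a combine may replace %r with a G_CONSTANT. None
+// is returned when an operand is not a constant vreg or when a division or
+// remainder by zero would occur.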
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 9f7f5e392a9a..d3364952f244 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -461,6 +461,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
unsigned CurIdx = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
+
+ // Make sure we use the same alignment AsmPrinter would use.
unsigned Align = DL.getPreferredAlignment(Globals[j]);
unsigned Padding = alignTo(MergedSize, Align) - MergedSize;
MergedSize += Padding;
@@ -516,6 +518,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
GlobalVariable::NotThreadLocal, AddrSpace);
MergedGV->setAlignment(MaxAlign);
+ MergedGV->setSection(Globals[i]->getSection());
const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
@@ -599,16 +602,15 @@ bool GlobalMerge::doInitialization(Module &M) {
IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
auto &DL = M.getDataLayout();
- DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals,
- BSSGlobals;
+ DenseMap<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 16>>
+ Globals, ConstGlobals, BSSGlobals;
bool Changed = false;
setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
- if (GV.isDeclaration() || GV.isThreadLocal() ||
- GV.hasSection() || GV.hasImplicitSection())
+ if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
continue;
// It's not safe to merge globals that may be preempted
@@ -623,6 +625,7 @@ bool GlobalMerge::doInitialization(Module &M) {
assert(PT && "Global variable is not a pointer!");
unsigned AddressSpace = PT->getAddressSpace();
+ StringRef Section = GV.getSection();
// Ignore all 'special' globals.
if (GV.getName().startswith("llvm.") ||
@@ -636,27 +639,27 @@ bool GlobalMerge::doInitialization(Module &M) {
Type *Ty = GV.getValueType();
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
if (TM &&
- TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
- BSSGlobals[AddressSpace].push_back(&GV);
+ TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS())
+ BSSGlobals[{AddressSpace, Section}].push_back(&GV);
else if (GV.isConstant())
- ConstGlobals[AddressSpace].push_back(&GV);
+ ConstGlobals[{AddressSpace, Section}].push_back(&GV);
else
- Globals[AddressSpace].push_back(&GV);
+ Globals[{AddressSpace, Section}].push_back(&GV);
}
}
for (auto &P : Globals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, false, P.first);
+ Changed |= doMerge(P.second, M, false, P.first.first);
for (auto &P : BSSGlobals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, false, P.first);
+ Changed |= doMerge(P.second, M, false, P.first.first);
if (EnableGlobalMergeOnConst)
for (auto &P : ConstGlobals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, true, P.first);
+ Changed |= doMerge(P.second, M, true, P.first.first);
return Changed;
}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index f12d00071b24..ceeba639ee09 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -273,7 +273,7 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> *LaterRedefs = nullptr);
+ SmallSet<MCPhysReg, 4> *LaterRedefs = nullptr);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
bool IgnoreBr = false);
@@ -1366,12 +1366,12 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
// Before stepping forward past MI, remember which regs were live
// before MI. This is needed to set the Undef flag only when reg is
// dead.
- SparseSet<unsigned> LiveBeforeMI;
+ SparseSet<MCPhysReg, identity<MCPhysReg>> LiveBeforeMI;
LiveBeforeMI.setUniverse(TRI->getNumRegs());
for (unsigned Reg : Redefs)
LiveBeforeMI.insert(Reg);
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Clobbers;
Redefs.stepForward(MI, Clobbers);
// Now add the implicit uses for each of the clobbered values.
@@ -1444,7 +1444,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
Redefs.init(*TRI);
if (MRI->tracksLiveness()) {
- // Initialize liveins to the first BB. These are potentiall redefined by
+ // Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
@@ -1740,7 +1740,7 @@ bool IfConverter::IfConvertDiamondCommon(
if (MRI->tracksLiveness()) {
for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Dummy;
Redefs.stepForward(MI, Dummy);
}
}
@@ -1806,13 +1806,13 @@ bool IfConverter::IfConvertDiamondCommon(
// generate:
// sub r0, r1, #1
// addne r0, r1, #1
- SmallSet<unsigned, 4> RedefsByFalse;
- SmallSet<unsigned, 4> ExtUses;
+ SmallSet<MCPhysReg, 4> RedefsByFalse;
+ SmallSet<MCPhysReg, 4> ExtUses;
if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
if (FI.isDebugInstr())
continue;
- SmallVector<unsigned, 4> Defs;
+ SmallVector<MCPhysReg, 4> Defs;
for (const MachineOperand &MO : FI.operands()) {
if (!MO.isReg())
continue;
@@ -1830,7 +1830,7 @@ bool IfConverter::IfConvertDiamondCommon(
}
}
- for (unsigned Reg : Defs) {
+ for (MCPhysReg Reg : Defs) {
if (!ExtUses.count(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -1976,7 +1976,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
static bool MaySpeculate(const MachineInstr &MI,
- SmallSet<unsigned, 4> &LaterRedefs) {
+ SmallSet<MCPhysReg, 4> &LaterRedefs) {
bool SawStore = true;
if (!MI.isSafeToMove(nullptr, SawStore))
return false;
@@ -1999,7 +1999,7 @@ static bool MaySpeculate(const MachineInstr &MI,
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> *LaterRedefs) {
+ SmallSet<MCPhysReg, 4> *LaterRedefs) {
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != nullptr;
for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
@@ -2148,7 +2148,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// Calculate the edge probability for the edge from ToBBI.BB to Succ,
// which is a portion of the edge probability from FromMBB to Succ. The
// portion ratio is the edge probability from ToBBI.BB to FromMBB (if
- // FromBBI is a successor of ToBBI.BB. See comment below for excepion).
+ // FromBBI is a successor of ToBBI.BB. See comment below for exception).
NewProb = MBPI->getEdgeProbability(&FromMBB, Succ);
// To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 0a447bc613b1..f411ee6745d0 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -90,7 +90,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// A data type for representing the result computed by \c
/// computeDependence. States whether it is okay to reorder the
/// instruction passed to \c computeDependence with at most one
- /// depednency.
+ /// dependency.
struct DependenceResult {
/// Can we actually re-order \p MI with \p Insts (see \c
/// computeDependence).
@@ -344,11 +344,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
return AR_MayAlias;
continue;
}
- llvm::AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
- MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
- MMO2->getAAInfo()));
+ llvm::AliasResult AAResult =
+ AA->alias(MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
+ MMO2->getAAInfo()));
if (AAResult != NoAlias)
return AR_MayAlias;
}
@@ -360,10 +360,10 @@ ImplicitNullChecks::SuitabilityResult
ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
- BaseReg != PointerReg)
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
+ !BaseOp->isReg() || BaseOp->getReg() != PointerReg)
return SR_Unsuitable;
// We want the mem access to be issued at a sane offset from PointerReg,
@@ -651,7 +651,7 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr(
}
}
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
return MIB;
}
diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp
new file mode 100644
index 000000000000..989fa164ad2d
--- /dev/null
+++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -0,0 +1,1359 @@
+//===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+//
+// This file defines the interleaved-load-combine pass. The pass searches for
+// ShuffleVectorInst instructions that execute interleaving loads. If a
+// matching pattern is found, it adds a combined load and further instructions
+// in a pattern that is detectable by InterleavedAccessPass. The old
+// instructions are left dead to be removed later. The pass is specifically
+// designed to run just before InterleavedAccessPass to find any left-over
+// instances that are not detected within earlier passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <algorithm>
+#include <cassert>
+#include <list>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "interleaved-load-combine"
+
+namespace {
+
+/// Statistic counter
+STATISTIC(NumInterleavedLoadCombine, "Number of combined loads");
+
+/// Option to disable the pass
+static cl::opt<bool> DisableInterleavedLoadCombine(
+ "disable-" DEBUG_TYPE, cl::init(false), cl::Hidden,
+ cl::desc("Disable combining of interleaved loads"));
+
+struct VectorInfo;
+
+struct InterleavedLoadCombineImpl {
+public:
+ InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
+ TargetMachine &TM)
+ : F(F), DT(DT), MSSA(MSSA),
+ TLI(*TM.getSubtargetImpl(F)->getTargetLowering()),
+ TTI(TM.getTargetTransformInfo(F)) {}
+
+ /// Scan the function for interleaved load candidates and execute the
+ /// replacement if applicable.
+ bool run();
+
+private:
+ /// Function this pass is working on
+ Function &F;
+
+ /// Dominator Tree Analysis
+ DominatorTree &DT;
+
+ /// Memory SSA Analysis
+ MemorySSA &MSSA;
+
+ /// Target Lowering Information
+ const TargetLowering &TLI;
+
+ /// Target Transform Information
+ const TargetTransformInfo TTI;
+
+ /// Find the load instruction in the set LIs that dominates all others;
+ /// return nullptr if there is none.
+ LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs);
+
+ /// Replace interleaved load candidates. It does additional
+ /// analyses if this makes sense. Returns true on success and false
+ /// if nothing has been changed.
+ bool combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE);
+
+ /// Given a set of VectorInfo containing candidates for a given interleave
+ /// factor, find a set that represents a 'factor' interleaved load.
+ bool findPattern(std::list<VectorInfo> &Candidates,
+ std::list<VectorInfo> &InterleavedLoad, unsigned Factor,
+ const DataLayout &DL);
+}; // InterleavedLoadCombine
+
+/// First Order Polynomial on an n-Bit Integer Value
+///
+/// Polynomial(Value) = Value * B + A + E*2^(n-e)
+///
+/// A and B are the coefficients. E*2^(n-e) is an error within 'e' most
+/// significant bits. It is introduced if an exact computation cannot be proven
+/// (e.g. division by 2).
+///
+/// As part of this optimization multiple loads will be combined. It is
+/// necessary to prove that loads are within some relative offset to each
+/// other. This class is used to prove relative offsets of values loaded from
+/// memory.
+///
+/// Representing an integer in this form is sound since addition in two's
+/// complement is associative (trivial) and multiplication distributes over the
+/// addition (see Proof(1) in Polynomial::mul). Further, both operations
+/// commute.
+//
+// Example:
+// declare @fn(i64 %IDX, <4 x float>* %PTR) {
+// %Pa1 = add i64 %IDX, 2
+// %Pa2 = lshr i64 %Pa1, 1
+// %Pa3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pa2
+// %Va = load <4 x float>, <4 x float>* %Pa3
+//
+// %Pb1 = add i64 %IDX, 4
+// %Pb2 = lshr i64 %Pb1, 1
+// %Pb3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pb2
+// %Vb = load <4 x float>, <4 x float>* %Pb3
+// ... }
+//
+// The goal is to prove that two loads load consecutive addresses.
+//
+// In this case the polynomials are constructed by the following
+// steps.
+//
+// The number tag #e specifies the error bits.
+//
+// Pa_0 = %IDX #0
+// Pa_1 = %IDX + 2 #0 | add 2
+// Pa_2 = %IDX/2 + 1 #1 | lshr 1
+// Pa_3 = %IDX/2 + 1 #1 | GEP, step signext to i64
+// Pa_4 = (%IDX/2)*16 + 16 #0 | GEP, multiply index by sizeof(<4 x float>)
+// Pa_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components
+//
+// Pb_0 = %IDX #0
+// Pb_1 = %IDX + 4 #0 | add 4
+// Pb_2 = %IDX/2 + 2 #1 | lshr 1
+// Pb_3 = %IDX/2 + 2 #1 | GEP, step signext to i64
+// Pb_4 = (%IDX/2)*16 + 32 #0 | GEP, multiply index by sizeof(<4 x float>)
+// Pb_5 = (%IDX/2)*16 + 32 #0 | GEP, add offset of leading components
+//
+// Pb_5 - Pa_5 = 16 #0 | subtract to get the offset
+//
+// Remark: %PTR is not maintained within this class. So in this instance the
+// offset of 16 can only be assumed if the pointers are equal.
+//
+class Polynomial {
+ /// Operations on B
+ enum BOps {
+ LShr,
+ Mul,
+ SExt,
+ Trunc,
+ };
+
+ /// Number of Error Bits e
+ unsigned ErrorMSBs;
+
+ /// Value
+ Value *V;
+
+ /// Coefficient B
+ SmallVector<std::pair<BOps, APInt>, 4> B;
+
+ /// Coefficient A
+ APInt A;
+
+public:
+ Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() {
+ IntegerType *Ty = dyn_cast<IntegerType>(V->getType());
+ if (Ty) {
+ ErrorMSBs = 0;
+ this->V = V;
+ A = APInt(Ty->getBitWidth(), 0);
+ }
+ }
+
+ Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), V(nullptr), B(), A(A) {}
+
+ Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), V(nullptr), B(), A(BitWidth, A) {}
+
+ Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr), B(), A() {}
+
+ /// Increment and clamp the number of undefined bits.
+ void incErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ ErrorMSBs += amt;
+ if (ErrorMSBs > A.getBitWidth())
+ ErrorMSBs = A.getBitWidth();
+ }
+
+ /// Decrement and clamp the number of undefined bits.
+ void decErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ if (ErrorMSBs > amt)
+ ErrorMSBs -= amt;
+ else
+ ErrorMSBs = 0;
+ }
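+
+ // For example, each single-bit lshr introduces one additional error MSB
+ // (Theorem(1) in Polynomial::lshr), while multiplying by a constant with c
+ // trailing zeros shifts c error MSBs out again (Proof(2) in
+ // Polynomial::mul).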
+
+ /// Apply an add on the polynomial
+ Polynomial &add(const APInt &C) {
+ // Note: Addition is associative in two's complement even in case of
+ // signed overflow.
+ //
+ // Error bits can only propagate into higher significant bits. As these are
+ // already regarded as undefined, there is no change.
+ //
+ // Theorem: Adding a constant to a polynomial does not change the error
+ // term.
+ //
+ // Proof:
+ //
+ // Since the addition is associative and commutes:
+ //
+ // (B + A + E*2^(n-e)) + C = B + (A + C) + E*2^(n-e)
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ A += C;
+ return *this;
+ }
+
+ /// Apply a multiplication onto the polynomial.
+ Polynomial &mul(const APInt &C) {
+ // Note: Multiplication distributes over the addition
+ //
+ // Theorem: Multiplication distributes over the addition
+ //
+ // Proof(1):
+ //
+ // (B+A)*C =
+ // = (B + A) + (B + A) + .. {C Times}
+ // addition is associative and commutes, hence
+ // = B + B + .. {C Times} .. + A + A + .. {C times}
+ // = B*C + A*C
+ // (see (function add) for signed values and overflows)
+ // [qed]
+ //
+ // Theorem: If C has c trailing zeros, error bits in A or B are shifted out
+ // to the left.
+ //
+ // Proof(2):
+ //
+ // Let B' and A' be the n-Bit inputs with some unknown errors EA,
+ // EB at e leading bits. B' and A' can be written down as:
+ //
+ // B' = B + 2^(n-e)*EB
+ // A' = A + 2^(n-e)*EA
+ //
+ // Let C' be an input with c trailing zero bits. C' can be written as
+ //
+ // C' = C*2^c
+ //
+ // Therefore we can compute the result by using distributivity and
+ // commutativity.
+ //
+ // (B'*C' + A'*C') = [B + 2^(n-e)*EB] * C' + [A + 2^(n-e)*EA] * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = (B'+A') * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = [B + A + 2^(n-e)*EB + 2^(n-e)*EA] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C*2^c =
+ // = (B + A) * C' + C*(EB + EA)*2^(n-e)*2^c =
+ //
+ // Let EC be the final error with EC = C*(EB + EA)
+ //
+ // = (B + A)*C' + EC*2^(n-e)*2^c =
+ // = (B + A)*C' + EC*2^(n-(e-c))
+ //
+ // Since EC is multiplied by 2^(n-(e-c)) the resulting error contains c
+ // less error bits than the input. c bits are shifted out to the left.
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ // Multiplying by one is a no-op.
+ if (C.isOneValue()) {
+ return *this;
+ }
+
+ // Multiplying by zero removes the coefficient B and defines all bits.
+ if (C.isNullValue()) {
+ ErrorMSBs = 0;
+ deleteB();
+ }
+
+ // See Proof(2): Trailing zero bits indicate a left shift. This removes
+ // leading bits from the result even if they are undefined.
+ decErrorMSBs(C.countTrailingZeros());
+
+ A *= C;
+ pushBOperation(Mul, C);
+ return *this;
+ }
+
+ /// Apply a logical shift right on the polynomial
+ Polynomial &lshr(const APInt &C) {
+ // Theorem(1): (B + A + E*2^(n-e)) >> 1 => (B >> 1) + (A >> 1) + E'*2^(n-e')
+ // where
+ // e' = e + 1,
+ // E is a e-bit number,
+ // E' is a e'-bit number,
+ // holds under the following precondition:
+ // pre(1): A % 2 = 0
+ // pre(2): e < n, (see Theorem(2) for the trivial case with e=n)
+ // where >> expresses a logical shift to the right, with adding zeros.
+ //
+ // We need to show that for every E there is an E'.
+ //
+ // B = b_h * 2^(n-1) + b_m * 2 + b_l
+ // A = a_h * 2^(n-1) + a_m * 2 (pre(1))
+ //
+ // where a_h, b_h, b_l are single bits, and a_m, b_m are (n-2) bit numbers
+ //
+ // Let X = (B + A + E*2^(n-e)) >> 1
+ // Let Y = (B >> 1) + (A >> 1) + E*2^(n-e) >> 1
+ //
+ // X = [B + A + E*2^(n-e)] >> 1 =
+ // = [ b_h * 2^(n-1) + b_m * 2 + b_l +
+ // + a_h * 2^(n-1) + a_m * 2 +
+ // + E * 2^(n-e) ] >> 1 =
+ //
+ // The sum is built by putting the overflow of [b_m + a_m] into the term
+ // 2^(n-1). As there are no more bits beyond 2^(n-1) the overflow within
+ // this bit is discarded. This is expressed by % 2.
+ //
+ // The bit in position 0 cannot overflow into the term (b_m + a_m).
+ //
+ // = [ ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-1) +
+ // + ((b_m + a_m) % 2^(n-2)) * 2 +
+ // + b_l + E * 2^(n-e) ] >> 1 =
+ //
+ // The shift is computed by dividing the terms by 2 and by cutting off
+ // b_l.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // by the definition in the Theorem e+1 = e'
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Compute Y by applying distributivity first
+ //
+ // Y = (B >> 1) + (A >> 1) + E*2^(n-e') =
+ // = (b_h * 2^(n-1) + b_m * 2 + b_l) >> 1 +
+ // + (a_h * 2^(n-1) + a_m * 2) >> 1 +
+ // + E * 2^(n-e) >> 1 =
+ //
+ // Again, the shift is computed by dividing the terms by 2 and by cutting
+ // off b_l.
+ //
+ // = b_h * 2^(n-2) + b_m +
+ // + a_h * 2^(n-2) + a_m +
+ // + E * 2^(n-(e+1)) =
+ //
+ // Again, the sum is built by putting the overflow of [b_m + a_m] into
+ // the term 2^(n-1). But this time there is room for a second bit in the
+ // term 2^(n-2); we add this bit to a new term and denote it o_h in a
+ // second step.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] >> 1) * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // Let o_h = [b_h + a_h + (b_m + a_m) >> (n-2)] >> 1
+ // Further replace e+1 by e'.
+ //
+ // = o_h * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Move o_h into the error term and construct E'. To ensure that there is
+ // no 2^x with negative x, this step requires pre(2) (e < n).
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + o_h * 2^(e'-1) * 2^(n-e') + | pre(2), move 2^(e'-1)
+ // | out of the old exponent
+ // + E * 2^(n-e') =
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + [o_h * 2^(e'-1) + E] * 2^(n-e') + | move 2^(e'-1) out of
+ // | the old exponent
+ //
+ // Let E' = o_h * 2^(e'-1) + E
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E' * 2^(n-e')
+ //
+ // Because X and Y differ only in their error terms and E' can be
+ // constructed as shown, the theorem holds.
+ // [qed]
+ //
+ // For completeness, in the case e = n it is also required to show that
+ // distributivity can be applied.
+ //
+ // In this case Theorem(1) transforms to (the pre-condition on A can also be
+ // dropped)
+ //
+ // Theorem(2): (B + A + E) >> 1 => (B >> 1) + (A >> 1) + E'
+ // where
+ // A, B, E, E' are two's complement numbers with the same bit
+ // width
+ //
+ // Let A + B + E = X
+ // Let (B >> 1) + (A >> 1) = Y
+ //
+ // Therefore we need to show that for every X and Y there is an E' which
+ // makes the equation
+ //
+ // X = Y + E'
+ //
+ // hold. This is trivially the case for E' = X - Y.
+ //
+ // [qed]
+ //
+ // Remark: Distributing lshr with an arbitrary shift amount n can be expressed as
+ // ((((B + A) lshr 1) lshr 1) ... ) {n times}.
+ // This construction induces n additional error bits at the left.
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ if (C.isNullValue())
+ return *this;
+
+ // Test if the result will be zero
+ unsigned shiftAmt = C.getZExtValue();
+ if (shiftAmt >= C.getBitWidth())
+ return mul(APInt(C.getBitWidth(), 0));
+
+ // The proof that the shiftAmt LSBs are zero for at least one summand is
+ // only possible for the constant summand A.
+ //
+ // If this can be proven add shiftAmt to the error counter
+ // `ErrorMSBs`. Otherwise set all bits as undefined.
+ if (A.countTrailingZeros() < shiftAmt)
+ ErrorMSBs = A.getBitWidth();
+ else
+ incErrorMSBs(shiftAmt);
+
+ // Apply the operation.
+ pushBOperation(LShr, C);
+ A = A.lshr(shiftAmt);
+
+ return *this;
+ }
+
+ /// Apply a sign-extend or truncate operation on the polynomial.
+ Polynomial &sextOrTrunc(unsigned n) {
+ if (n < A.getBitWidth()) {
+ // Truncate: Clearly undefined bits on the MSB side are removed
+ // if there are any.
+ decErrorMSBs(A.getBitWidth() - n);
+ A = A.trunc(n);
+ pushBOperation(Trunc, APInt(sizeof(n) * 8, n));
+ }
+ if (n > A.getBitWidth()) {
+ // Extend: Clearly extending first and adding later is different
+ // from adding first and extending later in all extended bits.
+ incErrorMSBs(n - A.getBitWidth());
+ A = A.sext(n);
+ pushBOperation(SExt, APInt(sizeof(n) * 8, n));
+ }
+
+ return *this;
+ }
+
+ /// Test if there is a coefficient B.
+ bool isFirstOrder() const { return V != nullptr; }
+
+ /// Test whether the coefficients B of two Polynomials are equal.
+ bool isCompatibleTo(const Polynomial &o) const {
+ // The polynomials use different bit widths.
+ if (A.getBitWidth() != o.A.getBitWidth())
+ return false;
+
+ // If neither Polynomial has the coefficient B, they are compatible.
+ if (!isFirstOrder() && !o.isFirstOrder())
+ return true;
+
+ // The index variable is different.
+ if (V != o.V)
+ return false;
+
+ // Check the operations.
+ if (B.size() != o.B.size())
+ return false;
+
+ auto ob = o.B.begin();
+ for (auto &b : B) {
+ if (b != *ob)
+ return false;
+ ob++;
+ }
+
+ return true;
+ }
+
+ /// Subtract two polynomials, return an undefined polynomial if
+ /// subtraction is not possible.
+ Polynomial operator-(const Polynomial &o) const {
+ // Return an undefined polynomial if incompatible.
+ if (!isCompatibleTo(o))
+ return Polynomial();
+
+ // If the polynomials are compatible (meaning they have the same
+ // coefficient B), B is eliminated. Thus a polynomial solely
+ // containing A is returned.
+ return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs));
+ }
+
+ /// Subtract a constant from a polynomial.
+ Polynomial operator-(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A -= C;
+ return Result;
+ }
+
+ /// Add a constant to a polynomial.
+ Polynomial operator+(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A += C;
+ return Result;
+ }
+
+ /// Returns true if it can be proven that two Polynomials are equal.
+ bool isProvenEqualTo(const Polynomial &o) {
+ // Subtract both polynomials and test if it is fully defined and zero.
+ Polynomial r = *this - o;
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue());
+ }
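+
+ // Example, following the polynomials in the file header: subtracting
+ //   Pa_5 = (%IDX/2)*16 + 16   from   Pb_5 = (%IDX/2)*16 + 32
+ // eliminates the shared coefficient B and leaves the fully defined
+ // constant 16, so Pb_5.isProvenEqualTo(Pa_5 + 16) holds and the two loads
+ // are proven to be 16 bytes apart.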
+
+ /// Print the polynomial into a stream.
+ void print(raw_ostream &OS) const {
+ OS << "[{#ErrBits:" << ErrorMSBs << "} ";
+
+ if (V) {
+ for (auto b : B)
+ OS << "(";
+ OS << "(" << *V << ") ";
+
+ for (auto b : B) {
+ switch (b.first) {
+ case LShr:
+ OS << "LShr ";
+ break;
+ case Mul:
+ OS << "Mul ";
+ break;
+ case SExt:
+ OS << "SExt ";
+ break;
+ case Trunc:
+ OS << "Trunc ";
+ break;
+ }
+
+ OS << b.second << ") ";
+ }
+ }
+
+ OS << "+ " << A << "]";
+ }
+
+private:
+ void deleteB() {
+ V = nullptr;
+ B.clear();
+ }
+
+ void pushBOperation(const BOps Op, const APInt &C) {
+ if (isFirstOrder()) {
+ B.push_back(std::make_pair(Op, C));
+ return;
+ }
+ }
+};
+
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) {
+ S.print(OS);
+ return OS;
+}
+#endif
+
+/// VectorInfo stores the following abstract information for each vector
+/// element:
+///
+/// 1) the memory address loaded into the element as a Polynomial,
+/// 2) a set of load instructions necessary to construct the vector,
+/// 3) a set of all other instructions that are necessary to create the
+///    vector and
+/// 4) a pointer value that can be used as relative base for all elements.
+struct VectorInfo {
+private:
+ VectorInfo(const VectorInfo &c) : VTy(c.VTy) {
+ llvm_unreachable(
+ "Copying VectorInfo is neither implemented nor necessary,");
+ }
+
+public:
+ /// Information of a Vector Element
+ struct ElementInfo {
+ /// Offset Polynomial.
+ Polynomial Ofs;
+
+ /// The load instruction used to load the entry. LI is null if the
+ /// pointer of the load instruction does not point to the entry.
+ LoadInst *LI;
+
+ ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr)
+ : Ofs(Offset), LI(LI) {}
+ };
+
+ /// Basic-block the load instructions are within
+ BasicBlock *BB;
+
+ /// Pointer value of all participating load instructions
+ Value *PV;
+
+ /// Participating load instructions
+ std::set<LoadInst *> LIs;
+
+ /// Participating instructions
+ std::set<Instruction *> Is;
+
+ /// Final shuffle-vector instruction
+ ShuffleVectorInst *SVI;
+
+ /// Information of the offset for each vector element
+ ElementInfo *EI;
+
+ /// Vector Type
+ VectorType *const VTy;
+
+ VectorInfo(VectorType *VTy)
+ : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) {
+ EI = new ElementInfo[VTy->getNumElements()];
+ }
+
+ virtual ~VectorInfo() { delete[] EI; }
+
+ unsigned getDimension() const { return VTy->getNumElements(); }
+
+ /// Test if the VectorInfo can be part of an interleaved load with the
+ /// specified factor.
+ ///
+ /// \param Factor of the interleave
+ /// \param DL The target's DataLayout
+ ///
+ /// \returns true if this is possible and false if not
+ bool isInterleaved(unsigned Factor, const DataLayout &DL) const {
+ unsigned Size = DL.getTypeAllocSize(VTy->getElementType());
+ for (unsigned i = 1; i < getDimension(); i++) {
+ if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) {
+ return false;
+ }
+ }
+ return true;
+ }
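+
+ // Example: for a <4 x float> candidate (Size == 4) and Factor == 2, the
+ // element offsets must be provably EI[0].Ofs + {0, 8, 16, 24}, i.e. the
+ // elements are 2 * sizeof(float) apart, leaving room for exactly one other
+ // interleaved vector between them.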
+
+ /// Recursively computes the vector information stored in V.
+ ///
+ /// This function delegates the work to specialized implementations
+ ///
+ /// \param V Value to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) {
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
+ if (SVI)
+ return computeFromSVI(SVI, Result, DL);
+ LoadInst *LI = dyn_cast<LoadInst>(V);
+ if (LI)
+ return computeFromLI(LI, Result, DL);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ return computeFromBCI(BCI, Result, DL);
+ return false;
+ }
+
+ /// BitCastInst specialization to compute the vector information.
+ ///
+ /// \param BCI BitCastInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Instruction *Op = dyn_cast<Instruction>(BCI->getOperand(0));
+
+ if (!Op)
+ return false;
+
+ VectorType *VTy = dyn_cast<VectorType>(Op->getType());
+ if (!VTy)
+ return false;
+
+ // We can only cast from vectors of larger elements to vectors of smaller
+ // (more numerous) elements.
+ if (Result.VTy->getNumElements() % VTy->getNumElements())
+ return false;
+
+ unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements();
+ unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType());
+ unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType());
+
+ if (NewSize * Factor != OldSize)
+ return false;
+
+ VectorInfo Old(VTy);
+ if (!compute(Op, Old, DL))
+ return false;
+
+ for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) {
+ for (unsigned j = 0; j < Factor; j++) {
+ Result.EI[i + j] =
+ ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize,
+ j == 0 ? Old.EI[i / Factor].LI : nullptr);
+ }
+ }
+
+ Result.BB = Old.BB;
+ Result.PV = Old.PV;
+ Result.LIs.insert(Old.LIs.begin(), Old.LIs.end());
+ Result.Is.insert(Old.Is.begin(), Old.Is.end());
+ Result.Is.insert(BCI);
+ Result.SVI = nullptr;
+
+ return true;
+ }
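+
+ // Example: a bitcast of <2 x i32> to <4 x i16> has Factor == 2,
+ // OldSize == 4 and NewSize == 2. Old element i expands into result
+ // elements 2*i and 2*i+1 at offsets Ofs and Ofs + 2; only the first of
+ // the pair inherits the originating LoadInst.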
+
+ /// ShuffleVectorInst specialization to compute vector information.
+ ///
+ /// \param SVI ShuffleVectorInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// Compute the left and the right side vector information and merge them by
+ /// applying the shuffle operation. This function also ensures that the left
+ /// and right side have compatible loads. This means that all loads are with
+ /// in the same basic block and are based on the same pointer.
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
+ const DataLayout &DL) {
+ VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType());
+ assert(ArgTy && "ShuffleVector Operand is not a VectorType");
+
+ // Compute the left hand vector information.
+ VectorInfo LHS(ArgTy);
+ if (!compute(SVI->getOperand(0), LHS, DL))
+ LHS.BB = nullptr;
+
+ // Compute the right hand vector information.
+ VectorInfo RHS(ArgTy);
+ if (!compute(SVI->getOperand(1), RHS, DL))
+ RHS.BB = nullptr;
+
+ // Neither operand produced sensible results?
+ if (!LHS.BB && !RHS.BB)
+ return false;
+ // Only RHS produced sensible results?
+ else if (!LHS.BB) {
+ Result.BB = RHS.BB;
+ Result.PV = RHS.PV;
+ }
+ // Only LHS produced sensible results?
+ else if (!RHS.BB) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results?
+ else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results but they are incompatible.
+ else {
+ return false;
+ }
+
+ // Merge and apply the operation on the offset information.
+ if (LHS.BB) {
+ Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());
+ Result.Is.insert(LHS.Is.begin(), LHS.Is.end());
+ }
+ if (RHS.BB) {
+ Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());
+ Result.Is.insert(RHS.Is.begin(), RHS.Is.end());
+ }
+ Result.Is.insert(SVI);
+ Result.SVI = SVI;
+
+ int j = 0;
+ for (int i : SVI->getShuffleMask()) {
+ assert((i < 2 * (signed)ArgTy->getNumElements()) &&
+ "Invalid ShuffleVectorInst (index out of bounds)");
+
+ if (i < 0)
+ Result.EI[j] = ElementInfo();
+ else if (i < (signed)ArgTy->getNumElements()) {
+ if (LHS.BB)
+ Result.EI[j] = LHS.EI[i];
+ else
+ Result.EI[j] = ElementInfo();
+ } else {
+ if (RHS.BB)
+ Result.EI[j] = RHS.EI[i - ArgTy->getNumElements()];
+ else
+ Result.EI[j] = ElementInfo();
+ }
+ j++;
+ }
+
+ return true;
+ }
+
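+ // Illustrative example (hypothetical operands): for
+ //   shufflevector <4 x i32> %a, <4 x i32> %b,
+ //                 <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ // mask indices below 4 select from %a and the rest from %b, so, assuming
+ // both sides were computed successfully, Result.EI becomes
+ //   { LHS.EI[0], RHS.EI[0], LHS.EI[1], RHS.EI[1] }.
+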
+ /// LoadInst specialization to compute vector information.
+ ///
+ /// This function also acts as the abort condition for the recursion.
+ ///
+ /// \param LI LoadInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromLI(LoadInst *LI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Value *BasePtr;
+ Polynomial Offset;
+
+ if (LI->isVolatile())
+ return false;
+
+ if (LI->isAtomic())
+ return false;
+
+ // Get the base polynomial
+ computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL);
+
+ Result.BB = LI->getParent();
+ Result.PV = BasePtr;
+ Result.LIs.insert(LI);
+ Result.Is.insert(LI);
+
+ for (unsigned i = 0; i < Result.getDimension(); i++) {
+ Value *Idx[2] = {
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), i),
+ };
+ int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2));
+ Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
+ }
+
+ return true;
+ }
+
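+ // Illustrative example (hypothetical IR): for
+ //   %v = load <4 x i32>, <4 x i32>* %p
+ // the base polynomial Offset is derived from %p and the element offsets
+ // become EI[i].Ofs == Offset + 4 * i for i = 0..3, with EI[0] also
+ // remembering the LoadInst itself.
+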
+ /// Recursively compute the polynomial of a binary operation.
+ ///
+ /// \param BO Input binary operation
+ /// \param Result Result polynomial
+ static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) {
+ Value *LHS = BO.getOperand(0);
+ Value *RHS = BO.getOperand(1);
+
+ // Find the RHS Constant if any
+ ConstantInt *C = dyn_cast<ConstantInt>(RHS);
+ if ((!C) && BO.isCommutative()) {
+ C = dyn_cast<ConstantInt>(LHS);
+ if (C)
+ std::swap(LHS, RHS);
+ }
+
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.add(C->getValue());
+ return;
+
+ case Instruction::LShr:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.lshr(C->getValue());
+ return;
+
+ default:
+ break;
+ }
+
+ Result = Polynomial(&BO);
+ }
+
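+ // Illustrative example (hypothetical IR): for %x = add i64 %a, 16 the
+ // polynomial of %a is computed recursively and 16 is added to it; the
+ // commutative swap above makes add i64 16, %a behave the same. For
+ // %y = lshr i64 %x, 2 the polynomial of %x is shifted right by 2. Any
+ // other opcode, or a non-constant operand, yields an opaque
+ // Polynomial(&BO).
+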
+ /// Recursively compute the polynomial of a value.
+ ///
+ /// \param V input value
+ /// \param Result result polynomial
+ static void computePolynomial(Value &V, Polynomial &Result) {
+ if (auto *BO = dyn_cast<BinaryOperator>(&V))
+ computePolynomialBinOp(*BO, Result);
+ else
+ Result = Polynomial(&V);
+ }
+
+ /// Compute the Polynomial representation of a Pointer type.
+ ///
+ /// \param Ptr input pointer value
+ /// \param Result result polynomial
+ /// \param BasePtr pointer the polynomial is based on
+ /// \param DL Datalayout of the target machine
+ static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result,
+ Value *&BasePtr,
+ const DataLayout &DL) {
+ // Not a pointer type? Return an undefined polynomial
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr.getType());
+ if (!PtrTy) {
+ Result = Polynomial();
+ BasePtr = nullptr;
+ return;
+ }
+ unsigned PointerBits =
+ DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
+
+ // Skip pointer casts; return a zero polynomial otherwise.
+ if (isa<CastInst>(&Ptr)) {
+ CastInst &CI = *cast<CastInst>(&Ptr);
+ switch (CI.getOpcode()) {
+ case Instruction::BitCast:
+ computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL);
+ break;
+ default:
+ BasePtr = &Ptr;
+ Result = Polynomial(PointerBits, 0);
+ break;
+ }
+ }
+ // Resolve GetElementPtrInst.
+ else if (isa<GetElementPtrInst>(&Ptr)) {
+ GetElementPtrInst &GEP = *cast<GetElementPtrInst>(&Ptr);
+
+ APInt BaseOffset(PointerBits, 0);
+
+ // Check if we can compute the Offset with accumulateConstantOffset
+ if (GEP.accumulateConstantOffset(DL, BaseOffset)) {
+ Result = Polynomial(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ return;
+ } else {
+ // Otherwise we allow that the last index operand of the GEP is
+ // non-constant.
+ unsigned idxOperand, e;
+ SmallVector<Value *, 4> Indices;
+ for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e;
+ idxOperand++) {
+ ConstantInt *IDX = dyn_cast<ConstantInt>(GEP.getOperand(idxOperand));
+ if (!IDX)
+ break;
+ Indices.push_back(IDX);
+ }
+
+ // It must also be the last operand.
+ if (idxOperand + 1 != e) {
+ Result = Polynomial();
+ BasePtr = nullptr;
+ return;
+ }
+
+ // Compute the polynomial of the index operand.
+ computePolynomial(*GEP.getOperand(idxOperand), Result);
+
+ // Compute base offset from zero based index, excluding the last
+ // variable operand.
+ BaseOffset =
+ DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices);
+
+ // Apply the operations of GEP to the polynomial.
+ unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType());
+ Result.sextOrTrunc(PointerBits);
+ Result.mul(APInt(PointerBits, ResultSize));
+ Result.add(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ }
+ }
+ // All other instructions are handled by using the value as base pointer and
+ // a zero polynomial.
+ else {
+ BasePtr = &Ptr;
+ Result = Polynomial(PointerBits, 0);
+ }
+ }
+
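+ // Illustrative example (hypothetical IR): for
+ //   %p = getelementptr [8 x i32], [8 x i32]* %base, i64 0, i64 %i
+ // accumulateConstantOffset fails on the variable index %i, so the
+ // polynomial of %i is computed, scaled by the result element size (4),
+ // and the constant offset of the leading indices (0 here) is added;
+ // BasePtr becomes %base.
+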
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const {
+ if (PV)
+ OS << *PV;
+ else
+ OS << "(none)";
+ OS << " + ";
+ for (unsigned i = 0; i < getDimension(); i++)
+ OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs;
+ OS << "]";
+ }
+#endif
+};
+
+} // anonymous namespace
+
+bool InterleavedLoadCombineImpl::findPattern(
+ std::list<VectorInfo> &Candidates, std::list<VectorInfo> &InterleavedLoad,
+ unsigned Factor, const DataLayout &DL) {
+ for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {
+ unsigned i;
+ // Try to find an interleaved load using C0 as its first line.
+ unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());
+
+ // List containing iterators pointing to the VectorInfos of the candidates
+ std::vector<std::list<VectorInfo>::iterator> Res(Factor, Candidates.end());
+
+ for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {
+ if (C->VTy != C0->VTy)
+ continue;
+ if (C->BB != C0->BB)
+ continue;
+ if (C->PV != C0->PV)
+ continue;
+
+ // Check whether the current value matches any of the Factor - 1
+ // remaining lines.
+ for (i = 1; i < Factor; i++) {
+ if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {
+ Res[i] = C;
+ }
+ }
+
+ for (i = 1; i < Factor; i++) {
+ if (Res[i] == Candidates.end())
+ break;
+ }
+ if (i == Factor) {
+ Res[0] = C0;
+ break;
+ }
+ }
+
+ if (Res[0] != Candidates.end()) {
+ // Move the result into the output
+ for (unsigned i = 0; i < Factor; i++) {
+ InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
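+// Illustrative example (hypothetical values): searching with Factor == 2
+// among <4 x i32> candidates (Size == 4), findPattern() takes C0 as line 0
+// and looks for a candidate with the same type, block and base pointer
+// whose first element offset is provably C0->EI[0].Ofs + 4; on success both
+// candidates are spliced into InterleavedLoad in interleave order.
+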
+LoadInst *
+InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
+ assert(!LIs.empty() && "No load instructions given.");
+
+ // All LIs are within the same BB. Select the first for a reference.
+ BasicBlock *BB = (*LIs.begin())->getParent();
+ BasicBlock::iterator FLI =
+ std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool {
+ return is_contained(LIs, &I);
+ });
+ assert(FLI != BB->end());
+
+ return cast<LoadInst>(FLI);
+}
+
+bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE) {
+ LLVM_DEBUG(dbgs() << "Checking interleaved load\n");
+
+ // The insertion point is the LoadInst which loads the first values. The
+ // following tests are used to prove that the combined load can be inserted
+ // just before InsertionPoint.
+ LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;
+
+ // Bail out if the first load, and thus the offset, is unknown.
+ if (!InsertionPoint)
+ return false;
+
+ std::set<LoadInst *> LIs;
+ std::set<Instruction *> Is;
+ std::set<Instruction *> SVIs;
+
+ unsigned InterleavedCost;
+ unsigned InstructionCost = 0;
+
+ // Get the interleave factor
+ unsigned Factor = InterleavedLoad.size();
+
+ // Merge all input sets used in analysis
+ for (auto &VI : InterleavedLoad) {
+ // Generate a set of all load instructions to be combined
+ LIs.insert(VI.LIs.begin(), VI.LIs.end());
+
+ // Generate a set of all instructions taking part in the interleaved
+ // load. This list excludes the instructions necessary for the
+ // polynomial construction.
+ Is.insert(VI.Is.begin(), VI.Is.end());
+
+ // Generate the set of the final ShuffleVectorInst.
+ SVIs.insert(VI.SVI);
+ }
+
+ // There is nothing to combine.
+ if (LIs.size() < 2)
+ return false;
+
+ // Test if all participating instructions will be dead after the
+ // transformation. If intermediate results are used, no performance gain can
+ // be expected. Also sum the cost of the instructions being left dead.
+ for (auto &I : Is) {
+ // Compute the old cost
+ InstructionCost +=
+ TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+
+ // The final SVIs are allowed to stay alive; all their uses will be replaced.
+ if (SVIs.find(I) != SVIs.end())
+ continue;
+
+ // If there are users outside the set to be eliminated, we abort the
+ // transformation. No gain can be expected.
+ for (const auto &U : I->users()) {
+ if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
+ return false;
+ }
+ }
+
+ // We know that all LoadInst are within the same BB. This guarantees that
+ // either everything or nothing is loaded.
+ LoadInst *First = findFirstLoad(LIs);
+
+ // To ensure that the loads can be combined safely, iterate over all loads
+ // and test that the corresponding defining access dominates the first load.
+ // This guarantees that there are no aliasing stores between the loads.
+ auto FMA = MSSA.getMemoryAccess(First);
+ for (auto LI : LIs) {
+ auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess();
+ if (!MSSA.dominates(MADef, FMA))
+ return false;
+ }
+ assert(!LIs.empty() && "There are no LoadInst to combine");
+
+ // The insertion point must dominate all final ShuffleVectorInsts.
+ for (auto &VI : InterleavedLoad) {
+ if (!DT.dominates(InsertionPoint, VI.SVI))
+ return false;
+ }
+
+ // All checks are done. Add instructions detectable by InterleavedAccessPass.
+ // The old instructions are left dead.
+ IRBuilder<> Builder(InsertionPoint);
+ Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
+ unsigned ElementsPerSVI =
+ InterleavedLoad.front().SVI->getType()->getNumElements();
+ VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI);
+
+ SmallVector<unsigned, 4> Indices;
+ for (unsigned i = 0; i < Factor; i++)
+ Indices.push_back(i);
+ InterleavedCost = TTI.getInterleavedMemoryOpCost(
+ Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(),
+ InsertionPoint->getPointerAddressSpace());
+
+ if (InterleavedCost >= InstructionCost) {
+ return false;
+ }
+
+ // Create a pointer cast for the wide load.
+ auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0),
+ ILTy->getPointerTo(),
+ "interleaved.wide.ptrcast");
+
+ // Create the wide load and update the MemorySSA.
+ auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(),
+ "interleaved.wide.load");
+ auto MSSAU = MemorySSAUpdater(&MSSA);
+ MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
+ LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
+ MSSAU.insertUse(MSSALoad);
+
+ // Create the final SVIs and replace all uses.
+ int i = 0;
+ for (auto &VI : InterleavedLoad) {
+ SmallVector<uint32_t, 4> Mask;
+ for (unsigned j = 0; j < ElementsPerSVI; j++)
+ Mask.push_back(i + j * Factor);
+
+ Builder.SetInsertPoint(VI.SVI);
+ auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
+ Mask, "interleaved.shuffle");
+ VI.SVI->replaceAllUsesWith(SVI);
+ i++;
+ }
+
+ NumInterleavedLoadCombine++;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Combined Interleaved Load", LI)
+ << "Load interleaved combined with factor "
+ << ore::NV("Factor", Factor);
+ });
+
+ return true;
+}
+
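+// Illustrative before/after sketch (hypothetical IR) of a factor-2 combine:
+//   %s0 = shufflevector ...   ; even elements, fed by narrow loads
+//   %s1 = shufflevector ...   ; odd elements, fed by narrow loads
+// becomes a single wide load plus strided shuffles that
+// InterleavedAccessPass can match:
+//   %wp = bitcast <4 x i32>* %p to <8 x i32>*   ; interleaved.wide.ptrcast
+//   %wl = load <8 x i32>, <8 x i32>* %wp        ; interleaved.wide.load
+//   %n0 = shufflevector <8 x i32> %wl, <8 x i32> undef,
+//                       <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+//   %n1 = shufflevector <8 x i32> %wl, <8 x i32> undef,
+//                       <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// The old instructions are left dead and cleaned up later.
+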
+bool InterleavedLoadCombineImpl::run() {
+ OptimizationRemarkEmitter ORE(&F);
+ bool changed = false;
+ unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor();
+
+ auto &DL = F.getParent()->getDataLayout();
+
+ // Start with the highest factor to avoid combining and recombining.
+ for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) {
+ std::list<VectorInfo> Candidates;
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) {
+
+ Candidates.emplace_back(SVI->getType());
+
+ if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) {
+ Candidates.pop_back();
+ continue;
+ }
+
+ if (!Candidates.back().isInterleaved(Factor, DL)) {
+ Candidates.pop_back();
+ }
+ }
+ }
+ }
+
+ std::list<VectorInfo> InterleavedLoad;
+ while (findPattern(Candidates, InterleavedLoad, Factor, DL)) {
+ if (combine(InterleavedLoad, ORE)) {
+ changed = true;
+ } else {
+ // Remove the first element of the interleaved load, but put the others
+ // back on the list and continue searching.
+ Candidates.splice(Candidates.begin(), InterleavedLoad,
+ std::next(InterleavedLoad.begin()),
+ InterleavedLoad.end());
+ }
+ InterleavedLoad.clear();
+ }
+ }
+
+ return changed;
+}
+
+namespace {
+/// This pass combines interleaved loads into a pattern detectable by
+/// InterleavedAccessPass.
+struct InterleavedLoadCombine : public FunctionPass {
+ static char ID;
+
+ InterleavedLoadCombine() : FunctionPass(ID) {
+ initializeInterleavedLoadCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Interleaved Load Combine Pass";
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (DisableInterleavedLoadCombine)
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()
+ << "\n");
+
+ return InterleavedLoadCombineImpl(
+ F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<MemorySSAWrapperPass>().getMSSA(),
+ TPC->getTM<TargetMachine>())
+ .run();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // anonymous namespace
+
+char InterleavedLoadCombine::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+
+FunctionPass *llvm::createInterleavedLoadCombinePass() {
+ return new InterleavedLoadCombine();
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 2cd389ce2c11..52e832cc38c1 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -40,14 +40,14 @@ static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
cl::desc("Enable generating trap for unreachable"));
void LLVMTargetMachine::initAsmInfo() {
- MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
- MII = TheTarget.createMCInstrInfo();
+ MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
+ MII.reset(TheTarget.createMCInstrInfo());
// FIXME: Having an MCSubtargetInfo on the target machine is a hack due
// to some backends having subtarget feature dependent module level
// code generation. This is similar to the hack in the AsmPrinter for
// module level assembly etc.
- STI = TheTarget.createMCSubtargetInfo(getTargetTriple().str(), getTargetCPU(),
- getTargetFeatureString());
+ STI.reset(TheTarget.createMCSubtargetInfo(
+ getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
MCAsmInfo *TmpAsmInfo =
TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str());
@@ -71,7 +71,7 @@ void LLVMTargetMachine::initAsmInfo() {
if (Options.ExceptionModel != ExceptionHandling::None)
TmpAsmInfo->setExceptionsType(Options.ExceptionModel);
- AsmInfo = TmpAsmInfo;
+ AsmInfo.reset(TmpAsmInfo);
}
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
@@ -95,29 +95,22 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
}
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
-static MCContext *
-addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
- bool DisableVerify, bool &WillCompleteCodeGenPipeline,
- raw_pwrite_stream &Out, MachineModuleInfo *MMI) {
+static TargetPassConfig *
+addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM,
+ bool DisableVerify, MachineModuleInfo &MMI) {
// Targets may override createPassConfig to provide a target-specific
// subclass.
- TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ TargetPassConfig *PassConfig = TM.createPassConfig(PM);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
- WillCompleteCodeGenPipeline = PassConfig->willCompleteCodeGenPipeline();
PM.add(PassConfig);
- if (!MMI)
- MMI = new MachineModuleInfo(TM);
- PM.add(MMI);
+ PM.add(&MMI);
if (PassConfig->addISelPasses())
return nullptr;
PassConfig->addMachinePasses();
PassConfig->setInitialized();
- if (!WillCompleteCodeGenPipeline)
- PM.add(createPrintMIRPass(Out));
-
- return &MMI->getContext();
+ return PassConfig;
}
bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
@@ -201,14 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
bool DisableVerify,
MachineModuleInfo *MMI) {
// Add common CodeGen passes.
- bool WillCompleteCodeGenPipeline = true;
- MCContext *Context = addPassesToGenerateCode(
- this, PM, DisableVerify, WillCompleteCodeGenPipeline, Out, MMI);
- if (!Context)
+ if (!MMI)
+ MMI = new MachineModuleInfo(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ if (!PassConfig)
return true;
- if (WillCompleteCodeGenPipeline &&
- addAsmPrinter(PM, Out, DwoOut, FileType, *Context))
+ if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
+ PM.add(createPrintMIRPass(Out));
+ } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
return true;
PM.add(createFreeMachineFunctionPass());
@@ -224,14 +219,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- bool WillCompleteCodeGenPipeline = true;
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify,
- WillCompleteCodeGenPipeline, Out,
- /*MachineModuleInfo*/ nullptr);
- if (!Ctx)
+ MachineModuleInfo *MMI = new MachineModuleInfo(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ if (!PassConfig)
return true;
- assert(WillCompleteCodeGenPipeline && "CodeGen pipeline has been altered");
+ assert(TargetPassConfig::willCompleteCodeGenPipeline() &&
+ "Cannot emit MC with limited codegen pipeline");
+ Ctx = &MMI->getContext();
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 5dbce841cfd5..f9f33a98a9d1 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -145,9 +145,9 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
dbgs() << "Latency Priority Queue\n";
dbgs() << " Number of Queue Entries: " << Queue.size() << "\n";
- for (auto const &SU : Queue) {
+ for (const SUnit *SU : Queue) {
dbgs() << " ";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
}
#endif
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index 417bd9d5aebe..fc0ebea2d36c 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -258,7 +258,8 @@ private:
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited);
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks);
bool ExtendRanges(MachineFunction &MF);
@@ -323,8 +324,10 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
raw_ostream &Out) const {
Out << '\n' << msg << '\n';
for (const MachineBasicBlock &BB : MF) {
- const auto &L = V.lookup(&BB);
- Out << "MBB: " << BB.getName() << ":\n";
+ const VarLocSet &L = V.lookup(&BB);
+ if (L.empty())
+ continue;
+ Out << "MBB: " << BB.getNumber() << ":\n";
for (unsigned VLL : L) {
const VarLoc &VL = VarLocIDs[VLL];
Out << " Var: " << VL.Var.getVar()->getName();
@@ -470,16 +473,21 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
int FI;
- const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand*, 1> Accesses;
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
// To identify a spill instruction, use the same criteria as in AsmPrinter.
- if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
- TII->hasStoreToStackSlot(MI, MMO, FI)) &&
- FrameInfo.isSpillSlotObjectIndex(FI)))
+ if (!((TII->isStoreToStackSlotPostFE(MI, FI) &&
+ FrameInfo.isSpillSlotObjectIndex(FI)) ||
+ (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) {
+ return FrameInfo.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue())
+ ->getFrameIndex());
+ }))))
return false;
auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
@@ -599,7 +607,7 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
LLVM_DEBUG(for (unsigned ID
: OpenRanges.getVarLocs()) {
// Copy OpenRanges to OutLocs, if not already present.
- dbgs() << "Add to OutLocs: ";
+ dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
VarLocIDs[ID].dump();
});
VarLocSet &VLS = OutLocs[CurMBB];
@@ -626,10 +634,12 @@ bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
/// This routine joins the analysis results of all incoming edges in @MBB by
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB reside in the same location.
-bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
- VarLocInMBB &InLocs, const VarLocMap &VarLocIDs,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited) {
- LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
+bool LiveDebugValues::join(
+ MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
+ const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
VarLocSet InLocsT; // Temporary incoming locations.
@@ -641,8 +651,11 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
// Ignore unvisited predecessor blocks. As we are processing
// the blocks in reverse post-order any unvisited block can
// be considered to not remove any incoming values.
- if (!Visited.count(p))
+ if (!Visited.count(p)) {
+ LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber()
+ << "\n");
continue;
+ }
auto OL = OutLocs.find(p);
// Join is null in case of empty OutLocs from any of the pred.
if (OL == OutLocs.end())
@@ -654,14 +667,32 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
InLocsT = OL->second;
else
InLocsT &= OL->second;
+
+ LLVM_DEBUG({
+ if (!InLocsT.empty()) {
+ for (auto ID : InLocsT)
+ dbgs() << " gathered candidate incoming var: "
+ << VarLocIDs[ID].Var.getVar()->getName() << "\n";
+ }
+ });
+
NumVisited++;
}
// Filter out DBG_VALUES that are out of scope.
VarLocSet KillSet;
- for (auto ID : InLocsT)
- if (!VarLocIDs[ID].dominates(MBB))
- KillSet.set(ID);
+ bool IsArtificial = ArtificialBlocks.count(&MBB);
+ if (!IsArtificial) {
+ for (auto ID : InLocsT) {
+ if (!VarLocIDs[ID].dominates(MBB)) {
+ KillSet.set(ID);
+ LLVM_DEBUG({
+ auto Name = VarLocIDs[ID].Var.getVar()->getName();
+ dbgs() << " killing " << Name << ", it doesn't dominate MBB\n";
+ });
+ }
+ }
+ }
InLocsT.intersectWithComplement(KillSet);
// As we are processing blocks in reverse post-order we
@@ -712,6 +743,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with spills.
+ // Blocks which are artificial, i.e. blocks which exclusively contain
+ // instructions without locations, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
std::priority_queue<unsigned int, std::vector<unsigned int>,
@@ -733,6 +768,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
dontTransferChanges);
+ auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
+ if (const DebugLoc &DL = MI.getDebugLoc())
+ return DL.getLine() != 0;
+ return false;
+ };
+ for (auto &MBB : MF)
+ if (none_of(MBB.instrs(), hasNonArtificialLocation))
+ ArtificialBlocks.insert(&MBB);
+
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after initialization", dbgs()));
@@ -758,7 +802,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited);
+ MBBJoined =
+ join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks);
Visited.insert(MBB);
if (MBBJoined) {
MBBJoined = false;
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 3ff03ec4a7ee..d0d889782a35 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -132,14 +132,18 @@ private:
unsigned WasIndirect : 1;
};
-/// LocMap - Map of where a user value is live, and its location.
+/// Map of where a user value is live, and its location.
using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>;
+/// Map of stack slot offsets for spilled locations.
+/// Non-spilled locations are not added to the map.
+using SpillOffsetMap = DenseMap<unsigned, unsigned>;
+
namespace {
class LDVImpl;
-/// UserValue - A user value is a part of a debug info user variable.
+/// A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
/// holds part of a user variable. The part is identified by a byte offset.
@@ -166,26 +170,26 @@ class UserValue {
/// lexical scope.
SmallSet<SlotIndex, 2> trimmedDefs;
- /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ /// Insert a DBG_VALUE into MBB at Idx for LocNo.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx,
- DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS,
+ SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
+ unsigned SpillOffset, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI);
- /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// Replace OldLocNo ranges with NewRegs ranges where NewRegs
/// is live. Returns true if any changes were made.
bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
LiveIntervals &LIS);
public:
- /// UserValue - Create a new UserValue.
+ /// Create a new UserValue.
UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
LocMap::Allocator &alloc)
: Variable(var), Expression(expr), dl(std::move(L)), leader(this),
locInts(alloc) {}
- /// getLeader - Get the leader of this value's equivalence class.
+ /// Get the leader of this value's equivalence class.
UserValue *getLeader() {
UserValue *l = leader;
while (l != l->leader)
@@ -193,10 +197,10 @@ public:
return leader = l;
}
- /// getNext - Return the next UserValue in the equivalence class.
+ /// Return the next UserValue in the equivalence class.
UserValue *getNext() const { return next; }
- /// match - Does this UserValue match the parameters?
+ /// Does this UserValue match the parameters?
bool match(const DILocalVariable *Var, const DIExpression *Expr,
const DILocation *IA) const {
// FIXME: The fragment should be part of the equivalence class, but not
@@ -204,7 +208,7 @@ public:
return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
}
- /// merge - Merge equivalence classes.
+ /// Merge equivalence classes.
static UserValue *merge(UserValue *L1, UserValue *L2) {
L2 = L2->getLeader();
if (!L1)
@@ -256,10 +260,10 @@ public:
return locations.size() - 1;
}
- /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ /// Ensure that all virtual register locations are mapped.
void mapVirtRegs(LDVImpl *LDV);
- /// addDef - Add a definition point to this value.
+ /// Add a definition point to this value.
void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
// Add a singular (Idx,Idx) -> Loc mapping.
@@ -271,63 +275,71 @@ public:
I.setValue(Loc);
}
- /// extendDef - Extend the current definition as far as possible down.
+ /// Extend the current definition as far as possible down.
+ ///
/// Stop when meeting an existing def or when leaving the live
- /// range of VNI.
- /// End points where VNI is no longer live are added to Kills.
- /// @param Idx Starting point for the definition.
- /// @param Loc Location number to propagate.
- /// @param LR Restrict liveness to where LR has the value VNI. May be null.
- /// @param VNI When LR is not null, this is the value to restrict to.
- /// @param Kills Append end points of VNI's live range to Kills.
- /// @param LIS Live intervals analysis.
+ /// range of VNI. End points where VNI is no longer live are added to Kills.
+ ///
+ /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
+ /// data-flow analysis to propagate them beyond basic block boundaries.
+ ///
+ /// \param Idx Starting point for the definition.
+ /// \param Loc Location number to propagate.
+ /// \param LR Restrict liveness to where LR has the value VNI. May be null.
+ /// \param VNI When LR is not null, this is the value to restrict to.
+ /// \param [out] Kills Append end points of VNI's live range to Kills.
+ /// \param LIS Live intervals analysis.
void extendDef(SlotIndex Idx, DbgValueLocation Loc,
LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS);
- /// addDefsFromCopies - The value in LI/LocNo may be copies to other
- /// registers. Determine if any of the copies are available at the kill
- /// points, and add defs if possible.
- /// @param LI Scan for copies of the value in LI->reg.
- /// @param LocNo Location number of LI->reg.
- /// @param WasIndirect Indicates if the original use of LI->reg was indirect
- /// @param Kills Points where the range of LocNo could be extended.
- /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+ /// The value in LI/LocNo may be copied to other registers. Determine if
+ /// any of the copies are available at the kill points, and add defs if
+ /// possible.
+ ///
+ /// \param LI Scan for copies of the value in LI->reg.
+ /// \param LocNo Location number of LI->reg.
+ /// \param WasIndirect Indicates if the original use of LI->reg was indirect
+ /// \param Kills Points where the range of LocNo could be extended.
+ /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
void addDefsFromCopies(
LiveInterval *LI, unsigned LocNo, bool WasIndirect,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
- /// computeIntervals - Compute the live intervals of all locations after
- /// collecting all their def points.
+ /// Compute the live intervals of all locations after collecting all their
+ /// def points.
void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
LiveIntervals &LIS, LexicalScopes &LS);
- /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
LiveIntervals &LIS);
- /// rewriteLocations - Rewrite virtual register locations according to the
- /// provided virtual register map. Record which locations were spilled.
- void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
- BitVector &SpilledLocations);
+ /// Rewrite virtual register locations according to the provided virtual
+ /// register map. Record the stack slot offsets for the locations that
+ /// were spilled.
+ void rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets);
- /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ /// Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const BitVector &SpilledLocations);
+ const SpillOffsetMap &SpillOffsets);
- /// getDebugLoc - Return DebugLoc of this UserValue.
+ /// Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
void print(raw_ostream &, const TargetRegisterInfo *);
};
-/// LDVImpl - Implementation of the LiveDebugVariables pass.
+/// Implementation of the LiveDebugVariables pass.
class LDVImpl {
LiveDebugVariables &pass;
LocMap::Allocator allocator;
@@ -341,7 +353,7 @@ class LDVImpl {
/// Whether the machine function is modified during the pass.
bool ModifiedMF = false;
- /// userValues - All allocated UserValue instances.
+ /// All allocated UserValue instances.
SmallVector<std::unique_ptr<UserValue>, 8> userValues;
/// Map virtual register to eq class leader.
@@ -352,27 +364,31 @@ class LDVImpl {
using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
UVMap userVarMap;
- /// getUserValue - Find or create a UserValue.
+ /// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
const DebugLoc &DL);
- /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ /// Find the EC leader for VirtReg or null.
UserValue *lookupVirtReg(unsigned VirtReg);
- /// handleDebugValue - Add DBG_VALUE instruction to our maps.
- /// @param MI DBG_VALUE instruction
- /// @param Idx Last valid SLotIndex before instruction.
- /// @return True if the DBG_VALUE instruction should be deleted.
+ /// Add DBG_VALUE instruction to our maps.
+ ///
+ /// \param MI DBG_VALUE instruction
+ /// \param Idx Last valid SlotIndex before instruction.
+ ///
+ /// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
- /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
- /// a UserValue def for each instruction.
- /// @param mf MachineFunction to be scanned.
- /// @return True if any debug values were found.
+ /// Collect and erase all DBG_VALUE instructions, adding a UserValue def
+ /// for each instruction.
+ ///
+ /// \param mf MachineFunction to be scanned.
+ ///
+ /// \returns True if any debug values were found.
bool collectDebugValues(MachineFunction &mf);
- /// computeIntervals - Compute the live intervals of all user values after
- /// collecting all their def points.
+ /// Compute the live intervals of all user values after collecting all
+ /// their def points.
void computeIntervals();
public:
@@ -380,7 +396,7 @@ public:
bool runOnMachineFunction(MachineFunction &mf);
- /// clear - Release all memory.
+ /// Release all memory.
void clear() {
MF = nullptr;
userValues.clear();
@@ -393,13 +409,13 @@ public:
ModifiedMF = false;
}
- /// mapVirtReg - Map virtual register to an equivalence class.
+ /// Map virtual register to an equivalence class.
void mapVirtReg(unsigned VirtReg, UserValue *EC);
- /// splitRegister - Replace all references to OldReg with NewRegs.
+ /// Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
- /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ /// Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM);
void print(raw_ostream&);
@@ -578,30 +594,33 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
MachineBasicBlock *MBB = &*MFI;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE;) {
- if (!MBBI->isDebugValue()) {
+ // Use the first debug instruction in the sequence to get a SlotIndex
+ // for following consecutive debug instructions.
+ if (!MBBI->isDebugInstr()) {
++MBBI;
continue;
}
- // DBG_VALUE has no slot index, use the previous instruction instead.
+ // Debug instructions have no slot index. Use the previous
+ // non-debug instruction's SlotIndex as their SlotIndex.
SlotIndex Idx =
MBBI == MBB->begin()
? LIS->getMBBStartIdx(MBB)
: LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
- // Handle consecutive DBG_VALUE instructions with the same slot index.
+ // Handle consecutive debug instructions with the same slot index.
do {
- if (handleDebugValue(*MBBI, Idx)) {
+ // Only handle DBG_VALUE in handleDebugValue(). Skip all other
+ // kinds of debug instructions.
+ if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) {
MBBI = MBB->erase(MBBI);
Changed = true;
} else
++MBBI;
- } while (MBBI != MBBE && MBBI->isDebugValue());
+ } while (MBBI != MBBE && MBBI->isDebugInstr());
}
}
return Changed;
}
-/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
-/// data-flow analysis to propagate them beyond basic block boundaries.
void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS) {
@@ -752,7 +771,15 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
}
SmallVector<SlotIndex, 16> Kills;
extendDef(Idx, Loc, LI, VNI, &Kills, LIS);
- if (LI)
+ // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that
+ // if the original location for example is %vreg0:sub_hi, and we find a
+ // full register copy in addDefsFromCopies (at the moment it only handles
+ // full register copies), then we must add the sub1 sub-register index to
+ // the new location. However, that is only possible if the new virtual
+ // register is of the same regclass (or if there is an equivalent
+ // sub-register in that regclass). For now, simply skip handling copies if
+ // a sub-register is involved.
+ if (LI && !LocMO.getSubReg())
addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
LIS);
continue;
@@ -1039,8 +1066,10 @@ splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
}
-void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
- BitVector &SpilledLocations) {
+void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets) {
// Build a set of new locations with new numbers so we can coalesce our
// IntervalMap if two vreg intervals collapse to the same physical location.
// Use MapVector instead of SetVector because MapVector::insert returns the
@@ -1049,10 +1078,11 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// FIXME: This will be problematic if we ever support direct and indirect
// frame index locations, i.e. expressing both variables in memory and
// 'int x, *px = &x'. The "spilled" bit must become part of the location.
- MapVector<MachineOperand, bool> NewLocations;
+ MapVector<MachineOperand, std::pair<bool, unsigned>> NewLocations;
SmallVector<unsigned, 4> LocNoMap(locations.size());
for (unsigned I = 0, E = locations.size(); I != E; ++I) {
bool Spilled = false;
+ unsigned SpillOffset = 0;
MachineOperand Loc = locations[I];
// Only virtual registers are rewritten.
if (Loc.isReg() && Loc.getReg() &&
@@ -1065,7 +1095,16 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// non-existent sub-register, and %noreg is exactly what we want.
Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
} else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
- // FIXME: Translate SubIdx to a stackslot offset.
+ // Retrieve the stack slot offset.
+ unsigned SpillSize;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *TRC = MRI.getRegClass(VirtReg);
+ bool Success = TII.getStackSlotRange(TRC, Loc.getSubReg(), SpillSize,
+ SpillOffset, MF);
+
+ // FIXME: Invalidate the location if the offset couldn't be calculated.
+ (void)Success;
+
Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
Spilled = true;
} else {
@@ -1076,20 +1115,22 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// Insert this location if it doesn't already exist and record a mapping
// from the old number to the new number.
- auto InsertResult = NewLocations.insert({Loc, Spilled});
+ auto InsertResult = NewLocations.insert({Loc, {Spilled, SpillOffset}});
unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first);
LocNoMap[I] = NewLocNo;
}
- // Rewrite the locations and record which ones were spill slots.
+ // Rewrite the locations and record the stack slot offsets for spills.
locations.clear();
- SpilledLocations.clear();
- SpilledLocations.resize(NewLocations.size());
+ SpillOffsets.clear();
for (auto &Pair : NewLocations) {
+ bool Spilled;
+ unsigned SpillOffset;
+ std::tie(Spilled, SpillOffset) = Pair.second;
locations.push_back(Pair.first);
- if (Pair.second) {
+ if (Spilled) {
unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair);
- SpilledLocations.set(NewLocNo);
+ SpillOffsets[NewLocNo] = SpillOffset;
}
}
@@ -1158,10 +1199,9 @@ findNextInsertLocation(MachineBasicBlock *MBB,
}
void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx,
- DbgValueLocation Loc, bool Spilled,
- LiveIntervals &LIS,
- const TargetInstrInfo &TII,
+ SlotIndex StopIdx, DbgValueLocation Loc,
+ bool Spilled, unsigned SpillOffset,
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI) {
SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB);
// Only search within the current MBB.
@@ -1184,12 +1224,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// If the location was spilled, the new DBG_VALUE will be indirect. If the
// original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate
- // that the original virtual register was a pointer.
+ // that the original virtual register was a pointer. Also, add the stack slot
+ // offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
bool IsIndirect = Loc.wasIndirect();
if (Spilled) {
- if (IsIndirect)
- Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref;
+ Expr =
+ DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref);
IsIndirect = true;
}
@@ -1208,14 +1250,17 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const BitVector &SpilledLocations) {
+ const SpillOffsetMap &SpillOffsets) {
MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
SlotIndex Start = I.start();
SlotIndex Stop = I.stop();
DbgValueLocation Loc = I.value();
- bool Spilled = !Loc.isUndef() ? SpilledLocations.test(Loc.locNo()) : false;
+ auto SpillIt =
+ !Loc.isUndef() ? SpillOffsets.find(Loc.locNo()) : SpillOffsets.end();
+ bool Spilled = SpillIt != SpillOffsets.end();
+ unsigned SpillOffset = Spilled ? SpillIt->second : 0;
// If the interval start was trimmed to the lexical scope insert the
// DBG_VALUE at the previous index (otherwise it appears after the
@@ -1228,7 +1273,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
+ TRI);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while (Stop > MBBEnd) {
@@ -1238,7 +1284,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
break;
MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
+ TRI);
}
LLVM_DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
@@ -1253,11 +1300,11 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
if (!MF)
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- BitVector SpilledLocations;
+ SpillOffsetMap SpillOffsets;
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));
- userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations);
- userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations);
+ userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
}
EmitDone = true;
}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index aa35880b063a..0060399c2b04 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -39,13 +39,6 @@ public:
LiveDebugVariables();
~LiveDebugVariables() override;
- /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
- /// @param OldReg Old virtual register that is going away.
- /// @param NewReg New register holding the user variables.
- /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
- /// register.
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
-
/// splitRegister - Move any user variables in OldReg to the live ranges in
/// NewRegs where they are live. Mark the values as unavailable where no new
/// register is live.
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 83dd982587c6..2340b6abd87c 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -1310,17 +1310,17 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
MachineOperand &MO = *RI;
MachineInstr *MI = RI->getParent();
++RI;
- // DBG_VALUE instructions don't have slot indexes, so get the index of the
- // instruction before them.
- // Normally, DBG_VALUE instructions are removed before this function is
- // called, but it is not a requirement.
- SlotIndex Idx;
- if (MI->isDebugValue())
- Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
- else
- Idx = LIS.getInstructionIndex(*MI);
- LiveQueryResult LRQ = LI.Query(Idx);
- const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ const VNInfo *VNI;
+ if (MI->isDebugValue()) {
+ // DBG_VALUE instructions don't have slot indexes, so get the index of
+ // the instruction before them. The value is defined there too.
+ SlotIndex Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
+ VNI = LI.Query(Idx).valueOut();
+ } else {
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ }
// In the case of an <undef> use that isn't tied to any def, VNI will be
// NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 86c6c8e29f9a..619643acb6d3 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
/// The clobbers set will be the list of live registers clobbered
/// by the regmask.
void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) {
- SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> *Clobbers) {
+ RegisterSet::iterator LRI = LiveRegs.begin();
while (LRI != LiveRegs.end()) {
if (MO.clobbersPhysReg(*LRI)) {
if (Clobbers)
@@ -83,7 +83,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
/// on accurate kill flags. If possible use stepBackward() instead of this
/// function.
void LivePhysRegs::stepForward(const MachineInstr &MI,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg() && !O->isDebug()) {
@@ -142,7 +142,7 @@ LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
#endif
bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
+ MCPhysReg Reg) const {
if (LiveRegs.count(Reg))
return false;
if (MRI.isReserved(Reg))
@@ -157,7 +157,7 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins()) {
- unsigned Reg = LI.PhysReg;
+ MCPhysReg Reg = LI.PhysReg;
LaneBitmask Mask = LI.LaneMask;
MCSubRegIndexIterator S(Reg, TRI);
assert(Mask.any() && "Invalid livein mask");
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 04324943dfad..70e135ab1aff 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -364,7 +364,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#ifndef NDEBUG
if (MBB->pred_empty()) {
MBB->getParent()->verify();
- errs() << "Use of " << printReg(PhysReg)
+ errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo())
<< " does not have a corresponding definition on every path:\n";
const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
if (MI != nullptr)
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index f90ce0c8cd2a..795028e97929 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -328,7 +328,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Sort the frame references by local offset.
// Use frame index as a tie-breaker in case MI's have the same offset.
- llvm::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ llvm::sort(FrameReferenceInsns);
MachineBasicBlock *Entry = &Fn.front();
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
index fa43d13b1b85..f17c23619ed5 100644
--- a/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -134,10 +134,10 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
- llvm::sort(StringInstrMap.begin(), StringInstrMap.end(),
- [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
- return (a.first < b.first);
- });
+ llvm::sort(StringInstrMap,
+ [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
+ return (a.first < b.first);
+ });
for (auto &II : StringInstrMap) {
@@ -677,8 +677,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
std::vector<MachineInstr *> VisitedMIs;
- std::copy(Candidates.begin(), Candidates.end(),
- std::back_inserter(VisitedMIs));
+ llvm::copy(Candidates, std::back_inserter(VisitedMIs));
std::vector<TypedVReg> VRegs;
for (auto candidate : Candidates) {
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index da05c9a22785..265877c2f5b4 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -202,6 +202,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("contract", MIToken::kw_contract)
.Case("afn", MIToken::kw_afn)
.Case("reassoc", MIToken::kw_reassoc)
+ .Case("nuw" , MIToken::kw_nuw)
+ .Case("nsw" , MIToken::kw_nsw)
+ .Case("exact" , MIToken::kw_exact)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
@@ -217,6 +220,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("undefined", MIToken::kw_cfi_undefined)
.Case("register", MIToken::kw_cfi_register)
.Case("window_save", MIToken::kw_cfi_window_save)
+ .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state)
.Case("blockaddress", MIToken::kw_blockaddress)
.Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
@@ -245,6 +249,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("successors", MIToken::kw_successors)
.Case("floatpred", MIToken::kw_floatpred)
.Case("intpred", MIToken::kw_intpred)
+ .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
+ .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
+ .Case("unknown-size", MIToken::kw_unknown_size)
.Default(MIToken::Identifier);
}
@@ -460,6 +467,53 @@ static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
ErrorCallback);
}
+static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "<mcsymbol ";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ auto Start = C;
+ C.advance(Rule.size());
+
+ // Try a simple unquoted name.
+ if (C.peek() != '"') {
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef String = Start.upto(C).drop_front(Rule.size());
+ if (C.peek() != '>') {
+ ErrorCallback(C.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ C.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
+ return C;
+ }
+
+ // Otherwise lex out a quoted name.
+ Cursor R = lexStringConstant(C, ErrorCallback);
+ if (!R) {
+ ErrorCallback(C.location(),
+ "unable to parse quoted string from opening quote");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ StringRef String = Start.upto(R).drop_front(Rule.size());
+ if (R.peek() != '>') {
+ ErrorCallback(R.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ R.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(R))
+ .setOwnedStringValue(unescapeQuotedString(String));
+ return R;
+}
+
static bool isValidHexFloatingPointPrefix(char C) {
return C == 'H' || C == 'K' || C == 'L' || C == 'M';
}
@@ -523,6 +577,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Case("!noalias", MIToken::md_noalias)
.Case("!range", MIToken::md_range)
.Case("!DIExpression", MIToken::md_diexpr)
+ .Case("!DILocation", MIToken::md_dilocation)
.Default(MIToken::Error);
}
@@ -657,6 +712,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
return R.remaining();
+ if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
+ return R.remaining();
if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
return R.remaining();
if (Cursor R = maybeLexNumericalLiteral(C, Token))
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index e21c71532f79..ceff79087d81 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -71,6 +71,9 @@ struct MIToken {
kw_contract,
kw_afn,
kw_reassoc,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
@@ -86,6 +89,7 @@ struct MIToken {
kw_cfi_restore_state,
kw_cfi_undefined,
kw_cfi_window_save,
+ kw_cfi_aarch64_negate_ra_sign_state,
kw_blockaddress,
kw_intrinsic,
kw_target_index,
@@ -113,6 +117,9 @@ struct MIToken {
kw_successors,
kw_floatpred,
kw_intpred,
+ kw_pre_instr_symbol,
+ kw_post_instr_symbol,
+ kw_unknown_size,
// Named metadata keywords
md_tbaa,
@@ -120,6 +127,7 @@ struct MIToken {
md_noalias,
md_range,
md_diexpr,
+ md_dilocation,
// Identifier tokens
Identifier,
@@ -132,6 +140,7 @@ struct MIToken {
NamedGlobalValue,
GlobalValue,
ExternalSymbol,
+ MCSymbol,
// Other tokens
IntegerLiteral,
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index a61e7872f1ae..6f2d8bb53ac8 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MIRPrinter.h"
@@ -54,6 +55,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -221,8 +223,10 @@ public:
bool parseSubRegisterIndexOperand(MachineOperand &Dest);
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
+ bool parseMCSymbolOperand(MachineOperand &Dest);
bool parseMDNode(MDNode *&Node);
bool parseDIExpression(MDNode *&Expr);
+ bool parseDILocation(MDNode *&Expr);
bool parseMetadataOperand(MachineOperand &Dest);
bool parseCFIOffset(int &Offset);
bool parseCFIRegister(unsigned &Reg);
@@ -250,6 +254,7 @@ public:
bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID);
bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
+ bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
private:
/// Convert the integer literal in the current token into an unsigned integer.
@@ -346,6 +351,9 @@ private:
/// Return true if the name isn't a name of a target MMO flag.
bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
+ /// Get or create an MCSymbol for a given name.
+ MCSymbol *getOrCreateMCSymbol(StringRef Name);
+
/// parseStringConstant
/// ::= StringConstant
bool parseStringConstant(std::string &Result);
@@ -737,12 +745,16 @@ bool MIParser::parse(MachineInstr *&MI) {
return true;
// Parse the remaining machine operands.
- while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+ while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
+ Token.isNot(MIToken::kw_post_instr_symbol) &&
+ Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
Optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
return true;
+ if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg())
+ MO.setIsDebug();
Operands.push_back(
ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -753,14 +765,29 @@ bool MIParser::parse(MachineInstr *&MI) {
lex();
}
+ MCSymbol *PreInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_pre_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PreInstrSymbol))
+ return true;
+ MCSymbol *PostInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_post_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PostInstrSymbol))
+ return true;
+
DebugLoc DebugLocation;
if (Token.is(MIToken::kw_debug_location)) {
lex();
- if (Token.isNot(MIToken::exclaim))
- return error("expected a metadata node after 'debug-location'");
MDNode *Node = nullptr;
- if (parseMDNode(Node))
- return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
+ } else
+ return error("expected a metadata node after 'debug-location'");
+ if (!isa<DILocation>(Node))
+ return error("referenced metadata is not a DILocation");
DebugLocation = DebugLoc(Node);
}
@@ -795,12 +822,12 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->addOperand(MF, Operand.Operand);
if (assignRegisterTies(*MI, Operands))
return true;
- if (MemOperands.empty())
- return false;
- MachineInstr::mmo_iterator MemRefs =
- MF.allocateMemRefsArray(MemOperands.size());
- std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
- MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
+ if (PreInstrSymbol)
+ MI->setPreInstrSymbol(MF, PreInstrSymbol);
+ if (PostInstrSymbol)
+ MI->setPostInstrSymbol(MF, PostInstrSymbol);
+ if (!MemOperands.empty())
+ MI->setMemRefs(MF, MemOperands);
return false;
}
@@ -876,6 +903,9 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
} else if (Token.is(MIToken::md_diexpr)) {
if (parseDIExpression(Node))
return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
} else
return error("expected a metadata node");
if (Token.isNot(MIToken::Eof))
@@ -945,7 +975,10 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_arcp) ||
Token.is(MIToken::kw_contract) ||
Token.is(MIToken::kw_afn) ||
- Token.is(MIToken::kw_reassoc)) {
+ Token.is(MIToken::kw_reassoc) ||
+ Token.is(MIToken::kw_nuw) ||
+ Token.is(MIToken::kw_nsw) ||
+ Token.is(MIToken::kw_exact)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -965,6 +998,12 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::FmAfn;
if (Token.is(MIToken::kw_reassoc))
Flags |= MachineInstr::FmReassoc;
+ if (Token.is(MIToken::kw_nuw))
+ Flags |= MachineInstr::NoUWrap;
+ if (Token.is(MIToken::kw_nsw))
+ Flags |= MachineInstr::NoSWrap;
+ if (Token.is(MIToken::kw_exact))
+ Flags |= MachineInstr::IsExact;
lex();
}
@@ -1573,6 +1612,16 @@ bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::MCSymbol));
+ MCSymbol *Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateMCSymbol(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::SubRegisterIndex));
StringRef Name = Token.stringValue();
@@ -1643,6 +1692,109 @@ bool MIParser::parseDIExpression(MDNode *&Expr) {
return false;
}
+bool MIParser::parseDILocation(MDNode *&Loc) {
+ assert(Token.is(MIToken::md_dilocation));
+ lex();
+
+ bool HaveLine = false;
+ unsigned Line = 0;
+ unsigned Column = 0;
+ MDNode *Scope = nullptr;
+ MDNode *InlinedAt = nullptr;
+ bool ImplicitCode = false;
+
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ if (Token.isNot(MIToken::rparen)) {
+ do {
+ if (Token.is(MIToken::Identifier)) {
+ if (Token.stringValue() == "line") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Line = Token.integerValue().getZExtValue();
+ HaveLine = true;
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "column") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Column = Token.integerValue().getZExtValue();
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "scope") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (parseMDNode(Scope))
+ return error("expected metadata node");
+ if (!isa<DIScope>(Scope))
+ return error("expected DIScope node");
+ continue;
+ }
+ if (Token.stringValue() == "inlinedAt") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(InlinedAt))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(InlinedAt))
+ return true;
+ } else
+ return error("expected metadata node");
+ if (!isa<DILocation>(InlinedAt))
+ return error("expected DILocation node");
+ continue;
+ }
+ if (Token.stringValue() == "isImplicitCode") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (!Token.is(MIToken::Identifier))
+ return error("expected true/false");
+ // As far as I can see, we don't have any existing need for parsing
+ // true/false in MIR yet. Do it ad-hoc until there's something else
+ // that needs it.
+ if (Token.stringValue() == "true")
+ ImplicitCode = true;
+ else if (Token.stringValue() == "false")
+ ImplicitCode = false;
+ else
+ return error("expected true/false");
+ lex();
+ continue;
+ }
+ }
+ return error(Twine("invalid DILocation argument '") +
+ Token.stringValue() + "'");
+ } while (consumeIfPresent(MIToken::comma));
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (!HaveLine)
+ return error("DILocation requires line number");
+ if (!Scope)
+ return error("DILocation requires a scope");
+
+ Loc = DILocation::get(MF.getFunction().getContext(), Line, Column, Scope,
+ InlinedAt, ImplicitCode);
+ return false;
+}
+
bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
MDNode *Node = nullptr;
if (Token.is(MIToken::exclaim)) {
@@ -1779,6 +1931,9 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
case MIToken::kw_cfi_window_save:
CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
break;
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ break;
case MIToken::kw_cfi_escape: {
std::string Values;
if (parseCFIEscapeValues(Values))
@@ -2050,6 +2205,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseJumpTableIndexOperand(Dest);
case MIToken::ExternalSymbol:
return parseExternalSymbolOperand(Dest);
+ case MIToken::MCSymbol:
+ return parseMCSymbolOperand(Dest);
case MIToken::SubRegisterIndex:
return parseSubRegisterIndexOperand(Dest);
case MIToken::md_diexpr:
@@ -2069,6 +2226,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::kw_cfi_restore_state:
case MIToken::kw_cfi_undefined:
case MIToken::kw_cfi_window_save:
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
return parseCFIOperand(Dest);
case MIToken::kw_blockaddress:
return parseBlockAddressOperand(Dest);
@@ -2423,7 +2581,7 @@ bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
return false;
}
- return error("expected an atomic scope, ordering or a size integer literal");
+ return error("expected an atomic scope, ordering or a size specification");
}
bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
@@ -2462,11 +2620,17 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseOptionalAtomicOrdering(FailureOrder))
return true;
- if (Token.isNot(MIToken::IntegerLiteral))
- return error("expected the size integer literal after memory operation");
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::kw_unknown_size))
+ return error("expected the size integer literal or 'unknown-size' after "
+ "memory operation");
uint64_t Size;
- if (getUint64(Size))
- return true;
+ if (Token.is(MIToken::IntegerLiteral)) {
+ if (getUint64(Size))
+ return true;
+ } else if (Token.is(MIToken::kw_unknown_size)) {
+ Size = MemoryLocation::UnknownSize;
+ }
lex();
MachinePointerInfo Ptr = MachinePointerInfo();
@@ -2483,7 +2647,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMachinePointerInfo(Ptr))
return true;
}
- unsigned BaseAlignment = Size;
+ unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? Size : 1);
AAMDNodes AAInfo;
MDNode *Range = nullptr;
while (consumeIfPresent(MIToken::comma)) {
@@ -2529,6 +2693,24 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
return false;
}
+bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
+ assert((Token.is(MIToken::kw_pre_instr_symbol) ||
+ Token.is(MIToken::kw_post_instr_symbol)) &&
+ "Invalid token for a pre- post-instruction symbol!");
+ lex();
+ if (Token.isNot(MIToken::MCSymbol))
+ return error("expected a symbol after 'pre-instr-symbol'");
+ Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
void MIParser::initNames2InstrOpCodes() {
if (!Names2InstrOpCodes.empty())
return;
@@ -2759,6 +2941,15 @@ bool MIParser::getMMOTargetFlag(StringRef Name,
return false;
}
+MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
+ // FIXME: Currently we can't recognize temporary or local symbols and call all
+ // of the appropriate forms to create them. However, this handles basic cases
+ // well, as most of the special aspects are recognized by a prefix on their
+ // name, and the input names should already be unique. For test cases, keeping
+ // the symbol name out of the symbol table isn't terribly important.
+ return MF.getContext().getOrCreateSymbol(Name);
+}
+
bool MIParser::parseStringConstant(std::string &Result) {
if (Token.isNot(MIToken::StringConstant))
return error("expected string constant");
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 3d2db97acb48..00da92a92ec6 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -355,6 +355,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ MF.setHasWinCFI(YamlMF.HasWinCFI);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
@@ -580,6 +581,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.setHasCalls(YamlMFI.HasCalls);
if (YamlMFI.MaxCallFrameSize != ~0u)
MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+ MFI.setCVBytesOfCalleeSavedRegisters(YamlMFI.CVBytesOfCalleeSavedRegisters);
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
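Both new fields round-trip through the YAML wrapper around a machine function. A sketch of where they would appear in a .mir file (key spellings assumed to follow the camelCase convention of the existing MIR YAML keys):

    name:            callee_saved_fn
    hasWinCFI:       true
    frameInfo:
      cvBytesOfCalleeSavedRegisters: 16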
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index bf8cd1489ec5..d9dcc428943f 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -195,6 +196,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Name = MF.getName();
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.HasWinCFI = MF.hasWinCFI();
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
@@ -327,6 +329,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasCalls = MFI.hasCalls();
YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed()
? MFI.getMaxCallFrameSize() : ~0u;
+ YamlMFI.CVBytesOfCalleeSavedRegisters =
+ MFI.getCVBytesOfCalleeSavedRegisters();
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
@@ -397,18 +401,20 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
yaml::StringValue Reg;
printRegMIR(CSInfo.getReg(), Reg, TRI);
- auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
- "Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed) {
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
- } else {
- YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
+ if (!CSInfo.isSpilledToReg()) {
+ auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ if (StackObject.IsFixed) {
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ } else {
+ YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ }
}
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
@@ -694,6 +700,12 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "afn ";
if (MI.getFlag(MachineInstr::FmReassoc))
OS << "reassoc ";
+ if (MI.getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (MI.getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (MI.getFlag(MachineInstr::IsExact))
+ OS << "exact ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -708,6 +720,23 @@ void MIPrinter::print(const MachineInstr &MI) {
NeedComma = true;
}
+ // Print any optional symbols attached to this instruction as-if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ NeedComma = true;
+ }
+ if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ NeedComma = true;
+ }
+
if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
OS << ',';
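The printer emits the two symbols between the operand list and any trailing debug-location, each preceded by a comma when operands were already printed, so a round-trippable line looks roughly like this (symbol names and opcode chosen for illustration):

    CALL64pcrel32 @g, pre-instr-symbol <mcsymbol .Ltmp0>, post-instr-symbol <mcsymbol .Ltmp1>, debug-location !9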
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 38e8369dc739..03771bc5dae1 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -110,6 +110,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
// use/def lists.
MachineFunction *MF = Parent->getParent();
N->AddRegOperandsToUseLists(MF->getRegInfo());
+ MF->handleInsertion(*N);
}
/// When we remove an instruction from a basic block list, we update its parent
@@ -118,8 +119,10 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() && "machine instruction not in a basic block");
// Remove from the use/def lists.
- if (MachineFunction *MF = N->getMF())
+ if (MachineFunction *MF = N->getMF()) {
+ MF->handleRemoval(*N);
N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+ }
N->setParent(nullptr);
}
@@ -359,7 +362,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print human readable probabilities as comments.
OS << "; ";
for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
- const BranchProbability &BP = *getProbabilityIterator(I);
+ const BranchProbability &BP = getSuccProbability(I);
if (I != succ_begin())
OS << ", ";
OS << printMBBReference(**I) << '('
@@ -458,7 +461,7 @@ bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
}
void MachineBasicBlock::sortUniqueLiveIns() {
- llvm::sort(LiveIns.begin(), LiveIns.end(),
+ llvm::sort(LiveIns,
[](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
return LI0.PhysReg < LI1.PhysReg;
});
@@ -1375,13 +1378,53 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
unsigned Neighborhood) const {
unsigned N = Neighborhood;
- // Start by searching backwards from Before, looking for kills, reads or defs.
+ // Try searching forwards from Before, looking for reads or defs.
const_iterator I(Before);
+ for (; I != end() && N > 0; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ --N;
+
+ MachineOperandIteratorBase::PhysRegInfo Info =
+ ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+
+ // Register is live when we read it here.
+ if (Info.Read)
+ return LQR_Live;
+ // Register is dead if we can fully overwrite or clobber it here.
+ if (Info.FullyDefined || Info.Clobbered)
+ return LQR_Dead;
+ }
+
+ // If we reached the end, it is safe to clobber Reg at the end of a block if
+ // no successor has it live in.
+ if (I == end()) {
+ for (MachineBasicBlock *S : successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LI : S->liveins()) {
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
+ return LQR_Live;
+ }
+ }
+
+ return LQR_Dead;
+ }
+
+ N = Neighborhood;
+
+ // Start by searching backwards from Before, looking for kills, reads or defs.
+ I = const_iterator(Before);
// If this is the first insn in the block, don't search backwards.
if (I != begin()) {
do {
--I;
+ if (I->isDebugInstr())
+ continue;
+
+ --N;
+
MachineOperandIteratorBase::PhysRegInfo Info =
ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
@@ -1406,39 +1449,20 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// Register must be live if we read it.
if (Info.Read)
return LQR_Live;
- } while (I != begin() && --N > 0);
+
+ } while (I != begin() && N > 0);
}
// Did we get to the start of the block?
if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
- for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid();
- ++RAI)
- if (isLiveIn(*RAI))
+ for (const MachineBasicBlock::RegisterMaskPair &LI : liveins())
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
return LQR_Live;
return LQR_Dead;
}
- N = Neighborhood;
-
- // Try searching forwards from Before, looking for reads or defs.
- I = const_iterator(Before);
- // If this is the last insn in the block, don't search forwards.
- if (I != end()) {
- for (++I; I != end() && N > 0; ++I, --N) {
- MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
-
- // Register is live when we read it here.
- if (Info.Read)
- return LQR_Live;
- // Register is dead if we can fully overwrite or clobber it here.
- if (Info.FullyDefined || Info.Clobbered)
- return LQR_Dead;
- }
- }
-
// At this point we have no idea of the liveness of the register.
return LQR_Unknown;
}
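A minimal sketch of how a caller typically uses this query, assuming a pass where MBB, TRI, and InsertPt are in scope (the target register is illustrative); the Neighborhood argument bounds how many non-debug instructions each scan may visit:

    // With this patch the forward scan runs first, debug instructions no
    // longer consume the search budget, and live-ins of successors (or of the
    // block itself) are consulted when a scan reaches the block boundary.
    MachineBasicBlock::LivenessQueryResult LQR =
        MBB.computeRegisterLiveness(TRI, X86::EFLAGS, InsertPt,
                                    /*Neighborhood=*/10);
    if (LQR == MachineBasicBlock::LQR_Dead) {
      // Safe to insert an EFLAGS-clobbering instruction at InsertPt.
    }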
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 21350df624e7..4fee9c4ea027 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -316,7 +316,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// A type for a block filter set.
using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
- /// Pair struct containing basic block and taildup profitiability
+ /// Pair struct containing basic block and taildup profitability
struct BlockAndTailDupResult {
MachineBasicBlock *BB;
bool ShouldTailDup;
@@ -2497,7 +2497,8 @@ void MachineBlockPlacement::alignBlocks() {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction().optForSize())
+ if (F->getFunction().optForMinSize() ||
+ (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize()))
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
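The new hook lets a target keep loop-header alignment under -Os while -Oz still disables it unconditionally. A hedged sketch of a target opting in (the subclass name is invented; the hook itself is the alignLoopsWithOptSize() queried above):

    // In a target's TargetLowering subclass:
    bool MyTargetLowering::alignLoopsWithOptSize() const {
      // Loop alignment is cheap enough on this target to keep at -Os.
      return true;
    }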
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6c92b1d426d6..6ee8571c28aa 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -180,6 +180,10 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
continue;
LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
LLVM_DEBUG(dbgs() << "*** to: " << *MI);
+
+ // Update matching debug values.
+ DefMI->changeDebugValuesDefReg(SrcReg);
+
// Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
@@ -231,6 +235,21 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return false;
}
+static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
+ const MachineFunction &MF,
+ const TargetRegisterInfo &TRI) {
+ // MachineRegisterInfo::isConstantPhysReg directly called by
+ // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
+ // reserved registers to be frozen. That doesn't cause a problem post-ISel as
+ // most (if not all) targets freeze reserved registers right after ISel.
+ //
+ // It does cause issues mid-GlobalISel, however, hence the additional
+ // reservedRegsFrozen check.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return TRI.isCallerPreservedPhysReg(Reg, MF) ||
+ (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
+}
+
/// hasLivePhysRegDefUses - Return true if the specified instruction reads/writes
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
@@ -250,7 +269,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Reading either caller preserved or constant physregs is ok.
- if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
+ if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 0c6efff7bb40..f51b482e20e3 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -231,6 +231,8 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
// Get the first instruction that uses MO
MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
RI++;
+ if (RI == MRI->reg_end())
+ continue;
MachineInstr *UseMO = RI->getParent();
unsigned LatencyOp = 0;
if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 3bf8147a06c3..19879fe89007 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -74,58 +74,154 @@ DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
namespace {
-using RegList = SmallVector<unsigned, 4>;
-using SourceMap = DenseMap<unsigned, RegList>;
-using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
-
- class MachineCopyPropagation : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const MachineRegisterInfo *MRI;
-
- public:
- static char ID; // Pass identification, replacement for typeid
+class CopyTracker {
+ struct CopyInfo {
+ MachineInstr *MI;
+ SmallVector<unsigned, 4> DefRegs;
+ bool Avail;
+ };
- MachineCopyPropagation() : MachineFunctionPass(ID) {
- initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ DenseMap<unsigned, CopyInfo> Copies;
+
+public:
+ /// Mark all of the given registers and their subregisters as unavailable for
+ /// copying.
+ void markRegsUnavailable(ArrayRef<unsigned> Regs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned Reg : Regs) {
+ // Source of copy is no longer available for propagation.
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto CI = Copies.find(*RUI);
+ if (CI != Copies.end())
+ CI->second.Avail = false;
+ }
}
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ /// Clobber a single register, removing it from the tracker's copy maps.
+ void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.find(*RUI);
+ if (I != Copies.end()) {
+ // When we clobber the source of a copy, we need to clobber everything
+ // it defined.
+ markRegsUnavailable(I->second.DefRegs, TRI);
+ // When we clobber the destination of a copy, we need to clobber the
+ // whole register it defined.
+ if (MachineInstr *MI = I->second.MI)
+ markRegsUnavailable({MI->getOperand(0).getReg()}, TRI);
+ // Now we can erase the copy.
+ Copies.erase(I);
+ }
}
+ }
+
+ /// Add this copy's registers into the tracker's copy maps.
+ void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
+ assert(MI->isCopy() && "Tracking non-copy?");
+
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
- bool runOnMachineFunction(MachineFunction &MF) override;
+ // Remember Def is defined by the copy.
+ for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
+ Copies[*RUI] = {MI, {}, true};
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
+ // Remember source that's copied to Def. Once it's clobbered, then
+ // it's no longer available for copy propagation.
+ for (MCRegUnitIterator RUI(Src, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.insert({*RUI, {nullptr, {}, false}});
+ auto &Copy = I.first->second;
+ if (!is_contained(Copy.DefRegs, Def))
+ Copy.DefRegs.push_back(Def);
}
+ }
+
+ bool hasAnyCopies() {
+ return !Copies.empty();
+ }
- private:
- void ClobberRegister(unsigned Reg);
- void ReadRegister(unsigned Reg);
- void CopyPropagateBlock(MachineBasicBlock &MBB);
- bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
- void forwardUses(MachineInstr &MI);
- bool isForwardableRegClassCopy(const MachineInstr &Copy,
- const MachineInstr &UseI, unsigned UseIdx);
- bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+ MachineInstr *findCopyForUnit(unsigned RegUnit, const TargetRegisterInfo &TRI,
+ bool MustBeAvailable = false) {
+ auto CI = Copies.find(RegUnit);
+ if (CI == Copies.end())
+ return nullptr;
+ if (MustBeAvailable && !CI->second.Avail)
+ return nullptr;
+ return CI->second.MI;
+ }
- /// Candidates for deletion.
- SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
+ MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ // We check the first RegUnit here, since we'll only be interested in the
+ // copy if it copies the entire register anyway.
+ MCRegUnitIterator RUI(Reg, &TRI);
+ MachineInstr *AvailCopy =
+ findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
+ if (!AvailCopy ||
+ !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg))
+ return nullptr;
+
+ // Check that the available copy isn't clobbered by any regmasks between
+ // itself and the destination.
+ unsigned AvailSrc = AvailCopy->getOperand(1).getReg();
+ unsigned AvailDef = AvailCopy->getOperand(0).getReg();
+ for (const MachineInstr &MI :
+ make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+ return nullptr;
+
+ return AvailCopy;
+ }
- /// Def -> available copies map.
- Reg2MIMap AvailCopyMap;
+ void clear() {
+ Copies.clear();
+ }
+};
- /// Def -> copies map.
- Reg2MIMap CopyMap;
+class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI;
- /// Src -> Def map
- SourceMap SrcMap;
+public:
+ static char ID; // Pass identification, replacement for typeid
- bool Changed;
- };
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ void ClobberRegister(unsigned Reg);
+ void ReadRegister(unsigned Reg);
+ void CopyPropagateBlock(MachineBasicBlock &MBB);
+ bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+ void forwardUses(MachineInstr &MI);
+ bool isForwardableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI, unsigned UseIdx);
+ bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+
+ /// Candidates for deletion.
+ SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
+
+ CopyTracker Tracker;
+
+ bool Changed;
+};
} // end anonymous namespace
@@ -136,54 +232,13 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
-/// Remove any entry in \p Map where the register is a subregister or equal to
-/// a register contained in \p Regs.
-static void removeRegsFromMap(Reg2MIMap &Map, const RegList &Regs,
- const TargetRegisterInfo &TRI) {
- for (unsigned Reg : Regs) {
- // Source of copy is no longer available for propagation.
- for (MCSubRegIterator SR(Reg, &TRI, true); SR.isValid(); ++SR)
- Map.erase(*SR);
- }
-}
-
-/// Remove any entry in \p Map that is marked clobbered in \p RegMask.
-/// The map will typically have a lot fewer entries than the regmask clobbers,
-/// so this is more efficient than iterating the clobbered registers and calling
-/// ClobberRegister() on them.
-static void removeClobberedRegsFromMap(Reg2MIMap &Map,
- const MachineOperand &RegMask) {
- for (Reg2MIMap::iterator I = Map.begin(), E = Map.end(), Next; I != E;
- I = Next) {
- Next = std::next(I);
- unsigned Reg = I->first;
- if (RegMask.clobbersPhysReg(Reg))
- Map.erase(I);
- }
-}
-
-void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- CopyMap.erase(*AI);
- AvailCopyMap.erase(*AI);
-
- SourceMap::iterator SI = SrcMap.find(*AI);
- if (SI != SrcMap.end()) {
- removeRegsFromMap(AvailCopyMap, SI->second, *TRI);
- SrcMap.erase(SI);
- }
- }
-}
-
void MachineCopyPropagation::ReadRegister(unsigned Reg) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- Reg2MIMap::iterator CI = CopyMap.find(*AI);
- if (CI != CopyMap.end()) {
- LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: ";
- CI->second->dump());
- MaybeDeadCopies.remove(CI->second);
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
+ MaybeDeadCopies.remove(Copy);
}
}
}
@@ -219,15 +274,14 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
return false;
// Search for an existing copy.
- Reg2MIMap::iterator CI = AvailCopyMap.find(Def);
- if (CI == AvailCopyMap.end())
+ MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI);
+ if (!PrevCopy)
return false;
// Check that the existing copy uses the correct sub registers.
- MachineInstr &PrevCopy = *CI->second;
- if (PrevCopy.getOperand(0).isDead())
+ if (PrevCopy->getOperand(0).isDead())
return false;
- if (!isNopCopy(PrevCopy, Src, Def, TRI))
+ if (!isNopCopy(*PrevCopy, Src, Def, TRI))
return false;
LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
@@ -238,7 +292,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
unsigned CopyDef = Copy.getOperand(0).getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
- make_range(PrevCopy.getIterator(), Copy.getIterator()))
+ make_range(PrevCopy->getIterator(), Copy.getIterator()))
MI.clearRegisterKills(CopyDef, TRI);
Copy.eraseFromParent();
@@ -314,7 +368,7 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
/// Look for available copies whose destination register is used by \p MI and
/// replace the use in \p MI with the copy's source register.
void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
- if (AvailCopyMap.empty())
+ if (!Tracker.hasAnyCopies())
return;
// Look for non-tied explicit vreg uses that have an active COPY
@@ -341,13 +395,12 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!MOUse.isRenamable())
continue;
- auto CI = AvailCopyMap.find(MOUse.getReg());
- if (CI == AvailCopyMap.end())
+ MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg(), *TRI);
+ if (!Copy)
continue;
- MachineInstr &Copy = *CI->second;
- unsigned CopyDstReg = Copy.getOperand(0).getReg();
- const MachineOperand &CopySrc = Copy.getOperand(1);
+ unsigned CopyDstReg = Copy->getOperand(0).getReg();
+ const MachineOperand &CopySrc = Copy->getOperand(1);
unsigned CopySrcReg = CopySrc.getReg();
// FIXME: Don't handle partial uses of wider COPYs yet.
@@ -362,7 +415,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
continue;
- if (!isForwardableRegClassCopy(Copy, MI, OpIdx))
+ if (!isForwardableRegClassCopy(*Copy, MI, OpIdx))
continue;
if (hasImplicitOverlap(MI, MOUse))
@@ -376,7 +429,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
<< "\n with " << printReg(CopySrcReg, TRI)
- << "\n in " << MI << " from " << Copy);
+ << "\n in " << MI << " from " << *Copy);
MOUse.setReg(CopySrcReg);
if (!CopySrc.isRenamable())
@@ -386,7 +439,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
// Clear kill markers that may have been invalidated.
for (MachineInstr &KMI :
- make_range(Copy.getIterator(), std::next(MI.getIterator())))
+ make_range(Copy->getIterator(), std::next(MI.getIterator())))
KMI.clearRegisterKills(CopySrcReg, TRI);
++NumCopyForwards;
@@ -459,28 +512,17 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// %xmm2 = copy %xmm0
// ...
// %xmm2 = copy %xmm9
- ClobberRegister(Def);
+ Tracker.clobberRegister(Def, *TRI);
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
- // Remember Def is defined by the copy.
- for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid();
- ++SR) {
- CopyMap[*SR] = MI;
- AvailCopyMap[*SR] = MI;
- }
-
- // Remember source that's copied to Def. Once it's clobbered, then
- // it's no longer available for copy propagation.
- RegList &DestList = SrcMap[Src];
- if (!is_contained(DestList, Def))
- DestList.push_back(Def);
+ Tracker.trackCopy(MI, *TRI);
continue;
}
@@ -494,7 +536,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// later.
if (MO.isTied())
ReadRegister(Reg);
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
forwardUses(*MI);
@@ -541,6 +583,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
MaybeDead->dump());
+ // Make sure we invalidate any entries in the copy maps before erasing
+ // the instruction.
+ Tracker.clobberRegister(Reg, *TRI);
+
// erase() will return the next valid iterator pointing to the next
// element after the erased one.
DI = MaybeDeadCopies.erase(DI);
@@ -548,22 +594,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
Changed = true;
++NumDeletes;
}
-
- removeClobberedRegsFromMap(AvailCopyMap, *RegMask);
- removeClobberedRegsFromMap(CopyMap, *RegMask);
- for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next;
- I != E; I = Next) {
- Next = std::next(I);
- if (RegMask->clobbersPhysReg(I->first)) {
- removeRegsFromMap(AvailCopyMap, I->second, *TRI);
- SrcMap.erase(I);
- }
- }
}
// Any previous copy definition or reading the Defs is no longer available.
for (unsigned Reg : Defs)
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
// If MBB doesn't have successors, delete the copies whose defs are not used.
@@ -574,6 +609,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
+
+ // Update matching debug values.
+ assert(MaybeDead->isCopy());
+ MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg());
+
MaybeDead->eraseFromParent();
Changed = true;
++NumDeletes;
@@ -581,9 +621,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
MaybeDeadCopies.clear();
- AvailCopyMap.clear();
- CopyMap.clear();
- SrcMap.clear();
+ Tracker.clear();
}
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
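The rewritten tracker keys its map by register units rather than whole registers, so aliasing falls out automatically: a subregister and its super-register share at least one unit, and clobbering either invalidates the tracked copy. A simplified restatement of the lookup pattern used in ReadRegister above:

    for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
      // Any copy registered under one of Reg's units overlaps Reg.
      if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI))
        MaybeDeadCopies.remove(Copy);
    }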
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index dd668bcf6193..3495319670a5 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -99,6 +99,9 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
llvm_unreachable("Invalid machine function property");
}
+// Pin the vtable to this file.
+void MachineFunction::Delegate::anchor() {}
+
void MachineFunctionProperties::print(raw_ostream &OS) const {
const char *Separator = "";
for (BitVector::size_type I = 0; I < Properties.size(); ++I) {
@@ -127,7 +130,8 @@ static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
return STI->getFrameLowering()->getStackAlignment();
}
-MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
+MachineFunction::MachineFunction(const Function &F,
+ const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI,
unsigned FunctionNum, MachineModuleInfo &mmi)
: F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
@@ -135,6 +139,16 @@ MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
init();
}
+void MachineFunction::handleInsertion(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleInsertion(MI);
+}
+
+void MachineFunction::handleRemoval(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleRemoval(MI);
+}
+
void MachineFunction::init() {
// Assume the function starts in SSA form with correct liveness.
Properties.set(MachineFunctionProperties::Property::IsSSA);
@@ -233,6 +247,11 @@ void MachineFunction::clear() {
WinEHInfo->~WinEHFuncInfo();
Allocator.Deallocate(WinEHInfo);
}
+
+ if (WasmEHInfo) {
+ WasmEHInfo->~WasmEHFuncInfo();
+ Allocator.Deallocate(WasmEHInfo);
+ }
}
const DataLayout &MachineFunction::getDataLayout() const {
@@ -406,82 +425,17 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MMO->getOrdering(), MMO->getFailureOrdering());
}
-MachineInstr::mmo_iterator
-MachineFunction::allocateMemRefsArray(unsigned long Num) {
- return Allocator.Allocate<MachineMemOperand *>(Num);
-}
-
-std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
-MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End) {
- // Count the number of load mem refs.
- unsigned Num = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
- if ((*I)->isLoad())
- ++Num;
-
- // Allocate a new array and populate it with the load information.
- MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
- unsigned Index = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
- if ((*I)->isLoad()) {
- if (!(*I)->isStore())
- // Reuse the MMO.
- Result[Index] = *I;
- else {
- // Clone the MMO and unset the store flag.
- MachineMemOperand *JustLoad =
- getMachineMemOperand((*I)->getPointerInfo(),
- (*I)->getFlags() & ~MachineMemOperand::MOStore,
- (*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo(), nullptr,
- (*I)->getSyncScopeID(), (*I)->getOrdering(),
- (*I)->getFailureOrdering());
- Result[Index] = JustLoad;
- }
- ++Index;
- }
- }
- return std::make_pair(Result, Result + Num);
-}
-
-std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
-MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End) {
- // Count the number of load mem refs.
- unsigned Num = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
- if ((*I)->isStore())
- ++Num;
-
- // Allocate a new array and populate it with the store information.
- MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
- unsigned Index = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
- if ((*I)->isStore()) {
- if (!(*I)->isLoad())
- // Reuse the MMO.
- Result[Index] = *I;
- else {
- // Clone the MMO and unset the load flag.
- MachineMemOperand *JustStore =
- getMachineMemOperand((*I)->getPointerInfo(),
- (*I)->getFlags() & ~MachineMemOperand::MOLoad,
- (*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo(), nullptr,
- (*I)->getSyncScopeID(), (*I)->getOrdering(),
- (*I)->getFailureOrdering());
- Result[Index] = JustStore;
- }
- ++Index;
- }
- }
- return std::make_pair(Result, Result + Num);
+MachineInstr::ExtraInfo *
+MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol,
+ MCSymbol *PostInstrSymbol) {
+ return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
+ PostInstrSymbol);
}
const char *MachineFunction::createExternalSymbolName(StringRef Name) {
char *Dest = Allocator.Allocate<char>(Name.size() + 1);
- std::copy(Name.begin(), Name.end(), Dest);
+ llvm::copy(Name, Dest);
Dest[Name.size()] = 0;
return Dest;
}
@@ -678,6 +632,46 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
MCSymbol *LandingPadLabel = Ctx.createTempSymbol();
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
LP.LandingPadLabel = LandingPadLabel;
+
+ const Instruction *FirstI = LandingPad->getBasicBlock()->getFirstNonPHI();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FirstI)) {
+ if (const auto *PF =
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()))
+ getMMI().addPersonality(PF);
+
+ if (LPI->isCleanup())
+ addCleanup(LandingPad);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100%
+ // correct, but we need to do it this way because of how the DWARF EH
+ // emitter processes the clauses.
+ for (unsigned I = LPI->getNumClauses(); I != 0; --I) {
+ Value *Val = LPI->getClause(I - 1);
+ if (LPI->isCatch(I - 1)) {
+ addCatchTypeInfo(LandingPad,
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ auto *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalValue *, 4> FilterList;
+ for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
+ II != IE; ++II)
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+
+ addFilterTypeInfo(LandingPad, FilterList);
+ }
+ }
+
+ } else if (const auto *CPI = dyn_cast<CatchPadInst>(FirstI)) {
+ for (unsigned I = CPI->getNumArgOperands(); I != 0; --I) {
+ Value *TypeInfo = CPI->getArgOperand(I - 1)->stripPointerCasts();
+ addCatchTypeInfo(LandingPad, dyn_cast<GlobalValue>(TypeInfo));
+ }
+
+ } else {
+ assert(isa<CleanupPadInst>(FirstI) && "Invalid landingpad!");
+ }
+
return LandingPadLabel;
}
@@ -697,7 +691,8 @@ void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
}
-void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+void MachineFunction::tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap,
+ bool TidyIfNoBeginLabels) {
for (unsigned i = 0; i != LandingPads.size(); ) {
LandingPadInfo &LandingPad = LandingPads[i];
if (LandingPad.LandingPadLabel &&
@@ -712,24 +707,25 @@ void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
continue;
}
- for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
- MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
- MCSymbol *EndLabel = LandingPad.EndLabels[j];
- if ((BeginLabel->isDefined() ||
- (LPMap && (*LPMap)[BeginLabel] != 0)) &&
- (EndLabel->isDefined() ||
- (LPMap && (*LPMap)[EndLabel] != 0))) continue;
-
- LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
- LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
- --j;
- --e;
- }
+ if (TidyIfNoBeginLabels) {
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() || (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0)))
+ continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j;
+ --e;
+ }
- // Remove landing pads with no try-ranges.
- if (LandingPads[i].BeginLabels.empty()) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
}
// If there is no landing pad, ensure that the list of typeids is empty.
@@ -806,36 +802,6 @@ try_next:;
return FilterID;
}
-void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) {
- MachineFunction &MF = *MBB.getParent();
- if (const auto *PF = dyn_cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
- MF.getMMI().addPersonality(PF);
-
- if (I.isCleanup())
- MF.addCleanup(&MBB);
-
- // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
- // but we need to do it this way because of how the DWARF EH emitter
- // processes the clauses.
- for (unsigned i = I.getNumClauses(); i != 0; --i) {
- Value *Val = I.getClause(i - 1);
- if (I.isCatch(i - 1)) {
- MF.addCatchTypeInfo(&MBB,
- dyn_cast<GlobalValue>(Val->stripPointerCasts()));
- } else {
- // Add filters in a list.
- Constant *CVal = cast<Constant>(Val);
- SmallVector<const GlobalValue *, 4> FilterList;
- for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
- II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
-
- MF.addFilterTypeInfo(&MBB, FilterList);
- }
- }
-}
-
/// \}
//===----------------------------------------------------------------------===//
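The delegate hooks give a client a way to observe every instruction insertion and removal; GlobalISel's CSE bookkeeping is the intended consumer. A minimal sketch of an observer (the struct name is invented, and attaching it is assumed to go through MachineFunction::setDelegate):

    // Hypothetical observer counting instructions as they come and go.
    struct InstrCountObserver : public MachineFunction::Delegate {
      unsigned Count = 0;
      void MF_HandleInsertion(MachineInstr &MI) override { ++Count; }
      void MF_HandleRemoval(MachineInstr &MI) override { --Count; }
    };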
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 67ac95740e3e..5db4e299fa70 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -23,11 +23,13 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
using namespace llvm;
+using namespace ore;
Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
@@ -57,9 +59,43 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
llvm_unreachable("MachineFunctionProperties check failed");
}
#endif
+ // Collect the MI count of the function before the pass.
+ unsigned CountBefore, CountAfter;
+
+ // Check if the user asked for size remarks.
+ bool ShouldEmitSizeRemarks =
+ F.getParent()->shouldEmitInstrCountChangedRemark();
+
+ // If we want size remarks, collect the number of MachineInstrs in our
+ // MachineFunction before the pass runs.
+ if (ShouldEmitSizeRemarks)
+ CountBefore = MF.getInstructionCount();
bool RV = runOnMachineFunction(MF);
+ if (ShouldEmitSizeRemarks) {
+ // We wanted size remarks. Check if there was a change to the number of
+ // MachineInstrs in the module. Emit a remark if there was a change.
+ CountAfter = MF.getInstructionCount();
+ if (CountBefore != CountAfter) {
+ MachineOptimizationRemarkEmitter MORE(MF, nullptr);
+ MORE.emit([&]() {
+ int64_t Delta = static_cast<int64_t>(CountAfter) -
+ static_cast<int64_t>(CountBefore);
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ MF.getFunction().getSubprogram(),
+ &MF.front());
+ R << NV("Pass", getPassName())
+ << ": Function: " << NV("Function", F.getName()) << ": "
+ << "MI Instruction count changed from "
+ << NV("MIInstrsBefore", CountBefore) << " to "
+ << NV("MIInstrsAfter", CountAfter)
+ << "; Delta: " << NV("Delta", Delta);
+ return R;
+ });
+ }
+ }
+
MFProps.set(SetProperties);
MFProps.reset(ClearedProperties);
return RV;
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 55d9defced3a..9c96ba748778 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,6 +39,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
+ AU.addUsedIfAvailable<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 96fcfdb72ad7..764a84c7e132 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -131,8 +132,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
- debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -315,71 +315,201 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
--NumOperands;
}
-/// addMemOperand - Add a MachineMemOperand to the machine instruction.
-/// This function should be used only occasionally. The setMemRefs function
-/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::dropMemRefs(MachineFunction &MF) {
+ if (memoperands_empty())
+ return;
+
+ // See if we can just drop all of our extra info.
+ if (!getPreInstrSymbol() && !getPostInstrSymbol()) {
+ Info.clear();
+ return;
+ }
+ if (!getPostInstrSymbol()) {
+ Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+ return;
+ }
+ if (!getPreInstrSymbol()) {
+ Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+ return;
+ }
+
+ // Otherwise allocate a fresh extra info with just these symbols.
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo({}, getPreInstrSymbol(), getPostInstrSymbol()));
+}
+
+void MachineInstr::setMemRefs(MachineFunction &MF,
+ ArrayRef<MachineMemOperand *> MMOs) {
+ if (MMOs.empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Try to store a single MMO inline.
+ if (MMOs.size() == 1 && !getPreInstrSymbol() && !getPostInstrSymbol()) {
+ Info.set<EIIK_MMO>(MMOs[0]);
+ return;
+ }
+
+ // Otherwise create an extra info struct with all of our info.
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(MMOs, getPreInstrSymbol(), getPostInstrSymbol()));
+}
+
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
- mmo_iterator OldMemRefs = MemRefs;
- unsigned OldNumMemRefs = NumMemRefs;
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ MMOs.append(memoperands_begin(), memoperands_end());
+ MMOs.push_back(MO);
+ setMemRefs(MF, MMOs);
+}
- unsigned NewNum = NumMemRefs + 1;
- mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) {
+ if (this == &MI)
+ // Nothing to do for a self-clone!
+ return;
- std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
- NewMemRefs[NewNum - 1] = MO;
- setMemRefs(NewMemRefs, NewMemRefs + NewNum);
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning memory refrences!");
+ // See if we can just steal the extra info already allocated for the
+ // instruction. We can do this whenever the pre- and post-instruction symbols
+ // are the same (including null).
+ if (getPreInstrSymbol() == MI.getPreInstrSymbol() &&
+ getPostInstrSymbol() == MI.getPostInstrSymbol()) {
+ Info = MI.Info;
+ return;
+ }
+
+ // Otherwise, fall back on a copy-based clone.
+ setMemRefs(MF, MI.memoperands());
}
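
For illustration, a minimal sketch of the new memref API as a hypothetical target pass might use it (the pointer value, size, and alignment are made up; EIIK_MMO is the inline kind defined by this patch):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineMemOperand.h"

    using namespace llvm;

    // Hypothetical helper: attach a 4-byte load MMO to MI.
    static void attachLoadMMO(MachineFunction &MF, MachineInstr &MI,
                              const Value *Ptr) {
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo(Ptr), MachineMemOperand::MOLoad,
          /*Size=*/4, /*Alignment=*/4);
      // A single MMO with no pre-/post-instruction symbols is stored inline
      // (EIIK_MMO); no out-of-line allocation is needed.
      MI.setMemRefs(MF, {MMO});
    }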
/// Check to see if the MMOs pointed to by the two MemRefs arrays are
/// identical.
-static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) {
- auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end();
- auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end();
- if ((E1 - I1) != (E2 - I2))
+static bool hasIdenticalMMOs(ArrayRef<MachineMemOperand *> LHS,
+ ArrayRef<MachineMemOperand *> RHS) {
+ if (LHS.size() != RHS.size())
return false;
- for (; I1 != E1; ++I1, ++I2) {
- if (**I1 != **I2)
- return false;
+
+ auto LHSPointees = make_pointee_range(LHS);
+ auto RHSPointees = make_pointee_range(RHS);
+ return std::equal(LHSPointees.begin(), LHSPointees.end(),
+ RHSPointees.begin());
+}
+
+void MachineInstr::cloneMergedMemRefs(MachineFunction &MF,
+ ArrayRef<const MachineInstr *> MIs) {
+ // Try handling easy numbers of MIs with simpler mechanisms.
+ if (MIs.empty()) {
+ dropMemRefs(MF);
+ return;
}
- return true;
+ if (MIs.size() == 1) {
+ cloneMemRefs(MF, *MIs[0]);
+ return;
+ }
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the only
+ // way to merge with it is to drop all other memoperands.
+ if (MIs[0]->memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Handle the general case.
+ SmallVector<MachineMemOperand *, 2> MergedMMOs;
+ // Start with the first instruction.
+ assert(&MF == MIs[0]->getMF() &&
+ "Invalid machine functions when cloning memory references!");
+ MergedMMOs.append(MIs[0]->memoperands_begin(), MIs[0]->memoperands_end());
+ // Now walk all the other instructions and accumulate any different MMOs.
+ for (const MachineInstr &MI : make_pointee_range(MIs.slice(1))) {
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning memory references!");
+
+ // Skip MIs with identical operands to the first. This is a somewhat
+ // arbitrary hack but will catch common cases without being quadratic.
+ // TODO: We could fully implement merge semantics here if needed.
+ if (hasIdenticalMMOs(MIs[0]->memoperands(), MI.memoperands()))
+ continue;
+
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the
+ // only way to merge with it is to drop all other memoperands.
+ if (MI.memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Otherwise accumulate these into our temporary buffer of the merged state.
+ MergedMMOs.append(MI.memoperands_begin(), MI.memoperands_end());
+ }
+
+ setMemRefs(MF, MergedMMOs);
}
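
Sketched usage from a hypothetical load-pairing combine (all names invented): the paired instruction receives the union of both loads' memory info, with identical lists collapsed and an empty list forcing the conservative result.

    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    static void mergePairMemRefs(MachineFunction &MF, MachineInstr &Paired,
                                 const MachineInstr &LoadA,
                                 const MachineInstr &LoadB) {
      const MachineInstr *Srcs[] = {&LoadA, &LoadB};
      // Identical MMO lists collapse to one copy; an empty list on either
      // input leaves Paired with no memoperands at all (maximally
      // conservative).
      Paired.cloneMergedMemRefs(MF, Srcs);
    }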
-std::pair<MachineInstr::mmo_iterator, unsigned>
-MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
+void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ MCSymbol *OldSymbol = getPreInstrSymbol();
+ if (OldSymbol == Symbol)
+ return;
+ if (OldSymbol && !Symbol) {
+ // We're removing a symbol rather than adding one. Try to clean up any
+ // extra info carried around.
+ if (Info.is<EIIK_PreInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
- // If either of the incoming memrefs are empty, we must be conservative and
- // treat this as if we've exhausted our space for memrefs and dropped them.
- if (memoperands_empty() || Other.memoperands_empty())
- return std::make_pair(nullptr, 0);
+ if (memoperands_empty()) {
+ assert(getPostInstrSymbol() &&
+ "Should never have only a single symbol allocated out-of-line!");
+ Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+ return;
+ }
- // If both instructions have identical memrefs, we don't need to merge them.
- // Since many instructions have a single memref, and we tend to merge things
- // like pairs of loads from the same location, this catches a large number of
- // cases in practice.
- if (hasIdenticalMMOs(*this, Other))
- return std::make_pair(MemRefs, NumMemRefs);
+ // Otherwise fall back on the generic update.
+ } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) {
+ // If we don't have any other extra info, we can store this inline.
+ Info.set<EIIK_PreInstrSymbol>(Symbol);
+ return;
+ }
- // TODO: consider uniquing elements within the operand lists to reduce
- // space usage and fall back to conservative information less often.
- size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
+ // Otherwise, allocate a full new set of extra info.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol()));
+}
- // If we don't have enough room to store this many memrefs, be conservative
- // and drop them. Otherwise, we'd fail asserts when trying to add them to
- // the new instruction.
- if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
- return std::make_pair(nullptr, 0);
+void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ MCSymbol *OldSymbol = getPostInstrSymbol();
+ if (OldSymbol == Symbol)
+ return;
+ if (OldSymbol && !Symbol) {
+ // We're removing a symbol rather than adding one. Try to clean up any
+ // extra info carried around.
+ if (Info.is<EIIK_PostInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
+
+ if (memoperands_empty()) {
+ assert(getPreInstrSymbol() &&
+ "Should never have only a single symbol allocated out-of-line!");
+ Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+ return;
+ }
- MachineFunction *MF = getMF();
- mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
- mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
- MemBegin);
- MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
- MemEnd);
- assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
- "missing memrefs");
+ // Otherwise fall back on the generic update.
+ } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) {
+ // If we don't have any other extra info, we can store this inline.
+ Info.set<EIIK_PostInstrSymbol>(Symbol);
+ return;
+ }
- return std::make_pair(MemBegin, CombinedNumMemRefs);
+ // Otherwise, allocate a full new set of extra info.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
}
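
A sketch of attaching a pre-instruction label (helper and use case hypothetical, e.g. marking a patchable code region):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineModuleInfo.h"
    #include "llvm/MC/MCContext.h"

    using namespace llvm;

    static MCSymbol *labelBefore(MachineFunction &MF, MachineInstr &MI) {
      MCSymbol *Sym = MF.getMMI().getContext().createTempSymbol();
      // The symbol is stored inline while MI carries no other extra info;
      // otherwise the out-of-line ExtraInfo block is reallocated, so avoid
      // toggling symbols repeatedly on hot paths.
      MI.setPreInstrSymbol(MF, Sym);
      return Sym;
    }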
uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
@@ -388,7 +518,42 @@ uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
return getFlags() | Other.getFlags();
}
-bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+void MachineInstr::copyIRFlags(const Instruction &I) {
+ // Copy the wrapping flags.
+ if (const OverflowingBinaryOperator *OB =
+ dyn_cast<OverflowingBinaryOperator>(&I)) {
+ if (OB->hasNoSignedWrap())
+ setFlag(MachineInstr::MIFlag::NoSWrap);
+ if (OB->hasNoUnsignedWrap())
+ setFlag(MachineInstr::MIFlag::NoUWrap);
+ }
+
+ // Copy the exact flag.
+ if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
+ if (PE->isExact())
+ setFlag(MachineInstr::MIFlag::IsExact);
+
+ // Copy the fast-math flags.
+ if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
+ const FastMathFlags Flags = FP->getFastMathFlags();
+ if (Flags.noNaNs())
+ setFlag(MachineInstr::MIFlag::FmNoNans);
+ if (Flags.noInfs())
+ setFlag(MachineInstr::MIFlag::FmNoInfs);
+ if (Flags.noSignedZeros())
+ setFlag(MachineInstr::MIFlag::FmNsz);
+ if (Flags.allowReciprocal())
+ setFlag(MachineInstr::MIFlag::FmArcp);
+ if (Flags.allowContract())
+ setFlag(MachineInstr::MIFlag::FmContract);
+ if (Flags.approxFunc())
+ setFlag(MachineInstr::MIFlag::FmAfn);
+ if (Flags.allowReassoc())
+ setFlag(MachineInstr::MIFlag::FmReassoc);
+ }
+}
+
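
Intended use, sketched with hypothetical selector code: forward the IR-level flags onto the machine instruction in one call.

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    static void transferFlags(MachineInstr &MI, const Instruction &I) {
      // 'add nsw' yields MIFlag::NoSWrap; 'fadd fast' sets the Fm* flags.
      MI.copyIRFlags(I);
    }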
+bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
if (MII->getDesc().getFlags() & Mask) {
@@ -768,9 +933,7 @@ int MachineInstr::findRegisterUseOperandIdx(
unsigned MOReg = MO.getReg();
if (!MOReg)
continue;
- if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TRI->isSubRegister(MOReg, Reg)))
+ if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg)))
if (!isKill || MO.isKill())
return i;
}
@@ -1050,10 +1213,13 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
int64_t OffsetA = MMOa->getOffset();
int64_t OffsetB = MMOb->getOffset();
-
int64_t MinOffset = std::min(OffsetA, OffsetB);
- int64_t WidthA = MMOa->getSize();
- int64_t WidthB = MMOb->getSize();
+
+ uint64_t WidthA = MMOa->getSize();
+ uint64_t WidthB = MMOb->getSize();
+ bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
+ bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+
const Value *ValA = MMOa->getValue();
const Value *ValB = MMOb->getValue();
bool SameVal = (ValA && ValB && (ValA == ValB));
@@ -1069,6 +1235,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
}
if (SameVal) {
+ if (!KnownWidthA || !KnownWidthB)
+ return true;
int64_t MaxOffset = std::max(OffsetA, OffsetB);
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
return (MinOffset + LowWidth > MaxOffset);
@@ -1083,13 +1251,15 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- int64_t Overlapa = WidthA + OffsetA - MinOffset;
- int64_t Overlapb = WidthB + OffsetB - MinOffset;
+ int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
+ : MemoryLocation::UnknownSize;
+ int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
+ : MemoryLocation::UnknownSize;
AliasResult AAResult = AA->alias(
- MemoryLocation(ValA, Overlapa,
+ MemoryLocation(ValA, OverlapA,
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(ValB, Overlapb,
+ MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != NoAlias);
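
To see what the Overlap values represent, here is the arithmetic in isolation with hypothetical offsets and widths (plain C++):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      // Access A: offset 8, width 4. Access B: offset 0, width 16.
      int64_t OffsetA = 8, OffsetB = 0;
      int64_t WidthA = 4, WidthB = 16;
      int64_t MinOffset = std::min(OffsetA, OffsetB); // 0
      // Each Overlap value spans from MinOffset to the end of its access;
      // this is the size handed to MemoryLocation for the AA query.
      int64_t OverlapA = WidthA + OffsetA - MinOffset; // 12
      int64_t OverlapB = WidthB + OffsetB - MinOffset; // 16
      assert(OverlapA == 12 && OverlapB == 16);
      return 0;
    }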
@@ -1294,7 +1464,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
SmallBitVector PrintedTypes(8);
- bool ShouldPrintRegisterTies = hasComplexRegisterTies();
+ bool ShouldPrintRegisterTies = IsStandalone || hasComplexRegisterTies();
auto getTiedOperandIdx = [&](unsigned OpIdx) {
if (!ShouldPrintRegisterTies)
return 0U;
@@ -1343,6 +1513,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "afn ";
if (getFlag(MachineInstr::FmReassoc))
OS << "reassoc ";
+ if (getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (getFlag(MachineInstr::IsExact))
+ OS << "exact ";
// Print the opcode name.
if (TII)
@@ -1486,6 +1662,25 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
+ // Print any optional symbols attached to this instruction as if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = getPreInstrSymbol()) {
+ if (FirstOp)
+ FirstOp = false;
+ else
+ OS << ',';
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ }
+ if (MCSymbol *PostInstrSymbol = getPostInstrSymbol()) {
+ if (FirstOp)
+ FirstOp = false;
+ else
+ OS << ',';
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ }
+
if (!SkipDebugLoc) {
if (const DebugLoc &DL = getDebugLoc()) {
if (!FirstOp)
@@ -1605,7 +1800,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
// Trim unneeded kill operands.
while (!DeadOps.empty()) {
unsigned OpIdx = DeadOps.back();
- if (getOperand(OpIdx).isImplicit())
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
RemoveOperand(OpIdx);
else
getOperand(OpIdx).setIsKill(false);
@@ -1669,7 +1865,8 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
// Trim unneeded dead operands.
while (!DeadOps.empty()) {
unsigned OpIdx = DeadOps.back();
- if (getOperand(OpIdx).isImplicit())
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
RemoveOperand(OpIdx);
else
getOperand(OpIdx).setIsDead(false);
@@ -1876,3 +2073,30 @@ void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) {
Orig.getOperand(1).ChangeToImmediate(0U);
Orig.getOperand(3).setMetadata(Expr);
}
+
+void MachineInstr::collectDebugValues(
+ SmallVectorImpl<MachineInstr *> &DbgValues) {
+ MachineInstr &MI = *this;
+ if (!MI.getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI;
+ ++DI;
+ for (MachineBasicBlock::iterator DE = MI.getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
+ DbgValues.push_back(&*DI);
+ }
+}
+
+void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
+ // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValues;
+ collectDebugValues(DbgValues);
+
+ // Propagate Reg to debug value instructions.
+ for (auto *DBI : DbgValues)
+ DBI->getOperand(0).setReg(Reg);
+}
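
Sketch of the intended caller pattern (pass code hypothetical). Note the ordering: collection matches DBG_VALUEs against the instruction's current def register, so retarget them before rewriting the def:

    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    static void rewriteDef(MachineInstr &MI, unsigned NewReg) {
      // collectDebugValues() compares against MI's operand 0, so update
      // the trailing DBG_VALUEs first...
      MI.changeDebugValuesDefReg(NewReg);
      // ...then rewrite the def itself.
      MI.getOperand(0).setReg(NewReg);
    }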
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index ed16a2b6084c..ae378cc8c464 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -105,6 +105,16 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
return llvm::finalizeBundles(MF);
}
+/// Return the first found DebugLoc that has a DILocation, given a range of
+/// instructions. The search range is from FirstMI to LastMI (exclusive). If no
+/// DILocation is found, then an empty location is returned.
+static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ for (auto MII = FirstMI; MII != LastMI; ++MII)
+ if (MII->getDebugLoc().get())
+ return MII->getDebugLoc();
+ return DebugLoc();
+}
/// finalizeBundle - Finalize a machine instruction bundle which includes
/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
@@ -123,7 +133,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineInstrBuilder MIB =
- BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE));
+ BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
SmallVector<unsigned, 32> LocalDefs;
@@ -135,9 +145,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
SmallSet<unsigned, 8> KilledUseSet;
SmallSet<unsigned, 8> UndefUseSet;
SmallVector<MachineOperand*, 4> Defs;
- for (; FirstMI != LastMI; ++FirstMI) {
- for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = FirstMI->getOperand(i);
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
continue;
if (MO.isDef()) {
@@ -215,6 +225,15 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
getImplRegState(true));
}
+
+ // Set FrameSetup/FrameDestroy for the bundle. If any of the instructions has
+ // the property, set it on the bundle as well.
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ if (MII->getFlag(MachineInstr::FrameSetup))
+ MIB.setMIFlag(MachineInstr::FrameSetup);
+ if (MII->getFlag(MachineInstr::FrameDestroy))
+ MIB.setMIFlag(MachineInstr::FrameDestroy);
+ }
}
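
With both changes, bundling a range behaves as sketched below (iterators hypothetical): the BUNDLE header takes the first non-empty DebugLoc in the range and inherits any FrameSetup/FrameDestroy flag from its members.

    #include "llvm/CodeGen/MachineInstrBundle.h"

    using namespace llvm;

    static void bundleRange(MachineBasicBlock &MBB,
                            MachineBasicBlock::instr_iterator First,
                            MachineBasicBlock::instr_iterator Last) {
      // [First, Last) becomes one bundle headed by a BUNDLE instruction.
      finalizeBundle(MBB, First, Last);
    }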
/// finalizeBundle - Same functionality as the previous finalizeBundle except
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 7332b7162030..58fd1f238420 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -463,8 +463,12 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
if (PhysRegDefs.test(*AS))
PhysRegClobbers.set(*AS);
- PhysRegDefs.set(*AS);
}
+ // Need a second loop because MCRegAliasIterator can visit the same
+ // register twice.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
+ PhysRegDefs.set(*AS);
+
if (PhysRegClobbers.test(Reg))
// MI defined register is seen defined by another instruction in
// the loop, it cannot be a LICM candidate.
@@ -497,8 +501,7 @@ void MachineLICMBase::HoistRegionPostRA() {
// Walk the entire region, count number of defs for each register, and
// collect potential LICM candidates.
- const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (MachineBasicBlock *BB : Blocks) {
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
@@ -570,8 +573,7 @@ void MachineLICMBase::HoistRegionPostRA() {
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
void MachineLICMBase::AddToLiveIns(unsigned Reg) {
- const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (MachineBasicBlock *BB : Blocks) {
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
for (MachineInstr &MI : *BB) {
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 639cd80768fc..6ef8de88f8b1 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -194,7 +194,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
}
-MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
: ImmutablePass(ID), TM(*TM),
Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
TM->getObjFileLowering(), nullptr, false) {
@@ -206,10 +206,11 @@ MachineModuleInfo::~MachineModuleInfo() = default;
bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
- DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
+ UsesVAFloatArgument = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
TheModule = &M;
+ DbgInfoAvailable = !empty(M.debug_compile_units());
return false;
}
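
The new initialization reduces to this check, sketched standalone (the patch itself uses llvm::empty on the same range):

    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Debug info is available iff the module declares a DICompileUnit.
    static bool moduleHasDebugInfo(const Module &M) {
      return M.debug_compile_units_begin() != M.debug_compile_units_end();
    }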
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 07b173bc94f8..7b4f64bfe60d 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
+void MachineModuleInfoCOFF::anchor() {}
using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>;
static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) {
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
index 8098333832b4..05e51e1873cf 100644
--- a/lib/CodeGen/MachineOperand.cpp
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -460,7 +461,8 @@ static void printIRValueReference(raw_ostream &OS, const Value &V,
printLLVMNameWithoutPrefix(OS, V.getName());
return;
}
- MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V));
+ int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
+ MachineOperand::printIRSlotNumber(OS, Slot);
}
static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
@@ -695,6 +697,11 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
if (MCSymbol *Label = CFI.getLabel())
MachineOperand::printSymbol(OS, *Label);
break;
+ case MCCFIInstruction::OpNegateRAState:
+ OS << "negate_ra_sign_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
default:
// TODO: Print the other CFI Operations.
OS << "<unserializable cfi directive>";
@@ -742,10 +749,10 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "undef ";
if (isEarlyClobber())
OS << "early-clobber ";
- if (isDebug())
- OS << "debug-use ";
if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
OS << "renamable ";
+ // isDebug() is true exactly for register operands of a DBG_VALUE, so we
+ // simply infer it when parsing and do not need to print it.
const MachineRegisterInfo *MRI = nullptr;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1078,7 +1085,11 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (getFailureOrdering() != AtomicOrdering::NotAtomic)
OS << toIRString(getFailureOrdering()) << ' ';
- OS << getSize();
+ if (getSize() == MemoryLocation::UnknownSize)
+ OS << "unknown-size";
+ else
+ OS << getSize();
+
if (const Value *Val = getValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
printIRValueReference(OS, *Val, MST);
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index a712afec0959..ad96c0e579e4 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -128,9 +128,6 @@ struct SuffixTreeNode {
/// mapping by tacking that character on the end of the current string.
DenseMap<unsigned, SuffixTreeNode *> Children;
- /// A flag set to false if the node has been pruned from the tree.
- bool IsInTree = true;
-
/// The start index of this node's substring in the main string.
unsigned StartIdx = EmptyIdx;
@@ -167,15 +164,6 @@ struct SuffixTreeNode {
/// construction algorithm O(N^2) rather than O(N).
SuffixTreeNode *Link = nullptr;
- /// The parent of this node. Every node except for the root has a parent.
- SuffixTreeNode *Parent = nullptr;
-
- /// The number of times this node's string appears in the tree.
- ///
- /// This is equal to the number of leaf children of the string. It represents
- /// the number of suffixes that the node's string is a prefix of.
- unsigned OccurrenceCount = 0;
-
/// The length of the string formed by concatenating the edge labels from the
/// root to this node.
unsigned ConcatLen = 0;
@@ -200,9 +188,8 @@ struct SuffixTreeNode {
return *EndIdx - StartIdx + 1;
}
- SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
- SuffixTreeNode *Parent)
- : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
+ SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
+ : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
SuffixTreeNode() {}
};
@@ -231,14 +218,18 @@ struct SuffixTreeNode {
/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
class SuffixTree {
public:
- /// Stores each leaf node in the tree.
- ///
- /// This is used for finding outlining candidates.
- std::vector<SuffixTreeNode *> LeafVector;
-
/// Each element is an integer representing an instruction in the module.
ArrayRef<unsigned> Str;
+ /// A repeated substring in the tree.
+ struct RepeatedSubstring {
+ /// The length of the string.
+ unsigned Length;
+
+ /// The start indices of each occurrence.
+ std::vector<unsigned> StartIndices;
+ };
+
private:
/// Maintains each node in the tree.
SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
@@ -291,7 +282,7 @@ private:
assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
+ SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
Parent.Children[Edge] = N;
return N;
@@ -314,7 +305,7 @@ private:
unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, E, Root, Parent);
+ SuffixTreeNode(StartIdx, E, Root);
if (Parent)
Parent->Children[Edge] = N;
@@ -322,41 +313,27 @@ private:
}
/// Set the suffix indices of the leaves to the start indices of their
- /// respective suffixes. Also stores each leaf in \p LeafVector at its
- /// respective suffix index.
+ /// respective suffixes.
///
/// \param[in] CurrNode The node currently being visited.
- /// \param CurrIdx The current index of the string being visited.
- void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
+ /// \param CurrNodeLen The concatenation of all node sizes from the root to
+ /// this node. Used to produce suffix indices.
+ void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
- // Store the length of the concatenation of all strings from the root to
- // this node.
- if (!CurrNode.isRoot()) {
- if (CurrNode.ConcatLen == 0)
- CurrNode.ConcatLen = CurrNode.size();
-
- if (CurrNode.Parent)
- CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
- }
-
+ // Store the concatenation of lengths down from the root.
+ CurrNode.ConcatLen = CurrNodeLen;
// Traverse the tree depth-first.
for (auto &ChildPair : CurrNode.Children) {
assert(ChildPair.second && "Node had a null child!");
- setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
+ setSuffixIndices(*ChildPair.second,
+ CurrNodeLen + ChildPair.second->size());
}
- // Is this node a leaf?
- if (IsLeaf) {
- // If yes, give it a suffix index and bump its parent's occurrence count.
- CurrNode.SuffixIdx = Str.size() - CurrIdx;
- assert(CurrNode.Parent && "CurrNode had no parent!");
- CurrNode.Parent->OccurrenceCount++;
-
- // Store the leaf in the leaf vector for pruning later.
- LeafVector[CurrNode.SuffixIdx] = &CurrNode;
- }
+ // Is this node a leaf? If it is, give it a suffix index.
+ if (IsLeaf)
+ CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
}
/// Construct the suffix tree for the prefix of the input ending at
@@ -461,7 +438,6 @@ private:
// Make the old node a child of the split node and update its start
// index. This is the node n from the diagram.
NextNode->StartIdx += Active.Len;
- NextNode->Parent = SplitNode;
SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
// SplitNode is an internal node, update the suffix link.
@@ -495,9 +471,7 @@ public:
/// \param Str The string to construct the suffix tree for.
SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
- Root->IsInTree = true;
Active.Node = Root;
- LeafVector = std::vector<SuffixTreeNode *>(Str.size());
// Keep track of the number of suffixes we have to add of the current
// prefix.
@@ -518,6 +492,117 @@ public:
assert(Root && "Root node can't be nullptr!");
setSuffixIndices(*Root, 0);
}
+
+ /// Iterator for finding all repeated substrings in the suffix tree.
+ struct RepeatedSubstringIterator {
+ private:
+ /// The current node we're visiting.
+ SuffixTreeNode *N = nullptr;
+
+ /// The repeated substring associated with this node.
+ RepeatedSubstring RS;
+
+ /// The nodes left to visit.
+ std::vector<SuffixTreeNode *> ToVisit;
+
+ /// The minimum length of a repeated substring to find.
+ /// Since we're outlining, we want at least two instructions in the range.
+ /// FIXME: This may not be true for targets like X86 which support many
+ /// instruction lengths.
+ const unsigned MinLength = 2;
+
+ /// Move the iterator to the next repeated substring.
+ void advance() {
+ // Clear the current state. If we're at the end of the range, then this
+ // is the state we want to be in.
+ RS = RepeatedSubstring();
+ N = nullptr;
+
+ // Each leaf node represents a repeat of a string.
+ std::vector<SuffixTreeNode *> LeafChildren;
+
+ // Continue visiting nodes until we find one which repeats more than once.
+ while (!ToVisit.empty()) {
+ SuffixTreeNode *Curr = ToVisit.back();
+ ToVisit.pop_back();
+ LeafChildren.clear();
+
+ // Keep track of the length of the string associated with the node. If
+ // it's too short, we'll quit.
+ unsigned Length = Curr->ConcatLen;
+
+ // Iterate over each child, saving internal nodes for visiting, and
+ // leaf nodes in LeafChildren. Internal nodes represent individual
+ // strings, which may repeat.
+ for (auto &ChildPair : Curr->Children) {
+ // Save all of this node's children for processing.
+ if (!ChildPair.second->isLeaf())
+ ToVisit.push_back(ChildPair.second);
+
+ // It's not an internal node, so it must be a leaf. If we have a
+ // long enough string, then save the leaf children.
+ else if (Length >= MinLength)
+ LeafChildren.push_back(ChildPair.second);
+ }
+
+ // The root never represents a repeated substring. If we're looking at
+ // that, then skip it.
+ if (Curr->isRoot())
+ continue;
+
+ // Do we have any repeated substrings?
+ if (LeafChildren.size() >= 2) {
+ // Yes. Update the state to reflect this, and then bail out.
+ N = Curr;
+ RS.Length = Length;
+ for (SuffixTreeNode *Leaf : LeafChildren)
+ RS.StartIndices.push_back(Leaf->SuffixIdx);
+ break;
+ }
+ }
+
+ // At this point, either RS is an empty RepeatedSubstring, or it was set
+ // in the above loop. Similarly, N is either nullptr, or the node
+ // associated with RS.
+ }
+
+ public:
+ /// Return the current repeated substring.
+ RepeatedSubstring &operator*() { return RS; }
+
+ RepeatedSubstringIterator &operator++() {
+ advance();
+ return *this;
+ }
+
+ RepeatedSubstringIterator operator++(int) {
+ RepeatedSubstringIterator It(*this);
+ advance();
+ return It;
+ }
+
+ bool operator==(const RepeatedSubstringIterator &Other) {
+ return N == Other.N;
+ }
+ bool operator!=(const RepeatedSubstringIterator &Other) {
+ return !(*this == Other);
+ }
+
+ RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
+ // Do we have a non-null node?
+ if (N) {
+ // Yes. At the first step, we need to visit all of N's children.
+ // Note: This means that we visit N last.
+ ToVisit.push_back(N);
+ advance();
+ }
+ }
+};
+
+ typedef RepeatedSubstringIterator iterator;
+ iterator begin() { return iterator(Root); }
+ iterator end() { return iterator(nullptr); }
};
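
Sketch of consuming the iterator from code in this file (the mapped string is hypothetical): each RepeatedSubstring carries the common length and every occurrence's start index, which replaces the old walk over LeafVector.

    // Assumes access to the file-local SuffixTree class above.
    std::vector<unsigned> Mapped = {1, 2, 3, 1, 2, 3, 9};
    SuffixTree ST(Mapped);
    for (SuffixTree::RepeatedSubstring &RS : ST) {
      // For this input the iterator reports "2 3" at starts {1, 4} and
      // "1 2 3" at starts {0, 3}; discovery order is unspecified.
      (void)RS.Length;
      (void)RS.StartIndices;
    }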
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -537,9 +622,8 @@ struct InstructionMapper {
DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
InstructionIntegerMap;
- /// Corresponcence from unsigned integers to \p MachineInstrs.
- /// Inverse of \p InstructionIntegerMap.
- DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
+ /// Correspondence between \p MachineBasicBlocks and target-defined flags.
+ DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
/// The vector of unsigned integers that the module is mapped to.
std::vector<unsigned> UnsignedVec;
@@ -548,17 +632,39 @@ struct InstructionMapper {
/// at index i in \p UnsignedVec for each index i.
std::vector<MachineBasicBlock::iterator> InstrList;
+ // Set if we added an illegal number in the previous step.
+ // Since each illegal number is unique, we only need one of them between
+ // each range of legal numbers. This lets us make sure we don't add more
+ // than one illegal number per range.
+ bool AddedIllegalLastTime = false;
+
/// Maps \p *It to a legal integer.
///
- /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
- /// \p IntegerInstructionMap, and \p LegalInstrNumber.
+ /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
+ /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned mapToLegalUnsigned(
+ MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
+ bool &HaveLegalRange, unsigned &NumLegalInBlock,
+ std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // We added something legal, so we should unset the AddedIllegalLastTime
+ // flag.
+ AddedIllegalLastTime = false;
+
+ // If we have at least two adjacent legal instructions (which may have
+ // invisible instructions in between), remember that.
+ if (CanOutlineWithPrevInstr)
+ HaveLegalRange = true;
+ CanOutlineWithPrevInstr = true;
+
+ // Keep track of the number of legal instructions we insert.
+ NumLegalInBlock++;
// Get the integer for this instruction or give it the current
// LegalInstrNumber.
- InstrList.push_back(It);
+ InstrListForMBB.push_back(It);
MachineInstr &MI = *It;
bool WasInserted;
DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
@@ -568,12 +674,10 @@ struct InstructionMapper {
unsigned MINumber = ResultIt->second;
// There was an insertion.
- if (WasInserted) {
+ if (WasInserted)
LegalInstrNumber++;
- IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
- }
- UnsignedVec.push_back(MINumber);
+ UnsignedVecForMBB.push_back(MINumber);
// Make sure we don't overflow or use any integers reserved by the DenseMap.
if (LegalInstrNumber >= IllegalInstrNumber)
@@ -589,14 +693,26 @@ struct InstructionMapper {
/// Maps \p *It to an illegal integer.
///
- /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
+ /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
+ /// IllegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
+ bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // Can't outline an illegal instruction. Set the flag.
+ CanOutlineWithPrevInstr = false;
+
+ // Only add one illegal number per range of legal numbers.
+ if (AddedIllegalLastTime)
+ return IllegalInstrNumber;
+
+ // Remember that we added an illegal number last time.
+ AddedIllegalLastTime = true;
unsigned MINumber = IllegalInstrNumber;
- InstrList.push_back(It);
- UnsignedVec.push_back(IllegalInstrNumber);
+ InstrListForMBB.push_back(It);
+ UnsignedVecForMBB.push_back(IllegalInstrNumber);
IllegalInstrNumber--;
assert(LegalInstrNumber < IllegalInstrNumber &&
@@ -623,40 +739,78 @@ struct InstructionMapper {
/// \param TII \p TargetInstrInfo for the function.
void convertToUnsignedVec(MachineBasicBlock &MBB,
const TargetInstrInfo &TII) {
- unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
+ unsigned Flags = 0;
+
+ // Don't even map in this case.
+ if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
+ return;
+
+ // Store info for the MBB for later outlining.
+ MBBFlagsMap[&MBB] = Flags;
+
+ MachineBasicBlock::iterator It = MBB.begin();
- for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
- It++) {
+ // The number of instructions in this block that will be considered for
+ // outlining.
+ unsigned NumLegalInBlock = 0;
+ // True if we have at least two legal instructions which aren't separated
+ // by an illegal instruction.
+ bool HaveLegalRange = false;
+
+ // True if we can perform outlining given the last mapped (non-invisible)
+ // instruction. This lets us know if we have a legal range.
+ bool CanOutlineWithPrevInstr = false;
+
+ // FIXME: Should this all just be handled in the target, rather than using
+ // repeated calls to getOutliningType?
+ std::vector<unsigned> UnsignedVecForMBB;
+ std::vector<MachineBasicBlock::iterator> InstrListForMBB;
+
+ for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
// Keep track of where this instruction is in the module.
switch (TII.getOutliningType(It, Flags)) {
case InstrType::Illegal:
- mapToIllegalUnsigned(It);
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
+ UnsignedVecForMBB, InstrListForMBB);
break;
case InstrType::Legal:
- mapToLegalUnsigned(It);
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
break;
case InstrType::LegalTerminator:
- mapToLegalUnsigned(It);
- InstrList.push_back(It);
- UnsignedVec.push_back(IllegalInstrNumber);
- IllegalInstrNumber--;
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
+ // The instruction also acts as a terminator, so we have to record that
+ // in the string.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
break;
case InstrType::Invisible:
+ // Normally this is set by mapTo(Blah)Unsigned, but we just want to
+ // skip this instruction. So, unset the flag here.
+ AddedIllegalLastTime = false;
break;
}
}
- // After we're done every insertion, uniquely terminate this part of the
- // "string". This makes sure we won't match across basic block or function
- // boundaries since the "end" is encoded uniquely and thus appears in no
- // repeated substring.
- InstrList.push_back(MBB.end());
- UnsignedVec.push_back(IllegalInstrNumber);
- IllegalInstrNumber--;
+ // Are there enough legal instructions in the block for outlining to be
+ // possible?
+ if (HaveLegalRange) {
+ // After every insertion is done, uniquely terminate this part of the
+ // "string". This makes sure we won't match across basic block or function
+ // boundaries since the "end" is encoded uniquely and thus appears in no
+ // repeated substring.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
+ InstrListForMBB.end());
+ UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
+ UnsignedVecForMBB.end());
+ }
}
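
A simplified standalone re-implementation of the buffering rule with hypothetical inputs; the real code additionally deduplicates legal instructions through a map and appends a terminating illegal number before committing:

    #include <vector>

    enum Kind { Legal, Illegal, Invisible };

    // Returns the block's mapped string, or an empty vector when the
    // block has no legal range and is dropped entirely.
    static std::vector<unsigned> mapBlock(const std::vector<Kind> &Block) {
      std::vector<unsigned> Vec;
      unsigned NextLegal = 0, NextIllegal = ~0u;
      bool HaveLegalRange = false, CanOutlineWithPrev = false;
      bool AddedIllegalLastTime = false;
      for (Kind K : Block) {
        if (K == Invisible) {
          AddedIllegalLastTime = false; // mirrors the Invisible case above
          continue;
        }
        if (K == Legal) {
          AddedIllegalLastTime = false;
          if (CanOutlineWithPrev)
            HaveLegalRange = true;
          CanOutlineWithPrev = true;
          Vec.push_back(NextLegal++); // real code maps via InstructionIntegerMap
        } else {
          CanOutlineWithPrev = false;
          if (AddedIllegalLastTime)
            continue; // only one illegal number per illegal run
          AddedIllegalLastTime = true;
          Vec.push_back(NextIllegal--);
        }
      }
      return HaveLegalRange ? Vec : std::vector<unsigned>();
    }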
InstructionMapper() {
@@ -692,9 +846,6 @@ struct MachineOutliner : public ModulePass {
/// Set when the pass is constructed in TargetPassConfig.
bool RunOnAllFunctions = true;
- // Collection of IR functions created by the outliner.
- std::vector<Function *> CreatedIRFunctions;
-
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -717,7 +868,8 @@ struct MachineOutliner : public ModulePass {
/// Remark output explaining that a function was outlined.
void emitOutlinedFunctionRemark(OutlinedFunction &OF);
- /// Find all repeated substrings that satisfy the outlining cost model.
+ /// Find all repeated substrings that satisfy the outlining cost model by
+ /// constructing a suffix tree.
///
/// If a substring appears at least twice, then it must be represented by
/// an internal node which appears in at least two suffixes. Each suffix
@@ -726,73 +878,25 @@ struct MachineOutliner : public ModulePass {
/// internal node represents a beneficial substring, then we use each of
/// its leaf children to find the locations of its substring.
///
- /// \param ST A suffix tree to query.
/// \param Mapper Contains outlining mapping information.
- /// \param[out] CandidateList Filled with candidates representing each
- /// beneficial substring.
/// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
/// for each type of candidate.
- ///
- /// \returns The length of the longest candidate found.
- unsigned
- findCandidates(SuffixTree &ST,
- InstructionMapper &Mapper,
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList);
-
- /// Replace the sequences of instructions represented by the
- /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
- /// described in \p FunctionList.
+ void findCandidates(InstructionMapper &Mapper,
+ std::vector<OutlinedFunction> &FunctionList);
+
+ /// Replace the sequences of instructions represented by \p OutlinedFunctions
+ /// with calls to functions.
///
/// \param M The module we are outlining from.
- /// \param CandidateList A list of candidates to be outlined.
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
- bool outline(Module &M,
- const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
+ bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
InstructionMapper &Mapper);
/// Creates a function for \p OF and inserts it into the module.
- MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
- InstructionMapper &Mapper);
-
- /// Find potential outlining candidates and store them in \p CandidateList.
- ///
- /// For each type of potential candidate, also build an \p OutlinedFunction
- /// struct containing the information to build the function for that
- /// candidate.
- ///
- /// \param[out] CandidateList Filled with outlining candidates for the module.
- /// \param[out] FunctionList Filled with functions corresponding to each type
- /// of \p Candidate.
- /// \param ST The suffix tree for the module.
- ///
- /// \returns The length of the longest candidate found. 0 if there are none.
- unsigned
- buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- SuffixTree &ST, InstructionMapper &Mapper);
-
- /// Helper function for pruneOverlaps.
- /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
- void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
-
- /// Remove any overlapping candidates that weren't handled by the
- /// suffix tree's pruning method.
- ///
- /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
- /// If a short candidate is chosen for outlining, then a longer candidate
- /// which has that short candidate as a suffix is chosen, the tree's pruning
- /// method will not find it. Thus, we need to prune before outlining as well.
- ///
- /// \param[in,out] CandidateList A list of outlining candidates.
- /// \param[in,out] FunctionList A list of functions to be outlined.
- /// \param Mapper Contains instruction mapping info for outlining.
- /// \param MaxCandidateLen The length of the longest candidate.
- void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper, unsigned MaxCandidateLen);
+ MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
+ InstructionMapper &Mapper,
+ unsigned Name);
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
@@ -802,13 +906,31 @@ struct MachineOutliner : public ModulePass {
/// function for remark emission.
DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
DISubprogram *SP;
- for (const std::shared_ptr<Candidate> &C : OF.Candidates)
- if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram()))
+ for (const Candidate &C : OF.Candidates)
+ if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
return SP;
return nullptr;
}
-};
+ /// Populate an \p InstructionMapper with instruction-to-integer mappings.
+ /// These are used to construct a suffix tree.
+ void populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI);
+
+ /// Initialize information necessary to output a size remark.
+ /// FIXME: This should be handled by the pass manager, not the outliner.
+ /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
+ /// pass manager.
+ void initSizeRemarkInfo(
+ const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount);
+
+ /// Emit the remark.
+ // FIXME: This should be handled by the pass manager, not the outliner.
+ void emitInstrCountChangedRemark(
+ const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount);
+};
} // Anonymous namespace.
char MachineOutliner::ID = 0;
@@ -828,6 +950,10 @@ INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
void MachineOutliner::emitNotOutliningCheaperRemark(
unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
OutlinedFunction &OF) {
+ // FIXME: Right now, we arbitrarily choose some Candidate from the
+ // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
+ // We should probably sort these by function name or something to make sure
+ // the remarks are stable.
Candidate &C = CandidatesForRepeatedSeq.front();
MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
MORE.emit([&]() {
@@ -861,7 +987,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
MBB->findDebugLoc(MBB->begin()), MBB);
R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
- << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
+ << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
<< "from " << NV("NumOccurrences", OF.getOccurrenceCount())
<< " locations. "
<< "(Found at: ";
@@ -869,12 +995,8 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
// Tell the user the other places the candidate was found.
for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
- // Skip over things that were pruned.
- if (!OF.Candidates[i]->InCandidateList)
- continue;
-
R << NV((Twine("StartLoc") + Twine(i)).str(),
- OF.Candidates[i]->front()->getDebugLoc());
+ OF.Candidates[i].front()->getDebugLoc());
if (i != e - 1)
R << ", ";
}
@@ -884,95 +1006,65 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
-unsigned MachineOutliner::findCandidates(
- SuffixTree &ST, InstructionMapper &Mapper,
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList) {
- CandidateList.clear();
+void
+MachineOutliner::findCandidates(InstructionMapper &Mapper,
+ std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
- unsigned MaxLen = 0;
-
- // FIXME: Visit internal nodes instead of leaves.
- for (SuffixTreeNode *Leaf : ST.LeafVector) {
- assert(Leaf && "Leaves in LeafVector cannot be null!");
- if (!Leaf->IsInTree)
- continue;
-
- assert(Leaf->Parent && "All leaves must have parents!");
- SuffixTreeNode &Parent = *(Leaf->Parent);
-
- // If it doesn't appear enough, or we already outlined from it, skip it.
- if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
- continue;
-
- // Figure out if this candidate is beneficial.
- unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
-
- // Too short to be beneficial; skip it.
- // FIXME: This isn't necessarily true for, say, X86. If we factor in
- // instruction lengths we need more information than this.
- if (StringLen < 2)
- continue;
-
- // If this is a beneficial class of candidate, then every one is stored in
- // this vector.
- std::vector<Candidate> CandidatesForRepeatedSeq;
-
- // Figure out the call overhead for each instance of the sequence.
- for (auto &ChildPair : Parent.Children) {
- SuffixTreeNode *M = ChildPair.second;
-
- if (M && M->IsInTree && M->isLeaf()) {
- // Never visit this leaf again.
- M->IsInTree = false;
- unsigned StartIdx = M->SuffixIdx;
- unsigned EndIdx = StartIdx + StringLen - 1;
+ SuffixTree ST(Mapper.UnsignedVec);
- // Trick: Discard some candidates that would be incompatible with the
- // ones we've already found for this sequence. This will save us some
- // work in candidate selection.
- //
- // If two candidates overlap, then we can't outline them both. This
- // happens when we have candidates that look like, say
- //
- // AA (where each "A" is an instruction).
- //
- // We might have some portion of the module that looks like this:
- // AAAAAA (6 A's)
- //
- // In this case, there are 5 different copies of "AA" in this range, but
- // at most 3 can be outlined. If only outlining 3 of these is going to
- // be unbeneficial, then we ought to not bother.
- //
- // Note that two things DON'T overlap when they look like this:
- // start1...end1 .... start2...end2
- // That is, one must either
- // * End before the other starts
- // * Start after the other ends
- if (std::all_of(CandidatesForRepeatedSeq.begin(),
- CandidatesForRepeatedSeq.end(),
- [&StartIdx, &EndIdx](const Candidate &C) {
- return (EndIdx < C.getStartIdx() ||
- StartIdx > C.getEndIdx());
- })) {
- // It doesn't overlap with anything, so we can outline it.
- // Each sequence is over [StartIt, EndIt].
- // Save the candidate and its location.
-
- MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
- MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
-
- CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
- EndIt, StartIt->getParent(),
- FunctionList.size());
- }
+ // First, find all of the repeated substrings in the tree of minimum length
+ // 2.
+ std::vector<Candidate> CandidatesForRepeatedSeq;
+ for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
+ CandidatesForRepeatedSeq.clear();
+ SuffixTree::RepeatedSubstring RS = *It;
+ unsigned StringLen = RS.Length;
+ for (const unsigned &StartIdx : RS.StartIndices) {
+ unsigned EndIdx = StartIdx + StringLen - 1;
+ // Trick: Discard some candidates that would be incompatible with the
+ // ones we've already found for this sequence. This will save us some
+ // work in candidate selection.
+ //
+ // If two candidates overlap, then we can't outline them both. This
+ // happens when we have candidates that look like, say
+ //
+ // AA (where each "A" is an instruction).
+ //
+ // We might have some portion of the module that looks like this:
+ // AAAAAA (6 A's)
+ //
+ // In this case, there are 5 different copies of "AA" in this range, but
+ // at most 3 can be outlined. If only outlining 3 of these is going to
+ // be unbeneficial, then we ought to not bother.
+ //
+ // Note that two things DON'T overlap when they look like this:
+ // start1...end1 .... start2...end2
+ // That is, one must either
+ // * End before the other starts
+ // * Start after the other ends
+ if (std::all_of(
+ CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
+ [&StartIdx, &EndIdx](const Candidate &C) {
+ return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
+ })) {
+ // It doesn't overlap with anything, so we can outline it.
+ // Each sequence is over [StartIt, EndIt].
+ // Save the candidate and its location.
+
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ MachineBasicBlock *MBB = StartIt->getParent();
+
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
+ EndIt, MBB, FunctionList.size(),
+ Mapper.MBBFlagsMap[MBB]);
}
}
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
// to outline.
- if (CandidatesForRepeatedSeq.empty())
+ if (CandidatesForRepeatedSeq.size() < 2)
continue;
// Arbitrarily choose a TII from the first candidate.
@@ -983,179 +1075,33 @@ unsigned MachineOutliner::findCandidates(
OutlinedFunction OF =
TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
- // If we deleted every candidate, then there's nothing to outline.
- if (OF.Candidates.empty())
+ // If we deleted too many candidates, then there's nothing worth outlining.
+ // FIXME: This should take target-specified instruction sizes into account.
+ if (OF.Candidates.size() < 2)
continue;
- std::vector<unsigned> Seq;
- for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
- Seq.push_back(ST.Str[i]);
- OF.Sequence = Seq;
- OF.Name = FunctionList.size();
-
// Is it better to outline this candidate than not?
if (OF.getBenefit() < 1) {
emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
continue;
}
- if (StringLen > MaxLen)
- MaxLen = StringLen;
-
- // The function is beneficial. Save its candidates to the candidate list
- // for pruning.
- for (std::shared_ptr<Candidate> &C : OF.Candidates)
- CandidateList.push_back(C);
FunctionList.push_back(OF);
-
- // Move to the next function.
- Parent.IsInTree = false;
- }
-
- return MaxLen;
-}
-
-// Remove C from the candidate space, and update its OutlinedFunction.
-void MachineOutliner::prune(Candidate &C,
- std::vector<OutlinedFunction> &FunctionList) {
- // Get the OutlinedFunction associated with this Candidate.
- OutlinedFunction &F = FunctionList[C.FunctionIdx];
-
- // Update C's associated function's occurrence count.
- F.decrement();
-
- // Remove C from the CandidateList.
- C.InCandidateList = false;
-
- LLVM_DEBUG(dbgs() << "- Removed a Candidate \n";
- dbgs() << "--- Num fns left for candidate: "
- << F.getOccurrenceCount() << "\n";
- dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
- << "\n";);
-}
-
-void MachineOutliner::pruneOverlaps(
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
- unsigned MaxCandidateLen) {
-
- // Return true if this candidate became unbeneficial for outlining in a
- // previous step.
- auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
-
- // Check if the candidate was removed in a previous step.
- if (!C.InCandidateList)
- return true;
-
- // C must be alive. Check if we should remove it.
- if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
- prune(C, FunctionList);
- return true;
- }
-
- // C is in the list, and F is still beneficial.
- return false;
- };
-
- // TODO: Experiment with interval trees or other interval-checking structures
- // to lower the time complexity of this function.
- // TODO: Can we do better than the simple greedy choice?
- // Check for overlaps in the range.
- // This is O(MaxCandidateLen * CandidateList.size()).
- for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
- It++) {
- Candidate &C1 = **It;
-
- // If C1 was already pruned, or its function is no longer beneficial for
- // outlining, move to the next candidate.
- if (ShouldSkipCandidate(C1))
- continue;
-
- // The minimum start index of any candidate that could overlap with this
- // one.
- unsigned FarthestPossibleIdx = 0;
-
- // Either the index is 0, or it's at most MaxCandidateLen indices away.
- if (C1.getStartIdx() > MaxCandidateLen)
- FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
-
- // Compare against the candidates in the list that start at most
- // FarthestPossibleIdx indices away from C1. There are at most
- // MaxCandidateLen of these.
- for (auto Sit = It + 1; Sit != Et; Sit++) {
- Candidate &C2 = **Sit;
-
- // Is this candidate too far away to overlap?
- if (C2.getStartIdx() < FarthestPossibleIdx)
- break;
-
- // If C2 was already pruned, or its function is no longer beneficial for
- // outlining, move to the next candidate.
- if (ShouldSkipCandidate(C2))
- continue;
-
- // Do C1 and C2 overlap?
- //
- // Not overlapping:
- // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
- //
- // We sorted our candidate list so C2Start <= C1Start. We know that
- // C2End > C2Start since each candidate has length >= 2. Therefore, all we
- // have to check is C2End < C2Start to see if we overlap.
- if (C2.getEndIdx() < C1.getStartIdx())
- continue;
-
- // C1 and C2 overlap.
- // We need to choose the better of the two.
- //
- // Approximate this by picking the one which would have saved us the
- // most instructions before any pruning.
-
- // Is C2 a better candidate?
- if (C2.Benefit > C1.Benefit) {
- // Yes, so prune C1. Since C1 is dead, we don't have to compare it
- // against anything anymore, so break.
- prune(C1, FunctionList);
- break;
- }
-
- // Prune C2 and move on to the next candidate.
- prune(C2, FunctionList);
- }
}
}
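
Worked example of the overlap filter used above, in plain C++ with a hypothetical occurrence list: a module shaped like AAAAAA with StringLen == 2 yields five occurrences, of which the greedy check keeps three:

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      const unsigned StringLen = 2;
      std::vector<std::pair<unsigned, unsigned>> Kept; // [StartIdx, EndIdx]
      for (unsigned StartIdx : {0u, 1u, 2u, 3u, 4u}) {
        unsigned EndIdx = StartIdx + StringLen - 1;
        bool Overlaps = false;
        for (const auto &C : Kept)
          // The same predicate as the std::all_of above, negated.
          if (!(EndIdx < C.first || StartIdx > C.second))
            Overlaps = true;
        if (!Overlaps)
          Kept.push_back({StartIdx, EndIdx});
      }
      assert(Kept.size() == 3); // kept [0,1], [2,3], [4,5]
      return 0;
    }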
-unsigned MachineOutliner::buildCandidateList(
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
- InstructionMapper &Mapper) {
-
- std::vector<unsigned> CandidateSequence; // Current outlining candidate.
- unsigned MaxCandidateLen = 0; // Length of the longest candidate.
-
- MaxCandidateLen =
- findCandidates(ST, Mapper, CandidateList, FunctionList);
-
-  // Sort the candidates in descending order. This will simplify the outlining
- // process when we have to remove the candidates from the mapping by
- // allowing us to cut them out without keeping track of an offset.
- std::stable_sort(
- CandidateList.begin(), CandidateList.end(),
- [](const std::shared_ptr<Candidate> &LHS,
- const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
-
- return MaxCandidateLen;
-}
-
MachineFunction *
-MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
- InstructionMapper &Mapper) {
+MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
+ InstructionMapper &Mapper,
+ unsigned Name) {
  // Create the function name. This should be unique.
std::ostringstream NameStream;
- NameStream << "OUTLINED_FUNCTION_" << OF.Name;
+ // FIXME: We should have a better naming scheme. This should be stable,
+ // regardless of changes to the outliner's cost model/traversal order.
+ NameStream << "OUTLINED_FUNCTION_" << Name;
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
@@ -1176,8 +1122,14 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
F->addFnAttr(Attribute::OptimizeForSize);
F->addFnAttr(Attribute::MinSize);
- // Save F so that we can add debug info later if we need to.
- CreatedIRFunctions.push_back(F);
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ Candidate &FirstCand = OF.Candidates.front();
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F->addFnAttr(ParentFn.getFnAttribute("target-features"));
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -1192,12 +1144,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
- // Copy over the instructions for the function using the integer mappings in
- // its sequence.
- for (unsigned Str : OF.Sequence) {
- MachineInstr *NewMI =
- MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
- NewMI->dropMemRefs();
+ for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
+ ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ NewMI->dropMemRefs(MF);
// Don't keep debug information for outlined instructions.
NewMI->setDebugLoc(DebugLoc());
@@ -1206,6 +1156,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
TII.buildOutlinedFrame(MBB, MF, OF);
+ // Outlined functions shouldn't preserve liveness.
+ MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
+ MF.getRegInfo().freezeReservedRegs(MF);
+
// If there's a DISubprogram associated with this outlined function, then
// emit debug info for the outlined function.
if (DISubprogram *SP = getSubprogramOrNull(OF)) {
@@ -1214,118 +1168,127 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
DIBuilder DB(M, true, CU);
DIFile *Unit = SP->getFile();
Mangler Mg;
-
- // Walk over each IR function we created in the outliner and create
- // DISubprograms for each function.
- for (Function *F : CreatedIRFunctions) {
- // Get the mangled name of the function for the linkage name.
- std::string Dummy;
- llvm::raw_string_ostream MangledNameStream(Dummy);
- Mg.getNameWithPrefix(MangledNameStream, F, false);
-
- DISubprogram *SP = DB.createFunction(
- Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
- Unit /* File */,
- 0 /* Line 0 is reserved for compiler-generated code. */,
- DB.createSubroutineType(
- DB.getOrCreateTypeArray(None)), /* void type */
- false, true, 0, /* Line 0 is reserved for compiler-generated code. */
- DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
- true /* Outlined code is optimized code by definition. */);
-
- // Don't add any new variables to the subprogram.
- DB.finalizeSubprogram(SP);
-
- // Attach subprogram to the function.
- F->setSubprogram(SP);
- }
-
+ // Get the mangled name of the function for the linkage name.
+ std::string Dummy;
+ llvm::raw_string_ostream MangledNameStream(Dummy);
+ Mg.getNameWithPrefix(MangledNameStream, F, false);
+
+ DISubprogram *OutlinedSP = DB.createFunction(
+ Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
+ Unit /* File */,
+ 0 /* Line 0 is reserved for compiler-generated code. */,
+ DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
+ 0, /* Line 0 is reserved for compiler-generated code. */
+ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
+ /* Outlined code is optimized code by definition. */
+ DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
+
+ // Don't add any new variables to the subprogram.
+ DB.finalizeSubprogram(OutlinedSP);
+
+ // Attach subprogram to the function.
+ F->setSubprogram(OutlinedSP);
// We're done with the DIBuilder.
DB.finalize();
}
- // Outlined functions shouldn't preserve liveness.
- MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
- MF.getRegInfo().freezeReservedRegs(MF);
return &MF;
}
-bool MachineOutliner::outline(
- Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
+bool MachineOutliner::outline(Module &M,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper) {
bool OutlinedSomething = false;
- // Replace the candidates with calls to their respective outlined functions.
- for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
- Candidate &C = *Cptr;
- // Was the candidate removed during pruneOverlaps?
- if (!C.InCandidateList)
- continue;
- // If not, then look at its OutlinedFunction.
- OutlinedFunction &OF = FunctionList[C.FunctionIdx];
+ // Number to append to the current outlined function.
+ unsigned OutlinedFunctionNum = 0;
- // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
+ // Sort by benefit. The most beneficial functions should be outlined first.
+ std::stable_sort(
+ FunctionList.begin(), FunctionList.end(),
+ [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
+
+ // Walk over each function, outlining them as we go along. Functions are
+ // outlined greedily, based off the sort above.
+ for (OutlinedFunction &OF : FunctionList) {
+ // If we outlined something that overlapped with a candidate in a previous
+ // step, then we can't outline from it.
+ erase_if(OF.Candidates, [&Mapper](Candidate &C) {
+ return std::any_of(
+ Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
+ [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
+ });
+
+ // If we made it unbeneficial to outline this function, skip it.
if (OF.getBenefit() < 1)
continue;
- // Does this candidate have a function yet?
- if (!OF.MF) {
- OF.MF = createOutlinedFunction(M, OF, Mapper);
- emitOutlinedFunctionRemark(OF);
- FunctionsCreated++;
- }
-
+ // It's beneficial. Create the function and outline its sequence's
+ // occurrences.
+ OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
+ emitOutlinedFunctionRemark(OF);
+ FunctionsCreated++;
+ OutlinedFunctionNum++; // Created a function, move to the next name.
MachineFunction *MF = OF.MF;
- MachineBasicBlock &MBB = *C.getMBB();
- MachineBasicBlock::iterator StartIt = C.front();
- MachineBasicBlock::iterator EndIt = C.back();
- assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!");
- assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!");
-
const TargetSubtargetInfo &STI = MF->getSubtarget();
const TargetInstrInfo &TII = *STI.getInstrInfo();
- // Insert a call to the new function and erase the old sequence.
- auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C);
-
- // If the caller tracks liveness, then we need to make sure that anything
- // we outline doesn't break liveness assumptions.
- // The outlined functions themselves currently don't track liveness, but
- // we should make sure that the ranges we yank things out of aren't
- // wrong.
- if (MBB.getParent()->getProperties().hasProperty(
- MachineFunctionProperties::Property::TracksLiveness)) {
- // Helper lambda for adding implicit def operands to the call instruction.
- auto CopyDefs = [&CallInst](MachineInstr &MI) {
- for (MachineOperand &MOP : MI.operands()) {
- // Skip over anything that isn't a register.
- if (!MOP.isReg())
- continue;
-
- // If it's a def, add it to the call instruction.
- if (MOP.isDef())
- CallInst->addOperand(
- MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */
- true /* isImp = true */));
- }
- };
+ // Replace occurrences of the sequence with calls to the new function.
+ for (Candidate &C : OF.Candidates) {
+ MachineBasicBlock &MBB = *C.getMBB();
+ MachineBasicBlock::iterator StartIt = C.front();
+ MachineBasicBlock::iterator EndIt = C.back();
+
+ // Insert the call.
+ auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
+
+ // If the caller tracks liveness, then we need to make sure that
+ // anything we outline doesn't break liveness assumptions. The outlined
+ // functions themselves currently don't track liveness, but we should
+ // make sure that the ranges we yank things out of aren't wrong.
+ if (MBB.getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness)) {
+ // Helper lambda for adding implicit def operands to the call
+ // instruction.
+ auto CopyDefs = [&CallInst](MachineInstr &MI) {
+ for (MachineOperand &MOP : MI.operands()) {
+ // Skip over anything that isn't a register.
+ if (!MOP.isReg())
+ continue;
+
+ // If it's a def, add it to the call instruction.
+ if (MOP.isDef())
+ CallInst->addOperand(MachineOperand::CreateReg(
+ MOP.getReg(), true, /* isDef = true */
+ true /* isImp = true */));
+ }
+ };
+ // Copy over the defs in the outlined range.
+        // First inst in outlined range <-- Anything that's defined in this
+        // ...                           .. range has to be added as an implicit
+        // Last inst in outlined range  <-- def to the call instruction.
+ std::for_each(CallInst, std::next(EndIt), CopyDefs);
+ }
- // Copy over the defs in the outlined range.
-    // First inst in outlined range <-- Anything that's defined in this
-    // ...                           .. range has to be added as an implicit
-    // Last inst in outlined range  <-- def to the call instruction.
- std::for_each(CallInst, std::next(EndIt), CopyDefs);
- }
+ // Erase from the point after where the call was inserted up to, and
+ // including, the final instruction in the sequence.
+ // Erase needs one past the end, so we need std::next there too.
+ MBB.erase(std::next(StartIt), std::next(EndIt));
- // Erase from the point after where the call was inserted up to, and
- // including, the final instruction in the sequence.
- // Erase needs one past the end, so we need std::next there too.
- MBB.erase(std::next(StartIt), std::next(EndIt));
- OutlinedSomething = true;
+ // Keep track of what we removed by marking them all as -1.
+ std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
+ [](unsigned &I) { I = static_cast<unsigned>(-1); });
+ OutlinedSomething = true;
- // Statistics.
- NumOutlined++;
+ // Statistics.
+ NumOutlined++;
+ }
}
LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
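
The erase_if/any_of guard and the for_each stamping above form a lazy invalidation scheme: slots consumed by an outlined candidate are stamped in Mapper.UnsignedVec with a -1 sentinel, and any later candidate dies if one of its slots carries it. A minimal sketch of the scheme on plain vectors, with hypothetical names:

#include <algorithm>
#include <vector>

static const unsigned Tombstone = static_cast<unsigned>(-1);

// Stamp the inclusive slot range [Start, End] as consumed by outlining.
void markOutlined(std::vector<unsigned> &Vec, unsigned Start, unsigned End) {
  std::for_each(Vec.begin() + Start, Vec.begin() + End + 1,
                [](unsigned &V) { V = Tombstone; });
}

// A later candidate is dead if any slot in its range was already consumed.
bool isDead(const std::vector<unsigned> &Vec, unsigned Start, unsigned End) {
  return std::any_of(Vec.begin() + Start, Vec.begin() + End + 1,
                     [](unsigned V) { return V == Tombstone; });
}
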
@@ -1333,34 +1296,8 @@ bool MachineOutliner::outline(
return OutlinedSomething;
}
-bool MachineOutliner::runOnModule(Module &M) {
- // Check if there's anything in the module. If it's empty, then there's
- // nothing to outline.
- if (M.empty())
- return false;
-
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
-
- // If the user passed -enable-machine-outliner=always or
- // -enable-machine-outliner, the pass will run on all functions in the module.
- // Otherwise, if the target supports default outlining, it will run on all
- // functions deemed by the target to be worth outlining from by default. Tell
- // the user how the outliner is running.
- LLVM_DEBUG(
- dbgs() << "Machine Outliner: Running on ";
- if (RunOnAllFunctions)
- dbgs() << "all functions";
- else
- dbgs() << "target-default functions";
- dbgs() << "\n"
- );
-
- // If the user specifies that they want to outline from linkonceodrs, set
- // it here.
- OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
-
- InstructionMapper Mapper;
-
+void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI) {
// Build instruction mappings for each function in the module. Start by
// iterating over each Function in M.
for (Function &F : M) {
@@ -1395,7 +1332,11 @@ bool MachineOutliner::runOnModule(Module &M) {
for (MachineBasicBlock &MBB : *MF) {
// If there isn't anything in MBB, then there's no point in outlining from
// it.
- if (MBB.empty())
+ // If there are fewer than 2 instructions in the MBB, then it can't ever
+ // contain something worth outlining.
+    // FIXME: This should be based on the maximum size in bytes of an outlined
+    // call versus the size in bytes of the MBB.
+ if (MBB.empty() || MBB.size() < 2)
continue;
// Check if MBB could be the target of an indirect branch. If it is, then
@@ -1407,21 +1348,133 @@ bool MachineOutliner::runOnModule(Module &M) {
Mapper.convertToUnsignedVec(MBB, *TII);
}
}
+}
- // Construct a suffix tree, use it to find candidates, and then outline them.
- SuffixTree ST(Mapper.UnsignedVec);
- std::vector<std::shared_ptr<Candidate>> CandidateList;
+void MachineOutliner::initSizeRemarkInfo(
+ const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount) {
+ // Collect instruction counts for every function. We'll use this to emit
+ // per-function size remarks later.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // We only care about MI counts here. If there's no MachineFunction at this
+ // point, then there won't be after the outliner runs, so let's move on.
+ if (!MF)
+ continue;
+ FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
+ }
+}
+
+void MachineOutliner::emitInstrCountChangedRemark(
+ const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount) {
+ // Iterate over each function in the module and emit remarks.
+ // Note that we won't miss anything by doing this, because the outliner never
+ // deletes functions.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // The outliner never deletes functions. If we don't have a MF here, then we
+ // didn't have one prior to outlining either.
+ if (!MF)
+ continue;
+
+ std::string Fname = F.getName();
+ unsigned FnCountAfter = MF->getInstructionCount();
+ unsigned FnCountBefore = 0;
+
+ // Check if the function was recorded before.
+ auto It = FunctionToInstrCount.find(Fname);
+
+ // Did we have a previously-recorded size? If yes, then set FnCountBefore
+ // to that.
+ if (It != FunctionToInstrCount.end())
+ FnCountBefore = It->second;
+
+ // Compute the delta and emit a remark if there was a change.
+ int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
+ static_cast<int64_t>(FnCountBefore);
+ if (FnDelta == 0)
+ continue;
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ DiagnosticLocation(),
+ &MF->front());
+ R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
+ << ": Function: "
+ << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
+ << ": MI instruction count changed from "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
+ FnCountBefore)
+ << " to "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
+ FnCountAfter)
+ << "; Delta: "
+ << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
+ return R;
+ });
+ }
+}
+
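
The pair of helpers above is a straightforward snapshot/diff pattern: capture a per-function instruction count before the pass mutates anything, re-read the counts afterwards, and report only the nonzero deltas. A sketch of the core arithmetic using standard-library types (hypothetical; the real code uses llvm::StringMap keyed by function name):

#include <cstdint>
#include <map>
#include <string>

using Snapshot = std::map<std::string, unsigned>;

// Signed size change for one function; functions absent from the snapshot
// are treated as having had zero instructions before the pass.
int64_t sizeDelta(const Snapshot &Before, const std::string &Name,
                  unsigned CountAfter) {
  unsigned CountBefore = 0;
  auto It = Before.find(Name);
  if (It != Before.end())
    CountBefore = It->second;
  return static_cast<int64_t>(CountAfter) -
         static_cast<int64_t>(CountBefore);
}
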
+bool MachineOutliner::runOnModule(Module &M) {
+ // Check if there's anything in the module. If it's empty, then there's
+ // nothing to outline.
+ if (M.empty())
+ return false;
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+
+ // If the user passed -enable-machine-outliner=always or
+ // -enable-machine-outliner, the pass will run on all functions in the module.
+ // Otherwise, if the target supports default outlining, it will run on all
+ // functions deemed by the target to be worth outlining from by default. Tell
+ // the user how the outliner is running.
+ LLVM_DEBUG(
+ dbgs() << "Machine Outliner: Running on ";
+ if (RunOnAllFunctions)
+ dbgs() << "all functions";
+ else
+ dbgs() << "target-default functions";
+ dbgs() << "\n"
+ );
+
+ // If the user specifies that they want to outline from linkonceodrs, set
+ // it here.
+ OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
+ InstructionMapper Mapper;
+
+ // Prepare instruction mappings for the suffix tree.
+ populateMapper(Mapper, M, MMI);
std::vector<OutlinedFunction> FunctionList;
// Find all of the outlining candidates.
- unsigned MaxCandidateLen =
- buildCandidateList(CandidateList, FunctionList, ST, Mapper);
-
- // Remove candidates that overlap with other candidates.
- pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen);
+ findCandidates(Mapper, FunctionList);
+
+ // If we've requested size remarks, then collect the MI counts of every
+ // function before outlining, and the MI counts after outlining.
+ // FIXME: This shouldn't be in the outliner at all; it should ultimately be
+ // the pass manager's responsibility.
+ // This could pretty easily be placed in outline instead, but because we
+ // really ultimately *don't* want this here, it's done like this for now
+ // instead.
+
+ // Check if we want size remarks.
+ bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
+ StringMap<unsigned> FunctionToInstrCount;
+ if (ShouldEmitSizeRemarks)
+ initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
// Outline each of the candidates and return true if something was outlined.
- bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
+ bool OutlinedSomething = outline(M, FunctionList, Mapper);
+
+ // If we outlined something, we definitely changed the MI count of the
+ // module. If we've asked for size remarks, then output them.
+ // FIXME: This should be in the pass manager.
+ if (ShouldEmitSizeRemarks && OutlinedSomething)
+ emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
return OutlinedSomething;
}
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
deleted file mode 100644
index 3ee3e40b27e2..000000000000
--- a/lib/CodeGen/MachinePassRegistry.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- CodeGen/MachinePassRegistry.cpp ----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the machine function pass registry for register allocators
-// and instruction schedulers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachinePassRegistry.h"
-
-using namespace llvm;
-
-void MachinePassRegistryListener::anchor() { }
-
-/// setDefault - Set the default constructor by name.
-void MachinePassRegistry::setDefault(StringRef Name) {
- MachinePassCtor Ctor = nullptr;
- for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
- if (R->getName() == Name) {
- Ctor = R->getCtor();
- break;
- }
- }
- assert(Ctor && "Unregistered pass name");
- setDefault(Ctor);
-}
-
-/// Add - Adds a function pass to the registration list.
-///
-void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
- Node->setNext(List);
- List = Node;
- if (Listener) Listener->NotifyAdd(Node->getName(),
- Node->getCtor(),
- Node->getDescription());
-}
-
-
-/// Remove - Removes a function pass from the registration list.
-///
-void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
- for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
- if (*I == Node) {
- if (Listener) Listener->NotifyRemove(Node->getName());
- *I = (*I)->getNext();
- break;
- }
- }
-}
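
The deleted Remove above uses the classic pointer-to-pointer idiom for unlinking from a singly linked list: by walking the address of each Next link, the head pointer needs no special case. A standalone sketch of the idiom with a hypothetical Node type:

struct Node { Node *Next = nullptr; };

// Walk the address of each link; when *Link points at Target, splice it out.
// &Head is just another link, so removing the head takes no extra branch.
void unlink(Node *&Head, Node *Target) {
  for (Node **Link = &Head; *Link; Link = &(*Link)->Next) {
    if (*Link == Target) {
      *Link = Target->Next;
      break;
    }
  }
}
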
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 9bb00aaef86d..4d451bdd7f69 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -9,34 +9,6 @@
//
// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
//
-// Software pipelining (SWP) is an instruction scheduling technique for loops
-// that overlaps loop iterations and exploits ILP via a compiler transformation.
-//
-// Swing Modulo Scheduling is an implementation of software pipelining
-// that generates schedules that are near optimal in terms of initiation
-// interval, register requirements, and stage count. See the papers:
-//
-// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
-// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
-// Conference on Parallel Architectures and Compilation Techniques.
-//
-// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
-// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
-// Transactions on Computers, Vol. 50, No. 3, 2001.
-//
-// "An Implementation of Swing Modulo Scheduling With Extensions for
-// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
-// Urbana-Champaign, 2005.
-//
-//
-// The SMS algorithm consists of three main steps after computing the minimal
-// initiation interval (MII).
-// 1) Analyze the dependence graph and compute information about each
-// instruction in the graph.
-// 2) Order the nodes (instructions) by priority based upon the heuristics
-// described in the algorithm.
-// 3) Attempt to schedule the nodes in the specified order using the MII.
-//
// This SMS implementation is a target-independent back-end pass. When enabled,
// the pass runs just prior to the register allocation pass, while the machine
// IR is in SSA form. If software pipelining is successful, then the original
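
The header comment removed here (the detail now lives with the declarations in MachinePipeliner.h) summarized the SMS recipe: compute the minimum initiation interval (MII), order the nodes by the papers' heuristics, then attempt schedules at increasing initiation intervals until one fits. A compilable sketch of that outer search, with a hypothetical trySchedule callback standing in for SwingSchedulerDAG's real scheduling attempt:

#include <functional>

// Try II = MII, MII+1, ... up to a cap; the smallest II admitting a valid
// modulo schedule wins, and failure leaves the loop unpipelined.
bool pipelineLoop(unsigned MII, unsigned MaxII,
                  const std::function<bool(unsigned)> &trySchedule) {
  for (unsigned II = MII; II <= MaxII; ++II)
    if (trySchedule(II))
      return true;
  return false;
}
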
@@ -83,13 +55,11 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -171,552 +141,15 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
cl::ReallyHidden, cl::init(false),
cl::ZeroOrMore, cl::desc("Ignore RecMII"));
-namespace {
-
-class NodeSet;
-class SMSchedule;
-
-/// The main class in the implementation of the target independent
-/// software pipeliner pass.
-class MachinePipeliner : public MachineFunctionPass {
-public:
- MachineFunction *MF = nullptr;
- const MachineLoopInfo *MLI = nullptr;
- const MachineDominatorTree *MDT = nullptr;
- const InstrItineraryData *InstrItins;
- const TargetInstrInfo *TII = nullptr;
- RegisterClassInfo RegClassInfo;
-
-#ifndef NDEBUG
- static int NumTries;
-#endif
-
- /// Cache the target analysis information about the loop.
- struct LoopInfo {
- MachineBasicBlock *TBB = nullptr;
- MachineBasicBlock *FBB = nullptr;
- SmallVector<MachineOperand, 4> BrCond;
- MachineInstr *LoopInductionVar = nullptr;
- MachineInstr *LoopCompare = nullptr;
- };
- LoopInfo LI;
-
- static char ID;
-
- MachinePipeliner() : MachineFunctionPass(ID) {
- initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
-private:
- void preprocessPhiNodes(MachineBasicBlock &B);
- bool canPipelineLoop(MachineLoop &L);
- bool scheduleLoop(MachineLoop &L);
- bool swingModuloScheduler(MachineLoop &L);
-};
-
-/// This class builds the dependence graph for the instructions in a loop,
-/// and attempts to schedule the instructions using the SMS algorithm.
-class SwingSchedulerDAG : public ScheduleDAGInstrs {
- MachinePipeliner &Pass;
- /// The minimum initiation interval between iterations for this schedule.
- unsigned MII = 0;
- /// Set to true if a valid pipelined schedule is found for the loop.
- bool Scheduled = false;
- MachineLoop &Loop;
- LiveIntervals &LIS;
- const RegisterClassInfo &RegClassInfo;
-
-  /// A topological ordering of the SUnits, which is needed for changing
- /// dependences and iterating over the SUnits.
- ScheduleDAGTopologicalSort Topo;
-
- struct NodeInfo {
- int ASAP = 0;
- int ALAP = 0;
- int ZeroLatencyDepth = 0;
- int ZeroLatencyHeight = 0;
-
- NodeInfo() = default;
- };
- /// Computed properties for each node in the graph.
- std::vector<NodeInfo> ScheduleInfo;
-
- enum OrderKind { BottomUp = 0, TopDown = 1 };
- /// Computed node ordering for scheduling.
- SetVector<SUnit *> NodeOrder;
-
- using NodeSetType = SmallVector<NodeSet, 8>;
- using ValueMapTy = DenseMap<unsigned, unsigned>;
- using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
- using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
-
- /// Instructions to change when emitting the final schedule.
- DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
-
- /// We may create a new instruction, so remember it because it
- /// must be deleted when the pass is finished.
- SmallPtrSet<MachineInstr *, 4> NewMIs;
-
- /// Ordered list of DAG postprocessing steps.
- std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
-
- /// Helper class to implement Johnson's circuit finding algorithm.
- class Circuits {
- std::vector<SUnit> &SUnits;
- SetVector<SUnit *> Stack;
- BitVector Blocked;
- SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
- SmallVector<SmallVector<int, 4>, 16> AdjK;
- unsigned NumPaths;
- static unsigned MaxPaths;
-
- public:
- Circuits(std::vector<SUnit> &SUs)
- : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
-
- /// Reset the data structures used in the circuit algorithm.
- void reset() {
- Stack.clear();
- Blocked.reset();
- B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
- NumPaths = 0;
- }
-
- void createAdjacencyStructure(SwingSchedulerDAG *DAG);
- bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
- void unblock(int U);
- };
-
-public:
- SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
- const RegisterClassInfo &rci)
- : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
- RegClassInfo(rci), Topo(SUnits, &ExitSU) {
- P.MF->getSubtarget().getSMSMutations(Mutations);
- }
-
- void schedule() override;
- void finishBlock() override;
-
- /// Return true if the loop kernel has been scheduled.
- bool hasNewSchedule() { return Scheduled; }
-
- /// Return the earliest time an instruction may be scheduled.
- int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
-
-  /// Return the latest time an instruction may be scheduled.
- int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
-
-  /// The mobility function, which is the number of slots in which
-  /// an instruction may be scheduled.
- int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
-
- /// The depth, in the dependence graph, for a node.
- unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
-
- /// The maximum unweighted length of a path from an arbitrary node to the
- /// given node in which each edge has latency 0
- int getZeroLatencyDepth(SUnit *Node) {
- return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
- }
-
- /// The height, in the dependence graph, for a node.
- unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
-
- /// The maximum unweighted length of a path from the given node to an
- /// arbitrary node in which each edge has latency 0
- int getZeroLatencyHeight(SUnit *Node) {
- return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
- }
-
- /// Return true if the dependence is a back-edge in the data dependence graph.
- /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
- /// using an anti dependence from a Phi to an instruction.
- bool isBackedge(SUnit *Source, const SDep &Dep) {
- if (Dep.getKind() != SDep::Anti)
- return false;
- return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
- }
-
- bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
-
- /// The distance function, which indicates that operation V of iteration I
- /// depends on operations U of iteration I-distance.
- unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
- // Instructions that feed a Phi have a distance of 1. Computing larger
- // values for arrays requires data dependence information.
- if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
- return 1;
- return 0;
- }
-
- /// Set the Minimum Initiation Interval for this schedule attempt.
- void setMII(unsigned mii) { MII = mii; }
-
- void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
-
- void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
-
- /// Return the new base register that was stored away for the changed
- /// instruction.
- unsigned getInstrBaseReg(SUnit *SU) {
- DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
- InstrChanges.find(SU);
- if (It != InstrChanges.end())
- return It->second.first;
- return 0;
- }
-
- void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
- Mutations.push_back(std::move(Mutation));
- }
-
-private:
- void addLoopCarriedDependences(AliasAnalysis *AA);
- void updatePhiDependences();
- void changeDependences();
- unsigned calculateResMII();
- unsigned calculateRecMII(NodeSetType &RecNodeSets);
- void findCircuits(NodeSetType &NodeSets);
- void fuseRecs(NodeSetType &NodeSets);
- void removeDuplicateNodes(NodeSetType &NodeSets);
- void computeNodeFunctions(NodeSetType &NodeSets);
- void registerPressureFilter(NodeSetType &NodeSets);
- void colocateNodeSets(NodeSetType &NodeSets);
- void checkNodeSets(NodeSetType &NodeSets);
- void groupRemainingNodes(NodeSetType &NodeSets);
- void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
- SetVector<SUnit *> &NodesAdded);
- void computeNodeOrder(NodeSetType &NodeSets);
- void checkValidNodeOrder(const NodeSetType &Circuits) const;
- bool schedulePipeline(SMSchedule &Schedule);
- void generatePipelinedLoop(SMSchedule &Schedule);
- void generateProlog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &PrologBBs);
- void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
- void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void removeDeadInstructions(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs);
- void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule);
- void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
- ValueMapTy *VRMap);
- bool computeDelta(MachineInstr &MI, unsigned &Delta);
- void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
- unsigned Num);
- MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum);
- MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum,
- SMSchedule &Schedule);
- void updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum, unsigned InstrStageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap);
- MachineInstr *findDefInLoop(unsigned Reg);
- unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
- unsigned LoopStage, ValueMapTy *VRMap,
- MachineBasicBlock *BB);
- void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap);
- void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
- InstrMapTy &InstrMap, unsigned CurStageNum,
- unsigned PhiNum, MachineInstr *Phi,
- unsigned OldReg, unsigned NewReg,
- unsigned PrevReg = 0);
- bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
- unsigned &OffsetPos, unsigned &NewBase,
- int64_t &NewOffset);
- void postprocessDAG();
-};
-
-/// A NodeSet contains a set of SUnit DAG nodes with additional information
-/// that assigns a priority to the set.
-class NodeSet {
- SetVector<SUnit *> Nodes;
- bool HasRecurrence = false;
- unsigned RecMII = 0;
- int MaxMOV = 0;
- unsigned MaxDepth = 0;
- unsigned Colocate = 0;
- SUnit *ExceedPressure = nullptr;
- unsigned Latency = 0;
-
-public:
- using iterator = SetVector<SUnit *>::const_iterator;
-
- NodeSet() = default;
- NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
- Latency = 0;
- for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
- for (const SDep &Succ : Nodes[i]->Succs)
- if (Nodes.count(Succ.getSUnit()))
- Latency += Succ.getLatency();
- }
-
- bool insert(SUnit *SU) { return Nodes.insert(SU); }
-
- void insert(iterator S, iterator E) { Nodes.insert(S, E); }
-
- template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
- return Nodes.remove_if(P);
- }
-
- unsigned count(SUnit *SU) const { return Nodes.count(SU); }
-
- bool hasRecurrence() { return HasRecurrence; };
-
- unsigned size() const { return Nodes.size(); }
-
- bool empty() const { return Nodes.empty(); }
-
- SUnit *getNode(unsigned i) const { return Nodes[i]; };
-
- void setRecMII(unsigned mii) { RecMII = mii; };
-
- void setColocate(unsigned c) { Colocate = c; };
-
- void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
-
- bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
-
- int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
-
- int getRecMII() { return RecMII; }
-
- /// Summarize node functions for the entire node set.
- void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
- for (SUnit *SU : *this) {
- MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
- MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
- }
- }
-
- unsigned getLatency() { return Latency; }
-
- unsigned getMaxDepth() { return MaxDepth; }
-
- void clear() {
- Nodes.clear();
- RecMII = 0;
- HasRecurrence = false;
- MaxMOV = 0;
- MaxDepth = 0;
- Colocate = 0;
- ExceedPressure = nullptr;
- }
-
- operator SetVector<SUnit *> &() { return Nodes; }
-
- /// Sort the node sets by importance. First, rank them by recurrence MII,
- /// then by mobility (least mobile done first), and finally by depth.
- /// Each node set may contain a colocate value which is used as the first
- /// tie breaker, if it's set.
- bool operator>(const NodeSet &RHS) const {
- if (RecMII == RHS.RecMII) {
- if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
- return Colocate < RHS.Colocate;
- if (MaxMOV == RHS.MaxMOV)
- return MaxDepth > RHS.MaxDepth;
- return MaxMOV < RHS.MaxMOV;
- }
- return RecMII > RHS.RecMII;
- }
-
- bool operator==(const NodeSet &RHS) const {
- return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
- MaxDepth == RHS.MaxDepth;
- }
-
- bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
-
- iterator begin() { return Nodes.begin(); }
- iterator end() { return Nodes.end(); }
-
- void print(raw_ostream &os) const {
- os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
- << " depth " << MaxDepth << " col " << Colocate << "\n";
- for (const auto &I : Nodes)
- os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
- os << "\n";
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
-#endif
-};
-
-/// This class represents the scheduled code. The main data structure is a
-/// map from scheduled cycle to instructions. During scheduling, the
-/// data structure explicitly represents all stages/iterations. When
-/// the algorithm finishes, the schedule is collapsed into a single stage,
-/// which represents instructions from different loop iterations.
-///
-/// The SMS algorithm allows negative values for cycles, so the first cycle
-/// in the schedule is the smallest cycle value.
-class SMSchedule {
-private:
- /// Map from execution cycle to instructions.
- DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
-
- /// Map from instruction to execution cycle.
- std::map<SUnit *, int> InstrToCycle;
-
-  /// Map from each register to the max difference between its uses and def.
- /// The first element in the pair is the max difference in stages. The
- /// second is true if the register defines a Phi value and loop value is
- /// scheduled before the Phi.
- std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
-
- /// Keep track of the first cycle value in the schedule. It starts
- /// as zero, but the algorithm allows negative values.
- int FirstCycle = 0;
-
- /// Keep track of the last cycle value in the schedule.
- int LastCycle = 0;
-
- /// The initiation interval (II) for the schedule.
- int InitiationInterval = 0;
-
- /// Target machine information.
- const TargetSubtargetInfo &ST;
-
- /// Virtual register information.
- MachineRegisterInfo &MRI;
-
- std::unique_ptr<DFAPacketizer> Resources;
-
-public:
- SMSchedule(MachineFunction *mf)
- : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
- Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
-
- void reset() {
- ScheduledInstrs.clear();
- InstrToCycle.clear();
- RegToStageDiff.clear();
- FirstCycle = 0;
- LastCycle = 0;
- InitiationInterval = 0;
- }
-
- /// Set the initiation interval for this schedule.
- void setInitiationInterval(int ii) { InitiationInterval = ii; }
-
- /// Return the first cycle in the completed schedule. This
- /// can be a negative value.
- int getFirstCycle() const { return FirstCycle; }
-
- /// Return the last cycle in the finalized schedule.
- int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
-
- /// Return the cycle of the earliest scheduled instruction in the dependence
- /// chain.
- int earliestCycleInChain(const SDep &Dep);
-
- /// Return the cycle of the latest scheduled instruction in the dependence
- /// chain.
- int latestCycleInChain(const SDep &Dep);
+namespace llvm {
- void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
- int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
- bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
+// A command line option to enable the CopyToPhi DAG mutation.
+cl::opt<bool>
+ SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+ cl::init(true), cl::ZeroOrMore,
+ cl::desc("Enable CopyToPhi DAG Mutation"));
- /// Iterators for the cycle to instruction map.
- using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
- using const_sched_iterator =
- DenseMap<int, std::deque<SUnit *>>::const_iterator;
-
- /// Return true if the instruction is scheduled at the specified stage.
- bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
- return (stageScheduled(SU) == (int)StageNum);
- }
-
- /// Return the stage for a scheduled instruction. Return -1 if
- /// the instruction has not been scheduled.
- int stageScheduled(SUnit *SU) const {
- std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
- if (it == InstrToCycle.end())
- return -1;
- return (it->second - FirstCycle) / InitiationInterval;
- }
-
- /// Return the cycle for a scheduled instruction. This function normalizes
- /// the first cycle to be 0.
- unsigned cycleScheduled(SUnit *SU) const {
- std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
- assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
- return (it->second - FirstCycle) % InitiationInterval;
- }
-
- /// Return the maximum stage count needed for this schedule.
- unsigned getMaxStageCount() {
- return (LastCycle - FirstCycle) / InitiationInterval;
- }
-
- /// Return the max. number of stages/iterations that can occur between a
- /// register definition and its uses.
- unsigned getStagesForReg(int Reg, unsigned CurStage) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
- return 1;
- return Stages.first;
- }
-
- /// The number of stages for a Phi is a little different than other
- /// instructions. The minimum value computed in RegToStageDiff is 1
- /// because we assume the Phi is needed for at least 1 iteration.
- /// This is not the case if the loop value is scheduled prior to the
- /// Phi in the same stage. This function returns the number of stages
- /// or iterations needed between the Phi definition and any uses.
- unsigned getStagesForPhi(int Reg) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (Stages.second)
- return Stages.first;
- return Stages.first - 1;
- }
-
- /// Return the instructions that are scheduled at the specified cycle.
- std::deque<SUnit *> &getInstructions(int cycle) {
- return ScheduledInstrs[cycle];
- }
-
- bool isValidSchedule(SwingSchedulerDAG *SSD);
- void finalizeSchedule(SwingSchedulerDAG *SSD);
- void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
- std::deque<SUnit *> &Insts);
- bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
- bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
- MachineOperand &MO);
- void print(raw_ostream &os) const;
- void dump() const;
-};
-
-} // end anonymous namespace
+} // end namespace llvm
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
char MachinePipeliner::ID = 0;
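
The SMSchedule accessors that moved to the new header keep their stage/cycle arithmetic: with FirstCycle as the (possibly negative) earliest cycle and II the initiation interval, a raw cycle c lands in stage (c - FirstCycle) / II at normalized cycle (c - FirstCycle) % II. A worked check of that arithmetic with assumed values:

#include <cassert>

int main() {
  const int FirstCycle = -2, II = 3;
  // A raw cycle of 4 sits 6 cycles past the schedule start: stage 2,
  // normalized cycle 0 of the collapsed kernel.
  const int Raw = 4;
  assert((Raw - FirstCycle) / II == 2);
  assert((Raw - FirstCycle) % II == 0);
  return 0;
}
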
@@ -884,12 +317,9 @@ void SwingSchedulerDAG::schedule() {
addLoopCarriedDependences(AA);
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
- postprocessDAG();
changeDependences();
- LLVM_DEBUG({
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this);
- });
+ postprocessDAG();
+ LLVM_DEBUG(dump());
NodeSetType NodeSets;
findCircuits(NodeSets);
@@ -1101,11 +531,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// First, perform the cheaper check that compares the base register.
// If they are the same and the load offset is less than the store
// offset, then mark the dependence as loop carried potentially.
- unsigned BaseReg1, BaseReg2;
+ MachineOperand *BaseOp1, *BaseOp2;
int64_t Offset1, Offset2;
- if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
- TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
- if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
+ TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
+ if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+ (int)Offset1 < (int)Offset2) {
assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
"What happened to the chain edge?");
SDep Dep(Load, SDep::Barrier);
@@ -1139,9 +570,9 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
continue;
}
AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
MMO2->getAAInfo()));
if (AAResult != NoAlias) {
@@ -1298,6 +729,7 @@ void SwingSchedulerDAG::changeDependences() {
// Add a dependence between the new instruction and the instruction
// that defines the new base.
SDep Dep(&I, SDep::Anti, NewBase);
+ Topo.AddPred(LastSU, &I);
LastSU->addPred(Dep);
// Remember the base and offset information so that we can update the
@@ -1509,9 +941,9 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
OutputDeps[N] = BackEdge;
}
- // Do not process a boundary node and a back-edge is processed only
- // if it goes to a Phi.
- if (SI.getSUnit()->isBoundaryNode() ||
+      // Do not process a boundary node or an artificial node.
+ // A back-edge is processed only if it goes to a Phi.
+ if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
(SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
continue;
int N = SI.getSUnit()->NodeNum;
@@ -1535,7 +967,7 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
}
}
- // Add back-eges in the adjacency matrix for the output dependences.
+ // Add back-edges in the adjacency matrix for the output dependences.
for (auto &OD : OutputDeps)
if (!Added.test(OD.second)) {
AdjK[OD.first].push_back(OD.second);
@@ -1564,7 +996,8 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
++NumPaths;
break;
} else if (!Blocked.test(W)) {
- if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
+ if (circuit(W, S, NodeSets,
+ Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
F = true;
}
}
@@ -1604,7 +1037,7 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
// but we do this to find the circuits, and then change them back.
swapAntiDependences(SUnits);
- Circuits Cir(SUnits);
+ Circuits Cir(SUnits, Topo);
// Create the adjacency structure.
Cir.createAdjacencyStructure(this);
for (int i = 0, e = SUnits.size(); i != e; ++i) {
@@ -1616,6 +1049,85 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
swapAntiDependences(SUnits);
}
+// Create an artificial dependence from the source of a COPY/REG_SEQUENCE that
+// is loop-carried to its USE in the next iteration. This helps the pipeliner
+// avoid additional copies that would otherwise be needed across iterations. An
+// artificial dependence edge is added from the USE to the SOURCE of the copy.
+
+// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
+// SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
+// PHI-------True-Dep------> USEOfPhi
+
+// The mutation creates
+// USEOfPHI -------Artificial-Dep---> SRCOfCopy
+
+// Overall this ensures that USEOfPHI is scheduled before SRCOfCopy (since USE
+// is a predecessor), which in turn means the COPY/REG_SEQUENCE is scheduled
+// late enough to avoid additional copies across iterations. The resulting
+// scheduling order would be
+// USEOfPHI --- SRCOfCopy --- COPY/REG_SEQUENCE.
+
+void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
+ for (SUnit &SU : DAG->SUnits) {
+ // Find the COPY/REG_SEQUENCE instruction.
+ if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
+ continue;
+
+ // Record the loop carried PHIs.
+ SmallVector<SUnit *, 4> PHISUs;
+ // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
+ SmallVector<SUnit *, 4> SrcSUs;
+
+ for (auto &Dep : SU.Preds) {
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ SDep::Kind DepKind = Dep.getKind();
+ // Save the loop carried PHI.
+ if (DepKind == SDep::Anti && TmpMI->isPHI())
+ PHISUs.push_back(TmpSU);
+ // Save the source of COPY/REG_SEQUENCE.
+      // If the source has no predecessors, we will end up creating cycles.
+ else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
+ SrcSUs.push_back(TmpSU);
+ }
+
+ if (PHISUs.size() == 0 || SrcSUs.size() == 0)
+ continue;
+
+ // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
+ // SUnit to the container.
+ SmallVector<SUnit *, 8> UseSUs;
+ for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
+ for (auto &Dep : (*I)->Succs) {
+ if (Dep.getKind() != SDep::Data)
+ continue;
+
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
+ PHISUs.push_back(TmpSU);
+ continue;
+ }
+ UseSUs.push_back(TmpSU);
+ }
+ }
+
+ if (UseSUs.size() == 0)
+ continue;
+
+ SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
+ // Add the artificial dependencies if it does not form a cycle.
+ for (auto I : UseSUs) {
+ for (auto Src : SrcSUs) {
+ if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
+ Src->addPred(SDep(I, SDep::Artificial));
+ SDAG->Topo.AddPred(Src, I);
+ }
+ }
+ }
+ }
+}
+
/// Return true for DAG nodes that we ignore when computing the cost functions.
/// We ignore the back-edge recurrence in order to avoid unbounded recursion
/// in the calculation of the ASAP, ALAP, etc functions.
@@ -1638,8 +1150,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
E = Topo.end();
I != E; ++I) {
- SUnit *SU = &SUnits[*I];
- SU->dump(this);
+ const SUnit &SU = SUnits[*I];
+ dumpNode(SU);
}
});
@@ -1864,8 +1376,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
RecRPTracker.closeBottom();
std::vector<SUnit *> SUnits(NS.begin(), NS.end());
- llvm::sort(SUnits.begin(), SUnits.end(),
- [](const SUnit *A, const SUnit *B) {
+ llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
return A->NodeNum > B->NodeNum;
});
@@ -2672,7 +2183,7 @@ void SwingSchedulerDAG::generateExistingPhis(
else if (PrologStage >= AccessStage + StageDiff + np &&
VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
- // Check if the Phi has already been scheduled, but the loop intruction
+ // Check if the Phi has already been scheduled, but the loop instruction
// is either another Phi, or doesn't occur in the loop.
else if (PrologStage >= AccessStage + StageDiff + np) {
// If the Phi references another Phi, we need to examine the other
@@ -2725,7 +2236,7 @@ void SwingSchedulerDAG::generateExistingPhis(
VRMap[PrevStage - np + 1].count(Def))
PhiOp2 = VRMap[PrevStage - np + 1][Def];
// Use the loop value defined in the kernel.
- else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
+ else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
// Use the value defined by the Phi, unless we're generating the first
@@ -2739,35 +2250,38 @@ void SwingSchedulerDAG::generateExistingPhis(
// references another Phi, and the other Phi is scheduled in an
// earlier stage. We can try to reuse an existing Phi up until the last
// stage of the current Phi.
- if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) {
- int LVNumStages = Schedule.getStagesForPhi(LoopVal);
- int StageDiff = (StageScheduled - LoopValStage);
- LVNumStages -= StageDiff;
- // Make sure the loop value Phi has been processed already.
- if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
- NewReg = PhiOp2;
- unsigned ReuseStage = CurStageNum;
- if (Schedule.isLoopCarried(this, *PhiInst))
- ReuseStage -= LVNumStages;
- // Check if the Phi to reuse has been generated yet. If not, then
- // there is nothing to reuse.
- if (VRMap[ReuseStage - np].count(LoopVal)) {
- NewReg = VRMap[ReuseStage - np][LoopVal];
-
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, Def, NewReg);
- // Update the map with the new Phi name.
- VRMap[CurStageNum - np][Def] = NewReg;
- PhiOp2 = NewReg;
- if (VRMap[LastStageNum - np - 1].count(LoopVal))
- PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
-
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
- continue;
+ if (LoopDefIsPhi) {
+ if (static_cast<int>(PrologStage - np) >= StageScheduled) {
+ int LVNumStages = Schedule.getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ // Make sure the loop value Phi has been processed already.
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (Schedule.isLoopCarried(this, *PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage - np].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage - np][LoopVal];
+
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
}
- } else if (InKernel && StageDiff > 0 &&
- VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ }
+ if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
}
@@ -3143,11 +2657,16 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
/// during each iteration. Set Delta to the amount of the change.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
return false;
+ if (!BaseOp->isReg())
+ return false;
+
+ unsigned BaseReg = BaseOp->getReg();
+
MachineRegisterInfo &MRI = MF.getRegInfo();
// Check if there is a Phi. If so, get the definition in the loop.
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
@@ -3175,28 +2694,26 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
return;
// If the instruction has memory operands, then adjust the offset
// when the instruction appears in different stages.
- unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
- if (NumRefs == 0)
+ if (NewMI.memoperands_empty())
return;
- MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
- unsigned Refs = 0;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
for (MachineMemOperand *MMO : NewMI.memoperands()) {
if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
(!MMO->getValue())) {
- NewMemRefs[Refs++] = MMO;
+ NewMMOs.push_back(MMO);
continue;
}
unsigned Delta;
if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
int64_t AdjOffset = Delta * Num;
- NewMemRefs[Refs++] =
- MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
} else {
- NewMI.dropMemRefs();
- return;
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
}
}
- NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
+ NewMI.setMemRefs(MF, NewMMOs);
}
/// Clone the instruction for the new pipelined loop and update the
@@ -3552,19 +3069,19 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
- unsigned BaseRegS, BaseRegD;
+ MachineOperand *BaseOpS, *BaseOpD;
int64_t OffsetS, OffsetD;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
- !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
+ if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
+ !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
return true;
- if (BaseRegS != BaseRegD)
+ if (!BaseOpS->isIdenticalTo(*BaseOpD))
return true;
// Check that the base register is incremented by a constant value for each
// iteration.
- MachineInstr *Def = MRI.getVRegDef(BaseRegS);
+ MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
if (!Def || !Def->isPHI())
return true;
unsigned InitVal = 0;
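
This hunk completes the migration from raw base registers to base operands: equality is now MachineOperand::isIdenticalTo rather than a register compare, which also covers non-register bases such as frame indices. A hedged sketch of the query pattern, using only the getMemOperandWithOffset hook as it appears in the hunks above:

#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Conservative helper: true only when both instructions expose a base
// operand and the bases are identical, so their offsets are comparable.
// Sketch only; failure to analyze is treated as "not provably same base".
static bool sameBase(const TargetInstrInfo *TII, MachineInstr &A,
                     MachineInstr &B, const TargetRegisterInfo *TRI) {
  MachineOperand *BaseA, *BaseB;
  int64_t OffA, OffB;
  if (!TII->getMemOperandWithOffset(A, BaseA, OffA, TRI) ||
      !TII->getMemOperandWithOffset(B, BaseB, OffB, TRI))
    return false;
  return BaseA->isIdenticalTo(*BaseB);
}
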
@@ -3983,7 +3500,7 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
};
// sort, so that we can perform a binary search
- llvm::sort(Indices.begin(), Indices.end(), CompareKey);
+ llvm::sort(Indices, CompareKey);
bool Valid = true;
(void)Valid;
@@ -4193,6 +3710,14 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
LLVM_DEBUG(dump(););
}
+void NodeSet::print(raw_ostream &os) const {
+ os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
+ << " depth " << MaxDepth << " col " << Colocate << "\n";
+ for (const auto &I : Nodes)
+ os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
+ os << "\n";
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the schedule information to the given output.
void SMSchedule::print(raw_ostream &os) const {
@@ -4211,4 +3736,9 @@ void SMSchedule::print(raw_ostream &os) const {
/// Utility function used for debugging to print the schedule.
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
+
#endif
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index f632a9bd457f..6e5ca45d5e5e 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -93,36 +93,29 @@ bool
MachineRegisterInfo::constrainRegAttrs(unsigned Reg,
unsigned ConstrainingReg,
unsigned MinNumRegs) {
- auto const *OldRC = getRegClassOrNull(Reg);
- auto const *RC = getRegClassOrNull(ConstrainingReg);
- // A virtual register at any point must have either a low-level type
- // or a class assigned, but not both. The only exception is the internals of
- // GlobalISel's instruction selection pass, which is allowed to temporarily
- // introduce registers with types and classes both.
- assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type");
- assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both");
- assert((RC || getType(ConstrainingReg).isValid()) &&
- "ConstrainingReg has neither class nor type");
- assert((!RC || !getType(ConstrainingReg).isValid()) &&
- "ConstrainingReg has class and type both");
- if (OldRC && RC)
- return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs);
- // If one of the virtual registers is generic (used in generic machine
- // instructions, has a low-level type, doesn't have a class), and the other is
- // concrete (used in target specific instructions, doesn't have a low-level
- // type, has a class), we can not unify them.
- if (OldRC || RC)
+ const LLT RegTy = getType(Reg);
+ const LLT ConstrainingRegTy = getType(ConstrainingReg);
+ if (RegTy.isValid() && ConstrainingRegTy.isValid() &&
+ RegTy != ConstrainingRegTy)
return false;
- // At this point, both registers are guaranteed to have a valid low-level
- // type, and they must agree.
- if (getType(Reg) != getType(ConstrainingReg))
- return false;
- auto const *OldRB = getRegBankOrNull(Reg);
- auto const *RB = getRegBankOrNull(ConstrainingReg);
- if (OldRB)
- return !RB || RB == OldRB;
- if (RB)
- setRegBank(Reg, *RB);
+ const auto ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg);
+ if (!ConstrainingRegCB.isNull()) {
+ const auto RegCB = getRegClassOrRegBank(Reg);
+ if (RegCB.isNull())
+ setRegClassOrRegBank(Reg, ConstrainingRegCB);
+ else if (RegCB.is<const TargetRegisterClass *>() !=
+ ConstrainingRegCB.is<const TargetRegisterClass *>())
+ return false;
+ else if (RegCB.is<const TargetRegisterClass *>()) {
+ if (!::constrainRegClass(
+ *this, Reg, RegCB.get<const TargetRegisterClass *>(),
+ ConstrainingRegCB.get<const TargetRegisterClass *>(), MinNumRegs))
+ return false;
+ } else if (RegCB != ConstrainingRegCB)
+ return false;
+ }
+ if (ConstrainingRegTy.isValid())
+ setType(Reg, ConstrainingRegTy);
return true;
}
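The rewritten constrainRegAttrs folds the old type/class/bank case analysis
into one pass over both attributes. A standalone model of the resulting rules
(a sketch only; the real class/class case goes through constrainRegClass and
may intersect classes rather than requiring equality):

    #include <optional>

    enum class CBKind { None, Class, Bank };

    struct RegAttrs {
      std::optional<unsigned> Ty; // LLT stand-in
      CBKind CB = CBKind::None;
      unsigned CBId = 0;
    };

    bool constrain(RegAttrs &R, const RegAttrs &C) {
      if (R.Ty && C.Ty && *R.Ty != *C.Ty)
        return false;                 // both typed, types disagree
      if (C.CB != CBKind::None) {
        if (R.CB == CBKind::None) {
          R.CB = C.CB;                // adopt the constraining attribute
          R.CBId = C.CBId;
        } else if (R.CB != C.CB) {
          return false;               // class vs. bank: cannot unify
        } else if (R.CBId != C.CBId) {
          return false;               // simplified equality check
        }
      }
      if (C.Ty)
        R.Ty = C.Ty;
      return true;
    }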
@@ -177,11 +170,17 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}
+unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg,
+ StringRef Name) {
+ unsigned Reg = createIncompleteVirtualRegister(Name);
+ VRegInfo[Reg].first = VRegInfo[VReg].first;
+ setType(Reg, getType(VReg));
+ if (TheDelegate)
+ TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ return Reg;
+}
+
void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
- // Check that VReg doesn't have a class.
- assert((getRegClassOrRegBank(VReg).isNull() ||
- !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) &&
- "Can't set the size of a non-generic virtual register");
VRegToType.grow(VReg);
VRegToType[VReg] = Ty;
}
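A hedged usage sketch for the new cloneVirtualRegister (the surrounding pass,
insertion point, and register names are hypothetical):

    // Duplicate OldV with the same register class/bank and LLT, then
    // initialize the twin with a COPY.
    unsigned NewV = MRI.cloneVirtualRegister(OldV, "twin");
    BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), NewV)
        .addReg(OldV);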
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 502d18f08f93..90dad9d399fe 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -41,6 +41,7 @@
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -100,8 +101,11 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
cl::desc("Only schedule this MBB#"));
+static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
#else
-static bool ViewMISchedDAGs = false;
+static const bool ViewMISchedDAGs = false;
+static const bool PrintDAGs = false;
#endif // NDEBUG
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
@@ -237,7 +241,8 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachinePassRegistry MachineSchedRegistry::Registry;
+MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
+ MachineSchedRegistry::Registry;
/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
@@ -633,7 +638,7 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -670,7 +675,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -764,10 +769,8 @@ void ScheduleDAGMI::schedule() {
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) SU.dumpAll(this);
- if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize the strategy before modifying the DAG.
@@ -920,7 +923,7 @@ void ScheduleDAGMI::placeDebugValues() {
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "Missing SUnit\n";
}
@@ -1171,6 +1174,29 @@ void ScheduleDAGMILive::updatePressureDiffs(
}
}
+void ScheduleDAGMILive::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits) {
+ dumpNodeAll(SU);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
+ dbgs() << '\n';
+ }
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
@@ -1197,22 +1223,8 @@ void ScheduleDAGMILive::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) {
- SU.dumpAll(this);
- if (ShouldTrackPressure) {
- dbgs() << " Pressure Diff : ";
- getPressureDiff(&SU).dump(*TRI);
- }
- dbgs() << " Single Issue : ";
- if (SchedModel.mustBeginGroup(SU.getInstr()) &&
- SchedModel.mustEndGroup(SU.getInstr()))
- dbgs() << "true;";
- else
- dbgs() << "false;";
- dbgs() << '\n';
- } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -1472,15 +1484,40 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
+ : SU(su), BaseOp(Op), Offset(ofs) {}
+
+ bool operator<(const MemOpInfo &RHS) const {
+ if (BaseOp->getType() != RHS.BaseOp->getType())
+ return BaseOp->getType() < RHS.BaseOp->getType();
+
+ if (BaseOp->isReg())
+ return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
+ std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
+ RHS.SU->NodeNum);
+ if (BaseOp->isFI()) {
+ const MachineFunction &MF =
+ *BaseOp->getParent()->getParent()->getParent();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ bool StackGrowsDown = TFI.getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown;
+ // Can't use tuple comparison here since we might need to use a
+ // different order when the stack grows down.
+ if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
+ return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
+ : BaseOp->getIndex() < RHS.BaseOp->getIndex();
+
+ if (Offset != RHS.Offset)
+ return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+
+ return SU->NodeNum < RHS.SU->NodeNum;
+ }
- bool operator<(const MemOpInfo&RHS) const {
- return std::tie(BaseReg, Offset, SU->NodeNum) <
- std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
+ llvm_unreachable("MemOpClusterMutation only supports register or frame "
+ "index bases.");
}
};
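The frame-index branch flips the comparison when the stack grows down, so
operands adjacent in memory stay adjacent in the sorted order regardless of
growth direction. A runnable standalone check of that ordering (values are
illustrative):

    #include <cassert>

    struct FIOp {
      int Index;
      long Offset;
      unsigned Node;
    };

    bool lessFI(const FIOp &A, const FIOp &B, bool StackGrowsDown) {
      if (A.Index != B.Index)
        return StackGrowsDown ? A.Index > B.Index : A.Index < B.Index;
      if (A.Offset != B.Offset)
        return StackGrowsDown ? A.Offset > B.Offset : A.Offset < B.Offset;
      return A.Node < B.Node;
    }

    int main() {
      // Downward-growing stack: the higher frame index sorts first.
      assert(lessFI({2, 0, 0}, {1, 0, 1}, /*StackGrowsDown=*/true));
      assert(lessFI({1, 0, 0}, {2, 0, 1}, /*StackGrowsDown=*/false));
    }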
@@ -1536,21 +1573,21 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
+ if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
}
if (MemOpRecords.size() < 2)
return;
- llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
+ llvm::sort(MemOpRecords);
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
- if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
- *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
+ if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
+ *MemOpRecords[Idx + 1].BaseOp,
ClusterLength) &&
DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
@@ -2397,6 +2434,52 @@ initResourceDelta(const ScheduleDAGMI *DAG,
}
}
+/// Compute remaining latency. We need this both to determine whether the
+/// overall schedule has become latency-limited and whether the instructions
+/// outside this zone are resource or latency limited.
+///
+/// The "dependent" latency is updated incrementally during scheduling as the
+/// max height/depth of scheduled nodes minus the cycles since it was
+/// scheduled:
+/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
+///
+/// The "independent" latency is the max ready queue depth:
+/// ILat = max N.depth for N in Available|Pending
+///
+/// RemainingLatency is the greater of independent and dependent latency.
+///
+/// These computations are expensive, especially in DAGs with many edges, so
+/// only do them if necessary.
+static unsigned computeRemLatency(SchedBoundary &CurrZone) {
+ unsigned RemLatency = CurrZone.getDependentLatency();
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Available.elements()));
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+ return RemLatency;
+}
+
+/// Returns true if the current cycle plus remaining latency is greater than
+/// the critical path in the scheduling region.
+bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
+ SchedBoundary &CurrZone,
+ bool ComputeRemLatency,
+ unsigned &RemLatency) const {
+ // The current cycle is already greater than the critical path, so we are
+ // already latency limited and don't need to compute the remaining latency.
+ if (CurrZone.getCurrCycle() > Rem.CriticalPath)
+ return true;
+
+ // If we haven't scheduled anything yet, then we aren't latency limited.
+ if (CurrZone.getCurrCycle() == 0)
+ return false;
+
+ if (ComputeRemLatency)
+ RemLatency = computeRemLatency(CurrZone);
+
+ return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
+}
+
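A worked instance of the check above: with CurrCycle = 10, a dependent latency
of 4, and a deepest ready/pending node of depth 7, RemLatency = max(4, 7) = 7,
and 10 + 7 = 17 exceeds a critical path of 15, so the zone is latency limited.

    #include <algorithm>
    #include <cassert>

    int main() {
      unsigned CurrCycle = 10, DepLatency = 4, MaxQueueDepth = 7;
      unsigned CritPath = 15;
      unsigned RemLatency = std::max(DepLatency, MaxQueueDepth);
      assert(RemLatency + CurrCycle > CritPath); // latency limited
    }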
/// Set the CandPolicy given a scheduling zone given the current resources and
/// latencies inside and outside the zone.
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
@@ -2406,46 +2489,32 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
// inside and outside this zone. Potential stalls should be considered before
// following this policy.
- // Compute remaining latency. We need this both to determine whether the
- // overall schedule has become latency-limited and whether the instructions
- // outside this zone are resource or latency limited.
- //
- // The "dependent" latency is updated incrementally during scheduling as the
- // max height/depth of scheduled nodes minus the cycles since it was
- // scheduled:
- // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
- //
- // The "independent" latency is the max ready queue depth:
- // ILat = max N.depth for N in Available|Pending
- //
- // RemainingLatency is the greater of independent and dependent latency.
- unsigned RemLatency = CurrZone.getDependentLatency();
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Available.elements()));
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Pending.elements()));
-
// Compute the critical resource outside the zone.
unsigned OtherCritIdx = 0;
unsigned OtherCount =
OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
bool OtherResLimited = false;
- if (SchedModel->hasInstrSchedModel())
+ unsigned RemLatency = 0;
+ bool RemLatencyComputed = false;
+ if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
+ RemLatency = computeRemLatency(CurrZone);
+ RemLatencyComputed = true;
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
OtherCount, RemLatency);
+ }
// Schedule aggressively for latency in PostRA mode. We don't check for
// acyclic latency during PostRA, and highly out-of-order processors will
// skip PostRA scheduling.
- if (!OtherResLimited) {
- if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
- Policy.ReduceLatency |= true;
- LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
- << " RemainingLatency " << RemLatency << " + "
- << CurrZone.getCurrCycle() << "c > CritPath "
- << Rem.CriticalPath << "\n");
- }
+ if (!OtherResLimited &&
+ (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
+ RemLatency))) {
+ Policy.ReduceLatency |= true;
+ LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
+ << " RemainingLatency " << RemLatency << " + "
+ << CurrZone.getCurrCycle() << "c > CritPath "
+ << Rem.CriticalPath << "\n");
}
// If the same resource is limiting inside and outside the zone, do nothing.
if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
@@ -2473,7 +2542,7 @@ const char *GenericSchedulerBase::getReasonStr(
switch (Reason) {
case NoCand: return "NOCAND ";
case Only1: return "ONLY1 ";
- case PhysRegCopy: return "PREG-COPY ";
+ case PhysReg: return "PHYS-REG ";
case RegExcess: return "REG-EXCESS";
case RegCritical: return "REG-CRIT ";
case Stall: return "STALL ";
@@ -2809,24 +2878,41 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) {
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
-int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+int biasPhysReg(const SUnit *SU, bool isTop) {
const MachineInstr *MI = SU->getInstr();
- if (!MI->isCopy())
- return 0;
- unsigned ScheduledOper = isTop ? 1 : 0;
- unsigned UnscheduledOper = isTop ? 0 : 1;
- // If we have already scheduled the physreg produce/consumer, immediately
- // schedule the copy.
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(ScheduledOper).getReg()))
- return 1;
- // If the physreg is at the boundary, defer it. Otherwise schedule it
- // immediately to free the dependent. We can hoist the copy later.
- bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(UnscheduledOper).getReg()))
- return AtBoundary ? -1 : 1;
+ if (MI->isCopy()) {
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg produce/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ }
+
+ if (MI->isMoveImmediate()) {
+ // If we have a move immediate and all successors have been assigned, bias
+ // towards scheduling this later. Make sure all register defs are to
+ // physical registers.
+ bool DoBias = true;
+ for (const MachineOperand &Op : MI->defs()) {
+ if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
+ DoBias = false;
+ break;
+ }
+ }
+
+ if (DoBias)
+ return isTop ? -1 : 1;
+ }
+
return 0;
}
} // end namespace llvm
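The return value feeds tryGreater below: +1 biases the candidate to be picked
now, -1 defers it, 0 expresses no preference. A standalone restatement of the
new move-immediate branch (illustrative only):

    #include <cassert>

    int biasMoveImm(bool isTop, bool allDefsPhysical) {
      if (!allDefsPhysical)
        return 0;
      return isTop ? -1 : 1; // either way: later in program order
    }

    int main() {
      assert(biasMoveImm(/*isTop=*/false, true) == 1);  // bottom-up: pick now
      assert(biasMoveImm(/*isTop=*/true, true) == -1);  // top-down: defer
    }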
@@ -2887,9 +2973,9 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
return;
}
- if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
- biasPhysRegCopy(Cand.SU, Cand.AtTop),
- TryCand, Cand, PhysRegCopy))
+ // Bias physical register defs and copies toward their uses and definitions, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
return;
// Avoid exceeding the target's limit.
@@ -3136,7 +3222,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
return SU;
}
-void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
++InsertPos;
@@ -3151,10 +3237,10 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
continue;
MachineInstr *Copy = DepSU->getInstr();
- if (!Copy->isCopy())
+ if (!Copy->isCopy() && !Copy->isMoveImmediate())
continue;
LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
- Dep.getSUnit()->dump(DAG));
+ DAG->dumpNode(*Dep.getSUnit()));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3165,18 +3251,18 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
/// does.
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
-/// them here. See comments in biasPhysRegCopy.
+/// them here. See comments in biasPhysReg.
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
- reschedulePhysRegCopies(SU, true);
+ reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
- reschedulePhysRegCopies(SU, false);
+ reschedulePhysReg(SU, false);
}
}
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1fd40f757351..cdc597db6401 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -513,25 +513,6 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return true;
}
-/// collectDebgValues - Scan instructions following MI and collect any
-/// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DbgValues) {
- DbgValues.clear();
- if (!MI.getOperand(0).isReg())
- return;
-
- MachineBasicBlock::iterator DI = MI; ++DI;
- for (MachineBasicBlock::iterator DE = MI.getParent()->end();
- DI != DE; ++DI) {
- if (!DI->isDebugValue())
- return;
- if (DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
- DbgValues.push_back(&*DI);
- }
-}
-
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -735,9 +716,12 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
!PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
return false;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ return false;
+
+ if (!BaseOp->isReg())
return false;
if (!(MI.mayLoad() && !MI.isPredicable()))
@@ -750,15 +734,21 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
(MBP.Predicate == MachineBranchPredicate::PRED_NE ||
MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
- MBP.LHS.getReg() == BaseReg;
+ MBP.LHS.getReg() == BaseOp->getReg();
}
-/// Sink an instruction and its associated debug instructions.
+/// Sink an instruction and its associated debug instructions. If the debug
+/// instructions to be sunk are already known, they can be provided in DbgVals.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
- MachineBasicBlock::iterator InsertPos) {
- // Collect matching debug values.
+ MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) {
+ // If debug values are provided, use those; otherwise call collectDebugValues.
SmallVector<MachineInstr *, 2> DbgValuesToSink;
- collectDebugValues(MI, DbgValuesToSink);
+ if (DbgVals)
+ DbgValuesToSink.insert(DbgValuesToSink.begin(),
+ DbgVals->begin(), DbgVals->end());
+ else
+ MI.collectDebugValues(DbgValuesToSink);
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
@@ -970,6 +960,9 @@ private:
/// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ /// Track DBG_VALUEs of (unmodified) register units.
+ DenseMap<unsigned, TinyPtrVector<MachineInstr*>> SeenDbgInstrs;
+
/// Sink Copy instructions unused in the same block close to their uses in
/// successors.
bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
@@ -1056,8 +1049,11 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
SmallVectorImpl<unsigned> &UsedOpsInCopy,
SmallVectorImpl<unsigned> &DefedRegsInCopy) {
- for (auto DefReg : DefedRegsInCopy)
- SuccBB->removeLiveIn(DefReg);
+ MachineFunction &MF = *SuccBB->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (unsigned DefReg : DefedRegsInCopy)
+ for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
+ SuccBB->removeLiveIn(*S);
for (auto U : UsedOpsInCopy) {
unsigned Reg = MI->getOperand(U).getReg();
if (!SuccBB->isLiveIn(Reg))
@@ -1121,11 +1117,34 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// block and the current instruction.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
+ SeenDbgInstrs.clear();
for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
MachineInstr *MI = &*I;
++I;
+ // Track the operand indexes of Copy's uses.
+ SmallVector<unsigned, 2> UsedOpsInCopy;
+ // Track the register numbers defined by Copy.
+ SmallVector<unsigned, 2> DefedRegsInCopy;
+
+ // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
+ // for DBG_VALUEs later, record them when they're encountered.
+ if (MI->isDebugValue()) {
+ auto &MO = MI->getOperand(0);
+ if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) {
+ // Bail if we can already tell the sink would be rejected, rather
+ // than needlessly accumulating lots of DBG_VALUEs.
+ if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ ModifiedRegUnits, UsedRegUnits))
+ continue;
+
+ // Record debug use of this register.
+ SeenDbgInstrs[MO.getReg()].push_back(MI);
+ }
+ continue;
+ }
+
if (MI->isDebugInstr())
continue;
@@ -1139,11 +1158,6 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
continue;
}
- // Track the operand index for use in Copy.
- SmallVector<unsigned, 2> UsedOpsInCopy;
- // Track the register number defed in Copy.
- SmallVector<unsigned, 2> DefedRegsInCopy;
-
// Don't sink the COPY if it would violate a register dependency.
if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
@@ -1165,11 +1179,21 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
"Unexpected predecessor");
+ // Collect DBG_VALUEs that must sink with this copy.
+ SmallVector<MachineInstr *, 4> DbgValsToSink;
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ for (auto *DbgMI : SeenDbgInstrs.lookup(Reg))
+ DbgValsToSink.push_back(DbgMI);
+ }
+
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos);
+ performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink);
updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
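A minimal model of the new bookkeeping (names illustrative, not the real
TinyPtrVector plumbing): DBG_VALUEs seen during the bottom-up scan are parked
per register, and when a copy defining that register is sunk, its parked
entries are handed to performSink so they move together.

    #include <unordered_map>
    #include <utility>
    #include <vector>

    struct MIRef { int Id; };

    class DbgSideTable {
      std::unordered_map<unsigned, std::vector<MIRef>> Seen;

    public:
      void noteDbgValue(unsigned Reg, MIRef MI) { Seen[Reg].push_back(MI); }
      // Fetch (and here, consume) the DBG_VALUEs recorded for Reg.
      std::vector<MIRef> takeForDef(unsigned Reg) {
        auto It = Seen.find(Reg);
        if (It == Seen.end())
          return {};
        std::vector<MIRef> Out = std::move(It->second);
        Seen.erase(It);
        return Out;
      }
    };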
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 79ca6adf95c4..e62ed3094651 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -218,8 +218,7 @@ computeHeightResources(const MachineBasicBlock *MBB) {
// The trace tail is done.
if (!TBI->Succ) {
TBI->Tail = MBB->getNumber();
- std::copy(PRCycles.begin(), PRCycles.end(),
- ProcResourceHeights.begin() + PROffset);
+ llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
return;
}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 318776136e24..534d3699db29 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,6 +23,7 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
+#include "LiveRangeCalc.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -108,6 +109,7 @@ namespace {
using RegMap = DenseMap<unsigned, const MachineInstr *>;
using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
+ const MachineInstr *FirstNonPHI;
const MachineInstr *FirstTerminator;
BlockSet FunctionBlocks;
@@ -248,6 +250,7 @@ namespace {
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
void report_context(SlotIndex Pos) const;
+ void report_context(MCPhysReg PhysReg) const;
void report_context_liverange(const LiveRange &LR) const;
void report_context_lanemask(LaneBitmask LaneMask) const;
void report_context_vreg(unsigned VReg) const;
@@ -261,6 +264,7 @@ namespace {
LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ bool SubRangeCheck = false,
LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
@@ -362,6 +366,13 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
const bool isFunctionFailedISel = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel);
+
+ // If we're mid-GlobalISel and we already triggered the fallback path then
+ // it's expected that the MIR is somewhat broken but that's ok since we'll
+ // reset it and clear the FailedISel attribute in ResetMachineFunctions.
+ if (isFunctionFailedISel)
+ return foundErrors;
+
isFunctionRegBankSelected =
!isFunctionFailedISel &&
MF.getProperties().hasProperty(
@@ -530,6 +541,10 @@ void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
errs() << "- liverange: " << LR << '\n';
}
+void MachineVerifier::report_context(MCPhysReg PReg) const {
+ errs() << "- p. register: " << printReg(PReg, TRI) << '\n';
+}
+
void MachineVerifier::report_context_vreg(unsigned VReg) const {
errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
}
@@ -599,6 +614,7 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i,
void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
+ FirstNonPHI = nullptr;
if (!MF->getProperties().hasProperty(
MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
@@ -608,6 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB->getIterator() != MBB->getParent()->begin()) {
report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
+ report_context(LI.PhysReg);
}
}
}
@@ -666,7 +683,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// out the bottom of the function.
} else if (MBB->succ_size() == LandingPadSuccs.size()) {
// It's possible that the block legitimately ends with a noreturn
- // call or an unreachable, in which case it won't actuall fall
+ // call or an unreachable, in which case it won't actually fall
// out of the block.
} else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional fall-through but doesn't have "
@@ -767,7 +784,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
"isn't a terminator instruction!", MBB);
}
if (Cond.empty()) {
- report("MBB exits via conditinal branch/branch but there's no "
+ report("MBB exits via conditional branch/branch but there's no "
"condition!", MBB);
}
} else {
@@ -880,9 +897,15 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
<< MI->getNumOperands() << " given.\n";
}
- if (MI->isPHI() && MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs))
- report("Found PHI instruction with NoPHIs property set", MI);
+ if (MI->isPHI()) {
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
+ if (FirstNonPHI)
+ report("Found PHI instruction after non-PHI", MI);
+ } else if (FirstNonPHI == nullptr)
+ FirstNonPHI = MI;
// Check the tied operands.
if (MI->isInlineAsm())
@@ -1038,6 +1061,89 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_MERGE_VALUES: {
+ // G_MERGE_VALUES should only be used to merge scalars into a larger scalar,
+ // e.g. s2N = MERGE sN, sN
+ // Merging multiple scalars into a vector is not allowed; use
+ // G_BUILD_VECTOR for that.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (DstTy.isVector() || SrcTy.isVector())
+ report("G_MERGE_VALUES cannot operate on vectors", MI);
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(MI->getNumOperands()-1).getReg());
+ // For now G_UNMERGE can split vectors.
+ for (unsigned i = 0; i < MI->getNumOperands()-1; ++i) {
+ if (MRI->getType(MI->getOperand(i).getReg()) != DstTy)
+ report("G_UNMERGE_VALUES destination types do not match", MI);
+ }
+ if (SrcTy.getSizeInBits() !=
+ (DstTy.getSizeInBits() * (MI->getNumOperands() - 1))) {
+ report("G_UNMERGE_VALUES source operand does not cover dest operands",
+ MI);
+ }
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // Source types must be scalars, dest type a vector. Total size of scalars
+ // must match the dest vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector())
+ report("G_BUILD_VECTOR must produce a vector from scalar operands", MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
+ }
+ if (DstTy.getSizeInBits() !=
+ SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1))
+ report("G_BUILD_VECTOR src operands total size don't match dest "
+ "size.",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ // Source types must be scalars, dest type a vector. Scalar types must be
+ // larger than the dest vector elt type, as this is a truncating operation.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector())
+ report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands",
+ MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous",
+ MI);
+ }
+ if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits())
+ report("G_BUILD_VECTOR_TRUNC source operand types are not larger than "
+ "dest elt type",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ // Source types should be vectors, and total size should match the dest
+ // vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || !SrcTy.isVector())
+ report("G_CONCAT_VECTOR requires vector source and destination operands",
+ MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
+ }
+ if (DstTy.getNumElements() !=
+ SrcTy.getNumElements() * (MI->getNumOperands() - 1))
+ report("G_CONCAT_VECTOR num dest and source elements should match", MI);
+ break;
+ }
case TargetOpcode::COPY: {
if (foundErrors)
break;
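Schematically (not verbatim MIR syntax), the invariants these new checks
enforce look like this:

    %d:_(s64)        = G_MERGE_VALUES %a:_(s32), %b:_(s32)    ; ok: scalar merge
    %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %d:_(s64)         ; ok: sizes cover src
    %v:_(<2 x s32>)  = G_BUILD_VECTOR %a:_(s32), %b:_(s32)    ; ok: vector build
    %v:_(<2 x s32>)  = G_MERGE_VALUES %a:_(s32), %b:_(s32)    ; rejected: use G_BUILD_VECTOR
    %w:_(<4 x s32>)  = G_CONCAT_VECTORS %v:_(<2 x s32>), %v:_(<2 x s32>) ; ok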
@@ -1395,7 +1501,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
- LaneBitmask LaneMask) {
+ bool SubRangeCheck, LaneBitmask LaneMask) {
if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
assert(VNI && "NULL valno is not allowed");
if (VNI->def != DefIdx) {
@@ -1419,25 +1525,14 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
if (MO->isDead()) {
LiveQueryResult LRQ = LR.Query(DefIdx);
if (!LRQ.isDeadDef()) {
- // In case of physregs we can have a non-dead definition on another
- // operand.
- bool otherDef = false;
- if (!TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
- const MachineInstr &MI = *MO->getParent();
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.isDead())
- continue;
- unsigned Reg = MO.getReg();
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (*Units == VRegOrUnit) {
- otherDef = true;
- break;
- }
- }
- }
- }
-
- if (!otherDef) {
+ assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) &&
+ "Expecting a virtual register.");
+ // A dead subreg def only tells us that the specific subreg is dead. There
+ // could be other non-dead defs of other subregs, or we could have other
+ // parts of the register being live through the instruction. So unless we
+ // are checking liveness for a subrange it is ok for the live range to
+ // continue, given that we have a dead def of a subregister.
+ if (SubRangeCheck || MO->getSubReg() == 0) {
report("Live range continues after dead def flag", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
@@ -1532,10 +1627,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// get a report for its operand.
if (Bad) {
for (const MachineOperand &MOP : MI->uses()) {
- if (!MOP.isReg())
+ if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!MOP.isImplicit())
+
+ if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg()))
continue;
+
for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
++SubRegs) {
if (*SubRegs == Reg) {
@@ -1593,7 +1690,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if ((SR.LaneMask & MOMask).none())
continue;
- checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
}
}
} else {
@@ -2116,6 +2213,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Skip this block.
++MFI;
}
+
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LaneMask.any()) {
+ LiveInterval &OwnerLI = LiveInts->getInterval(Reg);
+ OwnerLI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes);
+ }
+
while (true) {
assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
@@ -2141,7 +2245,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// instruction with subregister intervals
// only one of the subregisters (not necessarily the current one) needs to
// be defined.
- if (!PVNI && (LaneMask.none() || !IsPHI) ) {
+ if (!PVNI && (LaneMask.none() || !IsPHI)) {
+ if (LiveRangeCalc::isJointlyDominated(*PI, Undefs, *Indexes))
+ continue;
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 62dadbba0c1a..82b6d642c73b 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -67,8 +67,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SI.setLatency(0);
LLVM_DEBUG(
- dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
- SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << "Macro fuse: "; DAG.dumpNodeName(FirstSU); dbgs() << " - ";
+ DAG.dumpNodeName(SecondSU); dbgs() << " / ";
dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - "
<< DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';);
@@ -80,8 +80,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
if (SI.isWeak() || isHazard(SI) ||
SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
continue;
- LLVM_DEBUG(dbgs() << " Bind "; SecondSU.print(dbgs(), &DAG);
- dbgs() << " - "; SU->print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(SecondSU);
+ dbgs() << " - "; DAG.dumpNodeName(*SU); dbgs() << '\n';);
DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
}
@@ -92,8 +92,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SUnit *SU = SI.getSUnit();
if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
continue;
- LLVM_DEBUG(dbgs() << " Bind "; SU->print(dbgs(), &DAG); dbgs() << " - ";
- FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(*SU); dbgs() << " - ";
+ DAG.dumpNodeName(FirstSU); dbgs() << '\n';);
DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
}
// ExitSU comes last by design, which acts like an implicit dependency
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index befa8422d399..770f6c5b0403 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -90,10 +90,10 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
}
/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
-/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
/// SingleValReg is zero on entry, it is set to the register with the single
-/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
-/// have been scanned.
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned. PHIs may be grouped into a single cycle, several cycles, or chains.
bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
unsigned &SingleValReg,
InstrSet &PHIsInCycle) {
@@ -119,8 +119,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
if (SrcMI && SrcMI->isCopy() &&
!SrcMI->getOperand(0).getSubReg() &&
!SrcMI->getOperand(1).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
- SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
+ SrcReg = SrcMI->getOperand(1).getReg();
+ SrcMI = MRI->getVRegDef(SrcReg);
+ }
if (!SrcMI)
return false;
@@ -129,7 +131,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
return false;
} else {
// Fail if there is more than one non-phi/non-move register.
- if (SingleValReg != 0)
+ if (SingleValReg != 0 && SingleValReg != SrcReg)
return false;
SingleValReg = SrcReg;
}
@@ -180,6 +182,9 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
continue;
+ // Clear kill flags in case SingleValReg was taken from a copy instruction.
+ MRI->clearKillFlags(SingleValReg);
+
MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
++NumPHICycles;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 7a5c20000066..b9801c6fd97b 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -153,8 +153,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // Split critical edges to help the coalescer. This does not yet support
- // updating LiveIntervals, so we disable it.
+ // Split critical edges to help the coalescer.
if (!DisableEdgeSplitting && (LV || LIS)) {
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
for (auto &MBB : MF)
@@ -197,12 +196,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
- MachineBasicBlock &MBB) {
+ MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
return false; // Quick exit for basic blocks without PHIs.
- // Get an iterator to the first instruction after the last PHI node (this may
- // also be the end of the basic block).
+ // Get an iterator to the last PHI node.
MachineBasicBlock::iterator LastPHIIt =
std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
@@ -212,26 +210,26 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
return true;
}
-/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// Return true if all defs of VirtReg are implicit-defs.
/// This includes registers with no defs.
static bool isImplicitlyDefined(unsigned VirtReg,
- const MachineRegisterInfo *MRI) {
- for (MachineInstr &DI : MRI->def_instructions(VirtReg))
+ const MachineRegisterInfo &MRI) {
+ for (MachineInstr &DI : MRI.def_instructions(VirtReg))
if (!DI.isImplicitDef())
return false;
return true;
}
-/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
-/// are implicit_def's.
-static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
- const MachineRegisterInfo *MRI) {
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
- if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
+/// Return true if all sources of the phi node are implicit_def's, or undef's.
+static bool allPhiOperandsUndefined(const MachineInstr &MPhi,
+ const MachineRegisterInfo &MRI) {
+ for (unsigned I = 1, E = MPhi.getNumOperands(); I != E; I += 2) {
+ const MachineOperand &MO = MPhi.getOperand(I);
+ if (!isImplicitlyDefined(MO.getReg(), MRI) && !MO.isUndef())
return false;
+ }
return true;
}
-
/// LowerPHINode - Lower the PHI node at the top of the specified block.
void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator LastPHIIt) {
@@ -256,8 +254,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// after any remaining phi nodes) which copies the new incoming register
// into the phi node destination.
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- if (isSourceDefinedByImplicitDef(MPhi, MRI))
- // If all sources of a PHI node are implicit_def, just emit an
+ if (allPhiOperandsUndefined(*MPhi, *MRI))
+ // If all sources of a PHI node are implicit_def or undef uses, just emit an
// implicit_def instead of a copy.
BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
@@ -374,7 +372,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
- isImplicitlyDefined(SrcReg, MRI);
+ isImplicitlyDefined(SrcReg, *MRI);
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 215da630caf4..dd0a5fe1b39d 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -256,7 +256,7 @@ void SchedulePostRATDList::exitRegion() {
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
}
@@ -414,11 +414,7 @@ void SchedulePostRATDList::schedule() {
postprocessDAG();
LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
- LLVM_DEBUG(for (const SUnit &SU
- : SUnits) {
- SU.dumpAll(this);
- dbgs() << '\n';
- });
+ LLVM_DEBUG(dump());
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
@@ -465,7 +461,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -502,7 +498,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() &&
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 8f88ef78828a..b0e9ac03612d 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -7,13 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative intrinsic.
+// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
+// intrinsics.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
@@ -55,11 +57,129 @@ static bool lowerLoadRelative(Function &F) {
return Changed;
}
+static bool lowerObjCCall(Function &F, const char *NewFn,
+ bool setNonLazyBind = false) {
+ if (F.use_empty())
+ return false;
+
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = F.getParent();
+ Constant *FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
+
+ if (Function *Fn = dyn_cast<Function>(FCache)) {
+ Fn->setLinkage(F.getLinkage());
+ if (setNonLazyBind && !Fn->isWeakForLinker()) {
+ // If we have Native ARC, set nonlazybind attribute for these APIs for
+ // performance.
+ Fn->addFnAttr(Attribute::NonLazyBind);
+ }
+ }
+
+ for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
+ auto *CI = cast<CallInst>(I->getUser());
+ assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
+ ++I;
+
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
+ SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ NewCI->setTailCallKind(CI->getTailCallKind());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ }
+
+ return true;
+}
+
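Assuming a module that calls the retain intrinsic, the rewrite turns

    %1 = call i8* @llvm.objc.retain(i8* %obj)

into a plain runtime call

    %1 = call i8* @objc_retain(i8* %obj)

preserving the call's name and tail-call kind; for objc_retain and
objc_release the new declaration additionally gets the nonlazybind attribute
when it is not weak for the linker.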
static bool lowerIntrinsics(Module &M) {
bool Changed = false;
for (Function &F : M) {
- if (F.getName().startswith("llvm.load.relative."))
+ if (F.getName().startswith("llvm.load.relative.")) {
Changed |= lowerLoadRelative(F);
+ continue;
+ }
+ switch (F.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::objc_autorelease:
+ Changed |= lowerObjCCall(F, "objc_autorelease");
+ break;
+ case Intrinsic::objc_autoreleasePoolPop:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPop");
+ break;
+ case Intrinsic::objc_autoreleasePoolPush:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPush");
+ break;
+ case Intrinsic::objc_autoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_autoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_copyWeak:
+ Changed |= lowerObjCCall(F, "objc_copyWeak");
+ break;
+ case Intrinsic::objc_destroyWeak:
+ Changed |= lowerObjCCall(F, "objc_destroyWeak");
+ break;
+ case Intrinsic::objc_initWeak:
+ Changed |= lowerObjCCall(F, "objc_initWeak");
+ break;
+ case Intrinsic::objc_loadWeak:
+ Changed |= lowerObjCCall(F, "objc_loadWeak");
+ break;
+ case Intrinsic::objc_loadWeakRetained:
+ Changed |= lowerObjCCall(F, "objc_loadWeakRetained");
+ break;
+ case Intrinsic::objc_moveWeak:
+ Changed |= lowerObjCCall(F, "objc_moveWeak");
+ break;
+ case Intrinsic::objc_release:
+ Changed |= lowerObjCCall(F, "objc_release", true);
+ break;
+ case Intrinsic::objc_retain:
+ Changed |= lowerObjCCall(F, "objc_retain", true);
+ break;
+ case Intrinsic::objc_retainAutorelease:
+ Changed |= lowerObjCCall(F, "objc_retainAutorelease");
+ break;
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainBlock:
+ Changed |= lowerObjCCall(F, "objc_retainBlock");
+ break;
+ case Intrinsic::objc_storeStrong:
+ Changed |= lowerObjCCall(F, "objc_storeStrong");
+ break;
+ case Intrinsic::objc_storeWeak:
+ Changed |= lowerObjCCall(F, "objc_storeWeak");
+ break;
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_unsafeClaimAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainedObject:
+ Changed |= lowerObjCCall(F, "objc_retainedObject");
+ break;
+ case Intrinsic::objc_unretainedObject:
+ Changed |= lowerObjCCall(F, "objc_unretainedObject");
+ break;
+ case Intrinsic::objc_unretainedPointer:
+ Changed |= lowerObjCCall(F, "objc_unretainedPointer");
+ break;
+ case Intrinsic::objc_retain_autorelease:
+ Changed |= lowerObjCCall(F, "objc_retain_autorelease");
+ break;
+ case Intrinsic::objc_sync_enter:
+ Changed |= lowerObjCCall(F, "objc_sync_enter");
+ break;
+ case Intrinsic::objc_sync_exit:
+ Changed |= lowerObjCCall(F, "objc_sync_exit");
+ break;
+ }
}
return Changed;
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index fc62c8caf59e..23754e487a18 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -75,6 +75,10 @@ using namespace llvm;
using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
+STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+
namespace {
class PEI : public MachineFunctionPass {
@@ -168,6 +172,7 @@ using StackObjSet = SmallSetVector<int, 8>;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
+ NumFuncSeen++;
const Function &F = MF.getFunction();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -357,6 +362,11 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
for (auto &CS : CSI) {
+ // If the target has spilled this register to another register, we don't
+ // need to allocate a stack slot.
+ if (CS.isSpilledToReg())
+ continue;
+
unsigned Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
@@ -454,7 +464,22 @@ static void updateLiveness(MachineFunction &MF) {
if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
}
+ // If callee-saved register is spilled to another register rather than
+ // spilling to stack, the destination register has to be marked as live for
+ // each MBB between the prologue and epilogue so that it is not clobbered
+ // before it is reloaded in the epilogue. The Visited set contains all
+ // blocks outside of the region delimited by prologue/epilogue.
+ if (CSI[i].isSpilledToReg()) {
+ for (MachineBasicBlock &MBB : MF) {
+ if (Visited.count(&MBB))
+ continue;
+ MCPhysReg DstReg = CSI[i].getDstReg();
+ if (!MBB.isLiveIn(DstReg))
+ MBB.addLiveIn(DstReg);
+ }
+ }
}
}
/// Insert restore code for the callee-saved registers used in the function.
@@ -530,6 +555,9 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (!CSI.empty()) {
+ if (!MFI.hasCalls())
+ NumLeafFuncWithSpills++;
+
for (MachineBasicBlock *SaveBlock : SaveBlocks) {
insertCSRSaves(*SaveBlock, CSI);
// Update the live-in information of all the blocks up to the save
@@ -1090,7 +1118,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
MachineOperand &Offset = MI.getOperand(i + 1);
int refOffset = TFI->getFrameIndexReferencePreferSP(
MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
- Offset.setImm(Offset.getImm() + refOffset);
+ Offset.setImm(Offset.getImm() + refOffset + SPAdj);
MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
continue;
}
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 86fd87450521..6ca8d86e3f8e 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -25,7 +25,7 @@ static const char *const PSVNames[] = {
"Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
"GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII)
+PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
: Kind(Kind) {
AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind);
}
@@ -81,7 +81,7 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
}
CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(
- PSVKind Kind, const TargetInstrInfo &TII)
+ unsigned Kind, const TargetInstrInfo &TII)
: PseudoSourceValue(Kind, TII) {}
bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
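
The point of widening the constructor parameter from PSVKind to plain unsigned (my reading of the change, not stated in the diff) is that a target can now define private kinds past TargetCustom and pass them straight through. A hypothetical sketch:

    // Target-private kinds starting at TargetCustom.
    enum XYZPSVKind : unsigned {
      PSVBuffer = PseudoSourceValue::TargetCustom,
      PSVImage // TargetCustom + 1
    };

    class XYZBufferPSV : public PseudoSourceValue {
    public:
      explicit XYZBufferPSV(const TargetInstrInfo &TII)
          : PseudoSourceValue(PSVBuffer, TII) {} // no PSVKind cast needed
    };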
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
index 3318e109155b..d8958715c6b4 100644
--- a/lib/CodeGen/README.txt
+++ b/lib/CodeGen/README.txt
@@ -156,8 +156,8 @@ doing the wrong thing.
//===---------------------------------------------------------------------===//
It would be really nice to be able to write patterns in .td files for copies,
-which would eliminate a bunch of explicit predicates on them (e.g. no side
-effects). Once this is in place, it would be even better to have tblgen
+which would eliminate a bunch of explicit predicates on them (e.g. no side
+effects). Once this is in place, it would be even better to have tblgen
synthesize the various copy insertion/inspection methods in TargetInstrInfo.
//===---------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp
index 050fef5d25ed..a9f0a9387297 100644
--- a/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -157,7 +157,7 @@ bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
// Sorting all reaching defs found for a certain reg unit in a given BB.
for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
for (MBBRegUnitDefs &RegUnitDefs : MBBDefs)
- llvm::sort(RegUnitDefs.begin(), RegUnitDefs.end());
+ llvm::sort(RegUnitDefs);
}
return false;
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 7b57c6cbcdb8..eb3a4e481f5d 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -54,7 +54,7 @@ using namespace llvm;
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumCopies, "Number of copies coalesced");
+STATISTIC(NumCoalesced, "Number of copies coalesced");
static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -88,7 +88,7 @@ namespace {
unsigned short LastOpNum = 0; ///< OpNum on LastUse.
bool Dirty = false; ///< Register needs spill.
- explicit LiveReg(unsigned v) : VirtReg(v) {}
+ explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
@@ -96,14 +96,13 @@ namespace {
};
using LiveRegMap = SparseSet<LiveReg>;
-
/// This map contains entries for each virtual register that is currently
/// available in a physical register.
LiveRegMap LiveVirtRegs;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
- /// Track the state of a physical register.
+ /// State of a physical register.
enum RegState {
/// A disabled register is not available for allocation, but an alias may
/// be in use. A register can only be moved out of the disabled state if
@@ -123,18 +122,18 @@ namespace {
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// One of the RegState enums, or a virtreg.
+ /// Maps each physical register to a RegState enum or a virtual register.
std::vector<unsigned> PhysRegState;
SmallVector<unsigned, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
- /// Set of register units.
- using UsedInInstrSet = SparseSet<unsigned>;
-
+ using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
/// Set of register units that are used in the current instruction, and so
/// cannot be allocated.
- UsedInInstrSet UsedInInstr;
+ RegUnitSet UsedInInstr;
+
+ void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
@@ -150,12 +149,8 @@ namespace {
return false;
}
- /// This flag is set when LiveRegMap will be cleared completely after
- /// spilling all live registers. LiveRegMap entries should not be erased.
- bool isBulkSpilling = false;
-
enum : unsigned {
- spillClean = 1,
+ spillClean = 50,
spillDirty = 100,
spillImpossible = ~0u
};
@@ -180,16 +175,18 @@ namespace {
private:
bool runOnMachineFunction(MachineFunction &MF) override;
+
void allocateBasicBlock(MachineBasicBlock &MBB);
+ void allocateInstruction(MachineInstr &MI);
+ void handleDebugValue(MachineInstr &MI);
void handleThroughOperands(MachineInstr &MI,
SmallVectorImpl<unsigned> &VirtDead);
- int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC);
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
- void killVirtReg(LiveRegMap::iterator LRI);
+ void killVirtReg(LiveReg &LR);
void killVirtReg(unsigned VirtReg);
- void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
void usePhysReg(MachineOperand &MO);
@@ -206,15 +203,19 @@ namespace {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
- LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg);
- LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
- unsigned Hint);
- LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
- LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
+ LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
- bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+
+ int getStackSpaceFor(unsigned VirtReg);
+ void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg AssignedReg, bool Kill);
+ void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg PhysReg);
void dumpState();
};
@@ -226,10 +227,13 @@ char RegAllocFast::ID = 0;
INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
+void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
+ PhysRegState[PhysReg] = NewState;
+}
+
/// This allocates space for the specified virtual register to be held on the
/// stack.
-int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
- const TargetRegisterClass &RC) {
+int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
@@ -237,6 +241,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
return SS;
// Allocate a new stack object for this spill location...
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
unsigned Size = TRI->getSpillSize(RC);
unsigned Align = TRI->getSpillAlignment(RC);
int FrameIdx = MFI->CreateSpillStackObject(Size, Align);
@@ -246,6 +251,46 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
return FrameIdx;
}
+/// Insert spill instruction for \p AssignedReg before \p Before. Update
+/// DBG_VALUEs with \p VirtReg operands with the stack slot.
+void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg AssignedReg, bool Kill) {
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
+ << " in " << printReg(AssignedReg, TRI));
+ int FI = getStackSpaceFor(VirtReg);
+ LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
+ ++NumStores;
+
+ // If this register is used by DBG_VALUE then insert a new DBG_VALUE to
+ // identify the spilled location as the place to find the corresponding
+ // variable's value.
+ SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
+ for (MachineInstr *DBG : LRIDbgValues) {
+ MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
+ assert(NewDV->getParent() == MBB && "dangling parent pointer");
+ (void)NewDV;
+ LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
+ }
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to this register, because they all point to the spilled value
+ // now.
+ LRIDbgValues.clear();
+}
+
+/// Insert reload instruction for \p PhysReg before \p Before.
+void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg PhysReg) {
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
+ << printReg(PhysReg, TRI) << '\n');
+ int FI = getStackSpaceFor(VirtReg);
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI);
+ ++NumLoads;
+}
+
/// Return true if MO is the only remaining reference to its virtual register,
/// and it is guaranteed to be a block-local register.
bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
@@ -281,14 +326,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) {
}
/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) {
- addKillFlag(*LRI);
- assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+void RegAllocFast::killVirtReg(LiveReg &LR) {
+ addKillFlag(LR);
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
"Broken RegState mapping");
- PhysRegState[LRI->PhysReg] = regFree;
- // Erase from LiveVirtRegs unless we're spilling in bulk.
- if (!isBulkSpilling)
- LiveVirtRegs.erase(LRI);
+ setPhysRegState(LR.PhysReg, regFree);
+ LR.PhysReg = 0;
}
/// Mark virtreg as no longer available.
@@ -296,8 +339,8 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- if (LRI != LiveVirtRegs.end())
- killVirtReg(LRI);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
+ killVirtReg(*LRI);
}
/// This method spills the value specified by VirtReg into the corresponding
@@ -307,63 +350,41 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
- spillVirtReg(MI, LRI);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Spilling unmapped virtual register");
+ spillVirtReg(MI, *LRI);
}
/// Do the actual work of spilling.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- LiveRegMap::iterator LRI) {
- LiveReg &LR = *LRI;
- assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
// instruction, not on the spill.
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in "
- << printReg(LR.PhysReg, TRI));
- const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);
- int FI = getStackSpaceFor(LRI->VirtReg, RC);
- LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n");
- TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);
- ++NumStores; // Update statistics
-
- // If this register is used by DBG_VALUE then insert new DBG_VALUE to
- // identify spilled location as the place to find corresponding variable's
- // value.
- SmallVectorImpl<MachineInstr *> &LRIDbgValues =
- LiveDbgValueMap[LRI->VirtReg];
- for (MachineInstr *DBG : LRIDbgValues) {
- MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
- assert(NewDV->getParent() == MBB && "dangling parent pointer");
- (void)NewDV;
- LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:"
- << "\n"
- << *NewDV);
- }
- // Now this register is spilled there is should not be any DBG_VALUE
- // pointing to this register because they are all pointing to spilled value
- // now.
- LRIDbgValues.clear();
+
+ spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
+
if (SpillKill)
LR.LastUse = nullptr; // Don't kill register again
}
- killVirtReg(LRI);
+ killVirtReg(LR);
}
/// Spill all dirty virtregs without killing them.
void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
- if (LiveVirtRegs.empty()) return;
- isBulkSpilling = true;
+ if (LiveVirtRegs.empty())
+ return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end();
- I != E; ++I)
- spillVirtReg(MI, I);
+ for (LiveReg &LR : LiveVirtRegs) {
+ if (!LR.PhysReg)
+ continue;
+ spillVirtReg(MI, LR);
+ }
LiveVirtRegs.clear();
- isBulkSpilling = false;
}
/// Handle the direct use of a physical register. Check that the register is
@@ -417,12 +438,12 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- PhysRegState[Alias] = regFree;
+ setPhysRegState(Alias, regFree);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
// Some other alias was in the working set - clear it.
- PhysRegState[Alias] = regDisabled;
+ setPhysRegState(Alias, regDisabled);
break;
default:
llvm_unreachable("Instruction uses an alias of an allocated register");
@@ -430,7 +451,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
}
// All aliases are disabled, bring register into working set.
- PhysRegState[PhysReg] = regFree;
+ setPhysRegState(PhysReg, regFree);
MO.setIsKill();
}
@@ -448,12 +469,12 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
LLVM_FALLTHROUGH;
case regFree:
case regReserved:
- PhysRegState[PhysReg] = NewState;
+ setPhysRegState(PhysReg, NewState);
return;
}
// This is a disabled register, disable all aliases.
- PhysRegState[PhysReg] = NewState;
+ setPhysRegState(PhysReg, NewState);
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
switch (unsigned VirtReg = PhysRegState[Alias]) {
@@ -464,7 +485,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
LLVM_FALLTHROUGH;
case regFree:
case regReserved:
- PhysRegState[Alias] = regDisabled;
+ setPhysRegState(Alias, regDisabled);
if (TRI->isSuperRegister(PhysReg, Alias))
return;
break;
@@ -472,9 +493,9 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
}
}
-/// Return the cost of spilling clearing out PhysReg and aliases so it is
-/// free for allocation. Returns 0 when PhysReg is free or disabled with all
-/// aliases disabled - it can be allocated directly.
+/// Return the cost of spilling, i.e. clearing out PhysReg and aliases so it
+/// is free for allocation. Returns 0 when PhysReg is free or disabled with
+/// all aliases disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
if (isRegUsedInInstr(PhysReg)) {
@@ -492,9 +513,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- return I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ return LRI->Dirty ? spillDirty : spillClean;
}
}
@@ -512,9 +534,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
case regReserved:
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- Cost += I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ Cost += LRI->Dirty ? spillDirty : spillClean;
break;
}
}
@@ -526,31 +549,27 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
- LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to "
- << printReg(PhysReg, TRI) << "\n");
- PhysRegState[PhysReg] = LR.VirtReg;
- assert(!LR.PhysReg && "Already assigned a physreg");
+ unsigned VirtReg = LR.VirtReg;
+ LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
+ << printReg(PhysReg, TRI) << '\n');
+ assert(LR.PhysReg == 0 && "Already assigned a physreg");
+ assert(PhysReg != 0 && "Trying to assign no register");
LR.PhysReg = PhysReg;
-}
-
-RegAllocFast::LiveRegMap::iterator
-RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ setPhysRegState(PhysReg, VirtReg);
}
/// Allocates a physical register for VirtReg.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
- LiveRegMap::iterator LRI, unsigned Hint) {
- const unsigned VirtReg = LRI->VirtReg;
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+ const unsigned VirtReg = LR.VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
- // Take hint when possible.
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
+ << " in class " << TRI->getRegClassName(&RC) << '\n');
+
+ // Take hint when possible.
if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
MRI->isAllocatable(Hint) && RC.contains(Hint)) {
// Ignore the hint if we would have to spill a dirty register.
@@ -558,67 +577,62 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
if (Cost < spillDirty) {
if (Cost)
definePhysReg(MI, Hint, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, Hint);
+ assignVirtToPhysReg(LR, Hint);
+ return;
}
}
// First try to find a completely free register.
- ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(&RC);
- for (MCPhysReg PhysReg : AO) {
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ for (MCPhysReg PhysReg : AllocationOrder) {
if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
}
}
- LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from "
- << TRI->getRegClassName(&RC) << "\n");
-
- unsigned BestReg = 0;
+ MCPhysReg BestReg = 0;
unsigned BestCost = spillImpossible;
- for (MCPhysReg PhysReg : AO) {
+ for (MCPhysReg PhysReg : AllocationOrder) {
+ LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
unsigned Cost = calcSpillCost(PhysReg);
- LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n");
- LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n");
- LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
- // Cost is 0 when all aliases are already disabled.
+ LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
+ // Immediately take a register with cost 0.
if (Cost == 0) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
+ }
+ if (Cost < BestCost) {
+ BestReg = PhysReg;
+ BestCost = Cost;
}
- if (Cost < BestCost)
- BestReg = PhysReg, BestCost = Cost;
}
- if (BestReg) {
- definePhysReg(MI, BestReg, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, BestReg);
+ if (!BestReg) {
+ // Nothing we can do: Report an error and keep going with an invalid
+ // allocation.
+ if (MI.isInlineAsm())
+ MI.emitError("inline assembly requires more registers than available");
+ else
+ MI.emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AllocationOrder.begin(), regFree);
+ assignVirtToPhysReg(LR, *AllocationOrder.begin());
+ return;
}
- // Nothing we can do. Report an error and keep going with a bad allocation.
- if (MI.isInlineAsm())
- MI.emitError("inline assembly requires more registers than available");
- else
- MI.emitError("ran out of registers during register allocation");
- definePhysReg(MI, *AO.begin(), regFree);
- return assignVirtToPhysReg(VirtReg, *AO.begin());
+ definePhysReg(MI, BestReg, regFree);
+ assignVirtToPhysReg(LR, BestReg);
}
/// Allocates a register for VirtReg and mark it as dirty.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (New) {
+ if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
@@ -627,7 +641,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
}
- LRI = allocVirtReg(MI, LRI, Hint);
+ allocVirtReg(MI, *LRI, Hint);
} else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
@@ -639,40 +653,35 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return LRI->PhysReg;
}
/// Make sure VirtReg is available in a physreg and return it.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
MachineOperand &MO = MI.getOperand(OpNum);
- if (New) {
- LRI = allocVirtReg(MI, LRI, Hint);
- const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
- LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
- << printReg(LRI->PhysReg, TRI) << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);
- ++NumLoads;
+ if (!LRI->PhysReg) {
+ allocVirtReg(MI, *LRI, Hint);
+ reload(MI, VirtReg, LRI->PhysReg);
} else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
- LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n');
if (MO.isUse())
MO.setIsKill();
else
MO.setIsDead();
} else if (MO.isKill()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n');
MO.setIsKill(false);
} else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n');
MO.setIsDead(false);
}
} else if (MO.isKill()) {
@@ -680,25 +689,24 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
// register would be killed immediately, and there might be a second use:
// %foo = OR killed %x, %x
// This would cause a second reload of %x into a different register.
- LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n');
MO.setIsKill(false);
} else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n');
MO.setIsDead(false);
}
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = &MI;
LRI->LastOpNum = OpNum;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return *LRI;
}
/// Changes operand OpNum in MI to refer to PhysReg, considering subregs. This
/// may invalidate any operand pointers. Return true if the operand kills its
/// register.
-bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,
+bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
- MachineOperand &MO = MI.getOperand(OpNum);
bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
@@ -761,7 +769,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
SmallVector<unsigned, 8> PartialDefs;
LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
@@ -770,17 +778,17 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
<< ") is tied to operand " << MI.findTiedOperandIdx(I)
<< ".\n");
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
- setPhysReg(MI, I, PhysReg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ MCPhysReg PhysReg = LR.PhysReg;
+ setPhysReg(MI, MO, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
} else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
- LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- PartialDefs.push_back(LRI->PhysReg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ PartialDefs.push_back(LR.PhysReg);
}
}
@@ -793,9 +801,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, I, PhysReg))
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg))
VirtDead.push_back(Reg);
}
@@ -828,11 +835,12 @@ void RegAllocFast::dumpState() {
break;
default: {
dbgs() << '=' << printReg(PhysRegState[Reg]);
- LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- if (I->Dirty)
+ LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ if (LRI->Dirty)
dbgs() << "*";
- assert(I->PhysReg == Reg && "Bad inverse map");
+ assert(LRI->PhysReg == Reg && "Bad inverse map");
break;
}
}
@@ -841,6 +849,8 @@ void RegAllocFast::dumpState() {
// Check that LiveVirtRegs is the inverse.
for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
e = LiveVirtRegs.end(); i != e; ++i) {
+ if (!i->PhysReg)
+ continue;
assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
"Bad map key");
assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
@@ -850,6 +860,199 @@ void RegAllocFast::dumpState() {
}
#endif
+void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrcReg = 0;
+ unsigned CopyDstReg = 0;
+ unsigned CopySrcSub = 0;
+ unsigned CopyDstSub = 0;
+ if (MI.isCopy()) {
+ CopyDstReg = MI.getOperand(0).getReg();
+ CopySrcReg = MI.getOperand(1).getReg();
+ CopyDstSub = MI.getOperand(0).getSubReg();
+ CopySrcSub = MI.getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.clear();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!MRI->isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg,
+ (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDstReg = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned I = 0; I != VirtOpEnd; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
+ MCPhysReg PhysReg = LR.PhysReg;
+ CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, MO, PhysReg))
+ killVirtReg(LR);
+ }
+ }
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.clear();
+ if (hasEarlyClobbers) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MO.isTied()) continue;
+ markRegUsedInInstr(Reg);
+ }
+ }
+
+ unsigned DefOpEnd = MI.getNumOperands();
+ if (MI.isCall()) {
+ // Spill all virtregs before a call. This serves one purpose: If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots.
+ // Note: although this is appealing to just consider all definitions
+ // as call-clobbered, this is not correct because some of those
+ // definitions may be used later on and we do not want to reuse
+ // those for virtual registers in between.
+ LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!MRI->isAllocatable(Reg)) continue;
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ continue;
+ }
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
+ VirtDead.push_back(Reg);
+      CopyDstReg = 0; // cancel coalescing.
+ } else
+ CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+  // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned VirtReg : VirtDead)
+ killVirtReg(VirtReg);
+ VirtDead.clear();
+
+ LLVM_DEBUG(dbgs() << "<< " << MI);
+ if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+ LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
+ Coalesced.push_back(&MI);
+ }
+}
+
+void RegAllocFast::handleDebugValue(MachineInstr &MI) {
+ MachineOperand &MO = MI.getOperand(0);
+
+ // Ignore DBG_VALUEs that aren't based on virtual registers. These are
+ // mostly constants and frame indices.
+ if (!MO.isReg())
+ return;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ // See if this virtual register has already been allocated to a physical
+ // register or spilled to a stack slot.
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ setPhysReg(MI, MO, LRI->PhysReg);
+ } else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS);
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI);
+ return;
+ }
+
+ // We can't allocate a physreg for a DebugValue, sorry!
+ LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+
+ // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
+ // that future spills of Reg will have DBG_VALUEs.
+ LiveDbgValueMap[Reg].push_back(&MI);
+}
+
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -869,206 +1072,19 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// Otherwise, sequentially allocate each instruction in the MBB.
for (MachineInstr &MI : MBB) {
- const MCInstrDesc &MCID = MI.getDesc();
- LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState());
+ LLVM_DEBUG(
+ dbgs() << "\n>> " << MI << "Regs:";
+ dumpState()
+ );
- // Debug values are not allowed to change codegen in any way.
+ // Special handling for debug values. Note that they are not allowed to
+ // affect codegen of the other instructions in any way.
if (MI.isDebugValue()) {
- MachineInstr *DebugMI = &MI;
- MachineOperand &MO = DebugMI->getOperand(0);
-
- // Ignore DBG_VALUEs that aren't based on virtual registers. These are
- // mostly constants and frame indices.
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // See if this virtual register has already been allocated to a physical
- // register or spilled to a stack slot.
- LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
- if (LRI != LiveVirtRegs.end())
- setPhysReg(*DebugMI, 0, LRI->PhysReg);
- else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS != -1) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- updateDbgValueForSpill(*DebugMI, SS);
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:"
- << "\t" << *DebugMI);
- continue;
- }
-
- // We can't allocate a physreg for a DebugValue, sorry!
- LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(0);
- }
-
- // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
- // that future spills of Reg will have DBG_VALUEs.
- LiveDbgValueMap[Reg].push_back(DebugMI);
+ handleDebugValue(MI);
continue;
}
- if (MI.isDebugLabel())
- continue;
-
- // If this is a copy, we may be able to coalesce.
- unsigned CopySrcReg = 0;
- unsigned CopyDstReg = 0;
- unsigned CopySrcSub = 0;
- unsigned CopyDstSub = 0;
- if (MI.isCopy()) {
- CopyDstReg = MI.getOperand(0).getReg();
- CopySrcReg = MI.getOperand(1).getReg();
- CopyDstSub = MI.getOperand(0).getSubReg();
- CopySrcSub = MI.getOperand(1).getSubReg();
- }
-
- // Track registers used by instruction.
- UsedInInstr.clear();
-
- // First scan.
- // Mark physreg uses and early clobbers as used.
- // Find the end of the virtreg operands
- unsigned VirtOpEnd = 0;
- bool hasTiedOps = false;
- bool hasEarlyClobbers = false;
- bool hasPartialRedefs = false;
- bool hasPhysDefs = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- // Make sure MRI knows about registers clobbered by regmasks.
- if (MO.isRegMask()) {
- MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- continue;
- }
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg) continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- VirtOpEnd = i+1;
- if (MO.isUse()) {
- hasTiedOps = hasTiedOps ||
- MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
- } else {
- if (MO.isEarlyClobber())
- hasEarlyClobbers = true;
- if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
- hasPartialRedefs = true;
- }
- continue;
- }
- if (!MRI->isAllocatable(Reg)) continue;
- if (MO.isUse()) {
- usePhysReg(MO);
- } else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg,
- (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
- hasEarlyClobbers = true;
- } else
- hasPhysDefs = true;
- }
-
- // The instruction may have virtual register operands that must be allocated
- // the same register at use-time and def-time: early clobbers and tied
- // operands. If there are also physical defs, these registers must avoid
- // both physical defs and uses, making them more constrained than normal
- // operands.
- // Similarly, if there are multiple defs and tied operands, we must make
- // sure the same register is allocated to uses and defs.
- // We didn't detect inline asm tied operands above, so just make this extra
- // pass for all inline asm.
- if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
- handleThroughOperands(MI, VirtDead);
- // Don't attempt coalescing when we have funny stuff going on.
- CopyDstReg = 0;
- // Pretend we have early clobbers so the use operands get marked below.
- // This is not necessary for the common case of a single tied use.
- hasEarlyClobbers = true;
- }
-
- // Second scan.
- // Allocate virtreg uses.
- for (unsigned I = 0; I != VirtOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- if (MO.isUse()) {
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg);
- MCPhysReg PhysReg = LRI->PhysReg;
- CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
- if (setPhysReg(MI, I, PhysReg))
- killVirtReg(LRI);
- }
- }
-
- // Track registers defined by instruction - early clobbers and tied uses at
- // this point.
- UsedInInstr.clear();
- if (hasEarlyClobbers) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- // Look for physreg defs and tied uses.
- if (!MO.isDef() && !MO.isTied()) continue;
- markRegUsedInInstr(Reg);
- }
- }
-
- unsigned DefOpEnd = MI.getNumOperands();
- if (MI.isCall()) {
- // Spill all virtregs before a call. This serves one purpose: If an
- // exception is thrown, the landing pad is going to expect to find
- // registers in their spill slots.
- // Note: although this is appealing to just consider all definitions
- // as call-clobbered, this is not correct because some of those
- // definitions may be used later on and we do not want to reuse
- // those for virtual registers in between.
- LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
- spillAll(MI);
- }
-
- // Third scan.
- // Allocate defs and collect dead defs.
- for (unsigned I = 0; I != DefOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
- continue;
- unsigned Reg = MO.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
- continue;
- }
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg);
- MCPhysReg PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, I, PhysReg)) {
- VirtDead.push_back(Reg);
- CopyDstReg = 0; // cancel coalescing;
- } else
- CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
- }
-
- // Kill dead defs after the scan to ensure that multiple defs of the same
- // register are allocated identically. We didn't need to do this for uses
- // because we are crerating our own kill flags, and they are always at the
- // last use.
- for (unsigned VirtReg : VirtDead)
- killVirtReg(VirtReg);
- VirtDead.clear();
-
- if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
- LLVM_DEBUG(dbgs() << "-- coalescing: " << MI);
- Coalesced.push_back(&MI);
- } else {
- LLVM_DEBUG(dbgs() << "<< " << MI);
- }
+ allocateInstruction(MI);
}
// Spill all physical registers holding virtual registers now.
@@ -1079,12 +1095,11 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// LiveVirtRegs might refer to the instrs.
for (MachineInstr *MI : Coalesced)
MBB.erase(MI);
- NumCopies += Coalesced.size();
+ NumCoalesced += Coalesced.size();
LLVM_DEBUG(MBB.dump());
}
-/// Allocates registers for a function.
bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
<< "********** Function: " << MF.getName() << '\n');
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 3333e1f2fb8b..81b21b442437 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -318,7 +318,7 @@ class RAGreedy : public MachineFunctionPass,
/// Track new eviction.
/// The Evictor vreg has evicted the Evictee vreg from Physreg.
- /// \param PhysReg The phisical register Evictee was evicted from.
+ /// \param PhysReg The physical register Evictee was evicted from.
/// \param Evictor The evictor Vreg that evicted Evictee.
/// \param Evictee The evictee Vreg.
void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
@@ -449,8 +449,8 @@ private:
BlockFrequency calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
- void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
- void growRegion(GlobalSplitCandidate &Cand);
+ bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ bool growRegion(GlobalSplitCandidate &Cand);
bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
@@ -1183,7 +1183,10 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
BC.Number = BI.MBB->getNumber();
Intf.moveToBlock(BC.Number);
BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
- BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = (BI.LiveOut &&
+ !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
+ ? SpillPlacement::PrefReg
+ : SpillPlacement::DontCare;
BC.ChangesValue = BI.FirstDef.isValid();
if (!Intf.hasInterference())
@@ -1203,6 +1206,13 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
} else if (Intf.first() < BI.LastInstr) {
++Ins;
}
+
+      // Abort if the spill cannot be inserted at the MBB's start
+ if (((BC.Entry == SpillPlacement::MustSpill) ||
+ (BC.Entry == SpillPlacement::PrefSpill)) &&
+ SlotIndex::isEarlierInstr(BI.FirstInstr,
+ SA->getFirstSplitPoint(BC.Number)))
+ return false;
}
// Interference for the live-out value.
@@ -1232,7 +1242,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
-void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
ArrayRef<unsigned> Blocks) {
const unsigned GroupSize = 8;
SpillPlacement::BlockConstraint BCS[GroupSize];
@@ -1256,6 +1266,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
assert(B < GroupSize && "Array overflow");
BCS[B].Number = Number;
+    // Abort if the spill cannot be inserted at the MBB's start
+ MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
+ if (!MBB->empty() &&
+ SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()),
+ SA->getFirstSplitPoint(Number)))
+ return false;
// Interference for the live-in value.
if (Intf.first() <= Indexes->getMBBStartIdx(Number))
BCS[B].Entry = SpillPlacement::MustSpill;
@@ -1276,9 +1292,10 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
SpillPlacer->addConstraints(makeArrayRef(BCS, B));
SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ return true;
}
-void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Keep track of through blocks that have not been added to SpillPlacer.
BitVector Todo = SA->getThroughBlocks();
SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
@@ -1314,9 +1331,10 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Compute through constraints from the interference, or assume that all
// through blocks prefer spilling when forming compact regions.
auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
- if (Cand.PhysReg)
- addThroughConstraints(Cand.Intf, NewBlocks);
- else
+ if (Cand.PhysReg) {
+ if (!addThroughConstraints(Cand.Intf, NewBlocks))
+ return false;
+ } else
// Provide a strong negative bias on through blocks to prevent unwanted
// liveness on loop backedges.
SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
@@ -1326,6 +1344,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
SpillPlacer->iterate();
}
LLVM_DEBUG(dbgs() << ", v=" << Visited);
+ return true;
}
/// calcCompactRegion - Compute the set of edge bundles that should be live
@@ -1356,7 +1375,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
return false;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ return false;
+ }
+
SpillPlacer->finish();
if (!Cand.LiveBundles.any()) {
@@ -1886,7 +1909,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
});
continue;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ continue;
+ }
SpillPlacer->finish();
@@ -2188,7 +2214,11 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
- assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ // TODO: the function currently only handles a single UseBlock; it should be
+ // possible to generalize.
+ if (SA->getUseBlocks().size() != 1)
+ return 0;
+
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
// Note that it is possible to have an interval that is live-in or live-out
@@ -3120,18 +3150,23 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
// Handle blocks that were not included in subloops.
if (Loops->getLoopFor(MBB) == L)
for (MachineInstr &MI : *MBB) {
- const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand *, 2> Accesses;
+ auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
+ return MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex());
+ };
if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
++Reloads;
- else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
- MFI.isSpillSlotObjectIndex(FI))
+ else if (TII->hasLoadFromStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess))
++FoldedReloads;
else if (TII->isStoreToStackSlot(MI, FI) &&
MFI.isSpillSlotObjectIndex(FI))
++Spills;
- else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
- MFI.isSpillSlotObjectIndex(FI))
+ else if (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess))
++FoldedSpills;
}
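
The call sites above imply the underlying TargetInstrInfo change: hasLoadFromStackSlot/hasStoreToStackSlot now collect every memory access into a SmallVectorImpl<const MachineMemOperand *> instead of returning a single MMO plus frame index. A sketch of the default implementation's likely shape (assumed from this usage, not quoted from the patch):

    bool TargetInstrInfo::hasLoadFromStackSlot(
        const MachineInstr &MI,
        SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
      size_t StartSize = Accesses.size();
      for (const MachineMemOperand *MMO : MI.memoperands()) {
        // Keep loads whose pseudo value names a fixed stack slot; callers
        // such as the lambda above then test isSpillSlotObjectIndex on it.
        if (MMO->isLoad() &&
            dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue()))
          Accesses.push_back(MMO);
      }
      return Accesses.size() != StartSize;
    }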
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index f1c442ac38ae..66c7c5cd7dbf 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -81,7 +81,7 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- const TargetMachine &TM = MF.getTarget();
+ const LLVMTargetMachine &TM = MF.getTarget();
LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
<< " -------------------- \n");
@@ -166,28 +166,27 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
}
// Insert any register fully saved via subregisters.
- for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
- if (SavedRegs.test(PReg))
- continue;
-
- // Check if PReg is fully covered by its subregs.
- bool CoveredBySubRegs = false;
- for (const TargetRegisterClass *RC : TRI.regclasses())
- if (RC->CoveredBySubRegs && RC->contains(PReg)) {
- CoveredBySubRegs = true;
- break;
- }
- if (!CoveredBySubRegs)
- continue;
-
- // Add PReg to SavedRegs if all subregs are saved.
- bool AllSubRegsSaved = true;
- for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
- if (!SavedRegs.test(*SR)) {
- AllSubRegsSaved = false;
- break;
- }
- if (AllSubRegsSaved)
- SavedRegs.set(PReg);
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ if (!RC->CoveredBySubRegs)
+ continue;
+
+ for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
+ if (SavedRegs.test(PReg))
+ continue;
+
+ // Check if PReg is fully covered by its subregs.
+ if (!RC->contains(PReg))
+ continue;
+
+ // Add PReg to SavedRegs if all subregs are saved.
+ bool AllSubRegsSaved = true;
+ for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
+ if (!SavedRegs.test(*SR)) {
+ AllSubRegsSaved = false;
+ break;
+ }
+ if (AllSubRegsSaved)
+ SavedRegs.set(PReg);
+ }
}
}
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index cad13a60efd2..2a06d5e95fbb 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,6 +16,7 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -69,6 +70,7 @@ STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
+STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
static cl::opt<bool> EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
@@ -94,6 +96,15 @@ VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
cl::Hidden);
+static cl::opt<unsigned> LateRematUpdateThreshold(
+ "late-remat-update-threshold", cl::Hidden,
+ cl::desc("During rematerialization for a copy, if the def instruction has "
+ "many other copy uses to be rematerialized, delay the multiple "
+ "separate live interval update work and do them all at once after "
+ "all those rematerialization are done. It will save a lot of "
+ "repeated work. "),
+ cl::init(100));
+
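
The mechanism this option gates, sketched (an approximation based on the declarations below, not code from the patch): vregs whose intervals would otherwise be recomputed once per rematerialized copy are parked in ToBeUpdated and recomputed a single time afterwards.

    // Sketch only: one deferred recompute instead of many partial updates.
    void RegisterCoalescer::lateLiveIntervalUpdate() {
      for (unsigned Reg : ToBeUpdated) {
        if (!LIS->hasInterval(Reg))
          continue;
        LiveInterval &LI = LIS->getInterval(Reg);
        if (!LI.empty())
          shrinkToUses(&LI); // recompute from the remaining uses in one pass
      }
      ToBeUpdated.clear();
    }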
namespace {
class RegisterCoalescer : public MachineFunctionPass,
@@ -137,6 +148,11 @@ namespace {
/// Virtual registers to be considered for register class inflation.
SmallVector<unsigned, 8> InflateRegs;
+    /// The collection of live intervals which should have been updated
+    /// immediately after rematerialization but whose updates are delayed
+    /// until lateLiveIntervalUpdate is called.
+ DenseSet<unsigned> ToBeUpdated;
+
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
@@ -157,6 +173,13 @@ namespace {
/// was made.
bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
+  /// If one def has many copy-like uses, and those copy uses are all
+  /// rematerialized, the live interval updates needed for those
+  /// rematerializations are delayed and done all at once instead of
+  /// multiple times. This saves compile time because live interval
+  /// updates are costly.
+ void lateLiveIntervalUpdate();
+
/// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
/// src/dst of the copy instruction CopyMI. This returns true if the copy
/// was successfully coalesced away. If it is not currently possible to
@@ -203,8 +226,12 @@ namespace {
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
- /// This returns true if an interval was modified.
- bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+  /// This returns a pair of flags:
+ /// - the first element is true if an interval was modified,
+ /// - the second element is true if the destination interval needs
+ /// to be shrunk after deleting the copy.
+ std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI);
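
A sketch of the call-site shape this new return type suggests (assumed from the comment above, not shown in this excerpt): the second flag tells the caller to shrink the destination interval once the copy is gone. The deleteInstr step is a hypothetical stand-in for whatever cleanup the caller performs.

    bool Changed, ShrinkB;
    std::tie(Changed, ShrinkB) = removeCopyByCommutingDef(CP, CopyMI);
    if (Changed) {
      deleteInstr(CopyMI); // hypothetical cleanup step
      if (ShrinkB)
        shrinkToUses(&LIS->getInterval(CP.getDstReg()));
    }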
/// We found a copy which can be moved to its less frequent predecessor.
bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
@@ -258,6 +285,7 @@ namespace {
/// mentioned method returns true.
void shrinkToUses(LiveInterval *LI,
SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ NumShrinkToUses++;
if (LIS->shrinkToUses(LI, Dead)) {
/// Check whether or not \p LI is composed by multiple connected
/// components and if that is the case, fix that.
@@ -662,17 +690,32 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
/// Copy segments with value number @p SrcValNo from live range @p Src to live
/// range @p Dst and use value number @p DstValNo there.
-static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
- const LiveRange &Src, const VNInfo *SrcValNo) {
+static std::pair<bool,bool>
+addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
+ const VNInfo *SrcValNo) {
+ bool Changed = false;
+ bool MergedWithDead = false;
for (const LiveRange::Segment &S : Src.segments) {
if (S.valno != SrcValNo)
continue;
- Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
- }
+ // This is adding a segment from Src that ends in a copy that is about
+ // to be removed. This segment is going to be merged with a pre-existing
+ // segment in Dst. This works, except in cases when the corresponding
+ // segment in Dst is dead. For example: adding [192r,208r:1) from Src
+ // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
+    // Recognize such cases, so that the segments can be shrunk.
+ LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
+ LiveRange::Segment &Merged = *Dst.addSegment(Added);
+ if (Merged.end.isDead())
+ MergedWithDead = true;
+ Changed = true;
+ }
+ return std::make_pair(Changed, MergedWithDead);
}
-bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
+std::pair<bool,bool>
+RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
assert(!CP.isPhys());
LiveInterval &IntA =
@@ -710,19 +753,19 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && !AValNo->isUnused() && "COPY source not live");
if (AValNo->isPHIDef())
- return false;
+ return { false, false };
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
- return false;
+ return { false, false };
if (!DefMI->isCommutable())
- return false;
+ return { false, false };
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
assert(DefIdx != -1);
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
- return false;
+ return { false, false };
// FIXME: The code below tries to commute 'UseOpIdx' operand with some other
// commutable operand which is expressed by the 'CommuteAnyOperandIndex' value
@@ -735,17 +778,17 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// op#2<->op#3) of commute transformation should be considered/tried here.
unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
- return false;
+ return { false, false };
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
- return false;
+ return { false, false };
// Make sure there are no other definitions of IntB that would reach the
// uses which the new definition can reach.
if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
- return false;
+ return { false, false };
// If some of the uses of IntA.reg are already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
@@ -758,7 +801,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
// If this use is tied to a def, we can't rewrite the register.
if (UseMI->isRegTiedToDefOperand(OpNo))
- return false;
+ return { false, false };
}
LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
@@ -770,11 +813,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *NewMI =
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
- return false;
+ return { false, false };
if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
!MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
- return false;
+ return { false, false };
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
MachineBasicBlock::iterator Pos = DefMI;
@@ -848,37 +891,58 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// Extend BValNo by merging in IntA live segments of AValNo. Val# definition
// is updated.
+ bool ShrinkB = false;
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- if (IntB.hasSubRanges()) {
+ if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
if (!IntA.hasSubRanges()) {
LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
IntA.createSubRangeFrom(Allocator, Mask, IntA);
+ } else if (!IntB.hasSubRanges()) {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
+ IntB.createSubRangeFrom(Allocator, Mask, IntB);
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
+ LaneBitmask MaskA;
for (LiveInterval::SubRange &SA : IntA.subranges()) {
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
+ MaskA |= SA.LaneMask;
IntB.refineSubRanges(Allocator, SA.LaneMask,
- [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
+ [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB]
+ (LiveInterval::SubRange &SR) {
VNInfo *BSubValNo = SR.empty()
? SR.getNextValue(CopyIdx, Allocator)
: SR.getVNInfoAt(CopyIdx);
assert(BSubValNo != nullptr);
- addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ ShrinkB |= P.second;
+ if (P.first)
+ BSubValNo->def = ASubValNo->def;
});
}
+ // Go over all subranges of IntB that have not been covered by IntA,
+ // and delete the segments starting at CopyIdx. This can happen if
+ // IntA has undef lanes that are defined in IntB.
+ for (LiveInterval::SubRange &SB : IntB.subranges()) {
+ if ((SB.LaneMask & MaskA).any())
+ continue;
+ if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
+ if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
+ SB.removeSegment(*S, true);
+ }
}
BValNo->def = AValNo->def;
- addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ ShrinkB |= P.second;
LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
LIS->removeVRegDefAt(IntA, AValNo->def);
LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
- return true;
+ return { true, ShrinkB };
}
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
@@ -1067,6 +1131,20 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
assert(BValNo && "All sublanes should be live");
LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
BValNo->markUnused();
+ // We can have a situation where the result of the original copy is live,
+ // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
+ // the copy appear as an endpoint from pruneValue(), but we don't want
+ // that because the copy has been removed. We can go ahead and remove
+ // that endpoint; there cannot be a use at the same place, since we know
+ // the copy is a full copy.
+ for (unsigned I = 0; I != EndPoints.size(); ) {
+ if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
+ EndPoints[I] = EndPoints.back();
+ EndPoints.pop_back();
+ continue;
+ }
+ ++I;
+ }
LIS->extendToIndices(SR, EndPoints);
}
// If any dead defs were extended, truncate them.
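The endpoint filtering above uses the unordered swap-with-back-and-pop erase
idiom: each removal is O(1) because the order of the endpoint list does not
matter. A minimal standalone sketch of the idiom (std::vector standing in for
the SmallVector used here):

    #include <vector>

    // Remove every element equal to Bad without preserving order: overwrite
    // the current slot with the last element, shrink, and re-examine the
    // same slot before advancing.
    void eraseUnordered(std::vector<int> &V, int Bad) {
      for (size_t I = 0; I != V.size();) {
        if (V[I] == Bad) {
          V[I] = V.back();
          V.pop_back();
          continue; // the swapped-in element has not been checked yet
        }
        ++I;
      }
    }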
@@ -1107,7 +1185,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
- assert(ValNo && "CopyMI input register not live");
+ if (!ValNo)
+ return false;
if (ValNo->isPHIDef() || ValNo->isUnused())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
@@ -1365,24 +1444,40 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
++NumReMats;
- // The source interval can become smaller because we removed a use.
- shrinkToUses(&SrcInt, &DeadDefs);
- if (!DeadDefs.empty()) {
- // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
- // to describe DstReg instead.
+ // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
+ // to describe DstReg instead.
+ if (MRI->use_nodbg_empty(SrcReg)) {
for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugValue()) {
- UseMO.setReg(DstReg);
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ UseMO.substPhysReg(DstReg, *TRI);
+ else
+ UseMO.setReg(DstReg);
// Move the debug value directly after the def of the rematerialized
// value in DstReg.
MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
}
}
- eliminateDeadDefs();
}
+ if (ToBeUpdated.count(SrcReg))
+ return true;
+
+ unsigned NumCopyUses = 0;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
+ if (UseMO.getParent()->isCopyLike())
+ NumCopyUses++;
+ }
+ if (NumCopyUses < LateRematUpdateThreshold) {
+ // The source interval can become smaller because we removed a use.
+ shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+ } else {
+ ToBeUpdated.insert(SrcReg);
+ }
return true;
}
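The tail of reMaterializeTrivialDef now implements a defer-past-threshold
pattern: pay for shrinkToUses eagerly while a source register has few
copy-like uses, otherwise record it once and batch the update. A sketch of the
pattern in isolation, with toy names that are not the coalescer's API:

    #include <set>

    static const unsigned Threshold = 100; // mirrors late-remat-update-threshold
    static std::set<unsigned> Deferred;

    void updateNow(unsigned Reg) { /* stands in for shrinkToUses + dead defs */ }

    void noteRemat(unsigned Reg, unsigned NumCopyUses) {
      if (Deferred.count(Reg))
        return;               // already queued; its interval is stale anyway
      if (NumCopyUses < Threshold)
        updateNow(Reg);       // cheap when there are few uses
      else
        Deferred.insert(Reg); // one batched update later
    }

    void flushDeferred() {    // analogous to lateLiveIntervalUpdate()
      for (unsigned Reg : Deferred)
        updateNow(Reg);
      Deferred.clear();
    }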
@@ -1751,9 +1846,18 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// If we can eliminate the copy without merging the live segments, do so
// now.
if (!CP.isPartial() && !CP.isPhys()) {
- if (adjustCopiesBackFrom(CP, CopyMI) ||
- removeCopyByCommutingDef(CP, CopyMI)) {
+ bool Changed = adjustCopiesBackFrom(CP, CopyMI);
+ bool Shrink = false;
+ if (!Changed)
+ std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
+ if (Changed) {
deleteInstr(CopyMI);
+ if (Shrink) {
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ LiveInterval &DstLI = LIS->getInterval(DstReg);
+ shrinkToUses(&DstLI);
+ LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
+ }
LLVM_DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
@@ -1806,6 +1910,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
LI.removeEmptySubRanges();
}
+
+ // CP.getSrcReg()'s live interval has been merged into CP.getDstReg()'s
+ // live interval. If CP.getSrcReg() is in the ToBeUpdated set, its live
+ // interval is not up-to-date, so we need to update the merged live
+ // interval here.
+ if (ToBeUpdated.count(CP.getSrcReg()))
+ ShrinkMainRange = true;
+
if (ShrinkMainRange) {
LiveInterval &LI = LIS->getInterval(CP.getDstReg());
shrinkToUses(&LI);
@@ -2397,8 +2508,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We normally expect IMPLICIT_DEF values to be live only until the end
// of their block. If the value is really live longer and gets pruned in
// another block, this flag is cleared again.
+ //
+ // Clearing the valid lanes is deferred until it is certain that the
+ // IMPLICIT_DEF can actually be erased.
V.ErasableImplicitDef = true;
- V.ValidLanes &= ~V.WriteLanes;
}
}
}
@@ -2453,20 +2566,25 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
Other.computeAssignment(V.OtherVNI->id, *this);
Val &OtherV = Other.Vals[V.OtherVNI->id];
- // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
- // This shouldn't normally happen, but ProcessImplicitDefs can leave such
- // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
- // technically.
- //
- // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
- // to erase the IMPLICIT_DEF instruction.
- if (OtherV.ErasableImplicitDef && DefMI &&
- DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
- LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
- << " extends into "
- << printMBBReference(*DefMI->getParent())
- << ", keeping it.\n");
- OtherV.ErasableImplicitDef = false;
+ if (OtherV.ErasableImplicitDef) {
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into "
+ << printMBBReference(*DefMI->getParent())
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ } else {
+ // We deferred clearing these lanes in case we needed to save them
+ OtherV.ValidLanes &= ~OtherV.WriteLanes;
+ }
}
// Allow overlapping PHI values. Any real interference would show up in a
@@ -2509,6 +2627,12 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Erase;
}
+ // The remaining checks apply to the lanes, which aren't tracked here.
+ // This was already decided to be OK via the following CR_Replace condition.
+ if (SubRangeJoin)
+ return CR_Replace;
+
// If the lanes written by this instruction were all undef in OtherVNI, it is
// still safe to join the live ranges. This can't be done with a simple value
// mapping, though - OtherVNI will map to multiple values:
@@ -2590,8 +2714,18 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &OtherV = Other.Vals[V.OtherVNI->id];
// We cannot erase an IMPLICIT_DEF if we don't have valid values for all
// its lanes.
- if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
+ if (OtherV.ErasableImplicitDef &&
+ TrackSubRegLiveness &&
+ (OtherV.WriteLanes & ~V.ValidLanes).any()) {
+ LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
+
OtherV.ErasableImplicitDef = false;
+ // The valid lanes written by the implicit_def were speculatively cleared
+ // before, so make this more conservative. It may be better to track this,
+ // I haven't found a testcase where it matters.
+ OtherV.ValidLanes = LaneBitmask::getAll();
+ }
+
OtherV.Pruned = true;
LLVM_FALLTHROUGH;
}
@@ -3290,6 +3424,18 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
|| LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
+void RegisterCoalescer::lateLiveIntervalUpdate() {
+ for (unsigned reg : ToBeUpdated) {
+ if (!LIS->hasInterval(reg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(reg);
+ shrinkToUses(&LI, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+ }
+ ToBeUpdated.clear();
+}
+
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
@@ -3459,12 +3605,14 @@ void RegisterCoalescer::joinAllIntervals() {
}
copyCoalesceInMBB(MBBs[i].MBB);
}
+ lateLiveIntervalUpdate();
coalesceLocals();
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
while (copyCoalesceWorkList(WorkList))
/* empty */ ;
+ lateLiveIntervalUpdate();
}
void RegisterCoalescer::releaseMemory() {
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 51414de518fd..1099e468e885 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -681,8 +681,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
PressureDiff::iterator J;
for (J = std::next(I); J != E && J->isValid(); ++J, ++I)
*I = *J;
- if (J != E)
- *I = *J;
+ *I = PressureChange();
}
}
}
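The one-line RegisterPressure fix addresses a classic erase-by-shifting bug:
when deleting an entry from a fixed-capacity array by sliding the valid tail
left, the vacated slot must be reset to the invalid sentinel, or the last
valid entry survives twice. A toy model of the corrected routine:

    #include <array>

    struct Change {
      int Unit = -1;                      // -1 is the "invalid" sentinel
      bool isValid() const { return Unit >= 0; }
    };

    void eraseAt(std::array<Change, 8> &A, unsigned Pos) {
      unsigned I = Pos;
      for (unsigned J = Pos + 1; J < A.size() && A[J].isValid(); ++J, ++I)
        A[I] = A[J];     // slide the valid tail left by one
      A[I] = Change();   // the fix: always clear the vacated slot; the old
                         // conditional copy left a duplicate behind when the
                         // array was completely full of valid entries
    }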
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index 6a31118cc562..6b9880a8913f 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -40,7 +40,7 @@ INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
char PhysicalRegisterUsageInfo::ID = 0;
-void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) {
+void PhysicalRegisterUsageInfo::setTargetMachine(const LLVMTargetMachine &TM) {
this->TM = &TM;
}
@@ -81,7 +81,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
// Sort the vector to print the analysis in alphabetic order of function name.
llvm::sort(
- FPRMPairVector.begin(), FPRMPairVector.end(),
+ FPRMPairVector,
[](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
return A->first->getName() < B->first->getName();
});
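The switch to the range form of llvm::sort is not purely cosmetic: passing the
container avoids mismatched begin/end mistakes, and llvm::sort (unlike
std::sort) can shuffle the range first in expensive-checks builds to flush out
comparators that depend on the incoming order. A sketch of the call shape,
assuming the usual llvm/ADT/STLExtras.h declaration:

    #include "llvm/ADT/STLExtras.h"
    #include <string>
    #include <utility>
    #include <vector>

    void sortByName(std::vector<std::pair<std::string, int>> &V) {
      // Range overload: equivalent to llvm::sort(V.begin(), V.end(), Cmp).
      llvm::sort(V, [](const auto &A, const auto &B) {
        return A.first < B.first;
      });
    }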
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index cbbbf7c385aa..c356fb57ac6d 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -260,8 +260,14 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
const Value *AllocaPtr,
uint64_t AllocaSize) {
- // All MemIntrinsics have destination address in Arg0 and size in Arg2.
- if (MI->getRawDest() != U) return true;
+ if (auto MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != U && MTI->getRawDest() != U)
+ return true;
+ } else {
+ if (MI->getRawDest() != U)
+ return true;
+ }
+
const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
// Non-constant size => unsafe. FIXME: try SCEV getRange.
if (!Len) return false;
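The reason memory-transfer intrinsics need their own case: for memcpy/memmove
the alloca under analysis may be the source rather than the destination, and
an out-of-bounds read disqualifies it from the safe stack just as a write
does. A deliberately broken illustration of the pattern the old
destination-only check missed:

    #include <cstring>

    void copyOut(char *Dst) {
      char Buf[8];                // candidate for the safe stack
      std::memcpy(Dst, Buf, 16);  // Buf is the *source*; the 16-byte read
                                  // overruns the 8-byte object, so Buf must
                                  // stay off the safe stack (UB on purpose)
    }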
@@ -318,11 +324,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
case Instruction::Invoke: {
ImmutableCallSite CS(I);
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
- continue;
- }
+ if (I->isLifetimeStartOrEnd())
+ continue;
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
@@ -775,6 +778,10 @@ bool SafeStack::run() {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ // Calls must always have a debug location, or else inlining breaks. So
+ // we explicitly set an artificial debug location here.
+ if (DISubprogram *SP = F.getSubprogram())
+ IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
Value *Fn = F.getParent()->getOrInsertFunction(
"__safestack_pointer_address", StackPtrTy->getPointerTo(0));
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 329458778a98..726c38002817 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -46,11 +46,10 @@ const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
}
bool StackColoring::readMarker(Instruction *I, bool *IsStart) {
- auto *II = dyn_cast<IntrinsicInst>(I);
- if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end))
+ if (!I->isLifetimeStartOrEnd())
return false;
+ auto *II = cast<IntrinsicInst>(I);
*IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
return true;
}
@@ -172,7 +171,9 @@ void StackColoring::calculateLocalLiveness() {
BitVector LocalLiveIn;
for (auto *PredBB : predecessors(BB)) {
LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
- assert(I != BlockLiveness.end() && "Predecessor not found");
+ // If a predecessor is unreachable, ignore it.
+ if (I == BlockLiveness.end())
+ continue;
LocalLiveIn |= I->second.LiveOut;
}
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 9387722bfebd..2684f92b3a93 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -77,6 +77,21 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
return new ScalarizeMaskedMemIntrin();
}
+static bool isConstantIntVector(Value *Mask) {
+ Constant *C = dyn_cast<Constant>(Mask);
+ if (!C)
+ return false;
+
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *CElt = C->getAggregateElement(i);
+ if (!CElt || !isa<ConstantInt>(CElt))
+ return false;
+ }
+
+ return true;
+}
+
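A note on why the new helper casts to Constant and walks
getAggregateElement(): constant masks often reach this pass as
ConstantDataVector or ConstantAggregateZero, neither of which is a
ConstantVector node, and getAggregateElement() handles all three
representations uniformly, while undef elements are rejected because they are
not ConstantInt. A minimal sketch against the llvm/IR/Constants.h API:

    #include "llvm/IR/Constants.h"

    // True if lane Idx of a constant mask is known zero. Works for
    // ConstantVector, ConstantDataVector and ConstantAggregateZero alike,
    // where isa<ConstantVector> alone would reject the latter two.
    static bool laneIsKnownZero(llvm::Constant *Mask, unsigned Idx) {
      llvm::Constant *Elt = Mask->getAggregateElement(Idx);
      return Elt && Elt->isNullValue();
    }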
// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
// <16 x i1> %mask, <16 x i32> %passthru)
@@ -85,32 +100,29 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.load, label %else
+// br i1 %2, label %cond.load, label %else
//
// cond.load: ; preds = %0
-// %4 = getelementptr i32* %1, i32 0
-// %5 = load i32* %4
-// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// %3 = getelementptr i32* %1, i32 0
+// %4 = load i32* %3
+// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
// br label %else
//
// else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
-// %7 = extractelement <16 x i1> %mask, i32 1
-// %8 = icmp eq i1 %7, true
-// br i1 %8, label %cond.load1, label %else2
+// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ %passthru, %0 ]
+// %6 = extractelement <16 x i1> %mask, i32 1
+// br i1 %6, label %cond.load1, label %else2
//
// cond.load1: ; preds = %else
-// %9 = getelementptr i32* %1, i32 1
-// %10 = load i32* %9
-// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// %7 = getelementptr i32* %1, i32 1
+// %8 = load i32* %7
+// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
// br label %else2
//
// else2: ; preds = %else, %cond.load1
-// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
-// %12 = extractelement <16 x i1> %mask, i32 2
-// %13 = icmp eq i1 %12, true
-// br i1 %13, label %cond.load4, label %else5
+// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
+// %10 = extractelement <16 x i1> %mask, i32 2
+// br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(CallInst *CI) {
Value *Ptr = CI->getArgOperand(0);
@@ -119,25 +131,19 @@ static void scalarizeMaskedLoad(CallInst *CI) {
Value *Src0 = CI->getArgOperand(3);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
- assert(VecType && "Unexpected return type of masked load intrinsic");
+ VectorType *VecType = cast<VectorType>(CI->getType());
- Type *EltTy = CI->getType()->getVectorElementType();
+ Type *EltTy = VecType->getElementType();
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
// Short-cut if the mask is all-true.
- bool IsAllOnesMask =
- isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
CI->replaceAllUsesWith(NewI);
CI->eraseFromParent();
@@ -145,21 +151,19 @@ static void scalarizeMaskedLoad(CallInst *CI) {
}
// Adjust alignment for the scalar instruction.
- AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+ AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
- if (isa<ConstantVector>(Mask)) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
@@ -167,35 +171,21 @@ static void scalarizeMaskedLoad(CallInst *CI) {
VResult =
Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_load = icmp eq i1 %mask_1, true
- // br i1 %to_load, label %cond.load, label %else
+ // br i1 %mask_1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
// Create "cond" block
//
@@ -203,30 +193,34 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+ "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
+ BasicBlock *PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
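The structural change in this function: VResult is seeded with the passthru
operand instead of undef, so masked-off lanes already hold their final values
and the trailing select over the mask disappears. A scalar toy model (an
illustration, not the emitted IR) of why the two forms are equivalent:

    #include <array>
    #include <cstddef>

    template <std::size_t N>
    std::array<int, N> maskedLoad(const std::array<int, N> &Mem,
                                  const std::array<bool, N> &Mask,
                                  const std::array<int, N> &Passthru) {
      std::array<int, N> Result = Passthru; // was: undef + select at the end
      for (std::size_t I = 0; I != N; ++I)
        if (Mask[I])
          Result[I] = Mem[I];               // loaded lanes overwrite passthru
      return Result;                        // no final select needed
    }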
@@ -238,24 +232,22 @@ static void scalarizeMaskedLoad(CallInst *CI) {
//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.store, label %else
+// br i1 %2, label %cond.store, label %else
//
// cond.store: ; preds = %0
-// %4 = extractelement <16 x i32> %val, i32 0
-// %5 = getelementptr i32* %1, i32 0
-// store i32 %4, i32* %5
+// %3 = extractelement <16 x i32> %val, i32 0
+// %4 = getelementptr i32* %1, i32 0
+// store i32 %3, i32* %4
// br label %else
//
// else: ; preds = %0, %cond.store
-// %6 = extractelement <16 x i1> %mask, i32 1
-// %7 = icmp eq i1 %6, true
-// br i1 %7, label %cond.store1, label %else2
+// %5 = extractelement <16 x i1> %mask, i32 1
+// br i1 %5, label %cond.store1, label %else2
//
// cond.store1: ; preds = %else
-// %8 = extractelement <16 x i32> %val, i32 1
-// %9 = getelementptr i32* %1, i32 1
-// store i32 %8, i32* %9
+// %6 = extractelement <16 x i32> %val, i32 1
+// %7 = getelementptr i32* %1, i32 1
+// store i32 %6, i32* %7
// br label %else2
// . . .
static void scalarizeMaskedStore(CallInst *CI) {
@@ -265,8 +257,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
Value *Mask = CI->getArgOperand(3);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(Src->getType());
- assert(VecType && "Unexpected data type in masked store intrinsic");
+ VectorType *VecType = cast<VectorType>(Src->getType());
Type *EltTy = VecType->getElementType();
@@ -277,26 +268,23 @@ static void scalarizeMaskedStore(CallInst *CI) {
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
// Short-cut if the mask is all-true.
- bool IsAllOnesMask =
- isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Builder.CreateAlignedStore(Src, Ptr, AlignVal);
CI->eraseFromParent();
return;
}
// Adjust alignment for the scalar instruction.
- AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits() / 8);
+ AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- if (isa<ConstantVector>(Mask)) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
Value *Gep =
@@ -311,13 +299,10 @@ static void scalarizeMaskedStore(CallInst *CI) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_store = icmp eq i1 %mask_1, true
- // br i1 %to_store, label %cond.store, label %else
+ // br i1 %mask_1, label %cond.store, label %else
//
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
// Create "cond" block
//
@@ -339,7 +324,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
IfBlock = NewIfBlock;
}
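These expansions also drop the 'icmp eq i1 %x, true' that previously guarded
each branch: comparing an i1 against true is the identity, so the extracted
mask lane can feed the conditional branch directly. The scalar analogue:

    bool shouldStore(bool MaskLane) {
      // before: return MaskLane == true;   (redundant compare)
      return MaskLane;                      // identical behavior
    }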
@@ -352,30 +337,28 @@ static void scalarizeMaskedStore(CallInst *CI) {
// to a chain of basic blocks, loading elements one by one if
// the appropriate mask bit is set
//
-// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> %Mask, i32 0
-// % ToLoad0 = icmp eq i1 % Mask0, true
-// br i1 % ToLoad0, label %cond.load, label %else
+// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// % Load0 = load i32, i32* % Ptr0, align 4
-// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// %Load0 = load i32, i32* %Ptr0, align 4
+// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
// br label %else
//
// else:
-// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
-// % Mask1 = extractelement <16 x i1> %Mask, i32 1
-// % ToLoad1 = icmp eq i1 % Mask1, true
-// br i1 % ToLoad1, label %cond.load1, label %else2
+// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// % Load1 = load i32, i32* % Ptr1, align 4
-// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// %Load1 = load i32, i32* %Ptr1, align 4
+// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
// br label %else2
// . . .
-// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
static void scalarizeMaskedGather(CallInst *CI) {
Value *Ptrs = CI->getArgOperand(0);
@@ -383,32 +366,24 @@ static void scalarizeMaskedGather(CallInst *CI) {
Value *Mask = CI->getArgOperand(2);
Value *Src0 = CI->getArgOperand(3);
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
-
- assert(VecType && "Unexpected return type of masked load intrinsic");
+ VectorType *VecType = cast<VectorType>(CI->getType());
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
unsigned VectorWidth = VecType->getNumElements();
// Short-cut if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
"Ptr" + Twine(Idx));
@@ -417,35 +392,20 @@ static void scalarizeMaskedGather(CallInst *CI) {
VResult = Builder.CreateInsertElement(
VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
- // %ToLoad1 = icmp eq i1 %Mask1, true
- // br i1 %ToLoad1, label %cond.load, label %else
+ // br i1 %Mask1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToLoad" + Twine(Idx));
// Create "cond" block
//
@@ -453,31 +413,33 @@ static void scalarizeMaskedGather(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
"Ptr" + Twine(Idx));
LoadInst *Load =
Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
- "Res" + Twine(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
+ BasicBlock *PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
@@ -487,26 +449,24 @@ static void scalarizeMaskedGather(CallInst *CI) {
// to a chain of basic blocks that store elements one by one if
// the appropriate mask bit is set.
//
-// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> % Mask, i32 0
-// % ToStore0 = icmp eq i1 % Mask0, true
-// br i1 %ToStore0, label %cond.store, label %else
+// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.store, label %else
//
// cond.store:
-// % Elt0 = extractelement <16 x i32> %Src, i32 0
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// store i32 %Elt0, i32* % Ptr0, align 4
+// %Elt0 = extractelement <16 x i32> %Src, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* %Ptr0, align 4
// br label %else
//
// else:
-// % Mask1 = extractelement <16 x i1> % Mask, i32 1
-// % ToStore1 = icmp eq i1 % Mask1, true
-// br i1 % ToStore1, label %cond.store1, label %else2
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.store1, label %else2
//
// cond.store1:
-// % Elt1 = extractelement <16 x i32> %Src, i32 1
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// store i32 % Elt1, i32* % Ptr1, align 4
+// %Elt1 = extractelement <16 x i32> %Src, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
// . . .
static void scalarizeMaskedScatter(CallInst *CI) {
@@ -531,11 +491,9 @@ static void scalarizeMaskedScatter(CallInst *CI) {
unsigned VectorWidth = Src->getType()->getVectorNumElements();
// Short-cut if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
"Elt" + Twine(Idx));
@@ -546,24 +504,21 @@ static void scalarizeMaskedScatter(CallInst *CI) {
CI->eraseFromParent();
return;
}
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
- // % ToStore = icmp eq i1 % Mask1, true
- // br i1 % ToStore, label %cond.store, label %else
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
+ // br i1 %Mask1, label %cond.store, label %else
//
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToStore" + Twine(Idx));
// Create "cond" block
//
- // % Elt1 = extractelement <16 x i32> %Src, i32 1
- // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
- // %store i32 % Elt1, i32* % Ptr1
+ // %Elt1 = extractelement <16 x i32> %Src, i32 1
+ // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 %Elt1, i32* %Ptr1
//
BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
Builder.SetInsertPoint(InsertPt);
@@ -578,7 +533,7 @@ static void scalarizeMaskedScatter(CallInst *CI) {
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
IfBlock = NewIfBlock;
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 46064012d9d8..6c135b3d69d6 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -68,39 +68,36 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
return &TII->get(Node->getMachineOpcode());
}
-LLVM_DUMP_METHOD
-raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+LLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const {
switch (getKind()) {
- case Data: OS << "Data"; break;
- case Anti: OS << "Anti"; break;
- case Output: OS << "Out "; break;
- case Order: OS << "Ord "; break;
+ case Data: dbgs() << "Data"; break;
+ case Anti: dbgs() << "Anti"; break;
+ case Output: dbgs() << "Out "; break;
+ case Order: dbgs() << "Ord "; break;
}
switch (getKind()) {
case Data:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
if (TRI && isAssignedRegDep())
- OS << " Reg=" << printReg(getReg(), TRI);
+ dbgs() << " Reg=" << printReg(getReg(), TRI);
break;
case Anti:
case Output:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
break;
case Order:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
switch(Contents.OrdKind) {
- case Barrier: OS << " Barrier"; break;
+ case Barrier: dbgs() << " Barrier"; break;
case MayAliasMem:
- case MustAliasMem: OS << " Memory"; break;
- case Artificial: OS << " Artificial"; break;
- case Weak: OS << " Weak"; break;
- case Cluster: OS << " Cluster"; break;
+ case MustAliasMem: dbgs() << " Memory"; break;
+ case Artificial: dbgs() << " Artificial"; break;
+ case Weak: dbgs() << " Weak"; break;
+ case Cluster: dbgs() << " Cluster"; break;
}
break;
}
-
- return OS;
}
bool SUnit::addPred(const SDep &D, bool Required) {
@@ -337,33 +334,7 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
-raw_ostream &SUnit::print(raw_ostream &OS,
- const SUnit *Entry, const SUnit *Exit) const {
- if (this == Entry)
- OS << "EntrySU";
- else if (this == Exit)
- OS << "ExitSU";
- else
- OS << "SU(" << NodeNum << ")";
- return OS;
-}
-
-LLVM_DUMP_METHOD
-raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const {
- return print(OS, &G->EntrySU, &G->ExitSU);
-}
-
-LLVM_DUMP_METHOD
-void SUnit::dump(const ScheduleDAG *G) const {
- print(dbgs(), G);
- dbgs() << ": ";
- G->dumpNode(this);
-}
-
-LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
- dump(G);
-
+LLVM_DUMP_METHOD void SUnit::dumpAttributes() const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
if (WeakPredsLeft)
@@ -374,21 +345,38 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << getDepth() << "\n";
dbgs() << " Height : " << getHeight() << "\n";
+}
+
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const {
+ if (&SU == &EntrySU)
+ dbgs() << "EntrySU";
+ else if (&SU == &ExitSU)
+ dbgs() << "ExitSU";
+ else
+ dbgs() << "SU(" << SU.NodeNum << ")";
+}
- if (Preds.size() != 0) {
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const {
+ dumpNode(SU);
+ SU.dumpAttributes();
+ if (SU.Preds.size() > 0) {
dbgs() << " Predecessors:\n";
- for (const SDep &Dep : Preds) {
+ for (const SDep &Dep : SU.Preds) {
dbgs() << " ";
- Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
- Dep.print(dbgs(), G->TRI); dbgs() << '\n';
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
}
}
- if (Succs.size() != 0) {
+ if (SU.Succs.size() > 0) {
dbgs() << " Successors:\n";
- for (const SDep &Dep : Succs) {
+ for (const SDep &Dep : SU.Succs) {
dbgs() << " ";
- Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
- Dep.print(dbgs(), G->TRI); dbgs() << '\n';
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
}
}
}
@@ -406,7 +394,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
}
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has not been scheduled!\n";
AnyNotSched = true;
}
@@ -415,7 +403,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
unsigned(std::numeric_limits<int>::max())) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
@@ -424,7 +412,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
if (SUnit.NumSuccsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has successors left!\n";
AnyNotSched = true;
}
@@ -432,7 +420,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
if (SUnit.NumPredsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has predecessors left!\n";
AnyNotSched = true;
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index d1c5ddabb975..99406ed1496a 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -234,6 +234,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = MF.getSubtarget();
+ // Only use a non-zero latency for real defs/uses, in contrast to
+ // "fake" operands added by regalloc.
+ const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
+ bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
+ !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
@@ -257,11 +262,18 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
}
- Dep.setLatency(
- SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
- UseOp));
+ const MCInstrDesc *UseMIDesc =
+ (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
+ bool ImplicitPseudoUse =
+ (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
+ !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
+ if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
+ Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp));
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ } else
+ Dep.setLatency(0);
- ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
}
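The "fake operand" test above can be read on its own: an operand index at or
beyond MCInstrDesc::getNumOperands() is not part of the static instruction
description, and if the description also does not declare the register as an
implicit def, the operand was appended later (typically by the register
allocator) and should contribute no latency. A sketch of the def-side
predicate, assuming the MCInstrDesc API used in the hunk:

    #include "llvm/MC/MCInstrDesc.h"

    static bool isImplicitPseudoDef(const llvm::MCInstrDesc &Desc,
                                    unsigned OperIdx, unsigned PhysReg) {
      // Past the declared operands and not a declared implicit def of this
      // physical register => appended operand, schedule with zero latency.
      return OperIdx >= Desc.getNumOperands() &&
             !Desc.hasImplicitDefOfPhysReg(PhysReg);
    }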
@@ -996,7 +1008,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
for (auto &I : loads)
for (auto *SU : I.second)
NodeNums.push_back(SU->NodeNum);
- llvm::sort(NodeNums.begin(), NodeNums.end());
+ llvm::sort(NodeNums);
// The N last elements in NodeNums will be removed, and the SU with
// the lowest NodeNum of them will become the new BarrierChain to
@@ -1097,10 +1109,22 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
}
}
-void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
- // Cannot completely remove virtual function even in release mode.
+void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dumpNodeName(SU);
+ dbgs() << ": ";
+ SU.getInstr()->dump();
+#endif
+}
+
+void ScheduleDAGInstrs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- SU->getInstr()->dump();
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
#endif
}
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b8bfe69a76e1..4301372179b8 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -157,8 +157,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (!freeUnits) {
LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
- LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
- LLVM_DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(DAG->dumpNode(*SU));
return Hazard;
}
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a8c4b85df321..ff5505c97721 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@@ -83,6 +84,7 @@ STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
+STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
@@ -249,6 +251,11 @@ namespace {
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ // Scalars have size 0 to distinguish from singleton vectors.
+ SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
+ bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
+ bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
+
/// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
@@ -257,8 +264,9 @@ namespace {
/// \param EltNo index of the vector element to load.
/// \param OriginalLoad load that EVE came from to be replaced.
/// \returns EVE on success, SDValue() on failure.
- SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
+ SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -285,6 +293,8 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
+ SDValue visitADDSAT(SDNode *N);
+ SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitUADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
@@ -318,6 +328,7 @@ namespace {
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
+ SDValue visitFunnelShift(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
@@ -350,6 +361,7 @@ namespace {
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitFPOW(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
@@ -364,6 +376,8 @@ namespace {
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMINNUM(SDNode *N);
SDValue visitFMAXNUM(SDNode *N);
+ SDValue visitFMINIMUM(SDNode *N);
+ SDValue visitFMAXIMUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -393,7 +407,7 @@ namespace {
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1);
+ SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -401,11 +415,14 @@ namespace {
SDValue foldVSelectOfConstants(SDNode *N);
SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
- SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
+ SDValue convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
@@ -455,7 +472,6 @@ namespace {
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
- SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
@@ -482,6 +498,10 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
+ // Helper for findBetterNeighborChains. Walk up the store chain and add
+ // additional chained stores that do not overlap and can be parallelized.
+ bool parallelizeChainedStores(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -515,7 +535,7 @@ namespace {
EVT &MemVT, unsigned ShAmt = 0);
/// Used by BackwardsPropagateMask to find suitable loads.
- bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
+ bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
@@ -865,12 +885,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-static SDValue peekThroughBitcast(SDValue V) {
- while (V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
@@ -901,50 +915,23 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
return true;
}
-// Determines if it is a constant null integer or a splatted vector of a
-// constant null integer (with no undefs).
-// Build vector implicit truncation is not an issue for null values.
-static bool isNullConstantOrNullSplatConstant(SDValue N) {
- // TODO: may want to use peekThroughBitcast() here.
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isNullValue();
- return false;
-}
-
-// Determines if it is a constant integer of one or a splatted vector of a
-// constant integer of one (with no undefs).
-// Do not permit build vector implicit truncation.
-static bool isOneConstantOrOneSplatConstant(SDValue N) {
- // TODO: may want to use peekThroughBitcast() here.
- unsigned BitWidth = N.getScalarValueSizeInBits();
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
- return false;
-}
-
-// Determines if it is a constant integer of all ones or a splatted vector of a
-// constant integer of all ones (with no undefs).
-// Do not permit build vector implicit truncation.
-static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
- N = peekThroughBitcast(N);
- unsigned BitWidth = N.getScalarValueSizeInBits();
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isAllOnesValue() &&
- Splat->getAPIntValue().getBitWidth() == BitWidth;
- return false;
-}
-
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
-static bool isAnyConstantBuildVector(const SDNode *N) {
- return ISD::isBuildVectorOfConstantSDNodes(N) ||
- ISD::isBuildVectorOfConstantFPSDNodes(N);
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ return isConstantOrConstantVector(V, NoOpaques) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1) {
+ SDValue N1, SDNodeFlags Flags) {
+ // Don't reassociate reductions.
+ if (Flags.hasVectorReduction())
+ return SDValue();
+
EVT VT = N0.getValueType();
- if (N0.getOpcode() == Opc) {
+ if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
@@ -964,7 +951,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
}
}
- if (N1.getOpcode() == Opc) {
+ if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
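The transform being guarded, in scalar form: (x op c1) op c2 reassociates to
x op (c1 op c2), letting the two constants fold at compile time. The new
checks opt vector-reduction nodes out, presumably because the reduction flag
marks a tree shape that later pattern matching must still recognize. A scalar
sketch of the fold itself:

    int before(int X) { return (X + 3) + 5; } // two runtime adds if unoptimized
    int after(int X) { return X + 8; }        // reassociated, constants folded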
@@ -1501,6 +1488,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
+ case ISD::SADDSAT:
+ case ISD::UADDSAT: return visitADDSAT(N);
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
case ISD::UADDO: return visitUADDO(N);
case ISD::SUBC: return visitSUBC(N);
@@ -1532,6 +1523,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::FSHL:
+ case ISD::FSHR: return visitFunnelShift(N);
case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
@@ -1564,6 +1557,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::FPOW: return visitFPOW(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
@@ -1576,6 +1570,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FFLOOR: return visitFFLOOR(N);
case ISD::FMINNUM: return visitFMINNUM(N);
case ISD::FMAXNUM: return visitFMAXNUM(N);
+ case ISD::FMINIMUM: return visitFMINIMUM(N);
+ case ISD::FMAXIMUM: return visitFMAXIMUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
@@ -1855,8 +1851,11 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
// can be tried again once they have new operands.
AddUsersToWorklist(N);
do {
+ // Do as a single replacement to avoid rewalking use lists.
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
+ Ops.push_back(N->getOperand(i));
+ DAG.ReplaceAllUsesWith(N, Ops.data());
} while (!N->use_empty());
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -1870,17 +1869,7 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
- auto BinOpcode = BO->getOpcode();
- assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
- BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
- BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
- BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
- BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
- BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
- BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
- BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
- BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
- "Unexpected binary operator");
+ assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
@@ -1910,11 +1899,11 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
 // propagate non-constant operands into select. I.e.:
// and (select Cond, 0, -1), X --> select Cond, 0, X
// or X, (select Cond, -1, 0) --> select Cond, -1, X
- bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
- (isNullConstantOrNullSplatConstant(CT) ||
- isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
- (isNullConstantOrNullSplatConstant(CF) ||
- isAllOnesConstantOrAllOnesSplatConstant(CF));
+ auto BinOpcode = BO->getOpcode();
+ bool CanFoldNonConst =
+ (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+ (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
+ (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
@@ -2009,10 +1998,8 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// The shift must be of a 'not' value.
- // TODO: Use isBitwiseNot() if it works with vectors.
SDValue Not = ShiftOp.getOperand(0);
- if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
- !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+ if (!Not.hasOneUse() || !isBitwiseNot(Not))
return SDValue();
// The shift must be moving the sign bit to the least-significant-bit.
@@ -2085,7 +2072,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// add (zext i1 X), -1 -> sext (not i1 X)
// because most (?) targets generate better code for the zext form.
if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
- isOneConstantOrOneSplatConstant(N1)) {
+ isOneOrOneSplat(N1)) {
SDValue X = N0.getOperand(0);
if ((!LegalOperations ||
(TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
@@ -2110,17 +2097,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return NewSel;
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
+ if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
@@ -2178,7 +2163,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
// fold (add (xor a, -1), 1) -> (sub 0, a)
- if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
@@ -2191,6 +2176,49 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitADDSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (add_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add_sat x, undef) -> -1
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // canonicalize constant to RHS
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+ // fold (add_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (add_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ // If it cannot overflow, transform into an add.
+ if (Opcode == ISD::UADDSAT)
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
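As a cross-check on the new saturating-add folds, here is a minimal scalar model in plain C++ (the uaddsat helper is illustrative, standing in for ISD::UADDSAT on a 32-bit type; it is not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Unsigned saturating add: clamp to the maximum value instead of wrapping.
    static uint32_t uaddsat(uint32_t a, uint32_t b) {
      uint32_t r = a + b;
      return r < a ? UINT32_MAX : r;
    }

    int main() {
      // fold (add_sat x, 0) -> x
      assert(uaddsat(42, 0) == 42);
      // When the add provably cannot overflow (OFK_Never above), the
      // saturating add is equivalent to a plain wrapping add.
      assert(uaddsat(1, 2) == 1 + 2);
      // Saturation only matters when the true sum would wrap.
      assert(uaddsat(UINT32_MAX, 1) == UINT32_MAX);
      return 0;
    }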
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
bool Masked = false;
@@ -2235,7 +2263,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0,
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
@@ -2248,8 +2276,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits &&
- isOneConstantOrOneSplatConstant(N1->getOperand(1)))
+ if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
}
@@ -2380,7 +2407,7 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
- if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT),
N0.getOperand(0));
@@ -2539,8 +2566,7 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
 // Since it may not be valid to emit a fold to zero for vector initializers,
 // check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
- SelectionDAG &DAG, bool LegalOperations,
- bool LegalTypes) {
+ SelectionDAG &DAG, bool LegalOperations) {
if (!VT.isVector())
return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
@@ -2567,7 +2593,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// fold (sub c1, c2) -> c1-c2
@@ -2586,7 +2612,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
- if (isNullConstantOrNullSplatConstant(N0)) {
+ if (isNullOrNullSplat(N0)) {
unsigned BitWidth = VT.getScalarSizeInBits();
// Right-shifting everything out but the sign bit followed by negation is
// the same as flipping arithmetic/logical shift type without the negation:
@@ -2617,12 +2643,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (isAllOnesConstantOrAllOnesSplatConstant(N0))
+ if (isAllOnesOrAllOnesSplat(N0))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (A - (0-B)) -> A+B
- if (N1.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
// fold A-(A-B) -> B
@@ -2676,14 +2701,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
}
if (N1.getOperand(1).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
+ isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0),
N1.getOperand(1).getOperand(1));
@@ -2756,6 +2781,43 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (sub_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub_sat x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (sub_sat x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // fold (sub_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (sub_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2931,6 +2993,39 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
+ // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // mul x, (2^N + 1) --> add (shl x, N), x
+ // mul x, (2^N - 1) --> sub (shl x, N), x
+ // Examples: x * 33 --> (x << 5) + x
+ // x * 15 --> (x << 4) - x
+ // x * -33 --> -((x << 5) + x)
+  //           x * -15 --> -((x << 4) - x) ; this reduces to x - (x << 4)
+ if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+ // TODO: We could handle more general decomposition of any constant by
+ // having the target set a limit on number of ops and making a
+ // callback to determine that sequence (similar to sqrt expansion).
+ unsigned MathOp = ISD::DELETED_NODE;
+ APInt MulC = ConstValue1.abs();
+ if ((MulC - 1).isPowerOf2())
+ MathOp = ISD::ADD;
+ else if ((MulC + 1).isPowerOf2())
+ MathOp = ISD::SUB;
+
+ if (MathOp != ISD::DELETED_NODE) {
+ unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
+ : (MulC + 1).logBase2();
+ assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
+ "Not expecting multiply-by-constant that could have simplified");
+ SDLoc DL(N);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ShAmt, DL, VT));
+ SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
+ if (ConstValue1.isNegative())
+ R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
+ return R;
+ }
+ }
+
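A quick sanity check of the decomposition in plain C++ (unsigned 32-bit wraparound arithmetic; standalone, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 1u, 7u, 0xDEADBEEFu}) {
        assert(x * 33u == (x << 5) + x);                  // mul by 2^5 + 1
        assert(x * 15u == (x << 4) - x);                  // mul by 2^4 - 1
        assert(x * (uint32_t)-33 == 0u - ((x << 5) + x)); // negated, as above
      }
      return 0;
    }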
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N0.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
@@ -2974,7 +3069,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
+ if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -3076,7 +3171,16 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+ unsigned Opc = N->getOpcode();
+ bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // X / undef -> undef
+ // X % undef -> undef
+ // X / 0 -> undef
+ // X % 0 -> undef
+ // NOTE: This includes vectors where any divisor element is zero/undef.
+ if (DAG.isUndef(Opc, {N0, N1}))
return DAG.getUNDEF(VT);
// undef / X -> 0
@@ -3084,6 +3188,26 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
+ // 0 / X -> 0
+ // 0 % X -> 0
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ if (N0C && N0C->isNullValue())
+ return N0;
+
+ // X / X -> 1
+ // X % X -> 0
+ if (N0 == N1)
+ return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
+
+ // X / 1 -> X
+ // X % 1 -> 0
+ // If this is a boolean op (single-bit element type), we can't have
+ // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+ // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
+ // it's a 1.
+ if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+ return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
+
return SDValue();
}
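The new scalar identities are exactly the C integer-division rules, shown here as a standalone check (the i1 case follows because a boolean divisor that is not zero, which would be immediate UB, can only be 1):

    #include <cassert>

    int main() {
      for (unsigned x : {1u, 2u, 42u}) {
        assert(0u / x == 0u && 0u % x == 0u); // 0 / X -> 0, 0 % X -> 0
        assert(x / x == 1u && x % x == 0u);   // X / X -> 1, X % X -> 0
        assert(x / 1u == x && x % 1u == 0u);  // X / 1 -> X, X % 1 -> 0
      }
      return 0;
    }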
@@ -3105,9 +3229,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
- // fold (sdiv X, 1) -> X
- if (N1C && N1C->isOne())
- return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
@@ -3128,8 +3249,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
- if (SDValue V = visitSDIVLike(N0, N1, N))
+ if (SDValue V = visitSDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
return V;
+ }
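The rewrite of the paired remainder node relies on the truncating-division identity rem == dividend - quotient * divisor, which C++ guarantees for built-in integer division; a standalone check (not part of the patch):

    #include <cassert>

    int main() {
      for (int a : {7, -7, 100, -101})
        for (int b : {3, -3, 10}) {
          int q = a / b; // truncates toward zero
          assert(a % b == a - q * b);
        }
      return 0;
    }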
// sdiv, srem -> sdivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
@@ -3148,8 +3280,6 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT CCVT = getSetCCResultType(VT);
unsigned BitWidth = VT.getScalarSizeInBits();
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
@@ -3166,8 +3296,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
- if (!N->getFlags().hasExact() &&
- ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
+ if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
@@ -3218,7 +3347,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -3245,9 +3375,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
- // fold (udiv X, 1) -> X
- if (N1C && N1C->isOne())
- return N0;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
if (N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -3260,8 +3387,19 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (SDValue V = visitUDIVLike(N0, N1, N))
+ if (SDValue V = visitUDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
return V;
+ }
 // udiv, urem -> udivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
@@ -3278,8 +3416,6 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -3311,7 +3447,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -3380,8 +3517,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
- if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
- OptimizedDiv.getOpcode() != ISD::SDIVREM) {
+ if (OptimizedDiv.getNode()) {
+ // If the equivalent Div node also exists, update its users.
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
+ { N0, N1 }))
+ CombineTo(DivNode, OptimizedDiv);
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(OptimizedDiv.getNode());
@@ -3468,6 +3609,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
+ // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
+ SDLoc DL(N);
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ SDValue SRLAmt = DAG.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
+
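Why this fold is valid: mulhu returns the high half of the widened product, so multiplying by 2^c shifts x left by c across the word boundary, leaving x >> (bitwidth - c) in the high word. A standalone 32-bit check (mulhu32 is an illustrative stand-in for ISD::MULHU):

    #include <cassert>
    #include <cstdint>

    // High half of a 32x32->64 unsigned multiply, i.e. scalar ISD::MULHU.
    static uint32_t mulhu32(uint32_t a, uint32_t b) {
      return (uint32_t)(((uint64_t)a * b) >> 32);
    }

    int main() {
      for (uint32_t x : {1u, 0x80000000u, 0xDEADBEEFu})
        for (unsigned c = 1; c < 32; ++c) // c == 0 gives a zero high half
          assert(mulhu32(x, 1u << c) == x >> (32 - c));
      return 0;
    }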
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
@@ -3495,18 +3649,16 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
bool HiExists = N->hasAnyUseOfValue(1);
- if (!HiExists &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
+ if (!HiExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
return CombineTo(N, Res, Res);
}
// If the low half is not needed, just compute the high half.
bool LoExists = N->hasAnyUseOfValue(0);
- if (!LoExists &&
- (!LegalOperations ||
- TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ if (!LoExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
return CombineTo(N, Res, Res);
}
@@ -3522,7 +3674,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
- TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
return CombineTo(N, LoOpt, LoOpt);
}
@@ -3532,7 +3684,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
- TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
return CombineTo(N, HiOpt, HiOpt);
}
@@ -3664,59 +3816,94 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return SDValue();
}
-/// If this is a binary operator with two operands of the same opcode, try to
-/// simplify it.
-SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+/// If this is a bitwise logic instruction and both operands have the same
+/// opcode, try to sink the other opcode after the logic instruction.
+SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+ unsigned LogicOpcode = N->getOpcode();
+ unsigned HandOpcode = N0.getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR) && "Expected logic opcode");
+ assert(HandOpcode == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
- if (N0.getNumOperands() == 0) return SDValue();
-
- // For each of OP in AND/OR/XOR:
- // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
- // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
- // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
- //
- // do not sink logical op inside of a vector extend, since it may combine
- // into a vsetcc.
- EVT Op0VT = N0.getOperand(0).getValueType();
- if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND ||
- N0.getOpcode() == ISD::BSWAP ||
- // Avoid infinite looping with PromoteIntBinOp.
- (N0.getOpcode() == ISD::ANY_EXTEND &&
- (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
- (N0.getOpcode() == ISD::TRUNCATE &&
- (!TLI.isZExtFree(VT, Op0VT) ||
- !TLI.isTruncateFree(Op0VT, VT)) &&
- TLI.isTypeLegal(Op0VT))) &&
- !VT.isVector() &&
- Op0VT == N1.getOperand(0).getValueType() &&
- (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
- SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0), N1.getOperand(0));
- AddToWorklist(ORNode.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
- }
-
- // For each of OP in SHL/SRL/SRA/AND...
- // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
- // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
- // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
- if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
- N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ if (N0.getNumOperands() == 0)
+ return SDValue();
+
+  // FIXME: We should check the number of uses of the operands to avoid
+  // increasing the instruction count across all of these transforms.
+
+ // Handle size-changing casts.
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N1.getOperand(0);
+ EVT XVT = X.getValueType();
+ SDLoc DL(N);
+ if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
+ HandOpcode == ISD::SIGN_EXTEND) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching integer source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization. Don't ever
+ // create an unsupported vector op.
+ if ((VT.isVector() || LegalOperations) &&
+ !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
+ return SDValue();
+ // Avoid infinite looping with PromoteIntBinOp.
+ // TODO: Should we apply desirable/legal constraints to all opcodes?
+ if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
+ !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ return SDValue();
+ // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
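The cast case rests on bitwise logic commuting with zext/sext/aext when both sources have the same narrow type; a standalone zext check (uint8_t -> uint32_t stands in for the narrow and wide VTs):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned xi = 0; xi < 256; ++xi)
        for (unsigned yi = 0; yi < 256; yi += 17) {
          uint8_t x = (uint8_t)xi, y = (uint8_t)yi;
          // logic_op (zext X), (zext Y) --> zext (logic_op X, Y)
          assert(((uint32_t)x & (uint32_t)y) == (uint32_t)(uint8_t)(x & y));
          assert(((uint32_t)x | (uint32_t)y) == (uint32_t)(uint8_t)(x | y));
          assert(((uint32_t)x ^ (uint32_t)y) == (uint32_t)(uint8_t)(x ^ y));
        }
      return 0;
    }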
+
+ // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
+ if (HandOpcode == ISD::TRUNCATE) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization.
+ if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
+ return SDValue();
+ // Be extra careful sinking truncate. If it's free, there's no benefit in
+ // widening a binop. Also, don't create a logic op on an illegal type.
+ if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
+ return SDValue();
+ if (!TLI.isTypeLegal(XVT))
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // For binops SHL/SRL/SRA/AND:
+ // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
+ if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
+ HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
- SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0), N1.getOperand(0));
- AddToWorklist(ORNode.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ORNode, N0.getOperand(1));
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
+ }
+
+ // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
+ if (HandOpcode == ISD::BSWAP) {
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
@@ -3726,21 +3913,12 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
- if ((N0.getOpcode() == ISD::BITCAST ||
- N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
- SDValue In0 = N0.getOperand(0);
- SDValue In1 = N1.getOperand(0);
- EVT In0Ty = In0.getValueType();
- EVT In1Ty = In1.getValueType();
- SDLoc DL(N);
- // If both incoming values are integers, and the original types are the
- // same.
- if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
- SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
- SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
- AddToWorklist(Op.getNode());
- return BC;
+ // Input types must be integer and the same.
+ if (XVT.isInteger() && XVT == Y.getValueType()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
}
}
@@ -3756,61 +3934,44 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// If both shuffles use the same mask, and both shuffles have the same first
// or second operand, then it might still be profitable to move the shuffle
// after the xor/and/or operation.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
- ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
- ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
-
- assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+ auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
+ assert(X.getValueType() == Y.getValueType() &&
"Inputs to shuffles are not the same type");
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
// Check also that shuffles have only one use to avoid introducing extra
// instructions.
- if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
- SVN0->getMask().equals(SVN1->getMask())) {
- SDValue ShOp = N0->getOperand(1);
-
- // Don't try to fold this node if it requires introducing a
- // build vector of all zeros that might be illegal at this stage.
- if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
- if (!LegalTypes)
- ShOp = DAG.getConstant(0, SDLoc(N), VT);
- else
- ShOp = SDValue();
- }
+ if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
+ !SVN0->getMask().equals(SVN1->getMask()))
+ return SDValue();
- // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
- // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
- // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
- if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
- SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0->getOperand(0), N1->getOperand(0));
- AddToWorklist(NewNode.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
- SVN0->getMask());
- }
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ SDValue ShOp = N0.getOperand(1);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
- // Don't try to fold this node if it requires introducing a
- // build vector of all zeros that might be illegal at this stage.
- ShOp = N0->getOperand(0);
- if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
- if (!LegalTypes)
- ShOp = DAG.getConstant(0, SDLoc(N), VT);
- else
- ShOp = SDValue();
- }
+ // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
+ }
- // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
- // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
- // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
- if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
- SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0->getOperand(1), N1->getOperand(1));
- AddToWorklist(NewNode.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
- SVN0->getMask());
- }
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0.getOperand(0);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
+ if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
+ N1.getOperand(1));
+ return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
}
}
@@ -3846,8 +4007,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
bool IsInteger = OpVT.isInteger();
if (LR == RR && CC0 == CC1 && IsInteger) {
- bool IsZero = isNullConstantOrNullSplatConstant(LR);
- bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
+ bool IsZero = isNullOrNullSplat(LR);
+ bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
// All bits clear?
bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
@@ -4149,7 +4310,7 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
}
bool DAGCombiner::SearchForAndLoads(SDNode *N,
- SmallPtrSetImpl<LoadSDNode*> &Loads,
+ SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask,
SDNode *&NodeToMask) {
@@ -4186,7 +4347,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Use LE to convert equal sized loads to zext.
if (ExtVT.bitsLE(Load->getMemoryVT()))
- Loads.insert(Load);
+ Loads.push_back(Load);
continue;
}
@@ -4251,7 +4412,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
if (isa<LoadSDNode>(N->getOperand(0)))
return false;
- SmallPtrSet<LoadSDNode*, 8> Loads;
+ SmallVector<LoadSDNode*, 8> Loads;
SmallPtrSet<SDNode*, 2> NodesWithConsts;
SDNode *FixupNode = nullptr;
if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
@@ -4399,7 +4560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
@@ -4414,7 +4575,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return NewSel;
// reassociate and
- if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
+ if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
@@ -4563,9 +4724,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Res = ReduceLoadWidth(N)) {
LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
-
AddToWorklist(N);
- CombineTo(LN0, Res, Res.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
return SDValue(N, 0);
}
}
@@ -4585,8 +4745,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// Masking the negated extension of a boolean is just the zero-extended
// boolean:
@@ -4596,7 +4756,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Note: the SimplifyDemandedBits fold below can make an information-losing
// transform, and then we have no way to find this better fold.
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
- if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
+ if (isNullOrNullSplat(N0.getOperand(0))) {
SDValue SubRHS = N0.getOperand(1);
if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
@@ -5124,16 +5284,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
+ if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
- // iff (c1 & c2) != 0.
- auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
- return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
+ // iff (c1 & c2) != 0 or c1/c2 are undef.
+ auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
+ return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
if (SDValue COR = DAG.FoldConstantArithmetic(
ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
@@ -5144,8 +5304,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
@@ -5257,9 +5417,9 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// Compute the shift amount we need to extract to complete the rotate.
const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
- APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
- if (NeededShiftAmt.isNegative())
+ if (OppShiftCst->getAPIntValue().ugt(VTWidth))
return SDValue();
+ APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
// Normalize the bitwidth of the two mul/udiv/shift constant operands.
APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
APInt OppLHSAmt = OppLHSCst->getAPIntValue();
@@ -5340,8 +5500,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
- KnownBits Known;
- DAG.computeKnownBits(Neg.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
if (NegC->getAPIntValue().getActiveBits() <= Bits &&
((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
@@ -5363,8 +5522,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// Pos'. The truncation is redundant for the purpose of the equality.
if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
- KnownBits Known;
- DAG.computeKnownBits(Pos.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
MaskLoBits))
@@ -5894,7 +6052,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
assert(N->getOpcode() == ISD::XOR);
// Don't touch 'not' (i.e. where y = -1).
- if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
+ if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
return SDValue();
EVT VT = N->getValueType(0);
@@ -5911,7 +6069,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDValue Xor0 = Xor.getOperand(0);
SDValue Xor1 = Xor.getOperand(1);
// Don't touch 'not' (i.e. where y = -1).
- if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
+ if (isAllOnesOrAllOnesSplat(Xor1))
return false;
if (Other == Xor0)
std::swap(Xor0, Xor1);
@@ -5977,8 +6135,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// fold (xor x, undef) -> undef
if (N0.isUndef())
return N0;
@@ -5988,11 +6147,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -6001,19 +6160,18 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return NewSel;
// reassociate xor
- if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
+ if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
- bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- isInt);
-
+ LHS.getValueType().isInteger());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
- switch (N0.getOpcode()) {
+ switch (N0Opcode) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
@@ -6026,54 +6184,74 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
- if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
- N0.getNode()->hasOneUse() &&
+ if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
- SDLoc DL(N0);
- V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
- DAG.getConstant(1, DL, V.getValueType()));
+ SDLoc DL0(N0);
+ V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
+ DAG.getConstant(1, DL0, V.getValueType()));
AddToWorklist(V.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
- (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
- unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
- (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
- unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
// fold (xor (and x, y), y) -> (and (not x), y)
- if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- N0->getOperand(1) == N1) {
- SDValue X = N0->getOperand(0);
+ if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
+ SDValue X = N0.getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
AddToWorklist(NotX.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
+ return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
+ }
+
+ if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
+ ConstantSDNode *XorC = isConstOrConstSplat(N1);
+ ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ if (XorC && ShiftC) {
+      // Don't crash on an oversized shift. We cannot guarantee that a bogus
+      // shift has been simplified to undef.
+ uint64_t ShiftAmt = ShiftC->getLimitedValue();
+ if (ShiftAmt < BitWidth) {
+ APInt Ones = APInt::getAllOnesValue(BitWidth);
+ Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
+ if (XorC->getAPIntValue() == Ones) {
+ // If the xor constant is a shifted -1, do a 'not' before the shift:
+ // xor (X << ShiftC), XorC --> (not X) << ShiftC
+ // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
+ SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
+ return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
+ }
+ }
+ }
}
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
- SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
+ SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
+ SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
@@ -6081,14 +6259,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
unsigned OpSizeInBits = VT.getScalarSizeInBits();
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
if (C->getAPIntValue() == (OpSizeInBits - 1))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ return DAG.getNode(ISD::ABS, DL, VT, S0);
}
}
}
// fold (xor x, x) -> 0
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (xor (shl 1, x), -1) -> (rotl ~1, x)
// Here is a concrete example of this equivalence:
@@ -6108,17 +6286,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// consistent result.
// - Pushing the zero left requires shifting one bits in from the right.
// A rotate left of ~1 is a nice way of achieving the desired result.
- if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
- && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
- SDLoc DL(N);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
N0.getOperand(1));
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
- if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (N0Opcode == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
@@ -6134,6 +6311,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+ // Do not turn a 'not' into a regular xor.
+ if (isBitwiseNot(N->getOperand(0)))
+ return SDValue();
+
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -6191,7 +6372,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
return SDValue();
}
- if (!TLI.isDesirableToCommuteWithShift(LHS))
+ if (!TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
// Fold the constants, shifting the binop RHS by the shift amount.
@@ -6239,9 +6420,16 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
unsigned Bitsize = VT.getScalarSizeInBits();
// fold (rot x, 0) -> x
- if (isNullConstantOrNullSplatConstant(N1))
+ if (isNullOrNullSplat(N1))
return N0;
+ // fold (rot x, c) -> x iff (c % BitSize) == 0
+ if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
+ APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
+ if (DAG.MaskedValueIsZero(N1, ModuloMask))
+ return N0;
+ }
+
// fold (rot x, c) -> (rot x, c % BitSize)
if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
if (Cst->getAPIntValue().uge(Bitsize)) {
@@ -6284,6 +6472,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6318,22 +6509,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
- // fold (shl 0, x) -> 0
- if (isNullConstantOrNullSplatConstant(N0))
- return N0;
- // fold (shl x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (shl x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
- // fold (shl undef, x) -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6454,7 +6629,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ TLI.shouldFoldShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
if (c1 < OpSizeInBits) {
@@ -6495,7 +6671,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
AddToWorklist(Shl0.getNode());
@@ -6522,6 +6699,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6542,16 +6722,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
- // fold (sra x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (sra x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6571,31 +6741,30 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+  // Clamp (add c1, c2) to the maximum shift amount.
if (N0.getOpcode() == ISD::SRA) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
+ EVT ShiftSVT = ShiftVT.getScalarType();
+ SmallVector<SDValue, 16> ShiftValues;
- auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
+ auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).uge(OpSizeInBits);
- };
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
-
- auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
- APInt c1 = LHS->getAPIntValue();
- APInt c2 = RHS->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).ult(OpSizeInBits);
+ APInt Sum = c1 + c2;
+ unsigned ShiftSum =
+ Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
+ ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
+ return true;
};
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
- SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
+ SDValue ShiftValue;
+ if (VT.isVector())
+ ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
+ else
+ ShiftValue = ShiftValues[0];
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
}
}
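The clamp is sound because arithmetic right shifts compose additively, and once the combined amount reaches the bit width the result saturates at a shift of BitWidth - 1 (all sign bits). A standalone model (shift amounts clamped the same way; arithmetic shift of negative values is guaranteed only from C++20, so treat this as an illustrative sketch):

    #include <cassert>
    #include <cstdint>

    static int32_t sra32(int32_t x, unsigned amt) {
      return x >> (amt > 31 ? 31 : amt); // clamp like the combine does
    }

    int main() {
      for (int32_t x : {INT32_MIN, -7, 0, 12345}) {
        // sra (sra x, c1), c2 == sra x, min(c1 + c2, 31)
        assert(sra32(sra32(x, 3), 4) == sra32(x, 7));
        assert(sra32(sra32(x, 20), 20) == sra32(x, 31)); // 40 clamps to 31
      }
      return 0;
    }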
@@ -6689,6 +6858,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6703,19 +6875,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
- // fold (srl 0, x) -> 0
- if (isNullConstantOrNullSplatConstant(N0))
- return N0;
- // fold (srl x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (srl x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6819,8 +6978,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
- KnownBits Known;
- DAG.computeKnownBits(N0.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -6906,6 +7064,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ bool IsFSHL = N->getOpcode() == ISD::FSHL;
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // fold (fshl N0, N1, 0) -> N0
+ // fold (fshr N0, N1, 0) -> N1
+ if (isPowerOf2_32(BitWidth))
+ if (DAG.MaskedValueIsZero(
+ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+ return IsFSHL ? N0 : N1;
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ if (Cst->getAPIntValue().uge(BitWidth)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
+ DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ }
+ }
+
+ // fold (fshl N0, N0, N2) -> (rotl N0, N2)
+ // fold (fshr N0, N0, N2) -> (rotr N0, N2)
+ // TODO: Investigate flipping this rotate if only one is legal; if the
+ // funnel shift is legal as well, we might be better off avoiding the
+ // non-constant (BW - N2).
+ unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (N0 == N1 && hasOperation(RotOpc, VT))
+ return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+
+ return SDValue();
+}
+
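Illustrative aside, not part of the patch: a plain C++ model of the FSHL
semantics used by the folds above; fshl32 and rotl32 are hypothetical helpers,
not LLVM API.

    #include <cstdint>

    // ISD::FSHL returns the high half of the concatenation (Hi:Lo) shifted
    // left; the shift amount is taken modulo the bit width.
    uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
      Amt &= 31;
      if (Amt == 0)
        return Hi; // fold (fshl N0, N1, 0) -> N0
      return (Hi << Amt) | (Lo >> (32 - Amt));
    }

    // With both inputs equal, the funnel shift is a rotate:
    // fold (fshl N0, N0, N2) -> (rotl N0, N2).
    uint32_t rotl32(uint32_t X, uint32_t Amt) { return fshl32(X, X, Amt); }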
SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7012,6 +7205,16 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
+// FIXME: This should be checking for no signed zeros on individual operands,
+// as well as no nans.
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+                                         SDValue RHS) {
+ const TargetOptions &Options = DAG.getTarget().Options;
+ EVT VT = LHS.getValueType();
+
+ return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+}
+
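Illustrative aside, not part of the patch: why the known-never-NaN check above
matters, sketched with scalar C++; selectMin is hypothetical.

    #include <cmath>

    // A select of the compared operands only matches fmin semantics when
    // neither operand can be NaN.
    float selectMin(float X, float Y) { return X < Y ? X : Y; }
    // selectMin(1.0f, NAN) yields NAN (the compare is false), while
    // std::fmin(1.0f, NAN) yields 1.0f, so the fold is unsafe with NaNs.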
/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -7020,6 +7223,7 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
switch (CC) {
case ISD::SETOLT:
case ISD::SETOLE:
@@ -7027,8 +7231,15 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETLE:
case ISD::SETULT:
case ISD::SETULE: {
+ // Since the operands are already known never to be NaN here, either fminnum
+ // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
+ // expanded in terms of it.
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
- if (TLI.isOperationLegal(Opcode, VT))
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
@@ -7038,8 +7249,12 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE: {
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
- if (TLI.isOperationLegal(Opcode, VT))
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
@@ -7150,15 +7365,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
EVT VT0 = N0.getValueType();
SDLoc DL(N);
- // fold (select C, X, X) -> X
- if (N1 == N2)
- return N1;
-
- if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
- // fold (select true, X, Y) -> X
- // fold (select false, X, Y) -> Y
- return !N0C->isNullValue() ? N1 : N2;
- }
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or C, Y)
@@ -7264,32 +7472,54 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
}
- // fold selects based on a setcc into other things, such as min/max/abs
+ // Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
- // select x, y (fcmp lt x, y) -> fminnum x, y
- // select x, y (fcmp gt x, y) -> fmaxnum x, y
- //
- // This is OK if we don't care about what happens if either operand is a
- // NaN.
- //
-
- // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
- // no signed zeros as well as no nans.
- const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
- DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ // select (fcmp lt x, y), x, y -> fminnum x, y
+ // select (fcmp gt x, y), x, y -> fmaxnum x, y
+ //
+ // This is OK if we don't care what happens if either operand is a NaN.
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+ if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
+ CC, TLI, DAG))
return FMinMax;
+
+ // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
+ // This is conservatively limited to pre-legal-operations to give targets
+ // a chance to reverse the transform if they want to do that. Also, it is
+ // unlikely that the pattern would be formed late, so it's probably not
+ // worth going through the other checks.
+ if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
+ CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
+ N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
+ auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
+ auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
+ if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
+ // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
+ // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
+ //
+ // The IR equivalent of this transform would have this form:
+ // %a = add %x, C
+ // %c = icmp ugt %x, ~C
+ // %r = select %c, -1, %a
+ // =>
+ // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
+ // %u0 = extractvalue %u, 0
+ // %u1 = extractvalue %u, 1
+ // %r = select %u1, -1, %u0
+ SDVTList VTs = DAG.getVTList(VT, VT0);
+ SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
+ return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
+ }
}
- if ((!LegalOperations &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
- N0.getOperand(1), N1, N2, N0.getOperand(2));
+ if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
+ (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
+ N0.getOperand(2));
+
return SimplifySelect(DL, N0, N1, N2);
}
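Illustrative aside, not part of the patch: the saturating-add rewrite above in
scalar C++ form; satAddC and satAddCViaUaddo are hypothetical, with C a known
constant.

    #include <cstdint>

    // X + C wraps exactly when X > ~C, which is the setcc the combine
    // matches: select (setcc X, ~C, ugt), -1, (add X, C).
    uint32_t satAddC(uint32_t X, uint32_t C) {
      return X > ~C ? UINT32_MAX : X + C;
    }

    // The rewritten form: one add producing both the sum (uaddo.0) and the
    // overflow bit (uaddo.1) feeding the select.
    uint32_t satAddCViaUaddo(uint32_t X, uint32_t C) {
      uint32_t Sum = X + C;
      bool Overflow = Sum < X;
      return Overflow ? UINT32_MAX : Sum;
    }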
@@ -7388,7 +7618,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
TargetLowering::TypeSplitVector)
return SDValue();
- SDValue MaskLo, MaskHi, Lo, Hi;
+ SDValue MaskLo, MaskHi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
EVT LoVT, HiVT;
@@ -7416,17 +7646,15 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
Alignment, MSC->getAAInfo(), MSC->getRanges());
SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
- Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO);
+ SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ DataLo.getValueType(), DL, OpsLo, MMO);
- SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
- Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ // The order of the scatter operations after the split is well defined: the
+ // "Hi" part comes after the "Lo" part, so the two operations must be
+ // chained one after the other.
+ SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
@@ -7525,9 +7753,9 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Src0 = MGT->getValue();
- SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+ SDValue PassThru = MGT->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
@@ -7550,11 +7778,11 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
+ SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
MMO);
- SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
+ SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
MMO);
@@ -7599,9 +7827,9 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Src0 = MLD->getSrc0();
- SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+ SDValue PassThru = MLD->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
@@ -7625,8 +7853,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ISD::NON_EXTLOAD, MLD->isExpandingLoad());
+ Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
+ MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MLD->isExpandingLoad());
@@ -7637,8 +7865,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
MLD->getAAInfo(), MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ISD::NON_EXTLOAD, MLD->isExpandingLoad());
+ Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
+ MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -7717,9 +7945,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDLoc DL(N);
- // fold (vselect C, X, X) -> X
- if (N1 == N2)
- return N1;
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
@@ -7754,12 +7981,26 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
+ // vselect (fcmp lt x, y), x, y -> fminnum x, y
+ // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
+ //
+ // This is OK if we don't care what happens if either operand is a NaN.
+ EVT VT = N->getValueType(0);
+ if (N0.hasOneUse() &&
+     isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (SDValue FMinMax = combineMinNumMaxNum(
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ return FMinMax;
+ }
+
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
// TODO: This could be extended to handle non-loading patterns, but that
// requires thorough testing to avoid regressions.
- if (isNullConstantOrNullSplatConstant(RHS)) {
+ if (isNullOrNullSplat(RHS)) {
EVT NarrowVT = LHS.getValueType();
EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
EVT SetCCVT = getSetCCResultType(LHS.getValueType());
@@ -7902,9 +8143,8 @@ SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
-static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
- SelectionDAG &DAG, bool LegalTypes,
- bool LegalOperations) {
+static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7918,16 +8158,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
- return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+ return DAG.getNode(Opcode, SDLoc(N), VT, N0);
// fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
EVT SVT = VT.getScalarType();
- if (!(VT.isVector() &&
- (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+ if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
- return nullptr;
+ return SDValue();
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
@@ -7936,10 +8175,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
- for (unsigned i=0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->isUndef()) {
- Elts.push_back(DAG.getUNDEF(SVT));
+ // For zero-extensions, UNDEF elements are still guaranteed to have their
+ // upper bits set to zero.
+ bool IsZext =
+ Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.isUndef()) {
+ Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
continue;
}
@@ -7953,7 +8197,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
}
- return DAG.getBuildVector(VT, DL, Elts).getNode();
+ return DAG.getBuildVector(VT, DL, Elts);
}
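Illustrative aside, not part of the patch: a scalar model of the per-element
widening in the loop above; extendElt is hypothetical.

    #include <cstdint>

    // Each constant element is widened with C.sext(VTBits) or C.zext(VTBits).
    uint32_t extendElt(uint16_t Elt, bool IsSext) {
      return IsSext ? (uint32_t)(int32_t)(int16_t)Elt // sign-extend i16->i32
                    : (uint32_t)Elt;                  // zero-extend i16->i32
    }
    // For a zero-extension, an UNDEF element may be folded to 0: zext
    // guarantees zero upper bits, and 0 is a legal refinement of UNDEF.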
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
@@ -8269,7 +8513,7 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- if ((LegalOperations || LN0->isVolatile()) &&
+ if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return {};
@@ -8359,9 +8603,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
@@ -8498,21 +8741,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// if this is the case.
EVT SVT = getSetCCResultType(N00VT);
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(DL, VT, N00, N01, CC);
-
- // If the desired elements are smaller or larger than the source
- // elements, we can use a matching integer vector type and then
- // truncate/sign extend.
- EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVecType) {
- SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
- return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ // If we already have the desired type, don't change it.
+ if (SVT != N0.getValueType()) {
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
+
+ // If the desired elements are smaller or larger than the source
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ }
}
}
@@ -8569,40 +8815,37 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
KnownBits &Known) {
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
- DAG.computeKnownBits(Op, Known);
+ Known = DAG.computeKnownBits(Op);
return true;
}
- if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
- cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ if (N.getOpcode() != ISD::SETCC ||
+ N.getValueType().getScalarType() != MVT::i1 ||
+ cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
return false;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
- if (isNullConstant(Op0))
+ if (isNullOrNullSplat(Op0))
Op = Op1;
- else if (isNullConstant(Op1))
+ else if (isNullOrNullSplat(Op1))
Op = Op0;
else
return false;
- DAG.computeKnownBits(Op, Known);
+ Known = DAG.computeKnownBits(Op);
- if (!(Known.Zero | 1).isAllOnesValue())
- return false;
-
- return true;
+ return (Known.Zero | 1).isAllOnesValue();
}
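Illustrative aside, not part of the patch: the fold that isTruncateOf guards,
in scalar form; zextOfTrunc is hypothetical.

    #include <cstdint>

    // zext(trunc(X)) == X whenever the truncated-away bits of X are already
    // known to be zero, which is what the Known.Zero check establishes.
    uint64_t zextOfTrunc(uint64_t X) {
      uint32_t T = (uint32_t)X; // truncate i64 -> i32
      return (uint64_t)T;       // zext i32 -> i64; equals X iff X >> 32 == 0
    }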
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
@@ -8613,17 +8856,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (zext x) or
// (zext (truncate x)) -> (truncate x)
// This is valid when the truncated bits of x are already zero.
- // FIXME: We should extend this to work for vectors too.
SDValue Op;
KnownBits Known;
- if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
+ if (isTruncateOf(DAG, N0, Op, Known)) {
APInt TruncatedBits =
- (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
- APInt(Op.getValueSizeInBits(), 0) :
- APInt::getBitsSet(Op.getValueSizeInBits(),
- N0.getValueSizeInBits(),
- std::min(Op.getValueSizeInBits(),
- VT.getSizeInBits()));
+ (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
+ APInt(Op.getScalarValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getScalarValueSizeInBits(),
+ N0.getScalarValueSizeInBits(),
+ std::min(Op.getScalarValueSizeInBits(),
+ VT.getScalarSizeInBits()));
if (TruncatedBits.isSubsetOf(Known.Zero))
return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
@@ -8851,9 +9093,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
@@ -8968,17 +9209,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend
- else {
- EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
- SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
- }
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -9025,6 +9265,26 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
+ // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
+ // than X, just move the AssertZext in front of the truncate and drop the
+ // AssertSext.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+ Opcode == ISD::AssertZext) {
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+ "Asserting zero/sign-extended bits to a type larger than the "
+ "truncated destination does not provide information");
+
+ if (AssertVT.bitsLT(BigA_AssertVT)) {
+ SDLoc DL(N);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), N1);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+ }
+ }
+
return SDValue();
}
@@ -9046,6 +9306,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (VT.isVector())
return SDValue();
+ unsigned ShAmt = 0;
+ bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
@@ -9073,15 +9335,25 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!AndC || !AndC->getAPIntValue().isMask())
+ if (!AndC)
+ return SDValue();
+
+ const APInt &Mask = AndC->getAPIntValue();
+ unsigned ActiveBits = 0;
+ if (Mask.isMask()) {
+ ActiveBits = Mask.countTrailingOnes();
+ } else if (Mask.isShiftedMask()) {
+ ShAmt = Mask.countTrailingZeros();
+ APInt ShiftedMask = Mask.lshr(ShAmt);
+ ActiveBits = ShiftedMask.countTrailingOnes();
+ HasShiftedOffset = true;
+ } else
return SDValue();
- unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
}
- unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
@@ -9150,13 +9422,16 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (DAG.getDataLayout().isBigEndian()) {
+ auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
- }
+ return LVTStoreBits - EVTStoreBits - ShAmt;
+ };
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
@@ -9204,6 +9479,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
+ if (HasShiftedOffset) {
+ // ShAmt was adjusted earlier on big-endian targets to compute the pointer
+ // offset; re-apply the (self-inverse) adjustment to recover the original
+ // shift amount.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
+
+ // We're using a shifted mask, so the load now has an offset. This means
+ // that the data has been loaded into lower bytes than it would have been
+ // before, so we need to shl the loaded data into the correct position in
+ // the register.
+ SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ }
+
// Return the new loaded value.
return Result;
}
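Illustrative aside, not part of the patch: the shifted-mask narrowing on a
little-endian target, in plain C++; maskedWordLoad is hypothetical.

    #include <cstdint>

    // (and (load i32 P), 0x00FF0000): the mask is a shifted mask with
    // ShAmt = 16, so PtrOff = ShAmt / 8 = 2, and the narrowed zextload is
    // shifted back into position, as in the HasShiftedOffset path above.
    uint32_t maskedWordLoad(const uint8_t *P) {
      uint32_t Byte = P[2]; // narrow zextload i8 at byte offset 2
      return Byte << 16;    // shl the loaded data into the masked position
    }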
@@ -9235,12 +9525,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
- // if x is small enough.
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getScalarValueSizeInBits() <= EVTBits &&
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ if ((N00Bits <= EVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
@@ -9250,7 +9543,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
- return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
}
// fold (sext_in_reg (zext x)) -> (sext x)
@@ -9345,9 +9639,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
if (N0.isUndef())
return DAG.getUNDEF(VT);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -9359,9 +9655,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
if (N0.isUndef())
return DAG.getUNDEF(VT);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -9458,8 +9756,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
- KnownBits Known;
- DAG.computeKnownBits(Amt, Known);
+ KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
SDLoc SL(N);
@@ -9636,6 +9933,32 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ // Narrow a suitable binary operation with a non-opaque constant operand by
+ // moving it ahead of the truncate. This is limited to pre-legalization
+ // because targets may prefer a wider type during later combines and invert
+ // this transform.
+ switch (N0.getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ if (!LegalOperations && N0.hasOneUse() &&
+ (isConstantOrConstantVector(N0.getOperand(0), true) ||
+ isConstantOrConstantVector(N0.getOperand(1), true))) {
+ // TODO: We already restricted this to pre-legalization, but for vectors
+ // we are extra cautious to not create an unsupported operation.
+ // Target-specific changes are likely needed to avoid regressions here.
+ if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
+ SDLoc DL(N);
+ SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ }
+ }
+ }
+
return SDValue();
}
@@ -9694,11 +10017,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
return SDValue();
- // TODO: Use splat values for the constant-checking below and remove this
- // restriction.
+ // TODO: Handle cases where the integer constant has a different scalar
+ // bitwidth than the FP type.
SDValue N0 = N->getOperand(0);
EVT SourceVT = N0.getValueType();
- if (SourceVT.isVector())
+ if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
unsigned FPOpcode;
@@ -9706,25 +10029,35 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
switch (N0.getOpcode()) {
case ISD::AND:
FPOpcode = ISD::FABS;
- SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
+ SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
case ISD::XOR:
FPOpcode = ISD::FNEG;
- SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ case ISD::OR:
+ FPOpcode = ISD::FABS;
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
- // TODO: ISD::OR --> ISD::FNABS?
default:
return SDValue();
}
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+ // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
+ // fneg (fabs X)
SDValue LogicOp0 = N0.getOperand(0);
- ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
LogicOp0.getOpcode() == ISD::BITCAST &&
- LogicOp0->getOperand(0).getValueType() == VT)
- return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
+ LogicOp0.getOperand(0).getValueType() == VT) {
+ SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
+ NumFPLogicOpsConv++;
+ if (N0.getOpcode() == ISD::OR)
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
+ return FPOp;
+ }
return SDValue();
}
@@ -9737,33 +10070,32 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
- // Only do this before legalize, since afterward the target may be depending
- // on the bitconvert.
+ // Only do this before type legalization, since we might create an illegal
+ // scalar type. Even if we knew we wouldn't create an illegal scalar type,
+ // we can only do this before operation legalization, since the target may
+ // be depending on the bitcast.
// First check to see if this is all constant.
if (!LegalTypes &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
- VT.isVector()) {
- bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
-
- EVT DestEltVT = N->getValueType(0).getVectorElementType();
- assert(!DestEltVT.isVector() &&
- "Element type of vector ValueType must not be vector!");
- if (isSimple)
- return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
- }
+ VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
+ VT.getVectorElementType());
// If the input is a constant, let getNode fold it.
- // We always need to check that this is just a fp -> int or int -> conversion
- // otherwise we will get back N which will confuse the caller into thinking
- // we used CombineTo. This can block target combines from running. If we can't
- // allowed legal operations, we need to ensure the resulting operation will be
- // legal.
- // TODO: Maybe we should check that the return value isn't N explicitly?
- if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
- (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
- return DAG.getBitcast(VT, N0);
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ // If we can't allow illegal operations, we need to check that this is just
+ // an fp -> int or int -> fp conversion and that the resulting operation
+ // will be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT))) {
+ SDValue C = DAG.getBitcast(VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+ }
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
@@ -9772,12 +10104,16 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (conv (load x)) -> (load (conv*)x)
// If the resultant load doesn't need a higher alignment than the original!
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile() &&
// Do not remove the cast if the types differ in endian layout.
TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care whether the original type was legal or
+ // not, as we assume software couldn't rely on the number of accesses of
+ // an illegal type.
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned OrigAlign = LN0->getAlignment();
@@ -9934,7 +10270,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// float vectors bitcast to integer vectors) into shuffles.
// bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
- N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
!(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
@@ -10000,15 +10336,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- BV->getValueType(0).getVectorNumElements());
-
- // Due to the FP element handling below calling this routine recursively,
- // we can end up with a scalar-to-vector node here.
- if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- DAG.getBitcast(DstEltVT, BV->getOperand(0)));
-
SmallVector<SDValue, 8> Ops;
for (SDValue Op : BV->op_values()) {
// If the vector element type is not legal, the BUILD_VECTOR operands
@@ -10018,6 +10345,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
Ops.push_back(DAG.getBitcast(DstEltVT, Op));
AddToWorklist(Ops.back().getNode());
}
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
return DAG.getBuildVector(VT, SDLoc(BV), Ops);
}
@@ -10651,17 +10980,18 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
- // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
+ // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
- auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
- if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
- if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ if (C->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
}
return SDValue();
};
@@ -10671,29 +11001,30 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
if (SDValue FMA = FuseFADD(N1, N0, Flags))
return FMA;
- // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
- // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
- // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
- // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
+ // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
+ // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
+ // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
+ // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
- auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
- if (XC0 && XC0->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y, Flags);
- if (XC0 && XC0->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
-
- auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
- if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
- if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
+ if (C0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y, Flags);
+ if (C0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
+ if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (C1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ }
}
return SDValue();
};
@@ -10706,14 +11037,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
return SDValue();
}
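Illustrative aside, not part of the patch: scalar models of the FuseFADD and
FuseFSUB folds above, exact when FMA contraction is allowed; both helpers are
hypothetical.

    #include <cmath>

    // (x + 1.0) * y == fma(x, y, y) and (x - 1.0) * y == fma(x, y, -y).
    double mulOfAddOne(double X, double Y) { return std::fma(X, Y, Y); }
    double mulOfSubOne(double X, double Y) { return std::fma(X, Y, -Y); }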
-static bool isFMulNegTwo(SDValue &N) {
- if (N.getOpcode() != ISD::FMUL)
- return false;
- if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
- return CFP->isExactlyValue(-2.0);
- return false;
-}
-
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -10737,6 +11060,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
+ ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
+ if (N1C && N1C->isZero())
+ if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
+ return N0;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -10752,23 +11081,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
- // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
- // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
- if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
- (isFMulNegTwo(N1) && N1.hasOneUse())) {
- bool N1IsFMul = isFMulNegTwo(N1);
- SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
- }
+ auto isFMulNegTwo = [](SDValue FMul) {
+ if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
+ return false;
+ auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
+ return C && C->isExactlyValue(-2.0);
+ };
- ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
- if (N1C && N1C->isZero()) {
- if (N1C->isNegative() || Options.UnsafeFPMath ||
- Flags.hasNoSignedZeros()) {
- // fold (fadd A, 0) -> A
- return N0;
- }
+ // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N0)) {
+ SDValue B = N0.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
+ }
+ // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N1)) {
+ SDValue B = N1.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
}
// No FP constant should be created after legalization as Instruction
@@ -10887,8 +11217,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -10920,9 +11250,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return DAG.getConstantFP(0.0f, DL, VT);
}
- // (fsub 0, B) -> -B
+ // (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
- if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
+ if (N0CFP->isNegative() ||
+ (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -10930,27 +11261,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ if ((Options.UnsafeFPMath ||
+      (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+     N1.getOpcode() == ISD::FADD) {
+ // X - (X + Y) -> -Y
+ if (N0 == N1->getOperand(0))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
+ // X - (Y + X) -> -Y
+ if (N0 == N1->getOperand(1))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
+ }
+
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // (fsub x, (fadd x, y)) -> (fneg y)
- // (fsub x, (fadd y, x)) -> (fneg y)
- if (N1.getOpcode() == ISD::FADD) {
- SDValue N10 = N1->getOperand(0);
- SDValue N11 = N1->getOperand(1);
-
- if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N11, DAG, LegalOperations);
-
- if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N10, DAG, LegalOperations);
- }
- }
-
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
@@ -10963,8 +11289,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -11002,26 +11328,16 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
- if (N0.getOpcode() == ISD::FMUL) {
- // Fold scalars or any vector constants (not just splats).
- // This fold is done in general by InstCombine, but extra fmul insts
- // may have been generated during lowering.
+ if (isConstantFPBuildVectorOrConstantFP(N1) &&
+ N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
- auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
- auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
-
- // Check 1: Make sure that the first operand of the inner multiply is NOT
- // a constant. Otherwise, we may induce infinite looping.
- if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
- // Check 2: Make sure that the second operand of the inner multiply and
- // the second operand of the outer multiply are constants.
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
- }
+ // Avoid an infinite loop by making sure that N00 is not a constant
+ // (the inner multiply has not been constant folded yet).
+ if (isConstantFPBuildVectorOrConstantFP(N01) &&
+ !isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
}
}
@@ -11445,15 +11761,15 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
- if (N1CFP) {
- const APFloat &V = N1CFP->getValueAPF();
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
+ const APFloat &V = N1C->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
@@ -11489,6 +11805,72 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFPOW(SDNode *N) {
+ ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
+ if (!ExponentC)
+ return SDValue();
+
+ // Try to convert x ** (1/3) into cube root.
+ // TODO: Handle the various flavors of long double.
+ // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+ // Some range near 1/3 should be fine.
+ EVT VT = N->getValueType(0);
+ if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+ (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+ // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+ // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+ // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val), a negative number.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf nnan afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Do not create a cbrt() libcall if the target does not have it, and do not
+ // turn a pow that has lowering support into a cbrt() libcall.
+ if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+ (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+ DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+ return SDValue();
+
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ }
+
+ // Try to convert x ** (1/4) into square roots.
+ // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
+ // TODO: This could be extended (using a target hook) to handle smaller
+ // power-of-2 fractional exponents.
+ if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+ // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
+ // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Don't double the number of libcalls. We are trying to inline fast code.
+ if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
+ return SDValue();
+
+ // Assume that libcalls are the smallest code.
+ // TODO: This restriction should probably be lifted for vectors.
+ if (DAG.getMachineFunction().getFunction().optForSize())
+ return SDValue();
+
+ // pow(X, 0.25) --> sqrt(sqrt(X))
+ SDLoc DL(N);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
+ return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ }
+
+ return SDValue();
+}
+
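Illustrative aside, not part of the patch: scalar models of the two expansions
above, valid only under the fast-math flags the combine checks (nsz, ninf,
afn, plus nnan for the cbrt case); both helpers are hypothetical.

    #include <cmath>

    double powQuarter(double X) { return std::sqrt(std::sqrt(X)); } // pow(X, 0.25)
    double powThird(double X) { return std::cbrt(X); }              // pow(X, 1.0/3.0)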
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// This optimization is guarded by a function attribute because it may produce
@@ -11538,8 +11920,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
// but UINT_TO_FP is legal on this target, try to convert.
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
- TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
+ hasOperation(ISD::UINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
@@ -11595,8 +11977,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
// but SINT_TO_FP is legal on this target, try to convert.
- if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
- TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
+ hasOperation(ISD::SINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to SINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
@@ -11917,7 +12299,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
+ APFloat (*Op)(const APFloat &, const APFloat &)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -11927,36 +12310,31 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
+ return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
}
// Canonicalize to constant on RHS.
if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
return SDValue();
}
-SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
- const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minnum);
+}
- if (N0CFP && N1CFP) {
- const APFloat &C0 = N0CFP->getValueAPF();
- const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
- }
+SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maxnum);
+}
- // Canonicalize to constant on RHS.
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
+SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minimum);
+}
- return SDValue();
+SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maximum);
}
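Illustrative aside, not part of the patch: fminnum/fmaxnum fold with libm
fmin/fmax semantics (a NaN operand loses), while fminimum/fmaximum propagate
NaN and order -0.0 below +0.0; minimumModel is a hypothetical scalar reference
for the latter.

    #include <cmath>

    double minimumModel(double A, double B) {
      if (std::isnan(A) || std::isnan(B))
        return NAN; // NaN propagates, unlike fmin/minnum
      if (A == 0.0 && B == 0.0)
        return std::signbit(A) ? A : B; // -0.0 orders below +0.0
      return A < B ? A : B;
    }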
SDValue DAGCombiner::visitFABS(SDNode *N) {
@@ -11976,11 +12354,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
- // constant pool values.
- if (!TLI.isFAbsFree(VT) &&
- N0.getOpcode() == ISD::BITCAST &&
- N0.getNode()->hasOneUse()) {
+ // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
+ if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
@@ -12512,8 +12887,15 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (TryNext)
continue;
- // Check for #2
- if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
SDValue Result = isLoad
? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
BasePtr, Offset, AM)
@@ -12571,6 +12953,157 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
+static inline int numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorNumElements() : 0;
+}
+
+bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
+ Val = ST->getValue();
+ EVT STType = Val.getValueType();
+ EVT STMemType = ST->getMemoryVT();
+ if (STType == STMemType)
+ return true;
+ if (isTypeLegal(STMemType))
+ return false; // fail.
+ if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
+ TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
+ Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
+ STType.isInteger() && STMemType.isInteger()) {
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
+ Val = DAG.getBitcast(STMemType, Val);
+ return true;
+ }
+ return false; // fail.
+}
+
+bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
+ EVT LDMemType = LD->getMemoryVT();
+ EVT LDType = LD->getValueType(0);
+ assert(Val.getValueType() == LDMemType &&
+ "Attempting to extend value of non-matching type");
+ if (LDType == LDMemType)
+ return true;
+ if (LDMemType.isInteger() && LDType.isInteger()) {
+ switch (LD->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ Val = DAG.getBitcast(LDType, Val);
+ return true;
+ case ISD::EXTLOAD:
+ Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::SEXTLOAD:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::ZEXTLOAD:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
+ if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+ return SDValue();
+ SDValue Chain = LD->getOperand(0);
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ if (!ST || ST->isVolatile())
+ return SDValue();
+
+ EVT LDType = LD->getValueType(0);
+ EVT LDMemType = LD->getMemoryVT();
+ EVT STMemType = ST->getMemoryVT();
+ EVT STType = ST->getValue().getValueType();
+
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
+ int64_t Offset;
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ return SDValue();
+
+  // Normalize for endianness. After this, Offset=0 denotes that the least
+  // significant bit in the loaded value maps to the least significant bit in
+  // the stored value. With Offset=n (for n > 0), the loaded value starts at
+  // the nth least significant byte of the stored value.
+ if (DAG.getDataLayout().isBigEndian())
+ Offset = (STMemType.getStoreSizeInBits() -
+ LDMemType.getStoreSizeInBits()) / 8 - Offset;
+
+  // Check that the stored value covers all bits that are loaded.
+ bool STCoversLD =
+ (Offset >= 0) &&
+ (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+
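The offset normalization and coverage check can be modeled with plain byte counts. A minimal sketch, where storeCoversLoad is a hypothetical stand-in for the EVT arithmetic above and the widths are made up:

#include <cassert>
#include <cstdint>

// Normalize a little-endian-style byte offset for a big-endian layout and
// check that the stored bytes cover the loaded bytes.
bool storeCoversLoad(int64_t StoreBytes, int64_t LoadBytes, int64_t Offset,
                     bool BigEndian) {
  if (BigEndian)
    Offset = StoreBytes - LoadBytes - Offset; // mirror within the store
  return Offset >= 0 &&
         Offset * 8 + LoadBytes * 8 <= StoreBytes * 8;
}

int main() {
  assert(storeCoversLoad(8, 4, 0, false));  // i32 from the low half of an i64
  assert(storeCoversLoad(8, 4, 0, true));   // BE: same bytes, mirrored offset
  assert(!storeCoversLoad(8, 4, 8, false)); // runs past the stored value
}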
+ auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
+ if (LD->isIndexed()) {
+ bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
+ LD->getAddressingMode() == ISD::POST_DEC);
+ unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
+ SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
+ LD->getOperand(1), LD->getOperand(2));
+ SDValue Ops[] = {Val, Idx, Chain};
+ return CombineTo(LD, Ops, 3);
+ }
+ return CombineTo(LD, Val, Chain);
+ };
+
+ if (!STCoversLD)
+ return SDValue();
+
+ // Memory as copy space (potentially masked).
+ if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
+    // Simple case: direct non-truncating forwarding.
+ if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ return ReplaceLd(LD, ST->getValue(), Chain);
+ // Can we model the truncate and extension with an and mask?
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
+ !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
+ // Mask to size of LDMemType
+ auto Mask =
+ DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
+ STMemType.getSizeInBits()),
+ SDLoc(ST), STType);
+ auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
+ return ReplaceLd(LD, Val, Chain);
+ }
+ }
+
+ // TODO: Deal with nonzero offset.
+ if (LD->getBasePtr().isUndef() || Offset != 0)
+ return SDValue();
+  // Model necessary truncations / extensions.
+  SDValue Val;
+  // Truncate the stored value to the stored memory size.
+ do {
+ if (!getTruncatedStoreValue(ST, Val))
+ continue;
+ if (!isTypeLegal(LDMemType))
+ continue;
+ if (STMemType != LDMemType) {
+ // TODO: Support vectors? This requires extract_subvector/bitcast.
+ if (!STMemType.isVector() && !LDMemType.isVector() &&
+ STMemType.isInteger() && LDMemType.isInteger())
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
+ else
+ continue;
+ }
+ if (!extendLoadedValueToExtension(LD, Val))
+ continue;
+ return ReplaceLd(LD, Val, Chain);
+ } while (false);
+
+  // On failure, clean up dead nodes we may have created.
+ if (Val->use_empty())
+ deleteAndRecombine(Val.getNode());
+ return SDValue();
+}
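The masked-forwarding path above never touches memory; it emits an ISD::AND of the stored value. A standalone sketch with ordinary integers (all values hypothetical):

#include <cassert>
#include <cstdint>

int main() {
  // A store of i32 %v feeding an i8 zextload of the same address: forward
  // (%v & 0xFF) instead of going through memory.
  uint32_t StoredVal = 0xDEADBEEFu;
  unsigned LoadBits = 8;                 // LDMemType width
  uint32_t Mask = (1u << LoadBits) - 1;  // APInt::getLowBitsSet analogue
  uint32_t Forwarded = StoredVal & Mask; // the ISD::AND the combine emits
  assert(Forwarded == 0xEFu);            // matches a real zextload i8
}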
+
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -12637,17 +13170,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// If this load is directly stored, replace the load value with the stored
// value.
- // TODO: Handle store large -> read small portion.
- // TODO: Handle TRUNCSTORE/LOADEXT
- if (OptLevel != CodeGenOpt::None &&
- ISD::isNormalLoad(N) && !LD->isVolatile()) {
- if (ISD::isNON_TRUNCStore(Chain.getNode())) {
- StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
- if (PrevST->getBasePtr() == Ptr &&
- PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, PrevST->getOperand(1), Chain);
- }
- }
+ if (auto V = ForwardStoreValueToDirectLoad(LD))
+ return V;
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
@@ -13055,8 +13579,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
- [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
});
@@ -13689,7 +14212,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SDValue Val = St->getValue();
// If constant is of the wrong type, convert it now.
if (MemVT != Val.getValueType()) {
- Val = peekThroughBitcast(Val);
+ Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
EVT IntMemVT =
@@ -13715,7 +14238,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
// All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
// type MemVT. If the underlying value is not the correct
// type, but it is an extraction of an appropriate vector we
@@ -13725,19 +14248,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if ((MemVT != Val.getValueType()) &&
(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
- SDValue Vec = Val.getOperand(0);
EVT MemVTScalarTy = MemVT.getScalarType();
// We may need to add a bitcast here to get types to line up.
- if (MemVTScalarTy != Vec.getValueType()) {
- unsigned Elts = Vec.getValueType().getSizeInBits() /
- MemVTScalarTy.getSizeInBits();
- EVT NewVecTy =
- EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
- Vec = DAG.getBitcast(NewVecTy, Vec);
+ if (MemVTScalarTy != Val.getValueType().getScalarType()) {
+ Val = DAG.getBitcast(MemVT, Val);
+ } else {
+ unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT;
+ SDValue Vec = Val.getOperand(0);
+ SDValue Idx = Val.getOperand(1);
+ Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
}
- auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
- : ISD::EXTRACT_VECTOR_ELT;
- Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
}
Ops.push_back(Val);
}
@@ -13762,7 +14283,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
- Val = peekThroughBitcast(Val);
+ Val = peekThroughBitcasts(Val);
StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue()
@@ -13825,7 +14346,7 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
EVT MemVT = St->getMemoryVT();
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return;
@@ -13859,7 +14380,7 @@ void DAGCombiner::getStoreMergeCandidates(
int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
- SDValue Val = peekThroughBitcast(Other->getValue());
+ SDValue Val = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
@@ -13966,11 +14487,12 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
Worklist.push_back(RootNode);
while (!Worklist.empty()) {
auto N = Worklist.pop_back_val();
+ if (!Visited.insert(N).second)
+ continue; // Already present in Visited.
if (N->getOpcode() == ISD::TokenFactor) {
for (SDValue Op : N->ops())
Worklist.push_back(Op.getNode());
}
- Visited.insert(N);
}
// Don't count pruning nodes towards max.
@@ -13983,14 +14505,14 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// in candidate selection and can be
// safely ignored
// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
- // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
- // and so no cycles are possible.
- // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
- //
- // Thus we need only check predecessors of the value operands.
- auto *Op = N->getOperand(1).getNode();
- if (Visited.insert(Op).second)
- Worklist.push_back(Op);
+  //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
+  //                       but aren't necessarily from the same base node, so
+  //                       cycles are possible (e.g. via an indexed store).
+  //   * (Op 3) -> Represents the pre- or post-indexing offset (or undef for
+  //               non-indexed stores). Not constant on all targets (e.g. ARM)
+  //               and so can participate in a cycle.
+ for (unsigned j = 1; j < N->getNumOperands(); ++j)
+ Worklist.push_back(N->getOperand(j).getNode());
}
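The pruned worklist traversal used here (and by SDNode::hasPredecessorHelper) can be modeled on a toy operand graph; `reaches` below is a hypothetical stand-in, not the LLVM API:

#include <cassert>
#include <set>
#include <vector>

// Is Target reachable from From along operand edges? Visited is shared so
// pre-seeded nodes (and earlier queries) prune the search.
static bool reaches(int From, int Target,
                    const std::vector<std::vector<int>> &Ops,
                    std::set<int> &Visited) {
  std::vector<int> Worklist{From};
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // already explored (or deliberately pruned)
    if (N == Target)
      return true;
    for (int Op : Ops[N])
      Worklist.push_back(Op);
  }
  return false;
}

int main() {
  // Node 0 uses nodes 1 and 2; node 1 uses node 3.
  std::vector<std::vector<int>> Ops = {{1, 2}, {3}, {}, {}};
  std::set<int> Visited;
  assert(reaches(0, 3, Ops, Visited));
  std::set<int> Visited2{0}; // pre-seeding cuts off paths through node 0
  assert(!reaches(2, 3, Ops, Visited2));
}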
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
@@ -14023,7 +14545,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Perform an early exit check. Do not bother looking at stored values that
// are not constants, loads, or extracted vector elements.
- SDValue StoredVal = peekThroughBitcast(St->getValue());
+ SDValue StoredVal = peekThroughBitcasts(St->getValue());
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
@@ -14044,10 +14566,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Sort the memory operands according to their distance from the
// base pointer.
- llvm::sort(StoreNodes.begin(), StoreNodes.end(),
- [](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- });
+ llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Store Merge attempts to merge the lowest stores. This generally
// works out as if successful, as the remaining stores are checked
@@ -14292,7 +14813,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
LoadSDNode *Ld = cast<LoadSDNode>(Val);
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
@@ -14640,8 +15161,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
EVT SVT = Value.getOperand(0).getValueType();
+ // If the store is volatile, we only want to change the store type if the
+ // resulting store is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
if (((!LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
unsigned OrigAlign = ST->getAlignment();
bool Fast = false;
@@ -14692,7 +15218,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
- Value.getValueType().isInteger()) {
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
@@ -14976,6 +15504,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return InVec;
EVT VT = InVec.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
@@ -14983,12 +15512,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
return InVec;
- // We must know which element is being inserted for folds below here.
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
- if (!IndexC)
+ if (!IndexC) {
+    // If this is a variable insert into an undef vector, it might be better
+    // to splat:
+ // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
+ if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
+ SmallVector<SDValue, 8> Ops(NumElts, InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
return SDValue();
- unsigned Elt = IndexC->getZExtValue();
+ }
+ // We must know which element is being inserted for folds below here.
+ unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
@@ -15026,11 +15562,11 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
} else if (InVec.isUndef()) {
- unsigned NElts = VT.getVectorNumElements();
- Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
} else {
return SDValue();
}
+ assert(Ops.size() == NumElts && "Unexpected vector size");
// Insert the element
if (Elt < Ops.size()) {
@@ -15044,8 +15580,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getBuildVector(VT, DL, Ops);
}
-SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad) {
assert(!OriginalLoad->isVolatile());
EVT ResultVT = EVE->getValueType(0);
@@ -15127,70 +15664,132 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
return SDValue(EVE, 0);
}
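scalarizeExtractedVectorLoad boils down to loading the scalar at a fixed byte offset from the vector's base address. A minimal host-side sketch (the data is hypothetical):

#include <cassert>
#include <cstring>

int main() {
  // (vextract (v4f32 load $addr), 2) -> (f32 load $addr + 2*4): the scalar
  // lives at a fixed byte offset from the vector's base.
  float Vec[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  unsigned Elt = 2;
  const char *Base = reinterpret_cast<const char *>(Vec);
  float Scalar;
  std::memcpy(&Scalar, Base + Elt * sizeof(float), sizeof(float));
  assert(Scalar == 3.0f);
}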
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+ bool LegalOperations) {
+ SDValue Vec = ExtElt->getOperand(0);
+ SDValue Index = ExtElt->getOperand(1);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+ return SDValue();
+
+ // Targets may want to avoid this to prevent an expensive register transfer.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldScalarizeBinop(Vec))
+ return SDValue();
+
+ // Extracting an element of a vector constant is constant-folded, so this
+ // transform is just replacing a vector op with a scalar op while moving the
+ // extract.
+ SDValue Op0 = Vec.getOperand(0);
+ SDValue Op1 = Vec.getOperand(1);
+ if (isAnyConstantBuildVector(Op0, true) ||
+ isAnyConstantBuildVector(Op1, true)) {
+ // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+ // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+ SDLoc DL(ExtElt);
+ EVT VT = ExtElt->getValueType(0);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+ return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+ }
+
+ return SDValue();
+}
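The effect of scalarizeExtractedBinop can be checked with arrays standing in for vectors; a minimal sketch (all data hypothetical):

#include <cassert>

int main() {
  // extractelt (add X, C), 2 --> add (extractelt X, 2), C[2]
  int X[4] = {10, 20, 30, 40};
  int C[4] = {1, 2, 3, 4}; // the constant build_vector operand
  unsigned IndexC = 2;
  int Scalarized = X[IndexC] + C[IndexC]; // scalar op after the extract
  int Vec[4];
  for (int i = 0; i < 4; ++i)
    Vec[i] = X[i] + C[i];                 // the original vector op
  assert(Scalarized == Vec[IndexC]);      // same element, one scalar add
}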
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
- // (vextract (scalar_to_vector val, 0) -> val
- SDValue InVec = N->getOperand(0);
- EVT VT = InVec.getValueType();
- EVT NVT = N->getValueType(0);
+ SDValue VecOp = N->getOperand(0);
+ SDValue Index = N->getOperand(1);
+ EVT ScalarVT = N->getValueType(0);
+ EVT VecVT = VecOp.getValueType();
+ if (VecOp.isUndef())
+ return DAG.getUNDEF(ScalarVT);
- if (InVec.isUndef())
- return DAG.getUNDEF(NVT);
+  // extract_vector_elt (insert_vector_elt vec, val, idx), idx -> val
+ //
+ // This only really matters if the index is non-constant since other combines
+ // on the constant elements already work.
+ SDLoc DL(N);
+ if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ Index == VecOp.getOperand(2)) {
+ SDValue Elt = VecOp.getOperand(1);
+ return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
+ }
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+  // (vextract (scalar_to_vector val), 0) -> val
+ if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
- SDValue InOp = InVec.getOperand(0);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
+ SDValue InOp = VecOp.getOperand(0);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
}
- SDValue EltNo = N->getOperand(1);
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
-
// extract_vector_elt of out-of-bounds element -> UNDEF
- if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
- return DAG.getUNDEF(NVT);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (IndexC && IndexC->getAPIntValue().uge(NumElts))
+ return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (build_vector x, y), 1 -> y
- if (ConstEltNo &&
- InVec.getOpcode() == ISD::BUILD_VECTOR &&
- TLI.isTypeLegal(VT) &&
- (InVec.hasOneUse() ||
- TLI.aggressivelyPreferBuildVectorSources(VT))) {
- SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VecVT) &&
+ (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
+ SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
EVT InEltVT = Elt.getValueType();
// Sometimes build_vector's scalar input types do not match result type.
- if (NVT == InEltVT)
+ if (ScalarVT == InEltVT)
return Elt;
    // TODO: It may be useful to truncate, if free, when the build_vector
    // implicitly converts.
}
- // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
- bool isLE = DAG.getDataLayout().isLittleEndian();
- unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
- if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
- ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
- SDValue BCSrc = InVec.getOperand(0);
- if (BCSrc.getValueType().isScalarInteger())
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
+ // TODO: These transforms should not require the 'hasOneUse' restriction, but
+ // there are regressions on multiple targets without it. We can end up with a
+ // mess of scalar and vector code if we reduce only part of the DAG to scalar.
+ if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
+ VecOp.hasOneUse()) {
+    // The vector index of the LSBs of the source depends on the endianness.
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ExtractIndex = IndexC->getZExtValue();
+ // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
+ unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
+ SDValue BCSrc = VecOp.getOperand(0);
+ if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
+
+ if (LegalTypes && BCSrc.getValueType().isInteger() &&
+ BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
+ // trunc i64 X to i32
+ SDValue X = BCSrc.getOperand(0);
+ assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
+ "Extract element and scalar to vector can't change element type "
+ "from FP to integer.");
+ unsigned XBitWidth = X.getValueSizeInBits();
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
+
+ // An extract element return value type can be wider than its vector
+ // operand element type. In that case, the high bits are undefined, so
+      // we may need to extend rather than truncate.
+ if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
+ assert(XBitWidth % VecEltBitWidth == 0 &&
+ "Scalar bitwidth must be a multiple of vector element bitwidth");
+ return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
+ }
+ }
}
- // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
- //
- // This only really matters if the index is non-constant since other combines
- // on the constant elements already work.
- if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
- EltNo == InVec.getOperand(2)) {
- SDValue Elt = InVec.getOperand(1);
- return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
- }
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ return BO;
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
@@ -15198,30 +15797,29 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// patterns. For example on AVX, extracting elements from a wide vector
// without using extract_subvector. However, if we can find an underlying
// scalar value, then we can always use that.
- if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
- int NumElem = VT.getVectorNumElements();
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
// Find the new index to extract from.
- int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
+ int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
- return DAG.getUNDEF(NVT);
+ return DAG.getUNDEF(ScalarVT);
// Select the right vector half to extract from.
SDValue SVInVec;
- if (OrigElt < NumElem) {
- SVInVec = InVec->getOperand(0);
+ if (OrigElt < (int)NumElts) {
+ SVInVec = VecOp.getOperand(0);
} else {
- SVInVec = InVec->getOperand(1);
- OrigElt -= NumElem;
+ SVInVec = VecOp.getOperand(1);
+ OrigElt -= NumElts;
}
if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue InOp = SVInVec.getOperand(OrigElt);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
@@ -15232,136 +15830,131 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LegalOperations ||
// FIXME: Should really be just isOperationLegalOrCustom.
- TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
- TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
+ TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
- DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
+ DAG.getConstant(OrigElt, DL, IndexTy));
}
}
// If only EXTRACT_VECTOR_ELT nodes use the source vector we can
// simplify it based on the (valid) extraction indices.
- if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+ if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Use->getOperand(0) == InVec &&
+ Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
- for (SDNode *Use : InVec->uses()) {
+ APInt DemandedElts = APInt::getNullValue(NumElts);
+ for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
- if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+ if (CstElt->getAPIntValue().ult(NumElts))
DemandedElts.setBit(CstElt->getZExtValue());
}
- if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+ if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
return SDValue(N, 0);
+ }
}
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
+  // Everything from here on tries to match an extract of a loaded value.
// If the result of load has to be truncated, then it's not necessarily
// profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VecVT.getVectorElementType();
+ EVT LVT = ExtVT;
+ if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
return SDValue();
- if (InVec.getOpcode() == ISD::BITCAST) {
+ if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
+ if (!VecOp.hasOneUse())
return SDValue();
- EVT BCVT = InVec.getOperand(0).getValueType();
+ EVT BCVT = VecOp.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ if (NumElts != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
+ VecOp = VecOp.getOperand(0);
ExtVT = BCVT.getVectorElementType();
}
- // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
- if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
- ISD::isNormalLoad(InVec.getNode()) &&
- !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
- SDValue Index = N->getOperand(1);
- if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
- if (!OrigLoad->isVolatile()) {
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
- OrigLoad);
- }
- }
+ // extract (vector load $addr), i --> load $addr + i * size
+ if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
+ ISD::isNormalLoad(VecOp.getNode()) &&
+ !Index->hasPredecessor(VecOp.getNode())) {
+ auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
+ if (VecLoad && !VecLoad->isVolatile())
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
- if (!LegalOperations) return SDValue();
+ if (!LegalOperations || !IndexC)
+ return SDValue();
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ int Elt = IndexC->getZExtValue();
+ LoadSDNode *LN0 = nullptr;
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ VecOp.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
- if (ConstEltNo) {
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
+ }
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
- LoadSDNode *LN0 = nullptr;
- const ShuffleVectorSDNode *SVN = nullptr;
- if (ISD::isNormalLoad(InVec.getNode())) {
- LN0 = cast<LoadSDNode>(InVec);
- } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- InVec.getOperand(0).getValueType() == ExtVT &&
- ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
- LN0 = cast<LoadSDNode>(InVec.getOperand(0));
- } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
- // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
- // =>
- // (load $addr+1*size)
+    // Select the input vector, guarding against an out-of-range extract index.
+ int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
+ VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
+ if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- // If the bit convert changed the number of elements, it is unsafe
- // to examine the mask.
- if (BCNumEltsChanged)
+ if (!VecOp.hasOneUse())
return SDValue();
- // Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
- int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
- InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- InVec = InVec.getOperand(0);
- }
- if (ISD::isNormalLoad(InVec.getNode())) {
- LN0 = cast<LoadSDNode>(InVec);
- Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
- }
+ VecOp = VecOp.getOperand(0);
}
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
+ Index = DAG.getConstant(Elt, DL, Index.getValueType());
+ }
+ }
- // Make sure we found a non-volatile load and the extractelement is
- // the only use.
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
- return SDValue();
-
- // If Idx was -1 above, Elt is going to be -1, so just return undef.
- if (Elt == -1)
- return DAG.getUNDEF(LVT);
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+  if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || LN0->isVolatile())
+ return SDValue();
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
- }
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
- return SDValue();
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
@@ -15477,77 +16070,6 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return DAG.getBitcast(VT, BV);
}
-SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
- EVT VT = N->getValueType(0);
-
- unsigned NumInScalars = N->getNumOperands();
- SDLoc DL(N);
-
- EVT SrcVT = MVT::Other;
- unsigned Opcode = ISD::DELETED_NODE;
- unsigned NumDefs = 0;
-
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
- unsigned Opc = In.getOpcode();
-
- if (Opc == ISD::UNDEF)
- continue;
-
- // If all scalar values are floats and converted from integers.
- if (Opcode == ISD::DELETED_NODE &&
- (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
- Opcode = Opc;
- }
-
- if (Opc != Opcode)
- return SDValue();
-
- EVT InVT = In.getOperand(0).getValueType();
-
- // If all scalar values are typed differently, bail out. It's chosen to
- // simplify BUILD_VECTOR of integer types.
- if (SrcVT == MVT::Other)
- SrcVT = InVT;
- if (SrcVT != InVT)
- return SDValue();
- NumDefs++;
- }
-
- // If the vector has just one element defined, it's not worth to fold it into
- // a vectorized one.
- if (NumDefs < 2)
- return SDValue();
-
- assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
- && "Should only handle conversion from integer to float.");
- assert(SrcVT != MVT::Other && "Cannot determine source type!");
-
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
-
- if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
- return SDValue();
-
- // Just because the floating-point vector type is legal does not necessarily
- // mean that the corresponding integer vector type is.
- if (!isTypeLegal(NVT))
- return SDValue();
-
- SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
-
- if (In.isUndef())
- Opnds.push_back(DAG.getUNDEF(SrcVT));
- else
- Opnds.push_back(In.getOperand(0));
- }
- SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
- AddToWorklist(BV.getNode());
-
- return DAG.getNode(Opcode, DL, VT, BV);
-}
-
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
@@ -15669,6 +16191,78 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
return Shuffle;
}
+static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
+ assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // First, determine where the build vector is not undef.
+ // TODO: We could extend this to handle zero elements as well as undefs.
+ int NumBVOps = BV->getNumOperands();
+ int ZextElt = -1;
+ for (int i = 0; i != NumBVOps; ++i) {
+ SDValue Op = BV->getOperand(i);
+ if (Op.isUndef())
+ continue;
+ if (ZextElt == -1)
+ ZextElt = i;
+ else
+ return SDValue();
+ }
+ // Bail out if there's no non-undef element.
+ if (ZextElt == -1)
+ return SDValue();
+
+ // The build vector contains some number of undef elements and exactly
+ // one other element. That other element must be a zero-extended scalar
+ // extracted from a vector at a constant index to turn this into a shuffle.
+ // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ SDValue Zext = BV->getOperand(ZextElt);
+ if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
+ Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+ return SDValue();
+
+  // The zero-extended width must be a multiple of the source width.
+ SDValue Extract = Zext.getOperand(0);
+ unsigned DestSize = Zext.getValueSizeInBits();
+ unsigned SrcSize = Extract.getValueSizeInBits();
+ if (DestSize % SrcSize != 0)
+ return SDValue();
+
+ // Create a shuffle mask that will combine the extracted element with zeros
+ // and undefs.
+ int ZextRatio = DestSize / SrcSize;
+ int NumMaskElts = NumBVOps * ZextRatio;
+ SmallVector<int, 32> ShufMask(NumMaskElts, -1);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ if (i / ZextRatio == ZextElt) {
+ // The low bits of the (potentially translated) extracted element map to
+ // the source vector. The high bits map to zero. We will use a zero vector
+ // as the 2nd source operand of the shuffle, so use the 1st element of
+ // that vector (mask value is number-of-elements) for the high bits.
+ if (i % ZextRatio == 0)
+ ShufMask[i] = Extract.getConstantOperandVal(1);
+ else
+ ShufMask[i] = NumMaskElts;
+ }
+
+ // Undef elements of the build vector remain undef because we initialize
+ // the shuffle mask with -1.
+ }
+
+ // Turn this into a shuffle with zero if that's legal.
+ EVT VecVT = Extract.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
+ return SDValue();
+
+ // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
+ // bitcast (shuffle V, ZeroVec, VectorMask)
+ SDLoc DL(BV);
+ SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
+ SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
+ ShufMask);
+ return DAG.getBitcast(BV->getValueType(0), Shuf);
+}
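The mask construction above can be reproduced with plain vectors. A minimal sketch with hypothetical shapes: a 4-lane build_vector whose only defined lane (lane 1) is a 2x zext of element 3 of the source vector:

#include <cassert>
#include <vector>

int main() {
  int NumBVOps = 4, ZextElt = 1; // one defined lane, the rest undef
  int ZextRatio = 2;             // e.g. zext i32 -> i64
  int ExtractIdx = 3;            // extractelt V, 3 feeding the zext
  int NumMaskElts = NumBVOps * ZextRatio;
  std::vector<int> Mask(NumMaskElts, -1); // undef lanes stay -1
  for (int i = 0; i != NumMaskElts; ++i)
    if (i / ZextRatio == ZextElt)
      // Low sub-lane takes the source element; high sub-lanes take a lane
      // of the all-zero second shuffle operand (index NumMaskElts).
      Mask[i] = (i % ZextRatio == 0) ? ExtractIdx : NumMaskElts;
  assert(Mask[0] == -1 && Mask[2] == 3 && Mask[3] == NumMaskElts);
}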
+
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
@@ -15680,6 +16274,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
if (!isTypeLegal(VT))
return SDValue();
+ if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
+ return V;
+
// May only combine to shuffle after legalize if shuffle is legal.
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
@@ -15943,7 +16540,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// TODO: Maybe this is useful for non-splat too?
if (!LegalOperations) {
if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
- Splat = peekThroughBitcast(Splat);
+ Splat = peekThroughBitcasts(Splat);
EVT SrcVT = Splat.getValueType();
if (SrcVT.isVector()) {
unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
@@ -15994,9 +16591,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
- if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
- return V;
-
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -16078,8 +16672,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
SmallVector<int, 8> Mask;
for (SDValue Op : N->ops()) {
- // Peek through any bitcast.
- Op = peekThroughBitcast(Op);
+ Op = peekThroughBitcasts(Op);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (Op.isUndef()) {
@@ -16096,9 +16689,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
// We want the EVT of the original extraction to correctly scale the
// extraction index.
EVT ExtVT = ExtVec.getValueType();
-
- // Peek through any bitcast.
- ExtVec = peekThroughBitcast(ExtVec);
+ ExtVec = peekThroughBitcasts(ExtVec);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (ExtVec.isUndef()) {
@@ -16162,11 +16753,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
- // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
- if (In->getOpcode() == ISD::BITCAST &&
- !In->getOperand(0).getValueType().isVector()) {
- SDValue Scalar = In->getOperand(0);
+ SDValue Scalar = peekThroughOneUseBitcasts(In);
+ // concat_vectors(scalar_to_vector(scalar), undef) ->
+ // scalar_to_vector(scalar)
+ if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Scalar.hasOneUse()) {
+ EVT SVT = Scalar.getValueType().getVectorElementType();
+ if (SVT == Scalar.getOperand(0).getValueType())
+ Scalar = Scalar.getOperand(0);
+ }
+
+ // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
+ if (!Scalar.getValueType().isVector()) {
// If the bitcast type isn't legal, it might be a trunc of a legal type;
// look through the trunc so we can still do the transform:
// concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
@@ -16175,7 +16774,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
Scalar = Scalar->getOperand(0);
- EVT SclTy = Scalar->getValueType(0);
+ EVT SclTy = Scalar.getValueType();
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
@@ -16303,60 +16902,93 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
-/// If we are extracting a subvector produced by a wide binary operator with at
-/// at least one operand that was the result of a vector concatenation, then try
-/// to use the narrow vector operands directly to avoid the concatenation and
-/// extraction.
+/// If we are extracting a subvector produced by a wide binary operator, try
+/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
// The extract index must be a constant, so we can map it to a concat operand.
- auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!ExtractIndex)
- return SDValue();
-
- // Only handle the case where we are doubling and then halving. A larger ratio
- // may require more than two narrow binops to replace the wide binop.
- EVT VT = Extract->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
- "Extract index is not a multiple of the vector length.");
- if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
+ auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!ExtractIndexC)
return SDValue();
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
- SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
-
- // TODO: The motivating case for this transform is an x86 AVX1 target. That
- // target has temptingly almost legal versions of bitwise logic ops in 256-bit
- // flavors, but no other 256-bit integer support. This could be extended to
- // handle any binop, but that may require fixing/adding other folds to avoid
- // codegen regressions.
- unsigned BOpcode = BinOp.getOpcode();
- if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
+ if (!ISD::isBinaryOp(BinOp.getNode()))
return SDValue();
- // The binop must be a vector type, so we can chop it in half.
+ // The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
if (!WideBVT.isVector())
return SDValue();
+ EVT VT = Extract->getValueType(0);
+ unsigned ExtractIndex = ExtractIndexC->getZExtValue();
+ assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
+ "Extract index is not a multiple of the vector length.");
+
+ // Bail out if this is not a proper multiple width extraction.
+ unsigned WideWidth = WideBVT.getSizeInBits();
+ unsigned NarrowWidth = VT.getSizeInBits();
+ if (WideWidth % NarrowWidth != 0)
+ return SDValue();
+
+ // Bail out if we are extracting a fraction of a single operation. This can
+ // occur because we potentially looked through a bitcast of the binop.
+ unsigned NarrowingRatio = WideWidth / NarrowWidth;
+ unsigned WideNumElts = WideBVT.getVectorNumElements();
+ if (WideNumElts % NarrowingRatio != 0)
+ return SDValue();
+
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
- WideBVT.getVectorNumElements() / 2);
+ WideNumElts / NarrowingRatio);
+ unsigned BOpcode = BinOp.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
- // Peek through bitcasts of the binary operator operands if needed.
- SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
- SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
+ // If extraction is cheap, we don't need to look at the binop operands
+ // for concat ops. The narrow binop alone makes this transform profitable.
+ // We can't just reuse the original extract index operand because we may have
+ // bitcasted.
+ unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
+ unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+ EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
+ BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
+ // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
+ SDLoc DL(Extract);
+ SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), NewExtIndex);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), NewExtIndex);
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
+ BinOp.getNode()->getFlags());
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
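The ExtBOIdx remapping accounts for a bitcast changing the element count. A plain-integer sketch with hypothetical types (extracting the v2i32 upper half of a value that is really a v8i16 binop):

#include <cassert>

int main() {
  unsigned VTNumElts = 2;    // elements in the extracted type (v2i32)
  unsigned ExtractIndex = 2; // index in the bitcasted (v4i32) view
  unsigned WideNumElts = 8;  // elements in the binop type (v8i16)
  unsigned WideWidth = 128, NarrowWidth = 64;
  unsigned NarrowingRatio = WideWidth / NarrowWidth;     // 2
  unsigned NarrowBVTElts = WideNumElts / NarrowingRatio; // v4i16
  unsigned ConcatOpNum = ExtractIndex / VTNumElts;       // second half
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVTElts;
  assert(ExtBOIdx == 4); // i16 lanes [4..7] == i32 lanes [2..3], same bits
}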
+
+ // Only handle the case where we are doubling and then halving. A larger ratio
+ // may require more than two narrow binops to replace the wide binop.
+ if (NarrowingRatio != 2)
+ return SDValue();
+
+ // TODO: The motivating case for this transform is an x86 AVX1 target. That
+ // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+ // flavors, but no other 256-bit integer support. This could be extended to
+ // handle any binop, but that may require fixing/adding other folds to avoid
+ // codegen regressions.
+ if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ return SDValue();
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
// TODO: Should we also handle INSERT_SUBVECTOR patterns?
+ SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
+ SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
bool ConcatL =
LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
bool ConcatR =
@@ -16365,11 +16997,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
// If one of the binop operands was not the result of a concat, we must
- // extract a half-sized operand for our new narrow binop. We can't just reuse
- // the original extract index operand because we may have bitcasted.
- unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
- unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
- EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ // extract a half-sized operand for our new narrow binop.
SDLoc DL(Extract);
// extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
@@ -16397,17 +17025,19 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (DAG.getDataLayout().isBigEndian())
return SDValue();
- // TODO: The one-use check is overly conservative. Check the cost of the
- // extract instead or remove that condition entirely.
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
+ return SDValue();
+
+ // Allow targets to opt-out.
+ EVT VT = Extract->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
- EVT VT = Extract->getValueType(0);
SDLoc DL(Extract);
SDValue BaseAddr = Ld->getOperand(1);
unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
@@ -16440,9 +17070,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Vi if possible
// Only operand 0 is checked as 'concat' assumes all inputs of the same
// type.
- if (V->getOpcode() == ISD::CONCAT_VECTORS &&
+ if (V.getOpcode() == ISD::CONCAT_VECTORS &&
isa<ConstantSDNode>(N->getOperand(1)) &&
- V->getOperand(0).getValueType() == NVT) {
+ V.getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
@@ -16450,13 +17080,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return V->getOperand(Idx / NumElems);
}
- // Skip bitcasting
- V = peekThroughBitcast(V);
+ V = peekThroughBitcasts(V);
// If the input is a build vector. Try to make a smaller build vector.
- if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ if (V.getOpcode() == ISD::BUILD_VECTOR) {
if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- EVT InVT = V->getValueType(0);
+ EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
// Only do this if we won't split any elements.
@@ -16489,16 +17118,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
- if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
// being extracted are of same size.
- EVT SmallVT = V->getOperand(1).getValueType();
+ EVT SmallVT = V.getOperand(1).getValueType();
if (!NVT.bitsEq(SmallVT))
return SDValue();
// Only handle cases where both indexes are constants.
- ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
if (InsIdx && ExtIdx) {
// Combine:
@@ -16508,11 +17137,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// otherwise => (extract_subvec V1, ExtIdx)
if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
- return DAG.getBitcast(NVT, V->getOperand(1));
+ return DAG.getBitcast(NVT, V.getOperand(1));
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
- DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
- N->getOperand(1));
+ DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
+ N->getOperand(1));
}
}
@@ -16613,14 +17242,17 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SDValue N0 = SVN->getOperand(0);
SDValue N1 = SVN->getOperand(1);
- if (!N0->hasOneUse() || !N1->hasOneUse())
+ if (!N0->hasOneUse())
return SDValue();
// If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
- bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
- bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+ if (!N1->hasOneUse())
+ return SDValue();
+
+ bool N0AnyConst = isAnyConstantBuildVector(N0);
+ bool N1AnyConst = isAnyConstantBuildVector(N1);
if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
return SDValue();
if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -16686,8 +17318,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI,
- bool LegalOperations,
- bool LegalTypes) {
+ bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -16723,11 +17354,14 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
- if (!LegalTypes || TLI.isTypeLegal(OutVT))
+ // Never create an illegal type. Only create unsupported operations if we
+ // are pre-legalization.
+ if (TLI.isTypeLegal(OutVT))
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
return DAG.getBitcast(VT,
- DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+ DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
+ SDLoc(SVN), OutVT, N0));
}
return SDValue();
@@ -16747,7 +17381,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (!VT.isInteger() || IsBigEndian)
return SDValue();
- SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
+ SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
@@ -17032,7 +17666,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue(N, 0);
// Match shuffles that can be converted to any_vector_extend_in_reg.
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
@@ -17050,7 +17684,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
return Res;
@@ -17060,15 +17694,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
-
- // Peek through the bitcast only if there is one user.
- SDValue BC0 = N0;
- while (BC0.getOpcode() == ISD::BITCAST) {
- if (!BC0.hasOneUse())
- break;
- BC0 = BC0.getOperand(0);
- }
-
auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
if (Scale == 1)
return SmallVector<int, 8>(Mask.begin(), Mask.end());
@@ -17079,7 +17704,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(M < 0 ? -1 : Scale * M + s);
return NewMask;
};
-
+
+ SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
EVT InnerVT = BC0->getValueType(0);
@@ -17322,12 +17948,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N1.isUndef())
return N0;
- // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
- // us to pull BITCASTs from input to output.
- if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
- if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
- return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
-
// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -17375,6 +17995,14 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
+ // Eliminate an intermediate insert into an undef vector:
+ // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
+ // insert_subvector undef, X, N2
+ if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
+ N1.getOperand(1), N2);
+
if (!isa<ConstantSDNode>(N2))
return SDValue();
@@ -17410,6 +18038,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
+ // Simplify source operands based on insertion.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -17447,7 +18079,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
- SDValue RHS = peekThroughBitcast(N->getOperand(1));
+ SDValue RHS = peekThroughBitcasts(N->getOperand(1));
SDLoc DL(N);
// Make sure we're not running after operation legalization where it
@@ -17677,31 +18309,64 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
LLD->getBasePtr().getValueType()))
return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
+ return false;
+
// Check that the select condition doesn't reach either load. If so,
// folding this will induce a cycle into the DAG. If not, this is safe to
// xform, so create a select of the addresses.
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+  // Always fail if LLD and RLD are not independent. TheSelect is a
+  // predecessor of all nodes in question, so we need not search past it.
+
+ Visited.insert(TheSelect);
+ Worklist.push_back(LLD);
+ Worklist.push_back(RLD);
+
+ if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
+ return false;
+
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
+    // We cannot do this optimization if either of {LLD, RLD} is a
+    // predecessor of any of {LLD, RLD, CondNode}. We have already checked
+    // the loads against each other, so we only need to check whether
+    // CondNode is a successor of one of the loads. Even that can be skipped
+    // if the load's chain value has no uses.
SDNode *CondNode = TheSelect->getOperand(0).getNode();
- if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
- (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
- return false;
- // The loads must not depend on one another.
- if (LLD->isPredecessorOf(RLD) ||
- RLD->isPredecessorOf(LLD))
+ Worklist.push_back(CondNode);
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
+
Addr = DAG.getSelect(SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
RLD->getBasePtr());
} else { // Otherwise SELECT_CC
+ // We cannot do this optimization if either LLD or RLD is a predecessor
+ // of any of {RLD, LLD, CondLHS, CondRHS}. As we've already compared the
+ // loads against each other, we only need to check whether CondLHS or
+ // CondRHS is a successor of one of the loads, and we can skip even that
+ // if their chain values are unused.
+
SDNode *CondLHS = TheSelect->getOperand(0).getNode();
SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+ Worklist.push_back(CondLHS);
+ Worklist.push_back(CondRHS);
if ((LLD->hasAnyUseOfValue(1) &&
- (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
(RLD->hasAnyUseOfValue(1) &&
- (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
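The cycle checks above all funnel through SDNode::hasPredecessorHelper, which answers "is N an operand-transitive predecessor of anything seeded on Worklist" while accumulating the explored region in Visited, so the later queries in this function continue the same search rather than restarting it. A minimal sketch of the usage pattern, assuming only the SelectionDAG headers (names other than the helper itself are illustrative):

    // Sketch: test whether folding would make Candidate reach back into N.
    SmallPtrSet<const SDNode *, 32> Visited;   // grows across queries
    SmallVector<const SDNode *, 16> Worklist;  // seeds of the search
    Visited.insert(Root);                      // never search above Root
    Worklist.push_back(Candidate);
    // True iff N is a predecessor of some node on the worklist; a second
    // call with more seeds pushed reuses everything already in Visited.
    bool WouldCycle = SDNode::hasPredecessorHelper(N, Visited, Worklist);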
@@ -17816,6 +18481,63 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
+/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+/// in it. This may be a win when the constant is not otherwise available
+/// because it replaces two constant pool loads with one.
+SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+ return SDValue();
+
+ // If we are before legalize types, we want the other legalization to happen
+ // first (for example, to avoid messing with soft float).
+ auto *TV = dyn_cast<ConstantFPSDNode>(N2);
+ auto *FV = dyn_cast<ConstantFPSDNode>(N3);
+ EVT VT = N2.getValueType();
+ if (!TV || !FV || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // If a constant can be materialized without loads, this does not make sense.
+ if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
+ return SDValue();
+
+ // If both constants have multiple uses, then this transform saves nothing:
+ // the values are likely already in registers for their other users.
+ if (!TV->hasOneUse() && !FV->hasOneUse())
+ return SDValue();
+
+ Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue()) };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = DAG.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get offsets to the 0 and 1 elements of the array, so we can select between
+ // them.
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
+ SDValue Cond =
+ DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+ AddToWorklist(Cond.getNode());
+ SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
+ AddToWorklist(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
+ AddToWorklist(CPIdx.getNode());
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(
+ DAG.getMachineFunction()), Alignment);
+}
+
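A source-level picture of the transform this new helper performs (illustrative only; the variable names are made up):

    // Before: two potential constant pool loads, selected between.
    //   float r = (a < b) ? 1.0f : 2.0f;
    // After: one shared array and a single load. Note the {FV, TV} order,
    // matching Elts[] above, so a true condition selects offset EltSize.
    //   static const float tmp[2] = {2.0f, 1.0f};
    //   float r = tmp[(a < b) ? 1 : 0];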
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
@@ -17824,75 +18546,26 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// (x ? y : y) -> y.
if (N2 == N3) return N2;
+ EVT CmpOpVT = N0.getValueType();
EVT VT = N2.getValueType();
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
- // Determine if the condition we're dealing with is constant
- SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
- N0, N1, CC, DL, false);
+ // Determine if the condition we're dealing with is constant.
+ SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
+ false);
if (SCC.getNode()) AddToWorklist(SCC.getNode());
- if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
return !SCCC->isNullValue() ? N2 : N3;
}
- // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
- // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
- // in it. This is a win when the constant is not otherwise available because
- // it replaces two constant pool loads with one. We only do this if the FP
- // type is known to be legal, because if it isn't, then we are before legalize
- // types an we want the other legalization to happen first (e.g. to avoid
- // messing with soft float) and if the ConstantFP is not legal, because if
- // it is legal, we may not need to store the FP constant in a constant pool.
- if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
- if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
- if (TLI.isTypeLegal(N2.getValueType()) &&
- (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
- TargetLowering::Legal &&
- !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
- !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
- // If both constants have multiple uses, then we won't need to do an
- // extra load, they are likely around in registers for other users.
- (TV->hasOneUse() || FV->hasOneUse())) {
- Constant *Elts[] = {
- const_cast<ConstantFP*>(FV->getConstantFPValue()),
- const_cast<ConstantFP*>(TV->getConstantFPValue())
- };
- Type *FPTy = Elts[0]->getType();
- const DataLayout &TD = DAG.getDataLayout();
-
- // Create a ConstantArray of the two constants.
- Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
- SDValue CPIdx =
- DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
- TD.getPrefTypeAlignment(FPTy));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
-
- // Get the offsets to the 0 and 1 element of the array so that we can
- // select between them.
- SDValue Zero = DAG.getIntPtrConstant(0, DL);
- unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
- SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
-
- SDValue Cond = DAG.getSetCC(DL,
- getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- AddToWorklist(Cond.getNode());
- SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
- Cond, One, Zero);
- AddToWorklist(CstOffset.getNode());
- CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
- CstOffset);
- AddToWorklist(CPIdx.getNode());
- return DAG.getLoad(
- TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- Alignment);
- }
- }
+ if (SDValue V =
+ convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
+ return V;
if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
return V;
@@ -17906,7 +18579,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
- ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
@@ -17927,48 +18600,48 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
// fold select C, 16, 0 -> shl C, 4
- if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents(N0.getValueType()) ==
- TargetLowering::ZeroOrOneBooleanContent) {
+ bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+ bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+ if ((Fold || Swap) &&
+ TLI.getBooleanContents(CmpOpVT) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
+
+ if (Swap) {
+ CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
+ std::swap(N2C, N3C);
+ }
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
if (NotExtCompare && N2C->isOne())
return SDValue();
- // Get a SetCC of the condition
- // NOTE: Don't create a SETCC if it's not legal on this target.
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
- SDValue Temp, SCC;
- // cast from setcc result type to select result type
- if (LegalTypes) {
- SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- if (N2.getValueType().bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
- N2.getValueType());
- else
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
- N2.getValueType(), SCC);
- } else {
- SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
- N2.getValueType(), SCC);
- }
+ SDValue Temp, SCC;
+ // zext (setcc n0, n1)
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
+ if (VT.bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ } else {
+ SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ }
- AddToWorklist(SCC.getNode());
- AddToWorklist(Temp.getNode());
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
- if (N2C->isOne())
- return Temp;
+ if (N2C->isOne())
+ return Temp;
- // shl setcc result by log2 n2c
- return DAG.getNode(
- ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
- getShiftAmountTy(Temp.getValueType())));
- }
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ SDLoc(Temp),
+ getShiftAmountTy(Temp.getValueType())));
}
// Check to see if this is an integer abs.
@@ -17988,18 +18661,16 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
- EVT XType = N0.getValueType();
- if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
SDLoc DL(N0);
- SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
- N0,
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- SDValue Add = DAG.getNode(ISD::ADD, DL,
- XType, N0, Shift);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
+ DL,
+ getShiftAmountTy(CmpOpVT)));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
}
}
@@ -18060,21 +18731,14 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
- ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
- if (!C)
- return SDValue();
-
- // Avoid division by zero.
- if (C->isNullValue())
- return SDValue();
-
SmallVector<SDNode *, 8> Built;
- SDValue S =
- TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
+ if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
@@ -18089,11 +18753,13 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
SmallVector<SDNode *, 8> Built;
- SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);
+ if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
@@ -18106,21 +18772,14 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
- ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
- if (!C)
- return SDValue();
-
- // Avoid division by zero.
- if (C->isNullValue())
- return SDValue();
-
SmallVector<SDNode *, 8> Built;
- SDValue S =
- TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
+ if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
/// Determines the LogBase2 value for a non-null input value using the
@@ -18576,6 +19235,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// TODO: Replace with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
+
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@@ -18588,13 +19252,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
-bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
- return false;
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ StoreSDNode *STChain = St;
+ // Intervals records which offsets from BaseIndex have been covered. In
+ // the common case, every store writes to the address range immediately
+ // before the previous one and is thus merged with the previous interval
+ // at insertion time.
+
+ using IMap =
+ llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ IMap::Allocator A;
+ IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -18604,76 +19277,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
- SmallVector<StoreSDNode *, 8> ChainedStores;
- ChainedStores.push_back(St);
+ // Add ST's interval.
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- StoreSDNode *Index = St;
- while (Index) {
+ while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ if (!SDValue(Chain, 0)->hasOneUse())
break;
-
- if (Index->isVolatile() || Index->isIndexed())
+ if (Chain->isVolatile() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
-
+ const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
- if (!BasePtr.equalBaseIndex(Ptr, DAG))
+ int64_t Offset;
+ if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
+ break;
+ int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+ // Make sure we don't overlap with other intervals by checking the ones to
+ // the left or right before inserting.
+ auto I = Intervals.find(Offset);
+ // If there's a next interval, we should end before it.
+ if (I != Intervals.end() && I.start() < (Offset + Length))
+ break;
+ // If there's a previous interval, we should start after it.
+ if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
+ Intervals.insert(Offset, Offset + Length, Unit);
- // Walk up the chain to find the next store node, ignoring any
- // intermediate loads. Any other kind of node will halt the loop.
- SDNode *NextInChain = Index->getChain().getNode();
- while (true) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- if (STn->isVolatile() || STn->isIndexed()) {
- Index = nullptr;
- break;
- }
- ChainedStores.push_back(STn);
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }// end while
+ ChainedStores.push_back(Chain);
+ STChain = Chain;
}
- // At this point, ChainedStores lists all of the Store nodes
- // reachable by iterating up through chain nodes matching the above
- // conditions. For each such store identified, try to find an
- // earlier chain to attach the store to which won't violate the
- // required ordering.
- bool MadeChangeToSt = false;
- SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+ // If we didn't find a chained store, exit.
+ if (ChainedStores.size() == 0)
+ return false;
+
+ // Improve all chained stores (St and the ChainedStores members) starting
+ // from where the store chain ended, and return a single TokenFactor.
+ SDValue NewChain = STChain->getChain();
+ SmallVector<SDValue, 8> TFOps;
+ for (unsigned I = ChainedStores.size(); I;) {
+ StoreSDNode *S = ChainedStores[--I];
+ SDValue BetterChain = FindBetterChain(S, NewChain);
+ S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+ S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+ TFOps.push_back(SDValue(S, 0));
+ ChainedStores[I] = S;
+ }
+
+ // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+ SDValue BetterChain = FindBetterChain(St, NewChain);
+ SDValue NewST;
+ if (St->isTruncatingStore())
+ NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand());
+ else
+ NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemOperand());
- for (StoreSDNode *ChainedStore : ChainedStores) {
- SDValue Chain = ChainedStore->getChain();
- SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+ TFOps.push_back(NewST);
- if (Chain != BetterChain) {
- if (ChainedStore == St)
- MadeChangeToSt = true;
- BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
- }
- }
+ // If we improved every element of TFOps, then St's successors have lost
+ // their dependence on NewChain and we need to add it back to TFOps. Do so
+ // at the beginning to keep relative order consistent with FindBetterChain.
+ auto hasImprovedChain = [&](SDValue ST) -> bool {
+ return ST->getOperand(0) != NewChain;
+ };
+ bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+ if (AddNewChain)
+ TFOps.insert(TFOps.begin(), NewChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ CombineTo(St, TF);
+
+ AddToWorklist(STChain);
+ // Add TF operands to the worklist in reverse order.
+ for (auto I = TF->getNumOperands(); I;)
+ AddToWorklist(TF->getOperand(--I).getNode());
+ AddToWorklist(TF.getNode());
+ return true;
+}
+
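The coverage bookkeeping above is ordinary llvm::IntervalMap usage with half-open intervals. A self-contained sketch of the same pattern (the helper name and the simplification to only the right-neighbor check are mine; the patch also guards against the interval to the left, and constructs the map from an IMap::Allocator as shown above):

    #include "llvm/ADT/IntervalMap.h"
    using namespace llvm;

    struct UnitT {};  // payload is irrelevant; only coverage matters
    static bool operator==(const UnitT &, const UnitT &) { return true; }
    static bool operator!=(const UnitT &, const UnitT &) { return false; }

    using IMap =
        IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;

    // Record [Offset, Offset + Length) unless it collides with the next
    // interval to the right.
    static bool tryCover(IMap &Intervals, int64_t Offset, int64_t Length) {
      auto I = Intervals.find(Offset);
      if (I != Intervals.end() && I.start() < Offset + Length)
        return false;  // overlaps an already-covered range
      Intervals.insert(Offset, Offset + Length, UnitT());
      return true;
    }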
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
- // Do all replacements after finding the replacements to make to avoid making
- // the chains more complicated by introducing new TokenFactors.
- for (auto Replacement : BetterChains)
- replaceStoreChain(Replacement.first, Replacement.second);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- return MadeChangeToSt;
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Directly improve a chain of disjoint stores starting at St.
+ if (parallelizeChainedStores(St))
+ return true;
+
+ // Improve St's chain.
+ SDValue BetterChain = FindBetterChain(St, St->getChain());
+ if (St->getChain() != BetterChain) {
+ replaceStoreChain(St, BetterChain);
+ return true;
+ }
+ return false;
}
/// This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 795ade588b8f..a9a3c44ea0c9 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -89,6 +89,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -110,6 +111,7 @@
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "isel"
@@ -545,6 +547,15 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
"Invalid iterator!");
while (I != E) {
+ if (LastFlushPoint == I)
+ LastFlushPoint = E;
+ if (SavedInsertPt == I)
+ SavedInsertPt = E;
+ if (EmitStartPt == I)
+ EmitStartPt = E.isValid() ? &*E : nullptr;
+ if (LastLocalValue == I)
+ LastLocalValue = E.isValid() ? &*E : nullptr;
+
MachineInstr *Dead = &*I;
++I;
Dead->eraseFromParent();
@@ -1426,6 +1437,18 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
}
return true;
}
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst *DI = cast<DbgLabelInst>(II);
+ assert(DI->getLabel() && "Missing label");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
+ return true;
+ }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -1436,6 +1459,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
+ case Intrinsic::is_constant: {
+ Constant *ResCI = ConstantInt::get(II->getType(), 0);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
@@ -1565,7 +1596,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
@@ -1629,7 +1660,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
// PHI node handling may have generated local value instructions.
// We remove them because SelectionDAGISel will generate them again.
removeDeadLocalValueCode(SavedLastLocalValue);
@@ -1680,7 +1711,10 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
/// Emit an FNeg operation.
bool FastISel::selectFNeg(const User *I) {
- unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ Value *X;
+ if (!match(I, m_FNeg(m_Value(X))))
+ return false;
+ unsigned OpReg = getRegForValue(X);
if (!OpReg)
return false;
bool OpRegIsKill = hasTrivialKill(I);
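m_FNeg comes from llvm/IR/PatternMatch.h; at this point in LLVM's history it matches the canonical (fsub -0.0, X) encoding of negation. The generic usage shape, as a hedged sketch:

    using namespace llvm::PatternMatch;
    Value *X;
    if (match(I, m_FNeg(m_Value(X)))) {
      // I computes -X; X is now bound to the negated operand.
    }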
@@ -1770,11 +1804,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
return selectBinaryOp(I, ISD::SUB);
- case Instruction::FSub:
+ case Instruction::FSub:
// FNeg is currently represented in LLVM IR as a special case of FSub.
- if (BinaryOperator::isFNeg(I))
- return selectFNeg(I);
- return selectBinaryOp(I, ISD::FSUB);
+ return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB);
case Instruction::Mul:
return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
@@ -2211,7 +2243,7 @@ unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
/// might result in multiple MBB's for one BB. As such, the start of the
/// BB might correspond to a different MBB than the end.
bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
- const TerminatorInst *TI = LLVMBB->getTerminator();
+ const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d3c31911d677..fba728625b07 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -579,9 +579,18 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const
const Value *
FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
if (VirtReg2Value.empty()) {
+ SmallVector<EVT, 4> ValueVTs;
for (auto &P : ValueMap) {
- VirtReg2Value[P.second] = P.first;
+ ValueVTs.clear();
+ ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+ P.first->getType(), ValueVTs);
+ unsigned Reg = P.second;
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ VirtReg2Value[Reg++] = P.first;
+ }
}
}
- return VirtReg2Value[Vreg];
+ return VirtReg2Value.lookup(Vreg);
}
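The rewritten loop accounts for IR values that legalize into more than one virtual register: the consecutive vregs starting at ValueMap[V] all belong to V. A worked example of the arithmetic for one map entry (the i128-on-a-64-bit-target case is an assumption for illustration):

    // V : i128 on a 64-bit target
    //   ComputeValueVTs(*TLI, DL, V->getType(), ValueVTs) -> { i128 }
    //   TLI->getNumRegisters(Ctx, i128)                   -> 2 (two i64s)
    // so both vregs map back to the same IR value:
    //   VirtReg2Value[Reg]     = V;   // low half
    //   VirtReg2Value[Reg + 1] = V;   // high half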
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index d6171f3177d7..6a6114677cc2 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -524,7 +524,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
Reg = R->getReg();
DefMI = nullptr;
} else {
- Reg = getVR(Node->getOperand(0), VRBaseMap);
+ Reg = R ? R->getReg() : getVR(Node->getOperand(0), VRBaseMap);
DefMI = MRI->getVRegDef(Reg);
}
@@ -652,6 +652,12 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
+ // If the input pattern has a chain, then the root of the corresponding
+ // output pattern will get a chain as well. That root can happen to be a
+ // REG_SEQUENCE (which is not "guarded" by countOperands/CountResults).
+ if (NumOps && Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ --NumOps; // Ignore chain if it exists.
+
assert((NumOps & 1) == 1 &&
"REG_SEQUENCE must have an odd number of operands!");
for (unsigned i = 1; i != NumOps; ++i) {
@@ -694,6 +700,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
+ SD->setIsEmitted();
+
+ if (SD->isInvalidated()) {
+ // An invalidated SDNode must generate an undef DBG_VALUE: although the
+ // original value is no longer computed, earlier DBG_VALUEs' live ranges
+ // must not leak into later code.
+ auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.addReg(0U);
+ MIB.addReg(0U, RegState::Debug);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ return &*MIB;
+ }
+
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
@@ -735,6 +755,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addImm(CI->getSExtValue());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
MIB.addFPImm(CF);
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Note: This assumes that all nullptr constants are zero-valued.
+ MIB.addImm(0);
} else {
// Could be an Undef. In any case insert an Undef so we can see what we
// dropped.
@@ -868,6 +891,15 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasAllowReassociation())
MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+
+ if (Flags.hasNoUnsignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoUWrap);
+
+ if (Flags.hasNoSignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoSWrap);
+
+ if (Flags.hasExact())
+ MI->setFlag(MachineInstr::MIFlag::IsExact);
}
// Emit all of the actual operands of this instruction, adding them to the
@@ -886,9 +918,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
RegState::EarlyClobber);
- // Transfer all of the memory reference descriptions of this instruction.
- MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
- cast<MachineSDNode>(Node)->memoperands_end());
+ // Set the memory reference descriptions of this instruction now that it is
+ // part of the function.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands());
// Insert the instruction into position in the block. This needs to
// happen before any custom inserter hook is called so that the
@@ -950,7 +982,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
// Finally mark unused registers as dead.
- if (!UsedRegs.empty() || II.getImplicitDefs())
+ if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2b7ba1ffb309..d3aea37f944d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -176,7 +176,6 @@ private:
SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
- SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -239,7 +238,7 @@ public:
} // end anonymous namespace
/// Return a vector shuffle operation which
-/// performs the same shuffe in terms of order or result bytes, but on a type
+/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(
@@ -1060,6 +1059,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
case ISD::ADDROFRETURNADDR:
+ case ISD::SPONENTRY:
// These operations lie about being legal: when they claim to be legal,
// they should actually be custom-lowered.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1094,6 +1094,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -1107,6 +1108,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -1114,6 +1121,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0));
break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ }
+ case ISD::SMULFIX: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
+ case ISD::MSCATTER:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::MSTORE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1148,6 +1176,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
}
break;
+ case ISD::FSHL:
+ case ISD::FSHR:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
case ISD::SHL_PARTS: {
@@ -1247,6 +1277,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// Caches for hasPredecessorHelper
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Op.getNode());
Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
@@ -1489,24 +1520,20 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
// Get the signbit at the right position for MagAsInt.
int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
+ EVT ShiftVT = IntVT;
+ if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+ SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
+ ShiftVT = MagVT;
+ }
+ if (ShiftAmount > 0) {
+ SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst);
+ } else if (ShiftAmount < 0) {
+ SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
+ }
if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
- if (ShiftAmount > 0) {
- SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT);
- SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst);
- } else if (ShiftAmount < 0) {
- SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT);
- SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst);
- }
SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
- } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
- SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
- if (ShiftAmount > 0) {
- SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT);
- SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst);
- } else if (ShiftAmount < 0) {
- SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT);
- SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst);
- }
}
// Store the part with the modified sign and convert back to float.
@@ -2303,9 +2330,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
const SDLoc &dl) {
+ EVT SrcVT = Op0.getValueType();
+
// TODO: Should any fast-math-flags be set for the created nodes?
LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
- if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
"expansion\n");
@@ -2350,116 +2379,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// subtract the bias
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
// final result
- SDValue Result;
- // handle final rounding
- if (DestVT == MVT::f64) {
- // do nothing
- Result = Sub;
- } else if (DestVT.bitsLT(MVT::f64)) {
- Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
- DAG.getIntPtrConstant(0, dl));
- } else if (DestVT.bitsGT(MVT::f64)) {
- Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
- }
+ SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
- // Implementation of unsigned i64 to f64 following the algorithm in
- // __floatundidf in compiler_rt. This implementation has the advantage
- // of performing rounding correctly, both in the default rounding mode
- // and in all alternate rounding modes.
- // TODO: Generalize this for use with other types.
- if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
- SDValue TwoP52 =
- DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
- SDValue TwoP84PlusTwoP52 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl,
- MVT::f64);
- SDValue TwoP84 =
- DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64);
-
- SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
- SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
- DAG.getConstant(32, dl, MVT::i64));
- SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
- SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
- SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
- SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
- TwoP84PlusTwoP52);
- return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
- }
-
- // TODO: Generalize this for use with other types.
- if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
- // For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundidf in compiler_rt.
- if (!isSigned) {
- SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
-
- SDValue ShiftConst = DAG.getConstant(
- 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout()));
- SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, MVT::i64);
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
-
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
-
- // TODO: This really should be implemented using a branch rather than a
- // select. We happen to get lucky and machinesink does the right
- // thing most of the time. This would be a good candidate for a
- //pseudo-op, or, even better, for whole-function isel.
- SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
- Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
- return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast);
- }
-
- // Otherwise, implement the fully general conversion.
-
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64));
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
- DAG.getConstant(UINT64_C(0x800), dl, MVT::i64));
- SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64));
- SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2,
- DAG.getConstant(UINT64_C(0), dl, MVT::i64),
- ISD::SETNE);
- SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0);
- SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0,
- DAG.getConstant(UINT64_C(0x0020000000000000), dl,
- MVT::i64),
- ISD::SETUGE);
- SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0);
- EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout());
-
- SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
- DAG.getConstant(32, dl, SHVT));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
- SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
- SDValue TwoP32 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl,
- MVT::f64);
- SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
- SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
- SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
- SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
- return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
- DAG.getIntPtrConstant(0, dl));
- }
-
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
- SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()),
- Op0,
- DAG.getConstant(0, dl, Op0.getValueType()),
- ISD::SETLT);
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
+ DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
SDValue Zero = DAG.getIntPtrConstant(0, dl),
Four = DAG.getIntPtrConstant(4, dl);
SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
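The deleted __floatundidf-style block moves behind the target-independent TLI.expandUINT_TO_FP hook (wired up in ExpandNode below); its bit trick is easiest to see in scalar form. A standalone sketch, assuming IEEE-754 doubles (the function name is made up):

    #include <cstdint>
    #include <cstring>

    double u64_to_f64(uint64_t x) {
      // Build 2^52 + lo32 and 2^84 + hi32 * 2^32 by OR-ing into mantissas.
      uint64_t LoBits = UINT64_C(0x4330000000000000) | (x & 0xFFFFFFFFu);
      uint64_t HiBits = UINT64_C(0x4530000000000000) | (x >> 32);
      double Lo, Hi;
      std::memcpy(&Lo, &LoBits, sizeof(Lo));
      std::memcpy(&Hi, &HiBits, sizeof(Hi));
      // 0x1.00000001p+84 == 2^84 + 2^52: one exact subtract cancels both
      // biases, and the final add performs the only rounding step, which
      // is why this expansion is correct in all rounding modes.
      return (Hi - 0x1.00000001p+84) + Lo;
    }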
@@ -2469,7 +2398,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// as a negative number. To counteract this, the dynamic code adds an
// offset depending on the data type.
uint64_t FF;
- switch (Op0.getSimpleValueType().SimpleTy) {
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
@@ -2618,22 +2547,22 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
// swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
}
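A scalar rendering of the swap network in the i8 case above (a hedged sketch; the legalizer emits the same masks splat to vector width, and the diff's fix is that the shift amounts must use the shift-amount type SHVT, not VT):

    #include <cstdint>

    uint8_t BitReverse8(uint8_t V) {
      V = ((V & 0xF0) >> 4) | ((V & 0x0F) << 4);  // swap nibbles
      V = ((V & 0xCC) >> 2) | ((V & 0x33) << 2);  // swap bit pairs
      V = ((V & 0xAA) >> 1) | ((V & 0x55) << 1);  // swap adjacent bits
      return V;
    }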
@@ -2709,126 +2638,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
}
}
-/// Expand the specified bitcount instruction into operations.
-SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
- const SDLoc &dl) {
- switch (Opc) {
- default: llvm_unreachable("Cannot expand this yet!");
- case ISD::CTPOP: {
- EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- unsigned Len = VT.getSizeInBits();
-
- assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
- "CTPOP not implemented for this type.");
-
- // This is the "best" algorithm from
- // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-
- SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)),
- dl, VT);
- SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)),
- dl, VT);
- SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)),
- dl, VT);
- SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)),
- dl, VT);
-
- // v = v - ((v >> 1) & 0x55555555...)
- Op = DAG.getNode(ISD::SUB, dl, VT, Op,
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(1, dl, ShVT)),
- Mask55));
- // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- Op = DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(2, dl, ShVT)),
- Mask33));
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- Op = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::ADD, dl, VT, Op,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(4, dl, ShVT))),
- Mask0F);
- // v = (v * 0x01010101...) >> (Len - 8)
- Op = DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
-
- return Op;
- }
- case ISD::CTLZ_ZERO_UNDEF:
- // This trivially expands to CTLZ.
- return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
- case ISD::CTLZ: {
- EVT VT = Op.getValueType();
- unsigned Len = VT.getSizeInBits();
-
- if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
- EVT SetCCVT = getSetCCResultType(VT);
- SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(Len, dl, VT), CTLZ);
- }
-
- // for now, we do this:
- // x = x | (x >> 1);
- // x = x | (x >> 2);
- // ...
- // x = x | (x >>16);
- // x = x | (x >>32); // for 64-bit input
- // return popcount(~x);
- //
- // Ref: "Hacker's Delight" by Henry Warren
- EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
- SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
- Op = DAG.getNode(ISD::OR, dl, VT, Op,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
- }
- Op = DAG.getNOT(dl, Op, VT);
- return DAG.getNode(ISD::CTPOP, dl, VT, Op);
- }
- case ISD::CTTZ_ZERO_UNDEF:
- // This trivially expands to CTTZ.
- return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
- case ISD::CTTZ: {
- EVT VT = Op.getValueType();
- unsigned Len = VT.getSizeInBits();
-
- if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
- EVT SetCCVT = getSetCCResultType(VT);
- SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(Len, dl, VT), CTTZ);
- }
-
- // for now, we use: { return popcount(~x & (x - 1)); }
- // unless the target has ctlz but not ctpop, in which case we use:
- // { return 32 - nlz(~x & (x-1)); }
- // Ref: "Hacker's Delight" by Henry Warren
- SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNOT(dl, Op, VT),
- DAG.getNode(ISD::SUB, dl, VT, Op,
- DAG.getConstant(1, dl, VT)));
- // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
- if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
- return DAG.getNode(ISD::SUB, dl, VT,
- DAG.getConstant(VT.getSizeInBits(), dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
- return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
- }
- }
-}
-
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
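The deleted CTPOP expansion survives behind TLI.expandCTPOP (see the new cases just below); its mask-and-add ladder in 32-bit scalar form, per the bithacks reference cited in the removed comment:

    #include <cstdint>

    uint32_t PopCount32(uint32_t V) {
      V = V - ((V >> 1) & 0x55555555);                 // 2-bit sums
      V = (V & 0x33333333) + ((V >> 2) & 0x33333333);  // 4-bit sums
      V = (V + (V >> 4)) & 0x0F0F0F0F;                 // 8-bit sums
      return (V * 0x01010101) >> 24;                   // Len - 8 == 24
    }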
@@ -2836,13 +2645,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
bool NeedInvert;
switch (Node->getOpcode()) {
+ case ISD::ABS:
+ if (TLI.expandABS(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTPOP:
+ if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
- Results.push_back(Tmp1);
+ if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
@@ -3037,8 +2856,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
+ Results.push_back(Tmp1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SINT_TO_FP:
Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
Node->getOperand(0), Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -3047,29 +2871,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
- case ISD::FP_TO_UINT: {
- SDValue True, False;
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
- APFloat apf(DAG.EVTToAPFloatSemantics(VT),
- APInt::getNullValue(VT.getSizeInBits()));
- APInt x = APInt::getSignMask(NVT.getSizeInBits());
- (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
- Tmp1 = DAG.getConstantFP(apf, dl, VT);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
- Node->getOperand(0),
- Tmp1, ISD::SETLT);
- True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
- // TODO: Should any fast-math-flags be set for the FSUB?
- False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
- DAG.getNode(ISD::FSUB, dl, VT,
- Node->getOperand(0), Tmp1));
- False = DAG.getNode(ISD::XOR, dl, NVT, False,
- DAG.getConstant(x, dl, NVT));
- Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False);
- Results.push_back(Tmp1);
+ case ISD::FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
- }
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
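The removed FP_TO_UINT lowering likewise moves behind TLI.expandFP_TO_UINT. The select-based scheme it implemented, in scalar form (a hedged sketch; assumes the input is in range for the conversion):

    #include <cstdint>

    uint64_t F64ToU64(double X) {
      const double Cut = 9223372036854775808.0;  // 2^63, the signmask value
      if (X < Cut)                               // fits in a signed convert
        return (uint64_t)(int64_t)X;
      // Too big for signed: bias down by 2^63, convert, put the bit back.
      return (uint64_t)(int64_t)(X - Cut) ^ UINT64_C(0x8000000000000000);
    }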
@@ -3256,7 +3061,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
-
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
@@ -3464,6 +3274,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (TLI.expandFunnelShift(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (TLI.expandROT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ Results.push_back(TLI.expandAddSubSat(Node, DAG));
+ break;
+ case ISD::SMULFIX:
+ Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
+ break;
case ISD::SADDO:
case ISD::SSUBO: {
SDValue LHS = Node->getOperand(0);
@@ -3856,10 +3685,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
- // If we expanded the SETCC by inverting the condition code, then wrap
- // the existing SETCC in a NOT to restore the intended condition.
- if (NeedInvert)
- Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+ assert(!NeedInvert && "Don't know how to invert BR_CC!");
// If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
// node.
@@ -3903,46 +3729,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
break;
}
- case ISD::ROTL:
- case ISD::ROTR: {
- bool IsLeft = Node->getOpcode() == ISD::ROTL;
- SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1);
- EVT ResVT = Node->getValueType(0);
- EVT OpVT = Op0.getValueType();
- assert(OpVT == ResVT &&
- "The result and the operand types of rotate should match");
- EVT ShVT = Op1.getValueType();
- SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT);
-
- // If a rotate in the other direction is legal, use it.
- unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (TLI.isOperationLegal(RevRot, ResVT)) {
- SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
- Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub));
- break;
- }
-
- // Otherwise,
- // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
- // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
- //
- assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) &&
- "Expecting the type bitwidth to be a power of 2");
- unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
- unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
- SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT,
- Width, DAG.getConstant(1, dl, ShVT));
- SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
- SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1);
- SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1);
-
- SDValue Or = DAG.getNode(ISD::OR, dl, ResVT,
- DAG.getNode(ShOpc, dl, ResVT, Op0, And0),
- DAG.getNode(HsOpc, dl, ResVT, Op0, And1));
- Results.push_back(Or);
- break;
- }
-
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
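Similarly, the ROTL/ROTR fallback moves behind TLI.expandROT (added above). The deleted formula in scalar form (hedged; assumes a power-of-two width, and the double masking keeps a zero rotate count well-defined):

    #include <cstdint>

    uint32_t RotL32(uint32_t X, uint32_t C) {
      // (rotl x, c) -> (or (shl x, c & 31), (srl x, (32 - c) & 31))
      return (X << (C & 31)) | (X >> ((32u - C) & 31));
    }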
@@ -3962,7 +3748,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
return false;
}
- LLVM_DEBUG(dbgs() << "Succesfully expanded node\n");
+ LLVM_DEBUG(dbgs() << "Successfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
@@ -4035,11 +3821,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FMINNUM:
+ case ISD::STRICT_FMINNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128));
break;
case ISD::FMAXNUM:
+ case ISD::STRICT_FMAXNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
RTLIB::FMAX_F80, RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128));
@@ -4050,6 +3838,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128));
break;
+ case ISD::FCBRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
+ break;
case ISD::FSIN:
case ISD::STRICT_FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
@@ -4132,16 +3925,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128));
break;
case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128));
break;
case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
RTLIB::CEIL_F80, RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128));
@@ -4161,6 +3957,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::NEARBYINT_PPCF128));
break;
case ISD::FROUND:
+ case ISD::STRICT_FROUND:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
RTLIB::ROUND_F64,
RTLIB::ROUND_F80,
@@ -4192,6 +3989,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::DIV_PPCF128));
break;
case ISD::FREM:
+ case ISD::STRICT_FREM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
RTLIB::REM_F80, RTLIB::REM_F128,
RTLIB::REM_PPCF128));
@@ -4264,6 +4062,21 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::MUL_I16, RTLIB::MUL_I32,
RTLIB::MUL_I64, RTLIB::MUL_I128));
break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("LibCall explicitly requested, but not available");
+ case MVT::i32:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false));
+ break;
+ case MVT::i64:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false));
+ break;
+ case MVT::i128:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false));
+ break;
+ }
+ break;
}
// Replace the original node with the legalized result.
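For reference, the new CTLZ_ZERO_UNDEF libcall case above simply dispatches on
the result width. A minimal sketch of what the i32 call computes at runtime,
assuming the usual compiler-rt naming (__clzsi2 and friends; the helper name
below is made up and the input must be nonzero, per ZERO_UNDEF):

#include <cstdint>

// Sketch only: count leading zeros of a nonzero 32-bit value, the contract
// of the RTLIB::CTLZ_I32 call emitted above.
int clz32(uint32_t x) {
  int n = 0;
  while (!(x & 0x80000000u)) {  // walk down from the top bit
    x <<= 1;
    ++n;
  }
  return n;
}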
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 9aa0ea15f3b7..4644e9588e7b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1750,6 +1750,11 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
SDValue R = SDValue();
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
// Nodes that use a promotion-requiring floating point operand, but don't
// produce a promotion-requiring floating point result, need to be legalized
// to use the promoted float operand. Nodes that produce at least one
@@ -1778,15 +1783,16 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
SDValue Op = N->getOperand(0);
EVT OpVT = Op->getValueType(0);
- EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
- assert (IVT == N->getValueType(0) && "Bitcast to type of different size");
-
SDValue Promoted = GetPromotedFloat(N->getOperand(0));
EVT PromotedVT = Promoted->getValueType(0);
// Convert the promoted float value to the desired IVT.
- return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT,
- Promoted);
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N),
+ IVT, Promoted);
+ // The final result type might not be a scalar so we need a bitcast. The
+ // bitcast will be further legalized if needed.
+ return DAG.getBitcast(N->getValueType(0), Convert);
}
// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by
@@ -1904,8 +1910,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
// Binary FP Operations
case ISD::FADD:
case ISD::FDIV:
- case ISD::FMAXNAN:
- case ISD::FMINNAN:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMUL:
@@ -1941,8 +1947,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT,
- N->getOperand(0));
+ // Input type isn't guaranteed to be a scalar int so bitcast if not. The
+ // bitcast will be legalized further if necessary.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(),
+ N->getOperand(0).getValueType().getSizeInBits());
+ SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0));
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast);
}
SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
@@ -2133,9 +2143,9 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1), TrueVal, FalseVal,
- N->getOperand(4));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ TrueVal.getNode()->getValueType(0), N->getOperand(0),
+ N->getOperand(1), TrueVal, FalseVal, N->getOperand(4));
}
// Construct a SDNode that transforms the SINT or UINT operand to the promoted
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 63a1ea13a5f5..5fbc70fce60d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -118,6 +118,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
+ case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
+
case ISD::AND:
case ISD::OR:
case ISD::XOR:
@@ -138,9 +140,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+ case ISD::ADDE:
+ case ISD::SUBE:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+ case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break;
+
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -269,8 +279,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
case TargetLowering::TypePromoteFloat: {
// Convert the promoted float by hand.
- SDValue PromotedOp = GetPromotedFloat(InOp);
- return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp));
break;
}
case TargetLowering::TypeExpandInteger:
@@ -305,12 +315,45 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// make us bitcast between two vectors which are legalized in different ways.
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ // If the output type is also a vector and widening it to the same size
+ // as the widened input type would be a legal type, we can widen the bitcast
+ // and handle the promotion after.
+ if (NOutVT.isVector()) {
+ unsigned WidenInSize = NInVT.getSizeInBits();
+ unsigned OutSize = OutVT.getSizeInBits();
+ if (WidenInSize % OutSize == 0) {
+ unsigned Scale = WidenInSize / OutSize;
+ EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
+ OutVT.getVectorElementType(),
+ OutVT.getVectorNumElements() * Scale);
+ if (isTypeLegal(WideOutVT)) {
+ InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
+ DAG.getConstant(0, dl, IdxTy));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
+ }
+ }
+ }
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
CreateStackStoreLoad(InOp, OutVT));
}
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
+static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ // If the value won't fit in the preferred type, just use something safe. It
+ // will be legalized when the shift is expanded.
+ if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+ ShiftVT = MVT::i32;
+ return ShiftVT;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
@@ -318,10 +361,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(
- ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl,
- TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
}
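A quick sanity check of the shifted-bswap identity used above, as a scalar
sketch (assuming i16 promoted to i32; the helper name is made up):

#include <cstdint>

// BSWAP of a promoted value: swap in the wide type, then SRL by DiffBits.
// The any-extended upper bytes land in the low bits and are shifted out.
uint16_t bswap16_via_i32(uint16_t x) {
  uint32_t Wide = x;                                 // ANY_EXTEND
  return (uint16_t)(__builtin_bswap32(Wide) >> 16);  // DiffBits = 32 - 16
}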
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -331,10 +373,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(
- ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl,
- TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -399,8 +441,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
- N->getOperand(1));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // If the input also needs to be promoted, do that first so we can get a
+ // good idea for the output type.
+ if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue In = GetPromotedInteger(Op0);
+
+ // If the new type is at least as large as NVT, use it. We probably won't
+ // need to promote it again.
+ EVT SVT = In.getValueType().getScalarType();
+ if (SVT.bitsGE(NVT)) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
+ return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
+ }
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
@@ -438,6 +498,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ return DAG.getNode(N->getOpcode(), dl, NVT);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -483,11 +550,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- N->getMask(), ExtSrc0, N->getMemoryVT(),
+ N->getMask(), ExtPassThru, N->getMemoryVT(),
N->getMemOperand(), ISD::SEXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -497,12 +564,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue ExtSrc0 = GetPromotedInteger(N->getValue());
- assert(NVT == ExtSrc0.getValueType() &&
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ assert(NVT == ExtPassThru.getValueType() &&
"Gather result type and the passThru agrument type should be the same");
SDLoc dl(N);
- SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
+ SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
@@ -534,6 +601,61 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
return SDValue(Res.getNode(), 1);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
+ // For promoting iN -> iM, this can be expanded by
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+ SDLoc dl(N);
+ SDValue Op1 = N->getOperand(0);
+ SDValue Op2 = N->getOperand(1);
+ unsigned OldBits = Op1.getScalarValueSizeInBits();
+
+ unsigned Opcode = N->getOpcode();
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition or subtraction");
+ }
+
+ SDValue Op1Promoted = GetPromotedInteger(Op1);
+ SDValue Op2Promoted = GetPromotedInteger(Op2);
+
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned NewBits = PromotedType.getScalarSizeInBits();
+ unsigned SHLAmount = NewBits - OldBits;
+ EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+}
+
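The four-step recipe above can be checked with a scalar sketch (an
illustration assuming i8 promoted to i32 and the unsigned case; the helper
name is made up):

#include <cstdint>

// Steps 1+2: any-extend and SHL so the i8 payload sits in the top byte.
// Step 3: the i32 saturating add now saturates exactly when the i8 one would.
// Step 4: SRL (the unsigned ShiftOp) brings the payload back down.
uint8_t uaddsat8_via_i32(uint8_t a, uint8_t b) {
  uint32_t A = (uint32_t)a << 24, B = (uint32_t)b << 24;
  uint32_t S = (A > UINT32_MAX - B) ? UINT32_MAX : A + B;  // uaddsat.i32
  return (uint8_t)(S >> 24);
}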
+SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
+ // Can just promote the operands then continue with operation.
+ SDLoc dl(N);
+ SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ EVT PromotedType = Op1Promoted.getValueType();
+ return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
+ N->getOperand(2));
+}
+
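For SMULFIX the scale needs no adjustment: sign extension preserves the
numeric value of a fixed-point operand, so only the operand width changes.
A sketch, assuming Q4.4 values in i8 promoted to i32 (names hypothetical,
and relying on arithmetic >> for signed values as all mainstream targets do):

#include <cstdint>

// smul.fix with scale 4 on sign-extended operands; the binary point of the
// product is unaffected by the extension, so the same shift applies.
int8_t smulfix8_via_i32(int8_t a, int8_t b) {
  int32_t Wide = ((int32_t)a * (int32_t)b) >> 4;  // scale = 4
  return (int8_t)Wide;  // smul.fix keeps the low bits of the result
}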
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -763,6 +885,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
+// the third operand of the ADDE/SUBE nodes is a carry flag, whereas the
+// third operand of the ADDCARRY/SUBCARRY nodes is a carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -960,6 +1085,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
+
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
+
+ case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
+
+ case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -981,9 +1113,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
/// shared among BR_CC, SELECT_CC, and SETCC handlers.
void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
ISD::CondCode CCCode) {
- // We have to insert explicit sign or zero extends. Note that we could
- // insert sign extends for ALL conditions, but zero extend is cheaper on
- // many machines (an AND instead of two shifts), so prefer it.
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions. For those operations where either
+ // zero or sign extension would be valid, use SExtOrZExtPromotedInteger
+ // which will choose the cheapest for the target.
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
@@ -994,7 +1127,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// We would prefer to promote the comparison operand with sign extension.
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
- // instruction, which is redudant eventually.
+ // instruction, which is redundant eventually.
unsigned OpLEffectiveBits =
OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
unsigned OpREffectiveBits =
@@ -1004,8 +1137,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
NewLHS = OpL;
NewRHS = OpR;
} else {
- NewLHS = ZExtPromotedInteger(NewLHS);
- NewRHS = ZExtPromotedInteger(NewRHS);
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
}
break;
}
@@ -1013,11 +1146,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
case ISD::SETUGT:
case ISD::SETULE:
case ISD::SETULT:
- // ALL of these operations will work if we either sign or zero extend
- // the operands (including the unsigned comparisons!). Zero extend is
- // usually a simpler/cheaper operation, so prefer it.
- NewLHS = ZExtPromotedInteger(NewLHS);
- NewRHS = ZExtPromotedInteger(NewRHS);
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
break;
case ISD::SETGE:
case ISD::SETGT:
@@ -1219,28 +1349,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
SDLoc dl(N);
bool TruncateStore = false;
- if (OpNo == 2) {
- // Mask comes before the data operand. If the data operand is legal, we just
- // promote the mask.
- // When the data operand has illegal type, we should legalize the data
- // operand first. The mask will be promoted/splitted/widened according to
- // the data operand type.
- if (TLI.isTypeLegal(DataVT)) {
- Mask = PromoteTargetBoolean(Mask, DataVT);
- // Update in place.
- SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
- NewOps[2] = Mask;
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- }
-
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
- return PromoteIntOp_MSTORE(N, 3);
- if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
- return WidenVecOp_MSTORE(N, 3);
- assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
- return SplitVecOp_MSTORE(N, 3);
+ if (OpNo == 3) {
+ Mask = PromoteTargetBoolean(Mask, DataVT);
+ // Update in place.
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[3] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
} else { // Data operand
- assert(OpNo == 3 && "Unexpected operand for promotion");
+ assert(OpNo == 1 && "Unexpected operand for promotion");
DataOp = GetPromotedInteger(DataOp);
TruncateStore = true;
}
@@ -1274,14 +1390,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- // updated in place.
- if (Res.getNode() == N)
- return Res;
-
- ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- return SDValue();
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
@@ -1342,6 +1451,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(
+ DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
+ // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1 && "Don't know how to promote this operand!");
+ // Promote the rw, locality, and cache type arguments to a supported integer
+ // width.
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
+ SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ Op2, Op3, Op4),
+ 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -1475,6 +1608,12 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
case ISD::UMULO:
case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+ case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1595,8 +1734,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
- KnownBits Known;
- DAG.computeKnownBits(N->getOperand(1), Known);
+ KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
// If we don't know anything about the high bits, exit.
if (((Known.Zero|Known.One) & HighBitMask) == 0)
@@ -2437,6 +2575,101 @@ void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), R.getValue(2));
}
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandAddSubSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ uint64_t Scale = N->getConstantOperandVal(2);
+ if (!Scale) {
+ SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(LHS, LL, LH);
+ GetExpandedInteger(RHS, RL, RH);
+ SmallVector<SDValue, 4> Result;
+
+ if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH)) {
+ report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");
+ return;
+ }
+
+ unsigned VTSize = VT.getScalarSizeInBits();
+ unsigned NVTSize = NVT.getScalarSizeInBits();
+ EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+
+ // Shift whole amount by scale.
+ SDValue ResultLL = Result[0];
+ SDValue ResultLH = Result[1];
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ // After getting the multiplication result in 4 parts, we need to perform a
+ // shift right by the amount of the scale to get the result in that scale.
+ // Let's say we multiply two 64-bit numbers. The resulting value can be held in
+ // 128 bits that are cut into 4 32-bit parts:
+ //
+ // HH HL LH LL
+ // |---32---|---32---|---32---|---32---|
+ // 128 96 64 32 0
+ //
+ // |------VTSize-----|
+ //
+ // |NVTSize-|
+ //
+ // The resulting Lo and Hi are each assembled from at most two adjacent
+ // 32-bit parts after shifting.
+ if (Scale < NVTSize) {
+ // If the scale is less than the size of the VT we expand to, the Hi and
+ // Lo of the result will be in the first 2 parts of the result after
+ // shifting right. This only requires shifting by the scale as far as the
+ // third part in the result (ResultHL).
+ SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
+ SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+ } else if (Scale == NVTSize) {
+ // If the scales are equal, Lo and Hi are ResultLH and ResultHL,
+ // respectively. Avoid shifting to prevent undefined behavior.
+ Lo = ResultLH;
+ Hi = ResultHL;
+ } else if (Scale < VTSize) {
+ // If the scale is instead less than the old VT size, but greater than or
+ // equal to the expanded VT size, the first part of the result (ResultLL) is
+ // no longer a part of Lo because it would be scaled out anyway. Instead we
+ // can start shifting right from the fourth part (ResultHH) to the second
+ // part (ResultLH), and ResultLH will be the new Lo.
+ SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
+ SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+ } else {
+ llvm_unreachable(
+ "Expected the scale to be less than the width of the operands");
+ }
+}
+
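What the part-shuffling above computes is just a Scale-bit right shift of the
double-width product. A compact reference, assuming 64-bit operands and a
compiler with the __int128 extension (an illustration of the semantics, not
of the DAG code):

#include <cstdint>

// Reference semantics for the expansion above: form the full 128-bit
// product (the four NVT-sized parts) and shift it right by Scale.
int64_t smulfix64_ref(int64_t a, int64_t b, unsigned Scale) {
  __int128 Prod = (__int128)a * (__int128)b;  // SMUL_LOHI's 4-part result
  return (int64_t)(Prod >> Scale);            // Scale < 64, per the
                                              // llvm_unreachable above
}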
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -2705,25 +2938,56 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc dl(N);
- // A divide for UMULO should be faster than a function call.
if (N->getOpcode() == ISD::UMULO) {
+ // This section expands the operation into the following sequence of
+ // instructions. `iNh` here refers to a type which has half the bit width of
+ // the type the original operation operated on.
+ //
+ // %0 = %LHS.HI != 0 && %RHS.HI != 0
+ // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
+ // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
+ // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
+ // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
+ // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+ //
+ // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
-
- SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
- SplitInteger(MUL, Lo, Hi);
-
- // A divide for UMULO will be faster than a function call. Select to
- // make sure we aren't using 0.
- SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT),
- RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ);
- SDValue NotZero = DAG.getSelect(dl, VT, isZero,
- DAG.getConstant(1, dl, VT), RHS);
- SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
- SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
- ISD::SETNE);
- Overflow = DAG.getSelect(dl, N->getValueType(1), isZero,
- DAG.getConstant(0, dl, N->getValueType(1)),
- Overflow);
+ SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
+ SplitInteger(LHS, LHSLow, LHSHigh);
+ SplitInteger(RHS, RHSLow, RHSHigh);
+ EVT HalfVT = LHSLow.getValueType(), BitVT = N->getValueType(1);
+ SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
+ SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
+
+ SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
+ SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
+ DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
+ DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
+
+ SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
+ SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ One.getValue(0));
+
+ SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
+ SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ Two.getValue(0));
+
+ // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
+ // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn't this
+ // operation recursively legalized?).
+ //
+ // Many backends understand this pattern and will convert into LOHI
+ // themselves, if applicable.
+ SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
+ SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
+ SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
+ SplitInteger(Five, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
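The numbered pseudocode above, instantiated for u64 split into u32 halves
(a sketch; the DAG version works on whatever the split types are, and
__builtin_add_overflow is the GCC/Clang builtin standing in for UADDO):

#include <cstdint>

bool umulo64(uint64_t L, uint64_t R, uint64_t *Res) {
  uint32_t LH = L >> 32, LL = (uint32_t)L;
  uint32_t RH = R >> 32, RL = (uint32_t)R;
  bool Ovf = LH != 0 && RH != 0;               // %0: both highs nonzero
  uint64_t One = (uint64_t)LH * RL;            // %1
  Ovf |= One > UINT32_MAX;
  uint64_t Two = (uint64_t)RH * LL;            // %2
  Ovf |= Two > UINT32_MAX;
  uint64_t Three = (uint64_t)LL * RL;          // %3: cannot overflow u64
  uint64_t Four = (One << 32) + (Two << 32);   // %4: at most one addend is
                                               // nonzero unless Ovf is set
  Ovf |= __builtin_add_overflow(Three, Four, Res);  // %5
  return Ovf;
}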
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 135922d6f267..032000f6cb79 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -281,6 +281,20 @@ private:
return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
+ // Get a promoted operand and sign or zero extend it to the final size
+ // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given
+ // subtarget and type, the choice of sign or zero-extension will be
+ // consistent.
+ SDValue SExtOrZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = GetPromotedInteger(Op);
+ if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType());
+ }
+
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -330,6 +344,9 @@ private:
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
+ SDValue PromoteIntRes_SMULFIX(SDNode *N);
+ SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -360,6 +377,9 @@ private:
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
+ SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SMULFIX(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -414,6 +434,8 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -650,6 +672,7 @@ private:
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
@@ -668,6 +691,8 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
+
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
@@ -703,6 +728,8 @@ private:
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -780,6 +807,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_StrictFP(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
@@ -796,6 +824,7 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
@@ -844,9 +873,6 @@ private:
/// MaskVT to ToMaskVT if needed with vector extension or truncation.
SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
- /// Get the target mask VT, and widen if needed.
- EVT getSETCCWidenedResultTy(SDValue SetCC);
-
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index df3134828af5..b9d370441c3e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -327,7 +327,7 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
NumElements >>= 1;
SplitInteger(Op, Parts[0], Parts[1]);
if (DAG.getDataLayout().isBigEndian())
- std::swap(Parts[0], Parts[1]);
+ std::swap(Parts[0], Parts[1]);
IntegerToVector(Parts[0], NumElements, Ops, EltVT);
IntegerToVector(Parts[1], NumElements, Ops, EltVT);
} else {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3a98a7a904cb..4923a529c21b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -86,9 +86,10 @@ class VectorLegalizer {
/// operations to legalize them.
SDValue Expand(SDValue Op);
- /// Implements expansion for FNEG; falls back to UnrollVectorOp if
- /// FSUB isn't legal.
- ///
+ /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
+ /// FP_TO_SINT isn't legal.
+ SDValue ExpandFP_TO_UINT(SDValue Op);
+
/// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
/// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
@@ -116,6 +117,12 @@ class VectorLegalizer {
/// the remaining lanes, finally bitcasting to the proper type.
SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+ /// Implement expand-based legalization of ABS vector operations.
+ /// If following expanding is legal/custom then do it:
+ /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
+ /// else unroll the operation.
+ SDValue ExpandABS(SDValue Op);
+
/// Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
@@ -128,8 +135,13 @@ class VectorLegalizer {
SDValue ExpandFNEG(SDValue Op);
SDValue ExpandFSUB(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTPOP(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
- SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
+ SDValue ExpandCTTZ(SDValue Op);
+ SDValue ExpandFunnelShift(SDValue Op);
+ SDValue ExpandROT(SDValue Op);
+ SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+ SDValue ExpandAddSubSat(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
/// Implements vector promotion.
@@ -226,7 +238,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
Op.getResNo());
- bool HasVectorValue = false;
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -240,16 +251,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
- if (Lowered == Result)
- return TranslateLegalizeResults(Op, Lowered);
- Changed = true;
- if (Lowered->getNumValues() != Op->getNumValues()) {
- // This expanded to something other than the load. Assume the
- // lowering code took care of any chain values, and just handle the
- // returned value.
- assert(Result.getValue(1).use_empty() &&
- "There are still live users of the old chain!");
- return LegalizeOp(Lowered);
+ assert(Lowered->getNumValues() == Op->getNumValues() &&
+ "Unexpected number of results");
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
}
return TranslateLegalizeResults(Op, Lowered);
}
@@ -272,7 +279,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom: {
SDValue Lowered = TLI.LowerOperation(Result, DAG);
- Changed = Lowered != Result;
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
+ }
return TranslateLegalizeResults(Op, Lowered);
}
case TargetLowering::Expand:
@@ -280,9 +291,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return LegalizeOp(ExpandStore(Op));
}
}
- } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
- HasVectorValue = true;
+ }
+ bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
++J)
@@ -298,6 +309,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -311,6 +323,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -321,6 +339,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -338,8 +358,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ case ISD::FSHL:
+ case ISD::FSHR:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::ABS:
case ISD::BSWAP:
case ISD::BITREVERSE:
case ISD::CTLZ:
@@ -361,8 +384,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FABS:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -394,8 +419,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
case ISD::FCANONICALIZE:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
+ case ISD::SMULFIX: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
case ISD::FP_ROUND_INREG:
Action = TLI.getOperationAction(Node->getOpcode(),
cast<VTSDNode>(Node->getOperand(1))->getVT());
@@ -405,14 +440,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
- case ISD::MSCATTER:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
- break;
- case ISD::MSTORE:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
- break;
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -720,6 +747,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandVSELECT(Op);
case ISD::SELECT:
return ExpandSELECT(Op);
+ case ISD::FP_TO_UINT:
+ return ExpandFP_TO_UINT(Op);
case ISD::UINT_TO_FP:
return ExpandUINT_TO_FLOAT(Op);
case ISD::FNEG:
@@ -728,17 +757,37 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFSUB(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::ABS:
+ return ExpandABS(Op);
case ISD::BITREVERSE:
return ExpandBITREVERSE(Op);
+ case ISD::CTPOP:
+ return ExpandCTPOP(Op);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
return ExpandCTLZ(Op);
+ case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTTZ_ZERO_UNDEF(Op);
+ return ExpandCTTZ(Op);
+ case ISD::FSHL:
+ case ISD::FSHR:
+ return ExpandFunnelShift(Op);
+ case ISD::ROTL:
+ case ISD::ROTR:
+ return ExpandROT(Op);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return ExpandFMINNUM_FMAXNUM(Op);
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ return ExpandAddSubSat(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -752,6 +801,12 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
return ExpandStrictFPOp(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
@@ -866,7 +921,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// First build an any-extend node which can be legalized above when we
// recurse through it.
- Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+ Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
// Now we need sign extend. Do this by shifting the elements. Even if these
// aren't legal operations, they have a better chance of being legalized
@@ -1024,10 +1079,35 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
+SDValue VectorLegalizer::ExpandABS(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandABS(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
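The identity named in the new ExpandABS doc comment, written out for a
32-bit scalar (a sketch; the DAG nodes have wrapping semantics, which the
signed addition below assumes):

#include <cstdint>

int32_t abs32(int32_t x) {
  int32_t M = x >> 31;   // SRA by sizeof(x)*8-1: 0 for x >= 0, -1 for x < 0
  return (x + M) ^ M;    // (XOR (ADD x, M), M); INT32_MIN maps to itself
}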
+SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
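Per the doc comment, the TLI.expandFP_TO_UINT path builds FP_TO_UINT from
FP_TO_SINT. A sketch of that classic split for f64 -> u64, assuming this is
the strategy used and that the input is in range, as fptoui requires:

#include <cstdint>

uint64_t fptoui64(double x) {
  const double Cut = 9223372036854775808.0;            // 2^63
  if (x < Cut)
    return (uint64_t)(int64_t)x;                       // plain FP_TO_SINT
  return (uint64_t)(int64_t)(x - Cut) ^ (1ULL << 63);  // rebase, fix bit 63
}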
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
SDLoc DL(Op);
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
+ return Result;
+
// Make sure that the SINT_TO_FP and SRL instructions are available.
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
@@ -1086,56 +1166,55 @@ SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTPOP(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
- EVT VT = Op.getValueType();
- unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+ SDValue Result;
+ if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
+ return Result;
- // If the non-ZERO_UNDEF version is supported we can use that instead.
- if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- SDLoc DL(Op);
- return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
- }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
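The removed comment just below documents the ctpop-based method that now
lives behind TLI.expandCTLZ; for a u16 element it amounts to the following
sketch (helper name made up):

#include <cstdint>

uint16_t ctlz16(uint16_t x) {
  x |= x >> 1;  x |= x >> 2;    // smear the highest set bit downward
  x |= x >> 4;  x |= x >> 8;
  return (uint16_t)__builtin_popcount((uint16_t)~x);  // ctpop of the mask
}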
- // If CTPOP is available we can lower with a CTPOP based method:
- // u16 ctlz(u16 x) {
- // x |= (x >> 1);
- // x |= (x >> 2);
- // x |= (x >> 4);
- // x |= (x >> 8);
- // return ctpop(~x);
- // }
- // Ref: "Hacker's Delight" by Henry Warren
- if (isPowerOf2_32(NumBitsPerElt) &&
- TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
- SDLoc DL(Op);
- SDValue Res = Op.getOperand(0);
- EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTTZ(Op.getNode(), Result, DAG))
+ return Result;
- for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
- Res = DAG.getNode(
- ISD::OR, DL, VT, Res,
- DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
+ return DAG.UnrollVectorOp(Op.getNode());
+}
- Res = DAG.getNOT(DL, Res, VT);
- return DAG.getNode(ISD::CTPOP, DL, VT, Res);
- }
+SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandFunnelShift(Op.getNode(), Result, DAG))
+ return Result;
- // Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
}
-SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
- // If the non-ZERO_UNDEF version is supported we can use that instead.
- if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
- SDLoc DL(Op);
- return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
- }
+SDValue VectorLegalizer::ExpandROT(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandROT(Op.getNode(), Result, DAG))
+ return Result;
- // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
+ if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
+ return Expanded;
return DAG.UnrollVectorOp(Op.getNode());
}
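TLI.expandAddSubSat (also used by ExpandIntRes_ADDSUBSAT earlier) is assumed
here to build the saturation from an overflow-reporting add plus a select;
the unsigned-add case for u32, as a sketch:

#include <cstdint>

// uadd.sat via UADDO + select: on carry out, clamp to all-ones.
uint32_t uaddsat32(uint32_t a, uint32_t b) {
  uint32_t Sum = a + b;
  return Sum < a ? UINT32_MAX : Sum;  // carry out => saturate
}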
@@ -1183,7 +1262,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
AddLegalizedOperand(Op.getValue(0), Result);
AddLegalizedOperand(Op.getValue(1), NewChain);
- return NewChain;
+ return Op.getResNo() ? NewChain : Result;
}
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f5d9dd234afd..f367e9358576 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -113,13 +113,20 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+
case ISD::FPOW:
case ISD::FREM:
case ISD::FSUB:
@@ -139,6 +146,35 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
R = ScalarizeVecRes_TernaryOp(N);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ R = ScalarizeVecRes_StrictFPOp(N);
+ break;
+ case ISD::SMULFIX:
+ R = ScalarizeVecRes_SMULFIX(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -161,6 +197,44 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
Op0.getValueType(), Op0, Op1, Op2);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
+ EVT VT = N->getValueType(0).getVectorElementType();
+ unsigned NumOpers = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT ValueVTs[] = {VT, MVT::Other};
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 4> Opers;
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector())
+ Oper = GetScalarizedVector(Oper);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -731,8 +805,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -750,6 +824,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
@@ -759,6 +837,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -772,8 +851,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+ case ISD::SMULFIX:
+ SplitVecRes_SMULFIX(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -811,6 +899,20 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
Op0Hi, Op1Hi, Op2Hi);
}
+void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+ SDValue Op2 = N->getOperand(2);
+
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2);
+}
+
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
@@ -1238,7 +1340,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
- SDValue Src0 = MLD->getSrc0();
+ SDValue PassThru = MLD->getPassThru();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
@@ -1259,18 +1361,18 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO,
ExtType, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
@@ -1282,7 +1384,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
@@ -1305,7 +1407,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
- SDValue Src0 = MGT->getValue();
+ SDValue PassThru = MGT->getPassThru();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
unsigned Alignment = MGT->getOriginalAlignment();
@@ -1322,11 +1424,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
// Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1339,11 +1441,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
MMO);
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
MMO);
@@ -1620,13 +1722,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
- Res = SplitVecOp_TruncateHelper(N);
- else
- Res = SplitVecOp_UnaryOp(N);
- break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
@@ -1634,6 +1729,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
else
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -1746,10 +1843,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
case ISD::VECREDUCE_FMAX:
- CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN;
+ CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
break;
case ISD::VECREDUCE_FMIN:
- CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN;
+ CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
break;
default:
llvm_unreachable("Unexpected reduce ISD node");
@@ -1860,6 +1957,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+ // FIXME: This is to handle i1 vectors with elements promoted to i8.
+ // i1 vector handling needs general improvement.
+ if (N->getValueType(0).bitsLT(EltVT)) {
+ SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0));
+ }
+
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
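
// Note on the FIXME above (element types assumed, not from this change): for
// a <4 x i1> vector whose elements were promoted to i8 before being spilled,
// the extract result type (i1) is narrower than the in-memory element type
// (i8). An extending load cannot produce a result narrower than its memory
// type, so the new path loads the whole i8 element and then truncates it to
// i1 with getZExtOrTrunc.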
@@ -1886,7 +1992,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
SDValue Mask = MGT->getMask();
- SDValue Src0 = MGT->getValue();
+ SDValue PassThru = MGT->getPassThru();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
@@ -1900,11 +2006,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1917,7 +2023,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO);
@@ -1927,7 +2033,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Alignment, MGT->getAAInfo(),
MGT->getRanges());
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO);
@@ -2164,16 +2270,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
+ // Determine the split output VT. If it's legal, we can just split directly.
+ EVT LoOutVT, HiOutVT;
+ std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
+ assert(LoOutVT == HiOutVT && "Unequal split?");
+
// If the input elements are only 1/2 the width of the result elements,
// just use the normal splitting. Our trick only works if there's room
// to split more than once.
- if (InElementSize <= OutElementSize * 2)
+ if (isTypeLegal(LoOutVT) ||
+ InElementSize <= OutElementSize * 2)
return SplitVecOp_UnaryOp(N);
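
// Illustrative example of the trick below (types assumed, not from this
// change): splitting the operand of "trunc <8 x i64> %x to <8 x i8>" where
// <8 x i64> must be split but <8 x i32> is legal:
//   lo32 = trunc <4 x i64> lo(%x) to <4 x i32>
//   hi32 = trunc <4 x i64> hi(%x) to <4 x i32>
//   cat  = concat_vectors lo32, hi32          ; <8 x i32>
//   res  = trunc <8 x i32> cat to <8 x i8>    ; re-legalized if still illegal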
SDLoc DL(N);
+ // Don't use the trick if the input will eventually be scalarized.
+ EVT FinalVT = InVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+ if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
+ return SplitVecOp_UnaryOp(N);
+
// Get the split input vector.
SDValue InLoVec, InHiVec;
GetSplitVector(InVec, InLoVec, InHiVec);
+
// Truncate them to 1/2 the element size.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
@@ -2298,12 +2419,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
Res = WidenVecRes_Binary(N);
break;
@@ -2320,6 +2445,33 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ Res = WidenVecRes_StrictFP(N);
+ break;
+
case ISD::FCOPYSIGN:
Res = WidenVecRes_FCOPYSIGN(N);
break;
@@ -2353,11 +2505,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
- case ISD::BITREVERSE:
- case ISD::BSWAP:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::CTTZ:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -2368,12 +2515,37 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
- case ISD::FNEG:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
- case ISD::FTRUNC:
+ case ISD::FTRUNC: {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements. We are assuming that if the scalar op
+ // requires expanding, then the vector op needs expanding too.
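+ //
+ // For example (illustrative, not from this change): if v3f32 FSIN widens
+ // to v4f32 and the target expands FSIN to a libcall, expanding the widened
+ // op would emit four sinf calls, one of them on an undef lane; unrolling
+ // the original op here emits only the three calls that are actually needed.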
+ EVT VT = N->getValueType(0);
+ if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ "Target supports vector op, but scalar requires expansion?");
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ break;
+ }
+ }
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those like
+ // any other unary ops.
+ LLVM_FALLTHROUGH;
+
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FNEG:
+ case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
case ISD::FMA:
@@ -2405,6 +2577,88 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
+// Given a vector of operations that have been broken up while widening, see
+// if we can collect them together into the next widest legal VT. This
+// implementation is trap-safe: it only inserts and concatenates the values
+// it is given and never creates new trapping operations.
+static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
+ SmallVectorImpl<SDValue> &ConcatOps,
+ unsigned ConcatEnd, EVT VT, EVT MaxVT,
+ EVT WidenVT) {
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ SDLoc dl(ConcatOps[0]);
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ int Idx = 0;
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, SubConcatOps);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // Add undefs of size MaxVT until ConcatOps grows to the length of WidenVT.
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(ConcatOps.data(), NumOps));
+}
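+
+// Illustrative trace (sizes assumed, not from this change): with WidenVT =
+// v8i32 and ConcatOps = [v4i32, v2i32, i32, i32], the loop first wraps the
+// two trailing i32 scalars into a v2i32 via INSERT_VECTOR_ELT, leaving
+// [v4i32, v2i32, v2i32]; it then concatenates the two v2i32 values into a
+// v4i32, leaving [v4i32, v4i32]; the final CONCAT_VECTORS of the MaxVT
+// pieces (plus undef padding when the count falls short) yields the v8i32
+// result.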
+
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Binary op widening for operations that can trap.
unsigned Opcode = N->getOpcode();
@@ -2477,75 +2731,119 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
}
}
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
+ // StrictFP op widening for operations that can trap.
+ unsigned NumOpers = N->getNumOperands();
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
- // while (Some element of ConcatOps is not of type MaxVT) {
- // From the end of ConcatOps, collect elements of the same type and put
- // them into an op of the next larger supported type
+ // No legal vector version, so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply it only to the original vector elements.
+ EVT MaxVT = VT;
+ SmallVector<SDValue, 4> InOps;
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ SmallVector<SDValue, 16> Chains;
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // The Chain is the first operand.
+ InOps.push_back(N->getOperand(0));
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector()) {
+ assert(Oper.getValueType() == N->getValueType(0) &&
+ "Invalid operand type to widen!");
+ Oper = GetWidenedVector(Oper);
+ }
+
+ InOps.push_back(Oper);
+ }
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ //   take chunks of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
// }
- while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
- VT = ConcatOps[Idx--].getValueType();
- while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
- Idx--;
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
- EVT NextVT;
+ EOps.push_back(Op);
+ }
+
+ EVT OperVT[] = {VT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
do {
- NextSize *= 2;
- NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeLegal(NextVT));
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
- if (!VT.isVector()) {
- // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
- SDValue VecOp = DAG.getUNDEF(NextVT);
- unsigned NumToInsert = ConcatEnd - Idx - 1;
- for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(
- ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned j = 0; j < NumOpers; ++j) {
+ SDValue Op = InOps[j];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EOps.push_back(Op);
+ }
+
+ EVT WidenVT[] = {WidenEltVT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
}
- ConcatOps[Idx+1] = VecOp;
- ConcatEnd = Idx + 2;
- } else {
- // Vector type, create a CONCAT_VECTORS of type NextVT
- SDValue undefVec = DAG.getUNDEF(VT);
- unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
- SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
- unsigned RealVals = ConcatEnd - Idx - 1;
- unsigned SubConcatEnd = 0;
- unsigned SubConcatIdx = Idx + 1;
- while (SubConcatEnd < RealVals)
- SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
- while (SubConcatEnd < OpsToConcat)
- SubConcatOps[SubConcatEnd++] = undefVec;
- ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, SubConcatOps);
- ConcatEnd = SubConcatIdx + 1;
+ CurNumElts = 0;
}
}
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
+ // Build a token factor to merge the chains of all the ops created above.
+ SDValue NewChain;
+ if (Chains.size() == 1)
+ NewChain = Chains[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
- // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
- unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
- if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(MaxVT);
- for (unsigned j = ConcatEnd; j < NumOps; ++j)
- ConcatOps[j] = UndefVal;
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(ConcatOps.data(), NumOps));
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
}
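
// Illustrative example for WidenVecRes_StrictFP (types assumed, not from
// this change): widening v3f64 STRICT_FSQRT where v2f64 is the widest legal
// type performs one v2f64 STRICT_FSQRT on elements 0-1 and one scalar
// STRICT_FSQRT on element 2, token-factors the two output chains into the
// replacement chain, and lets CollectOpsToWiden assemble the v4f64 result.
// The undef padding lanes are never fed to the operation, so no spurious FP
// exceptions can be raised.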
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
@@ -2575,10 +2873,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// If both input and result vector types are of same width, extend
// operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
// accepts fewer elements in the result than in the input.
+ if (Opcode == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
if (Opcode == ISD::SIGN_EXTEND)
- return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
if (Opcode == ISD::ZERO_EXTEND)
- return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
}
}
@@ -2591,11 +2891,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (WidenNumElts % InVTNumElts == 0) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat = WidenNumElts/InVTNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
- SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
@@ -2614,11 +2911,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
// Otherwise unroll into some nasty scalar code and rebuild the vector.
- SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = WidenVT.getVectorElementType();
- unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
- unsigned i;
- for (i=0; i < MinElts; ++i) {
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -2628,10 +2926,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
- for (; i < WidenNumElts; ++i)
- Ops[i] = UndefVal;
-
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -2654,11 +2948,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
- return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
}
}
}
@@ -2810,22 +3102,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
}
if (TLI.isTypeLegal(NewInVT)) {
- // Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
- // it results in a legal type.
- SmallVector<SDValue, 16> Ops(NewNumElts);
- SDValue UndefVal = DAG.getUNDEF(InVT);
- Ops[0] = InOp;
- for (unsigned i = 1; i < NewNumElts; ++i)
- Ops[i] = UndefVal;
-
SDValue NewVec;
- if (InVT.isVector())
+ if (InVT.isVector()) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
- else
- NewVec = DAG.getBuildVector(NewInVT, dl, Ops);
+ } else {
+ NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
+ }
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -3003,7 +3293,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
- SDValue Src0 = GetWidenedVector(N->getSrc0());
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
@@ -3014,9 +3304,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
Mask = ModifyToType(Mask, WideMaskVT, true);
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
- Mask, Src0, N->getMemoryVT(),
+ Mask, PassThru, N->getMemoryVT(),
N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -3028,7 +3318,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
- SDValue Src0 = GetWidenedVector(N->getValue());
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
SDValue Scale = N->getScale();
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
@@ -3045,7 +3335,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale };
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
+ Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
@@ -3155,16 +3446,6 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
return Mask;
}
-// Get the target mask VT, and widen if needed.
-EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
- assert(SetCC->getOpcode() == ISD::SETCC);
- LLVMContext &Ctx = *DAG.getContext();
- EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
- return MaskVT;
-}
-
// This method tries to handle VSELECT and its mask by legalizing operands
// (which may require widening) and if needed adjusting the mask vector type
// to match that of the VSELECT. Without it, many cases end up with
@@ -3232,7 +3513,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
SDValue Mask;
if (Cond->getOpcode() == ISD::SETCC) {
- EVT MaskVT = getSETCCWidenedResultTy(Cond);
+ EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType());
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
Cond->getOperand(0).getOpcode() == ISD::SETCC &&
@@ -3240,8 +3521,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
- EVT VT0 = getSETCCWidenedResultTy(SETCC0);
- EVT VT1 = getSETCCWidenedResultTy(SETCC1);
+ EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType());
+ EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType());
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
@@ -3414,6 +3695,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
@@ -3503,11 +3785,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
default:
llvm_unreachable("Extend legalization on extend operation!");
case ISD::ANY_EXTEND:
- return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
case ISD::SIGN_EXTEND:
- return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
case ISD::ZERO_EXTEND:
- return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
}
}
@@ -3537,8 +3819,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
InVT.getVectorNumElements());
if (TLI.isTypeLegal(WideVT)) {
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
- DAG.getIntPtrConstant(0, dl));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
EVT InEltVT = InVT.getVectorElementType();
@@ -3580,20 +3863,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
- // If the input vector is not legal, it is likely that we will not find a
- // legal vector of the same size. Replace the concatenate vector with a
- // nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
+ EVT InVT = N->getOperand(0).getValueType();
SDLoc dl(N);
+
+ // If this operand widens to the same type as the concat result, and all but
+ // the first operand are undef, just use the widened first operand.
+ unsigned NumOperands = N->getNumOperands();
+ if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ unsigned i;
+ for (i = 1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ return GetWidenedVector(N->getOperand(0));
+ }
+
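+ // For example (illustrative types): for concat_vectors(<2 x i32> %x,
+ // <2 x i32> undef) on a target where <2 x i32> widens to <4 x i32>, the
+ // result type already equals the widened operand type, so the widened %x is
+ // returned directly instead of being rebuilt element by element.
+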
+ // Otherwise, fall back to a nasty build vector.
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
- EVT InVT = N->getOperand(0).getValueType();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned Idx = 0;
- unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
assert(getTypeAction(InOp.getValueType()) ==
@@ -3641,60 +3935,97 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
- assert(OpNo == 3 && "Can widen only data operand of mstore");
+ assert((OpNo == 1 || OpNo == 3) &&
+ "Can widen only data or mask operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
SDValue StVal = MST->getValue();
- // Widen the value
- SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
- // The mask should be widened as well.
- EVT WideVT = WideVal.getValueType();
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
- MaskVT.getVectorElementType(),
- WideVT.getVectorNumElements());
- Mask = ModifyToType(Mask, WideMaskVT, true);
+ if (OpNo == 1) {
+ // Widen the value.
+ StVal = GetWidenedVector(StVal);
+
+ // The mask should be widened as well.
+ EVT WideVT = StVal.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else {
+ // Widen the mask.
+ EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ EVT ValueVT = StVal.getValueType();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(),
+ WideMaskVT.getVectorNumElements());
+ StVal = ModifyToType(StVal, WideVT);
+ }
assert(Mask.getValueType().getVectorNumElements() ==
- WideVal.getValueType().getVectorNumElements() &&
+ StVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
- return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+ return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
false, MST->isCompressingStore());
}
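
// Illustrative example for the new OpNo == 3 path (types assumed, not from
// this change): storing <3 x i32> under a <3 x i1> mask on a target that
// widens <3 x i1> to <4 x i1> widens the mask with the extra lane forced to
// zero and pads the stored value to <4 x i32>; the zeroed mask lane keeps
// the padding element from ever reaching memory.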
+SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 4 && "Can widen only the index of mgather");
+ auto *MG = cast<MaskedGatherSDNode>(N);
+ SDValue DataOp = MG->getPassThru();
+ SDValue Mask = MG->getMask();
+ SDValue Scale = MG->getScale();
+
+ // Just widen the index. It's allowed to have extra elements.
+ SDValue Index = GetWidenedVector(MG->getIndex());
+
+ SDLoc dl(N);
+ SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
+ Scale};
+ SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
+ MG->getMemOperand());
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
- assert(OpNo == 1 && "Can widen only data operand of mscatter");
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
- EVT MaskVT = Mask.getValueType();
+ SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
- // Widen the value.
- SDValue WideVal = GetWidenedVector(DataOp);
- EVT WideVT = WideVal.getValueType();
- unsigned NumElts = WideVT.getVectorNumElements();
- SDLoc dl(N);
-
- // The mask should be widened as well.
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
- MaskVT.getVectorElementType(), NumElts);
- Mask = ModifyToType(Mask, WideMaskVT, true);
-
- // Widen index.
- SDValue Index = MSC->getIndex();
- EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
- Index.getValueType().getScalarType(),
- NumElts);
- Index = ModifyToType(Index, WideIndexVT);
+ unsigned NumElts;
+ if (OpNo == 1) {
+ DataOp = GetWidenedVector(DataOp);
+ NumElts = DataOp.getValueType().getVectorNumElements();
+
+ // Widen index.
+ EVT IndexVT = Index.getValueType();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getVectorElementType(), NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+
+ // The mask should be widened as well.
+ EVT MaskVT = Mask.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(), NumElts);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else if (OpNo == 4) {
+ // Just widen the index. It's allowed to have extra elements.
+ Index = GetWidenedVector(Index);
+ } else
+ llvm_unreachable("Can't widen this operand of mscatter");
- SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index,
+ SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- MSC->getMemoryVT(), dl, Ops,
+ MSC->getMemoryVT(), SDLoc(N), Ops,
MSC->getMemOperand());
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 7e6b57426338..f7566b246f32 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -24,6 +24,7 @@ class DIVariable;
class DIExpression;
class SDNode;
class Value;
+class raw_ostream;
/// Holds the information from a dbg_value node through SDISel.
/// We do not use SDValue here to avoid including its header.
@@ -52,6 +53,7 @@ private:
enum DbgValueKind kind;
bool IsIndirect;
bool Invalid = false;
+ bool Emitted = false;
public:
/// Constructor for non-constants.
@@ -124,6 +126,17 @@ public:
/// deleted.
void setIsInvalidated() { Invalid = true; }
bool isInvalidated() const { return Invalid; }
+
+ /// setIsEmitted / isEmitted - Setter/getter for the flag indicating that this
+ /// SDDbgValue has been emitted to an MBB.
+ void setIsEmitted() { Emitted = true; }
+ bool isEmitted() const { return Emitted; }
+
+ /// clearIsEmitted - Reset the Emitted flag for certain special cases where
+ /// dbg.addr is emitted twice.
+ void clearIsEmitted() { Emitted = false; }
+
+ LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
};
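
// Sketch of the intended Emitted-flag protocol (inferred from the scheduler
// changes later in this patch, not a verbatim excerpt): emitters skip values
// that have already been lowered instead of invalidating them, so a value
// can legitimately be emitted again after clearIsEmitted().
//
//   for (SDDbgValue *DV : DAG->GetDbgValues(N)) {
//     if (DV->isEmitted())
//       continue;                        // already lowered to a DBG_VALUE
//     if (MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
//       BB->insert(InsertPos, DbgMI);
//   }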
/// Holds the information from a dbg_label node through SDISel.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3944d7df286d..90e109b022fd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -125,8 +125,7 @@ void ScheduleDAGFast::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
- .dumpAll(this));
+ LLVM_DEBUG(dump());
// Execute the actual scheduling loop.
ListScheduleBottomUp();
@@ -144,7 +143,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -182,7 +181,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
@@ -777,11 +776,9 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (N->getHasDebugValue()) {
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
for (auto DV : DAG->GetDbgValues(N)) {
- if (DV->isInvalidated())
- continue;
- if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
- BB->insert(InsertPos, DbgMI);
- DV->setIsInvalidated();
+ if (!DV->isEmitted())
+ if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+ BB->insert(InsertPos, DbgMI);
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 43e8ffd3839c..8d75b8133a30 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -365,7 +365,7 @@ void ScheduleDAGRRList::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
+ LLVM_DEBUG(dump());
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
@@ -396,7 +396,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -729,7 +729,7 @@ static void resetVRegCycle(SUnit *SU);
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
@@ -828,7 +828,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
for (SDep &Pred : SU->Preds) {
CapturePred(&Pred);
@@ -1130,7 +1130,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return nullptr;
LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
if (N->getGluedNode() &&
!TII->canCopyGluedNodeDuringSchedule(N)) {
@@ -1888,7 +1888,7 @@ public:
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
dbgs() << "Height " << SU->getHeight() << ": ";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
}
#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 430d8fb34476..e258f0a218a5 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -145,20 +145,18 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
Ops.push_back(ExtraOper);
SDVTList VTList = DAG->getVTList(VTs);
- MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr;
MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
// Store memory references.
- if (MN) {
- Begin = MN->memoperands_begin();
- End = MN->memoperands_end();
- }
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ if (MN)
+ MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
// Reset the memory references
if (MN)
- MN->setMemRefs(Begin, End);
+ DAG->setNodeMemRefs(MN, MMOs);
}
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
@@ -244,7 +242,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
return;
// Sort them in increasing order.
- llvm::sort(Offsets.begin(), Offsets.end());
+ llvm::sort(Offsets);
// Check if the loads are close enough.
SmallVector<SDNode*, 4> Loads;
@@ -650,18 +648,20 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
dep.setLatency(Latency);
}
-void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
- // Cannot completely remove virtual function even in release mode.
+void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- if (!SU->getNode()) {
+ dumpNodeName(SU);
+ dbgs() << ": ";
+
+ if (!SU.getNode()) {
dbgs() << "PHYS REG COPY\n";
return;
}
- SU->getNode()->dump(DAG);
+ SU.getNode()->dump(DAG);
dbgs() << "\n";
SmallVector<SDNode *, 4> GluedNodes;
- for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
dbgs() << " ";
@@ -672,11 +672,22 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
#endif
}
+void ScheduleDAGSDNodes::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getNode() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getNode() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
}
@@ -711,7 +722,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
for (auto DV : DAG->GetDbgValues(N)) {
- if (DV->isInvalidated())
+ if (DV->isEmitted())
continue;
unsigned DVOrder = DV->getOrder();
if (!Order || DVOrder == Order) {
@@ -720,7 +731,6 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
Orders.push_back({DVOrder, DbgMI});
BB->insert(InsertPos, DbgMI);
}
- DV->setIsInvalidated();
}
}
}
@@ -811,8 +821,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
for (; PDI != PDE; ++PDI) {
MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
- if (DbgMI)
+ if (DbgMI) {
BB->insert(InsertPos, DbgMI);
+ // This dbg_value is also re-emitted closer to its use once the block's
+ // instructions have been emitted, so clear the Emitted flag to allow
+ // that second emission.
+ (*PDI)->clearIsEmitted();
+ }
}
}
@@ -878,7 +892,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
for (; DI != DE; ++DI) {
if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
break;
- if ((*DI)->isInvalidated())
+ if ((*DI)->isEmitted())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
@@ -900,7 +914,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// some of them before one or more conditional branches?
SmallVector<MachineInstr*, 8> DbgMIs;
for (; DI != DE; ++DI) {
- if ((*DI)->isInvalidated())
+ if ((*DI)->isEmitted())
continue;
assert((*DI)->getOrder() >= LastOrder &&
"emitting DBG_VALUE out of order");
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 6417e16bd0fd..3fa7ad895725 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -122,8 +122,8 @@ class InstrItineraryData;
virtual MachineBasicBlock*
EmitSchedule(MachineBasicBlock::iterator &InsertPos);
- void dumpNode(const SUnit *SU) const override;
-
+ void dumpNode(const SUnit &SU) const override;
+ void dump() const override;
void dumpSchedule() const;
std::string getGraphNodeLabel(const SUnit *SU) const override;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 84055f8ecc1a..416061475b1a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -118,7 +118,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -152,7 +152,7 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
/// the Available queue.
void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 48e03c6da68f..647496c1afcb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -87,6 +87,8 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGNodeDeletedListener::anchor() {}
+
#define DEBUG_TYPE "selectiondag"
static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
@@ -269,15 +271,24 @@ bool ISD::allOperandsUndef(const SDNode *N) {
}
bool ISD::matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match) {
+ std::function<bool(ConstantSDNode *)> Match,
+ bool AllowUndefs) {
+ // FIXME: Add support for scalar UNDEF cases?
if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
return Match(Cst);
+ // FIXME: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != Op.getOpcode())
return false;
EVT SVT = Op.getValueType().getScalarType();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (AllowUndefs && Op.getOperand(i).isUndef()) {
+ if (!Match(nullptr))
+ return false;
+ continue;
+ }
+
auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
return false;
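
// Illustrative caller (assumed, not part of this change): with AllowUndefs
// set, the predicate receives a null ConstantSDNode for undef elements and
// decides whether they count as a match.
//
//   auto IsPow2OrUndef = [](ConstantSDNode *C) {
//     return !C || C->getAPIntValue().isPowerOf2();
//   };
//   bool AllPow2 = ISD::matchUnaryPredicate(Op, IsPow2OrUndef,
//                                           /*AllowUndefs=*/true);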
@@ -287,26 +298,33 @@ bool ISD::matchUnaryPredicate(SDValue Op,
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
- std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+ bool AllowUndefs) {
if (LHS.getValueType() != RHS.getValueType())
return false;
+ // TODO: Add support for scalar UNDEF cases?
if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
return Match(LHSCst, RHSCst);
+ // TODO: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
ISD::BUILD_VECTOR != RHS.getOpcode())
return false;
EVT SVT = LHS.getValueType().getScalarType();
for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
- auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
- if (!LHSCst || !RHSCst)
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ bool LHSUndef = AllowUndefs && LHSOp.isUndef();
+ bool RHSUndef = AllowUndefs && RHSOp.isUndef();
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp);
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
+ if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
return false;
- if (LHSCst->getValueType(0) != SVT ||
- LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ if (LHSOp.getValueType() != SVT ||
+ LHSOp.getValueType() != RHSOp.getValueType())
return false;
if (!Match(LHSCst, RHSCst))
return false;
@@ -984,7 +1002,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- DivergenceAnalysis * Divergence) {
+ LegacyDivergenceAnalysis * Divergence) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1118,39 +1136,6 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
getConstant(Imm, DL, Op.getValueType()));
}
-SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
-SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
-SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
@@ -1718,7 +1703,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the NodeAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
- std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc);
+ llvm::copy(MaskVec, MaskAlloc);
auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
dl.getDebugLoc(), MaskAlloc);
@@ -2135,6 +2120,15 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
+ case ISD::SIGN_EXTEND_INREG:
+ EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (Mask.getActiveBits() <= ExVTBits)
+ return V.getOperand(0);
+
+ break;
}
return SDValue();
}
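
// Illustrative example for the new SIGN_EXTEND_INREG case (values assumed):
// for V = sign_extend_inreg(X, i8) with Mask = 0xFF, the mask's active bits
// (8) fit inside the 8-bit extension type, so none of the sign-copied upper
// bits are observed and X can be returned unchanged.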
@@ -2151,9 +2145,103 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
- KnownBits Known;
- computeKnownBits(Op, Known, Depth);
- return Mask.isSubsetOf(Known.Zero);
+ return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero);
+}
+
+/// isSplatValue - Return true if the vector V has the same value
+/// across all DemandedElts.
+bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
+ APInt &UndefElts) {
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
+ UndefElts = APInt::getNullValue(NumElts);
+
+ switch (V.getOpcode()) {
+ case ISD::BUILD_VECTOR: {
+ SDValue Scl;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = V.getOperand(i);
+ if (Op.isUndef()) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (Scl && Scl != Op)
+ return false;
+ Scl = Op;
+ }
+ return true;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO: Do we need to handle shuffle(splat, undef, mask)?
+ int SplatIndex = -1;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (0 <= SplatIndex && SplatIndex != M)
+ return false;
+ SplatIndex = M;
+ }
+ return true;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Src = V.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt UndefSrcElts;
+ APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) {
+ UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
+ return true;
+ }
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND: {
+ APInt UndefLHS, UndefRHS;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ UndefElts = UndefLHS | UndefRHS;
+ return true;
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// Helper wrapper for the main isSplatValue function.
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+ unsigned NumElts = VT.getVectorNumElements();
+
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return isSplatValue(V, DemandedElts, UndefElts) &&
+ (AllowUndefs || !UndefElts);
}
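
// Minimal usage sketch (assumed caller, not from this change):
//   APInt UndefElts;
//   APInt Demanded = APInt::getAllOnesValue(NumElts);
//   bool Splat = DAG.isSplatValue(V, Demanded, UndefElts);
// The convenience wrapper above does exactly this and, unless AllowUndefs
// is set, additionally rejects splats that contain undef lanes.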
/// Helper function that checks to see if a node is a constant or a
@@ -2195,60 +2283,59 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
- unsigned Depth) const {
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
- computeKnownBits(Op, Known, DemandedElts, Depth);
+ return computeKnownBits(Op, DemandedElts, Depth);
}
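
// With this change computeKnownBits returns its result by value instead of
// filling an out-parameter. A typical caller migration (sketch):
//   // Before: KnownBits Known; DAG.computeKnownBits(Op, Known, Depth);
//   // After:
//   KnownBits Known = DAG.computeKnownBits(Op, Depth);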
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. The DemandedElts argument allows us to only collect the known
/// bits that are shared by the requested vector elements.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
- const APInt &DemandedElts,
- unsigned Depth) const {
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- Known = KnownBits(BitWidth); // Don't know anything.
+ KnownBits Known(BitWidth); // Don't know anything.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
Known.One = C->getAPIntValue();
Known.Zero = ~Known.One;
- return;
+ return Known;
}
if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
// We know all of the bits for a constant fp!
Known.One = C->getValueAPF().bitcastToAPInt();
Known.Zero = ~Known.One;
- return;
+ return Known;
}
if (Depth == 6)
- return; // Limit search depth.
+ return Known; // Limit search depth.
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
if (!DemandedElts)
- return; // No demanded elts, better to assume we don't know anything.
+ return Known; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
- assert(NumElts == Op.getValueType().getVectorNumElements() &&
- "Unexpected vector size");
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
- computeKnownBits(SrcOp, Known2, Depth + 1);
+ Known2 = computeKnownBits(SrcOp, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != BitWidth) {
@@ -2295,7 +2382,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Known bits are the values that are shared by every demanded element.
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
- computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
+ Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2304,7 +2391,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
- computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
+ Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2321,7 +2408,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
- computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
+ Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2344,22 +2431,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
if (!!DemandedSubElts) {
- computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+ Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
if (Known.isUnknown())
break; // early-out.
}
APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
APInt DemandedSrcElts = DemandedElts & ~SubMask;
if (!!DemandedSrcElts) {
- computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+ Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
- computeKnownBits(Sub, Known, Depth + 1);
+ Known = computeKnownBits(Sub, Depth + 1);
if (Known.isUnknown())
break; // early-out.
- computeKnownBits(Src, Known2, Depth + 1);
+ Known2 = computeKnownBits(Src, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2374,13 +2461,26 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
- computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
+ APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
} else {
- computeKnownBits(Src, Known, Depth + 1);
+ Known = computeKnownBits(Src, Depth + 1);
}
break;
}
+ case ISD::SCALAR_TO_VECTOR: {
+ // We know about scalar_to_vector as much as we know about its source,
+ // which becomes the first element of an otherwise unknown vector.
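+ // Bail unless only the first element (bit 0 of DemandedElts) is demanded.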
+ if (DemandedElts != 1)
+ break;
+
+ SDValue N0 = Op.getOperand(0);
+ Known = computeKnownBits(N0, Depth + 1);
+ if (N0.getValueSizeInBits() != BitWidth)
+ Known = Known.trunc(BitWidth);
+
+ break;
+ }
case ISD::BITCAST: {
SDValue N0 = Op.getOperand(0);
EVT SubVT = N0.getValueType();
@@ -2392,7 +2492,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Fast handling of 'identity' bitcasts.
if (BitWidth == SubBitWidth) {
- computeKnownBits(N0, Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(N0, DemandedElts, Depth + 1);
break;
}
@@ -2413,7 +2513,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
SubDemandedElts.setBit(i * SubScale);
for (unsigned i = 0; i != SubScale; ++i) {
- computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
+ Known2 = computeKnownBits(N0, SubDemandedElts.shl(i),
Depth + 1);
unsigned Shifts = IsLE ? i : SubScale - 1 - i;
Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
@@ -2434,7 +2534,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
if (DemandedElts[i])
SubDemandedElts.setBit(i / SubScale);
- computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
+ Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
@@ -2452,8 +2552,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
Known.One &= Known2.One;
@@ -2461,8 +2561,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero |= Known2.Zero;
break;
case ISD::OR:
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
Known.Zero &= Known2.Zero;
@@ -2470,8 +2570,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.One |= Known2.One;
break;
case ISD::XOR: {
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
@@ -2481,8 +2581,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::MUL: {
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
// Also compute a conservative estimate for high known-0 bits.
@@ -2503,10 +2603,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned LeadZ = Known2.countMinLeadingZeros();
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
if (RHSMaxLeadingZeros != BitWidth)
LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
@@ -2516,22 +2616,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::SELECT:
case ISD::VSELECT:
- computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
+ Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
+ Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
+ Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2560,7 +2660,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
Known.Zero <<= Shift;
Known.One <<= Shift;
@@ -2570,7 +2670,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
Known.Zero.lshrInPlace(Shift);
Known.One.lshrInPlace(Shift);
@@ -2599,13 +2699,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
// Sign extend known zero/one bit (else is unknown).
Known.Zero.ashrInPlace(Shift);
Known.One.ashrInPlace(Shift);
}
break;
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (ConstantSDNode *C =
+ isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+ DemandedElts, Depth + 1);
+ break;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
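+ // e.g. with BW = 8: fshl(0xAB, 0xCD, 4) = (0xAB << 4) | (0xCD >> 4)
+ //                                       = 0xB0 | 0x0C = 0xBC.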
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Opcode == ISD::FSHL) {
+ Known.One <<= Amt;
+ Known.Zero <<= Amt;
+ Known2.One.lshrInPlace(BitWidth - Amt);
+ Known2.Zero.lshrInPlace(BitWidth - Amt);
+ } else {
+ Known.One <<= BitWidth - Amt;
+ Known.Zero <<= BitWidth - Amt;
+ Known2.One.lshrInPlace(Amt);
+ Known2.Zero.lshrInPlace(Amt);
+ }
+ Known.One |= Known2.One;
+ Known.Zero |= Known2.Zero;
+ }
+ break;
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getScalarSizeInBits();
@@ -2623,7 +2756,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignMask;
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.One &= InputDemandedBits;
Known.Zero &= InputDemandedBits;
@@ -2643,7 +2776,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.countMaxTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
@@ -2652,7 +2785,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.countMaxLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
@@ -2660,7 +2793,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::CTPOP: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
unsigned PossibleOnes = Known2.countMaxPopulation();
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
@@ -2681,41 +2814,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
- computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
- // TODO ISD::SIGN_EXTEND_VECTOR_INREG
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
case ISD::SIGN_EXTEND: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
Known.Zero |= (~InMask);
Known.One &= (~Known.Zero);
break;
@@ -2745,7 +2886,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts,
Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
@@ -2762,12 +2903,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If low bits are known to be zero in both operands, then we know they are
// going to be 0 in the result. Both addition and complement operations
// preserve the low zero bits.
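+ // e.g. adding two multiples of 4 always yields a multiple of 4, so the
+ // two low zero bits survive the addition.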
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
if (KnownZeroLow == 0)
break;
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
Known.Zero.setLowBits(KnownZeroLow);
break;
@@ -2794,12 +2935,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
- Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
@@ -2823,7 +2963,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// The low bits of the first operand are unchanged by the srem.
Known.Zero = Known2.Zero & LowBits;
@@ -2847,7 +2987,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
Known.Zero = Known2.Zero | ~LowBits;
@@ -2858,8 +2998,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
uint32_t Leaders =
std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
@@ -2868,7 +3008,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::EXTRACT_ELEMENT: {
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
const unsigned BitWidth = Op.getValueSizeInBits();
@@ -2896,10 +3036,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If we know the element index, just demand that vector element.
unsigned Idx = ConstEltNo->getZExtValue();
APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
- computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
+ Known = computeKnownBits(InVec, DemandedElt, Depth + 1);
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, Known, Depth + 1);
+ Known = computeKnownBits(InVec, Depth + 1);
}
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth);
@@ -2919,7 +3059,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If we demand the inserted element then add its common known bits.
if (DemandedElts[EltIdx]) {
- computeKnownBits(InVal, Known2, Depth + 1);
+ Known2 = computeKnownBits(InVal, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
@@ -2928,33 +3068,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// that we don't demand the inserted element.
APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
if (!!VectorElts) {
- computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
+ Known2 = computeKnownBits(InVec, VectorElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, Known, Depth + 1);
- computeKnownBits(InVal, Known2, Depth + 1);
+ Known = computeKnownBits(InVec, Depth + 1);
+ Known2 = computeKnownBits(InVal, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
break;
}
case ISD::BITREVERSE: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.reverseBits();
Known.One = Known2.One.reverseBits();
break;
}
case ISD::BSWAP: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.byteSwap();
Known.One = Known2.One.byteSwap();
break;
}
case ISD::ABS: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the source's MSB is zero then we know the rest of the bits already.
if (Known2.isNonNegative()) {
@@ -2973,8 +3113,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::UMIN: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// UMIN - we know that the result will have the maximum of the
// known zero leading bits of the inputs.
@@ -2987,9 +3127,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::UMAX: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// UMAX - we know that the result will have the maximum of the
// known one leading bits of the inputs.
@@ -3033,9 +3172,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
// Fallback - just get the shared known bits of the operands.
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Known.isUnknown()) break; // Early-out
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
@@ -3058,6 +3197,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ return Known;
}
SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
@@ -3066,11 +3206,9 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
if (isNullConstant(N1))
return OFK_Never;
- KnownBits N1Known;
- computeKnownBits(N1, N1Known);
+ KnownBits N1Known = computeKnownBits(N1);
if (N1Known.Zero.getBoolValue()) {
- KnownBits N0Known;
- computeKnownBits(N0, N0Known);
+ KnownBits N0Known = computeKnownBits(N0);
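+ // ~Known.Zero is the largest value each operand could possibly be, so if
+ // even the two maxima cannot overflow, the addition never wraps.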
bool overflow;
(void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
@@ -3084,8 +3222,7 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
return OFK_Never;
if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
- KnownBits N0Known;
- computeKnownBits(N0, N0Known);
+ KnownBits N0Known = computeKnownBits(N0);
if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
return OFK_Never;
@@ -3131,8 +3268,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
- KnownBits Known;
- computeKnownBits(Val, Known);
+ KnownBits Known = computeKnownBits(Val);
return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
}
@@ -3240,14 +3376,35 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (VTBits == SrcBits)
return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+ bool IsLE = getDataLayout().isLittleEndian();
+
// Bitcast 'large element' scalar/vector to 'small element' vector.
- // TODO: Handle cases other than 'sign splat' when we have a use case.
- // Requires handling of DemandedElts and Endianness.
if ((SrcBits % VTBits) == 0) {
- assert(Op.getValueType().isVector() && "Expected bitcast to vector");
- Tmp = ComputeNumSignBits(N0, Depth + 1);
+ assert(VT.isVector() && "Expected bitcast to vector");
+
+ unsigned Scale = SrcBits / VTBits;
+ APInt SrcDemandedElts(NumElts / Scale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ // Fast case - sign splat can be simply split across the small elements.
+ Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
if (Tmp == SrcBits)
return VTBits;
+
+ // Slow case - determine how far the sign extends into each sub-element.
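+ // e.g. splitting an i64 with 40 sign bits into 2 x i32 on a little-endian
+ // target: the high i32 keeps all 32 sign bits, the low i32 keeps
+ // 40 - 32 = 8, so the minimum over both demanded elements is 8.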
+ Tmp2 = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned SubOffset = i % Scale;
+ SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset);
+ SubOffset = SubOffset * VTBits;
+ if (Tmp <= SubOffset)
+ return 1;
+ Tmp2 = std::min(Tmp2, Tmp - SubOffset);
+ }
+ return Tmp2;
}
break;
}
@@ -3264,7 +3421,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements());
Tmp = VTBits - SrcVT.getScalarSizeInBits();
return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
}
@@ -3361,7 +3518,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If setcc returns 0/-1, all bits are sign bits.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
- if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ if (TLI->getBooleanContents(VT.isVector(), false) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
@@ -3396,8 +3553,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
- KnownBits Known;
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -3421,8 +3577,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Handle NEG.
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
- KnownBits Known;
- computeKnownBits(Op.getOperand(1), Known, Depth+1);
+ KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
@@ -3533,12 +3688,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+ APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
}
return ComputeNumSignBits(Src, Depth + 1);
}
- case ISD::CONCAT_VECTORS:
+ case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
@@ -3556,6 +3711,40 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+ // the remainder from the src it's inserted into; otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Tmp = std::numeric_limits<unsigned>::max();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
+ // Not able to determine the index, so just assume the worst case.
+ Tmp = ComputeNumSignBits(Sub, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ Tmp2 = ComputeNumSignBits(Src, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ }
// If we are looking at the loaded value of the SDNode.
if (Op.getResNo() == 0) {
@@ -3587,8 +3776,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
- KnownBits Known;
- computeKnownBits(Op, Known, DemandedElts, Depth);
+ KnownBits Known = computeKnownBits(Op, DemandedElts, Depth);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
@@ -3622,21 +3810,121 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
return true;
}
-bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
- if (Op->getFlags().hasNoNaNs())
- return true;
+ if (Depth == 6)
+ return false; // Limit search depth.
+ // TODO: Handle vectors.
// If the value is a constant, we can obviously see if it is a NaN or not.
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
- return !C->getValueAPF().isNaN();
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ return !C->getValueAPF().isNaN() ||
+ (SNaN && !C->getValueAPF().isSignaling());
+ }
- // TODO: Recognize more cases here.
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ if (SNaN)
+ return true;
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case ISD::FCANONICALIZE:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FROUND:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::FABS:
+ case ISD::FNEG:
+ case ISD::FCOPYSIGN: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SELECT:
+ return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return true;
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case ISD::FSQRT: // Needs operand known positive
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FPOWI:
+ case ISD::FPOW: {
+ if (SNaN)
+ return true;
+ // TODO: Refine on operand
+ return false;
+ }
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Only one needs to be known not-nan, since it will be returned if the
+ // other ends up being one.
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ }
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ // TODO: Does this quiet or return the original NaN as-is?
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ }
- return false;
+ return false;
+ }
}
bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
@@ -3690,10 +3978,39 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
- KnownBits AKnown, BKnown;
- computeKnownBits(A, AKnown);
- computeKnownBits(B, BKnown);
- return (AKnown.Zero | BKnown.Zero).isAllOnesValue();
+ return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
+}
+
+static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ SelectionDAG &DAG) {
+ int NumOps = Ops.size();
+ assert(NumOps != 0 && "Can't build an empty vector!");
+ assert(VT.getVectorNumElements() == (unsigned)NumOps &&
+ "Incorrect element count in BUILD_VECTOR!");
+
+ // BUILD_VECTOR of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ // A BUILD_VECTOR of sequential extracts from a single source vector of the
+ // same type is the identity; fold it to that source vector.
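+ // e.g. (build_vector (extract_elt V, 0), (extract_elt V, 1),
+ //                    (extract_elt V, 2), (extract_elt V, 3)) --> V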
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (int i = 0; i != NumOps; ++i) {
+ if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Ops[i].getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Ops[i].getOperand(1)) ||
+ cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) {
+ IsIdentity = false;
+ break;
+ }
+ IdentitySrc = Ops[i].getOperand(0);
+ }
+ if (IsIdentity)
+ return IdentitySrc;
+
+ return SDValue();
}
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
@@ -3779,9 +4096,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SIGN_EXTEND:
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
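+ // Leave opaque constants alone; presumably they were marked opaque to
+ // survive unchanged to instruction selection.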
+ if (C->isOpaque())
+ break;
+ LLVM_FALLTHROUGH;
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::TRUNCATE:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::UINT_TO_FP:
@@ -3947,6 +4267,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {Operand};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
assert(VT.isFloatingPoint() &&
@@ -4045,6 +4372,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(Operand.getValueType().bitsLE(VT) &&
+ "The input must be the same size or smaller than the result.");
+ assert(VT.getVectorNumElements() <
+ Operand.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ break;
case ISD::ABS:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid ABS!");
@@ -4151,6 +4488,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
+ case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true);
+ case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true);
+ case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true);
+ case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
@@ -4258,14 +4599,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
return FoldSymbolOffset(Opcode, VT, GA, Cst1);
- // For vectors extract each constant element into Inputs so we can constant
- // fold them individually.
- BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
- BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
- if (!BV1 || !BV2)
+ // For vectors, extract each constant element and fold them individually.
+ // Either input may be an undef value.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ if (!BV1 && !Cst1->isUndef())
+ return SDValue();
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV2 && !Cst2->isUndef())
+ return SDValue();
+ // If both operands are undef, that's handled the same way as scalars.
+ if (!BV1 && !BV2)
return SDValue();
- assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+ assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) &&
+ "Vector binop with different number of elements in operands?");
EVT SVT = VT.getScalarType();
EVT LegalSVT = SVT;
@@ -4275,15 +4622,15 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
return SDValue();
}
SmallVector<SDValue, 4> Outputs;
- for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
- SDValue V1 = BV1->getOperand(I);
- SDValue V2 = BV2->getOperand(I);
-
+ unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands();
+ for (unsigned I = 0; I != NumOps; ++I) {
+ SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT);
+ SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT);
if (SVT.isInteger()) {
- if (V1->getValueType(0).bitsGT(SVT))
- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2->getValueType(0).bitsGT(SVT))
- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ if (V1->getValueType(0).bitsGT(SVT))
+ V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
+ if (V2->getValueType(0).bitsGT(SVT))
+ V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
}
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
@@ -4436,6 +4783,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
@@ -4477,6 +4831,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -4499,6 +4857,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ if (SDValue V = simplifyShift(N1, N2))
+ return V;
+ LLVM_FALLTHROUGH;
case ISD::ROTL:
case ISD::ROTR:
assert(VT == N1.getValueType() &&
@@ -4507,7 +4868,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Shifts only work on integers");
assert((!VT.isVector() || VT == N2.getValueType()) &&
"Vector shift amounts must be in the same as their first arg");
- // Verify that the shift amount VT is bit enough to hold valid shift
+ // Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
@@ -4555,8 +4916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(!EVT.isVector() &&
"AssertSExt/AssertZExt type should be the vector element type "
"rather than the vector type!");
- assert(EVT.bitsLE(VT) && "Not extending!");
- if (VT == EVT) return N1; // noop assertion.
+ assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+ if (VT.getScalarType() == EVT) return N1; // noop assertion.
break;
}
case ISD::SIGN_EXTEND_INREG: {
@@ -4793,14 +5154,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
}
- // Any FP binop with an undef operand is folded to NaN. This matches the
- // behavior of the IR optimizer.
switch (Opcode) {
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
+ // If both operands are undef, the result is undef. If 1 operand is undef,
+ // the result is NaN. This should match the behavior of the IR optimizer.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
if (N1.isUndef() || N2.isUndef())
return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
}
@@ -4819,9 +5182,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
}
}
@@ -4837,21 +5199,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstant(0, DL, VT);
LLVM_FALLTHROUGH;
case ISD::ADD:
- case ISD::ADDC:
- case ISD::ADDE:
case ISD::SUB:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
return getUNDEF(VT); // fold op(arg1, undef) -> undef
case ISD::MUL:
case ISD::AND:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
case ISD::OR:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
return getAllOnesConstant(DL, VT);
}
}
@@ -4907,6 +5268,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
break;
}
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
@@ -4915,6 +5283,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::SETCC: {
+ assert(VT.isInteger() && "SETCC result type must be an integer!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ "SETCC operands must have the same type!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "SETCC type should be vector iff the operand type is vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) &&
+ "SETCC vector element counts must match!");
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
return V;
@@ -4927,13 +5303,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::SELECT:
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
- if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- return N3; // select false, X, Y -> Y
- }
-
- if (N2 == N3) return N2; // select C, X, X -> X
+ case ISD::VSELECT:
+ if (SDValue V = simplifySelect(N1, N2, N3))
+ return V;
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
@@ -5048,8 +5420,11 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
- if (VT.isInteger())
- return DAG.getConstant(Val, dl, VT);
+ if (VT.isInteger()) {
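+ // If the splat value is not a legal store immediate (or is wider than
+ // 64 bits), emit it as an opaque constant; presumably this keeps later
+ // folds from re-expanding it for every store.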
+ bool IsOpaque = VT.getSizeInBits() > 64 ||
+ !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue());
+ return DAG.getConstant(Val, dl, VT, false, IsOpaque);
+ }
return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
VT);
}
@@ -5229,12 +5604,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- // FIXME: Only does this for 64-bit or more since we don't have proper
- // cost model for unaligned load / store.
bool Fast;
- if (NumMemOps && AllowOverlap &&
- VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
+ if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
+ Fast)
VTSize = Size;
else {
VT = NewVT;
@@ -6495,11 +6868,11 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
- SDValue Ptr, SDValue Mask, SDValue Src0,
+ SDValue Ptr, SDValue Mask, SDValue PassThru,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
SDVTList VTs = getVTList(VT, MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+ SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
@@ -6530,7 +6903,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ SDValue Ops[] = { Chain, Val, Ptr, Mask };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
@@ -6574,12 +6947,12 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
VTs, VT, MMO);
createOperands(N, Ops);
- assert(N->getValue().getValueType() == N->getValueType(0) &&
+ assert(N->getPassThru().getValueType() == N->getValueType(0) &&
"Incompatible type of the PassThru value in MaskedGatherSDNode");
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() ==
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
@@ -6616,7 +6989,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() ==
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
@@ -6630,6 +7003,60 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
+ // select undef, T, F --> T (if T is a constant), otherwise F
+ // select ?, undef, F --> F
+ // select ?, T, undef --> T
+ if (Cond.isUndef())
+ return isConstantValueOfAnyType(T) ? T : F;
+ if (T.isUndef())
+ return F;
+ if (F.isUndef())
+ return T;
+
+ // select true, T, F --> T
+ // select false, T, F --> F
+ if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
+ return CondC->isNullValue() ? F : T;
+
+ // TODO: This should simplify VSELECT with constant condition using something
+ // like this (but check boolean contents to be complete?):
+ // if (ISD::isBuildVectorAllOnes(Cond.getNode()))
+ // return T;
+ // if (ISD::isBuildVectorAllZeros(Cond.getNode()))
+ // return F;
+
+ // select ?, T, T --> T
+ if (T == F)
+ return T;
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
+ // shift undef, Y --> 0 (can always assume that the undef value is 0)
+ if (X.isUndef())
+ return getConstant(0, SDLoc(X.getNode()), X.getValueType());
+ // shift X, undef --> undef (because it may shift by the bitwidth)
+ if (Y.isUndef())
+ return getUNDEF(X.getValueType());
+
+ // shift 0, Y --> 0
+ // shift X, 0 --> X
+ if (isNullOrNullSplat(X) || isNullOrNullSplat(Y))
+ return X;
+
+ // shift X, C >= bitwidth(X) --> undef
+ // All vector elements must be too big (or undef) to avoid partial undefs.
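+ // e.g. (srl v2i32 X, <32, 33>) --> undef, but (srl v2i32 X, <32, 4>) is
+ // left alone because element 1 is a valid shift amount.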
+ auto isShiftTooBig = [X](ConstantSDNode *Val) {
+ return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits());
+ };
+ if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
+ return getUNDEF(X.getValueType());
+
+ return SDValue();
+}
+
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
@@ -6659,12 +7086,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
- case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags);
default: break;
}
switch (Opcode) {
default: break;
+ case ISD::BUILD_VECTOR:
+ // Attempt to simplify BUILD_VECTOR.
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
@@ -6880,7 +7312,7 @@ SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(NumVTs);
- std::copy(VTs.begin(), VTs.end(), Array);
+ llvm::copy(VTs, Array);
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
VTListMap.InsertNode(Result, IP);
}
@@ -7010,6 +7442,27 @@ void SDNode::DropOperands() {
}
}
+void SelectionDAG::setNodeMemRefs(MachineSDNode *N,
+ ArrayRef<MachineMemOperand *> NewMemRefs) {
+ if (NewMemRefs.empty()) {
+ N->clearMemRefs();
+ return;
+ }
+
+ // Check if we can avoid allocating by storing a single reference directly.
+ if (NewMemRefs.size() == 1) {
+ N->MemRefs = NewMemRefs[0];
+ N->NumMemRefs = 1;
+ return;
+ }
+
+ MachineMemOperand **MemRefsBuffer =
+ Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size());
+ llvm::copy(NewMemRefs, MemRefsBuffer);
+ N->MemRefs = MemRefsBuffer;
+ N->NumMemRefs = static_cast<int>(NewMemRefs.size());
+}
+
/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
/// machine opcode.
///
@@ -7152,7 +7605,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// For MachineNode, initialize the memory references information.
if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
- MN->setMemRefs(nullptr, nullptr);
+ MN->clearMemRefs();
// Swap for an appropriately sized array from the recycler.
removeOperands(N);
@@ -7202,6 +7655,12 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
NewOpc = ISD::FNEARBYINT;
IsUnary = true;
break;
+ case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
+ case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
+ case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
+ case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
}
// We're taking this node out of the chain, so we need to re-link things.
@@ -7488,8 +7947,11 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
- if (InvalidateDbg)
+ if (InvalidateDbg) {
+ // Invalidate value and indicate the SDDbgValue should not be emitted.
Dbg->setIsInvalidated();
+ Dbg->setIsEmitted();
+ }
}
for (SDDbgValue *Dbg : ClonedDVs)
@@ -7526,6 +7988,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
+ DV->setIsEmitted();
LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
@@ -7688,7 +8151,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// Preserve Debug Info.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
- transferDbgValues(SDValue(From, i), *To);
+ transferDbgValues(SDValue(From, i), To[i]);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -7700,18 +8163,22 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
- // A user can appear in a use list multiple times, and when this
- // happens the uses are usually next to each other in the list.
- // To help reduce the number of CSE recomputations, process all
- // the uses of this user that we can find this way.
+ // A user can appear in a use list multiple times, and when this happens the
+ // uses are usually next to each other in the list. To help reduce the
+ // number of CSE and divergence recomputations, process all the uses of this
+ // user that we can find this way.
+ bool To_IsDivergent = false;
do {
SDUse &Use = UI.getUse();
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
- if (To->getNode()->isDivergent() != From->isDivergent())
- updateDivergence(User);
+ To_IsDivergent |= ToOp->isDivergent();
} while (UI != UE && *UI == User);
+
+ if (To_IsDivergent != From->isDivergent())
+ updateDivergence(User);
+
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7842,6 +8309,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
}
}
+#ifndef NDEBUG
void SelectionDAG::VerifyDAGDiverence()
{
std::vector<SDNode*> TopoOrder;
@@ -7868,6 +8336,7 @@ void SelectionDAG::VerifyDAGDiverence()
"Divergence bit inconsistency detected\n");
}
}
+#endif
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
@@ -7901,7 +8370,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
}
// Sort the uses, so that all the uses from a given User are together.
- llvm::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses);
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
@@ -8053,6 +8522,32 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
return TokenFactor;
}
+SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **OutFunction) {
+ assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
+
+ auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ auto *Module = MF->getFunction().getParent();
+ auto *Function = Module->getFunction(Symbol);
+
+ if (OutFunction != nullptr)
+ *OutFunction = Function;
+
+ if (Function != nullptr) {
+ auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace());
+ return getGlobalAddress(Function, SDLoc(Op), PtrTy);
+ }
+
+ std::string ErrorStr;
+ raw_string_ostream ErrorFormatter(ErrorStr);
+
+ ErrorFormatter << "Undefined external symbol ";
+ ErrorFormatter << '"' << Symbol << '"';
+ ErrorFormatter.flush();
+
+ report_fatal_error(ErrorStr);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -8077,11 +8572,26 @@ bool llvm::isOneConstant(SDValue V) {
return Const != nullptr && Const->isOne();
}
+SDValue llvm::peekThroughBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ return V;
+}
+
bool llvm::isBitwiseNot(SDValue V) {
- return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
+ if (V.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
+ return C && C->isAllOnesValue();
}
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -8090,9 +8600,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
+ if (CN && (UndefElements.none() || AllowUndefs) &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -8100,21 +8608,40 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
return nullptr;
}
-ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
-
- if (CN && UndefElements.none())
+ if (CN && (UndefElements.none() || AllowUndefs))
return CN;
}
return nullptr;
}
+bool llvm::isNullOrNullSplat(SDValue N) {
+ // TODO: may want to use peekThroughBitcast() here.
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isNullValue();
+}
+
+bool llvm::isOneOrOneSplat(SDValue N) {
+ // TODO: may want to use peekThroughBitcast() here.
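+ // The bitwidth check rejects BuildVector splats whose operands are wider
+ // than (and implicitly truncated to) the element type.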
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+ N = peekThroughBitcasts(N);
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
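The new isOneOrOneSplat/isAllOnesOrAllOnesSplat predicates above compare the stored constant's bit width against the vector's scalar width because BuildVector operands may be held in a wider type than the element type; without that check, a wide constant that merely truncates to 1 (or all-ones) would be misreported as a splat. A standalone illustration of the idea, assuming 64-bit storage:

#include <cassert>
#include <cstdint>

// Is C a splat of 1 for elements of EltBits, given that the constant is
// physically stored with StoredBits bits (as a BuildVector operand may be)?
bool isOneSplat(uint64_t C, unsigned StoredBits, unsigned EltBits) {
  // A value check alone would accept 0x10001 for i16 elements, since it
  // truncates to 1; also requiring StoredBits == EltBits rejects it.
  uint64_t Mask = EltBits < 64 ? (1ULL << EltBits) - 1 : ~0ULL;
  return (C & Mask) == 1 && StoredBits == EltBits;
}

int main() {
  assert(isOneSplat(1, 16, 16));        // genuine i16 splat of 1
  assert(!isOneSplat(0x10001, 32, 16)); // i32-stored constant: reject
}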
@@ -8318,6 +8845,64 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
this->Flags.intersectWith(Flags);
}
+SDValue
+SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
+ ArrayRef<ISD::NodeType> CandidateBinOps) {
+ // The pattern must end in an extract from index 0.
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isNullConstant(Extract->getOperand(1)))
+ return SDValue();
+
+ SDValue Op = Extract->getOperand(0);
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+
+ // Match against one of the candidate binary ops.
+ if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
+ return Op.getOpcode() == unsigned(BinOp);
+ }))
+ return SDValue();
+
+ // At each stage, we're looking for something that looks like:
+ // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
+ // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
+ // i32 undef, i32 undef, i32 undef, i32 undef>
+ // %a = binop <8 x i32> %op, %s
+ // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
+ // we expect something like:
+ // <4,5,6,7,u,u,u,u>
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned CandidateBinOp = Op.getOpcode();
+ for (unsigned i = 0; i < Stages; ++i) {
+ if (Op.getOpcode() != CandidateBinOp)
+ return SDValue();
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0);
+ if (Shuffle) {
+ Op = Op1;
+ } else {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1);
+ Op = Op0;
+ }
+
+ // The first operand of the shuffle should be the same as the other operand
+ // of the binop.
+ if (!Shuffle || Shuffle->getOperand(0) != Op)
+ return SDValue();
+
+ // Verify the shuffle has the expected mask for this stage of the pyramid.
+ for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
+ return SDValue();
+ }
+
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return Op;
+}
+
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -8681,8 +9266,11 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
assert(!Node->OperandList && "Node already has operands");
+ assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >=
+ Vals.size() &&
+ "too many operands to fit into SDNode");
SDUse *Ops = OperandRecycler.allocate(
- ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
bool IsDivergent = false;
for (unsigned I = 0; I != Vals.size(); ++I) {
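The new assertion in createOperands guards the narrow NumOperands field on SDNode. A generic sketch of that pattern, assuming a 16-bit count (consistent with the 64k operand cap handled in getRoot() below):

#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>

struct Node { uint16_t NumOperands; };  // narrow stored count

void setOperandCount(Node &N, const std::vector<int> &Vals) {
  assert(Vals.size() <=
             std::numeric_limits<decltype(N.NumOperands)>::max() &&
         "too many operands to fit into node");
  N.NumOperands = static_cast<uint16_t>(Vals.size());
}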
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index c859f16e74fe..488bac1a9a80 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -19,8 +19,9 @@
using namespace llvm;
-bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
- const SelectionDAG &DAG, int64_t &Off) {
+bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
+ const SelectionDAG &DAG,
+ int64_t &Off) const {
// Conservatively fail if a match failed.
if (!Base.getNode() || !Other.Base.getNode())
return false;
@@ -75,7 +76,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
+BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
const SelectionDAG &DAG) {
SDValue Ptr = N->getBasePtr();
@@ -106,14 +107,14 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
Offset += C->getSExtValue();
- Base = Base->getOperand(0);
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
continue;
}
break;
case ISD::ADD:
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
Offset += C->getSExtValue();
- Base = Base->getOperand(0);
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
continue;
}
break;
@@ -129,7 +130,7 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
Offset -= Off;
else
Offset += Off;
- Base = LSBase->getBasePtr();
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(LSBase->getBasePtr());
continue;
}
break;
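The match() loop above peels constant ADD/OR layers off the pointer, accumulating each immediate into Offset; the change routes every step through TLI.unwrapAddress() so target-specific wrapper nodes no longer stop the walk. A standalone analogue of the peel-and-accumulate loop:

#include <cstdint>
#include <utility>

// Toy pointer expression: a leaf base Id, or Inner + Addend.
struct Expr {
  const Expr *Inner = nullptr; // null for a leaf
  int64_t Addend = 0;
  int Id = 0;
};

std::pair<int, int64_t> splitBaseOffset(const Expr *E) {
  int64_t Offset = 0;
  while (E->Inner) {     // like the ISD::ADD case above
    Offset += E->Addend; // fold the constant operand into the offset
    E = E->Inner;        // like unwrapAddress(Base->getOperand(0))
  }
  return {E->Id, Offset};
}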
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5f6b6010cae2..871ab9b29881 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -88,6 +88,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -121,6 +122,7 @@
#include <vector>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "isel"
@@ -614,6 +616,32 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
std::reverse(Parts, Parts + OrigNumParts);
}
+static SDValue widenVectorToPartType(SelectionDAG &DAG,
+ SDValue Val, const SDLoc &DL, EVT PartVT) {
+ if (!PartVT.isVector())
+ return SDValue();
+
+ EVT ValueVT = Val.getValueType();
+ unsigned PartNumElts = PartVT.getVectorNumElements();
+ unsigned ValueNumElts = ValueVT.getVectorNumElements();
+ if (PartNumElts > ValueNumElts &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(Val, Ops);
+ SDValue EltUndef = DAG.getUNDEF(ElementVT);
+ for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
+ Ops.push_back(EltUndef);
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+ return DAG.getBuildVector(PartVT, DL, Ops);
+ }
+
+ return SDValue();
+}
+
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
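The extracted widenVectorToPartType handles the case where the part type has more lanes of the same element type than the value: it extracts the existing elements and pads with undef. A standalone sketch, modeling undef lanes as empty optionals:

#include <optional>
#include <vector>

using Lane = std::optional<float>; // nullopt stands in for undef

// Widen e.g. a <2 x float> value to a <4 x float> part; returns an empty
// vector when this is not a widening, as the real helper returns SDValue().
std::vector<Lane> widenToPart(const std::vector<float> &Val,
                              unsigned PartElts) {
  if (PartElts <= Val.size())
    return {};
  std::vector<Lane> Ops(Val.begin(), Val.end()); // ExtractVectorElements
  Ops.resize(PartElts);                          // pad with undef lanes
  return Ops;
}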
@@ -632,28 +660,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
- } else if (PartVT.isVector() &&
- PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
- PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
- EVT ElementVT = PartVT.getVectorElementType();
- // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
- // undef elements.
- SmallVector<SDValue, 16> Ops;
- for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
- Ops.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
-
- for (unsigned i = ValueVT.getVectorNumElements(),
- e = PartVT.getVectorNumElements(); i != e; ++i)
- Ops.push_back(DAG.getUNDEF(ElementVT));
-
- Val = DAG.getBuildVector(PartVT, DL, Ops);
-
- // FIXME: Use CONCAT for 2x -> 4x.
-
- //SDValue UndefElts = DAG.getUNDEF(VectorTy);
- //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
+ Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
@@ -695,33 +703,38 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
- unsigned NumElements = ValueVT.getVectorNumElements();
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ unsigned IntermediateNumElts = IntermediateVT.isVector() ?
+ IntermediateVT.getVectorNumElements() : 1;
+
// Convert the vector to the appropriate type if necessary.
- unsigned DestVectorNoElts =
- NumIntermediates *
- (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+ unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
+
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
- if (Val.getValueType() != BuiltVectorTy)
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (ValueVT != BuiltVectorTy) {
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
+ Val = Widened;
+
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+ }
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
- if (IntermediateVT.isVector())
- Ops[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates), DL,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
- else
+ if (IntermediateVT.isVector()) {
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
+ } else {
Ops[i] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getConstant(i, DL, IdxVT));
+ }
}
// Split the intermediate operands into legal parts.
@@ -810,7 +823,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
- !RegisterVT.isInteger() || RegisterVT.isVector())
+ !RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
@@ -818,7 +831,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
if (!LOI)
continue;
- unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
@@ -1019,8 +1032,19 @@ SDValue SelectionDAGBuilder::getRoot() {
}
// Otherwise, we have to make a token factor node.
- SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- PendingLoads);
+ // If we have >= 2^16 loads then split across multiple token factors as
+ // there's a 64k limit on the number of SDNode operands.
+ SDValue Root;
+ size_t Limit = (1 << 16) - 1;
+ while (PendingLoads.size() > Limit) {
+ unsigned SliceIdx = PendingLoads.size() - Limit;
+ auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
+ SDValue NewTF =
+ DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
+ PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
+ PendingLoads.emplace_back(NewTF);
+ }
+ Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
PendingLoads.clear();
DAG.setRoot(Root);
return Root;
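Since SDNode's operand count is a 16-bit field, the new getRoot() logic repeatedly folds the trailing 65535 pending loads into a summary TokenFactor until one node can hold everything. The chunking, sketched standalone with integers standing in for chain values:

#include <numeric>
#include <vector>

int foldWithLimit(std::vector<int> Pending, size_t Limit) {
  while (Pending.size() > Limit) {
    size_t SliceIdx = Pending.size() - Limit;
    // Fold the last Limit entries into one summary value...
    int Summary =
        std::accumulate(Pending.begin() + SliceIdx, Pending.end(), 0);
    Pending.erase(Pending.begin() + SliceIdx, Pending.end());
    Pending.push_back(Summary); // ...and push it back for the next round.
  }
  return std::accumulate(Pending.begin(), Pending.end(), 0);
}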
@@ -1054,7 +1078,7 @@ SDValue SelectionDAGBuilder::getControlRoot() {
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
- if (isa<TerminatorInst>(&I)) {
+ if (I.isTerminator()) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
@@ -1082,7 +1106,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
}
}
- if (!isa<TerminatorInst>(&I) && !HasTailCall &&
+ if (!I.isTerminator() && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -1178,7 +1202,8 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V));
+ DAG.getDataLayout(), InReg, Ty,
+ None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
@@ -1437,8 +1462,11 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
// the start of an EH scope/funclet.
FuncInfo.MBB->setIsEHScopeEntry();
- FuncInfo.MBB->setIsEHFuncletEntry();
- FuncInfo.MBB->setIsCleanupFuncletEntry();
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ if (Pers != EHPersonality::Wasm_CXX) {
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+ }
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
@@ -1458,6 +1486,7 @@ static void findUnwindDestinations(
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
while (EHPadBB) {
@@ -1472,7 +1501,8 @@ static void findUnwindDestinations(
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
- UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsWasmCXX)
+ UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
@@ -1807,7 +1837,6 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SwitchCases.push_back(CB);
}
-/// FindMergedConditions - If Cond is an expression like
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -1819,13 +1848,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
bool InvertCond) {
// Skip over not part of the tree and remember to invert op and operands at
// next level.
- if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
- const Value *CondOp = BinaryOperator::getNotArgument(Cond);
- if (InBlock(CondOp, CurBB->getBasicBlock())) {
- FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
- !InvertCond);
- return;
- }
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ InBlock(NotCond, CurBB->getBasicBlock())) {
+ FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
@@ -2193,12 +2221,11 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
MachinePointerInfo MPInfo(Global);
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
- *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
- DAG.getEVTAlignment(PtrTy));
- Node->setMemRefs(MemRefs, MemRefs + 1);
+ MachineMemOperand *MemRef = MF.getMachineMemOperand(
+ MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
+ DAG.setNodeMemRefs(Node, {MemRef});
}
return SDValue(Node, 0);
}
@@ -2514,9 +2541,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
- MachineBasicBlock *MBB = FuncInfo.MBB;
- addLandingPadInfo(LP, *MBB);
-
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -2567,8 +2591,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
assert(CC.Low == CC.High && "Input clusters must be single-case");
#endif
- llvm::sort(Clusters.begin(), Clusters.end(),
- [](const CaseCluster &a, const CaseCluster &b) {
+ llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
return a.Low->getValue().slt(b.Low->getValue());
});
@@ -2789,6 +2812,15 @@ static bool isVectorReductionOp(const User *I) {
return ReduxExtracted;
}
+void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+
+ SDValue Op = getValue(I.getOperand(0));
+ SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
+ Op, Flags);
+ setValue(&I, UnNodeValue);
+}
+
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
@@ -2815,7 +2847,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op2 = getValue(I.getOperand(1));
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
- Op2.getValueType(), DAG.getDataLayout());
+ Op1.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -2932,7 +2964,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
ISD::VSELECT : ISD::SELECT;
// Min/max matching is only viable if all output VTs are the same.
- if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
+ if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
@@ -2960,16 +2992,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: {
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
Opc = ISD::FMINNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
- Opc = ISD::FMINNAN;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
+ Opc = ISD::FMINIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
- ISD::FMINNUM : ISD::FMINNAN;
+ ISD::FMINNUM : ISD::FMINIMUM;
break;
}
}
@@ -2977,17 +3009,17 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
- Opc = ISD::FMAXNAN;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
+ Opc = ISD::FMAXIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
- ISD::FMAXNUM : ISD::FMAXNAN;
+ ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
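The FMINNAN/FMAXNAN opcodes are renamed to FMINIMUM/FMAXIMUM here, matching the IEEE-754 2018 minimum/maximum operations that propagate NaN; FMINNUM/FMAXNUM keep the minNum behavior of returning the numeric operand. A standalone contrast (ignoring signed-zero ordering for brevity):

#include <cassert>
#include <cmath>
#include <limits>

double fminimum(double A, double B) { // FMINIMUM: NaN wins
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN();
  return A < B ? A : B;
}

int main() {
  double NaN = std::nan("");
  assert(std::isnan(fminimum(1.0, NaN)));
  assert(std::fmin(1.0, NaN) == 1.0); // FMINNUM-style: the number wins
}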
@@ -3662,8 +3694,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA && AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
+ else if (AA &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ SV,
+ LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3774,9 +3809,12 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
- "load_from_swift_error should not be constant memory");
+ assert(
+ (!AA ||
+ !AA->pointsToConstantMemory(MemoryLocation(
+ SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) &&
+ "load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -4063,8 +4101,12 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
+ bool AddToChain =
+ !AA || !AA->pointsToConstantMemory(MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo));
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO =
@@ -4105,10 +4147,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
bool ConstantMemory = false;
- if (UniformBase &&
- AA && AA->pointsToConstantMemory(MemoryLocation(
- BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
- AAInfo))) {
+ if (UniformBase && AA &&
+ AA->pointsToConstantMemory(
+ MemoryLocation(BasePtr,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -5038,6 +5082,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
return nullptr;
+ case Intrinsic::sponentry:
+ setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return nullptr;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
@@ -5176,7 +5224,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
- const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
+ const auto &DI = cast<DbgVariableIntrinsic>(I);
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
@@ -5276,7 +5324,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
SDDbgValue *SDV;
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
return nullptr;
@@ -5553,8 +5602,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::minnum: {
auto VT = getValue(I.getArgOperand(0)).getValueType();
unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
- ? ISD::FMINNAN
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)
+ ? ISD::FMINIMUM
: ISD::FMINNUM;
setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
@@ -5564,14 +5613,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::maxnum: {
auto VT = getValue(I.getArgOperand(0)).getValueType();
unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
- ? ISD::FMAXNAN
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)
+ ? ISD::FMAXIMUM
: ISD::FMAXNUM;
setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return nullptr;
}
+ case Intrinsic::minimum:
+ setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::maximum:
+ setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5603,6 +5664,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return nullptr;
case Intrinsic::fmuladd: {
@@ -5693,43 +5760,94 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
+ SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+ SDValue Zero = DAG.getConstant(0, sdl, VT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- // When X == Y, this is rotate. Create the node directly if legal.
- // TODO: This should also be done if the operation is custom, but we have
- // to make sure targets are handling the modulo shift amount as expected.
- // TODO: If the rotate direction (left or right) corresponding to the shift
- // is not available, adjust the shift value and invert the direction.
- auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
- if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
- setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
+ if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
+ setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
return nullptr;
}
- // Get the shift amount and inverse shift amount, modulo the bit-width.
- SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
- SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z);
- SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+ // When X == Y, this is a rotate. If the data type has a power-of-2 size, we
+ // avoid the select that is necessary in the general case to filter out
+ // the 0-shift possibility that leads to UB.
+ if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
+ auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ return nullptr;
+ }
+
+ // Some targets only rotate one way. Try the opposite direction.
+ RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ // Negate the shift amount because it is safe to ignore the high bits.
+ SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
+ return nullptr;
+ }
+
+ // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
+ // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
+ SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+ SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
+ setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
+ return nullptr;
+ }
- // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
- // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW))
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
-
- // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would
- // be X | Y. If X == Y (rotate), that's fine. If not, we have to select.
- if (X != Y) {
- SDValue Zero = DAG.getConstant(0, sdl, VT);
- EVT CCVT = MVT::i1;
- if (VT.isVector())
- CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
- // For fshl, 0 shift returns the 1st arg (X).
- // For fshr, 0 shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
- Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res);
- }
- setValue(&I, Res);
+ SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = MVT::i1;
+ if (VT.isVector())
+ CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
+ setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
+ return nullptr;
+ }
+ case Intrinsic::sadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::uadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::ssub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::usub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::smul_fix: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I,
+ DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
return nullptr;
}
case Intrinsic::stacksave: {
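To summarize the expansion above: the general funnel shift needs a select, because when Z % BW == 0 the opposite-direction shift would be by BW (undefined), while the rotate case (X == Y, power-of-two BW) folds the zero-shift case away, since (0 - Z) % BW is then also 0 and X | X == X. A standalone 32-bit rendering of both formulas:

#include <cassert>
#include <cstdint>

uint32_t fshl(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t ShAmt = Z % BW;
  if (ShAmt == 0)
    return X; // the select: 0-shift returns the first arg
  return (X << ShAmt) | (Y >> (BW - ShAmt));
}

uint32_t rotl(uint32_t X, uint32_t Z) { // X == Y case, no select needed
  const uint32_t BW = 32;
  return (X << (Z % BW)) | (X >> ((0u - Z) % BW));
}

int main() {
  assert(fshl(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au);
  assert(rotl(0x80000001u, 1) == 0x00000003u);
}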
@@ -5824,6 +5942,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
+
+ case Intrinsic::is_constant:
+ // If this wasn't constant-folded away by now, then it's not a
+ // constant.
+ setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
+ return nullptr;
+
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
@@ -6224,7 +6349,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
GA->getGlobal(), getCurSDLoc(),
Val.getValueType(), GA->getOffset())});
}
- llvm::sort(Targets.begin(), Targets.end(),
+ llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
return T1.Offset < T2.Offset;
});
@@ -6243,12 +6368,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
- case Intrinsic::wasm_landingpad_index: {
- // TODO store landing pad index in a map, which will be used when generating
- // LSDA information
+ case Intrinsic::wasm_landingpad_index:
+ // The information this intrinsic contained has been transferred to
+ // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
+ // delete it now.
return nullptr;
}
- }
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
@@ -6311,6 +6436,24 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_nearbyint:
Opcode = ISD::STRICT_FNEARBYINT;
break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ Opcode = ISD::STRICT_FCEIL;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ Opcode = ISD::STRICT_FFLOOR;
+ break;
+ case Intrinsic::experimental_constrained_round:
+ Opcode = ISD::STRICT_FROUND;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ Opcode = ISD::STRICT_FTRUNC;
+ break;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getRoot();
@@ -6405,7 +6548,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
BeginLabel, EndLabel);
- } else {
+ } else if (!isScopedEHPersonality(Pers)) {
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
}
@@ -7200,10 +7343,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
- const SDLoc &DL, SDISelAsmOperandInfo &OpInfo,
+static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -7211,13 +7355,21 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
- std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode,
- RefOpInfo.ConstraintVT);
+ unsigned AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+ &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+ // RC is null only on failure. Return immediately.
+ if (!RC)
+ return;
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
- unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
- // If this is a FP operand in an integer register (or visa versa), or more
+ // If this is an FP operand in an integer register (or vice versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
//
@@ -7225,34 +7377,30 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// Bitcast for output value is done at the end of visitInlineAsm().
if ((OpInfo.Type == InlineAsm::isOutput ||
OpInfo.Type == InlineAsm::isInput) &&
- PhysReg.second &&
- !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
+ !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types). Note: output bitcast is done at the end of
// visitInlineAsm().
- MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
// Exclude indirect inputs while they are unsupported because the code
// to perform the load is missing and thus OpInfo.CallOperand still
- // refer to the input address rather than the pointed-to value.
+ // refers to the input address rather than the pointed-to value.
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
- // If the operand is a FP value and we want it in integer registers,
+ // If the operand is an FP value and we want it in integer registers,
// use the corresponding integer type. This turns an f64 value into
// i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
- RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
if (OpInfo.Type == InlineAsm::isInput)
OpInfo.CallOperand =
- DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
- OpInfo.ConstraintVT = RegVT;
+ DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = VT;
}
}
-
- NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
// No need to allocate a matching input constraint since the constraint it's
@@ -7260,59 +7408,38 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
if (OpInfo.isMatchingInputConstraint())
return;
- MVT RegVT;
EVT ValueVT = OpInfo.ConstraintVT;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Initialize NumRegs.
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now.
- if (unsigned AssignedReg = PhysReg.first) {
- const TargetRegisterClass *RC = PhysReg.second;
- if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = *TRI.legalclasstypes_begin(*RC);
-
- // Get the actual register value type. This is important, because the user
- // may have asked for (e.g.) the AX register in i32 type. We need to
- // remember that AX is actually i16 to get the right extension.
- RegVT = *TRI.legalclasstypes_begin(*RC);
-
- // This is a explicit reference to a physical register.
- Regs.push_back(AssignedReg);
-
- // If this is an expanded reference, add the rest of the regs to Regs.
- if (NumRegs != 1) {
- TargetRegisterClass::iterator I = RC->begin();
- for (; *I != AssignedReg; ++I)
- assert(I != RC->end() && "Didn't find reg!");
- // Already added the first reg.
- --NumRegs; ++I;
- for (; NumRegs; --NumRegs, ++I) {
- assert(I != RC->end() && "Ran out of registers to allocate!");
- Regs.push_back(*I);
- }
- }
+ // If this is associated with a specific register, initialize the iterator
+ // to the correct place. If virtual, make sure we have enough registers.
- OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return;
- }
+ // Initialize iterator if necessary
+ TargetRegisterClass::iterator I = RC->begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
- // Otherwise, if this was a reference to an LLVM register class, create vregs
- // for this reference.
- if (const TargetRegisterClass *RC = PhysReg.second) {
- RegVT = *TRI.legalclasstypes_begin(*RC);
- if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = RegVT;
-
- // Create the appropriate number of virtual registers.
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (; NumRegs; --NumRegs)
- Regs.push_back(RegInfo.createVirtualRegister(RC));
+ // If a specific physical register was requested, position the iterator at it.
+ if (AssignedReg) {
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "AssignedReg should be member of RC");
+ }
- OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC);
+ Regs.push_back(R);
}
- // Otherwise, we couldn't allocate enough registers for this.
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
}
static unsigned
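The rewrite above merges the old physical-register and virtual-register paths of GetRegistersForValue into one loop: position an iterator at the assigned physreg if there is one, then take consecutive class members or mint fresh vregs. A standalone sketch of that unified loop, with plain unsigned ids standing in for registers:

#include <cassert>
#include <vector>

std::vector<unsigned> allocateRegs(const std::vector<unsigned> &RC,
                                   unsigned AssignedReg, unsigned NumRegs,
                                   unsigned &NextVReg) {
  std::vector<unsigned> Regs;
  auto I = RC.begin();
  if (AssignedReg) {
    while (I != RC.end() && *I != AssignedReg)
      ++I; // position the iterator at the requested physical register
    assert(I != RC.end() && "AssignedReg should be a member of RC");
  }
  for (unsigned N = 0; N != NumRegs; ++N) {
    if (AssignedReg) {
      assert(I != RC.end() && "Ran out of registers to allocate!");
      Regs.push_back(*I++); // consecutive members of the class
    } else {
      Regs.push_back(NextVReg++); // fresh virtual register
    }
  }
  return Regs;
}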
@@ -7333,21 +7460,6 @@ findMatchingInlineAsmOperand(unsigned OperandNo,
return CurOp;
}
-/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT
-/// \return true if it has succeeded, false otherwise
-static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
- MVT RegVT, SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0, e = NumRegs; i != e; ++i) {
- if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
- Regs.push_back(RegInfo.createVirtualRegister(RC));
- else
- return false;
- }
- return true;
-}
-
namespace {
class ExtraFlags {
@@ -7404,12 +7516,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ for (auto &T : TargetConstraints) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- MVT OpVT = MVT::Other;
-
// Compute the value type for each operand.
if (OpInfo.Type == InlineAsm::isInput ||
(OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
@@ -7423,39 +7533,37 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT =
+ OpInfo.ConstraintVT =
OpInfo
.getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
.getSimpleVT();
- }
-
- if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
+ } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
- STy->getElementType(ResNo));
+ OpInfo.ConstraintVT = TLI.getSimpleValueType(
+ DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+ OpInfo.ConstraintVT =
+ TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
}
++ResNo;
+ } else {
+ OpInfo.ConstraintVT = MVT::Other;
}
- OpInfo.ConstraintVT = OpVT;
-
if (!hasMemory)
hasMemory = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
- // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]?
- auto TargetConstraint = TargetConstraints[i];
+ // FIXME: Could we compute this on OpInfo rather than T?
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(TargetConstraint, SDValue());
+ TLI.ComputeConstraintToUse(T, SDValue());
- ExtraInfo.update(TargetConstraint);
+ ExtraInfo.update(T);
}
SDValue Chain, Flag;
@@ -7469,9 +7577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Second pass over the constraints: compute which constraint option to use
// and assign registers to constraints that want a specific physreg.
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
@@ -7511,24 +7617,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
- : ConstraintOperands[i];
+ : OpInfo;
if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// Third pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
- : ConstraintOperands[i];
+ : OpInfo;
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -7555,9 +7660,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// IndirectStoresToEmit - The set of stores to emit after the inline asm node.
std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
@@ -7635,9 +7738,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
SmallVector<unsigned, 4> Regs;
- if (!createVirtualRegs(Regs,
- InlineAsm::getNumOperandRegisters(OpFlag),
- RegVT, DAG)) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
+ MachineRegisterInfo &RegInfo =
+ DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+ } else {
emitInlineAsmError(CS, "inline asm error: This value type register "
"class is not natively supported!");
return;
@@ -7768,10 +7875,29 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
Chain, &Flag, CS.getInstruction());
- // FIXME: Why don't we do this for inline asms with MRVs?
- if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
- EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
-
+ llvm::Type *CSResultType = CS.getType();
+ unsigned numRet;
+ ArrayRef<Type *> ResultTypes;
+ SmallVector<SDValue, 1> ResultValues(1);
+ if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) {
+ numRet = StructResult->getNumElements();
+ assert(Val->getNumOperands() == numRet &&
+ "Mismatch in number of output operands in asm result");
+ ResultTypes = StructResult->elements();
+ ArrayRef<SDUse> ValueUses = Val->ops();
+ ResultValues.resize(numRet);
+ std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
+ [](const SDUse &u) -> SDValue { return u.get(); });
+ } else {
+ numRet = 1;
+ ResultValues[0] = Val;
+ ResultTypes = makeArrayRef(CSResultType);
+ }
+ SmallVector<EVT, 1> ResultVTs(numRet);
+ for (unsigned i = 0; i < numRet; i++) {
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
+ SDValue Val = ResultValues[i];
+ assert(ResultTypes[i]->isSized() && "Unexpected unsized type");
// If the type of the inline asm call site return value is different but
// has same size as the type of the asm output bitcast it. One example
// of this is for vectors with different width / number of elements.
@@ -7782,22 +7908,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// This can also happen for a return value that disagrees with the
// register class it is put in, eg. a double in a general-purpose
// register on a 32-bit machine.
- if (ResultType != Val.getValueType() &&
- ResultType.getSizeInBits() == Val.getValueSizeInBits()) {
- Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
- ResultType, Val);
-
- } else if (ResultType != Val.getValueType() &&
- ResultType.isInteger() && Val.getValueType().isInteger()) {
- // If a result value was tied to an input value, the computed result may
- // have a wider width than the expected result. Extract the relevant
- // portion.
- Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val);
+ if (ResultVT != Val.getValueType() &&
+ ResultVT.getSizeInBits() == Val.getValueSizeInBits())
+ Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
+ else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
+ Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result
+ // may have a wider width than the expected result. Extract the
+ // relevant portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
}
- assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
+ ResultVTs[i] = ResultVT;
+ ResultValues[i] = Val;
}
+ Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
setValue(CS.getInstruction(), Val);
// Don't need to use this as a chain in this case.
if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
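The change above extends inline-asm result handling from the single-value case to struct (multi-value) returns: each struct element gets its own EVT and SDValue, each is fixed up independently, and the pieces are rejoined with MERGE_VALUES. The fan-out step, sketched standalone:

#include <algorithm>
#include <cassert>
#include <vector>

struct Use { int V; }; // stand-in for SDUse

std::vector<int> splitResults(const std::vector<Use> &Ops, unsigned NumRet) {
  assert(Ops.size() == NumRet &&
         "Mismatch in number of output operands in asm result");
  std::vector<int> Values(NumRet);
  std::transform(Ops.begin(), Ops.end(), Values.begin(),
                 [](const Use &U) { return U.V; });
  return Values;
}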
@@ -7901,7 +8029,8 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return Op;
APInt Hi = CR.getUnsignedMax();
- unsigned Bits = Hi.getActiveBits();
+ unsigned Bits = std::max(Hi.getActiveBits(),
+ static_cast<unsigned>(IntegerType::MIN_INT_BITS));
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
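Clamping Bits to IntegerType::MIN_INT_BITS above guards the degenerate range whose unsigned maximum is 0: getActiveBits() is then 0, and without the clamp EVT::getIntegerVT would be asked for an illegal 0-bit type. The fix in isolation:

#include <algorithm>

unsigned clampActiveBits(unsigned ActiveBits) {
  const unsigned MinIntBits = 1; // IntegerType::MIN_INT_BITS
  return std::max(ActiveBits, MinIntBits);
}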
@@ -8656,7 +8785,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
- getABIRegCopyCC(V));
+ None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -9189,7 +9318,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
- const TerminatorInst *TI = LLVMBB->getTerminator();
+ const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
@@ -9621,7 +9750,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
}
BitTestInfo BTI;
- llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
+ llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
// Sort by probability first, number of bits second, bit mask third.
if (a.ExtraProb != b.ExtraProb)
return a.ExtraProb > b.ExtraProb;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 4b5dda982f1b..5f9cdb69daf7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -854,6 +854,9 @@ private:
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
+ void visitUnary(const User &I, unsigned Opcode);
+ void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
+
void visitBinary(const User &I, unsigned Opcode);
void visitShift(const User &I, unsigned Opcode);
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index fa341e8b5fa5..43df2abb674b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "SDNodeDbgValue.h"
#include <cstdint>
#include <iterator>
@@ -123,6 +124,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::SPONENTRY: return "SPONENTRY";
case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
@@ -174,25 +176,34 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Unary operators
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
+ case ISD::STRICT_FMINNUM: return "strict_fminnum";
case ISD::FMAXNUM: return "fmaxnum";
- case ISD::FMINNAN: return "fminnan";
- case ISD::FMAXNAN: return "fmaxnan";
+ case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
+ case ISD::FMINNUM_IEEE: return "fminnum_ieee";
+ case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
+ case ISD::FMINIMUM: return "fminimum";
+ case ISD::FMAXIMUM: return "fmaximum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::STRICT_FSQRT: return "strict_fsqrt";
+ case ISD::FCBRT: return "fcbrt";
case ISD::FSIN: return "fsin";
case ISD::STRICT_FSIN: return "strict_fsin";
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
+ case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
+ case ISD::STRICT_FFLOOR: return "strict_ffloor";
case ISD::FCEIL: return "fceil";
+ case ISD::STRICT_FCEIL: return "strict_fceil";
case ISD::FRINT: return "frint";
case ISD::STRICT_FRINT: return "strict_frint";
case ISD::FNEARBYINT: return "fnearbyint";
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
+ case ISD::STRICT_FROUND: return "strict_fround";
case ISD::FEXP: return "fexp";
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
@@ -226,6 +237,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SRL: return "srl";
case ISD::ROTL: return "rotl";
case ISD::ROTR: return "rotr";
+ case ISD::FSHL: return "fshl";
+ case ISD::FSHR: return "fshr";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
@@ -280,6 +293,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
+ case ISD::SADDSAT: return "saddsat";
+ case ISD::UADDSAT: return "uaddsat";
+ case ISD::SSUBSAT: return "ssubsat";
+ case ISD::USUBSAT: return "usubsat";
+ case ISD::SMULFIX: return "smulfix";
+
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
case ISD::ZERO_EXTEND: return "zero_extend";
@@ -681,9 +700,26 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ':' << L->getLine();
if (unsigned C = L->getColumn())
OS << ':' << C;
+
+ for (SDDbgValue *Dbg : G->GetDbgValues(this)) {
+ if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+ continue;
+ Dbg->dump(OS);
+ }
}
}
+LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const {
+ OS << " DbgVal";
+ if (kind == SDNODE)
+ OS << '(' << u.s.ResNo << ')';
+ OS << ":\"" << Var->getName() << '"';
+#ifndef NDEBUG
+ if (Expr->getNumElements())
+ Expr->dump();
+#endif
+}
+
/// Return true if this node is so simple that we should just print it inline
/// if it appears as an operand.
static bool shouldPrintInline(const SDNode &Node) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index f7bd8847bee3..af5c2433fa2f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -176,7 +177,8 @@ static const bool ViewDAGCombine1 = false,
/// RegisterScheduler class - Track the registration of instruction schedulers.
///
//===---------------------------------------------------------------------===//
-MachinePassRegistry RegisterScheduler::Registry;
+MachinePassRegistry<RegisterScheduler::FunctionPassCtor>
+ RegisterScheduler::Registry;
//===---------------------------------------------------------------------===//
///
@@ -417,7 +419,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<DivergenceAnalysis>());
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
@@ -451,7 +453,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (!succ_empty(&BB))
continue;
- const TerminatorInst *Term = BB.getTerminator();
+ const Instruction *Term = BB.getTerminator();
if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
continue;
@@ -695,14 +697,14 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
if (!TargetRegisterInfo::isVirtualRegister(DestReg))
continue;
- // Ignore non-scalar or non-integer values.
+ // Ignore non-integer values.
SDValue Src = N->getOperand(2);
EVT SrcVT = Src.getValueType();
- if (!SrcVT.isInteger() || SrcVT.isVector())
+ if (!SrcVT.isInteger())
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- CurDAG->computeKnownBits(Src, Known);
+ Known = CurDAG->computeKnownBits(Src);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
} while (!Worklist.empty());
}
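
The Known bits fed to AddLiveOutRegInfo now come from the by-value form of computeKnownBits. A tiny standalone sketch of the two-mask representation it returns (not LLVM code):

#include <cstdint>

struct MiniKnownBits {
  uint64_t Zero, One; // bit known 0, bit known 1; neither set = unknown
};

// Known bits of (A & B): known zero if zero in either input, known one
// only if one in both.
constexpr MiniKnownBits knownAnd(MiniKnownBits A, MiniKnownBits B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

static_assert(knownAnd({0x0F, 0xF0}, {0x00, 0xFF}).One == 0xF0, "");
static_assert(knownAnd({0x0F, 0xF0}, {0x00, 0xFF}).Zero == 0x0F, "");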
@@ -714,8 +716,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
+#endif
// Pre-type legalization allow creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
@@ -750,8 +754,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -770,8 +776,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -792,8 +800,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -839,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -852,8 +864,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -870,8 +884,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -1114,6 +1130,37 @@ static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
return false;
}
+// The wasm.landingpad.index intrinsic associates a landing pad index number
+// with a catchpad instruction. Retrieve the landing pad index from the
+// intrinsic and store the mapping in the function.
+static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
+ const CatchPadInst *CPI) {
+ MachineFunction *MF = MBB->getParent();
+  // In the case of a single catch (...) clause, we don't emit an LSDA, so we
+  // don't need this information.
+ bool IsSingleCatchAllClause =
+ CPI->getNumArgOperands() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue();
+ if (!IsSingleCatchAllClause) {
+ // Create a mapping from landing pad label to landing pad index.
+ bool IntrFound = false;
+ for (const User *U : CPI->users()) {
+ if (const auto *Call = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = Call->getIntrinsicID();
+ if (IID == Intrinsic::wasm_landingpad_index) {
+ Value *IndexArg = Call->getArgOperand(1);
+ int Index = cast<ConstantInt>(IndexArg)->getZExtValue();
+ MF->setWasmLandingPadIndex(MBB, Index);
+ IntrFound = true;
+ break;
+ }
+ }
+ }
+ assert(IntrFound && "wasm.landingpad.index intrinsic not found!");
+ (void)IntrFound;
+ }
+}
+
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
bool SelectionDAGISel::PrepareEHLandingPad() {
@@ -1123,44 +1170,48 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
const TargetRegisterClass *PtrRC =
TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+ auto Pers = classifyEHPersonality(PersonalityFn);
+
// Catchpads have one live-in register, which typically holds the exception
// pointer or code.
- if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
- if (hasExceptionPointerOrCodeUser(CPI)) {
- // Get or create the virtual register to hold the pointer or code. Mark
- // the live in physreg and copy into the vreg.
- MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
- assert(EHPhysReg && "target lacks exception pointer register");
- MBB->addLiveIn(EHPhysReg);
- unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
- BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
- TII->get(TargetOpcode::COPY), VReg)
- .addReg(EHPhysReg, RegState::Kill);
+ if (isFuncletEHPersonality(Pers)) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
}
return true;
}
- if (!LLVMBB->isLandingPad())
- return true;
-
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->addLandingPad(MBB);
- // Assign the call site to the landing pad's begin label.
- MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
- // Mark exception register as live in.
- if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
- FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
-
- // Mark exception selector register as live in.
- if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
- FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ if (Pers == EHPersonality::Wasm_CXX) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
+ mapWasmLandingPadIndex(MBB, CPI);
+ } else {
+ // Assign the call site to the landing pad's begin label.
+ MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+ // Mark exception register as live in.
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
+ FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ // Mark exception selector register as live in.
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
+ FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ }
return true;
}
@@ -1171,7 +1222,7 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
static bool isFoldedOrDeadInstruction(const Instruction *I,
FunctionLoweringInfo *FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
- !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !I->isTerminator() && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
!I->isEHPad() && // EH pad instructions aren't folded.
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
@@ -1688,7 +1739,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
Inst->getDebugLoc(), LLVMBB);
bool ShouldAbort = EnableFastISelAbort;
- if (isa<TerminatorInst>(Inst)) {
+ if (Inst->isTerminator()) {
// Use a different message for terminator misses.
R << "FastISel missed terminator";
// Don't abort for terminator unless the level is really high
@@ -2160,9 +2211,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
// Otherwise, the DAG Combiner may have proven that the value coming in is
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
-
- KnownBits Known;
- CurDAG->computeKnownBits(LHS, Known);
+ KnownBits Known = CurDAG->computeKnownBits(LHS);
// If all the missing bits in the or are already known to be set, match!
if (NeededMask.isSubsetOf(Known.One))
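
The hunk above ends mid-statement; the surrounding logic is that the OR pattern may still match when every needed-but-missing bit is already known one. A standalone sketch with plain 64-bit masks in place of APInt/KnownBits (not LLVM code):

#include <cstdint>

constexpr bool checkOrMask(uint64_t Desired, uint64_t Actual,
                           uint64_t KnownOne) {
  uint64_t Needed = Desired & ~Actual; // bits the pattern wants but lacks
  return (Needed & ~KnownOne) == 0;    // NeededMask.isSubsetOf(Known.One)
}

static_assert(checkOrMask(0xFF, 0x0F, 0xF0), "missing nibble known set");
static_assert(!checkOrMask(0xFF, 0x0F, 0x00), "missing bits unknown");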
@@ -3156,6 +3205,18 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
N.getNode()))
break;
continue;
+ case OPC_CheckPredicateWithOperands: {
+ unsigned OpNum = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Operands;
+
+ for (unsigned i = 0; i < OpNum; ++i)
+ Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first);
+
+ unsigned PredNo = MatcherTable[MatcherIndex++];
+ if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands))
+ break;
+ continue;
+ }
case OPC_CheckComplexPat: {
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
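
The new OPC_CheckPredicateWithOperands case reads its arguments from the matcher byte stream: an operand count, that many recorded-node indices, then the predicate number. A standalone sketch of just that decoding step (not LLVM code; names are illustrative):

#include <cstdint>
#include <vector>

struct PredCheck {
  std::vector<unsigned> OperandRecords; // indices into RecordedNodes
  unsigned PredNo;                      // predicate to invoke
};

PredCheck decodePredCheck(const uint8_t *Table, unsigned &Idx) {
  PredCheck P;
  unsigned NumOps = Table[Idx++];       // first byte: operand count
  for (unsigned i = 0; i != NumOps; ++i)
    P.OperandRecords.push_back(Table[Idx++]);
  P.PredNo = Table[Idx++];              // final byte: predicate number
  return P;
}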
@@ -3598,38 +3659,22 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
bool mayLoad = MCID.mayLoad();
bool mayStore = MCID.mayStore();
- unsigned NumMemRefs = 0;
- for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
- MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
- if ((*I)->isLoad()) {
- if (mayLoad)
- ++NumMemRefs;
- } else if ((*I)->isStore()) {
- if (mayStore)
- ++NumMemRefs;
- } else {
- ++NumMemRefs;
- }
- }
-
- MachineSDNode::mmo_iterator MemRefs =
- MF->allocateMemRefsArray(NumMemRefs);
-
- MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
- for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
- MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
- if ((*I)->isLoad()) {
+ // We expect to have relatively few of these so just filter them into a
+ // temporary buffer so that we can easily add them to the instruction.
+ SmallVector<MachineMemOperand *, 4> FilteredMemRefs;
+ for (MachineMemOperand *MMO : MatchedMemRefs) {
+ if (MMO->isLoad()) {
if (mayLoad)
- *MemRefsPos++ = *I;
- } else if ((*I)->isStore()) {
+ FilteredMemRefs.push_back(MMO);
+ } else if (MMO->isStore()) {
if (mayStore)
- *MemRefsPos++ = *I;
+ FilteredMemRefs.push_back(MMO);
} else {
- *MemRefsPos++ = *I;
+ FilteredMemRefs.push_back(MMO);
}
}
- Res->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ CurDAG->setNodeMemRefs(Res, FilteredMemRefs);
}
LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 54cbd6859f70..90a1b350fc94 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -522,7 +522,16 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// The vm state arguments are lowered in an opaque manner. We do not know
// what type of values are contained within.
for (const Value *V : SI.DeoptState) {
- SDValue Incoming = Builder.getValue(V);
+ SDValue Incoming;
+ // If this is a function argument at a static frame index, generate it as
+ // the frame index.
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != INT_MAX)
+ Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy());
+ }
+ if (!Incoming.getNode())
+ Incoming = Builder.getValue(V);
const bool LiveInValue = LiveInDeopt && !isGCValue(V);
lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e317268fa5f4..a2f05c1e3cef 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -55,10 +55,12 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
const Function &F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
+ // the return. Ignore NoAlias and NonNull because they don't affect the
+ // call sequence.
AttributeList CallerAttrs = F.getAttributes();
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
.removeAttribute(Attribute::NoAlias)
+ .removeAttribute(Attribute::NonNull)
.hasAttributes())
return false;
@@ -429,87 +431,56 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
return false;
}
-bool
-TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
- const APInt &Demanded,
- DAGCombinerInfo &DCI,
- TargetLoweringOpt &TLO) const {
- SDValue Op = User->getOperand(OpIdx);
- KnownBits Known;
-
- if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true))
- return false;
-
-
- // Old will not always be the same as Op. For example:
- //
- // Demanded = 0xffffff
- // Op = i64 truncate (i32 and x, 0xffffff)
-  // In this case SimplifyDemandedBits will want to replace the 'and' node
- // with the value 'x', which will give us:
- // Old = i32 and x, 0xffffff
- // New = x
- if (TLO.Old.hasOneUse()) {
- // For the one use case, we just commit the change.
- DCI.CommitTargetLoweringOpt(TLO);
- return true;
- }
-
- // If Old has more than one use then it must be Op, because the
- // AssumeSingleUse flag is not propogated to recursive calls of
- // SimplifyDemanded bits, so the only node with multiple use that
- // it will attempt to combine will be Op.
- assert(TLO.Old == Op);
-
- SmallVector <SDValue, 4> NewOps;
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- if (i == OpIdx) {
- NewOps.push_back(TLO.New);
- continue;
- }
- NewOps.push_back(User->getOperand(i));
- }
- User = TLO.DAG.UpdateNodeOperands(User, NewOps);
- // Op has less users now, so we may be able to perform additional combines
- // with it.
- DCI.AddToWorklist(Op.getNode());
- // User's operands have been updated, so we may be able to do new combines
- // with it.
- DCI.AddToWorklist(User);
- return true;
-}
-
-bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
-
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
- bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO);
- if (Simplified)
+ bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
+ }
return Simplified;
}
-/// Look at Op. At this point, we know that only the DemandedMask bits of the
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
+ AssumeSingleUse);
+}
+
+/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, placing the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
-/// DemandedMask.
-bool TargetLowering::SimplifyDemandedBits(SDValue Op,
- const APInt &DemandedMask,
- KnownBits &Known,
- TargetLoweringOpt &TLO,
- unsigned Depth,
- bool AssumeSingleUse) const {
- unsigned BitWidth = DemandedMask.getBitWidth();
+/// OriginalDemandedBits and OriginalDemandedElts.
+bool TargetLowering::SimplifyDemandedBits(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth, bool AssumeSingleUse) const {
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
- APInt NewMask = DemandedMask;
+
+ unsigned NumElts = OriginalDemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
+
+ APInt DemandedBits = OriginalDemandedBits;
+ APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
@@ -529,18 +500,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, Just compute the Known bits to
// simplify things downstream.
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
- // just set the NewMask to all bits.
- NewMask = APInt::getAllOnesValue(BitWidth);
- } else if (DemandedMask == 0) {
- // Not demanding any bits from Op.
+ // just set the DemandedBits/Elts to all bits.
+ DemandedBits = APInt::getAllOnesValue(BitWidth);
+ DemandedElts = APInt::getAllOnesValue(NumElts);
+ } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
+ // Not demanding any bits/elts from Op.
if (!Op.isUndef())
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
return false;
- } else if (Depth == 6) { // Limit search depth.
+ } else if (Depth == 6) { // Limit search depth.
return false;
}
@@ -570,24 +542,90 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
- return false; // Don't fall through, will infinitely loop.
- case ISD::AND:
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::CONCAT_VECTORS: {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ APInt DemandedSubElts =
+ DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ // Known bits are shared by every demanded subvector element.
+ if (!!DemandedSubElts) {
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from shuffle operands.
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ DemandedLHS.clearAllBits();
+ DemandedRHS.clearAllBits();
+ break;
+ }
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ if (!!DemandedLHS || !!DemandedRHS) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!DemandedLHS) {
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
+ Known2, TLO, Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ if (!!DemandedRHS) {
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
+ Known2, TLO, Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ }
+ break;
+ }
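
The VECTOR_SHUFFLE case above splits the demanded result elements into per-operand demand. A standalone sketch using 64-bit element masks (not LLVM code; assumes at most 64 elements):

#include <cstdint>
#include <vector>

// Mask entries 0..N-1 pick from LHS, N..2N-1 from RHS, negative is undef.
void splitDemanded(const std::vector<int> &Mask, uint64_t DemandedElts,
                   uint64_t &DemandedLHS, uint64_t &DemandedRHS) {
  unsigned NumElts = Mask.size();
  DemandedLHS = DemandedRHS = 0;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (!(DemandedElts & (1ull << i)))
      continue;
    int M = Mask[i];
    if (M < 0) { // undef lane: as above, give up and demand nothing
      DemandedLHS = DemandedRHS = 0;
      return;
    }
    if (unsigned(M) < NumElts)
      DemandedLHS |= 1ull << M;
    else
      DemandedRHS |= 1ull << (unsigned(M) - NumElts);
  }
}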
+ case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
- if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue Op0 = Op.getOperand(0);
- KnownBits LHSKnown;
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
+ KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
- if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ if ((LHSKnown.Zero & DemandedBits) ==
+ (~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
- if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
@@ -597,34 +635,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
- SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0),
- Op.getOperand(1));
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
- Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if (NewMask.isSubsetOf(Known2.Zero | Known.One))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known.Zero | Known2.One))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
+ return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
- if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -632,26 +669,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Output known-0 are known to be clear if zero in either the LHS | RHS.
Known.Zero |= Known2.Zero;
break;
- case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ }
+ case ISD::OR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
- Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if (NewMask.isSubsetOf(Known2.One | Known.Zero))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known.One | Known2.Zero))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -659,78 +700,81 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
+ }
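
The OR case that just closed folds to a single operand when the other contributes nothing on the demanded bits. A standalone sketch with plain integers (not LLVM code):

#include <cstdint>

// Returns 0 to keep the OR, 1 to fold to A, 2 to fold to B.
constexpr int simplifyOr(uint32_t Demanded, uint32_t KnownZeroA,
                         uint32_t KnownOneA, uint32_t KnownZeroB,
                         uint32_t KnownOneB) {
  // Fold to A: every demanded bit is known zero in B or known one in A.
  if ((Demanded & ~(KnownZeroB | KnownOneA)) == 0)
    return 1;
  if ((Demanded & ~(KnownZeroA | KnownOneB)) == 0)
    return 2;
  return 0;
}

static_assert(simplifyOr(0xFF, 0, 0, 0xFF, 0) == 1, "B known zero: keep A");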
case ISD::XOR: {
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if (NewMask.isSubsetOf(Known.Zero))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known2.Zero))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other
// (but not both) turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,
- Op.getOperand(0),
- Op.getOperand(1)));
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
- // If all of the demanded bits on one side are known, and all of the set
- // bits on that side are also known to be set on the other side, turn this
- // into an AND, as we know the bits will be cleared.
- // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- // NB: it is okay if more bits are known than are requested
- if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side
- if (Known.One == Known2.One) { // set bits are the same on both sides
- SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
- Op.getOperand(0), ANDC));
+ if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
+ // If one side is a constant, and all of the known set bits on the other
+ // side are also set in the constant, turn this into an AND, as we know
+ // the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if (C->getAPIntValue() == Known2.One) {
+ SDValue ANDC =
+ TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
- }
- // If the RHS is a constant, see if we can change it. Don't alter a -1
- // constant because that's a 'not' op, and that is better for combining and
- // codegen.
- ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1));
- if (C && !C->isAllOnesValue()) {
- if (NewMask.isSubsetOf(C->getAPIntValue())) {
- // We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);
- return TLO.CombineTo(Op, New);
+ // If the RHS is a constant, see if we can change it. Don't alter a -1
+ // constant because that's a 'not' op, and that is better for combining
+ // and codegen.
+ if (!C->isAllOnesValue()) {
+ if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
+ }
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ return true;
}
- // If we can't turn this into a 'not', try to shrink the constant.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
- return true;
}
Known = std::move(KnownOut);
break;
}
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -738,15 +782,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -760,7 +806,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
- if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(VT) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
@@ -780,10 +827,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero.setBitsFrom(1);
break;
}
- case ISD::SHL:
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue InOp = Op.getOperand(0);
+ case ISD::SHL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
@@ -793,90 +841,91 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
- if (InOp.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (Op0.getOpcode() == ISD::SRL) {
+ if (ShAmt &&
+ (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
- int Diff = ShAmt-C1;
+ int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0),
- NewSA));
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
- if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO,
+ Depth + 1))
return true;
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
- if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
- SDValue InnerOp = InOp.getOperand(0);
+ if (Op0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
- if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
+ if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
- TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
- TLO.DAG.getConstant(ShAmt, dl, ShTy));
- return
- TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, dl, ShTy));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
- if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(InnerOp.getOperand(1))) {
unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
- if (InnerShAmt < ShAmt &&
- InnerShAmt < InnerBits &&
- NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
- NewMask.countTrailingZeros() >= ShAmt) {
- SDValue NewSA =
- TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op.getOperand(1).getValueType());
+ if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
+ DemandedBits.getActiveBits() <=
+ (InnerBits - InnerShAmt + ShAmt) &&
+ DemandedBits.countTrailingZeros() >= ShAmt) {
+ SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op1.getValueType());
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
- NewExt, NewSA));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
+ Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
}
break;
- case ISD::SRL:
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue InOp = Op.getOperand(0);
+ }
+ case ISD::SRL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
- APInt InDemandedMask = (NewMask << ShAmt);
+ APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
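
The shl-of-srl fold in the SHL case above (and the mirrored srl-of-shl fold below) rewrites two shifts as one shift by |ShAmt - C1|, which is only sound on the demanded bits. A standalone check (not LLVM code):

#include <cassert>
#include <cstdint>

uint32_t combined(uint32_t X, unsigned C1, unsigned ShAmt) {
  int Diff = int(ShAmt) - int(C1);
  return Diff >= 0 ? X << Diff : X >> -Diff; // SHL if positive, else SRL
}

int main() {
  const uint32_t Xs[] = {0xDEADBEEFu, 0x12345678u, ~0u};
  unsigned C1 = 5, ShAmt = 3;
  uint32_t Demanded = ~0u << ShAmt; // low ShAmt bits are not demanded
  for (uint32_t X : Xs)
    assert((((X >> C1) << ShAmt) & Demanded) ==
           (combined(X, C1, ShAmt) & Demanded));
}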
@@ -886,56 +935,56 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
- if (InOp.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
if (ShAmt &&
- (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
- int Diff = ShAmt-C1;
+ int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0),
- NewSA));
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
// Compute the new bits that are at the top now.
- if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
- Known.Zero.setHighBits(ShAmt); // High bits known zero.
+ Known.Zero.setHighBits(ShAmt); // High bits known zero.
}
break;
- case ISD::SRA:
+ }
+ case ISD::SRA: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (NewMask.isOneValue())
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
- Op.getOperand(1)));
+ if (DemandedBits.isOneValue())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
- APInt InDemandedMask = (NewMask << ShAmt);
+ APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
@@ -944,11 +993,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- if (NewMask.countLeadingZeros() < ShAmt)
+ if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
- Depth+1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
@@ -957,22 +1005,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
- NewMask.countLeadingZeros() >= ShAmt) {
+ DemandedBits.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
- Op.getOperand(1), Flags));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
- int Log2 = NewMask.exactLogBase2();
+ int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
SDValue NewSA =
- TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
- Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
- Op.getOperand(0), NewSA));
+ TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
@@ -980,15 +1025,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.One.setHighBits(ShAmt);
}
break;
+ }
case ISD::SIGN_EXTEND_INREG: {
+ SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
- if (NewMask.isSignMask()) {
- SDValue InOp = Op.getOperand(0);
+ if (DemandedBits.isSignMask()) {
bool AlreadySignExtended =
- TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
+ TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -998,25 +1044,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
- ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,
- ShiftAmt));
+ SDValue ShiftAmt =
+ TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
- if (NewMask.getActiveBits() <= ExVTBits)
- return TLO.CombineTo(Op, Op.getOperand(0));
+ if (DemandedBits.getActiveBits() <= ExVTBits)
+ return TLO.CombineTo(Op, Op0);
- APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
+ APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
- if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
- Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1025,14 +1070,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
- return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
- Op.getOperand(0), dl, ExVT.getScalarType()));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
- if (Known.One[ExVTBits - 1]) { // Input sign bit known set
+ if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
- } else { // Input sign bit unknown
+ } else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
@@ -1042,8 +1087,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
- APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
- APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
@@ -1061,36 +1106,35 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ZERO_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewMask.getActiveBits() <= OperandBitWidth)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- Op.getOperand(0)));
+ if (DemandedBits.getActiveBits() <= InBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
- APInt InMask = NewMask.trunc(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(OperandBitWidth);
+ Known.Zero.setBitsFrom(InBits);
break;
}
case ISD::SIGN_EXTEND: {
- unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewMask.getActiveBits() <= InBits)
- return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- Op.getOperand(0)));
+ if (DemandedBits.getActiveBits() <= InBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- APInt InDemandedBits = NewMask.trunc(InBits);
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
InDemandedBits.setBit(InBits - 1);
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
- Depth+1))
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit is known one, the top bits match.
@@ -1098,34 +1142,55 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,
- Op.getOperand(0)));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+ break;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ // TODO - merge this with SIGN_EXTEND above?
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+
+ // If some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if (InBits < DemandedBits.getActiveBits())
+ InDemandedBits.setBit(InBits - 1);
+
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ // If the sign bit is known one, the top bits match.
+ Known = Known.sext(BitWidth);
break;
}
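
Across the extend cases above, the demanded bits are truncated to the source width, and a sign extend additionally demands the source sign bit whenever any extended bit is demanded. A standalone sketch (not LLVM code):

#include <cstdint>

constexpr uint64_t demandedOfSExtSrc(uint64_t Demanded, unsigned SrcBits) {
  uint64_t SrcMask = SrcBits == 64 ? ~0ull : (1ull << SrcBits) - 1;
  uint64_t InDemanded = Demanded & SrcMask; // DemandedBits.trunc(InBits)
  if (Demanded & ~SrcMask)                  // any extended bit demanded?
    InDemanded |= 1ull << (SrcBits - 1);    // ...then the sign bit is too
  return InDemanded;
}

static_assert(demandedOfSExtSrc(0xFF00, 8) == 0x80,
              "only extended bits demanded: just the source sign bit");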
case ISD::ANY_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt InMask = NewMask.trunc(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt TruncMask = NewMask.zext(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1))
+ unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
+ APInt TruncMask = DemandedBits.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
- if (Op.getOperand(0).getNode()->hasOneUse()) {
- SDValue In = Op.getOperand(0);
- switch (In.getOpcode()) {
- default: break;
+ if (Src.getNode()->hasOneUse()) {
+ switch (Src.getOpcode()) {
+ default:
+ break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
@@ -1133,10 +1198,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
- ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
if (!ShAmt)
break;
- SDValue Shift = In.getOperand(1);
+ SDValue Shift = Src.getOperand(1);
if (TLO.LegalTypes()) {
uint64_t ShVal = ShAmt->getZExtValue();
Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
@@ -1148,13 +1213,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
HighBits.lshrInPlace(ShAmt->getZExtValue());
HighBits = HighBits.trunc(BitWidth);
- if (!(HighBits & NewMask)) {
+ if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,
- In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,
- Shift));
+ SDValue NewTrunc =
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
}
}
break;
@@ -1169,7 +1234,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// demanded by its users.
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
- if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits,
Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1177,50 +1242,111 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero |= ~InMask;
break;
}
- case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ unsigned EltBitWidth = Src.getScalarValueSizeInBits();
+
+ // Demand the bits from every vector element without a constant index.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
+ if (CIdx->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
+
+    // If BitWidth > EltBitWidth the value is any-extended. So we do not know
+ // anything about the extended bits.
+ APInt DemandedSrcBits = DemandedBits;
+ if (BitWidth > EltBitWidth)
+ DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
+
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ Known = Known2;
+ if (BitWidth > EltBitWidth)
+ Known = Known.zext(BitWidth);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!TLO.LegalOperations() && !VT.isVector() &&
- !Op.getOperand(0).getValueType().isVector() &&
- NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
- Op.getOperand(0).getValueType().isFloatingPoint()) {
+ if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
+ DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
+ SrcVT.isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
- bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && VT.isSimple() &&
- Op.getOperand(0).getValueType() != MVT::f16 &&
- Op.getOperand(0).getValueType() != MVT::f128) {
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
+ SrcVT != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ }
+ }
+ // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by
+ // demanding the element if any bits from it are demanded.
+ // TODO - bigendian once we have test coverage.
+ // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
+ if (SrcVT.isVector() && NumSrcEltBits > 1 &&
+ (BitWidth % NumSrcEltBits) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = BitWidth / NumSrcEltBits;
+ auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
+ DemandedSubElts = APInt::getNullValue(Scale);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue())
+ DemandedSubElts.setBit(i);
+ }
+ return true;
+ };
+
+ APInt DemandedSubElts;
+ if (GetDemandedSubMask(DemandedSubElts)) {
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);
+
+ APInt KnownUndef, KnownZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
}
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, Depth);
return false;
}
break;
+ }
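
GetDemandedSubMask in the BITCAST case above marks a narrow source element as demanded iff any of its bits are demanded in the wide scalar (little endian). A standalone sketch (not LLVM code; assumes NumSrcEltBits < 64):

#include <cstdint>

constexpr uint64_t demandedSubElts(uint64_t DemandedBits, unsigned BitWidth,
                                   unsigned NumSrcEltBits) {
  unsigned Scale = BitWidth / NumSrcEltBits;
  uint64_t EltMask = (1ull << NumSrcEltBits) - 1;
  uint64_t Elts = 0;
  for (unsigned i = 0; i != Scale; ++i)
    if ((DemandedBits >> (i * NumSrcEltBits)) & EltMask)
      Elts |= 1ull << i;
  return Elts;
}

// i32 viewed as four i8 lanes: demanding bytes 0 and 3 of the scalar
// demands vector elements 0 and 3.
static_assert(demandedSubElts(0xFF0000FFull, 32, 8) == 0b1001, "lanes 0,3");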
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- unsigned NewMaskLZ = NewMask.countLeadingZeros();
- APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
- if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
+ unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
- ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
@@ -1240,7 +1366,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
    // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
- APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
+ APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
@@ -1257,24 +1383,34 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
LLVM_FALLTHROUGH;
}
default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
+ Known, TLO, Depth))
+ return true;
+ break;
+ }
+
// Just use computeKnownBits to compute output bits.
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
// If we know the value of all of the demanded bits, return this as a
// constant.
- if (NewMask.isSubsetOf(Known.Zero|Known.One)) {
+ if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNodeIterator I = SDNodeIterator::begin(N),
- E = SDNodeIterator::end(N); I != E; ++I) {
+ E = SDNodeIterator::end(N);
+ I != E; ++I) {
SDNode *Op = *I;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
if (C->isOpaque())
return false;
}
- return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ // TODO: Handle float bits as well.
+ if (VT.isInteger())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
}
return false;
@@ -1291,8 +1427,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
- if (Simplified)
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
+ }
return Simplified;
}
@@ -1371,6 +1509,23 @@ bool TargetLowering::SimplifyDemandedVectorElts(
TLO, Depth + 1))
return true;
+ // Try calling SimplifyDemandedBits, converting demanded elts to the bits
+ // of the large element.
+ // TODO - bigendian once we have test coverage.
+ if (TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
+ APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Ofs = (i % Scale) * EltSizeInBits;
+ SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
+ }
+
+ KnownBits Known;
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ }
+
// If the src element is zero/undef then all the output elements will be -
// only demanded elements are guaranteed to be correct.
for (unsigned i = 0; i != NumSrcElts; ++i) {
@@ -1463,7 +1618,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
- if (Idx.uge(NumElts - NumSubElts))
+ if (Idx.ugt(NumElts - NumSubElts))
break;
unsigned SubIdx = Idx.getZExtValue();
APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
@@ -1481,22 +1636,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::EXTRACT_SUBVECTOR: {
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- break;
SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
- if (Idx.uge(NumSrcElts - NumElts))
- break;
- // Offset the demanded elts by the subvector index.
- uint64_t SubIdx = Idx.getZExtValue();
- APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx);
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
- Depth + 1))
- return true;
- KnownUndef = SrcUndef.extractBits(NumElts, SubIdx);
- KnownZero = SrcZero.extractBits(NumElts, SubIdx);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, Idx);
+ KnownZero = SrcZero.extractBits(NumElts, Idx);
+ }
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -1510,9 +1663,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned Idx = CIdx->getZExtValue();
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
- DemandedElts.clearBit(Idx);
- if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
+ APInt DemandedVecElts(DemandedElts);
+ DemandedVecElts.clearBit(Idx);
+ if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
@@ -1534,12 +1688,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::VSELECT: {
- APInt DemandedLHS(DemandedElts);
- APInt DemandedRHS(DemandedElts);
-
- // TODO - add support for constant vselect masks.
+ // Try to transform the select condition based on the current demanded
+ // elements.
+ // TODO: If a condition element is undef, we can choose from one arm of the
+ // select (and if one arm is undef, then we can propagate that to the
+ // result).
+ // TODO - add support for constant vselect masks (see IR version of this).
+ APInt UnusedUndef, UnusedZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
+ UnusedZero, TLO, Depth + 1))
+ return true;
// See if we can simplify either vselect operand.
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
@@ -1624,8 +1786,35 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ APInt SrcUndef, SrcZero;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ KnownZero = SrcZero.zextOrTrunc(NumElts);
+ KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
case ISD::ADD:
- case ISD::SUB: {
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM: {
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
SrcZero, TLO, Depth + 1))
@@ -1637,21 +1826,58 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef &= SrcUndef;
break;
}
+ case ISD::AND: {
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ // If either side has a zero element, then the result element is zero, even
+ // if the other is an UNDEF.
+ KnownZero |= SrcZero;
+ KnownUndef &= SrcUndef;
+ KnownUndef &= ~KnownZero;
+ break;
+ }
case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
break;
default: {
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
KnownZero, TLO, Depth))
return true;
+ } else {
+ KnownBits Known;
+ APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
+ Depth, AssumeSingleUse))
+ return true;
+ }
break;
}
}
-
assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
+
+ // Constant fold all undef cases.
+ // TODO: Handle zero cases as well.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
return false;
}
@@ -1711,6 +1937,32 @@ bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return false;
}
+bool TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedBits if you don't know whether Op"
+ " is a target node!");
+ computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
+ return false;
+}
+
+bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isKnownNeverNaN if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
@@ -1901,10 +2153,24 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
} else
return SDValue();
- const APInt &I01 = C01->getAPIntValue();
- // Both of them must be power-of-two, and the constant from setcc is bigger.
- if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
- return SDValue();
+ APInt I01 = C01->getAPIntValue();
+
+ auto checkConstants = [&I1, &I01]() -> bool {
+ // Both of them must be power-of-two, and the constant from setcc is bigger.
+ return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
+ };
+
+ if (checkConstants()) {
+ // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
+ } else {
+ // What if we invert constants? (and the target predicate)
+ I1.negate();
+ I01.negate();
+ NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
+ if (!checkConstants())
+ return SDValue();
+ // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
+ }
// They are power-of-two, so which bit is set?
const unsigned KeptBits = I1.logBase2();
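
For illustration, the check this combine recognizes can be written out on plain
integers; a minimal sketch with KeptBits == 8 on i16 (the helper name
fitsInSignedI8 is invented for the example):

#include <cassert>
#include <cstdint>
#include <cstdio>

// "x fits in a signed 8-bit value" is "x == sext(trunc(x, i8))", which the
// combine matches as (x + 128) u< 256, or, with both constants negated and
// the predicate inverted, as (x - 128) u>= -256.
static bool fitsInSignedI8(int16_t x) {
  return (uint16_t)(x + 128) < 256;
}

int main() {
  assert(fitsInSignedI8(127) && fitsInSignedI8(-128));
  assert(!fitsInSignedI8(128) && !fitsInSignedI8(-129));
  puts("ok");
  return 0;
}
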
@@ -2141,7 +2407,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (bestWidth) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
- if (newVT.isRound()) {
+ if (newVT.isRound() &&
+ shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
@@ -2819,8 +3086,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
int64_t &Offset) const {
+
+ SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
+
if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
@@ -3419,34 +3689,63 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
-static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
- assert(d != 0 && "Division by zero!");
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ bool UseSRA = false;
+ SmallVector<SDValue, 16> Shifts, Factors;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ APInt Divisor = C->getAPIntValue();
+ unsigned Shift = Divisor.countTrailingZeros();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t;
+ APInt Factor = Divisor;
+ while ((t = Divisor * Factor) != 1)
+ Factor *= APInt(Divisor.getBitWidth(), 2) - t;
+ Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue Shift, Factor;
+ if (VT.isVector()) {
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ SDValue Res = Op0;
// Shift the value upfront if it is even, so the LSB is one.
- unsigned ShAmt = d.countTrailingZeros();
- if (ShAmt) {
+ if (UseSRA) {
// TODO: For UDIV use SRL instead of SRA.
- SDValue Amt =
- DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(),
- DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
- Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags);
- Created.push_back(Op1.getNode());
- d.ashrInPlace(ShAmt);
+ Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
+ Created.push_back(Res.getNode());
}
- // Calculate the multiplicative inverse, using Newton's method.
- APInt t, xn = d;
- while ((t = d*xn) != 1)
- xn *= APInt(d.getBitWidth(), 2) - t;
-
- SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
- SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
- Created.push_back(Mul.getNode());
- return Mul;
+ return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
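
As a standalone sanity check of the Newton-Raphson inverse used above, here is
a minimal sketch on plain uint32_t values (inverseOdd and the driver are
invented names; wrap-around unsigned arithmetic is assumed):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Multiplicative inverse of an odd value mod 2^32 via Newton's method;
// each iteration doubles the number of correct low bits.
static uint32_t inverseOdd(uint32_t d) {
  assert((d & 1) && "only odd values are invertible mod 2^32");
  uint32_t x = d; // seed is correct to 3 bits, since d*d == 1 (mod 8)
  for (uint32_t t; (t = d * x) != 1;)
    x *= 2 - t;
  return x;
}

int main() {
  uint32_t Divisor = 6, N = 6u * 123456u; // N is a known multiple of 6
  unsigned Shift = 0;
  uint32_t Odd = Divisor;
  while (!(Odd & 1)) { // strip the even factor first (the exact SRA step)
    Odd >>= 1;
    ++Shift;
  }
  uint32_t Q = (N >> Shift) * inverseOdd(Odd); // shift + mul replaces the div
  printf("%u / %u = %u\n", N, Divisor, Q);
  assert(Q == N / Divisor);
  return 0;
}
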
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
@@ -3463,11 +3762,15 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG, bool IsAfterLegalization,
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
@@ -3476,50 +3779,90 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
- return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created);
+ return BuildExactSDIV(*this, N, dl, DAG, Created);
+
+ SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+
+ const APInt &Divisor = C->getAPIntValue();
+ APInt::ms magics = Divisor.magic();
+ int NumeratorFactor = 0;
+ int ShiftMask = -1;
+
+ if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ // If d is +1/-1, we just multiply the numerator by +1/-1.
+ NumeratorFactor = Divisor.getSExtValue();
+ magics.m = 0;
+ magics.s = 0;
+ ShiftMask = 0;
+ } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ // If d > 0 and m < 0, add the numerator.
+ NumeratorFactor = 1;
+ } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ // If d < 0 and m > 0, subtract the numerator.
+ NumeratorFactor = -1;
+ }
+
+ MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
+ Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts / magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ return SDValue();
- APInt::ms magics = Divisor.magic();
+ SDValue MagicFactor, Factor, Shift, ShiftMask;
+ if (VT.isVector()) {
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
+ } else {
+ MagicFactor = MagicFactors[0];
+ Factor = Factors[0];
+ Shift = Shifts[0];
+ ShiftMask = ShiftMasks[0];
+ }
- // Multiply the numerator (operand 0) by the magic value
- // FIXME: We should support doing a MUL in a wider type
+ // Multiply the numerator (operand 0) by the magic value.
+ // FIXME: We should support doing a MUL in a wider type.
SDValue Q;
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
- isOperationLegalOrCustom(ISD::MULHS, VT))
- Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
- DAG.getConstant(magics.m, dl, VT));
- else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
- isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
- N->getOperand(0),
- DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
- else
- return SDValue(); // No mulhs or equvialent
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
+ : isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
+ Q = SDValue(LoHi.getNode(), 1);
+ } else
+ return SDValue(); // No mulhs or equivalent.
+ Created.push_back(Q.getNode());
+ // (Optionally) Add/subtract the numerator using Factor.
+ Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
+ Created.push_back(Factor.getNode());
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
Created.push_back(Q.getNode());
- // If d > 0 and m < 0, add the numerator
- if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
- Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
- Created.push_back(Q.getNode());
- }
- // If d < 0 and m > 0, subtract the numerator.
- if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
- Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
- Created.push_back(Q.getNode());
- }
- auto &DL = DAG.getDataLayout();
- // Shift right algebraic if shift value is nonzero
- if (magics.s > 0) {
- Q = DAG.getNode(
- ISD::SRA, dl, VT, Q,
- DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created.push_back(Q.getNode());
- }
- // Extract the sign bit and add it to the quotient
- SDValue T =
- DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
- getShiftAmountTy(Q.getValueType(), DL)));
+ // Shift right algebraic by shift value.
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
+ Created.push_back(Q.getNode());
+
+ // Extract the sign bit, mask it and add it to the quotient.
+ SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
+ SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
+ Created.push_back(T.getNode());
+ T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
Created.push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
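
By way of illustration, the scalar i32 sequence this builds for a divide by 7
looks roughly like the following sketch (the constants are the standard
Hacker's Delight magics; sdiv7 is an invented name and two's-complement
arithmetic shifts are assumed):

#include <cassert>
#include <cstdint>
#include <cstdio>

// n / 7 without a divide: MULHS by the magic constant, add the numerator
// back (the d > 0, m < 0 case, so NumeratorFactor == 1), SRA by magics.s,
// then add the sign bit to round toward zero.
static int32_t sdiv7(int32_t n) {
  int32_t q = (int32_t)(((int64_t)n * (int32_t)0x92492493) >> 32); // MULHS
  q += n;                 // NumeratorFactor == 1
  q >>= 2;                // Shift == magics.s
  q += (uint32_t)q >> 31; // add the extracted sign bit
  return q;
}

int main() {
  for (int32_t n : {-100, -7, -1, 0, 1, 6, 7, 100, 2147483647})
    assert(sdiv7(n) == n / 7);
  puts("ok");
  return 0;
}
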
@@ -3528,72 +3871,133 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG, bool IsAfterLegalization,
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
- auto &DL = DAG.getDataLayout();
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
- // FIXME: We should use a narrower constant when the upper
- // bits are known to be zero.
- APInt::mu magics = Divisor.magicu();
+ bool UseNPQ = false;
+ SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
- SDValue Q = N->getOperand(0);
+ auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ APInt Divisor = C->getAPIntValue();
+ APInt::mu magics = Divisor.magicu();
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.a != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics = Divisor.lshr(PreShift).magicu(PreShift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
- // If the divisor is even, we can avoid using the expensive fixup by shifting
- // the divided value upfront.
- if (magics.a != 0 && !Divisor[0]) {
- unsigned Shift = Divisor.countTrailingZeros();
- Q = DAG.getNode(
- ISD::SRL, dl, VT, Q,
- DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created.push_back(Q.getNode());
+ APInt Magic = magics.m;
+
+ unsigned SelNPQ;
+ if (magics.a == 0 || Divisor.isOneValue()) {
+ assert(magics.s < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.s;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.s - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
+ MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
+ NPQFactors.push_back(
+ DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getNullValue(EltBits),
+ dl, SVT));
+ PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
- // Get magic number for the shifted divisor.
- magics = Divisor.lshr(Shift).magicu(Shift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts/magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ return SDValue();
+
+ SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+ if (VT.isVector()) {
+ PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
+ PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+ } else {
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
}
- // Multiply the numerator (operand 0) by the magic value
- // FIXME: We should support doing a MUL in a wider type
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
- isOperationLegalOrCustom(ISD::MULHU, VT))
- Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
- else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
- isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
- DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
- else
- return SDValue(); // No mulhu or equivalent
+ SDValue Q = N0;
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+ Created.push_back(Q.getNode());
+
+ // FIXME: We should support doing a MUL in a wider type.
+ auto GetMULHU = [&](SDValue X, SDValue Y) {
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
+ : isOperationLegalOrCustom(ISD::MULHU, VT))
+ return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
+ if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+ return SDValue(LoHi.getNode(), 1);
+ }
+ return SDValue(); // No mulhu or equivalent
+ };
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = GetMULHU(Q, MagicFactor);
+ if (!Q)
+ return SDValue();
Created.push_back(Q.getNode());
- if (magics.a == 0) {
- assert(magics.s < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- return DAG.getNode(
- ISD::SRL, dl, VT, Q,
- DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- } else {
- SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
- Created.push_back(NPQ.getNode());
- NPQ = DAG.getNode(
- ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
+ if (UseNPQ) {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
Created.push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (VT.isVector())
+ NPQ = GetMULHU(NPQ, NPQFactor);
+ else
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
+
Created.push_back(NPQ.getNode());
- return DAG.getNode(
- ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(magics.s - 1, dl,
- getShiftAmountTy(NPQ.getValueType(), DL)));
+
+ Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ Created.push_back(Q.getNode());
}
+
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
+ Created.push_back(Q.getNode());
+
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
+ return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
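
Similarly, a minimal scalar sketch of the unsigned path for a divide by 7,
which exercises the NPQ fixup (udiv7 is an invented name; the final
divisor-is-one select from the expansion is dropped since 7 != 1):

#include <cassert>
#include <cstdint>
#include <cstdio>

// n / 7 without a divide: magicu() needs the add indicator (magics.a != 0),
// so take the NPQ path with PostShift == magics.s - 1 == 2.
static uint32_t udiv7(uint32_t n) {
  uint32_t q = (uint32_t)(((uint64_t)n * 0x24924925u) >> 32); // MULHU
  uint32_t npq = (n - q) >> 1; // recover the bit lost to magic overflow
  return (npq + q) >> 2;       // PostShift
}

int main() {
  for (uint32_t n : {0u, 1u, 6u, 7u, 13u, 14u, 4294967295u})
    assert(udiv7(n) == n / 7);
  puts("ok");
  return 0;
}
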
bool TargetLowering::
@@ -3750,8 +4154,17 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
return false;
- Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
- Merge(Lo, Hi));
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
+ isOperationLegalOrCustom(ISD::ADDE, VT));
+ if (UseGlue)
+ Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
+ Merge(Lo, Hi));
+ else
+ Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
+ Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
SDValue Carry = Next.getValue(1);
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
@@ -3760,9 +4173,13 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
return false;
- SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
- Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
- Carry);
+ if (UseGlue)
+ Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
+ Carry);
+ else
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
+ Zero, Carry);
+
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
if (Opcode == ISD::SMUL_LOHI) {
@@ -3797,66 +4214,525 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
return Ok;
}
+bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+
+ SDValue ShAmt;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
+ } else {
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ }
+
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
+ SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
+ Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
+ return true;
+}
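
A scalar i32 rendering of the same expansion, for reference (fshl32 is an
invented name; plain C++ must branch where the DAG code computes both arms and
selects, because Y >> 32 is undefined in the source language):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t ShAmt = Z & 31; // power-of-two bitwidth: AND instead of UREM
  if (ShAmt == 0)          // the IsZeroShift select: fshl by 0 returns X
    return X;
  return (X << ShAmt) | (Y >> (32 - ShAmt));
}

int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x12345678;
  for (uint32_t Z : {0u, 1u, 13u, 31u, 32u, 45u}) {
    uint64_t Wide = ((uint64_t)X << 32) | Y; // reference: shift the pair
    uint32_t Ref = (uint32_t)((Wide << (Z & 31)) >> 32);
    assert(fshl32(X, Y, Z) == Ref);
  }
  puts("ok");
  return 0;
}
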
+
+// TODO: Merge with expandFunnelShift.
+bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLeft = Node->getOpcode() == ISD::ROTL;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Op1.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+
+ // If a rotate in the other direction is legal, use it.
+ unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+ if (isOperationLegal(RevRot, VT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
+ return true;
+ }
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // Otherwise,
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ //
+ assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
+ "Expecting the type bitwidth to be a power of 2");
+ unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+ unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+ SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
+ SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
+ Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
+ DAG.getNode(HsOpc, DL, VT, Op0, And1));
+ return true;
+}
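
The power-of-two rotate pattern above has the familiar branch-free scalar
form; a small sketch (rotl32 is an invented name), where masking both shift
amounts with w-1 keeps every shift in range even for c == 0 or c == 32:

#include <cassert>
#include <cstdint>
#include <cstdio>

// (rotl x, c) -> (or (shl x, c & 31), (srl x, (32 - c) & 31))
static uint32_t rotl32(uint32_t x, uint32_t c) {
  return (x << (c & 31)) | (x >> ((32 - c) & 31));
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 0) == 0x12345678u);
  assert(rotl32(0x12345678u, 32) == 0x12345678u);
  puts("ok");
  return 0;
}
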
+
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
// FIXME: Only f32 to i64 conversions are supported.
- if (VT != MVT::f32 || NVT != MVT::i64)
+ if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits());
+ unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
+ EVT IntVT = SrcVT.changeTypeToInteger();
+ EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
+
SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
SDValue Bias = DAG.getConstant(127, dl, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
- IntVT);
- SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
+ SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
- auto &DL = DAG.getDataLayout();
SDValue ExponentBits = DAG.getNode(
ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
- DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
- SDValue Sign = DAG.getNode(
- ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
- DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
- Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
- DAG.getConstant(0x00800000, dl, IntVT));
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, dl, IntVT));
- R = DAG.getZExtOrTrunc(R, dl, NVT);
+ R = DAG.getZExtOrTrunc(R, dl, DstVT);
R = DAG.getSelectCC(
dl, Exponent, ExponentLoBit,
- DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getNode(ISD::SHL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
- dl, getShiftAmountTy(IntVT, DL))),
- DAG.getNode(ISD::SRL, dl, NVT, R,
+ dl, IntShVT)),
+ DAG.getNode(ISD::SRL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
- dl, getShiftAmountTy(IntVT, DL))),
+ dl, IntShVT)),
ISD::SETGT);
- SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
- DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
- Sign);
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
+ DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
- DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
+ DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
+ return true;
+}
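
The same fixsfdi-style bit manipulation, sketched directly on a host float
(fptosi64 is an invented name; like the expansion, out-of-range inputs are not
handled, matching the usual fptosi poison semantics):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

static int64_t fptosi64(float f) {
  uint32_t Bits;
  std::memcpy(&Bits, &f, sizeof(Bits)); // the BITCAST
  int64_t Exponent = (int64_t)((Bits & 0x7F800000) >> 23) - 127;
  int64_t Sign = (Bits >> 31) ? -1 : 0;
  int64_t R = (Bits & 0x007FFFFF) | 0x00800000; // restore the implicit 1
  if (Exponent < 0)
    return 0; // |f| < 1: the final SETLT select in the expansion
  R = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent);
  return (R ^ Sign) - Sign; // conditional negate via xor/sub
}

int main() {
  for (float f : {0.5f, 1.0f, -1.0f, 123456.75f, -8388608.0f})
    assert(fptosi64(f) == (int64_t)f);
  puts("ok");
  return 0;
}
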
+
+bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(SDValue(Node, 0));
+ SDValue Src = Node->getOperand(0);
+
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
+ return false;
+
+ // If the maximum float value is smaller than the signed integer range,
+ // the destination signmask can't be represented by the float, so we can
+ // just use FP_TO_SINT directly.
+ const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
+ APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
+ if (APFloat::opOverflow &
+ APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ return true;
+ }
+
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+
+ bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+ if (Strict) {
+ // Expand based on the maximum range of FP_TO_SINT; if the value exceeds
+ // the signmask, offset it first (the result is then fully representable).
+ // Sel = Src < 0x8000000000000000
+ // Val = select Sel, Src, Src - 0x8000000000000000
+ // Ofs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Val) ^ Ofs
+
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
+ Result = DAG.getNode(ISD::XOR, dl, DstVT,
+ DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+ } else {
+ // Expand based on maximum range of FP_TO_SINT:
+ // True = fp_to_sint(Src)
+ // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
+ // Result = select (Src < 0x8000000000000000), True, False
+
+ SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ False = DAG.getNode(ISD::XOR, dl, DstVT, False,
+ DAG.getConstant(SignMask, dl, DstVT));
+ Result = DAG.getSelect(dl, DstVT, Sel, True, False);
+ }
+ return true;
+}
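
A scalar f64 -> u64 sketch of the non-strict variant (fptoui64 is an invented
name; C++ must branch where the DAG version evaluates both arms and selects,
since the out-of-range casts would be undefined here):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t fptoui64(double Src) {
  const double Cst = 9223372036854775808.0; // 2^63, exactly representable
  if (Src < Cst)                            // Sel
    return (uint64_t)(int64_t)Src;          // True arm
  return (uint64_t)(int64_t)(Src - Cst) ^ 0x8000000000000000ull; // False arm
}

int main() {
  assert(fptoui64(0.0) == 0);
  assert(fptoui64(123.0) == 123);
  assert(fptoui64(9223372036854775808.0) == 0x8000000000000000ull);
  assert(fptoui64(18446744073709549568.0) == 18446744073709549568ull);
  puts("ok");
  return 0;
}
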
+
+bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ if (SrcVT.getScalarType() != MVT::i64)
+ return false;
+
+ SDLoc dl(SDValue(Node, 0));
+ EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
+
+ if (DstVT.getScalarType() == MVT::f32) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+ Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
+ return true;
+ }
+
+ if (DstVT.getScalarType() == MVT::f64) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
+ SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
+ BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
+ SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
+ SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+
+ SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
+ SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
+ SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ return true;
+ }
+
+ return false;
+}
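
The f64 path packs the two 32-bit halves into the mantissas of 2^52- and
2^84-biased doubles so that one exact FSUB plus one FADD (the single rounding
step) produce the result; a host-side sketch with the same constants
(uitofp64 is an invented name, IEEE-754 doubles assumed):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

static double uitofp64(uint64_t Src) {
  uint64_t LoBits = (Src & 0xFFFFFFFFull) | 0x4330000000000000ull; // 2^52+Lo
  uint64_t HiBits = (Src >> 32) | 0x4530000000000000ull; // 2^84 + Hi*2^32
  uint64_t MagicBits = 0x4530000000100000ull;            // 2^84 + 2^52
  double LoFlt, HiFlt, Magic;
  std::memcpy(&LoFlt, &LoBits, sizeof(double));
  std::memcpy(&HiFlt, &HiBits, sizeof(double));
  std::memcpy(&Magic, &MagicBits, sizeof(double));
  // HiFlt - Magic == Hi*2^32 - 2^52 exactly; the final add rounds once.
  return (HiFlt - Magic) + LoFlt;
}

int main() {
  for (uint64_t v : {0ull, 1ull, 0xFFFFFFFFull, 0x0123456789ABCDEFull,
                     0xFFFFFFFFFFFFFFFFull})
    assert(uitofp64(v) == (double)v);
  puts("ok");
  return 0;
}
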
+
+SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
+ ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ EVT VT = Node->getValueType(0);
+ if (isOperationLegalOrCustom(NewOp, VT)) {
+ SDValue Quiet0 = Node->getOperand(0);
+ SDValue Quiet1 = Node->getOperand(1);
+
+ if (!Node->getFlags().hasNoNaNs()) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+ if (!DAG.isKnownNeverSNaN(Quiet0)) {
+ Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
+ Node->getFlags());
+ }
+ if (!DAG.isKnownNeverSNaN(Quiet1)) {
+ Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
+ Node->getFlags());
+ }
+ }
+
+ return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
+ }
+
+ return SDValue();
+}
+
+bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return false;
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, dl, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ if (Len > 8)
+ Op =
+ DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
+
+ Result = Op;
+ return true;
+}
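
On a plain uint32_t (Len == 32) the same ladder reads as follows; a small
sketch, popcount32 being an invented name:

#include <cassert>
#include <cstdint>
#include <cstdio>

static unsigned popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                 // 2-bit partial sums
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit partial sums
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // per-byte sums
  return (v * 0x01010101u) >> 24;                   // add the four bytes
}

int main() {
  assert(popcount32(0) == 0);
  assert(popcount32(0xFFFFFFFFu) == 32);
  assert(popcount32(0x12345678u) == 13);
  puts("ok");
  return 0;
}
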
+
+bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+ Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ return true;
+}
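
The smear-then-popcount fallback on i32, as a sketch (ctlz32 and popcount32
are invented names; ctlz32(0) == 32 falls out naturally, since zero smears to
zero and ~0 has all 32 bits set):

#include <cassert>
#include <cstdint>
#include <cstdio>

static unsigned popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);
  return (((v + (v >> 4)) & 0x0F0F0F0Fu) * 0x01010101u) >> 24;
}

static unsigned ctlz32(uint32_t x) {
  x |= x >> 1; // smear the highest set bit into every lower position
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  return popcount32(~x); // the leading zeros are exactly the bits left clear
}

int main() {
  assert(ctlz32(0) == 32);
  assert(ctlz32(1) == 31);
  assert(ctlz32(0x80000000u) == 0);
  assert(ctlz32(0x00010000u) == 15);
  puts("ok");
  return 0;
}
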
+
+bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTTZ, VT)) {
+ Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
+ SDValue Tmp = DAG.getNode(
+ ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
+
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
+ Result =
+ DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
+ return true;
+ }
+
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
+ return true;
+}
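
And the CTPOP form of CTTZ on i32 (cttz32 is an invented name;
__builtin_popcount is the GCC/Clang builtin standing in for the CTPOP node):

#include <cassert>
#include <cstdint>
#include <cstdio>

static unsigned cttz32(uint32_t x) {
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x
  // (all 32 bits when x == 0), so its population count is the answer.
  return __builtin_popcount(~x & (x - 1));
}

int main() {
  assert(cttz32(0) == 32);
  assert(cttz32(1) == 0);
  assert(cttz32(12) == 2);
  assert(cttz32(0x80000000u) == 31);
  puts("ok");
  return 0;
}
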
+
+bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = N->getOperand(0);
+
+ // Only expand vector types if we have the appropriate vector operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ !isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, Op,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
return true;
}
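
The classic three-op abs on i32, for reference (abs32 is an invented name;
arithmetic right shift on negative values is assumed, as on all LLVM hosts):

#include <cassert>
#include <cstdint>
#include <cstdio>

// abs(x) = (x + (x >> 31)) ^ (x >> 31): the sign smear is 0 or -1, so this
// is a no-op for non-negatives and complement-plus-one for negatives.
static int32_t abs32(int32_t x) {
  int32_t Shift = x >> 31; // SRA by bitwidth-1: all zeros or all ones
  return (x + Shift) ^ Shift;
}

int main() {
  assert(abs32(5) == 5);
  assert(abs32(-5) == 5);
  assert(abs32(0) == 0);
  puts("ok");
  return 0;
}
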
@@ -3876,8 +4752,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
- EVT PtrVT = BasePTR.getValueType();
-
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
@@ -3888,8 +4762,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
- DAG.getConstant(Stride, SL, PtrVT));
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -3989,7 +4862,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
- if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+ if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+ LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
@@ -4139,13 +5013,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
auto &MF = DAG.getMachineFunction();
+ EVT MemVT = ST->getMemoryVT();
SDLoc dl(ST);
- if (ST->getMemoryVT().isFloatingPoint() ||
- ST->getMemoryVT().isVector()) {
+ if (MemVT.isFloatingPoint() || MemVT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (isTypeLegal(intVT)) {
- if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+ if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+ MemVT.isVector()) {
// Scalarize the store and let the individual components be handled.
SDValue Result = scalarizeVectorStore(ST, DAG);
@@ -4399,3 +5274,134 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
}
return SDValue();
}
+
+SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ // usub.sat(a, b) -> umax(a, b) - b
+ if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
+ SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
+ }
+
+ if (VT.isVector()) {
+ // TODO: Consider not scalarizing here.
+ return SDValue();
+ }
+
+ unsigned OverflowOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ OverflowOp = ISD::SADDO;
+ break;
+ case ISD::UADDSAT:
+ OverflowOp = ISD::UADDO;
+ break;
+ case ISD::SSUBSAT:
+ OverflowOp = ISD::SSUBO;
+ break;
+ case ISD::USUBSAT:
+ OverflowOp = ISD::USUBO;
+ break;
+ default:
+ llvm_unreachable("Expected method to receive signed or unsigned saturation "
+ "addition or subtraction node.");
+ }
+
+ assert(LHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(RHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ unsigned BitWidth = LHS.getValueSizeInBits();
+ EVT ResultType = LHS.getValueType();
+ EVT BoolVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType);
+ SDValue Result =
+ DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS);
+ SDValue SumDiff = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, ResultType);
+
+ if (Opcode == ISD::UADDSAT) {
+ // Just need to check overflow for SatMax.
+ APInt MaxVal = APInt::getMaxValue(BitWidth);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff);
+ } else if (Opcode == ISD::USUBSAT) {
+ // Just need to check overflow for SatMin.
+ APInt MinVal = APInt::getMinValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
+ return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff);
+ } else {
+ // SatMax -> Overflow && SumDiff < 0
+ // SatMin -> Overflow && SumDiff >= 0
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff);
+ }
+}
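
The signed-saturation selects map directly onto host arithmetic; a sketch of
sadd.sat on i32 (saddsat32 is an invented name; __builtin_add_overflow is the
GCC/Clang builtin playing the role of SADDO):

#include <cassert>
#include <cstdint>
#include <cstdio>

static int32_t saddsat32(int32_t a, int32_t b) {
  int32_t Sum;
  if (!__builtin_add_overflow(a, b, &Sum)) // Overflow plus wrapped SumDiff
    return Sum;
  // SatMax when the wrapped sum is negative (we overflowed upward),
  // SatMin when it is non-negative (we overflowed downward).
  return Sum < 0 ? 2147483647 : -2147483647 - 1;
}

int main() {
  assert(saddsat32(2147483647, 1) == 2147483647);
  assert(saddsat32(-2147483647 - 1, -1) == -2147483647 - 1);
  assert(saddsat32(100, 23) == 123);
  puts("ok");
  return 0;
}
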
+
+SDValue
+TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node,
+ SelectionDAG &DAG) const {
+ assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
+ assert(Node->getNumOperands() == 3 &&
+ "Expected signed fixed point multiplication to have 3 operands.");
+
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ assert(LHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(RHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ unsigned Scale = Node->getConstantOperandVal(2);
+ EVT VT = LHS.getValueType();
+ assert(Scale < VT.getScalarSizeInBits() &&
+ "Expected scale to be less than the number of bits.");
+
+ if (!Scale)
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+
+ // Get the upper and lower bits of the result.
+ SDValue Lo, Hi;
+ if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ Lo = Result.getValue(0);
+ Hi = Result.getValue(1);
+ } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
+ Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+ } else {
+ report_fatal_error("Unable to expand signed fixed point multiplication.");
+ }
+
+ // The result will need to be shifted right by the scale since both operands
+ // are scaled. The result is given to us in 2 halves, so we only want part of
+ // both in the result.
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy));
+ Hi = DAG.getNode(
+ ISD::SHL, dl, VT, Hi,
+ DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy));
+ return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+}
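
A scalar sketch of the same fixed-point multiply, with a widening multiply
standing in for SMUL_LOHI (smulfix32 is an invented name):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Both operands carry Scale fractional bits, so the full product carries
// 2*Scale; shifting the 64-bit product right by Scale is the combined
// srl(Lo, Scale) | shl(Hi, width - Scale) of the expansion.
static int32_t smulfix32(int32_t a, int32_t b, unsigned Scale) {
  int64_t Wide = (int64_t)a * b;
  return (int32_t)(Wide >> Scale);
}

int main() {
  // Q16.16: 1.5 * 2.25 == 3.375
  int32_t A = 3 << 15, B = 9 << 14;
  assert(smulfix32(A, B, 16) == (27 << 13));
  puts("ok");
  return 0;
}
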
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index ed74b3e4fa19..fccbb8ec91cb 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -95,7 +95,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// Sort the Idx2MBBMap
- llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+ llvm::sort(idx2MBBMap, Idx2MBBCompare());
LLVM_DEBUG(mf->print(dbgs(), this));
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 8fbe724045e6..bcc8f8cf18bc 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -76,6 +77,18 @@ public:
/// Returns the last insert point as an iterator for \p CurLI in \p MBB.
MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
MachineBasicBlock &MBB);
+
+ /// Return the base index of the first insert point in \p MBB.
+ SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) {
+ SlotIndex Res = LIS.getMBBStartIdx(&MBB);
+ if (!MBB.empty()) {
+ MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MII != MBB.end())
+ Res = LIS.getInstructionIndex(*MII);
+ }
+ return Res;
+ }
+
};
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
@@ -225,6 +238,10 @@ public:
MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
return IPA.getLastInsertPointIter(*CurLI, *BB);
}
+
+ SlotIndex getFirstSplitPoint(unsigned Num) {
+ return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num));
+ }
};
/// SplitEditor - Edit machine code and LiveIntervals for live range
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 81a41970f9e2..eb8552915e2a 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -1022,9 +1022,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// We adjust AliasAnalysis information for merged stack slots.
- MachineSDNode::mmo_iterator NewMemOps =
- MF->allocateMemRefsArray(I.getNumMemOperands());
- unsigned MemOpIdx = 0;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
bool ReplaceMemOps = false;
for (MachineMemOperand *MMO : I.memoperands()) {
// If this memory location can be a slot remapped here,
@@ -1051,17 +1049,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
}
if (MayHaveConflictingAAMD) {
- NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
+ NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
ReplaceMemOps = true;
+ } else {
+ NewMMOs.push_back(MMO);
}
- else
- NewMemOps[MemOpIdx++] = MMO;
}
// If any memory operand is updated, set memory references of
// this instruction.
if (ReplaceMemOps)
- I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
+ I.setMemRefs(*MF, NewMMOs);
}
// Update the location of C++ catch objects for the MSVC personality routine.
@@ -1233,7 +1231,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
});
for (auto &s : LiveStarts)
- llvm::sort(s.begin(), s.end());
+ llvm::sort(s);
bool Changed = true;
while (Changed) {
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 19a191c01db9..0676fa2421e8 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -268,11 +268,10 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
// in the list. Merge entries that refer to the same dwarf register and use
// the maximum size that needs to be spilled.
- llvm::sort(LiveOuts.begin(), LiveOuts.end(),
- [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
- // Only sort by the dwarf register number.
- return LHS.DwarfRegNum < RHS.DwarfRegNum;
- });
+ llvm::sort(LiveOuts, [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
+ // Only sort by the dwarf register number.
+ return LHS.DwarfRegNum < RHS.DwarfRegNum;
+ });
for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
for (auto II = std::next(I); II != E; ++II) {
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index cb12c7ce6e82..3b578c7391da 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -157,14 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-static bool isLifetimeInst(const Instruction *I) {
- if (const auto Intrinsic = dyn_cast<IntrinsicInst>(I)) {
- const auto Id = Intrinsic->getIntrinsicID();
- return Id == Intrinsic::lifetime_start || Id == Intrinsic::lifetime_end;
- }
- return false;
-}
-
bool StackProtector::HasAddressTaken(const Instruction *AI) {
for (const User *U : AI->users()) {
if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -175,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
return true;
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
// Ignore intrinsics that are not calls. TODO: Use isLoweredToCall().
- if (!isa<DbgInfoIntrinsic>(CI) && !isLifetimeInst(CI))
+ if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
return true;
} else if (isa<InvokeInst>(U)) {
return true;
@@ -199,6 +191,18 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
return false;
}
+/// Search for the first call to the llvm.stackprotector intrinsic and return it
+/// if present.
+static const CallInst *findStackProtectorIntrinsic(Function &F) {
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->getCalledFunction() ==
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::stackprotector))
+ return CI;
+ return nullptr;
+}
+
/// Check whether or not this function needs a stack protector based
/// upon the stack protector level.
///
@@ -215,13 +219,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
- for (const BasicBlock &BB : *F)
- for (const Instruction &I : BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->getCalledFunction() ==
- Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::stackprotector))
- HasPrologue = true;
+ HasPrologue = findStackProtectorIntrinsic(*F);
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
@@ -379,7 +377,8 @@ bool StackProtector::InsertStackProtectors() {
// protection in SDAG.
bool SupportsSelectionDAGSP =
TLI->useStackGuardXorFP() ||
- (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel &&
+ !TM->Options.EnableGlobalISel);
AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
@@ -399,6 +398,14 @@ bool StackProtector::InsertStackProtectors() {
if (SupportsSelectionDAGSP)
break;
+ // Find the stack guard slot if the prologue was not created by this pass
+ // itself via a previous call to CreatePrologue().
+ if (!AI) {
+ const CallInst *SPCall = findStackProtectorIntrinsic(*F);
+ assert(SPCall && "Call to llvm.stackprotector is missing");
+ AI = cast<AllocaInst>(SPCall->getArgOperand(1));
+ }
+
// Set HasIRCheck to true, so that SelectionDAG will not generate its own
// version. SelectionDAG called 'shouldEmitSDCheck' to check whether
// instrumentation has already been generated.
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index eb15b15a24a6..d8c6a249e4da 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -214,7 +214,7 @@ void StackSlotColoring::InitializeSlots() {
Intervals.reserve(LS->getNumIntervals());
for (auto &I : *LS)
Intervals.push_back(&I);
- llvm::sort(Intervals.begin(), Intervals.end(),
+ llvm::sort(Intervals,
[](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
// Gather all spill slots into a list.
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f0cfa2fbe4fd..cf78fb5a1f12 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -30,12 +30,6 @@ using namespace llvm;
TargetFrameLowering::~TargetFrameLowering() = default;
-/// The default implementation just looks at attribute "no-frame-pointer-elim".
-bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
- auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim");
- return Attr.getValueAsString() == "true";
-}
-
bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 963f8178b509..2a17af391105 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -339,42 +339,32 @@ bool TargetInstrInfo::PredicateInstruction(
return MadeChange;
}
-bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
oe = MI.memoperands_end();
o != oe; ++o) {
- if ((*o)->isLoad()) {
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast_or_null<FixedStackPseudoSourceValue>(
- (*o)->getPseudoValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
+ if ((*o)->isLoad() &&
+ dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
}
- return false;
+ return Accesses.size() != StartSize;
}
-bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
oe = MI.memoperands_end();
o != oe; ++o) {
- if ((*o)->isStore()) {
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast_or_null<FixedStackPseudoSourceValue>(
- (*o)->getPseudoValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
+ if ((*o)->isStore() &&
+ dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
}
- return false;
+ return Accesses.size() != StartSize;
}
bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
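
With the new signature, a caller collects every stack-slot memory operand rather than receiving a single MMO and frame index. A hedged usage sketch (TII and MI stand for a TargetInstrInfo pointer and a MachineInstr already in scope):

SmallVector<const MachineMemOperand *, 2> Accesses;
if (TII->hasLoadFromStackSlot(MI, Accesses)) {
  for (const MachineMemOperand *MMO : Accesses) {
    // Every collected MMO has a FixedStackPseudoSourceValue, so the frame
    // index is still recoverable per access.
    int FI = cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue())
                 ->getFrameIndex();
    (void)FI;
  }
}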
@@ -388,8 +378,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
return true;
}
unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
- // Convert bit size to byte size to be consistent with
- // MCRegisterClass::getSize().
+ // Convert bit size to byte size.
if (BitSize % 8)
return false;
@@ -584,7 +573,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
}
if (NewMI) {
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ NewMI->setMemRefs(MF, MI.memoperands());
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
NewMI->mayStore()) &&
@@ -654,10 +643,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
// Copy the memoperands from the load to the folded instruction.
if (MI.memoperands_empty()) {
- NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
+ NewMI->setMemRefs(MF, LoadMI.memoperands());
} else {
// Handle the rare case of folding multiple loads.
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ NewMI->setMemRefs(MF, MI.memoperands());
for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
E = LoadMI.memoperands_end();
I != E; ++I) {
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 7b1b76821daa..e86190375642 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -161,7 +161,8 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
}
- if (TT.isGNUEnvironment() || TT.isOSFuchsia()) {
+ if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
+ (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
setLibcallName(RTLIB::SINCOS_F80, "sincosl");
@@ -599,14 +600,23 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
- setOperationAction(ISD::FMINNAN, VT, Expand);
- setOperationAction(ISD::FMAXNAN, VT, Expand);
+ setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
+ setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
+ setOperationAction(ISD::FMINIMUM, VT, Expand);
+ setOperationAction(ISD::FMAXIMUM, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
setOperationAction(ISD::ABS, VT, Expand);
+ setOperationAction(ISD::FSHL, VT, Expand);
+ setOperationAction(ISD::FSHR, VT, Expand);
+ setOperationAction(ISD::SADDSAT, VT, Expand);
+ setOperationAction(ISD::UADDSAT, VT, Expand);
+ setOperationAction(ISD::SSUBSAT, VT, Expand);
+ setOperationAction(ISD::USUBSAT, VT, Expand);
+ setOperationAction(ISD::SMULFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -666,6 +676,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FCBRT, VT, Expand);
setOperationAction(ISD::FLOG , VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
@@ -968,7 +979,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
MIB.add(MI->getOperand(i));
// Inherit previous memory operands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.cloneMemRefs(*MI);
assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
// Add a new memory operand for this FI.
@@ -1096,7 +1107,7 @@ void TargetLoweringBase::computeRegisterProperties(
LegalIntReg = IntReg;
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
- (const MVT::SimpleValueType)LegalIntReg;
+ (MVT::SimpleValueType)LegalIntReg;
ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
@@ -1443,6 +1454,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case CatchPad: return 0;
case CatchSwitch: return 0;
case CleanupPad: return 0;
+ case FNeg: return ISD::FNEG;
case Add: return ISD::ADD;
case FAdd: return ISD::FADD;
case Sub: return ISD::SUB;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f6b91a2f0231..cb2fe691d702 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -95,6 +95,161 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
const TargetMachine &TgtM) {
TargetLoweringObjectFile::Initialize(Ctx, TgtM);
TM = &TgtM;
+
+ CodeModel::Model CM = TgtM.getCodeModel();
+
+ switch (TgtM.getTargetTriple().getArch()) {
+ case Triple::arm:
+ case Triple::armeb:
+ case Triple::thumb:
+ case Triple::thumbeb:
+ if (Ctx.getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
+ break;
+ // Fallthrough if not using EHABI
+ LLVM_FALLTHROUGH;
+ case Triple::ppc:
+ case Triple::x86:
+ PersonalityEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect |
+ dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::x86_64:
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (CM == CodeModel::Small
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
+ } else {
+ PersonalityEncoding =
+ (CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::hexagon:
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ if (isPositionIndependent()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ LSDAEncoding |= dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ }
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ // The small model guarantees static code/data size < 4GB, but not where it
+ // will be in memory. Most of these could end up >2GB away so even a signed
+ // pc-relative 32-bit address is insufficient, theoretically.
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::lanai:
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ // MIPS uses an indirect pointer to refer to personality functions and
+ // types, so that the eh_frame section can be read-only. DW.ref.personality
+ // will be generated for relocation.
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+ // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
+ // identify N64 from just a triple.
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ // We don't support PC-relative LSDA references in GAS so we use the default
+ // DW_EH_PE_absptr for those.
+
+ // FreeBSD must be explicit about the data size and use pcrel since its
+ // assembler/linker won't do the automatic conversion that the Linux tools
+ // do.
+ if (TgtM.getTargetTriple().isOSFreeBSD()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ }
+ break;
+ case Triple::ppc64:
+ case Triple::ppc64le:
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ break;
+ case Triple::sparcel:
+ case Triple::sparc:
+ if (isPositionIndependent()) {
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::sparcv9:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::systemz:
+ // All currently-defined code models guarantee that 4-byte PC-relative
+ // values will be in range.
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ default:
+ break;
+ }
}
void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
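
Each encoding selected above is a single byte that ORs an application modifier (pcrel, indirect) with a value format (sdata4, udata8, absptr). A brief sketch of how such a byte decomposes, using the constants from llvm/BinaryFormat/Dwarf.h (the masks are the standard DWARF EH split, shown here for illustration only):

#include "llvm/BinaryFormat/Dwarf.h"

// The common PIC personality encoding chosen above:
unsigned Enc = llvm::dwarf::DW_EH_PE_indirect | llvm::dwarf::DW_EH_PE_pcrel |
               llvm::dwarf::DW_EH_PE_sdata4;
unsigned Format      = Enc & 0x0f; // value format: DW_EH_PE_sdata4
unsigned Application = Enc & 0x70; // how it is applied: DW_EH_PE_pcrel
bool     Indirect    = (Enc & llvm::dwarf::DW_EH_PE_indirect) != 0;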
@@ -351,6 +506,30 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
}
+static unsigned getEntrySizeForKind(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString())
+ return 1;
+ else if (Kind.isMergeable2ByteCString())
+ return 2;
+ else if (Kind.isMergeable4ByteCString())
+ return 4;
+ else if (Kind.isMergeableConst4())
+ return 4;
+ else if (Kind.isMergeableConst8())
+ return 8;
+ else if (Kind.isMergeableConst16())
+ return 16;
+ else if (Kind.isMergeableConst32())
+ return 32;
+ else {
+ // We shouldn't have mergeable C strings or mergeable constants that we
+ // didn't handle above.
+ assert(!Kind.isMergeableCString() && "unknown string width");
+ assert(!Kind.isMergeableConst() && "unknown data width");
+ return 0;
+ }
+}
+
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
@@ -395,7 +574,7 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
MCSectionELF *Section = getContext().getELFSection(
SectionName, getELFSectionType(SectionName, Kind), Flags,
- /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);
+ getEntrySizeForKind(Kind), Group, UniqueID, AssociatedSymbol);
// Make sure that we did not get some other section with incompatible sh_link.
// This should not be possible due to UniqueID code above.
assert(Section->getAssociatedSymbol() == AssociatedSymbol &&
@@ -422,30 +601,6 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-static unsigned getEntrySizeForKind(SectionKind Kind) {
- if (Kind.isMergeable1ByteCString())
- return 1;
- else if (Kind.isMergeable2ByteCString())
- return 2;
- else if (Kind.isMergeable4ByteCString())
- return 4;
- else if (Kind.isMergeableConst4())
- return 4;
- else if (Kind.isMergeableConst8())
- return 8;
- else if (Kind.isMergeableConst16())
- return 16;
- else if (Kind.isMergeableConst32())
- return 32;
- else {
- // We shouldn't have mergeable C strings or mergeable constants that we
- // didn't handle above.
- assert(!Kind.isMergeableCString() && "unknown string width");
- assert(!Kind.isMergeableConst() && "unknown data width");
- return 0;
- }
-}
-
static MCSectionELF *selectELFSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
@@ -640,6 +795,14 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
+MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const {
+ // Use ".GCC.command.line" since this feature is to support clang's
+ // -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the
+ // same name.
+ return getContext().getELFSection(".GCC.command.line", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+}
+
void
TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
UseInitArray = UseInitArray_;
@@ -684,6 +847,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MachO::S_MOD_TERM_FUNC_POINTERS,
SectionKind::getData());
}
+
+ PersonalityEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
}
void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
@@ -939,6 +1108,22 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
// .indirect_symbol _extfoo
// .long 0
//
+ // The indirect symbol table (and sections of non_lazy_symbol_pointers type)
+ // may point to both local (same translation unit) and global (other
+ // translation units) symbols. Example:
+ //
+ // .section __DATA,__pointers,non_lazy_symbol_pointers
+ // L1:
+ // .indirect_symbol _myGlobal
+ // .long 0
+ // L2:
+ // .indirect_symbol _myLocal
+ // .long _myLocal
+ //
+ // If the symbol is local, instead of the symbol's index, the assembler
+ // places the constant INDIRECT_SYMBOL_LOCAL into the indirect symbol table.
+ // Then the linker will notice the constant in the table and will look at the
+ // content of the symbol.
MachineModuleInfoMachO &MachOMMI =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MCContext &Ctx = getContext();
@@ -958,9 +1143,12 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::
- StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+ if (!StubSym.getPointer()) {
+ bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal();
+ // With the assumption that IsIndirectLocal == GV->hasLocalLinkage().
+ StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym),
+ !IsIndirectLocal);
+ }
const MCExpr *BSymExpr =
MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
@@ -1156,10 +1344,11 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
MCSymbol *Sym = TM.getSymbol(ComdatGV);
StringRef COMDATSymName = Sym->getName();
- // Append "$symbol" to the section name when targetting mingw. The ld.bfd
+ // Append "$symbol" to the section name *before* IR-level mangling is
+ // applied when targeting mingw. This is what GCC does, and the ld.bfd
// COFF linker will not properly handle comdats otherwise.
if (getTargetTriple().isWindowsGNUEnvironment())
- raw_svector_ostream(Name) << '$' << COMDATSymName;
+ raw_svector_ostream(Name) << '$' << ComdatGV->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection, UniqueID);
@@ -1295,8 +1484,25 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
unsigned Priority,
const MCSymbol *KeySym,
MCSectionCOFF *Default) {
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment())
- return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+ if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ // If the priority is the default, use .CRT$XCU, possibly associative.
+ if (Priority == 65535)
+ return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+
+ // Otherwise, we need to compute a new section name. Low priorities should
+ // run earlier. The linker will sort sections ASCII-betically, and we need a
+ // string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we
+ // make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really
+ // low priorities need to sort before 'L', since the CRT uses that
+ // internally, so we use ".CRT$XCA00001" for them.
+ SmallString<24> Name;
+ raw_svector_ostream OS(Name);
+ OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
+ MCSectionCOFF *Sec = Ctx.getCOFFSection(
+ Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ return Ctx.getAssociativeCOFFSection(Sec, KeySym, 0);
+ }
std::string Name = IsCtor ? ".ctors" : ".dtors";
if (Priority != 65535)
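
Concretely, the format above produces names that sort between the CRT's own .CRT$XCA and .CRT$XCU sentinels. A small illustrative reproduction of the name computation (not part of the patch itself):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

std::string crtSectionName(unsigned Priority) {
  llvm::SmallString<24> Name;
  llvm::raw_svector_ostream OS(Name);
  OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T')
     << llvm::format("%05u", Priority);
  // e.g. 100 -> ".CRT$XCA00100", 300 -> ".CRT$XCT00300"; the default
  // priority 65535 takes the earlier branch and lands in .CRT$XCU.
  return Name.str().str();
}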
@@ -1570,6 +1776,10 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
void TargetLoweringObjectFileWasm::InitializeWasm() {
StaticCtorSection =
getContext().getWasmSection(".init_array", SectionKind::getData());
+
+ // We don't use PersonalityEncoding and LSDAEncoding because we don't emit
+ // .cfi directives. We use TTypeEncoding to encode typeinfo global variables.
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 853e71d0efa5..3c133fb8594e 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -23,15 +23,34 @@ using namespace llvm;
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
- // Check to see if we should eliminate all frame pointers.
- if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF))
+ // Check to see if the target wants to forcibly keep the frame pointer.
+ if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
return true;
- // Check to see if we should eliminate non-leaf frame pointers.
- if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf"))
- return MF.getFrameInfo().hasCalls();
+ const Function &F = MF.getFunction();
+
+ // TODO: Remove support for the old `fp elim` function attributes after
+ // fully migrating to the "frame-pointer" attribute.
+ if (!F.hasFnAttribute("frame-pointer")) {
+ // Check to see if we should eliminate all frame pointers.
+ if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true")
+ return true;
+
+ // Check to see if we should eliminate non-leaf frame pointers.
+ if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf"))
+ return MF.getFrameInfo().hasCalls();
- return false;
+ return false;
+ }
+
+ StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
+ if (FP == "all")
+ return true;
+ if (FP == "non-leaf")
+ return MF.getFrameInfo().hasCalls();
+ if (FP == "none")
+ return false;
+ llvm_unreachable("unknown frame pointer flag");
}
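
The new "frame-pointer" attribute is a plain string function attribute, so a front end or a test can request the behavior directly. A minimal sketch using the C++ API (F is an llvm::Function already in scope):

// "all" keeps the frame pointer everywhere, "non-leaf" only in functions
// that make calls, and "none" permits full elimination.
F.addFnAttr("frame-pointer", "non-leaf");
// Equivalent textual IR: attributes #0 = { "frame-pointer"="non-leaf" }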
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 2db03288f2ac..28126fcf766d 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -107,10 +108,10 @@ static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
cl::desc("Print LLVM IR input to isel pass"));
static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
-static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::init(false),
- cl::ZeroOrMore);
+static cl::opt<cl::boolOrDefault>
+ VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::ZeroOrMore);
enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
@@ -136,13 +137,15 @@ static cl::opt<std::string> PrintMachineInstrs(
"print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden);
-static cl::opt<int> EnableGlobalISelAbort(
+static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
"global-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"global\" instruction selection "
- "fails to lower/select an instruction: 0 disable the abort, "
- "1 enable the abort, and "
- "2 disable the abort but emit a diagnostic on failure"),
- cl::init(1));
+ "fails to lower/select an instruction"),
+ cl::values(
+ clEnumValN(GlobalISelAbortMode::Disable, "0", "Disable the abort"),
+ clEnumValN(GlobalISelAbortMode::Enable, "1", "Enable the abort"),
+ clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
+ "Disable the abort but emit a diagnostic on failure")));
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
@@ -342,11 +345,39 @@ static AnalysisID getPassIDFromName(StringRef PassName) {
return PI ? PI->getTypeInfo() : nullptr;
}
+static std::pair<StringRef, unsigned>
+getPassNameAndInstanceNum(StringRef PassName) {
+ StringRef Name, InstanceNumStr;
+ std::tie(Name, InstanceNumStr) = PassName.split(',');
+
+ unsigned InstanceNum = 0;
+ if (!InstanceNumStr.empty() && InstanceNumStr.getAsInteger(10, InstanceNum))
+ report_fatal_error("invalid pass instance specifier " + PassName);
+
+ return std::make_pair(Name, InstanceNum);
+}
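
The helper gives the -start-before/-start-after/-stop-before/-stop-after options an optional ",N" suffix, so a pass that runs several times in the pipeline can be addressed by occurrence. A small illustration of the parse (the pass name is a hypothetical input):

StringRef Name, NumStr;
std::tie(Name, NumStr) = StringRef("dead-mi-elimination,1").split(',');
unsigned InstanceNum = 0;
bool Invalid = !NumStr.empty() && NumStr.getAsInteger(10, InstanceNum);
// Name == "dead-mi-elimination", InstanceNum == 1, Invalid == false.
// With no suffix, InstanceNum stays 0 and selects the first occurrence.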
+
void TargetPassConfig::setStartStopPasses() {
- StartBefore = getPassIDFromName(StartBeforeOpt);
- StartAfter = getPassIDFromName(StartAfterOpt);
- StopBefore = getPassIDFromName(StopBeforeOpt);
- StopAfter = getPassIDFromName(StopAfterOpt);
+ StringRef StartBeforeName;
+ std::tie(StartBeforeName, StartBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StartBeforeOpt);
+
+ StringRef StartAfterName;
+ std::tie(StartAfterName, StartAfterInstanceNum) =
+ getPassNameAndInstanceNum(StartAfterOpt);
+
+ StringRef StopBeforeName;
+ std::tie(StopBeforeName, StopBeforeInstanceNum)
+ = getPassNameAndInstanceNum(StopBeforeOpt);
+
+ StringRef StopAfterName;
+ std::tie(StopAfterName, StopAfterInstanceNum)
+ = getPassNameAndInstanceNum(StopAfterOpt);
+
+ StartBefore = getPassIDFromName(StartBeforeName);
+ StartAfter = getPassIDFromName(StartAfterName);
+ StopBefore = getPassIDFromName(StopBeforeName);
+ StopAfter = getPassIDFromName(StopAfterName);
if (StartBefore && StartAfter)
report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
Twine(StartAfterOptName) + Twine(" specified!"));
@@ -383,6 +414,9 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
if (TM.Options.EnableIPRA)
setRequiresCodeGenSCCOrder();
+ if (EnableGlobalISelAbort.getNumOccurrences())
+ TM.Options.GlobalISelAbort = EnableGlobalISelAbort;
+
setStartStopPasses();
}
@@ -418,8 +452,13 @@ TargetPassConfig::TargetPassConfig()
"triple set?");
}
-bool TargetPassConfig::hasLimitedCodeGenPipeline() const {
- return StartBefore || StartAfter || StopBefore || StopAfter;
+bool TargetPassConfig::willCompleteCodeGenPipeline() {
+ return StopBeforeOpt.empty() && StopAfterOpt.empty();
+}
+
+bool TargetPassConfig::hasLimitedCodeGenPipeline() {
+ return !StartBeforeOpt.empty() || !StartAfterOpt.empty() ||
+ !willCompleteCodeGenPipeline();
}
std::string
@@ -482,9 +521,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
// and shouldn't reference it.
AnalysisID PassID = P->getPassID();
- if (StartBefore == PassID)
+ if (StartBefore == PassID && StartBeforeCount++ == StartBeforeInstanceNum)
Started = true;
- if (StopBefore == PassID)
+ if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum)
Stopped = true;
if (Started && !Stopped) {
std::string Banner;
@@ -507,9 +546,11 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
} else {
delete P;
}
- if (StopAfter == PassID)
+
+ if (StopAfter == PassID && StopAfterCount++ == StopAfterInstanceNum)
Stopped = true;
- if (StartAfter == PassID)
+
+ if (StartAfter == PassID && StartAfterCount++ == StartAfterInstanceNum)
Started = true;
if (Stopped && !Started)
report_fatal_error("Cannot stop compilation after pass that is not run");
@@ -552,7 +593,7 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) {
}
void TargetPassConfig::addVerifyPass(const std::string &Banner) {
- bool Verify = VerifyMachineCode;
+ bool Verify = VerifyMachineCode == cl::BOU_TRUE;
#ifdef EXPENSIVE_CHECKS
if (VerifyMachineCode == cl::BOU_UNSET)
Verify = TM->isMachineVerifierClean();
@@ -714,18 +755,34 @@ void TargetPassConfig::addISelPrepare() {
bool TargetPassConfig::addCoreISelPasses() {
// Enable FastISel with -fast-isel, but allow that to be overridden.
TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
- if (EnableFastISelOption == cl::BOU_TRUE ||
- (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()))
- TM->setFastISel(true);
- // Ask the target for an instruction selector.
- // Explicitly enabling fast-isel should override implicitly enabled
- // global-isel.
- if (EnableGlobalISelOption == cl::BOU_TRUE ||
- (EnableGlobalISelOption == cl::BOU_UNSET &&
- TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) {
+ // Determine an instruction selector.
+ enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+ SelectorType Selector;
+
+ if (EnableFastISelOption == cl::BOU_TRUE)
+ Selector = SelectorType::FastISel;
+ else if (EnableGlobalISelOption == cl::BOU_TRUE ||
+ (TM->Options.EnableGlobalISel &&
+ EnableGlobalISelOption != cl::BOU_FALSE))
+ Selector = SelectorType::GlobalISel;
+ else if (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())
+ Selector = SelectorType::FastISel;
+ else
+ Selector = SelectorType::SelectionDAG;
+
+ // Consistently set TM->Options.EnableFastISel and EnableGlobalISel.
+ if (Selector == SelectorType::FastISel) {
+ TM->setFastISel(true);
+ TM->setGlobalISel(false);
+ } else if (Selector == SelectorType::GlobalISel) {
TM->setFastISel(false);
+ TM->setGlobalISel(true);
+ }
+ // Add instruction selector passes.
+ if (Selector == SelectorType::GlobalISel) {
+ SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true);
if (addIRTranslator())
return true;
@@ -804,15 +861,17 @@ void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
// Insert a machine instr printer pass after the specified pass.
- if (!StringRef(PrintMachineInstrs.getValue()).equals("") &&
- !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) {
- const PassRegistry *PR = PassRegistry::getPassRegistry();
- const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
- const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
- assert (TPI && IPI && "Pass ID not registered!");
- const char *TID = (const char *)(TPI->getTypeInfo());
- const char *IID = (const char *)(IPI->getTypeInfo());
- insertPass(TID, IID);
+ StringRef PrintMachineInstrsPassName = PrintMachineInstrs.getValue();
+ if (!PrintMachineInstrsPassName.equals("") &&
+ !PrintMachineInstrsPassName.equals("option-unspecified")) {
+ if (const PassInfo *TPI = getPassInfo(PrintMachineInstrsPassName)) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
+ assert(IPI && "failed to get \"machineinstr-printer\" PassInfo!");
+ const char *TID = (const char *)(TPI->getTypeInfo());
+ const char *IID = (const char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
}
// Print the instruction selected machine code...
@@ -981,7 +1040,8 @@ bool TargetPassConfig::getOptimizeRegAlloc() const {
}
/// RegisterRegAlloc's global Registry tracks allocator registration.
-MachinePassRegistry RegisterRegAlloc::Registry;
+MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
+ RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
@@ -1155,14 +1215,9 @@ void TargetPassConfig::addBlockPlacement() {
/// GlobalISel Configuration
//===---------------------------------------------------------------------===//
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
- if (EnableGlobalISelAbort.getNumOccurrences() > 0)
- return EnableGlobalISelAbort == 1;
-
- // When no abort behaviour is specified, we don't abort if the target says
- // that GISel is enabled.
- return !TM->Options.EnableGlobalISel;
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
}
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
- return EnableGlobalISelAbort == 2;
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0ca435016ead..4b72f6a84ca1 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -592,17 +592,17 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// the two-address register.
// e.g.
// %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
- // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = COPY %reg1028
// %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
- // insert => %reg1030 = MOV8rr %reg1028
+ // insert => %reg1030 = COPY %reg1028
// %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
- // In this case, it might not be possible to coalesce the second MOV8rr
+ // In this case, it might not be possible to coalesce the second COPY
// instruction if the first one is coalesced. So it would be profitable to
// commute it:
// %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
- // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = COPY %reg1028
// %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
- // insert => %reg1030 = MOV8rr %reg1029
+ // insert => %reg1030 = COPY %reg1029
// %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
if (!isPlainlyKilled(MI, regC, LIS))
@@ -929,9 +929,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator Begin = MI;
MachineBasicBlock::iterator AfterMI = std::next(Begin);
MachineBasicBlock::iterator End = AfterMI;
- while (End->isCopy() &&
- regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
- Defs.push_back(End->getOperand(0).getReg());
+ while (End != MBB->end()) {
+ End = skipDebugInstructionsForward(End, MBB->end());
+ if (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI))
+ Defs.push_back(End->getOperand(0).getReg());
+ else
+ break;
++End;
}
@@ -1608,23 +1611,28 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
if (AllUsesCopied) {
+ bool ReplacedAllUntiedUses = true;
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB &&
- MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.getSubReg() == SubRegB) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ } else {
+ ReplacedAllUntiedUses = false;
}
- MO.setReg(LastCopiedReg);
- MO.setSubReg(MO.getSubReg());
}
}
}
// Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(*MI)) {
+ if (RemovedKillFlag && ReplacedAllUntiedUses &&
+ LV && LV->getVarInfo(RegB).removeKill(*MI)) {
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
LV->addVirtualRegisterKilled(RegB, *PrevMI);
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 0ead2b8340ab..ed7bef667e77 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -525,7 +525,7 @@ void VirtRegRewriter::rewrite() {
// Preserve semantics of sub-register operands.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- if (NoSubRegLiveness) {
+ if (NoSubRegLiveness || !MRI->shouldTrackSubRegLiveness(VirtReg)) {
// A virtual register kill refers to the whole register, so we may
// have to add implicit killed operands for the super-register. A
// partial redef always kills and redefines the super-register.
diff --git a/lib/CodeGen/WasmEHPrepare.cpp b/lib/CodeGen/WasmEHPrepare.cpp
index 83d04da5dd0c..e5002eb95346 100644
--- a/lib/CodeGen/WasmEHPrepare.cpp
+++ b/lib/CodeGen/WasmEHPrepare.cpp
@@ -137,6 +137,7 @@ class WasmEHPrepare : public FunctionPass {
Value *LSDAField = nullptr; // lsda field
Value *SelectorField = nullptr; // selector
+ Function *ThrowF = nullptr; // wasm.throw() intrinsic
Function *CatchF = nullptr; // wasm.catch.extract() intrinsic
Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
Function *LSDAF = nullptr; // wasm.lsda() intrinsic
@@ -145,6 +146,9 @@ class WasmEHPrepare : public FunctionPass {
Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
Function *ClangCallTermF = nullptr; // __clang_call_terminate() function
+ bool prepareEHPads(Function &F);
+ bool prepareThrows(Function &F);
+
void prepareEHPad(BasicBlock *BB, unsigned Index);
void prepareTerminateCleanupPad(BasicBlock *BB);
@@ -177,7 +181,62 @@ bool WasmEHPrepare::doInitialization(Module &M) {
return false;
}
+// Erase each of the specified BBs if it does not have any remaining
+// predecessors, and also erase all of its dead children.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &BBs) {
+ SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
+ while (!WL.empty()) {
+ auto *BB = WL.pop_back_val();
+ if (pred_begin(BB) != pred_end(BB))
+ continue;
+ WL.append(succ_begin(BB), succ_end(BB));
+ DeleteDeadBlock(BB);
+ }
+}
+
bool WasmEHPrepare::runOnFunction(Function &F) {
+ bool Changed = false;
+ Changed |= prepareThrows(F);
+ Changed |= prepareEHPads(F);
+ return Changed;
+}
+
+bool WasmEHPrepare::prepareThrows(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+ bool Changed = false;
+
+ // wasm.throw() intrinsic, which will be lowered to the wasm 'throw' instruction.
+ ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
+
+ // Insert an unreachable instruction after a call to @llvm.wasm.throw and
+ // delete all following instructions within the BB, and delete all the dead
+ // children of the BB as well.
+ for (User *U : ThrowF->users()) {
+ // A call to @llvm.wasm.throw() is only generated from a
+ // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
+ // InvokeInst.
+ auto *ThrowI = cast<CallInst>(U);
+ if (ThrowI->getFunction() != &F)
+ continue;
+ Changed = true;
+ auto *BB = ThrowI->getParent();
+ SmallVector<BasicBlock *, 4> Succs(succ_begin(BB), succ_end(BB));
+ auto &InstList = BB->getInstList();
+ InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
+ IRB.SetInsertPoint(BB);
+ IRB.CreateUnreachable();
+ eraseDeadBBsAndChildren(Succs);
+ }
+
+ return Changed;
+}
+
+bool WasmEHPrepare::prepareEHPads(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+
SmallVector<BasicBlock *, 16> CatchPads;
SmallVector<BasicBlock *, 16> CleanupPads;
for (BasicBlock &BB : F) {
@@ -194,9 +253,6 @@ bool WasmEHPrepare::runOnFunction(Function &F) {
return false;
assert(F.hasPersonalityFn() && "Personality function not found");
- Module &M = *F.getParent();
- IRBuilder<> IRB(F.getContext());
-
// __wasm_lpad_context global variable
LPadContextGV = cast<GlobalVariable>(
M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
@@ -300,7 +356,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
// This is to create a map of <landingpad EH label, landingpad index> in
// SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
// Pseudocode: wasm.landingpad.index(Index);
- IRB.CreateCall(LPadIndexF, IRB.getInt32(Index));
+ IRB.CreateCall(LPadIndexF, {FPI, IRB.getInt32(Index)});
// Pseudocode: __wasm_lpad_context.lpad_index = index;
IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 65d0a7a774fe..6a15240fa6e0 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -218,7 +218,7 @@ static void calculateStateNumbersForInvokes(const Function *Fn,
// to. If the unwind edge came from an invoke, return null.
static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
Value *ParentPad) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (isa<InvokeInst>(TI))
return nullptr;
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
@@ -977,7 +977,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
break;
}
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
// The token consumed by a CatchReturnInst must match the funclet token.
@@ -1074,7 +1074,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
AllocaInst *SpillSlot = nullptr;
Instruction *EHPad = PHIBlock->getFirstNonPHI();
- if (!isa<TerminatorInst>(EHPad)) {
+ if (!EHPad->isTerminator()) {
// If the EHPad isn't a terminator, then we can insert a load in this block
// that will dominate all uses.
SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
@@ -1148,8 +1148,7 @@ void WinEHPrepare::insertPHIStore(
BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
- if (PredBlock->isEHPad() &&
- isa<TerminatorInst>(PredBlock->getFirstNonPHI())) {
+ if (PredBlock->isEHPad() && PredBlock->getFirstNonPHI()->isTerminator()) {
// Pred is unsplittable, so we need to queue it on the worklist.
Worklist.push_back({PredBlock, PredVal});
return;