Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp  6
-rw-r--r--  lib/CodeGen/Analysis.cpp  24
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp  195
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp  68
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp  59
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.h  13
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp  107
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.cpp  12
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp  28
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugHandlerBase.h  4
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocEntry.h  4
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp  212
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h  9
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp  123
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h  17
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp  248
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h  126
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp  177
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h  40
-rw-r--r--  lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp  21
-rw-r--r--  lib/CodeGen/AsmPrinter/WinException.cpp  4
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp  4
-rw-r--r--  lib/CodeGen/BranchCoalescing.cpp  758
-rw-r--r--  lib/CodeGen/BranchFolding.cpp  163
-rw-r--r--  lib/CodeGen/BranchFolding.h  43
-rw-r--r--  lib/CodeGen/BranchRelaxation.cpp  4
-rw-r--r--  lib/CodeGen/BuiltinGCs.cpp  9
-rw-r--r--  lib/CodeGen/CMakeLists.txt  8
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp  3
-rw-r--r--  lib/CodeGen/CodeGen.cpp  15
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp  928
-rw-r--r--  lib/CodeGen/CountingFunctionInserter.cpp  2
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp  7
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp  2
-rw-r--r--  lib/CodeGen/DetectDeadLanes.cpp  2
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp  472
-rw-r--r--  lib/CodeGen/FEntryInserter.cpp  55
-rw-r--r--  lib/CodeGen/FaultMaps.cpp  15
-rw-r--r--  lib/CodeGen/GCStrategy.cpp  7
-rw-r--r--  lib/CodeGen/GlobalISel/CMakeLists.txt  1
-rw-r--r--  lib/CodeGen/GlobalISel/CallLowering.cpp  37
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp  755
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelect.cpp  98
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp  41
-rw-r--r--  lib/CodeGen/GlobalISel/Legalizer.cpp  84
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp  399
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp  31
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp  266
-rw-r--r--  lib/CodeGen/GlobalISel/RegBankSelect.cpp  27
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBank.cpp  9
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBankInfo.cpp  95
-rw-r--r--  lib/CodeGen/GlobalISel/Utils.cpp  50
-rw-r--r--  lib/CodeGen/IfConversion.cpp  111
-rw-r--r--  lib/CodeGen/ImplicitNullChecks.cpp  206
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp  5
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp  6
-rw-r--r--  lib/CodeGen/LLVMBuild.txt  2
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp  15
-rw-r--r--  lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp  97
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp  41
-rw-r--r--  lib/CodeGen/LiveDebugValues.cpp  168
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp  4
-rw-r--r--  lib/CodeGen/LiveInterval.cpp  33
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp  162
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp  41
-rw-r--r--  lib/CodeGen/LivePhysRegs.cpp  12
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp  41
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp  13
-rw-r--r--  lib/CodeGen/LiveRegMatrix.cpp  27
-rw-r--r--  lib/CodeGen/LiveRegUnits.cpp  126
-rw-r--r--  lib/CodeGen/LiveVariables.cpp  4
-rw-r--r--  lib/CodeGen/LowLevelType.cpp  55
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.cpp  151
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.h  9
-rw-r--r--  lib/CodeGen/MIRParser/MIRParser.cpp  52
-rw-r--r--  lib/CodeGen/MIRPrinter.cpp  53
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp  57
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp  85
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp  1047
-rw-r--r--  lib/CodeGen/MachineCombiner.cpp  48
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp  12
-rw-r--r--  lib/CodeGen/MachineDominators.cpp  28
-rw-r--r--  lib/CodeGen/MachineFunction.cpp  7
-rw-r--r--  lib/CodeGen/MachineInstr.cpp  104
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp  16
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp  4
-rw-r--r--  lib/CodeGen/MachineModuleInfoImpls.cpp  1
-rw-r--r--  lib/CodeGen/MachineOptimizationRemarkEmitter.cpp  100
-rw-r--r--  lib/CodeGen/MachineOutliner.cpp  1251
-rw-r--r--  lib/CodeGen/MachinePipeliner.cpp  25
-rw-r--r--  lib/CodeGen/MachineRegionInfo.cpp  22
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp  76
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp  256
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp  66
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp  33
-rw-r--r--  lib/CodeGen/PatchableFunction.cpp  2
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp  2
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp  19
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp  5
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp  2
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp  92
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp  60
-rw-r--r--  lib/CodeGen/RegUsageInfoCollector.cpp  24
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp  50
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp  289
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp  57
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp  81
-rw-r--r--  lib/CodeGen/ResetMachineFunctionPass.cpp  15
-rw-r--r--  lib/CodeGen/SafeStack.cpp  2
-rw-r--r--  lib/CodeGen/SafeStackColoring.cpp  2
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp  442
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp  209
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp  9
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp  2429
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp  101
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp  1
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  120
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp  10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  41
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp  24
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h  15
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  29
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp  25
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  308
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp  15
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  42
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  10
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp  561
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  996
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h  87
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp  20
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  613
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp  172
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp  30
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp  42
-rw-r--r--  lib/CodeGen/SplitKit.cpp  136
-rw-r--r--  lib/CodeGen/SplitKit.h  11
-rw-r--r--  lib/CodeGen/StackColoring.cpp  28
-rw-r--r--  lib/CodeGen/StackMaps.cpp  24
-rw-r--r--  lib/CodeGen/StackProtector.cpp  58
-rw-r--r--  lib/CodeGen/TailDuplicator.cpp  3
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp  2
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp  10
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp  32
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp  229
-rw-r--r--  lib/CodeGen/TargetOptionsImpl.cpp  8
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp  28
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp  16
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp  110
-rw-r--r--  lib/CodeGen/TargetSubtargetInfo.cpp  46
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp  4
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp  49
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp  6
-rw-r--r--  lib/CodeGen/XRayInstrumentation.cpp  7
154 files changed, 13182 insertions(+), 5237 deletions(-)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index bb908618b6794..955524c2a676e 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -163,9 +163,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
unsigned Reg = *I;
- if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg)))
+ continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
State->UnionGroups(AliasReg, 0);
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 79ecc4308fe7f..09a37a77e9fbc 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -516,10 +516,9 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
ADS = true;
- AttrBuilder CallerAttrs(F->getAttributes(),
- AttributeSet::ReturnIndex);
+ AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
- AttributeSet::ReturnIndex);
+ AttributeList::ReturnIndex);
// Noalias is completely benign as far as calling convention goes, it
// shouldn't affect whether the call is a tail call.
@@ -613,25 +612,6 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return true;
}
-bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
- if (!GV->hasLinkOnceODRLinkage())
- return false;
-
- // We assume that anyone who sets global unnamed_addr on a non-constant knows
- // what they're doing.
- if (GV->hasGlobalUnnamedAddr())
- return true;
-
- // If it is a non constant variable, it needs to be uniqued across shared
- // objects.
- if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
- if (!Var->isConstant())
- return false;
- }
-
- return GV->hasAtLeastLocalUnnamedAddr();
-}
-
static void collectFuncletMembers(
DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
const MachineBasicBlock *MBB) {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 24fdbfc901fdf..6c18d56b82723 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -11,48 +11,102 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/AsmPrinter.h"
+#include "AsmPrinterHandler.h"
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
#include "WinException.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -69,6 +123,10 @@ static const char *const CodeViewLineTablesGroupDescription =
STATISTIC(EmittedInsts, "Number of machine instrs printed");
+static cl::opt<bool>
+ PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
+ cl::desc("Print 'sched: [latency:throughput]' in .s output"));
+
char AsmPrinter::ID = 0;
typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
@@ -78,7 +136,6 @@ static gcp_map_type &getGCMap(void *&P) {
return *(gcp_map_type*)P;
}
-
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
@@ -107,16 +164,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
- OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
- isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) {
- DD = nullptr;
- MMI = nullptr;
- LI = nullptr;
- MF = nullptr;
- CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
- CurrentFnBegin = nullptr;
- CurrentFnEnd = nullptr;
- GCMetadataPrinters = nullptr;
+ OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)) {
VerboseAsm = OutStreamer->isVerboseAsm();
}
@@ -171,6 +219,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
if (isVerbose())
AU.addRequired<MachineLoopInfo>();
@@ -223,7 +272,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
- OutStreamer->EmitFileDirective(M.getModuleIdentifier());
+ OutStreamer->EmitFileDirective(M.getSourceFileName());
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -571,7 +620,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
-void AsmPrinter::EmitDebugValue(const MCExpr *Value,
+void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
unsigned Size) const {
OutStreamer->EmitValue(Value, Size);
}
@@ -602,8 +651,23 @@ void AsmPrinter::EmitFunctionHeader() {
}
// Emit the prefix data.
- if (F->hasPrefixData())
- EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ if (F->hasPrefixData()) {
+ if (MAI->hasSubsectionsViaSymbols()) {
+ // Preserving prefix data on platforms which use subsections-via-symbols
+ // is a bit tricky. Here we introduce a symbol for the prefix data
+ // and use the .alt_entry attribute to mark the function's real entry point
+ // as an alternative entry point to the prefix-data symbol.
+ MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
+ OutStreamer->EmitLabel(PrefixSym);
+
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+
+ // Emit an .alt_entry directive for the actual function symbol.
+ OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
+ } else {
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ }
+ }
// Emit the CurrentFnSym. This is a virtual function to allow targets to
// do their wild and crazy things as required.
@@ -660,7 +724,8 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
+ AsmPrinter *AP) {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -668,6 +733,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
int FI;
const MachineFrameInfo &MFI = MF->getFrameInfo();
+ bool Commented = false;
// We assume a single instruction only has a spill or reload, not
// both.
@@ -675,24 +741,39 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Reload\n";
+ CommentOS << MMO->getSize() << "-byte Reload";
+ Commented = true;
}
} else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Reload";
+ Commented = true;
+ }
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Spill\n";
+ CommentOS << MMO->getSize() << "-byte Spill";
+ Commented = true;
}
} else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Spill";
+ Commented = true;
+ }
}
// Check for spill-induced copies
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
- CommentOS << " Reload Reuse\n";
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
+ Commented = true;
+ CommentOS << " Reload Reuse";
+ }
+
+ if (Commented && AP->EnablePrintSchedInfo)
+ // If any comment was added above and we need sched info comment then
+ // add this new comment just after the above comment w/o "\n" between them.
+ CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
+ else if (Commented)
+ CommentOS << "\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -883,6 +964,7 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
+ int NumInstsInFunction = 0;
for (auto &MBB : *MF) {
// Print a label for the basic block.
EmitBasicBlockStart(MBB);
@@ -892,7 +974,7 @@ void AsmPrinter::EmitFunctionBody() {
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugValue()) {
HasAnyRealCode = true;
- ++EmittedInsts;
+ ++NumInstsInFunction;
}
if (ShouldPrintDebugScopes) {
@@ -905,7 +987,7 @@ void AsmPrinter::EmitFunctionBody() {
}
if (isVerbose())
- emitComments(MI, OutStreamer->GetCommentOS());
+ emitComments(MI, OutStreamer->GetCommentOS(), this);
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -953,6 +1035,14 @@ void AsmPrinter::EmitFunctionBody() {
EmitBasicBlockEnd(MBB);
}
+ EmittedInsts += NumInstsInFunction;
+ MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount",
+ MF->getFunction()->getSubprogram(),
+ &MF->front());
+ R << ore::NV("NumInstructions", NumInstsInFunction)
+ << " instructions in function";
+ ORE->emit(R);
+
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
@@ -1238,7 +1328,7 @@ bool AsmPrinter::doFinalization(Module &M) {
break;
AliasStack.push_back(Cur);
}
- for (const GlobalAlias *AncestorAlias : reverse(AliasStack))
+ for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
emitGlobalIndirectSymbol(M, *AncestorAlias);
AliasStack.clear();
}
@@ -1311,19 +1401,28 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnSymForSize = CurrentFnBegin;
}
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
+ ? PrintSchedule
+ : STI.supportPrintSchedInfo();
}
namespace {
+
// Keep track the alignment, constpool entries per Section.
struct SectionCPs {
MCSection *S;
unsigned Alignment;
SmallVector<unsigned, 4> CPEs;
+
SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {}
};
-}
+
+} // end anonymous namespace
/// EmitConstantPool - Print to the current output stream assembly
/// representations of the constants in the constant pool MCP. This is
@@ -1547,7 +1646,6 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
OutStreamer->EmitValue(Value, EntrySize);
}
-
/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
/// special global used by LLVM. If so, emit it and return true, otherwise
/// do nothing and return false.
@@ -1598,13 +1696,16 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
}
namespace {
+
struct Structor {
- Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {}
- int Priority;
- llvm::Constant *Func;
- llvm::GlobalValue *ComdatKey;
+ int Priority = 0;
+ Constant *Func = nullptr;
+ GlobalValue *ComdatKey = nullptr;
+
+ Structor() = default;
};
-} // end namespace
+
+} // end anonymous namespace
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
@@ -1653,8 +1754,11 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
if (GlobalValue *GV = S.ComdatKey) {
- if (GV->hasAvailableExternallyLinkage())
- // If the associated variable is available_externally, some other TU
+ if (GV->isDeclarationForLinker())
+ // If the associated variable is not defined in this module
+ // (it might be available_externally, or have been an
+ // available_externally definition that was dropped by the
+ // EliminateAvailableExternally pass), some other TU
// will provide its dynamic initializer.
continue;
@@ -1931,7 +2035,6 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) {
return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
}
-
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
@@ -1972,7 +2075,6 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
static void emitGlobalConstantDataSequential(const DataLayout &DL,
const ConstantDataSequential *CDS,
AsmPrinter &AP) {
-
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, DL);
if (Value != -1) {
@@ -2006,7 +2108,6 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
CDS->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer->EmitZeros(Padding);
-
}
static void emitGlobalConstantArray(const DataLayout &DL,
@@ -2420,8 +2521,6 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
return OutContext.getOrCreateSymbol(NameStr);
}
-
-
/// PrintParentLoopComment - Print comments about parent loops of this one.
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
@@ -2486,7 +2585,6 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
-
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
@@ -2607,8 +2705,6 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return true;
}
-
-
GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
@@ -2639,7 +2735,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
}
/// Pin vtable to this file.
-AsmPrinterHandler::~AsmPrinterHandler() {}
+AsmPrinterHandler::~AsmPrinterHandler() = default;
void AsmPrinterHandler::markFunctionEnd() {}
@@ -2702,8 +2798,11 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
SledKind Kind) {
auto Fn = MI.getParent()->getParent()->getFunction();
auto Attr = Fn->getFnAttribute("function-instrument");
+ bool LogArgs = Fn->hasFnAttribute("xray-log-args");
bool AlwaysInstrument =
Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+ if (Kind == SledKind::FUNCTION_ENTER && LogArgs)
+ Kind = SledKind::LOG_ARGS_ENTER;
Sleds.emplace_back(
XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 57864e4e4d4f2..683e622e3d537 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -40,25 +40,24 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-namespace {
- struct SrcMgrDiagInfo {
- const MDNode *LocInfo;
- LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
- void *DiagContext;
- };
-}
-
/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
/// struct above.
static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
- SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ AsmPrinter::SrcMgrDiagInfo *DiagInfo =
+ static_cast<AsmPrinter::SrcMgrDiagInfo *>(diagInfo);
assert(DiagInfo && "Diagnostic context not passed down?");
+ // Look up a LocInfo for the buffer this diagnostic is coming from.
+ unsigned BufNum = DiagInfo->SrcMgr.FindBufferContainingLoc(Diag.getLoc());
+ const MDNode *LocInfo = nullptr;
+ if (BufNum > 0 && BufNum <= DiagInfo->LocInfos.size())
+ LocInfo = DiagInfo->LocInfos[BufNum-1];
+
// If the inline asm had metadata associated with it, pull out a location
// cookie corresponding to which line the error occurred on.
unsigned LocCookie = 0;
- if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ if (LocInfo) {
unsigned ErrorLine = Diag.getLineNo()-1;
if (ErrorLine >= LocInfo->getNumOperands())
ErrorLine = 0;
@@ -99,35 +98,39 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
return;
}
- SourceMgr SrcMgr;
- SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+ if (!DiagInfo) {
+ DiagInfo = make_unique<SrcMgrDiagInfo>();
- SrcMgrDiagInfo DiagInfo;
-
- // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
- LLVMContext &LLVMCtx = MMI->getModule()->getContext();
- bool HasDiagHandler = false;
- if (LLVMCtx.getInlineAsmDiagnosticHandler() != nullptr) {
- // If the source manager has an issue, we arrange for srcMgrDiagHandler
- // to be invoked, getting DiagInfo passed into it.
- DiagInfo.LocInfo = LocMDNode;
- DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
- DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
- SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
- HasDiagHandler = true;
+ MCContext &Context = MMI->getContext();
+ Context.setInlineSourceManager(&DiagInfo->SrcMgr);
+
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
+ DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
+ }
}
+ SourceMgr &SrcMgr = DiagInfo->SrcMgr;
+ SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+
std::unique_ptr<MemoryBuffer> Buffer;
- if (isNullTerminated)
- Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
- else
- Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+ // The inline asm source manager will outlive Str, so make a copy of the
+ // string for SourceMgr to own.
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
- SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+ unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+
+ // Store LocMDNode in DiagInfo, using BufNum as an identifier.
+ if (LocMDNode) {
+ DiagInfo->LocInfos.resize(BufNum);
+ DiagInfo->LocInfos[BufNum-1] = LocMDNode;
+ }
std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI));
+ createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
@@ -151,7 +154,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
emitInlineAsmEnd(STI, &TAP->getSTI());
- if (Res && !HasDiagHandler)
+
+ if (Res && !DiagInfo->DiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 83440513225c1..383b8cddb1a06 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -23,13 +23,13 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -38,7 +38,6 @@
using namespace llvm;
using namespace llvm::codeview;
-using namespace llvm::msf;
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
: DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(),
@@ -495,9 +494,9 @@ void CodeViewDebug::emitTypeInformation() {
// comments. The MSVC linker doesn't do much type record validation,
// so the first link of an invalid type record can succeed while
// subsequent links will fail with LNK1285.
- ByteStream Stream(Record);
+ BinaryByteStream Stream(Record, llvm::support::little);
CVTypeArray Types;
- StreamReader Reader(Stream);
+ BinaryStreamReader Reader(Stream);
Error E = Reader.readArray(Types, Reader.getLength());
if (!E) {
TypeVisitorCallbacks C;
@@ -948,10 +947,10 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
// Handle fragments.
auto Fragment = DIExpr->getFragmentInfo();
- if (DIExpr && Fragment) {
+ if (Fragment) {
IsSubfield = true;
StructOffset = Fragment->OffsetInBits / 8;
- } else if (DIExpr && DIExpr->getNumElements() > 0) {
+ } else if (DIExpr->getNumElements() > 0) {
continue; // Ignore unrecognized exprs.
}
@@ -1014,14 +1013,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
}
}
-void CodeViewDebug::beginFunction(const MachineFunction *MF) {
- assert(!CurFn && "Can't process two functions at once!");
-
- if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
- return;
-
- DebugHandlerBase::beginFunction(MF);
-
+void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
const Function *GV = MF->getFunction();
assert(FnDebugInfo.count(GV) == false);
CurFn = &FnDebugInfo[GV];
@@ -1150,27 +1142,6 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
-
- // We want to assert that the element type multiplied by the array lengths
- // match the size of the overall array. However, if we don't have complete
- // type information for the base type, we can't make this assertion. This
- // happens if limited debug info is enabled in this case:
- // struct VTableOptzn { VTableOptzn(); virtual ~VTableOptzn(); };
- // VTableOptzn array[3];
- // The DICompositeType of VTableOptzn will have size zero, and the array will
- // have size 3 * sizeof(void*), and we should avoid asserting.
- //
- // There is a related bug in the front-end where an array of a structure,
- // which was declared as incomplete structure first, ends up not getting a
- // size assigned to it. (PR28303)
- // Example:
- // struct A(*p)[3];
- // struct A { int f; } a[3];
- bool PartiallyIncomplete = false;
- if (Ty->getSizeInBits() == 0 || ElementSize == 0) {
- PartiallyIncomplete = true;
- }
-
// Add subranges to array type.
DINodeArray Elements = Ty->getElements();
for (int i = Elements.size() - 1; i >= 0; --i) {
@@ -1185,16 +1156,14 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
// Variable Length Array (VLA) has Count equal to '-1'.
// Replace with Count '1', assume it is the minimum VLA length.
// FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
- if (Count == -1) {
+ if (Count == -1)
Count = 1;
- PartiallyIncomplete = true;
- }
// Update the element size and element type index for subsequent subranges.
ElementSize *= Count;
// If this is the outermost array, use the size from the array. It will be
- // more accurate if PartiallyIncomplete is true.
+ // more accurate if we had a VLA or an incomplete element type size.
uint64_t ArraySize =
(i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize;
@@ -1203,9 +1172,6 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
ElementTypeIndex = TypeTable.writeKnownType(AR);
}
- (void)PartiallyIncomplete;
- assert(PartiallyIncomplete || ElementSize == (Ty->getSizeInBits() / 8));
-
return ElementTypeIndex;
}
@@ -2115,18 +2081,13 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
}
}
-void CodeViewDebug::endFunction(const MachineFunction *MF) {
- if (!Asm || !CurFn) // We haven't created any debug info for this function.
- return;
-
+void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
const Function *GV = MF->getFunction();
assert(FnDebugInfo.count(GV));
assert(CurFn == &FnDebugInfo[GV]);
collectVariableInfo(GV->getSubprogram());
- DebugHandlerBase::endFunction(MF);
-
// Don't emit anything if we don't have any line tables.
if (!CurFn->HaveLineInfo) {
FnDebugInfo.erase(GV);
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 3dd4315e4c2f1..343384c517728 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -299,6 +299,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
unsigned getPointerSizeInBytes();
+protected:
+ /// \brief Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// \brief Gather post-function debug information.
+ void endFunctionImpl(const MachineFunction *) override;
+
public:
CodeViewDebug(AsmPrinter *Asm);
@@ -307,12 +314,6 @@ public:
/// \brief Emit the COFF section that holds the line table information.
void endModule() override;
- /// \brief Gather pre-function debug information.
- void beginFunction(const MachineFunction *MF) override;
-
- /// \brief Gather post-function debug information.
- void endFunction(const MachineFunction *) override;
-
/// \brief Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
};
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 879918995472c..b510e0ef36ac6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -42,6 +42,8 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
// overloads. Otherwise MSVC 2010 thinks this call is ambiguous.
ID.AddInteger(unsigned(Attribute));
ID.AddInteger(unsigned(Form));
+ if (Form == dwarf::DW_FORM_implicit_const)
+ ID.AddInteger(Value);
}
//===----------------------------------------------------------------------===//
@@ -107,13 +109,20 @@ void DIEAbbrev::print(raw_ostream &O) {
O << " "
<< dwarf::AttributeString(Data[i].getAttribute())
<< " "
- << dwarf::FormEncodingString(Data[i].getForm())
- << '\n';
+ << dwarf::FormEncodingString(Data[i].getForm());
+
+ if (Data[i].getForm() == dwarf::DW_FORM_implicit_const)
+ O << " " << Data[i].getValue();
+
+ O << '\n';
}
}
-LLVM_DUMP_METHOD
-void DIEAbbrev::dump() { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEAbbrev::dump() {
+ print(dbgs());
+}
+#endif
//===----------------------------------------------------------------------===//
// DIEAbbrevSet Implementation
@@ -249,10 +258,11 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
O << "\n";
}
-LLVM_DUMP_METHOD
-void DIE::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIE::dump() {
print(dbgs());
}
+#endif
unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
DIEAbbrevSet &AbbrevSet,
@@ -340,10 +350,11 @@ void DIEValue::print(raw_ostream &O) const {
}
}
-LLVM_DUMP_METHOD
-void DIEValue::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEValue::dump() const {
print(dbgs());
}
+#endif
//===----------------------------------------------------------------------===//
// DIEInteger Implementation
@@ -354,57 +365,42 @@ void DIEValue::dump() const {
void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_implicit_const:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
Asm->OutStreamer->AddBlankLine();
return;
case dwarf::DW_FORM_flag:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref1:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data1:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
case dwarf::DW_FORM_ref2:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data2:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
case dwarf::DW_FORM_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref4:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data4:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
case dwarf::DW_FORM_ref8:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_sig8:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data8:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup8:
case dwarf::DW_FORM_GNU_ref_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_strp_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_line_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_sec_offset:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp_sup:
- LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sup:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_addr:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_addr:
Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form));
return;
case dwarf::DW_FORM_GNU_str_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_addr_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_udata:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_udata:
Asm->EmitULEB128(Integer);
return;
@@ -419,35 +415,41 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
///
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
- case dwarf::DW_FORM_implicit_const: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_flag_present: return 0;
- case dwarf::DW_FORM_flag: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref1: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data1: return sizeof(int8_t);
- case dwarf::DW_FORM_ref2: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data2: return sizeof(int16_t);
- case dwarf::DW_FORM_ref4: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data4: return sizeof(int32_t);
- case dwarf::DW_FORM_ref8: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sig8: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_implicit_const:
+ case dwarf::DW_FORM_flag_present:
+ return 0;
+ case dwarf::DW_FORM_flag:
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
+ return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
+ return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
+ return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8:
+ case dwarf::DW_FORM_ref_sig8:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_ref_sup8:
+ return sizeof(int64_t);
case dwarf::DW_FORM_ref_addr:
if (AP->getDwarfVersion() == 2)
return AP->getPointerSize();
LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_ref_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_strp_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_line_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_sec_offset:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp_sup:
- LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sup:
switch (AP->OutStreamer->getContext().getDwarfFormat()) {
case dwarf::DWARF32:
return 4;
@@ -456,11 +458,8 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
}
llvm_unreachable("Invalid DWARF format");
case dwarf::DW_FORM_GNU_str_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_addr_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_udata:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_udata:
return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata:
@@ -484,7 +483,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitDebugValue(Expr, SizeOf(AP, Form));
+ AP->EmitDebugThreadLocal(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index d8ecc7ccfb9bf..8e3b88d0af0e5 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -490,9 +490,9 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) {
Hash.final(Result);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -514,7 +514,7 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
Hash.final(Result);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
}
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 94190981e88ec..1d63e33a4d33a 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -115,12 +115,35 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
return getBaseTypeSize(BaseType);
}
+bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo())
+ return false;
+ auto *SP = MF->getFunction()->getSubprogram();
+ if (!SP)
+ return false;
+ assert(SP->getUnit());
+ auto EK = SP->getUnit()->getEmissionKind();
+ if (EK == DICompileUnit::NoDebug)
+ return false;
+ return true;
+}
+
void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
+ assert(Asm);
+ PrevInstBB = nullptr;
+
+ if (!hasDebugInfo(MMI, MF)) {
+ skippedNonDebugFunction();
+ return;
+ }
+
// Grab the lexical scopes for the function, if we don't have any of those
// then we're not going to be able to do anything.
LScopes.initialize(*MF);
- if (LScopes.empty())
+ if (LScopes.empty()) {
+ beginFunctionImpl(MF);
return;
+ }
// Make sure that each lexical scope will have a begin/end label.
identifyScopeMarkers();
@@ -167,6 +190,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
PrevInstLoc = DebugLoc();
PrevLabel = Asm->getFunctionBegin();
+ beginFunctionImpl(MF);
}
void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
@@ -228,6 +252,8 @@ void DebugHandlerBase::endInstruction() {
}
void DebugHandlerBase::endFunction(const MachineFunction *MF) {
+ if (hasDebugInfo(MMI, MF))
+ endFunctionImpl(MF);
DbgValues.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index c00fa189d94af..659a921e1fc56 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -80,6 +80,10 @@ protected:
LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
}
+ virtual void beginFunctionImpl(const MachineFunction *MF) = 0;
+ virtual void endFunctionImpl(const MachineFunction *MF) = 0;
+ virtual void skippedNonDebugFunction() {}
+
// AsmPrinterHandler overrides.
public:
void beginInstruction(const MachineInstr *MI) override;
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 36fb1507ddc65..a68e8cc6b4b31 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -76,7 +76,8 @@ public:
const DIExpression *getExpression() const { return Expression; }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
- void dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
if (isLocation()) {
llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
if (Loc.isIndirect())
@@ -90,6 +91,7 @@ public:
if (Expression)
Expression->dump();
}
+#endif
};
private:
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d904372af589b..a550ff2fb90f3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,3 +1,16 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -129,67 +142,72 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
- bool AllConstant = std::all_of(
- GlobalExprs.begin(), GlobalExprs.end(),
- [&](const GlobalExpr GE) {
- return GE.Expr && GE.Expr->isConstant();
- });
-
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
const DIExpression *Expr = GE.Expr;
+
// For compatibility with DWARF 3 and earlier,
// DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes
// DW_AT_const_value(X).
if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
+ addToAccelTable = true;
addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1));
- // We cannot describe the location of dllimport'd variables: the
- // computation of their address requires loads from the IAT.
- } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) {
- if (!Loc) {
- Loc = new (DIEValueAllocator) DIELoc;
- DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
- }
+ break;
+ }
+
+ // We cannot describe the location of dllimport'd variables: the
+ // computation of their address requires loads from the IAT.
+ if (Global && Global->hasDLLImportStorageClass())
+ continue;
+
+ // Nothing to describe without address or constant.
+ if (!Global && (!Expr || !Expr->isConstant()))
+ continue;
+
+ if (!Loc) {
addToAccelTable = true;
- if (Global) {
- const MCSymbol *Sym = Asm->getSymbol(Global);
- if (Global->isThreadLocal()) {
- if (Asm->TM.Options.EmulatedTLS) {
- // TODO: add debug info for emulated thread local mode.
- } else {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
- } else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
- }
- // 3) followed by an OP to make the debugger do a TLS lookup.
+ Loc = new (DIEValueAllocator) DIELoc;
+ DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ }
+
+ if (Global) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
+ } else {
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
}
- if (Expr) {
- DwarfExpr->addFragmentOffset(Expr);
- DwarfExpr->AddExpression(Expr);
- }
+ }
+ if (Expr) {
+ DwarfExpr->addFragmentOffset(Expr);
+ DwarfExpr->addExpression(Expr);
}
}
if (Loc)
@@ -507,8 +525,8 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addFragmentOffset(Expr);
- DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
- DwarfExpr.AddExpression(Expr);
+ DwarfExpr.addUnsignedConstant(DVInsn->getOperand(0).getImm());
+ DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
} else
addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
@@ -532,9 +550,15 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Fragment.Expr);
- DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- FrameReg, Offset);
- DwarfExpr.AddExpression(Fragment.Expr);
+ SmallVector<uint64_t, 8> Ops;
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Offset);
+ Ops.push_back(dwarf::DW_OP_deref);
+ Ops.append(Fragment.Expr->elements_begin(), Fragment.Expr->elements_end());
+ DIExpressionCursor Expr(Ops);
+ DwarfExpr.addMachineRegExpression(
+ *Asm->MF->getSubtarget().getRegisterInfo(), Expr, FrameReg);
+ DwarfExpr.addExpression(std::move(Expr));
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
@@ -690,11 +714,14 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
Asm->OutStreamer->EmitLabel(LabelBegin);
}
- DwarfUnit::emitHeader(UseOffsets);
+ dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile
+ : DD->useSplitDwarf() ? dwarf::DW_UT_skeleton
+ : dwarf::DW_UT_compile;
+ DwarfUnit::emitCommonHeader(UseOffsets, UT);
}
/// addGlobalName - Add a new global name to the compile unit.
-void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
+void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
if (includeMinimalInlineScopes())
return;
@@ -702,6 +729,18 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
GlobalNames[FullName] = &Die;
}
+void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
+ const DIScope *Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ // Insert, allowing the entry to remain as-is if it's already present
+ // This way the CU-level type DIE is preferred over the "can't describe this
+ // type as a unit offset because it's not really in the CU at all, it's only
+ // in a type unit"
+ GlobalNames.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
@@ -711,6 +750,18 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
GlobalTypes[FullName] = &Die;
}
+void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
+ const DIScope *Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
+ // Insert, allowing the entry to remain as-is if it's already present
+ // This way the CU-level type DIE is preferred over the "can't describe this
+ // type as a unit offset because it's not really in the CU at all, it's only
+ // in a type unit"
+ GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
@@ -727,22 +778,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression Expr(*Asm, *this, *Loc);
-
- bool validReg;
- if (Location.isReg())
- validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg());
- else
- validReg =
- Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg(), Location.getOffset());
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- if (!validReg)
+ SmallVector<uint64_t, 8> Ops;
+ if (Location.isIndirect()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
+ DIExpressionCursor Cursor(Ops);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Expr.finalize());
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
/// Start with the address based on the location provided, and generate the
@@ -754,23 +805,24 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- const DIExpression *Expr = DV.getSingleExpression();
- DIExpressionCursor ExprCursor(Expr);
+ const DIExpression *DIExpr = DV.getSingleExpression();
+ DwarfExpr.addFragmentOffset(DIExpr);
+
+ SmallVector<uint64_t, 8> Ops;
+ if (Location.isIndirect()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- auto Reg = Location.getReg();
- DwarfExpr.addFragmentOffset(Expr);
- bool ValidReg =
- Location.getOffset()
- ? DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset())
- : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg);
-
- if (!ValidReg)
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
-
- DwarfExpr.AddExpression(std::move(ExprCursor));
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
/// Add a Dwarf loclistptr attribute data and value.
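Both addAddress and addComplexAddress above now canonicalize an indirect MachineLocation into the prefix {DW_OP_plus, Offset, DW_OP_deref} before handing it to addMachineRegExpression; a comment-only sketch, with an illustrative offset, of what that ends up encoding:

    // Illustrative only: MachineLocation {Reg, indirect, Offset = 16}
    //   prefix prepended here:           DW_OP_plus 16, DW_OP_deref
    //   after addMachineRegExpression:   DW_OP_breg<Reg> +16
    //   if Reg is the frame register:    DW_OP_fbreg +16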
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index a8025f1d15219..9a64b4b76b06e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -210,12 +210,19 @@ public:
}
/// Add a new global name to the compile unit.
- void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override;
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+
+ /// Add a new global name present in a type unit to this compile unit.
+ void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context);
/// Add a new global type to the compile unit.
void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) override;
+ /// Add a new global type present in a type unit to this compile unit.
+ void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context);
+
const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 91a3d0989cc5e..5ce1113092088 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -39,7 +39,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
@@ -127,17 +126,17 @@ static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
static const char *const DbgTimerDescription = "DWARF Debug Writer";
-void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
-void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
+void DebugLocDwarfExpression::emitSigned(int64_t Value) {
BS.EmitSLEB128(Value, Twine(Value));
}
-void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
+void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
BS.EmitULEB128(Value, Twine(Value));
}
@@ -200,6 +199,12 @@ const DIType *DbgVariable::getType() const {
}
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
+ if (FrameIndexExprs.size() == 1)
+ return FrameIndexExprs;
+
+ assert(all_of(FrameIndexExprs,
+ [](const FrameIndexExpr &A) { return A.Expr->isFragment(); }) &&
+ "multiple FI expressions without DW_OP_LLVM_fragment");
std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
@@ -418,7 +423,14 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
NewCU.getUniqueID(), CompilationDir);
- NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer());
+ StringRef Producer = DIUnit->getProducer();
+ StringRef Flags = DIUnit->getFlags();
+ if (!Flags.empty()) {
+ std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
+ NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
+ } else
+ NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
+
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
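Hypothetical DW_AT_producer values (made up, not taken from real output) showing the shape of the combined attribute when the DICompileUnit carries recorded flags:

    //   without flags: DW_AT_producer ("clang version 5.0.0 (trunk)")
    //   with flags:    DW_AT_producer ("clang version 5.0.0 (trunk) -O2 -g")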
@@ -544,7 +556,6 @@ void DwarfDebug::beginModule() {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
- if (!RT->isExternalTypeRef())
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
@@ -740,6 +751,7 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
LexicalScope *Scope) {
+ assert(Scope && Scope->isAbstractScope());
auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
@@ -1137,20 +1149,9 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
-void DwarfDebug::beginFunction(const MachineFunction *MF) {
+void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
- // If there's no debug info for the function we're not going to do anything.
- if (!MMI->hasDebugInfo())
- return;
-
- auto DI = MF->getFunction()->getSubprogram();
- if (!DI)
- return;
-
- // Grab the lexical scopes for the function, if we don't have any of those
- // then we're not going to be able to do anything.
- DebugHandlerBase::beginFunction(MF);
if (LScopes.empty())
return;
@@ -1189,23 +1190,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
}
+void DwarfDebug::skippedNonDebugFunction() {
+ // If we don't have a subprogram for this function then there will be a hole
+ // in the range information. Keep note of this by setting the previously used
+ // section to nullptr.
+ PrevCU = nullptr;
+ CurFn = nullptr;
+}
+
// Gather and emit post-function debug information.
-void DwarfDebug::endFunction(const MachineFunction *MF) {
+void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
+ const DISubprogram *SP = MF->getFunction()->getSubprogram();
+
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
- const DISubprogram *SP = MF->getFunction()->getSubprogram();
- if (!MMI->hasDebugInfo() || !SP ||
- SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) {
- // If we don't have a subprogram for this function then there will be a hole
- // in the range information. Keep note of this by setting the previously
- // used section to nullptr.
- PrevCU = nullptr;
- CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
- return;
- }
-
// Set DwarfDwarfCompileUnitID in MCContext to default value.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
@@ -1220,17 +1219,14 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
- // subroutines inside it.
- if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
+ // subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
+ // is still needed as we need its source location.
+ if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
+ TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
- assert(DbgValues.empty());
- // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed
- // by a -gmlt CU. Add a test and remove this assertion.
- assert(AbstractVariables.empty());
PrevLabel = nullptr;
CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
return;
}
@@ -1266,7 +1262,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
InfoHolder.getScopeVariables().clear();
PrevLabel = nullptr;
CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
}
// Register a source line with debug info. Returns the unique label that was
@@ -1361,6 +1356,18 @@ void DwarfDebug::emitAccelTypes() {
/// computeIndexValue - Compute the gdb index value for the DIE and CU.
static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
const DIE *Die) {
+ // Entities that ended up only in a Type Unit reference the CU instead (since
+ // the pub entry has offsets within the CU there's no real offset that can be
+ // provided anyway). As it happens, all such entities (namespaces and types,
+ // types only in C++ at that) are rendered as TYPE+EXTERNAL. If this turns out
+ // not to be true it would be necessary to persist this information from the
+ // point at which the entry is added to the index data structure - since by
+ // the time the index is built from that, the original type/namespace DIE in a
+ // type unit has already been destroyed so it can't be queried for properties
+ // like tag, etc.
+ if (Die->getTag() == dwarf::DW_TAG_compile_unit)
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE,
+ dwarf::GIEL_EXTERNAL);
dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
// We could have a specification DIE that has most of our knowledge,
@@ -1498,27 +1505,37 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
DwarfExpression &DwarfExpr) {
- DIExpressionCursor ExprCursor(Value.getExpression());
- DwarfExpr.addFragmentOffset(Value.getExpression());
+ auto *DIExpr = Value.getExpression();
+ DIExpressionCursor ExprCursor(DIExpr);
+ DwarfExpr.addFragmentOffset(DIExpr);
// Regular entry.
if (Value.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
BT->getEncoding() == dwarf::DW_ATE_signed_char))
- DwarfExpr.AddSignedConstant(Value.getInt());
+ DwarfExpr.addSignedConstant(Value.getInt());
else
- DwarfExpr.AddUnsignedConstant(Value.getInt());
+ DwarfExpr.addUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
- MachineLocation Loc = Value.getLoc();
+ MachineLocation Location = Value.getLoc();
+
+ SmallVector<uint64_t, 8> Ops;
+ // FIXME: Should this condition be Location.isIndirect() instead?
+ if (Location.getOffset()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
- if (Loc.getOffset())
- DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
- else
- DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg());
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ return DwarfExpr.addExpression(std::move(Cursor));
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
- DwarfExpr.AddUnsignedConstant(RawBytes);
+ DwarfExpr.addUnsignedConstant(RawBytes);
}
- DwarfExpr.AddExpression(std::move(ExprCursor));
+ DwarfExpr.addExpression(std::move(ExprCursor));
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
@@ -1940,11 +1957,11 @@ uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
MD5::MD5Result Result;
Hash.final(Result);
- return support::endian::read64le(Result + 8);
+ return Result.high();
}
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 253e3f06200ed..8a96e7867b6e3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -89,7 +89,7 @@ public:
assert(!MInsn && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
- assert(~FI && "Expected valid index");
+ assert(FI != INT_MAX && "Expected valid index");
FrameIndexExprs.push_back({FI, E});
}
@@ -448,6 +448,15 @@ class DwarfDebug : public DebugHandlerBase {
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P);
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function debug information.
+ void endFunctionImpl(const MachineFunction *MF) override;
+
+ void skippedNonDebugFunction() override;
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -463,12 +472,6 @@ public:
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
- /// Gather pre-function debug information.
- void beginFunction(const MachineFunction *MF) override;
-
- /// Gather and emit post-function debug information.
- void endFunction(const MachineFunction *MF) override;
-
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 61b2c7e658424..debe88f3b1ee1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -22,79 +22,76 @@
using namespace llvm;
-void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
+void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
- EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
} else {
- EmitOp(dwarf::DW_OP_regx, Comment);
- EmitUnsigned(DwarfReg);
+ emitOp(dwarf::DW_OP_regx, Comment);
+ emitUnsigned(DwarfReg);
}
}
-void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) {
+void DwarfExpression::addBReg(int DwarfReg, int Offset) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
- EmitOp(dwarf::DW_OP_breg0 + DwarfReg);
+ emitOp(dwarf::DW_OP_breg0 + DwarfReg);
} else {
- EmitOp(dwarf::DW_OP_bregx);
- EmitUnsigned(DwarfReg);
+ emitOp(dwarf::DW_OP_bregx);
+ emitUnsigned(DwarfReg);
}
- EmitSigned(Offset);
- if (Deref)
- EmitOp(dwarf::DW_OP_deref);
+ emitSigned(Offset);
}
-void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+void DwarfExpression::addFBReg(int Offset) {
+ emitOp(dwarf::DW_OP_fbreg);
+ emitSigned(Offset);
+}
+
+void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
if (!SizeInBits)
return;
const unsigned SizeOfByte = 8;
if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
- EmitOp(dwarf::DW_OP_bit_piece);
- EmitUnsigned(SizeInBits);
- EmitUnsigned(OffsetInBits);
+ emitOp(dwarf::DW_OP_bit_piece);
+ emitUnsigned(SizeInBits);
+ emitUnsigned(OffsetInBits);
} else {
- EmitOp(dwarf::DW_OP_piece);
+ emitOp(dwarf::DW_OP_piece);
unsigned ByteSize = SizeInBits / SizeOfByte;
- EmitUnsigned(ByteSize);
+ emitUnsigned(ByteSize);
}
this->OffsetInBits += SizeInBits;
}
-void DwarfExpression::AddShr(unsigned ShiftBy) {
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(ShiftBy);
- EmitOp(dwarf::DW_OP_shr);
+void DwarfExpression::addShr(unsigned ShiftBy) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(ShiftBy);
+ emitOp(dwarf::DW_OP_shr);
}
-bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI,
- unsigned MachineReg, int Offset) {
- if (isFrameRegister(TRI, MachineReg)) {
- // If variable offset is based in frame register then use fbreg.
- EmitOp(dwarf::DW_OP_fbreg);
- EmitSigned(Offset);
- return true;
- }
-
- int DwarfReg = TRI.getDwarfRegNum(MachineReg, false);
- if (DwarfReg < 0)
- return false;
-
- AddRegIndirect(DwarfReg, Offset);
- return true;
+void DwarfExpression::addAnd(unsigned Mask) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Mask);
+ emitOp(dwarf::DW_OP_and);
}
-bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
+bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned MachineReg, unsigned MaxSize) {
- if (!TRI.isPhysicalRegister(MachineReg))
+ if (!TRI.isPhysicalRegister(MachineReg)) {
+ if (isFrameRegister(TRI, MachineReg)) {
+ DwarfRegs.push_back({-1, 0, nullptr});
+ return true;
+ }
return false;
+ }
int Reg = TRI.getDwarfRegNum(MachineReg, false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
- AddReg(Reg);
+ DwarfRegs.push_back({Reg, 0, nullptr});
return true;
}
@@ -106,7 +103,7 @@ bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
- AddReg(Reg, "super-register");
+ DwarfRegs.push_back({Reg, 0, "super-register"});
// Use a DW_OP_bit_piece to describe the sub-register.
setSubRegisterPiece(Size, RegOffset);
return true;
@@ -136,72 +133,101 @@ bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
// If this sub-register has a DWARF number and we haven't covered
// its range, emit a DWARF piece for it.
if (Reg >= 0 && Intersection.any()) {
- AddReg(Reg, "sub-register");
+ // Emit a piece for any gap in the coverage.
+ if (Offset > CurPos)
+ DwarfRegs.push_back({-1, Offset - CurPos, nullptr});
+ DwarfRegs.push_back(
+ {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
if (Offset >= MaxSize)
break;
- // Emit a piece for the any gap in the coverage.
- if (Offset > CurPos)
- AddOpPiece(Offset - CurPos);
- AddOpPiece(std::min<unsigned>(Size, MaxSize - Offset));
- CurPos = Offset + Size;
// Mark it as emitted.
Coverage.set(Offset, Offset + Size);
+ CurPos = Offset + Size;
}
}
return CurPos;
}
-void DwarfExpression::AddStackValue() {
+void DwarfExpression::addStackValue() {
if (DwarfVersion >= 4)
- EmitOp(dwarf::DW_OP_stack_value);
+ emitOp(dwarf::DW_OP_stack_value);
}
-void DwarfExpression::AddSignedConstant(int64_t Value) {
- EmitOp(dwarf::DW_OP_consts);
- EmitSigned(Value);
- AddStackValue();
+void DwarfExpression::addSignedConstant(int64_t Value) {
+ emitOp(dwarf::DW_OP_consts);
+ emitSigned(Value);
+ addStackValue();
}
-void DwarfExpression::AddUnsignedConstant(uint64_t Value) {
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Value);
- AddStackValue();
+void DwarfExpression::addUnsignedConstant(uint64_t Value) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Value);
+ addStackValue();
}
-void DwarfExpression::AddUnsignedConstant(const APInt &Value) {
+void DwarfExpression::addUnsignedConstant(const APInt &Value) {
unsigned Size = Value.getBitWidth();
const uint64_t *Data = Value.getRawData();
// Chop it up into 64-bit pieces, because that's the maximum that
- // AddUnsignedConstant takes.
+ // addUnsignedConstant takes.
unsigned Offset = 0;
while (Offset < Size) {
- AddUnsignedConstant(*Data++);
+ addUnsignedConstant(*Data++);
if (Offset == 0 && Size <= 64)
break;
- AddOpPiece(std::min(Size-Offset, 64u), Offset);
+ addOpPiece(std::min(Size-Offset, 64u), Offset);
Offset += 64;
}
}
-bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
+bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &ExprCursor,
unsigned MachineReg,
unsigned FragmentOffsetInBits) {
- if (!ExprCursor)
- return AddMachineReg(TRI, MachineReg);
+ auto Fragment = ExprCursor.getFragmentInfo();
+ if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U))
+ return false;
- // Pattern-match combinations for which more efficient representations exist
- // first.
- bool ValidReg = false;
+ bool HasComplexExpression = false;
auto Op = ExprCursor.peek();
+ if (Op && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
+ HasComplexExpression = true;
+
+ // If the register can only be described by a complex expression (i.e.,
+ // multiple subregisters) it doesn't safely compose with another complex
+ // expression. For example, it is not possible to apply a DW_OP_deref
+ // operation to multiple DW_OP_pieces.
+ if (HasComplexExpression && DwarfRegs.size() > 1) {
+ DwarfRegs.clear();
+ return false;
+ }
+
+ // Handle simple register locations.
+ if (!HasComplexExpression) {
+ for (auto &Reg : DwarfRegs) {
+ if (Reg.DwarfRegNo >= 0)
+ addReg(Reg.DwarfRegNo, Reg.Comment);
+ addOpPiece(Reg.Size);
+ }
+ DwarfRegs.clear();
+ return true;
+ }
+
+ assert(DwarfRegs.size() == 1);
+ auto Reg = DwarfRegs[0];
+ bool FBReg = isFrameRegister(TRI, MachineReg);
+ assert(Reg.Size == 0 && "subregister has same size as superregister");
+
+ // Pattern-match combinations for which more efficient representations exist.
switch (Op->getOp()) {
default: {
- auto Fragment = ExprCursor.getFragmentInfo();
- ValidReg = AddMachineReg(TRI, MachineReg,
- Fragment ? Fragment->SizeInBits : ~1U);
+ if (FBReg)
+ addFBReg(0);
+ else
+ addReg(Reg.DwarfRegNo, 0);
break;
}
case dwarf::DW_OP_plus:
@@ -210,28 +236,42 @@ bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
// [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
auto N = ExprCursor.peekNext();
if (N && N->getOp() == dwarf::DW_OP_deref) {
- unsigned Offset = Op->getArg(0);
- ValidReg = AddMachineRegIndirect(
- TRI, MachineReg, Op->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
+ int Offset = Op->getArg(0);
+ int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
+ if (FBReg)
+ addFBReg(SignedOffset);
+ else
+ addBReg(Reg.DwarfRegNo, SignedOffset);
+
ExprCursor.consume(2);
- } else
- ValidReg = AddMachineReg(TRI, MachineReg);
+ break;
+ }
+ addReg(Reg.DwarfRegNo, 0);
break;
}
case dwarf::DW_OP_deref:
// [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(TRI, MachineReg);
+ if (FBReg)
+ addFBReg(0);
+ else
+ addBReg(Reg.DwarfRegNo, 0);
ExprCursor.take();
break;
}
-
- return ValidReg;
+ DwarfRegs.clear();
+ return true;
}
-void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
+void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
unsigned FragmentOffsetInBits) {
while (ExprCursor) {
auto Op = ExprCursor.take();
+
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ if (SubRegisterSizeInBits && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
+ maskSubRegister();
+
switch (Op->getOp()) {
case dwarf::DW_OP_LLVM_fragment: {
unsigned SizeInBits = Op->getArg(1);
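A worked example of the pattern matching in addMachineRegExpression above; the DWARF register number 5 is purely illustrative (when the register is the frame register, DW_OP_fbreg is emitted instead of reg/breg):

    //   DIExpression elements             emitted location expression
    //   {}                                DW_OP_reg5
    //   {DW_OP_deref}                     DW_OP_breg5 +0
    //   {DW_OP_plus, 8, DW_OP_deref}      DW_OP_breg5 +8   (both ops consumed)
    //   {DW_OP_minus, 8, DW_OP_deref}     DW_OP_breg5 -8   (both ops consumed)
    //   {DW_OP_plus, 8}                   DW_OP_reg5, then DW_OP_plus_uconst 8
    //                                     emitted later by addExpression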
@@ -241,39 +281,45 @@ void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
// location.
assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
- // If \a AddMachineReg already emitted DW_OP_piece operations to represent
+ // If \a addMachineReg already emitted DW_OP_piece operations to represent
// a super-register by splicing together sub-registers, subtract the size
// of the pieces that was already emitted.
SizeInBits -= OffsetInBits - FragmentOffset;
- // If \a AddMachineReg requested a DW_OP_bit_piece to stencil out a
+ // If \a addMachineReg requested a DW_OP_bit_piece to stencil out a
// sub-register that is smaller than the current fragment's size, use it.
if (SubRegisterSizeInBits)
SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
- AddOpPiece(SizeInBits, SubRegisterOffsetInBits);
+ addOpPiece(SizeInBits, SubRegisterOffsetInBits);
setSubRegisterPiece(0, 0);
break;
}
case dwarf::DW_OP_plus:
- EmitOp(dwarf::DW_OP_plus_uconst);
- EmitUnsigned(Op->getArg(0));
+ emitOp(dwarf::DW_OP_plus_uconst);
+ emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_minus:
// There is no OP_minus_uconst.
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Op->getArg(0));
- EmitOp(dwarf::DW_OP_minus);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Op->getArg(0));
+ emitOp(dwarf::DW_OP_minus);
break;
case dwarf::DW_OP_deref:
- EmitOp(dwarf::DW_OP_deref);
+ emitOp(dwarf::DW_OP_deref);
break;
case dwarf::DW_OP_constu:
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Op->getArg(0));
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
- AddStackValue();
+ addStackValue();
+ break;
+ case dwarf::DW_OP_swap:
+ emitOp(dwarf::DW_OP_swap);
+ break;
+ case dwarf::DW_OP_xderef:
+ emitOp(dwarf::DW_OP_xderef);
break;
default:
llvm_unreachable("unhandled opcode found in expression");
@@ -281,9 +327,25 @@ void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
}
}
+/// Add masking operations to stencil out a subregister.
+void DwarfExpression::maskSubRegister() {
+ assert(SubRegisterSizeInBits && "no subregister was registered");
+ if (SubRegisterOffsetInBits > 0)
+ addShr(SubRegisterOffsetInBits);
+ uint64_t Mask = (1ULL << (uint64_t)SubRegisterSizeInBits) - 1ULL;
+ addAnd(Mask);
+}
+
+
void DwarfExpression::finalize() {
- if (SubRegisterSizeInBits)
- AddOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
+ assert(DwarfRegs.size() == 0 && "dwarf registers not emitted");
+ // Emit any outstanding DW_OP_piece operations to mask out subregisters.
+ if (SubRegisterSizeInBits == 0)
+ return;
+ // Don't emit a DW_OP_piece for a subregister at offset 0.
+ if (SubRegisterOffsetInBits == 0)
+ return;
+ addOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
}
void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
@@ -294,6 +356,6 @@ void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
assert(FragmentOffset >= OffsetInBits &&
"overlapping or duplicate fragments");
if (FragmentOffset > OffsetInBits)
- AddOpPiece(FragmentOffset - OffsetInBits);
+ addOpPiece(FragmentOffset - OffsetInBits);
OffsetInBits = FragmentOffset;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index fd90fa05bc32a..e8dc211eb3c22 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -84,9 +84,19 @@ public:
/// entry.
class DwarfExpression {
protected:
- unsigned DwarfVersion;
+ /// Holds information about all subregisters comprising a register location.
+ struct Register {
+ int DwarfRegNo;
+ unsigned Size;
+ const char *Comment;
+ };
+
+ /// The register location, if any.
+ SmallVector<Register, 2> DwarfRegs;
+
/// Current Fragment Offset in Bits.
uint64_t OffsetInBits = 0;
+ unsigned DwarfVersion;
/// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
unsigned SubRegisterSizeInBits = 0;
@@ -99,35 +109,54 @@ protected:
SubRegisterOffsetInBits = OffsetInBits;
}
-public:
- DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- virtual ~DwarfExpression() {};
-
- /// This needs to be called last to commit any pending changes.
- void finalize();
+ /// Add masking operations to stencil out a subregister.
+ void maskSubRegister();
/// Output a dwarf operand and an optional assembler comment.
- virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+ virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0;
/// Emit a raw signed value.
- virtual void EmitSigned(int64_t Value) = 0;
+ virtual void emitSigned(int64_t Value) = 0;
/// Emit a raw unsigned value.
- virtual void EmitUnsigned(uint64_t Value) = 0;
+ virtual void emitUnsigned(uint64_t Value) = 0;
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
- /// Emit a dwarf register operation.
- void AddReg(int DwarfReg, const char *Comment = nullptr);
- /// Emit an (double-)indirect dwarf register operation.
- void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
+ /// Emit a DW_OP_reg operation.
+ void addReg(int DwarfReg, const char *Comment = nullptr);
+ /// Emit a DW_OP_breg operation.
+ void addBReg(int DwarfReg, int Offset);
+ /// Emit DW_OP_fbreg <Offset>.
+ void addFBReg(int Offset);
+
+ /// Emit a partial DWARF register operation.
+ ///
+ /// \param MachineReg The register number.
+ /// \param MaxSize If the register must be composed from
+ /// sub-registers this is an upper bound
+ /// for how many bits the emitted DW_OP_piece
+ /// may cover.
+ ///
+ /// If size and offset are zero, an operation for the entire register is
+ /// emitted: Some targets do not provide a DWARF register number for every
+ /// register. If this is the case, this function will attempt to emit a DWARF
+ /// register by emitting a fragment of a super-register or by piecing together
+ /// multiple subregisters that alias the register.
+ ///
+ /// \return false if no DWARF register exists for MachineReg.
+ bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ unsigned MaxSize = ~1U);
+
/// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
/// \param OffsetInBits This is an optional offset into the location that
/// is at the top of the DWARF stack.
- void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+ void addOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
- /// Emit a shift-right dwarf expression.
- void AddShr(unsigned ShiftBy);
+ /// Emit a shift-right dwarf operation.
+ void addShr(unsigned ShiftBy);
+ /// Emit a bitwise and dwarf operation.
+ void addAnd(unsigned Mask);
/// Emit a DW_OP_stack_value, if supported.
///
@@ -140,37 +169,21 @@ public:
/// constant value, so the producers and consumers started to rely on
/// heuristics to disambiguate the value vs. location status of the
/// expression. See PR21176 for more details.
- void AddStackValue();
+ void addStackValue();
- /// Emit an indirect dwarf register operation for the given machine register.
- /// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg,
- int Offset = 0);
+ ~DwarfExpression() = default;
+public:
+ DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- /// Emit a partial DWARF register operation.
- ///
- /// \param MachineReg The register number.
- /// \param MaxSize If the register must be composed from
- /// sub-registers this is an upper bound
- /// for how many bits the emitted DW_OP_piece
- /// may cover.
- ///
- /// If size and offset is zero an operation for the entire register is
- /// emitted: Some targets do not provide a DWARF register number for every
- /// register. If this is the case, this function will attempt to emit a DWARF
- /// register by emitting a fragment of a super-register or by piecing together
- /// multiple subregisters that alias the register.
- ///
- /// \return false if no DWARF register exists for MachineReg.
- bool AddMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
- unsigned MaxSize = ~1U);
+ /// This needs to be called last to commit any pending changes.
+ void finalize();
/// Emit a signed constant.
- void AddSignedConstant(int64_t Value);
+ void addSignedConstant(int64_t Value);
/// Emit an unsigned constant.
- void AddUnsignedConstant(uint64_t Value);
+ void addUnsignedConstant(uint64_t Value);
/// Emit an unsigned constant.
- void AddUnsignedConstant(const APInt &Value);
+ void addUnsignedConstant(const APInt &Value);
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
@@ -181,7 +194,7 @@ public:
/// fragment inside the entire variable.
/// \return false if no DWARF register exists
/// for MachineReg.
- bool AddMachineRegExpression(const TargetRegisterInfo &TRI,
+ bool addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
/// Emit all remaining operations in the DIExpressionCursor.
@@ -189,7 +202,7 @@ public:
/// \param FragmentOffsetInBits If this is one fragment out of multiple
/// locations, this is the offset of the
/// fragment inside the entire variable.
- void AddExpression(DIExpressionCursor &&Expr,
+ void addExpression(DIExpressionCursor &&Expr,
unsigned FragmentOffsetInBits = 0);
/// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
@@ -198,33 +211,32 @@ public:
};
/// DwarfExpression implementation for .debug_loc entries.
-class DebugLocDwarfExpression : public DwarfExpression {
+class DebugLocDwarfExpression final : public DwarfExpression {
ByteStreamer &BS;
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) override;
public:
DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
: DwarfExpression(DwarfVersion), BS(BS) {}
-
- void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int64_t Value) override;
- void EmitUnsigned(uint64_t Value) override;
- bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) override;
};
/// DwarfExpression implementation for singular DW_AT_location.
-class DIEDwarfExpression : public DwarfExpression {
+class DIEDwarfExpression final : public DwarfExpression {
const AsmPrinter &AP;
DwarfUnit &DU;
DIELoc &DIE;
-public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
- void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int64_t Value) override;
- void EmitUnsigned(uint64_t Value) override;
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+public:
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
DIELoc *finalize() {
DwarfExpression::finalize();
return &DIE;
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 2a866c071f593..bad5b09553cdc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -54,15 +54,15 @@ DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
: DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
DIE(DIE) {}
-void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
+void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
-void DIEDwarfExpression::EmitSigned(int64_t Value) {
+void DIEDwarfExpression::emitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
-void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
+void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
@@ -98,25 +98,35 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
default:
break;
- case dwarf::DW_LANG_C89:
- case dwarf::DW_LANG_C99:
+ // The languages below have valid values in all DWARF versions.
case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
case dwarf::DW_LANG_C_plus_plus:
- case dwarf::DW_LANG_ObjC:
- case dwarf::DW_LANG_ObjC_plus_plus:
return 0;
case dwarf::DW_LANG_Fortran77:
case dwarf::DW_LANG_Fortran90:
- case dwarf::DW_LANG_Fortran95:
return 1;
- // The languages below have valid values only if the DWARF version >= 4.
+ // The languages below have valid values only if the DWARF version >= 3.
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ if (DD->getDwarfVersion() >= 3)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Fortran95:
+ if (DD->getDwarfVersion() >= 3)
+ return 1;
+ break;
+
+ // Starting with DWARF v4, all defined languages have valid values.
+ case dwarf::DW_LANG_D:
case dwarf::DW_LANG_Java:
case dwarf::DW_LANG_Python:
case dwarf::DW_LANG_UPC:
- case dwarf::DW_LANG_D:
- if (dwarf::DWARF_VERSION >= 4)
+ if (DD->getDwarfVersion() >= 4)
return 0;
break;
@@ -127,31 +137,33 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
case dwarf::DW_LANG_Modula2:
case dwarf::DW_LANG_Pascal83:
case dwarf::DW_LANG_PLI:
- if (dwarf::DWARF_VERSION >= 4)
+ if (DD->getDwarfVersion() >= 4)
return 1;
break;
- // The languages below have valid values only if the DWARF version >= 5.
- case dwarf::DW_LANG_OpenCL:
- case dwarf::DW_LANG_Go:
- case dwarf::DW_LANG_Haskell:
+ // The languages below are new in DWARF v5.
+ case dwarf::DW_LANG_BLISS:
+ case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_C_plus_plus_03:
case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ case dwarf::DW_LANG_Dylan:
+ case dwarf::DW_LANG_Go:
+ case dwarf::DW_LANG_Haskell:
case dwarf::DW_LANG_OCaml:
+ case dwarf::DW_LANG_OpenCL:
+ case dwarf::DW_LANG_RenderScript:
case dwarf::DW_LANG_Rust:
- case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_Swift:
- case dwarf::DW_LANG_Dylan:
- case dwarf::DW_LANG_C_plus_plus_14:
- if (dwarf::DWARF_VERSION >= 5)
+ if (DD->getDwarfVersion() >= 5)
return 0;
break;
- case dwarf::DW_LANG_Modula3:
- case dwarf::DW_LANG_Julia:
case dwarf::DW_LANG_Fortran03:
case dwarf::DW_LANG_Fortran08:
- if (dwarf::DWARF_VERSION >= 5)
+ case dwarf::DW_LANG_Julia:
+ case dwarf::DW_LANG_Modula3:
+ if (DD->getDwarfVersion() >= 5)
return 1;
break;
}
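A paraphrased usage sketch (not a verbatim quote of constructSubrangeDIE) showing why these defaults exist: DW_AT_lower_bound is only emitted when the bound differs from the language default, and -1 means there is no reliable default for that language/DWARF version.

    //   int64_t DefaultLB = getDefaultLowerBound();
    //   if (DefaultLB == -1 || LowerBound != DefaultLB)
    //     addUInt(SubrangeDie, dwarf::DW_AT_lower_bound, None, LowerBound);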
@@ -285,13 +297,6 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
dwarf::DW_FORM_ref_sig8, DIEInteger(Signature));
}
-void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
- StringRef Identifier) {
- uint64_t Signature = DD->makeTypeSignature(Identifier);
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
- DIEInteger(Signature));
-}
-
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
const DIEUnit *CU = Die.getUnit();
@@ -465,50 +470,47 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- SmallVector<uint64_t, 6> DIExpr;
- DIEDwarfExpression Expr(*Asm, *this, *Loc);
-
- bool validReg;
- if (Location.isReg())
- validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg());
- else
- validReg =
- Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg(), Location.getOffset());
-
- if (!validReg)
- return;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ SmallVector<uint64_t, 9> Ops;
+ if (Location.isIndirect()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- DIExpr.push_back(dwarf::DW_OP_deref);
+ Ops.push_back(dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- DIExpr.push_back(dwarf::DW_OP_plus);
- DIExpr.push_back(forwardingFieldOffset);
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- DIExpr.push_back(dwarf::DW_OP_deref);
+ Ops.push_back(dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- DIExpr.push_back(dwarf::DW_OP_plus);
- DIExpr.push_back(varFieldOffset);
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(varFieldOffset);
}
- Expr.AddExpression(makeArrayRef(DIExpr));
- Expr.finalize();
+
+ DIExpressionCursor Cursor(Ops);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
/// Return true if type encoding is unsigned.
@@ -672,7 +674,7 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
return getDIE(Context);
}
-DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
auto *Context = resolve(Ty->getScope());
DIE *ContextDIE = getOrCreateContextDIE(Context);
@@ -684,8 +686,7 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
- if (!Ty->isExternalTypeRef())
- updateAcceleratorTables(Context, Ty, TyDIE);
+ updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
@@ -841,6 +842,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
+
+ // If the DWARF address space value is other than None, add it for pointer and
+ // reference types as DW_AT_address_class.
+ if (DTy->getDWARFAddressSpace() && (Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_reference_type))
+ addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
+ DTy->getDWARFAddressSpace().getValue());
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
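An illustrative DIE shape (attribute values are made up, not produced by this patch) for a pointer type whose DIDerivedType carries a DWARF address space:

    //   DW_TAG_pointer_type
    //     DW_AT_type           (-> pointee type)
    //     DW_AT_address_class  (0x01)   ; emitted only when
    //                                   ; getDWARFAddressSpace() has a value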
@@ -892,13 +900,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
- if (CTy->isExternalTypeRef()) {
- StringRef Identifier = CTy->getIdentifier();
- assert(!Identifier.empty() && "external type ref without identifier");
- addFlag(Buffer, dwarf::DW_AT_declaration);
- return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
- }
-
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -1180,8 +1181,12 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
}
void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
- bool Minimal) {
- if (!Minimal)
+ bool SkipSPAttributes) {
+ // If -fdebug-info-for-profiling is enabled, we need to emit the subprogram
+ // and its source location.
+ bool SkipSPSourceLocation = SkipSPAttributes &&
+ !CUNode->getDebugInfoForProfiling();
+ if (!SkipSPSourceLocation)
if (applySubprogramDefinitionAttributes(SP, SPDie))
return;
@@ -1189,12 +1194,13 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
+ if (!SkipSPSourceLocation)
+ addSourceLine(SPDie, SP);
+
// Skip the rest of the attributes under -gmlt to save space.
- if (Minimal)
+ if (SkipSPAttributes)
return;
- addSourceLine(SPDie, SP);
-
// Add the prototype if we have a prototype and we have a C like
// language.
uint16_t Language = getLanguage();
@@ -1526,18 +1532,27 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
return &StaticMemberDIE;
}
-void DwarfUnit::emitHeader(bool UseOffsets) {
+void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
- Asm->EmitInt16(DD->getDwarfVersion());
- Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
+ unsigned Version = DD->getDwarfVersion();
+ Asm->EmitInt16(Version);
+
+ // DWARF v5 reorders the address size and adds a unit type.
+ if (Version >= 5) {
+ Asm->OutStreamer->AddComment("DWARF Unit Type");
+ Asm->EmitInt8(UT);
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ }
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
+ Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
Asm->EmitInt32(0);
@@ -1545,12 +1560,16 @@ void DwarfUnit::emitHeader(bool UseOffsets) {
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
- Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ if (Version <= 4) {
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ }
}
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
- DwarfUnit::emitHeader(UseOffsets);
+ DwarfUnit::emitCommonHeader(UseOffsets,
+ DD->useSplitDwarf() ? dwarf::DW_UT_split_type
+ : dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
@@ -1564,3 +1583,13 @@ bool DwarfTypeUnit::isDwoUnit() const {
// when split DWARF is being used.
return DD->useSplitDwarf();
}
+
+void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalNameForTypeUnit(Name, Context);
+}
+
+void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalTypeUnitType(Ty, Context);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 8654d6f0caf43..d626ef920f956 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -124,12 +124,12 @@ public:
std::string getParentContextString(const DIScope *Context) const;
/// Add a new global name to the compile unit.
- virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) {
- }
+ virtual void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) = 0;
/// Add a new global type to the compile unit.
virtual void addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) {}
+ const DIScope *Context) = 0;
/// Returns the DIE map slot for the specified debug variable.
///
@@ -198,9 +198,6 @@ public:
/// Add a type's DW_AT_signature and set the declaration flag.
void addDIETypeSignature(DIE &Die, uint64_t Signature);
- /// Add an attribute containing the type signature for a unique identifier.
- void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
- StringRef Identifier);
/// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
@@ -256,15 +253,12 @@ public:
DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
- bool Minimal = false);
+ bool SkipSPAttributes = false);
/// Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *N);
/// Get context owner's DIE.
- DIE *createTypeDIE(const DICompositeType *Ty);
-
- /// Get context owner's DIE.
DIE *getOrCreateContextDIE(const DIScope *Context);
/// Construct DIEs for types that contain vtables.
@@ -282,11 +276,13 @@ public:
virtual unsigned getHeaderSize() const {
return sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t); // Pointer Size (in bytes)
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ (DD->getDwarfVersion() >= 5 ? sizeof(int8_t)
+ : 0); // DWARF v5 unit type
}
/// Emit the header for this unit, not including the initial length field.
- virtual void emitHeader(bool UseOffsets);
+ virtual void emitHeader(bool UseOffsets) = 0;
virtual DwarfCompileUnit &getCU() = 0;
@@ -306,6 +302,14 @@ protected:
return Ref.resolve();
}
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
+ /// Emit the common part of the header for this unit.
+ void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
+
private:
void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
@@ -330,11 +334,6 @@ private:
/// Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
- /// If this is a named finished type then include it in the list of types for
- /// the accelerator tables.
- void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
- const DIE &TyDIE);
-
virtual bool isDwoUnit() const = 0;
};
@@ -354,12 +353,19 @@ public:
void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
void setType(const DIE *Ty) { this->Ty = Ty; }
+ /// Find an existing DIE or create a new DIE for the given DICompositeType.
+ DIE *createTypeDIE(const DICompositeType *Ty);
+
/// Emit the header for this unit, not including the initial length field.
void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+ void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
DwarfCompileUnit &getCU() override { return CU; }
};
} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 6a023b998b326..342efc3611c78 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,21 +14,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -38,13 +36,12 @@ class ErlangGCPrinter : public GCMetadataPrinter {
public:
void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
};
-}
+
+} // end anonymous namespace
static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
X("erlang", "erlang-compatible garbage collector");
-void llvm::linkErlangGCPrinter() {}
-
void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
MCStreamer &OS = *AP.OutStreamer;
@@ -121,3 +118,5 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
}
}
}
+
+void llvm::linkErlangGCPrinter() {}
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 9d7c96a1b8efd..704f0ac2f1919 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -68,7 +68,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
const Function *F = MF->getFunction();
- shouldEmitMoves = Asm->needsSEHMoves();
+ shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI();
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -94,7 +94,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
// If we're not using CFI, we don't want the CFI or the personality, but we
// might want EH tables if we had EH pads.
- if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !PerFn)) {
+ if (!Asm->MAI->usesWindowsCFI()) {
if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) {
// If this is 32-bit SEH and we don't have any funclets (really invokes),
// make sure we emit the parent offset label. Some unreferenced filter
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index bf5cf105a8f86..9c19a4fd3c3e0 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -1532,7 +1532,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Type *ResultTy;
SmallVector<Value *, 6> Args;
- AttributeSet Attr;
+ AttributeList Attr;
// 'size' argument.
if (!UseSizedLibcall) {
@@ -1593,7 +1593,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// Now, the return type.
if (CASExpected) {
ResultTy = Type::getInt1Ty(Ctx);
- Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
+ Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
} else if (HasResult && UseSizedLibcall)
ResultTy = SizedIntTy;
else
diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/CodeGen/BranchCoalescing.cpp
new file mode 100644
index 0000000000000..efdf300df8506
--- /dev/null
+++ b/lib/CodeGen/BranchCoalescing.cpp
@@ -0,0 +1,758 @@
+//===-- BranchCoalescing.cpp - Coalesce blocks with the same condition ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Coalesce basic blocks guarded by the same branch condition into a single
+/// basic block.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coal-branch"
+
+static cl::opt<cl::boolOrDefault>
+ EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
+ cl::desc("enable coalescing of duplicate branches"));
+
+STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
+STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
+STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
+
+//===----------------------------------------------------------------------===//
+// BranchCoalescing
+//===----------------------------------------------------------------------===//
+///
+/// Improve scheduling by coalescing branches that depend on the same condition.
+/// This pass looks for blocks that are guarded by the same branch condition
+/// and attempts to merge the blocks together. Such opportunities arise from
+/// the expansion of select statements in the IR.
+///
+/// For example, consider the following LLVM IR:
+///
+///     %test = icmp eq i32 %x, 0
+/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
+/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
+///
+/// This IR expands to the following machine code on PowerPC:
+///
+/// BB#0: derived from LLVM BB %entry
+/// Live Ins: %F1 %F3 %X6
+/// <SNIP1>
+/// %vreg0<def> = COPY %F1; F8RC:%vreg0
+/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
+/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
+/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+/// BCC 76, %vreg5, <BB#2>; CRRC:%vreg5
+/// Successors according to CFG: BB#1(?%) BB#2(?%)
+///
+/// BB#1: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0
+/// Successors according to CFG: BB#2(?%)
+///
+/// BB#2: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0 BB#1
+/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+/// F8RC:%vreg9,%vreg8,%vreg0
+/// <SNIP2>
+/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+/// Successors according to CFG: BB#3(?%) BB#4(?%)
+///
+/// BB#3: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#2
+/// Successors according to CFG: BB#4(?%)
+///
+/// BB#4: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#2 BB#3
+/// %vreg13<def> = PHI %vreg12, <BB#3>, %vreg2, <BB#2>;
+/// F8RC:%vreg13,%vreg12,%vreg2
+/// <SNIP3>
+/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
+///
+/// When this pattern is detected, branch coalescing will try to collapse
+/// it by moving the code in BB#2 to BB#0 and/or BB#4 and removing BB#2 and
+/// BB#3.
+///
+/// If all conditions are met, the IR should collapse to:
+///
+/// BB#0: derived from LLVM BB %entry
+/// Live Ins: %F1 %F3 %X6
+/// <SNIP1>
+/// %vreg0<def> = COPY %F1; F8RC:%vreg0
+/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
+/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
+/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+/// <SNIP2>
+/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+/// Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%)
+/// BB#4(0x55555554 / 0x80000000 = 66.67%)
+///
+/// BB#1: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0
+/// Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%)
+///
+/// BB#4: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0 BB#1
+/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+/// F8RC:%vreg9,%vreg8,%vreg0
+/// %vreg13<def> = PHI %vreg12, <BB#1>, %vreg2, <BB#0>;
+/// F8RC:%vreg13,%vreg12,%vreg2
+/// <SNIP3>
+/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
+///
+/// Branch coalescing does not split blocks; it moves everything in the same
+/// direction, ensuring it does not break use/definition semantics.
+///
+/// PHI nodes and their corresponding use instructions are moved to the
+/// successor block if there are no uses within the successor block's PHI
+/// nodes. PHI node ordering cannot be assumed.
+///
+/// Non-PHI instructions can be moved up to the predecessor basic block or
+/// down to the successor basic block, following any PHI instructions. Whether
+/// an instruction moves up or down depends on whether the register(s) it
+/// defines are used in the current block or in any PHI instructions at the
+/// beginning of the successor block.
+
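// Illustrative sketch (hypothetical source, not part of this patch): one
// comparison feeding two independent selects, roughly the source shape that
// lowers to the IR and machine code shown in the comment above.
double pick(int x, double a, double b, double &second) {
  bool test = (x == 0);
  double first = test ? a : 2.0e-3;   // first select on the shared condition
  second = test ? b : 5.0e-3;         // second select on the same condition
  return first;
}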
+namespace {
+
+class BranchCoalescing : public MachineFunctionPass {
+ struct CoalescingCandidateInfo {
+ MachineBasicBlock *BranchBlock; // Block containing the branch
+ MachineBasicBlock *BranchTargetBlock; // Block branched to
+ MachineBasicBlock *FallThroughBlock; // Fall-through if branch not taken
+ SmallVector<MachineOperand, 4> Cond;
+ bool MustMoveDown;
+ bool MustMoveUp;
+
+ CoalescingCandidateInfo();
+ void clear();
+ };
+
+ MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ void initialize(MachineFunction &F);
+ bool canCoalesceBranch(CoalescingCandidateInfo &Cand);
+ bool identicalOperands(ArrayRef<MachineOperand> OperandList1,
+ ArrayRef<MachineOperand> OperandList2) const;
+ bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const;
+
+ static bool isBranchCoalescingEnabled() {
+ return EnableBranchCoalescing == cl::BOU_TRUE;
+ }
+
+public:
+ static char ID;
+
+ BranchCoalescing() : MachineFunctionPass(ID) {
+ initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Branch Coalescing"; }
+
+ bool mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion);
+ bool canMoveToBeginning(const MachineInstr &MI,
+ const MachineBasicBlock &MBB) const;
+ bool canMoveToEnd(const MachineInstr &MI,
+ const MachineBasicBlock &MBB) const;
+ bool canMerge(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const;
+ void moveAndUpdatePHIs(MachineBasicBlock *SourceRegionMBB,
+ MachineBasicBlock *TargetRegionMBB);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char BranchCoalescing::ID = 0;
+char &llvm::BranchCoalescingID = BranchCoalescing::ID;
+
+INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing",
+ "Branch Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing",
+ false, false)
+
+BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
+ : BranchBlock(nullptr), BranchTargetBlock(nullptr),
+ FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
+
+void BranchCoalescing::CoalescingCandidateInfo::clear() {
+ BranchBlock = nullptr;
+ BranchTargetBlock = nullptr;
+ FallThroughBlock = nullptr;
+ Cond.clear();
+ MustMoveDown = false;
+ MustMoveUp = false;
+}
+
+void BranchCoalescing::initialize(MachineFunction &MF) {
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+///
+/// Analyze the branch statement of the given candidate to determine whether it
+/// can be coalesced. If it can, the BranchTargetBlock and the FallThroughBlock
+/// are recorded in the candidate.
+///
+///\param[in,out] Cand The coalescing candidate to analyze
+///\return true if and only if the branch can be coalesced, false otherwise
+///
+bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
+ DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
+ << " can be coalesced:");
+ MachineBasicBlock *FalseMBB = nullptr;
+
+ if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB,
+ Cand.Cond)) {
+ DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n");
+ return false;
+ }
+
+ for (auto &I : Cand.BranchBlock->terminators()) {
+ DEBUG(dbgs() << "Looking at terminator : " << I << "\n");
+ if (!I.isBranch())
+ continue;
+
+ if (I.getNumOperands() != I.getNumExplicitOperands()) {
+ DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
+ << "\n");
+ return false;
+ }
+ }
+
+ if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) {
+ DEBUG(dbgs() << "EH Pad - skip\n");
+ return false;
+ }
+
+  // For now, only consider triangles (i.e., BranchTargetBlock is set,
+  // FalseMBB is null, and BranchTargetBlock is a successor of BranchBlock).
+ if (!Cand.BranchTargetBlock || FalseMBB ||
+ !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) {
+ DEBUG(dbgs() << "Does not form a triangle - skip\n");
+ return false;
+ }
+
+ // Ensure there are only two successors
+ if (Cand.BranchBlock->succ_size() != 2) {
+ DEBUG(dbgs() << "Does not have 2 successors - skip\n");
+ return false;
+ }
+
+ // Sanity check - the block must be able to fall through
+ assert(Cand.BranchBlock->canFallThrough() &&
+ "Expecting the block to fall through!");
+
+ // We have already ensured there are exactly two successors to
+ // BranchBlock and that BranchTargetBlock is a successor to BranchBlock.
+  // Ensure the single fall-through block is empty.
+ MachineBasicBlock *Succ =
+ (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock)
+ ? *Cand.BranchBlock->succ_rbegin()
+ : *Cand.BranchBlock->succ_begin();
+
+ assert(Succ && "Expecting a valid fall-through block\n");
+
+ if (!Succ->empty()) {
+ DEBUG(dbgs() << "Fall-through block contains code -- skip\n");
+ return false;
+ }
+
+ if (!Succ->isSuccessor(Cand.BranchTargetBlock)) {
+ DEBUG(dbgs()
+ << "Successor of fall through block is not branch taken block\n");
+ return false;
+ }
+
+ Cand.FallThroughBlock = Succ;
+ DEBUG(dbgs() << "Valid Candidate\n");
+ return true;
+}
+
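// Illustrative, self-contained sketch of the "triangle" test performed above,
// using toy types instead of MachineBasicBlock (names are made up, not LLVM
// API).
#include <vector>

struct Block {
  std::vector<const Block *> Succs;   // successor edges
  bool Empty = true;                  // block contains no instructions
  bool isSuccessor(const Block *B) const {
    for (const Block *S : Succs)
      if (S == B)
        return true;
    return false;
  }
};

// Mirrors canCoalesceBranch's shape checks: exactly two successors, the
// branch-taken block is one of them, and the other (fall-through) block is
// empty and rejoins at the branch-taken block.
bool formsCoalesceableTriangle(const Block &BranchBlock, const Block *Taken) {
  if (!Taken || BranchBlock.Succs.size() != 2 ||
      !BranchBlock.isSuccessor(Taken))
    return false;
  const Block *Fall = (BranchBlock.Succs[0] == Taken) ? BranchBlock.Succs[1]
                                                      : BranchBlock.Succs[0];
  return Fall->Empty && Fall->isSuccessor(Taken);
}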
+///
+/// Determine if the two operand lists are identical
+///
+/// \param[in] OpList1 operand list
+/// \param[in] OpList2 operand list
+/// \return true if and only if the operands lists are identical
+///
+bool BranchCoalescing::identicalOperands(
+ ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
+
+ if (OpList1.size() != OpList2.size()) {
+ DEBUG(dbgs() << "Operand list is different size\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < OpList1.size(); ++i) {
+ const MachineOperand &Op1 = OpList1[i];
+ const MachineOperand &Op2 = OpList2[i];
+
+ DEBUG(dbgs() << "Op1: " << Op1 << "\n"
+ << "Op2: " << Op2 << "\n");
+
+ if (Op1.isIdenticalTo(Op2)) {
+ DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
+ continue;
+ }
+
+ // If the operands are not identical, but are registers, check to see if the
+ // definition of the register produces the same value. If they produce the
+ // same value, consider them to be identical.
+ if (Op1.isReg() && Op2.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
+ MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
+ MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
+ if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
+ DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def
+ << " produce the same value!\n");
+ } else {
+ DEBUG(dbgs() << "Operands produce different values\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "The operands are not provably identical.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+///
+/// Moves ALL PHI instructions in SourceMBB to the beginning of TargetMBB
+/// and updates them to refer to the new block. PHI node ordering cannot be
+/// assumed, so it does not matter where in TargetMBB the PHI instructions
+/// are placed.
+///
+/// \param[in] SourceMBB block to move PHI instructions from
+/// \param[in] TargetMBB block to move PHI instructions to
+///
+void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
+ MachineBasicBlock *TargetMBB) {
+
+ MachineBasicBlock::iterator MI = SourceMBB->begin();
+ MachineBasicBlock::iterator ME = SourceMBB->getFirstNonPHI();
+
+ if (MI == ME) {
+ DEBUG(dbgs() << "SourceMBB contains no PHI instructions.\n");
+ return;
+ }
+
+ // Update all PHI instructions in SourceMBB and move to top of TargetMBB
+ for (MachineBasicBlock::iterator Iter = MI; Iter != ME; Iter++) {
+ MachineInstr &PHIInst = *Iter;
+ for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = PHIInst.getOperand(i);
+ if (MO.getMBB() == SourceMBB)
+ MO.setMBB(TargetMBB);
+ }
+ }
+ TargetMBB->splice(TargetMBB->begin(), SourceMBB, MI, ME);
+}
+
+///
+/// This function checks whether MI can be moved to the beginning of TargetMBB,
+/// after any PHI instructions. MI can be moved there if none of the registers
+/// it defines are used by PHI nodes in TargetMBB.
+///
+/// \param[in] MI the machine instruction to move.
+/// \param[in] TargetMBB the machine basic block to move to
+/// \return true if it is safe to move MI to beginning of TargetMBB,
+/// false otherwise.
+///
+bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
+ const MachineBasicBlock &TargetMBB
+ ) const {
+
+ DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of "
+ << TargetMBB.getNumber() << "\n");
+
+ for (auto &Def : MI.defs()) { // Looking at Def
+ for (auto &Use : MRI->use_instructions(Def.getReg())) {
+ if (Use.isPHI() && Use.getParent() == &TargetMBB) {
+ DEBUG(dbgs() << " *** used in a PHI -- cannot move ***\n");
+ return false;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Safe to move to the beginning.\n");
+ return true;
+}
+
+///
+/// This function checks whether MI can be moved to the end of TargetMBB,
+/// immediately before the first terminator. MI can be moved there if none of
+/// the registers it uses are defined by a PHI node within MI's own MBB.
+///
+/// \param[in] MI the machine instruction to move.
+/// \param[in] TargetMBB the machine basic block to move to
+/// \return true if it is safe to move MI to end of TargetMBB,
+/// false otherwise.
+///
+bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
+ const MachineBasicBlock &TargetMBB
+ ) const {
+
+ DEBUG(dbgs() << "Checking if " << MI << " can move to end of "
+ << TargetMBB.getNumber() << "\n");
+
+ for (auto &Use : MI.uses()) {
+ if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
+ MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
+ if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
+ DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** def is in another block -- safe to move!\n");
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Safe to move to the end.\n");
+ return true;
+}
+
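// Illustrative, self-contained sketch of the direction rule encoded by
// canMoveToBeginning/canMoveToEnd, with toy types (not the MachineInstr or
// MachineRegisterInfo API).
#include <set>
#include <vector>

struct Inst {
  std::set<int> Defs, Uses;           // virtual registers defined / used
  bool IsPHI = false;
  int Parent = 0;                     // number of the block containing Inst
};

// May sink I to the start of block TargetBB: none of I's defs may feed a PHI
// that lives in TargetBB (mirrors canMoveToBeginning).
bool canSink(const Inst &I, int TargetBB, const std::vector<Inst> &AllInsts) {
  for (const Inst &Other : AllInsts)
    if (Other.IsPHI && Other.Parent == TargetBB)
      for (int D : I.Defs)
        if (Other.Uses.count(D))
          return false;
  return true;
}

// May hoist I to the end of its predecessor: none of I's uses may be defined
// by a PHI inside I's own block (mirrors canMoveToEnd).
bool canHoist(const Inst &I, const std::vector<Inst> &AllInsts) {
  for (const Inst &Other : AllInsts)
    if (Other.IsPHI && Other.Parent == I.Parent)
      for (int U : I.Uses)
        if (Other.Defs.count(U))
          return false;
  return true;
}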
+///
+/// This method checks that the two coalescing candidates follow the
+/// expected pattern required for coalescing.
+///
+/// \param[in] SourceRegion The candidate to move statements from
+/// \param[in] TargetRegion The candidate to move statements to
+/// \return true if all instructions in SourceRegion.BranchBlock can be merged
+/// into a block in TargetRegion; false otherwise.
+///
+bool BranchCoalescing::validateCandidates(
+ CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const {
+
+ if (TargetRegion.BranchTargetBlock != SourceRegion.BranchBlock)
+ llvm_unreachable("Expecting SourceRegion to immediately follow TargetRegion");
+ else if (!MDT->dominates(TargetRegion.BranchBlock, SourceRegion.BranchBlock))
+ llvm_unreachable("Expecting TargetRegion to dominate SourceRegion");
+ else if (!MPDT->dominates(SourceRegion.BranchBlock, TargetRegion.BranchBlock))
+ llvm_unreachable("Expecting SourceRegion to post-dominate TargetRegion");
+ else if (!TargetRegion.FallThroughBlock->empty() ||
+ !SourceRegion.FallThroughBlock->empty())
+ llvm_unreachable("Expecting fall-through blocks to be empty");
+
+ return true;
+}
+
+///
+/// This method determines whether the two coalescing candidates can be merged.
+/// In order to be merged, all instructions must be able to either
+/// 1. move to the beginning of the SourceRegion.BranchTargetBlock, or
+/// 2. move to the end of the TargetRegion.BranchBlock.
+/// Merging involves moving the instructions in the
+/// TargetRegion.BranchTargetBlock (which is also SourceRegion.BranchBlock).
+///
+/// This function first tries to move instructions from the
+/// TargetRegion.BranchTargetBlock down, to the beginning of the
+/// SourceRegion.BranchTargetBlock. This is not possible if any register defined
+/// in TargetRegion.BranchTargetBlock is used in a PHI node in the
+/// SourceRegion.BranchTargetBlock. In that case, check whether the instruction
+/// can instead be moved up, to the end of the TargetRegion.BranchBlock
+/// (immediately before the branch statement). If it can move neither way, then
+/// these blocks cannot be merged.
+///
+/// Note that there is no analysis for moving instructions past the fall-through
+/// blocks because they are confirmed to be empty. An assertion fires if they
+/// are not.
+///
+/// \param[in] SourceRegion The candidate to move statements from
+/// \param[in] TargetRegion The candidate to move statements to
+/// \return true if all instructions in SourceRegion.BranchBlock can be merged
+/// into a block in TargetRegion, false otherwise.
+///
+bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const {
+ if (!validateCandidates(SourceRegion, TargetRegion))
+ return false;
+
+ // Walk through PHI nodes first and see if they force the merge into the
+ // SourceRegion.BranchTargetBlock.
+ for (MachineBasicBlock::iterator
+ I = SourceRegion.BranchBlock->instr_begin(),
+ E = SourceRegion.BranchBlock->getFirstNonPHI();
+ I != E; ++I) {
+ for (auto &Def : I->defs())
+ for (auto &Use : MRI->use_instructions(Def.getReg())) {
+ if (Use.isPHI() && Use.getParent() == SourceRegion.BranchTargetBlock) {
+ DEBUG(dbgs() << "PHI " << *I << " defines register used in another "
+ "PHI within branch target block -- can't merge\n");
+ NumPHINotMoved++;
+ return false;
+ }
+ if (Use.getParent() == SourceRegion.BranchBlock) {
+ DEBUG(dbgs() << "PHI " << *I
+ << " defines register used in this "
+ "block -- all must move down\n");
+ SourceRegion.MustMoveDown = true;
+ }
+ }
+ }
+
+  // Walk through the remaining instructions to see whether they should be
+  // merged into TargetRegion.BranchBlock (up) or
+  // SourceRegion.BranchTargetBlock (down).
+ for (MachineBasicBlock::iterator
+ I = SourceRegion.BranchBlock->getFirstNonPHI(),
+ E = SourceRegion.BranchBlock->end();
+ I != E; ++I) {
+ if (!canMoveToBeginning(*I, *SourceRegion.BranchTargetBlock)) {
+ DEBUG(dbgs() << "Instruction " << *I
+ << " cannot move down - must move up!\n");
+ SourceRegion.MustMoveUp = true;
+ }
+ if (!canMoveToEnd(*I, *TargetRegion.BranchBlock)) {
+ DEBUG(dbgs() << "Instruction " << *I
+ << " cannot move up - must move down!\n");
+ SourceRegion.MustMoveDown = true;
+ }
+ }
+
+ return (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) ? false : true;
+}
+
+/// Merge the instructions from SourceRegion.BranchBlock,
+/// SourceRegion.BranchTargetBlock, and SourceRegion.FallThroughBlock into
+/// TargetRegion.BranchBlock, TargetRegion.BranchTargetBlock and
+/// TargetRegion.FallThroughBlock respectively.
+///
+/// The successors for blocks in TargetRegion will be updated to use the
+/// successors from blocks in SourceRegion. Finally, the blocks in SourceRegion
+/// will be removed from the function.
+///
+/// A region consists of a BranchBlock, a FallThroughBlock, and a
+/// BranchTargetBlock. Branch coalescing works on patterns where the
+/// TargetRegion's BranchTargetBlock is also the SourceRegion's
+/// BranchBlock.
+///
+/// Before mergeCandidates:
+///
+/// +---------------------------+
+/// | TargetRegion.BranchBlock |
+/// +---------------------------+
+/// / |
+/// / +--------------------------------+
+/// | | TargetRegion.FallThroughBlock |
+/// \ +--------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | TargetRegion.BranchTargetBlock |
+/// | SourceRegion.BranchBlock |
+/// +----------------------------------+
+/// / |
+/// / +--------------------------------+
+/// | | SourceRegion.FallThroughBlock |
+/// \ +--------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | SourceRegion.BranchTargetBlock |
+/// +----------------------------------+
+///
+/// After mergeCandidates:
+///
+/// +-----------------------------+
+/// | TargetRegion.BranchBlock |
+/// | SourceRegion.BranchBlock |
+/// +-----------------------------+
+/// / |
+/// / +---------------------------------+
+/// | | TargetRegion.FallThroughBlock |
+/// | | SourceRegion.FallThroughBlock |
+/// \ +---------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | SourceRegion.BranchTargetBlock |
+/// +----------------------------------+
+///
+/// \param[in] SourceRegion The candidate to move blocks from
+/// \param[in] TargetRegion The candidate to move blocks to
+///
+bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) {
+
+ if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) {
+ llvm_unreachable("Cannot have both MustMoveDown and MustMoveUp set!");
+ return false;
+ }
+
+ if (!validateCandidates(SourceRegion, TargetRegion))
+ return false;
+
+ // Start the merging process by first handling the BranchBlock.
+ // Move any PHIs in SourceRegion.BranchBlock down to the branch-taken block
+ moveAndUpdatePHIs(SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
+
+ // Move remaining instructions in SourceRegion.BranchBlock into
+ // TargetRegion.BranchBlock
+ MachineBasicBlock::iterator firstInstr =
+ SourceRegion.BranchBlock->getFirstNonPHI();
+ MachineBasicBlock::iterator lastInstr =
+ SourceRegion.BranchBlock->getFirstTerminator();
+
+ MachineBasicBlock *Source = SourceRegion.MustMoveDown
+ ? SourceRegion.BranchTargetBlock
+ : TargetRegion.BranchBlock;
+
+ MachineBasicBlock::iterator Target =
+ SourceRegion.MustMoveDown
+ ? SourceRegion.BranchTargetBlock->getFirstNonPHI()
+ : TargetRegion.BranchBlock->getFirstTerminator();
+
+ Source->splice(Target, SourceRegion.BranchBlock, firstInstr, lastInstr);
+
+ // Once PHI and instructions have been moved we need to clean up the
+ // control flow.
+
+ // Remove SourceRegion.FallThroughBlock before transferring successors of
+ // SourceRegion.BranchBlock to TargetRegion.BranchBlock.
+ SourceRegion.BranchBlock->removeSuccessor(SourceRegion.FallThroughBlock);
+ TargetRegion.BranchBlock->transferSuccessorsAndUpdatePHIs(
+ SourceRegion.BranchBlock);
+ // Update branch in TargetRegion.BranchBlock to jump to
+ // SourceRegion.BranchTargetBlock
+ // In this case, TargetRegion.BranchTargetBlock == SourceRegion.BranchBlock.
+ TargetRegion.BranchBlock->ReplaceUsesOfBlockWith(
+ SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
+ // Remove the branch statement(s) in SourceRegion.BranchBlock
+ MachineBasicBlock::iterator I =
+ SourceRegion.BranchBlock->terminators().begin();
+ while (I != SourceRegion.BranchBlock->terminators().end()) {
+ MachineInstr &CurrInst = *I;
+ ++I;
+ if (CurrInst.isBranch())
+ CurrInst.eraseFromParent();
+ }
+
+ // Fall-through block should be empty since this is part of the condition
+ // to coalesce the branches.
+ assert(TargetRegion.FallThroughBlock->empty() &&
+ "FallThroughBlocks should be empty!");
+
+ // Transfer successor information and move PHIs down to the
+ // branch-taken block.
+ TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs(
+ SourceRegion.FallThroughBlock);
+ TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock);
+
+ // Remove the blocks from the function.
+ assert(SourceRegion.BranchBlock->empty() &&
+ "Expecting branch block to be empty!");
+ SourceRegion.BranchBlock->eraseFromParent();
+
+ assert(SourceRegion.FallThroughBlock->empty() &&
+ "Expecting fall-through block to be empty!\n");
+ SourceRegion.FallThroughBlock->eraseFromParent();
+
+ NumBlocksCoalesced++;
+ return true;
+}
+
+bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
+
+ if (skipFunction(*MF.getFunction()) || MF.empty() ||
+ !isBranchCoalescingEnabled())
+ return false;
+
+ bool didSomething = false;
+
+ DEBUG(dbgs() << "******** Branch Coalescing ********\n");
+ initialize(MF);
+
+ DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
+
+ CoalescingCandidateInfo Cand1, Cand2;
+  // Walk over the blocks and find candidates to merge. Keep trying to merge
+  // with the first candidate found, as long as merging succeeds.
+ for (MachineBasicBlock &MBB : MF) {
+ bool MergedCandidates = false;
+ do {
+ MergedCandidates = false;
+ Cand1.clear();
+ Cand2.clear();
+
+ Cand1.BranchBlock = &MBB;
+
+ // If unable to coalesce the branch, then continue to next block
+ if (!canCoalesceBranch(Cand1))
+ break;
+
+ Cand2.BranchBlock = Cand1.BranchTargetBlock;
+ if (!canCoalesceBranch(Cand2))
+ break;
+
+ // Sanity check
+ // The branch-taken block of the second candidate should post-dominate the
+ // first candidate
+ assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
+ "Branch-taken block should post-dominate first candidate");
+
+ if (!identicalOperands(Cand1.Cond, Cand2.Cond)) {
+ DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and "
+ << Cand2.BranchBlock->getNumber()
+ << " have different branches\n");
+ break;
+ }
+ if (!canMerge(Cand2, Cand1)) {
+ DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber()
+ << " and " << Cand2.BranchBlock->getNumber() << "\n");
+ NumBlocksNotCoalesced++;
+ continue;
+ }
+ DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber()
+ << " and " << Cand1.BranchTargetBlock->getNumber() << "\n");
+ MergedCandidates = mergeCandidates(Cand2, Cand1);
+ if (MergedCandidates)
+ didSomething = true;
+
+ DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n");
+ } while (MergedCandidates);
+ }
+
+#ifndef NDEBUG
+ // Verify MF is still valid after branch coalescing
+ if (didSomething)
+ MF.verify(nullptr, "Error in code produced by branch coalescing");
+#endif // NDEBUG
+
+ DEBUG(dbgs() << "Finished Branch Coalescing\n");
+ return didSomething;
+}
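The new file above only defines and registers the pass; nothing in this patch schedules it, and the enable-branch-coalesce option is unset by default, so the pass stays off unless a target adds it and the flag is passed explicitly. A hypothetical sketch of how a target could opt in (MyTargetPassConfig is a placeholder name, and the chosen hook is only one plausible insertion point):

// Hypothetical sketch, not part of this patch. BranchCoalescingID is the pass
// ID exported by BranchCoalescing.cpp; addMachineSSAOptimization is one
// plausible hook. Users would still need -enable-branch-coalesce=true, since
// isBranchCoalescingEnabled() only returns true for an explicit BOU_TRUE.
void MyTargetPassConfig::addMachineSSAOptimization() {
  addPass(&BranchCoalescingID);
  TargetPassConfig::addMachineSSAOptimization();
}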
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 6fba161033b02..2d01301402f04 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -49,6 +50,7 @@ STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls, "Number of tail calls optimized");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -123,8 +125,6 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
}
}
-/// RemoveDeadBlock - Remove the specified dead machine basic block from the
-/// function, updating the CFG.
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
@@ -144,9 +144,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MLI->removeBlock(MBB);
}
-/// OptimizeFunction - Perhaps branch folding, tail merging and other
-/// CFG optimizations on the given function. Block placement changes the layout
-/// and may create new tail merging opportunities.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
@@ -348,8 +345,6 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
-/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
-/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
@@ -362,9 +357,6 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
++NumTailMerge;
}
-/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
-/// MBB so that the part before the iterator falls into the part starting at the
-/// iterator. This returns the new MBB.
MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1,
const BasicBlock *BB) {
@@ -388,7 +380,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// NewMBB belongs to the same loop as CurMBB.
- if (MLI)
+ if (MLI)
if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
@@ -436,7 +428,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl = CurMBB->findBranchDebugLoc();
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
@@ -497,6 +489,15 @@ BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
return MBFI.printBlockFreq(OS, Freq);
}
+void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) {
+ MBFI.view(Name, isSimple);
+}
+
+uint64_t
+BranchFolder::MBFIWrapper::getEntryFreq() const {
+ return MBFI.getEntryFreq();
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -516,6 +517,17 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
return NumTerms;
}
+/// A block with no successors that does not end in a return probably ends in
+/// unreachable and is cold. Also treat a block that ends in an indirect branch
+/// as a return block, since many targets use plain indirect branches to return.
+static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {
+ if (!MBB->succ_empty())
+ return false;
+ if (MBB->empty())
+ return true;
+ return !(MBB->back().isReturn() || MBB->back().isIndirectBranch());
+}
+
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
@@ -570,6 +582,15 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
return true;
}
+ // If these are identical non-return blocks with no successors, merge them.
+ // Such blocks are typically cold calls to noreturn functions like abort, and
+ // are unlikely to become a fallthrough target after machine block placement.
+ // Tail merging these blocks is unlikely to create additional unconditional
+ // branches, and will reduce the size of this cold code.
+ if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+ blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
+ return true;
+
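// Illustrative sketch (hypothetical source, not part of this patch): both
// guards end in an identical "call a noreturn function" block with no
// successors, exactly the cold code the check above now allows to be merged
// even though only that single call is common.
#include <climits>
#include <cstdlib>
int checked_div(int a, int b) {
  if (b == 0)
    std::abort();                     // cold block #1: call + unreachable
  if (a == INT_MIN && b == -1)
    std::abort();                     // cold block #2, identical to #1
  return a / b;
}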
// If one of the blocks can be completely merged and happens to be in
// a position where the other could fall through into it, merge any number
// of instructions, because it can be done without a branch.
@@ -579,6 +600,22 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
return true;
+ // If both blocks are identical and end in a branch, merge them unless they
+ // both have a fallthrough predecessor and successor.
+ // We can only do this after block placement because it depends on whether
+ // there are fallthroughs, and we don't know until after layout.
+ if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+ auto BothFallThrough = [](MachineBasicBlock *MBB) {
+ if (MBB->succ_size() != 0 && !MBB->canFallThrough())
+ return false;
+ MachineFunction::iterator I(MBB);
+ MachineFunction *MF = MBB->getParent();
+ return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough();
+ };
+ if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2))
+ return true;
+ }
+
// If both blocks have an unconditional branch temporarily stripped out,
// count that as an additional common instruction for the following
// heuristics. This heuristic is only accurate for single-succ blocks, so to
@@ -604,16 +641,6 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
(I1 == MBB1->begin() || I2 == MBB2->begin());
}
-/// ComputeSameTails - Look through all the blocks in MergePotentials that have
-/// hash CurHash (guaranteed to match the last element). Build the vector
-/// SameTails of all those that have the (same) largest number of instructions
-/// in common of any pair of these blocks. SameTails entries contain an
-/// iterator into MergePotentials (from which the MachineBasicBlock can be
-/// found) and a MachineBasicBlock::iterator into that MBB indicating the
-/// instruction where the matching code sequence begins.
-/// Order of elements in SameTails is the reverse of the order in which
-/// those blocks appear in MergePotentials (where they are not necessarily
-/// consecutive).
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
unsigned MinCommonTailLength,
MachineBasicBlock *SuccBB,
@@ -650,8 +677,6 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
return maxCommonTailLength;
}
-/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
-/// MergePotentials, restoring branches at ends of blocks as appropriate.
void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
@@ -671,8 +696,6 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MergePotentials.erase(CurMPIter, MergePotentials.end());
}
-/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
-/// only of the common tail. Create a block that does by splitting one.
bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
MachineBasicBlock *SuccBB,
unsigned maxCommonTailLength,
@@ -723,6 +746,43 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
+void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
+ for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
+ if (i != commonTailIndex)
+ NextCommonInsts[i] = SameTails[i].getTailStartPos();
+ else {
+ assert(SameTails[i].getTailStartPos() == MBB->begin() &&
+ "MBB is not a common tail only block");
+ }
+ }
+
+ for (auto &MI : *MBB) {
+ if (MI.isDebugValue())
+ continue;
+ DebugLoc DL = MI.getDebugLoc();
+ for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
+ if (i == commonTailIndex)
+ continue;
+
+ auto &Pos = NextCommonInsts[i];
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ while (Pos->isDebugValue()) {
+ ++Pos;
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ }
+ assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
+ DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
+ NextCommonInsts[i] = ++Pos;
+ }
+ MI.setDebugLoc(DL);
+ }
+}
+
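// Conceptual sketch of the merging policy applied above, with toy types (the
// real policy lives in DILocation::getMergedLocation and is more nuanced):
// identical locations are kept, while mismatching ones collapse to "no
// location" rather than keeping a misleading one.
struct ToyLoc {
  unsigned Line = 0;                  // 0 stands for "no location"
  bool operator==(const ToyLoc &O) const { return Line == O.Line; }
};
ToyLoc mergeToyLocations(ToyLoc A, ToyLoc B) {
  return (A == B) ? A : ToyLoc{};     // differ -> drop to "no location"
}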
static void
mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
MachineBasicBlock &MBBCommon) {
@@ -875,10 +935,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
- // Remove the original debug location from the common tail.
- for (auto &MI : *MBB)
- if (!MI.isDebugValue())
- MI.setDebugLoc(DebugLoc());
+ // Merge debug locations across identical instructions for common tail.
+ MergeCommonTailDebugLocs(commonTailIndex);
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
@@ -1043,7 +1101,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl = PBB->findBranchDebugLoc();
TII->removeBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
@@ -1193,8 +1251,6 @@ static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
return DebugLoc();
}
-/// OptimizeBlock - Analyze and optimize control flow related to the specified
-/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
@@ -1386,6 +1442,42 @@ ReoptimizeBlock:
}
}
+ if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
+ MF.getFunction()->optForSize()) {
+ // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
+ // direction, thereby defeating careful block placement and regressing
+ // performance. Therefore, only consider this for optsize functions.
+ MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+ if (TII->isUnconditionalTailCall(TailCall)) {
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) {
+ // The predecessor has a conditional branch to this block which consists
+ // of only a tail call. Try to fold the tail call into the conditional
+ // branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+          // to the branch instruction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ ++NumTailCalls;
+ Pred->removeSuccessor(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ }
+
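// Illustrative -Os style example (hypothetical source, not part of this
// patch): the error path becomes a block holding only an unconditional tail
// call, reached through a conditional branch from its single predecessor, so
// on targets supporting it the branch itself can become a conditional tail
// call to handle_error.
int handle_error(int v);              // assumed external handler
int process(int v) {
  if (v < 0)
    return handle_error(v);           // tail call in a single-predecessor block
  return v * 2;
}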
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
@@ -1599,8 +1691,6 @@ ReoptimizeBlock:
// Hoist Common Code
//===----------------------------------------------------------------------===//
-/// HoistCommonCode - Hoist common instruction sequences at the start of basic
-/// blocks to their common predecessor.
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
@@ -1734,9 +1824,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
return PI;
}
-/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
-/// sequence at the start of the function, move the instructions before MBB
-/// terminator if it's legal.
bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index fc48e484292d6..4852721eea102 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -37,6 +37,9 @@ namespace llvm {
// flag. Ignored for optsize.
unsigned MinCommonTailLength = 0);
+    /// Perform branch folding, tail merging and other CFG optimizations on the
+ /// given function. Block placement changes the layout and may create new
+ /// tail merging opportunities.
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
MachineLoopInfo *mli = nullptr,
@@ -122,6 +125,8 @@ namespace llvm {
const MachineBasicBlock *MBB) const;
raw_ostream &printBlockFreq(raw_ostream &OS,
const BlockFrequency Freq) const;
+ void view(const Twine &Name, bool isSimple = true);
+ uint64_t getEntryFreq() const;
private:
const MachineBlockFrequencyInfo &MBFI;
@@ -137,26 +142,64 @@ namespace llvm {
MachineBasicBlock* PredBB,
unsigned MinCommonTailLength);
void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
+
+ /// Delete the instruction OldInst and everything after it, replacing it
+ /// with an unconditional branch to NewDest.
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
+
+ /// Given a machine basic block and an iterator into it, split the MBB so
+ /// that the part before the iterator falls into the part starting at the
+ /// iterator. This returns the new MBB.
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1,
const BasicBlock *BB);
+
+ /// Look through all the blocks in MergePotentials that have hash CurHash
+ /// (guaranteed to match the last element). Build the vector SameTails of
+ /// all those that have the (same) largest number of instructions in common
+ /// of any pair of these blocks. SameTails entries contain an iterator into
+ /// MergePotentials (from which the MachineBasicBlock can be found) and a
+ /// MachineBasicBlock::iterator into that MBB indicating the instruction
+ /// where the matching code sequence begins. Order of elements in SameTails
+ /// is the reverse of the order in which those blocks appear in
+ /// MergePotentials (where they are not necessarily consecutive).
unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB);
+
+ /// Remove all blocks with hash CurHash from MergePotentials, restoring
+ /// branches at ends of blocks as appropriate.
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+
+ /// None of the blocks to be tail-merged consist only of the common tail.
+ /// Create a block that does by splitting one.
bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
MachineBasicBlock *SuccBB,
unsigned maxCommonTailLength,
unsigned &commonTailIndex);
+    /// Create merged DebugLocs of identical instructions across SameTails and
+    /// assign them to the instructions in the common tail.
+ void MergeCommonTailDebugLocs(unsigned commonTailIndex);
+
bool OptimizeBranches(MachineFunction &MF);
+
+ /// Analyze and optimize control flow related to the specified block. This
+ /// is never called on the entry block.
bool OptimizeBlock(MachineBasicBlock *MBB);
+
+ /// Remove the specified dead machine basic block from the function,
+ /// updating the CFG.
void RemoveDeadBlock(MachineBasicBlock *MBB);
+ /// Hoist common instruction sequences at the start of basic blocks to their
+ /// common predecessor.
bool HoistCommonCode(MachineFunction &MF);
+
+    /// If the successors of MBB have a common instruction sequence at the
+    /// start of each block, move those instructions before MBB's terminator
+    /// if it is legal.
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
};
}
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index 8b27570a17f47..7af1369416615 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -126,14 +126,16 @@ void BranchRelaxation::verify() {
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void BranchRelaxation::dumpBBs() {
+LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
for (auto &MBB : *MF) {
const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
}
}
+#endif
/// scanFunction - Do the initial scan of the function, building up
/// information about each block.
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index ff7c99de0420e..e4eab8c513d99 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -1,4 +1,4 @@
-//===-- BuiltinGCs.cpp - Boilerplate for our built in GC types --*- C++ -*-===//
+//===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#include "llvm/CodeGen/GCs.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -77,6 +79,7 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
+
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
const PointerType *PT = cast<PointerType>(Ty);
@@ -110,6 +113,7 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
+
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
const PointerType *PT = cast<PointerType>(Ty);
@@ -117,7 +121,8 @@ public:
return (1 == PT->getAddressSpace());
}
};
-}
+
+} // end anonymous namespace
// Register all the above so that they can be found at runtime. Note that
// these static initializers are important since the registration list is
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 398ea88363b64..0912d9f68aff2 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMCodeGen
Analysis.cpp
AtomicExpandPass.cpp
BasicTargetTransformInfo.cpp
+ BranchCoalescing.cpp
BranchFolding.cpp
BranchRelaxation.cpp
BuiltinGCs.cpp
@@ -23,6 +24,7 @@ add_llvm_library(LLVMCodeGen
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
FaultMaps.cpp
+ FEntryInserter.cpp
FuncletLayout.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
@@ -36,6 +38,7 @@ add_llvm_library(LLVMCodeGen
InterleavedAccessPass.cpp
IntrinsicLowering.cpp
LatencyPriorityQueue.cpp
+ LazyMachineBlockFrequencyInfo.cpp
LexicalScopes.cpp
LiveDebugValues.cpp
LiveDebugVariables.cpp
@@ -46,6 +49,7 @@ add_llvm_library(LLVMCodeGen
LiveRangeCalc.cpp
LiveRangeEdit.cpp
LiveRegMatrix.cpp
+ LiveRegUnits.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
LLVMTargetMachine.cpp
@@ -70,6 +74,8 @@ add_llvm_library(LLVMCodeGen
MachineLoopInfo.cpp
MachineModuleInfo.cpp
MachineModuleInfoImpls.cpp
+ MachineOptimizationRemarkEmitter.cpp
+ MachineOutliner.cpp
MachinePassRegistry.cpp
MachinePipeliner.cpp
MachinePostDominators.cpp
@@ -147,7 +153,7 @@ add_llvm_library(LLVMCodeGen
${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen
${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen/PBQP
- LINK_LIBS ${PTHREAD_LIB}
+ LINK_LIBS ${LLVM_PTHREAD_LIB}
DEPENDS
intrinsics_gen
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 2e33f14c7ee3e..7cad4d0311694 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -30,8 +30,7 @@ using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
: CallingConv(CC), IsVarArg(isVarArg), MF(mf),
- TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C),
- CallOrPrologue(Unknown) {
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) {
// No stack is used.
StackOffset = 0;
MaxStackArgAlign = 1;
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 4cf9b138f10d3..3fc12ccc3b60c 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
+ initializeBranchCoalescingPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCodeGenPreparePass(Registry);
@@ -31,12 +32,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfConverterPass(Registry);
initializeExpandISelPseudosPass(Registry);
initializeExpandPostRAPass(Registry);
+ initializeFEntryInserterPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
+ initializeImplicitNullChecksPass(Registry);
initializeInterleavedAccessPass(Registry);
+ initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
initializeLiveStacksPass(Registry);
@@ -47,7 +51,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCSEPass(Registry);
- initializeImplicitNullChecksPass(Registry);
initializeMachineCombinerPass(Registry);
initializeMachineCopyPropagationPass(Registry);
initializeMachineDominatorTreePass(Registry);
@@ -55,16 +58,18 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachineOptimizationRemarkEmitterPassPass(Registry);
+ initializeMachineOutlinerPass(Registry);
initializeMachinePipelinerPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineRegionInfoPassPass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
- initializeXRayInstrumentationPass(Registry);
- initializePatchableFunctionPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
+ initializePatchableFunctionPass(Registry);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRAHazardRecognizerPass(Registry);
@@ -74,12 +79,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
+ initializeSafeStackPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
initializeStackMapLivenessPass(Registry);
- initializeLiveDebugValuesPass(Registry);
- initializeSafeStackPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
@@ -91,6 +95,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
initializeWinEHPreparePass(Registry);
+ initializeXRayInstrumentationPass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 934b470f13b5f..2bdd189557b40 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -15,10 +15,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -53,8 +55,10 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -77,7 +81,6 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
-STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
static cl::opt<bool> DisableBranchOpts(
@@ -93,7 +96,7 @@ static cl::opt<bool> DisableSelectToBranch(
cl::desc("Disable select to branch conversion."));
static cl::opt<bool> AddrSinkUsingGEPs(
- "addr-sink-using-gep", cl::Hidden, cl::init(false),
+ "addr-sink-using-gep", cl::Hidden, cl::init(true),
cl::desc("Address sinking in CGP using GEPs."));
static cl::opt<bool> EnableAndCmpSinking(
@@ -135,15 +138,24 @@ static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
+static cl::opt<bool>
+EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+ cl::desc("Enable merging of redundant sexts when one is dominating"
+ " the other."), cl::init(true));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+typedef SmallVector<Instruction *, 16> SExts;
+typedef DenseMap<Value *, SExts> ValueToSExts;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM;
+ const TargetSubtargetInfo *SubtargetInfo;
const TargetLowering *TLI;
+ const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
@@ -165,6 +177,15 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
+ /// Keep track of instructions removed during promotion.
+ SetOfInstrs RemovedInsts;
+
+ /// Keep track of sext chains based on their initial value.
+ DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+ /// Keep track of SExt promoted.
+ ValueToSExts ValToSExtendedUses;
+
/// True if CFG is modified in any way.
bool ModifiedDT;
@@ -206,7 +227,7 @@ class TypePromotionTransaction;
Type *AccessTy, unsigned AS);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *I);
bool optimizeSelectInst(SelectInst *SI);
@@ -215,13 +236,21 @@ class TypePromotionTransaction;
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB);
bool placeDbgValues(Function &F);
- bool sinkAndCmp(Function &F);
- bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
- Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstCost);
+ bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
+ LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
+ bool tryToPromoteExts(TypePromotionTransaction &TPT,
+ const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost = 0);
+ bool mergeSExts(Function &F);
+ bool performAddressTypePromotion(
+ Instruction *&Inst,
+ bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
+ bool splitIndirectCriticalEdges(Function &F);
};
}
@@ -250,8 +279,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset();
ModifiedDT = false;
- if (TM)
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (TM) {
+ SubtargetInfo = TM->getSubtargetImpl(F);
+ TLI = SubtargetInfo->getTargetLowering();
+ TRI = SubtargetInfo->getRegisterInfo();
+ }
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
@@ -260,9 +292,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ProfileGuidedSectionPrefix) {
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- if (PSI->isFunctionEntryHot(&F))
+ if (PSI->isFunctionHotInCallGraph(&F))
F.setSectionPrefix(".hot");
- else if (PSI->isFunctionEntryCold(&F))
+ else if (PSI->isFunctionColdInCallGraph(&F))
F.setSectionPrefix(".cold");
}
@@ -290,18 +322,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// find a node corresponding to the value.
EverMadeChange |= placeDbgValues(F);
- // If there is a mask, compare against zero, and branch that can be combined
- // into a single target instruction, push the mask and compare into branch
- // users. Do this before OptimizeBlock -> OptimizeInst ->
- // OptimizeCmpExpression, which perturbs the pattern being searched for.
- if (!DisableBranchOpts) {
- EverMadeChange |= sinkAndCmp(F);
+ if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
- }
+
+ // Split some critical edges where one of the sources is an indirect branch,
+ // to help generate sane code for PHIs involving such edges.
+ EverMadeChange |= splitIndirectCriticalEdges(F);
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -311,6 +344,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ModifiedDTOnIteration)
break;
}
+ if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+ MadeChange |= mergeSExts(F);
+
+    // Really free the instructions removed during promotion.
+ for (Instruction *I : RemovedInsts)
+ delete I;
+
EverMadeChange |= MadeChange;
}
@@ -432,6 +472,154 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
return DestBB;
}
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+ // If the block doesn't have any PHIs, we don't care about it, since there's
+ // no point in splitting it.
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (!PN)
+ return nullptr;
+
+ // Verify we have exactly one IBR predecessor.
+ // Conservatively bail out if one of the other predecessors is not a "regular"
+ // terminator (that is, not a switch or a br).
+ BasicBlock *IBB = nullptr;
+ for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
+ BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ switch (PredTerm->getOpcode()) {
+ case Instruction::IndirectBr:
+ if (IBB)
+ return nullptr;
+ IBB = PredBB;
+ break;
+ case Instruction::Br:
+ case Instruction::Switch:
+ OtherPreds.push_back(PredBB);
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ return IBB;
+}
+
+// Split critical edges where the source of the edge is an indirectbr
+// instruction. This isn't always possible, but we can handle some easy cases.
+// This is useful because MI is unable to split such critical edges,
+// which means it will not be able to sink instructions along those edges.
+// This is especially painful for indirect branches with many successors, where
+// we end up having to prepare all outgoing values in the origin block.
+//
+// Our normal algorithm for splitting critical edges requires us to update
+// the outgoing edges of the edge origin block, but for an indirectbr this
+// is hard, since it would require finding and updating the block addresses
+// the indirect branch uses. But if a block only has a single indirectbr
+// predecessor, with the others being regular branches, we can do it in a
+// different way.
+// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
+// We can split D into D0 and D1, where D0 contains only the PHIs from D,
+// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
+// create the following structure:
+// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
+bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
+ // Check whether the function has any indirectbrs, and collect which blocks
+ // they may jump to. Since most functions don't have indirect branches,
+ // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+ SmallSetVector<BasicBlock *, 16> Targets;
+ for (auto &BB : F) {
+ auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+ if (!IBI)
+ continue;
+
+ for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+ Targets.insert(IBI->getSuccessor(Succ));
+ }
+
+ if (Targets.empty())
+ return false;
+
+ bool Changed = false;
+ for (BasicBlock *Target : Targets) {
+ SmallVector<BasicBlock *, 16> OtherPreds;
+ BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+    // If we did not find an indirectbr, or the indirectbr is the only
+ // incoming edge, this isn't the kind of edge we're looking for.
+ if (!IBRPred || OtherPreds.empty())
+ continue;
+
+ // Don't even think about ehpads/landingpads.
+ Instruction *FirstNonPHI = Target->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+ continue;
+
+ BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+ // It's possible Target was its own successor through an indirectbr.
+ // In this case, the indirectbr now comes from BodyBlock.
+ if (IBRPred == Target)
+ IBRPred = BodyBlock;
+
+ // At this point Target only has PHIs, and BodyBlock has the rest of the
+ // block's body. Create a copy of Target that will be used by the "direct"
+ // preds.
+ ValueToValueMapTy VMap;
+ BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+ for (BasicBlock *Pred : OtherPreds)
+ Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+
+ // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
+  // they are clones, so the number of PHIs is the same.
+ // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+ // (b) Leave that as the only edge in the "Indirect" PHI.
+ // (c) Merge the two in the body block.
+ BasicBlock::iterator Indirect = Target->begin(),
+ End = Target->getFirstNonPHI()->getIterator();
+ BasicBlock::iterator Direct = DirectSucc->begin();
+ BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+ assert(&*End == Target->getTerminator() &&
+ "Block was expected to only contain PHIs");
+
+ while (Indirect != End) {
+ PHINode *DirPHI = cast<PHINode>(Direct);
+ PHINode *IndPHI = cast<PHINode>(Indirect);
+
+ // Now, clean up - the direct block shouldn't get the indirect value,
+ // and vice versa.
+ DirPHI->removeIncomingValue(IBRPred);
+ Direct++;
+
+ // Advance the pointer here, to avoid invalidation issues when the old
+ // PHI is erased.
+ Indirect++;
+
+ PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+ NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+ IBRPred);
+
+ // Create a PHI in the body block, to merge the direct and indirect
+ // predecessors.
+ PHINode *MergePHI =
+ PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ MergePHI->addIncoming(NewIndPHI, Target);
+ MergePHI->addIncoming(DirPHI, DirectSucc);
+
+ IndPHI->replaceAllUsesWith(MergePHI);
+ IndPHI->eraseFromParent();
+ }
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
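An illustrative LLVM IR sketch of the transformation described above (function, block, and value names are made up for this example, not taken from the patch):

; Before: %D has one direct predecessor (%A) and one indirectbr predecessor (%I).
define i32 @before(i8* %addr, i1 %c) {
entry:
  br i1 %c, label %A, label %I
A:
  br label %D
I:
  indirectbr i8* %addr, [label %D]
D:
  %v = phi i32 [ 0, %A ], [ 1, %I ]
  ret i32 %v
}

; After (sketch): the direct predecessor is retargeted to a PHI-only clone,
; the original block keeps the indirectbr edge, and both feed a merge PHI in
; the split-off body block.
define i32 @after(i8* %addr, i1 %c) {
entry:
  br i1 %c, label %A, label %I
A:
  br label %D.clone
I:
  indirectbr i8* %addr, [label %D]
D.clone:
  %v.dir = phi i32 [ 0, %A ]
  br label %D.split
D:
  %v.ind = phi i32 [ 1, %I ]
  br label %D.split
D.split:
  %v = phi i32 [ %v.dir, %D.clone ], [ %v.ind, %D ]
  ret i32 %v
}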
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -1090,6 +1278,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
return false;
}
+/// Duplicate and sink the given 'and' instruction into user blocks where it is
+/// used in a compare to allow isel to generate better code for targets where
+/// this operation can be combined.
+///
+/// Return true if any changes are made.
+static bool sinkAndCmp0Expression(Instruction *AndI,
+ const TargetLowering &TLI,
+ SetOfInstrs &InsertedInsts) {
+ // Double-check that we're not trying to optimize an instruction that was
+ // already optimized by some other part of this pass.
+ assert(!InsertedInsts.count(AndI) &&
+ "Attempting to optimize already optimized and instruction");
+ (void) InsertedInsts;
+
+ // Nothing to do for single use in same basic block.
+ if (AndI->hasOneUse() &&
+ AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
+ return false;
+
+ // Try to avoid cases where sinking/duplicating is likely to increase register
+ // pressure.
+ if (!isa<ConstantInt>(AndI->getOperand(0)) &&
+ !isa<ConstantInt>(AndI->getOperand(1)) &&
+ AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
+ return false;
+
+ for (auto *U : AndI->users()) {
+ Instruction *User = cast<Instruction>(U);
+
+    // Only sink an 'and' mask feeding an icmp with 0.
+ if (!isa<ICmpInst>(User))
+ return false;
+
+ auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
+ if (!CmpC || !CmpC->isZero())
+ return false;
+ }
+
+ if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
+ return false;
+
+ DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
+ DEBUG(AndI->getParent()->dump());
+
+ // Push the 'and' into the same block as the icmp 0. There should only be
+ // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
+ // others, so we don't need to keep track of which BBs we insert into.
+ for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
+
+ // Keep the 'and' in the same place if the use is already in the same block.
+ Instruction *InsertPt =
+ User->getParent() == AndI->getParent() ? AndI : User;
+ Instruction *InsertedAnd =
+ BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
+ AndI->getOperand(1), "", InsertPt);
+ // Propagate the debug info.
+ InsertedAnd->setDebugLoc(AndI->getDebugLoc());
+
+ // Replace a use of the 'and' with a use of the new 'and'.
+ TheUse = InsertedAnd;
+ ++NumAndUses;
+ DEBUG(User->getParent()->dump());
+ }
+
+ // We removed all uses, nuke the and.
+ AndI->eraseFromParent();
+ return true;
+}
+
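A small IR sketch (names invented for illustration) of the effect of sinkAndCmp0Expression when an 'and' in one block feeds only icmp-with-zero users in other blocks:

; Before: the 'and' lives in %entry, its only user is the icmp in %other.
define i1 @before(i32 %x, i1 %c) {
entry:
  %and = and i32 %x, 255
  br i1 %c, label %other, label %exit
other:
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
exit:
  ret i1 false
}

; After (sketch): the 'and' is duplicated next to its icmp user and the
; original is erased, so isel can fold the mask and compare in one block.
define i1 @after(i32 %x, i1 %c) {
entry:
  br i1 %c, label %other, label %exit
other:
  %and = and i32 %x, 255
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
exit:
  ret i1 false
}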
/// Check if the candidates could be combined with a shift instruction, which
/// includes:
/// 1. Truncate instruction
@@ -2028,16 +2293,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
if (TLI) {
- // Unknown address space.
- // TODO: Target hook to pick which address space the intrinsic cares
- // about?
- unsigned AddrSpace = ~0u;
SmallVector<Value*, 2> PtrOps;
Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
- while (!PtrOps.empty())
- if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
+ if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty()) {
+ Value *PtrVal = PtrOps.pop_back_val();
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
return true;
+ }
}
}
@@ -2168,11 +2432,11 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias))
+ AttributeList CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
@@ -2561,25 +2825,30 @@ class TypePromotionTransaction {
OperandsHider Hider;
/// Keep track of the uses replaced, if any.
UsesReplacer *Replacer;
+ /// Keep track of instructions removed.
+ SetOfInstrs &RemovedInsts;
public:
    /// \brief Remove all references to \p Inst and optionally replace all its
/// uses with New.
+ /// \p RemovedInsts Keep track of the instructions removed by this Action.
/// \pre If !Inst->use_empty(), then New != nullptr
- InstructionRemover(Instruction *Inst, Value *New = nullptr)
+ InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+ Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
- Replacer(nullptr) {
+ Replacer(nullptr), RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ RemovedInsts.insert(Inst);
+ /// The instructions removed here will be freed after completing
+ /// optimizeBlock() for all blocks as we need to keep track of the
+ /// removed instructions during promotion.
Inst->removeFromParent();
}
~InstructionRemover() override { delete Replacer; }
- /// \brief Really remove the instruction.
- void commit() override { delete Inst; }
-
/// \brief Resurrect the instruction and reassign it to the proper uses if
    /// a new value was provided when building this action.
void undo() override {
@@ -2588,6 +2857,7 @@ class TypePromotionTransaction {
if (Replacer)
Replacer->undo();
Hider.undo();
+ RemovedInsts.erase(Inst);
}
};
@@ -2596,6 +2866,10 @@ public:
/// The restoration point is a pointer to an action instead of an iterator
/// because the iterator may be invalidated but not the pointer.
typedef const TypePromotionAction *ConstRestorationPt;
+
+ TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+ : RemovedInsts(RemovedInsts) {}
+
  /// Advocate every change made in that transaction.
void commit();
/// Undo all the changes made after the given point.
@@ -2627,6 +2901,7 @@ private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+ SetOfInstrs &RemovedInsts;
};
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@@ -2638,7 +2913,8 @@ void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+ make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
+ RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@@ -2705,8 +2981,8 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetMachine &TM;
const TargetLowering &TLI;
+ const TargetRegisterInfo &TRI;
const DataLayout &DL;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
@@ -2731,14 +3007,14 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetMachine &TM, Type *AT, unsigned AS,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
+ Type *AT, unsigned AS,
Instruction *MI, ExtAddrMode &AM,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TM(TM),
- TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
- ->getTargetLowering()),
+ : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
PromotedInsts(PromotedInsts), TPT(TPT) {
@@ -2756,13 +3032,15 @@ public:
static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetMachine &TM,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
+ AccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -3583,18 +3861,18 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
/// Check to see if all uses of OpVal by the specified inline asm call are due
/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetMachine &TM) {
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI) {
const Function *F = CI->getParent()->getParent();
- const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
+ TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
ImmutableCallSite(CI));
+
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -3613,7 +3891,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
- SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -3635,11 +3914,28 @@ static bool FindAllMemoryUses(
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
unsigned opNo = U.getOperandNo();
- if (opNo == 0) return true; // Storing addr, not into addr.
+ if (opNo != StoreInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicRMWInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(RMW, opNo));
+ continue;
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(CmpX, opNo));
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
@@ -3650,12 +3946,12 @@ static bool FindAllMemoryUses(
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TM))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI))
return true;
}
@@ -3743,7 +4039,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -3775,7 +4071,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
+ AddressAccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -3844,7 +4141,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
- TypePromotionTransaction TPT;
+ TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
@@ -3869,7 +4166,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// addressing instructions might have.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
+ V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI,
InsertedInsts, PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
@@ -3935,11 +4232,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
} else if (AddrSinkUsingGEPs ||
(!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
- TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
- ->useAA())) {
+ SubtargetInfo->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -4042,7 +4338,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// We need to add this separately from the scale above to help with
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
@@ -4053,12 +4349,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = ResultPtr;
} else {
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
}
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -4185,14 +4481,14 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
-/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// \brief Check if all the uses of \p Val are equivalent (or free) zero or
/// sign extensions.
-static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
- assert(!Inst->use_empty() && "Input must have at least one use");
- const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
+ assert(!Val->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
bool IsSExt = isa<SExtInst>(FirstUser);
Type *ExtTy = FirstUser->getType();
- for (const User *U : Inst->users()) {
+ for (const User *U : Val->users()) {
const Instruction *UI = cast<Instruction>(U);
if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
return false;
@@ -4202,11 +4498,11 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
continue;
// If IsSExt is true, we are in this situation:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty1 a to ty3
// Assuming ty2 is shorter than ty3, this could be turned into:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty2 b to ty3
// However, the last sext is not free.
@@ -4233,51 +4529,44 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
return true;
}
-/// \brief Try to form ExtLd by promoting \p Exts until they reach a
-/// load instruction.
-/// If an ext(load) can be formed, it is returned via \p LI for the load
-/// and \p Inst for the extension.
-/// Otherwise LI == nullptr and Inst == nullptr.
-/// When some promotion happened, \p TPT contains the proper state to
-/// revert them.
+/// \brief Try to speculatively promote extensions in \p Exts and continue
+/// promoting through newly promoted operands recursively as far as doing so is
+/// profitable. Save extensions profitably moved up in \p ProfitablyMovedExts.
+/// When some promotion happened, \p TPT contains the proper state to revert
+/// them.
///
-/// \return true when promoting was necessary to expose the ext(load)
-/// opportunity, false otherwise.
-///
-/// Example:
-/// \code
-/// %ld = load i32* %addr
-/// %add = add nuw i32 %ld, 4
-/// %zext = zext i32 %add to i64
-/// \endcode
-/// =>
-/// \code
-/// %ld = load i32* %addr
-/// %zext = zext i32 %ld to i64
-/// %add = add nuw i64 %zext, 4
-/// \encode
-/// Thanks to the promotion, we can match zext(load i32*) to i64.
-bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
- LoadInst *&LI, Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstsCost = 0) {
- // Iterate over all the extensions to see if one form an ext(load).
+/// \return true if some promotion happened, false otherwise.
+bool CodeGenPrepare::tryToPromoteExts(
+ TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost) {
+ bool Promoted = false;
+
+ // Iterate over all the extensions to try to promote them.
for (auto I : Exts) {
- // Check if we directly have ext(load).
- if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
- Inst = I;
- // No promotion happened here.
- return false;
+ // Early check if we directly have ext(load).
+ if (isa<LoadInst>(I->getOperand(0))) {
+ ProfitablyMovedExts.push_back(I);
+ continue;
}
- // Check whether or not we want to do any promotion.
+
+ // Check whether or not we want to do any promotion. The reason we have
+ // this check inside the for loop is to catch the case where an extension
+  // is directly fed by a load because in such a case the extension can be moved
+ // up without any promotion on its operands.
if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
- continue;
+ return false;
+
// Get the action to perform the promotion.
- TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- I, InsertedInsts, *TLI, PromotedInsts);
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
// Check if we can promote.
- if (!TPH)
+ if (!TPH) {
+ // Save the current extension as we cannot move up through its operand.
+ ProfitablyMovedExts.push_back(I);
continue;
+ }
+
// Save the current state.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4297,110 +4586,293 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
- TotalCreatedInstsCost -= ExtCost;
+ // FIXME: It would be possible to propagate a negative value instead of
+ // conservatively ceiling it to 0.
+ TotalCreatedInstsCost =
+ std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
if (!StressExtLdPromotion &&
(TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
- // The promotion is not profitable, rollback to the previous state.
+ // This promotion is not profitable, rollback to the previous state, and
+ // save the current extension in ProfitablyMovedExts as the latest
+ // speculative promotion turned out to be unprofitable.
TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+ // Continue promoting NewExts as far as doing so is profitable.
+ SmallVector<Instruction *, 2> NewlyMovedExts;
+ (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
+ bool NewPromoted = false;
+ for (auto ExtInst : NewlyMovedExts) {
+ Instruction *MovedExt = cast<Instruction>(ExtInst);
+ Value *ExtOperand = MovedExt->getOperand(0);
+      // If we have reached a load, we need this extra profitability check
+ // as it could potentially be merged into an ext(load).
+ if (isa<LoadInst>(ExtOperand) &&
+ !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
+ (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
+ continue;
+
+ ProfitablyMovedExts.push_back(MovedExt);
+ NewPromoted = true;
+ }
+
+    // If none of the speculative promotions for NewExts is profitable, roll back
+ // and save the current extension (I) as the last profitable extension.
+ if (!NewPromoted) {
+ TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
continue;
}
// The promotion is profitable.
- // Check if it exposes an ext(load).
- (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
- if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
- // If we have created a new extension, i.e., now we have two
- // extensions. We must make sure one of them is merged with
- // the load, otherwise we may degrade the code quality.
- (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
- // Promotion happened.
- return true;
- // If this does not help to expose an ext(load) then, rollback.
- TPT.rollback(LastKnownGood);
+ Promoted = true;
}
- // None of the extension can form an ext(load).
- LI = nullptr;
- Inst = nullptr;
- return false;
+ return Promoted;
}
-/// Move a zext or sext fed by a load into the same basic block as the load,
-/// unless conditions are unfavorable. This allows SelectionDAG to fold the
-/// extend into the load.
-/// \p I[in/out] the extension may be modified during the process if some
-/// promotions apply.
-///
-bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
- // ExtLoad formation infrastructure requires TLI to be effective.
- if (!TLI)
- return false;
+/// Merge redundant sexts when one dominates the other.
+bool CodeGenPrepare::mergeSExts(Function &F) {
+ DominatorTree DT(F);
+ bool Changed = false;
+ for (auto &Entry : ValToSExtendedUses) {
+ SExts &Insts = Entry.second;
+ SExts CurPts;
+ for (Instruction *Inst : Insts) {
+ if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+ Inst->getOperand(0) != Entry.first)
+ continue;
+ bool inserted = false;
+ for (auto &Pt : CurPts) {
+ if (DT.dominates(Inst, Pt)) {
+ Pt->replaceAllUsesWith(Inst);
+ RemovedInsts.insert(Pt);
+ Pt->removeFromParent();
+ Pt = Inst;
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!DT.dominates(Pt, Inst))
+ // Give up if we need to merge in a common dominator as the
+        // experiments show it is not profitable.
+ continue;
+ Inst->replaceAllUsesWith(Pt);
+ RemovedInsts.insert(Inst);
+ Inst->removeFromParent();
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!inserted)
+ CurPts.push_back(Inst);
+ }
+ }
+ return Changed;
+}
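A tiny IR sketch (illustrative names only) of the redundancy mergeSExts targets; the sexts it considers are those recorded in ValToSExtendedUses during address type promotion:

; Before: both blocks sign-extend the same value and %s1 dominates %s2.
define i64 @before(i32 %v, i1 %c) {
entry:
  %s1 = sext i32 %v to i64
  br i1 %c, label %use, label %exit
use:
  %s2 = sext i32 %v to i64
  ret i64 %s2
exit:
  ret i64 %s1
}

; After (sketch): uses of the dominated sext are rewired to the dominating
; one and the dominated sext is removed.
define i64 @after(i32 %v, i1 %c) {
entry:
  %s1 = sext i32 %v to i64
  br i1 %c, label %use, label %exit
use:
  ret i64 %s1
exit:
  ret i64 %s1
}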
- // Try to promote a chain of computation if it allows to form
- // an extended load.
- TypePromotionTransaction TPT;
- TypePromotionTransaction::ConstRestorationPt LastKnownGood =
- TPT.getRestorationPoint();
- SmallVector<Instruction *, 1> Exts;
- Exts.push_back(I);
- // Look for a load being extended.
- LoadInst *LI = nullptr;
- Instruction *OldExt = I;
- bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
- if (!LI || !I) {
- assert(!HasPromoted && !LI && "If we did not match any load instruction "
- "the code must remain the same");
- I = OldExt;
- return false;
+/// Return true if an ext(load) can be formed from an extension in
+/// \p MovedExts.
+bool CodeGenPrepare::canFormExtLd(
+ const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
+ Instruction *&Inst, bool HasPromoted) {
+ for (auto *MovedExtInst : MovedExts) {
+ if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
+ LI = cast<LoadInst>(MovedExtInst->getOperand(0));
+ Inst = MovedExtInst;
+ break;
+ }
}
+ if (!LI)
+ return false;
// If they're already in the same block, there's nothing to do.
// Make the cheap checks first if we did not promote.
// If we promoted, we need to check if it is indeed profitable.
- if (!HasPromoted && LI->getParent() == I->getParent())
+ if (!HasPromoted && LI->getParent() == Inst->getParent())
return false;
- EVT VT = TLI->getValueType(*DL, I->getType());
+ EVT VT = TLI->getValueType(*DL, Inst->getType());
EVT LoadVT = TLI->getValueType(*DL, LI->getType());
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
- if (!LI->hasOneUse() &&
- (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
- !TLI->isTruncateFree(I->getType(), LI->getType())) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+ if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
+ !TLI->isTruncateFree(Inst->getType(), LI->getType()))
return false;
- }
// Check whether the target supports casts folded into loads.
unsigned LType;
- if (isa<ZExtInst>(I))
+ if (isa<ZExtInst>(Inst))
LType = ISD::ZEXTLOAD;
else {
- assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ assert(isa<SExtInst>(Inst) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+
+ return TLI->isLoadExtLegal(LType, VT, LoadVT);
+}
+
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
+///
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
+/// allows us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign-extended
+/// value used in memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+ // ExtLoad formation and address type promotion infrastructure requires TLI to
+ // be effective.
+ if (!TLI)
return false;
+
+ bool AllowPromotionWithoutCommonHeader = false;
+  /// See if it is an interesting sext operation for the address type
+  /// promotion before trying to promote it, e.g., one with the right
+  /// type and used in memory accesses.
+ bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+ *Inst, AllowPromotionWithoutCommonHeader);
+ TypePromotionTransaction TPT(RemovedInsts);
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+ Exts.push_back(Inst);
+
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
+
+ // Look for a load being extended.
+ LoadInst *LI = nullptr;
+ Instruction *ExtFedByLoad;
+
+  // Try to promote a chain of computation if it allows us to form an extended
+ // load.
+ if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+ assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+ TPT.commit();
+ // Move the extend into the same block as the load
+ ExtFedByLoad->removeFromParent();
+ ExtFedByLoad->insertAfter(LI);
+ // CGP does not check if the zext would be speculatively executed when moved
+ // to the same basic block as the load. Preserving its original location
+ // would pessimize the debugging experience, as well as negatively impact
+ // the quality of sample pgo. We don't want to use "line 0" as that has a
+ // size cost in the line-table section and logically the zext can be seen as
+ // part of the load. Therefore we conservatively reuse the same debug
+ // location for the load and the zext.
+ ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
+ ++NumExtsMoved;
+ Inst = ExtFedByLoad;
+ return true;
}
- // Move the extend into the same block as the load, so that SelectionDAG
- // can fold it.
- TPT.commit();
- I->removeFromParent();
- I->insertAfter(LI);
- // CGP does not check if the zext would be speculatively executed when moved
- // to the same basic block as the load. Preserving its original location would
- // pessimize the debugging experience, as well as negatively impact the
- // quality of sample pgo. We don't want to use "line 0" as that has a
- // size cost in the line-table section and logically the zext can be seen as
- // part of the load. Therefore we conservatively reuse the same debug location
- // for the load and the zext.
- I->setDebugLoc(LI->getDebugLoc());
- ++NumExtsMoved;
- return true;
+  // Continue with address type promotion if the target considers it worthwhile.
+ if (ATPConsiderable &&
+ performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+ HasPromoted, TPT, SpeculativelyMovedExts))
+ return true;
+
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we should find other sext
+// instructions that sign extended the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, we expect that promoting the
+// extension is profitable by itself.
+bool CodeGenPrepare::performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+ bool Promoted = false;
+ SmallPtrSet<Instruction *, 1> UnhandledExts;
+ bool AllSeenFirst = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+ SeenChainsForSExt.find(HeadOfChain);
+ // If there is an unhandled SExt which has the same header, try to promote
+ // it as well.
+ if (AlreadySeen != SeenChainsForSExt.end()) {
+ if (AlreadySeen->second != nullptr)
+ UnhandledExts.insert(AlreadySeen->second);
+ AllSeenFirst = false;
+ }
+ }
+
+ if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+ SpeculativelyMovedExts.size() == 1)) {
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+    // Update Inst as promotion happened.
+ Inst = SpeculativelyMovedExts.pop_back_val();
+ } else {
+    // This is the first chain visited from the header; keep the current chain
+    // as unhandled. Defer promoting it until we encounter another SExt
+ // chain derived from the same header.
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = Inst;
+ }
+ return false;
+ }
+
+ if (!AllSeenFirst && !UnhandledExts.empty())
+ for (auto VisitedSExt : UnhandledExts) {
+ if (RemovedInsts.count(VisitedSExt))
+ continue;
+ TypePromotionTransaction TPT(RemovedInsts);
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> Chains;
+ Exts.push_back(VisitedSExt);
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : Chains) {
+ Value *HeadOfChain = I->getOperand(0);
+ // Mark this as handled.
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ }
+ return Promoted;
}
bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@@ -4534,13 +5006,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
!(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
return false;
- // Skip loads we've already transformed or have no reason to transform.
- if (Load->hasOneUse()) {
- User *LoadUser = *Load->user_begin();
- if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
- !dyn_cast<PHINode>(LoadUser))
- return false;
- }
+ // Skip loads we've already transformed.
+ if (Load->hasOneUse() &&
+ InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
+ return false;
// Look at all uses of Load, looking through phis, to determine how many bits
// of the loaded value are needed.
@@ -4620,7 +5089,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
//
// Also avoid hoisting if we didn't see any ands with the exact DemandBits
// mask, since these are the only ands that will be removed by isel.
- if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
+ if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
WidestAndBits != DemandBits)
return false;
@@ -4636,6 +5105,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
IRBuilder<> Builder(Load->getNextNode());
auto *NewAnd = dyn_cast<Instruction>(
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(NewAnd);
// Replace all uses of load with new and (except for the use of load in the
// new and itself).
@@ -4985,7 +5457,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
SI->setCondition(ExtInst);
- for (SwitchInst::CaseIt Case : SI->cases()) {
+ for (auto Case : SI->cases()) {
APInt NarrowConst = Case.getCaseValue()->getValue();
APInt WideConst = (ExtType == Instruction::ZExt) ?
NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
@@ -5514,7 +5986,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = moveExtToFormExtLoad(I);
+ bool MadeChange = optimizeExt(I);
return MadeChange | optimizeExtUses(I);
}
}
@@ -5548,8 +6020,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
return false;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ unsigned AS = RMW->getPointerAddressSpace();
+ return optimizeMemoryInst(I, RMW->getPointerOperand(),
+ RMW->getType(), AS);
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
+ unsigned AS = CmpX->getPointerAddressSpace();
+ return optimizeMemoryInst(I, CmpX->getPointerOperand(),
+ CmpX->getCompareOperand()->getType(), AS);
+ }
+
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+ if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
+ EnableAndCmpSinking && TLI)
+ return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
@@ -5679,68 +6167,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
return MadeChange;
}
-// If there is a sequence that branches based on comparing a single bit
-// against zero that can be combined into a single instruction, and the
-// target supports folding these into a single instruction, sink the
-// mask and compare into the branch uses. Do this before OptimizeBlock ->
-// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
-// searched for.
-bool CodeGenPrepare::sinkAndCmp(Function &F) {
- if (!EnableAndCmpSinking)
- return false;
- if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
- return false;
- bool MadeChange = false;
- for (BasicBlock &BB : F) {
- // Does this BB end with the following?
- // %andVal = and %val, #single-bit-set
- // %icmpVal = icmp %andResult, 0
- // br i1 %cmpVal label %dest1, label %dest2"
- BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
- if (!Brcc || !Brcc->isConditional())
- continue;
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
- if (!Cmp || Cmp->getParent() != &BB)
- continue;
- ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
- if (!Zero || !Zero->isZero())
- continue;
- Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
- if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
- continue;
- ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
- if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
- continue;
- DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
-
- // Push the "and; icmp" for any users that are conditional branches.
- // Since there can only be one branch use per BB, we don't need to keep
- // track of which BBs we insert into.
- for (Use &TheUse : Cmp->uses()) {
- // Find brcc use.
- BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
- if (!BrccUser || !BrccUser->isConditional())
- continue;
- BasicBlock *UserBB = BrccUser->getParent();
- if (UserBB == &BB) continue;
- DEBUG(dbgs() << "found Brcc use\n");
-
- // Sink the "and; icmp" to use.
- MadeChange = true;
- BinaryOperator *NewAnd =
- BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
- BrccUser);
- CmpInst *NewCmp =
- CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
- "", BrccUser);
- TheUse = NewCmp;
- ++NumAndCmpsMoved;
- DEBUG(BrccUser->getParent()->dump());
- }
- }
- return MadeChange;
-}
-
/// \brief Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
diff --git a/lib/CodeGen/CountingFunctionInserter.cpp b/lib/CodeGen/CountingFunctionInserter.cpp
index 1e46a7a99e7e3..7f7350f5fb5cd 100644
--- a/lib/CodeGen/CountingFunctionInserter.cpp
+++ b/lib/CodeGen/CountingFunctionInserter.cpp
@@ -41,7 +41,7 @@ namespace {
Type *VoidTy = Type::getVoidTy(F.getContext());
Constant *CountingFn =
F.getParent()->getOrInsertFunction(CountingFunctionName,
- VoidTy, nullptr);
+ VoidTy);
CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt());
return true;
}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 5d60c3055456d..e1eeddf0816c1 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -71,8 +71,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
- if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg)))
+ continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 17c229a216ae1..7ac2e5445435d 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -110,7 +110,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
- // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not
+ // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index a7ba694c144d1..6f4ea1912cf4e 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -441,7 +441,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
if (CrossCopy)
- DEBUG(dbgs() << "Copy accross incompatible classes: " << UseMI);
+ DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);
}
if (!CrossCopy)
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 32c57e3e37051..e272d25047e63 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -6,21 +6,9 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file contains the execution dependency fix pass.
-//
-// Some X86 SSE instructions like mov, and, or, xor are available in different
-// variants for different operand types. These variant instructions are
-// equivalent, but on Nehalem and newer cpus there is extra latency
-// transferring data between integer and floating point domains. ARM cores
-// have similar issues when they are configured with both VFP and NEON
-// pipelines.
-//
-// This pass changes the variant instructions to minimize domain crossings.
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -35,193 +23,18 @@
using namespace llvm;
-#define DEBUG_TYPE "execution-fix"
-
-/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
-/// of execution domains.
-///
-/// An open DomainValue represents a set of instructions that can still switch
-/// execution domain. Multiple registers may refer to the same open
-/// DomainValue - they will eventually be collapsed to the same execution
-/// domain.
-///
-/// A collapsed DomainValue represents a single register that has been forced
-/// into one of more execution domains. There is a separate collapsed
-/// DomainValue for each register, but it may contain multiple execution
-/// domains. A register value is initially created in a single execution
-/// domain, but if we were forced to pay the penalty of a domain crossing, we
-/// keep track of the fact that the register is now available in multiple
-/// domains.
-namespace {
-struct DomainValue {
- // Basic reference counting.
- unsigned Refs;
-
- // Bitmask of available domains. For an open DomainValue, it is the still
- // possible domains for collapsing. For a collapsed DomainValue it is the
- // domains where the register is available for free.
- unsigned AvailableDomains;
-
- // Pointer to the next DomainValue in a chain. When two DomainValues are
- // merged, Victim.Next is set to point to Victor, so old DomainValue
- // references can be updated by following the chain.
- DomainValue *Next;
-
- // Twiddleable instructions using or defining these registers.
- SmallVector<MachineInstr*, 8> Instrs;
-
- // A collapsed DomainValue has no instructions to twiddle - it simply keeps
- // track of the domains where the registers are already available.
- bool isCollapsed() const { return Instrs.empty(); }
-
- // Is domain available?
- bool hasDomain(unsigned domain) const {
- assert(domain <
- static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
- "undefined behavior");
- return AvailableDomains & (1u << domain);
- }
-
- // Mark domain as available.
- void addDomain(unsigned domain) {
- AvailableDomains |= 1u << domain;
- }
-
- // Restrict to a single domain available.
- void setSingleDomain(unsigned domain) {
- AvailableDomains = 1u << domain;
- }
-
- // Return bitmask of domains that are available and in mask.
- unsigned getCommonDomains(unsigned mask) const {
- return AvailableDomains & mask;
- }
-
- // First domain available.
- unsigned getFirstDomain() const {
- return countTrailingZeros(AvailableDomains);
- }
-
- DomainValue() : Refs(0) { clear(); }
-
- // Clear this DomainValue and point to next which has all its data.
- void clear() {
- AvailableDomains = 0;
- Next = nullptr;
- Instrs.clear();
- }
-};
-}
-
-namespace {
-/// Information about a live register.
-struct LiveReg {
- /// Value currently in this register, or NULL when no value is being tracked.
- /// This counts as a DomainValue reference.
- DomainValue *Value;
-
- /// Instruction that defined this register, relative to the beginning of the
- /// current basic block. When a LiveReg is used to represent a live-out
- /// register, this value is relative to the end of the basic block, so it
- /// will be a negative number.
- int Def;
-};
-} // anonymous namespace
-
-namespace {
-class ExeDepsFix : public MachineFunctionPass {
- static char ID;
- SpecificBumpPtrAllocator<DomainValue> Allocator;
- SmallVector<DomainValue*,16> Avail;
-
- const TargetRegisterClass *const RC;
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- RegisterClassInfo RegClassInfo;
- std::vector<SmallVector<int, 1>> AliasMap;
- const unsigned NumRegs;
- LiveReg *LiveRegs;
- typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
- LiveOutMap LiveOuts;
-
- /// List of undefined register reads in this block in forward order.
- std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
-
- /// Storage for register unit liveness.
- LivePhysRegs LiveRegSet;
-
- /// Current instruction number.
- /// The first instruction in each basic block is 0.
- int CurInstr;
-
- /// True when the current block has a predecessor that hasn't been visited
- /// yet.
- bool SeenUnknownBackEdge;
-
-public:
- ExeDepsFix(const TargetRegisterClass *rc)
- : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
-
- StringRef getPassName() const override { return "Execution dependency fix"; }
-
-private:
- iterator_range<SmallVectorImpl<int>::const_iterator>
- regIndices(unsigned Reg) const;
-
- // DomainValue allocation.
- DomainValue *alloc(int domain = -1);
- DomainValue *retain(DomainValue *DV) {
- if (DV) ++DV->Refs;
- return DV;
- }
- void release(DomainValue*);
- DomainValue *resolve(DomainValue*&);
-
- // LiveRegs manipulations.
- void setLiveReg(int rx, DomainValue *DV);
- void kill(int rx);
- void force(int rx, unsigned domain);
- void collapse(DomainValue *dv, unsigned domain);
- bool merge(DomainValue *A, DomainValue *B);
-
- void enterBasicBlock(MachineBasicBlock*);
- void leaveBasicBlock(MachineBasicBlock*);
- void visitInstr(MachineInstr*);
- void processDefs(MachineInstr*, bool Kill);
- void visitSoftInstr(MachineInstr*, unsigned mask);
- void visitHardInstr(MachineInstr*, unsigned domain);
- void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref);
- bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
- void processUndefReads(MachineBasicBlock*);
-};
-}
-
-char ExeDepsFix::ID = 0;
+#define DEBUG_TYPE "execution-deps-fix"
/// Translate TRI register number to a list of indices into our smaller tables
/// of interesting registers.
iterator_range<SmallVectorImpl<int>::const_iterator>
-ExeDepsFix::regIndices(unsigned Reg) const {
+ExecutionDepsFix::regIndices(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register");
const auto &Entry = AliasMap[Reg];
return make_range(Entry.begin(), Entry.end());
}
-DomainValue *ExeDepsFix::alloc(int domain) {
+DomainValue *ExecutionDepsFix::alloc(int domain) {
DomainValue *dv = Avail.empty() ?
new(Allocator.Allocate()) DomainValue :
Avail.pop_back_val();
@@ -234,7 +47,7 @@ DomainValue *ExeDepsFix::alloc(int domain) {
/// Release a reference to DV. When the last reference is released,
/// collapse if needed.
-void ExeDepsFix::release(DomainValue *DV) {
+void ExecutionDepsFix::release(DomainValue *DV) {
while (DV) {
assert(DV->Refs && "Bad DomainValue");
if (--DV->Refs)
@@ -254,7 +67,7 @@ void ExeDepsFix::release(DomainValue *DV) {
/// Follow the chain of dead DomainValues until a live DomainValue is reached.
/// Update the referenced pointer when necessary.
-DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) {
DomainValue *DV = DVRef;
if (!DV || !DV->Next)
return DV;
@@ -271,7 +84,7 @@ DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
}
/// Set LiveRegs[rx] = dv, updating reference counts.
-void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
+void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
@@ -283,7 +96,7 @@ void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
}
// Kill register rx, recycle or collapse any DomainValue.
-void ExeDepsFix::kill(int rx) {
+void ExecutionDepsFix::kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
if (!LiveRegs[rx].Value)
@@ -294,7 +107,7 @@ void ExeDepsFix::kill(int rx) {
}
/// Force register rx into domain.
-void ExeDepsFix::force(int rx, unsigned domain) {
+void ExecutionDepsFix::force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
if (DomainValue *dv = LiveRegs[rx].Value) {
@@ -317,7 +130,7 @@ void ExeDepsFix::force(int rx, unsigned domain) {
/// Collapse open DomainValue into given domain. If there are multiple
/// registers using dv, they each get a unique collapsed DomainValue.
-void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
+void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse");
// Collapse all the instructions.
@@ -333,7 +146,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
}
/// All instructions and registers in B are moved to A, and B is released.
-bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
+bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
if (A == B)
@@ -359,10 +172,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
}
/// Set up LiveRegs by merging predecessor live-out values.
-void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
- // Detect back-edges from predecessors we haven't processed yet.
- SeenUnknownBackEdge = false;
-
+void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Reset instruction counter in each basic block.
CurInstr = 0;
@@ -397,18 +207,21 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Try to coalesce live-out registers from predecessors.
for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
pe = MBB->pred_end(); pi != pe; ++pi) {
- LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) {
- SeenUnknownBackEdge = true;
+ auto fi = MBBInfos.find(*pi);
+ assert(fi != MBBInfos.end() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ LiveReg *Incoming = fi->second.OutRegs;
+ // Incoming is null if this is a backedge from a BB
+    // we haven't processed yet.
+ if (Incoming == nullptr) {
continue;
}
- assert(fi->second && "Can't have NULL entries");
for (unsigned rx = 0; rx != NumRegs; ++rx) {
// Use the most recent predecessor def for each register.
- LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def);
- DomainValue *pdv = resolve(fi->second[rx].Value);
+ DomainValue *pdv = resolve(Incoming[rx].Value);
if (!pdv)
continue;
if (!LiveRegs[rx].Value) {
@@ -432,35 +245,34 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
force(rx, pdv->getFirstDomain());
}
}
- DEBUG(dbgs() << "BB#" << MBB->getNumber()
- << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+ DEBUG(
+ dbgs() << "BB#" << MBB->getNumber()
+ << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
}
-void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
assert(LiveRegs && "Must enter basic block first.");
- // Save live registers at end of MBB - used by enterBasicBlock().
- // Also use LiveOuts as a visited set to detect back-edges.
- bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
-
- if (First) {
- // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
- // the end of this block instead of the beginning.
- for (unsigned i = 0, e = NumRegs; i != e; ++i)
- LiveRegs[i].Def -= CurInstr;
- } else {
- // Insertion failed, this must be the second pass.
+ LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs;
+ // Save register clearances at end of MBB - used by enterBasicBlock().
+ MBBInfos[MBB].OutRegs = LiveRegs;
+
+ // While processing the basic block, we kept `Def` relative to the start
+ // of the basic block for convenience. However, future use of this information
+ // only cares about the clearance from the end of the block, so adjust
+ // everything to be relative to the end of the basic block.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ if (OldOutRegs) {
+ // This must be the second pass.
// Release all the DomainValues instead of keeping them.
for (unsigned i = 0, e = NumRegs; i != e; ++i)
- release(LiveRegs[i].Value);
- delete[] LiveRegs;
+ release(OldOutRegs[i].Value);
+ delete[] OldOutRegs;
}
LiveRegs = nullptr;
}
-void ExeDepsFix::visitInstr(MachineInstr *MI) {
- if (MI->isDebugValue())
- return;
-
+bool ExecutionDepsFix::visitInstr(MachineInstr *MI) {
// Update instructions with explicit execution domains.
std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
if (DomP.first) {
@@ -470,16 +282,16 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
visitHardInstr(MI, DomP.first);
}
- // Process defs to track register ages, and kill values clobbered by generic
- // instructions.
- processDefs(MI, !DomP.first);
+ return !DomP.first;
}
/// \brief Helps avoid false dependencies on undef registers by updating the
/// machine instructions' undef operand to use a register that the instruction
/// is truly dependent on, or use a register with clearance higher than Pref.
-void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+/// Returns true if it was able to find a true dependency, thus not requiring
+/// a dependency breaking instruction regardless of clearance.
+bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI,
+ unsigned OpIdx, unsigned Pref) {
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");
@@ -487,7 +299,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// Update only undef operands that are mapped to one register.
if (AliasMap[OriginalReg].size() != 1)
- return;
+ return false;
// Get the undef operand's register class
const TargetRegisterClass *OpRC =
@@ -502,7 +314,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// We found a true dependency - replace the undef register with the true
// dependency.
MO.setReg(CurrMO.getReg());
- return;
+ return true;
}
// Go over all registers in the register class and find the register with
@@ -527,12 +339,14 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// Update the operand if we found a register with better clearance.
if (MaxClearanceReg != OriginalReg)
MO.setReg(MaxClearanceReg);
+
+ return false;
}
/// \brief Return true if it makes sense to break dependence on a partial def
/// or undef use.
-bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
for (int rx : regIndices(reg)) {
unsigned Clearance = CurInstr - LiveRegs[rx].Def;
@@ -542,14 +356,7 @@ bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
DEBUG(dbgs() << ": Break dependency.\n");
continue;
}
- // The current clearance seems OK, but we may be ignoring a def from a
- // back-edge.
- if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
- DEBUG(dbgs() << ": OK .\n");
- return false;
- }
- // A def from an unprocessed back-edge may make us break this dependency.
- DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ DEBUG(dbgs() << ": OK .\n");
return false;
}
return true;
@@ -559,16 +366,22 @@ bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
// If Kill is set, also kill off DomainValues clobbered by the defs.
//
// Also break dependencies on partial defs and undef uses.
-void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
+ bool Kill) {
assert(!MI->isDebugValue() && "Won't process debug values");
// Break dependence on undef uses. Do this before updating LiveRegs below.
unsigned OpNum;
- unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
- if (Pref) {
- pickBestRegisterForUndef(MI, OpNum, Pref);
- if (shouldBreakDependence(MI, OpNum, Pref))
- UndefReads.push_back(std::make_pair(MI, OpNum));
+ if (breakDependency) {
+ unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
+ if (Pref) {
+ bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
+ // We don't need to bother trying to break a dependency if this
+ // instruction has a true dependency on that register through another
+ // operand - we'll have to wait for it to be available regardless.
+ if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
+ UndefReads.push_back(std::make_pair(MI, OpNum));
+ }
}
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
@@ -584,11 +397,13 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
- // Check clearance before partial register updates.
- // Call breakDependence before setting LiveRegs[rx].Def.
- unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
- if (Pref && shouldBreakDependence(MI, i, Pref))
- TII->breakPartialRegDependency(*MI, i, TRI);
+ if (breakDependency) {
+ // Check clearance before partial register updates.
+ // Call breakDependence before setting LiveRegs[rx].Def.
+ unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(*MI, i, TRI);
+ }
// How many instructions since rx was last written?
LiveRegs[rx].Def = CurInstr;
@@ -607,7 +422,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
/// only do it on demand. Note that the occurrence of undefined register reads
/// that should be broken is very rare, but when they occur we may have many in
/// a single block.
-void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
+void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) {
if (UndefReads.empty())
return;
@@ -640,7 +455,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
// A hard instruction only works in one domain. All input registers will be
// forced into that domain.
-void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
// Collapse all uses.
for (unsigned i = mi->getDesc().getNumDefs(),
e = mi->getDesc().getNumOperands(); i != e; ++i) {
@@ -663,7 +478,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
}
// A soft instruction can be changed to work in other domains given by mask.
-void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Bitmask of available domains for this instruction after taking collapsed
// operands into account.
unsigned available = mask;
@@ -774,7 +589,34 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
}
-bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB,
+ bool PrimaryPass) {
+ enterBasicBlock(MBB);
+ // If this block is not done, it makes little sense to make any decisions
+ // based on clearance information. We need to make a second pass anyway,
+ // and by then we'll have better information, so we can avoid doing the work
+  // to try to break dependencies now.
+ bool breakDependency = isBlockDone(MBB);
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isDebugValue()) {
+ bool Kill = false;
+ if (PrimaryPass)
+ Kill = visitInstr(&MI);
+ processDefs(&MI, breakDependency, Kill);
+ }
+ }
+ if (breakDependency)
+ processUndefReads(MBB);
+ leaveBasicBlock(MBB);
+}
+
+bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
+ return MBBInfos[MBB].PrimaryCompleted &&
+ MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming &&
+ MBBInfos[MBB].IncomingProcessed == MBB->pred_size();
+}
+
+bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
if (skipFunction(*mf.getFunction()))
return false;
MF = &mf;
@@ -810,52 +652,104 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
AliasMap[*AI].push_back(i);
}
+  // Initialize the MBBInfos
+ for (auto &MBB : mf) {
+ MBBInfo InitialInfo;
+ MBBInfos.insert(std::make_pair(&MBB, InitialInfo));
+ }
+
+ /*
+ * We want to visit every instruction in every basic block in order to update
+   * its execution domain or break any false dependencies. However, for the
+ * dependency breaking, we need to know clearances from all predecessors
+ * (including any backedges). One way to do so would be to do two complete
+ * passes over all basic blocks/instructions, the first for recording
+ * clearances, the second to break the dependencies. However, for functions
+   * without backedges, or functions with a lot of straight-line code and only
+   * a small loop, that would be a lot of unnecessary work (since only the
+ * BBs that are part of the loop require two passes). As an example,
+ * consider the following loop.
+ *
+ *
+ * PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT
+ * ^ |
+ * +----------------------------------+
+ *
+ * The iteration order is as follows:
+ * Naive: PH A B C D A' B' C' D'
+ * Optimized: PH A B C A' B' C' D
+ *
+ * Note that we avoid processing D twice, because we can entirely process
+ * the predecessors before getting to D. We call a block that is ready
+ * for its second round of processing `done` (isBlockDone). Once we finish
+ * processing some block, we update the counters in MBBInfos and re-process
+ * any successors that are now done.
+ */
+
MachineBasicBlock *Entry = &*MF->begin();
ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
- SmallVector<MachineBasicBlock*, 16> Loops;
+ SmallVector<MachineBasicBlock *, 4> Workqueue;
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
MachineBasicBlock *MBB = *MBBI;
- enterBasicBlock(MBB);
- if (SeenUnknownBackEdge)
- Loops.push_back(MBB);
- for (MachineInstr &MI : *MBB)
- visitInstr(&MI);
- processUndefReads(MBB);
- leaveBasicBlock(MBB);
+ // N.B: IncomingProcessed and IncomingCompleted were already updated while
+ // processing this block's predecessors.
+ MBBInfos[MBB].PrimaryCompleted = true;
+ MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed;
+ bool Primary = true;
+ Workqueue.push_back(MBB);
+ while (!Workqueue.empty()) {
+ MachineBasicBlock *ActiveMBB = &*Workqueue.back();
+ Workqueue.pop_back();
+ processBasicBlock(ActiveMBB, Primary);
+ bool Done = isBlockDone(ActiveMBB);
+ for (auto *Succ : ActiveMBB->successors()) {
+ if (!isBlockDone(Succ)) {
+ if (Primary) {
+ MBBInfos[Succ].IncomingProcessed++;
+ }
+ if (Done) {
+ MBBInfos[Succ].IncomingCompleted++;
+ }
+ if (isBlockDone(Succ)) {
+ Workqueue.push_back(Succ);
+ }
+ }
+ }
+ Primary = false;
+ }
}
- // Visit all the loop blocks again in order to merge DomainValues from
- // back-edges.
- for (MachineBasicBlock *MBB : Loops) {
- enterBasicBlock(MBB);
- for (MachineInstr &MI : *MBB)
- if (!MI.isDebugValue())
- processDefs(&MI, false);
- processUndefReads(MBB);
- leaveBasicBlock(MBB);
+ // We need to go through again and finalize any blocks that are not done yet.
+  // This can happen when a block has dead (unreachable) predecessors: those
+  // predecessors are never processed, so the block never became done above.
+ for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator
+ MBBI = RPOT.begin(),
+ MBBE = RPOT.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ if (!isBlockDone(MBB)) {
+ processBasicBlock(MBB, false);
+ // Don't update successors here. We'll get to them anyway through this
+ // loop.
+ }
}
// Clear the LiveOuts vectors and collapse any remaining DomainValues.
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
- LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
- if (FI == LiveOuts.end() || !FI->second)
+ auto FI = MBBInfos.find(*MBBI);
+ if (FI == MBBInfos.end() || !FI->second.OutRegs)
continue;
for (unsigned i = 0, e = NumRegs; i != e; ++i)
- if (FI->second[i].Value)
- release(FI->second[i].Value);
- delete[] FI->second;
+ if (FI->second.OutRegs[i].Value)
+ release(FI->second.OutRegs[i].Value);
+ delete[] FI->second.OutRegs;
}
- LiveOuts.clear();
+ MBBInfos.clear();
UndefReads.clear();
Avail.clear();
Allocator.DestroyAll();
return false;
}
-
-FunctionPass *
-llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
- return new ExeDepsFix(RC);
-}
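
A rough standalone sketch of the per-block bookkeeping described in the comment inside ExecutionDepsFix::runOnMachineFunction above (the BlockInfo struct and the free function isDone below are illustrative names only; the pass keeps these counters in its MBBInfo map, which may be laid out differently):

    // Hypothetical stand-in for the pass's per-block counters.
    struct BlockInfo {
      bool PrimaryCompleted = false;   // this block's primary (first) visit ran
      unsigned IncomingProcessed = 0;  // preds that finished their primary visit
      unsigned IncomingCompleted = 0;  // preds that are fully done
      unsigned PrimaryIncoming = 0;    // IncomingProcessed at primary-visit time
    };

    // Mirrors the isBlockDone predicate added by the patch: the primary visit
    // has run, every predecessor has been processed at least once, and every
    // predecessor counted at primary-visit time has since completed.
    static bool isDone(const BlockInfo &BI, unsigned NumPreds) {
      return BI.PrimaryCompleted &&
             BI.IncomingCompleted == BI.PrimaryIncoming &&
             BI.IncomingProcessed == NumPreds;
    }

Once a block becomes done, the worklist loop in runOnMachineFunction re-queues it so its second pass runs with full clearance information from all predecessors.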
diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp
new file mode 100644
index 0000000000000..0759bf6713e07
--- /dev/null
+++ b/lib/CodeGen/FEntryInserter.cpp
@@ -0,0 +1,55 @@
+//===-- FEntryInserter.cpp - Patchable prologues for LLVM --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file edits function bodies to insert fentry calls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+struct FEntryInserter : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ FEntryInserter() : MachineFunctionPass(ID) {
+ initializeFEntryInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+}
+
+bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) {
+ const std::string FEntryName =
+ MF.getFunction()->getFnAttribute("fentry-call").getValueAsString();
+ if (FEntryName != "true")
+ return false;
+
+ auto &FirstMBB = *MF.begin();
+ auto &FirstMI = *FirstMBB.begin();
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::FENTRY_CALL));
+ return true;
+}
+
+char FEntryInserter::ID = 0;
+char &llvm::FEntryInserterID = FEntryInserter::ID;
+INITIALIZE_PASS(FEntryInserter, "fentry-insert", "Insert fentry calls", false,
+ false)
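
The pass only fires for functions that carry the string attribute "fentry-call" with value "true", as runOnMachineFunction checks above. As a minimal sketch, a front end or earlier pass might tag a function like this (the helper name is made up; Function::addFnAttr with a string key/value pair is the standard attribute API):

    #include "llvm/IR/Function.h"

    // Illustrative helper: mark F so FEntryInserter prepends FENTRY_CALL.
    static void requestFEntryCall(llvm::Function &F) {
      F.addFnAttr("fentry-call", "true");
    }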
diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp
index 2acafafdb9fcc..43f3641289787 100644
--- a/lib/CodeGen/FaultMaps.cpp
+++ b/lib/CodeGen/FaultMaps.cpp
@@ -1,4 +1,4 @@
-//===---------------------------- FaultMaps.cpp ---------------------------===//
+//===- FaultMaps.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/FaultMaps.h"
-
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -102,14 +105,16 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
}
}
-
const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
switch (FT) {
default:
llvm_unreachable("unhandled fault type!");
-
case FaultMaps::FaultingLoad:
return "FaultingLoad";
+ case FaultMaps::FaultingLoadStore:
+ return "FaultingLoadStore";
+ case FaultMaps::FaultingStore:
+ return "FaultingStore";
}
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 31ab86fdf2761..6be4c16c6301e 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -1,4 +1,4 @@
-//===-- GCStrategy.cpp - Garbage Collector Description --------------------===//
+//===- GCStrategy.cpp - Garbage Collector Description ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,4 @@ using namespace llvm;
LLVM_INSTANTIATE_REGISTRY(GCRegistry)
-GCStrategy::GCStrategy()
- : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
- CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
- UsesMetadata(false) {}
+GCStrategy::GCStrategy() = default;
diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt
index 76ab5d36047e5..03a8c4f5f909a 100644
--- a/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -22,7 +22,6 @@ else()
set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES})
endif()
-
# In LLVMBuild.txt files, it is not possible to mark a dependency to a
# library as optional. So instead, generate an empty library if we did
# not ask for it.
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index 13212212fa016..035a2ac78ed99 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -24,40 +24,42 @@
using namespace llvm;
bool CallLowering::lowerCall(
- MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg,
+ MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
- auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout();
+ auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
SmallVector<ArgInfo, 8> OrigArgs;
unsigned i = 0;
- for (auto &Arg : CI.arg_operands()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}};
- setArgFlags(OrigArg, i + 1, DL, CI);
+ unsigned NumFixedArgs = CS.getFunctionType()->getNumParams();
+ for (auto &Arg : CS.args()) {
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
+ i < NumFixedArgs};
+ setArgFlags(OrigArg, i + 1, DL, CS);
OrigArgs.push_back(OrigArg);
++i;
}
MachineOperand Callee = MachineOperand::CreateImm(0);
- if (Function *F = CI.getCalledFunction())
+ if (const Function *F = CS.getCalledFunction())
Callee = MachineOperand::CreateGA(F, 0);
else
Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}};
+ ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}};
if (!OrigRet.Ty->isVoidTy())
- setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI);
+ setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
- return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs);
+ return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs);
}
template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const FuncInfoTy &FuncInfo) const {
- const AttributeSet &Attrs = FuncInfo.getAttributes();
+ const AttributeList &Attrs = FuncInfo.getAttributes();
if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
Arg.Flags.setZExt();
if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
@@ -103,7 +105,6 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const CallInst &FuncInfo) const;
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
- CCAssignFn *AssignFn,
ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
@@ -116,12 +117,20 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
- if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo))
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
return false;
}
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
+ assert(j < ArgLocs.size() && "Skipped too many arg locs");
+
+ CCValAssign &VA = ArgLocs[j];
+ assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
+
+ if (VA.needsCustom()) {
+ j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ continue;
+ }
if (VA.isRegLoc())
Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 89a042ffc477e..7661873784469 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -12,7 +12,10 @@
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -21,11 +24,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -40,11 +45,21 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
-static void reportTranslationError(const Value &V, const Twine &Message) {
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << Message << ": " << V << '\n';
- report_fatal_error(Err.str());
+static void reportTranslationError(MachineFunction &MF,
+ const TargetPassConfig &TPC,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (TPC.isGlobalISelAbortEnabled())
+ report_fatal_error(R.getMsg());
+ else
+ ORE.emit(R);
}
IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
@@ -59,28 +74,31 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
- // Check if this is the first time we see Val.
- if (!ValReg) {
- // Fill ValRegsSequence with the sequence of registers
- // we need to concat together to produce the value.
- assert(Val.getType()->isSized() &&
- "Don't know how to create an empty vreg");
- unsigned VReg = MRI->createGenericVirtualRegister(LLT{*Val.getType(), *DL});
- ValReg = VReg;
-
- if (auto CV = dyn_cast<Constant>(&Val)) {
- bool Success = translate(*CV, VReg);
- if (!Success) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return VReg;
- }
- reportTranslationError(Val, "unable to translate constant");
- }
+
+ if (ValReg)
+ return ValReg;
+
+ // Fill ValRegsSequence with the sequence of registers
+ // we need to concat together to produce the value.
+ assert(Val.getType()->isSized() &&
+ "Don't know how to create an empty vreg");
+ unsigned VReg =
+ MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL));
+ ValReg = VReg;
+
+ if (auto CV = dyn_cast<Constant>(&Val)) {
+ bool Success = translate(*CV, VReg);
+ if (!Success) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ MF->getFunction()->getSubprogram(),
+ &MF->getFunction()->getEntryBlock());
+ R << "unable to translate constant: " << ore::NV("Type", Val.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return VReg;
}
}
- return ValReg;
+
+ return VReg;
}
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
@@ -112,28 +130,27 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
} else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Alignment = LI->getAlignment();
ValTy = LI->getType();
- } else if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
+ } else {
+ OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
+ R << "unable to translate memop: " << ore::NV("Opcode", &I);
+ reportTranslationError(*MF, *TPC, *ORE, R);
return 1;
- } else
- llvm_unreachable("unhandled memory instruction");
+ }
return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
}
-MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
+MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
- if (!MBB) {
- MBB = MF->CreateMachineBasicBlock(&BB);
- MF->push_back(MBB);
-
- if (BB.hasAddressTaken())
- MBB->setHasAddressTaken();
- }
+ assert(MBB && "BasicBlock was not encountered before");
return *MBB;
}
+void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
+ assert(NewPred && "new predecessor must be a real MachineBasicBlock");
+ MachinePreds[Edge].push_back(NewPred);
+}
+
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
// FIXME: handle signed/unsigned wrapping flags.
@@ -149,6 +166,18 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
return true;
}
+bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
+ // -0.0 - X --> G_FNEG
+ if (isa<Constant>(U.getOperand(0)) &&
+ U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(1)));
+ return true;
+ }
+ return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
@@ -158,9 +187,14 @@ bool IRTranslator::translateCompare(const User &U,
CmpInst::Predicate Pred =
CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
cast<ConstantExpr>(U).getPredicate());
-
if (CmpInst::isIntPredicate(Pred))
MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
+ else if (Pred == CmpInst::FCMP_FALSE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getNullValue(CI->getType())));
+ else if (Pred == CmpInst::FCMP_TRUE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
else
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
@@ -183,18 +217,21 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
// We want a G_BRCOND to the true BB followed by an unconditional branch.
unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
- MachineBasicBlock &TrueBB = getOrCreateBB(TrueTgt);
+ MachineBasicBlock &TrueBB = getMBB(TrueTgt);
MIRBuilder.buildBrCond(Tst, TrueBB);
}
const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
- MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
- MIRBuilder.buildBr(TgtBB);
+ MachineBasicBlock &TgtBB = getMBB(BrTgt);
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+
+ // If the unconditional target is the layout successor, fallthrough.
+ if (!CurBB.isLayoutSuccessor(&TgtBB))
+ MIRBuilder.buildBr(TgtBB);
// Link successors.
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
for (const BasicBlock *Succ : BrInst.successors())
- CurBB.addSuccessor(&getOrCreateBB(*Succ));
+ CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -209,30 +246,52 @@ bool IRTranslator::translateSwitch(const User &U,
const SwitchInst &SwInst = cast<SwitchInst>(U);
const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
+ const BasicBlock *OrigBB = SwInst.getParent();
- LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL);
+ LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
for (auto &CaseIt : SwInst.cases()) {
const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
- MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor());
+ MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
+ const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
+ MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
- MIRBuilder.buildBrCond(Tst, TrueBB);
- CurBB.addSuccessor(&TrueBB);
+ MIRBuilder.buildBrCond(Tst, TrueMBB);
+ CurMBB.addSuccessor(&TrueMBB);
+ addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
- MachineBasicBlock *FalseBB =
+ MachineBasicBlock *FalseMBB =
MF->CreateMachineBasicBlock(SwInst.getParent());
- MF->push_back(FalseBB);
- MIRBuilder.buildBr(*FalseBB);
- CurBB.addSuccessor(FalseBB);
+ // Insert the comparison blocks one after the other.
+ MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
+ MIRBuilder.buildBr(*FalseMBB);
+ CurMBB.addSuccessor(FalseMBB);
- MIRBuilder.setMBB(*FalseBB);
+ MIRBuilder.setMBB(*FalseMBB);
}
// handle default case
- MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest());
- MIRBuilder.buildBr(DefaultBB);
- MIRBuilder.getMBB().addSuccessor(&DefaultBB);
+ const BasicBlock *DefaultBB = SwInst.getDefaultDest();
+ MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
+ MIRBuilder.buildBr(DefaultMBB);
+ MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
+ CurMBB.addSuccessor(&DefaultMBB);
+ addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
+
+ return true;
+}
+
+bool IRTranslator::translateIndirectBr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
+
+ const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
+ MIRBuilder.buildBrIndirect(Tgt);
+
+ // Link successors.
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ for (const BasicBlock *Succ : BrInst.successors())
+ CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -240,47 +299,38 @@ bool IRTranslator::translateSwitch(const User &U,
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
- if (!TPC->isGlobalISelAbortEnabled() && LI.isAtomic())
- return false;
-
- assert(!LI.isAtomic() && "only non-atomic loads are supported at the moment");
auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOLoad;
unsigned Res = getOrCreateVReg(LI);
unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
- LLT VTy{*LI.getType(), *DL}, PTy{*LI.getPointerOperand()->getType(), *DL};
+
MIRBuilder.buildLoad(
Res, Addr,
*MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
Flags, DL->getTypeStoreSize(LI.getType()),
- getMemOpAlignment(LI)));
+ getMemOpAlignment(LI), AAMDNodes(), nullptr,
+ LI.getSynchScope(), LI.getOrdering()));
return true;
}
bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
const StoreInst &SI = cast<StoreInst>(U);
-
- if (!TPC->isGlobalISelAbortEnabled() && SI.isAtomic())
- return false;
-
- assert(!SI.isAtomic() && "only non-atomic stores supported at the moment");
auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOStore;
unsigned Val = getOrCreateVReg(*SI.getValueOperand());
unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
- LLT VTy{*SI.getValueOperand()->getType(), *DL},
- PTy{*SI.getPointerOperand()->getType(), *DL};
MIRBuilder.buildStore(
Val, Addr,
*MF->getMachineMemOperand(
MachinePointerInfo(SI.getPointerOperand()), Flags,
DL->getTypeStoreSize(SI.getValueOperand()->getType()),
- getMemOpAlignment(SI)));
+ getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSynchScope(),
+ SI.getOrdering()));
return true;
}
@@ -305,7 +355,7 @@ bool IRTranslator::translateExtractValue(const User &U,
uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildExtract(Res, Offset, getOrCreateVReg(*Src));
+ MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset);
return true;
}
@@ -348,12 +398,18 @@ bool IRTranslator::translateSelect(const User &U,
bool IRTranslator::translateBitCast(const User &U,
MachineIRBuilder &MIRBuilder) {
- if (LLT{*U.getOperand(0)->getType(), *DL} == LLT{*U.getType(), *DL}) {
+ // If we're bitcasting to the source type, we can reuse the source vreg.
+ if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
+ getLLTForType(*U.getType(), *DL)) {
+ // Get the source vreg now, to avoid invalidating ValToVReg.
+ unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
unsigned &Reg = ValToVReg[&U];
+ // If we already assigned a vreg for this bitcast, we can't change that.
+ // Emit a copy to satisfy the users we already emitted.
if (Reg)
- MIRBuilder.buildCopy(Reg, getOrCreateVReg(*U.getOperand(0)));
+ MIRBuilder.buildCopy(Reg, SrcReg);
else
- Reg = getOrCreateVReg(*U.getOperand(0));
+ Reg = SrcReg;
return true;
}
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
@@ -375,9 +431,10 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Value &Op0 = *U.getOperand(0);
unsigned BaseReg = getOrCreateVReg(Op0);
- LLT PtrTy{*Op0.getType(), *DL};
- unsigned PtrSize = DL->getPointerSizeInBits(PtrTy.getAddressSpace());
- LLT OffsetTy = LLT::scalar(PtrSize);
+ Type *PtrIRTy = Op0.getType();
+ LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
int64_t Offset = 0;
for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
@@ -399,8 +456,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ unsigned OffsetReg =
+ getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
BaseReg = NewBaseReg;
@@ -408,8 +465,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
// N = N + Idx * ElementSize;
- unsigned ElementSizeReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(ElementSizeReg, ElementSize);
+ unsigned ElementSizeReg =
+ getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
unsigned IdxReg = getOrCreateVReg(*Idx);
if (MRI->getType(IdxReg) != OffsetTy) {
@@ -428,8 +485,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
return true;
}
@@ -438,13 +494,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return true;
}
-bool IRTranslator::translateMemcpy(const CallInst &CI,
- MachineIRBuilder &MIRBuilder) {
- LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL};
- if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() !=
- 0 ||
- cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() !=
- 0 ||
+bool IRTranslator::translateMemfunc(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder,
+ unsigned ID) {
+ LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
+ Type *DstTy = CI.getArgOperand(0)->getType();
+ if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
return false;
@@ -454,14 +509,32 @@ bool IRTranslator::translateMemcpy(const CallInst &CI,
Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
}
- MachineOperand Callee = MachineOperand::CreateES("memcpy");
+ const char *Callee;
+ switch (ID) {
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy: {
+ Type *SrcTy = CI.getArgOperand(1)->getType();
+    if (cast<PointerType>(SrcTy)->getAddressSpace() != 0)
+ return false;
+ Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
+ break;
+ }
+ case Intrinsic::memset:
+ Callee = "memset";
+ break;
+ default:
+ return false;
+ }
- return CLI->lowerCall(MIRBuilder, Callee,
+ return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
+ MachineOperand::CreateES(Callee),
CallLowering::ArgInfo(0, CI.getType()), Args);
}
void IRTranslator::getStackGuard(unsigned DstReg,
MachineIRBuilder &MIRBuilder) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
MIB.addDef(DstReg);
@@ -482,7 +555,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
- LLT Ty{*CI.getOperand(0)->getType(), *DL};
+ LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL);
LLT s1 = LLT::scalar(1);
unsigned Width = Ty.getSizeInBits();
unsigned Res = MRI->createGenericVirtualRegister(Ty);
@@ -494,8 +567,8 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
.addUse(getOrCreateVReg(*CI.getOperand(1)));
if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
- unsigned Zero = MRI->createGenericVirtualRegister(s1);
- EntryBuilder.buildConstant(Zero, 0);
+ unsigned Zero = getOrCreateVReg(
+ *Constant::getNullValue(Type::getInt1Ty(CI.getContext())));
MIB.addUse(Zero);
}
@@ -508,12 +581,83 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
switch (ID) {
default:
break;
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- // FIXME: these obviously need to be supported properly.
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // Stack coloring is not enabled in O0 (which we care about now) so we can
+ // drop these. Make sure someone notices when we start compiling at higher
+ // opts though.
+ if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
+ return false;
+ return true;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
+ assert(DI.getVariable() && "Missing variable");
+
+ const Value *Address = DI.getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return true;
+ }
+
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ auto AI = dyn_cast<AllocaInst>(Address);
+ if (AI && AI->isStaticAlloca()) {
+ // Static allocas are tracked at the MF level, no need for DBG_VALUE
+ // instructions (in fact, they get ignored if they *do* exist).
+ MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
+ getOrCreateFrameIndex(*AI), DI.getDebugLoc());
+ } else
+ MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
+ return true;
+ }
+ case Intrinsic::vaend:
+ // No target I know of cares about va_end. Certainly no in-tree target
+ // does. Simplest intrinsic ever!
return true;
+ case Intrinsic::vastart: {
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ Value *Ptr = CI.getArgOperand(0);
+ unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+
+ MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
+ .addUse(getOrCreateVReg(*Ptr))
+ .addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst &DI = cast<DbgValueInst>(CI);
+ const Value *V = DI.getValue();
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ MIRBuilder.buildIndirectDbgValue(0, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ } else if (const auto *CI = dyn_cast<Constant>(V)) {
+ MIRBuilder.buildConstDbgValue(*CI, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ } else {
+ unsigned Reg = getOrCreateVReg(*V);
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+      // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ if (DI.getOffset() != 0)
+ MIRBuilder.buildIndirectDbgValue(Reg, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ else
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(),
+ DI.getExpression());
+ }
+ return true;
+ }
case Intrinsic::uadd_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
case Intrinsic::sadd_with_overflow:
@@ -526,8 +670,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
+ case Intrinsic::pow:
+ MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ return true;
case Intrinsic::memcpy:
- return translateMemcpy(CI, MIRBuilder);
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return translateMemfunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
unsigned Reg = getOrCreateVReg(CI);
@@ -546,7 +698,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
case Intrinsic::stackprotector: {
- LLT PtrTy{*CI.getArgOperand(0)->getType(), *DL};
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
getStackGuard(GuardVal, MIRBuilder);
@@ -564,18 +716,41 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return false;
}
+bool IRTranslator::translateInlineAsm(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ const InlineAsm &IA = cast<InlineAsm>(*CI.getCalledValue());
+ if (!IA.getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA.hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA.getDialect() == InlineAsm::AD_Intel)
+ ExtraInfo |= InlineAsm::Extra_AsmDialect;
+
+ MIRBuilder.buildInstr(TargetOpcode::INLINEASM)
+ .addExternalSymbol(IA.getAsmString().c_str())
+ .addImm(ExtraInfo);
+
+ return true;
+}
+
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
const Function *F = CI.getCalledFunction();
+ if (CI.isInlineAsm())
+ return translateInlineAsm(CI, MIRBuilder);
+
if (!F || !F->isIntrinsic()) {
unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
SmallVector<unsigned, 8> Args;
for (auto &Arg: CI.arg_operands())
Args.push_back(getOrCreateVReg(*Arg));
- return CLI->lowerCall(MIRBuilder, CI, Res, Args, [&]() {
+ MF->getFrameInfo().setHasCalls(true);
+ return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
return getOrCreateVReg(*CI.getCalledValue());
});
}
@@ -594,10 +769,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
for (auto &Arg : CI.arg_operands()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg))
- MIB.addImm(CI->getSExtValue());
- else
- MIB.addUse(getOrCreateVReg(*Arg));
+ // Some intrinsics take metadata parameters. Reject them.
+ if (isa<MetadataAsValue>(Arg))
+ return false;
+ MIB.addUse(getOrCreateVReg(*Arg));
}
return true;
}
@@ -610,7 +785,7 @@ bool IRTranslator::translateInvoke(const User &U,
const BasicBlock *ReturnBB = I.getSuccessor(0);
const BasicBlock *EHPadBB = I.getSuccessor(1);
- const Value *Callee(I.getCalledValue());
+ const Value *Callee = I.getCalledValue();
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
return false;
@@ -634,23 +809,24 @@ bool IRTranslator::translateInvoke(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
- SmallVector<CallLowering::ArgInfo, 8> Args;
+ SmallVector<unsigned, 8> Args;
for (auto &Arg: I.arg_operands())
- Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+ Args.push_back(getOrCreateVReg(*Arg));
- if (!CLI->lowerCall(MIRBuilder, MachineOperand::CreateGA(Fn, 0),
- CallLowering::ArgInfo(Res, I.getType()), Args))
+ if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+ [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
return false;
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
// FIXME: track probabilities.
- MachineBasicBlock &EHPadMBB = getOrCreateBB(*EHPadBB),
- &ReturnMBB = getOrCreateBB(*ReturnBB);
+ MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
+ &ReturnMBB = getMBB(*ReturnBB);
MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
+ MIRBuilder.buildBr(ReturnMBB);
return true;
}
@@ -684,37 +860,158 @@ bool IRTranslator::translateLandingPad(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
.addSym(MF->addLandingPad(&MBB));
+ LLT Ty = getLLTForType(*LP.getType(), *DL);
+ unsigned Undef = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Undef);
+
+ SmallVector<LLT, 2> Tys;
+ for (Type *Ty : cast<StructType>(LP.getType())->elements())
+ Tys.push_back(getLLTForType(*Ty, *DL));
+ assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
+
// Mark exception register as live in.
- SmallVector<unsigned, 2> Regs;
- SmallVector<uint64_t, 2> Offsets;
- LLT p0 = LLT::pointer(0, DL->getPointerSizeInBits());
- if (unsigned Reg = TLI.getExceptionPointerRegister(PersonalityFn)) {
- unsigned VReg = MRI->createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(VReg, Reg);
- Regs.push_back(VReg);
- Offsets.push_back(0);
+ unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+ if (!ExceptionReg)
+ return false;
+
+ MBB.addLiveIn(ExceptionReg);
+ unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]),
+ Tmp = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(VReg, ExceptionReg);
+ MIRBuilder.buildInsert(Tmp, Undef, VReg, 0);
+
+ unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+ if (!SelectorReg)
+ return false;
+
+ MBB.addLiveIn(SelectorReg);
+
+ // N.b. the exception selector register always has pointer type and may not
+ // match the actual IR-level type in the landingpad so an extra cast is
+ // needed.
+ unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+ MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+
+ VReg = MRI->createGenericVirtualRegister(Tys[1]);
+ MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg);
+ MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg,
+ Tys[0].getSizeInBits());
+ return true;
+}
+
+bool IRTranslator::translateAlloca(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ auto &AI = cast<AllocaInst>(U);
+
+ if (AI.isStaticAlloca()) {
+ unsigned Res = getOrCreateVReg(AI);
+ int FI = getOrCreateFrameIndex(AI);
+ MIRBuilder.buildFrameIndex(Res, FI);
+ return true;
+ }
+
+ // Now we're in the harder dynamic case.
+ Type *Ty = AI.getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
+
+ unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
+
+ Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
+ LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
+ if (MRI->getType(NumElts) != IntPtrTy) {
+ unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
+ NumElts = ExtElts;
}
- if (unsigned Reg = TLI.getExceptionSelectorRegister(PersonalityFn)) {
- unsigned VReg = MRI->createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(VReg, Reg);
- Regs.push_back(VReg);
- Offsets.push_back(p0.getSizeInBits());
+ unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+ unsigned TySize =
+ getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
+ MIRBuilder.buildMul(AllocSize, NumElts, TySize);
+
+ LLT PtrTy = getLLTForType(*AI.getType(), *DL);
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+
+ unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildCopy(SPTmp, SPReg);
+
+ unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
+
+ // Handle alignment. We have to realign if the allocation granule was smaller
+ // than stack alignment, or the specific alloca requires more than stack
+ // alignment.
+ unsigned StackAlign =
+ MF->getSubtarget().getFrameLowering()->getStackAlignment();
+ Align = std::max(Align, StackAlign);
+ if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) {
+ // Round the size of the allocation up to the stack alignment size
+    // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
+ AllocTmp = AlignedAlloc;
}
- MIRBuilder.buildSequence(getOrCreateVReg(LP), Regs, Offsets);
+ MIRBuilder.buildCopy(SPReg, AllocTmp);
+ MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
+
+ MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
+ assert(MF->getFrameInfo().hasVarSizedObjects());
return true;
}
-bool IRTranslator::translateStaticAlloca(const AllocaInst &AI,
- MachineIRBuilder &MIRBuilder) {
- if (!TPC->isGlobalISelAbortEnabled() && !AI.isStaticAlloca())
- return false;
+bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
+ // FIXME: We may need more info about the type. Because of how LLT works,
+ // we're completely discarding the i64/double distinction here (amongst
+ // others). Fortunately the ABIs I know of where that matters don't use va_arg
+ // anyway but that's not guaranteed.
+ MIRBuilder.buildInstr(TargetOpcode::G_VAARG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(0)))
+ .addImm(DL->getABITypeAlignment(U.getType()));
+ return true;
+}
- assert(AI.isStaticAlloca() && "only handle static allocas now");
- unsigned Res = getOrCreateVReg(AI);
- int FI = getOrCreateFrameIndex(AI);
- MIRBuilder.buildFrameIndex(Res, FI);
+bool IRTranslator::translateInsertElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar as it is
+ // not a legal vector type in LLT.
+ if (U.getType()->getVectorNumElements() == 1) {
+ unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+ ValToVReg[&U] = Elt;
+ return true;
+ }
+ MIRBuilder.buildInsertVectorElement(
+ getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1)), getOrCreateVReg(*U.getOperand(2)));
+ return true;
+}
+
+bool IRTranslator::translateExtractElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar as it is
+ // not a legal vector type in LLT.
+ if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
+ unsigned Elt = getOrCreateVReg(*U.getOperand(0));
+ ValToVReg[&U] = Elt;
+ return true;
+ }
+ MIRBuilder.buildExtractVectorElement(getOrCreateVReg(U),
+ getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1)));
+ return true;
+}
+
+bool IRTranslator::translateShuffleVector(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(0)))
+ .addUse(getOrCreateVReg(*U.getOperand(1)))
+ .addUse(getOrCreateVReg(*U.getOperand(2)));
return true;
}
@@ -736,11 +1033,21 @@ void IRTranslator::finishPendingPhis() {
// won't create extra control flow here, otherwise we need to find the
// dominating predecessor here (or perhaps force the weirder IRTranslators
// to provide a simple boundary).
+ SmallSet<const BasicBlock *, 4> HandledPreds;
+
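+ // A PHI may list the same IR predecessor more than once, and a single IR
+ // edge may be split into several machine basic blocks. Handle each IR
+ // predecessor once and add one PHI entry per machine predecessor edge.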
for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
- assert(BBToMBB[PI->getIncomingBlock(i)]->isSuccessor(MIB->getParent()) &&
- "I appear to have misunderstood Machine PHIs");
- MIB.addUse(getOrCreateVReg(*PI->getIncomingValue(i)));
- MIB.addMBB(BBToMBB[PI->getIncomingBlock(i)]);
+ auto IRPred = PI->getIncomingBlock(i);
+ if (HandledPreds.count(IRPred))
+ continue;
+
+ HandledPreds.insert(IRPred);
+ unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i));
+ for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
+ assert(Pred->isSuccessor(MIB->getParent()) &&
+ "incorrect CFG at MachineBasicBlock level");
+ MIB.addUse(ValReg);
+ MIB.addMBB(Pred);
+ }
}
}
}
@@ -752,9 +1059,7 @@ bool IRTranslator::translate(const Instruction &Inst) {
case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
#include "llvm/IR/Instruction.def"
default:
- if (!TPC->isGlobalISelAbortEnabled())
- return false;
- llvm_unreachable("unknown opcode");
+ return false;
}
}
@@ -764,25 +1069,43 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
else if (auto CF = dyn_cast<ConstantFP>(&C))
EntryBuilder.buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
- EntryBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Reg);
+ EntryBuilder.buildUndef(Reg);
else if (isa<ConstantPointerNull>(C))
EntryBuilder.buildConstant(Reg, 0);
else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder.buildGlobalValue(Reg, GV);
- else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
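+ // Vector constants are translated element by element and reassembled with
+ // G_MERGE_VALUES; <1 x Ty> vectors collapse to their scalar element.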
+ else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
+ if (!CAZ->getType()->isVectorTy())
+ return false;
+ // Return the scalar if it is a <1 x Ty> vector.
+ if (CAZ->getNumElements() == 1)
+ return translate(*CAZ->getElementValue(0u), Reg);
+ std::vector<unsigned> Ops;
+ for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
+ Constant &Elt = *CAZ->getElementValue(i);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
+ // Return the scalar if it is a <1 x Ty> vector.
+ if (CV->getNumElements() == 1)
+ return translate(*CV->getElementAsConstant(0), Reg);
+ std::vector<unsigned> Ops;
+ for (unsigned i = 0; i < CV->getNumElements(); ++i) {
+ Constant &Elt = *CV->getElementAsConstant(i);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
switch(CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
#include "llvm/IR/Instruction.def"
default:
- if (!TPC->isGlobalISelAbortEnabled())
- return false;
- llvm_unreachable("unknown opcode");
+ return false;
}
- } else if (!TPC->isGlobalISelAbortEnabled())
+ } else
return false;
- else
- llvm_unreachable("unhandled constant kind");
return true;
}
@@ -793,7 +1116,7 @@ void IRTranslator::finalizeFunction() {
PendingPHIs.clear();
ValToVReg.clear();
FrameIndices.clear();
- Constants.clear();
+ MachinePreds.clear();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -807,85 +1130,101 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
TPC = &getAnalysis<TargetPassConfig>();
+ ORE = make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
- // Setup a separate basic-block for the arguments and constants, falling
- // through to the IR-level Function's entry block.
+ // Release the per-function state when we return, whether we succeeded or not.
+ auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
+
+ // Setup a separate basic-block for the arguments and constants
MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
MF->push_back(EntryBB);
- EntryBB->addSuccessor(&getOrCreateBB(F.front()));
EntryBuilder.setMBB(*EntryBB);
+ // Create all blocks, in IR order, to preserve the layout.
+ for (const BasicBlock &BB: F) {
+ auto *&MBB = BBToMBB[&BB];
+
+ MBB = MF->CreateMachineBasicBlock(&BB);
+ MF->push_back(MBB);
+
+ if (BB.hasAddressTaken())
+ MBB->setHasAddressTaken();
+ }
+
+ // Make our arguments/constants entry block fallthrough to the IR entry block.
+ EntryBB->addSuccessor(&getMBB(F.front()));
+
// Lower the actual args into this basic block.
SmallVector<unsigned, 8> VRegArgs;
for (const Argument &Arg: F.args())
VRegArgs.push_back(getOrCreateVReg(Arg));
- bool Succeeded = CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs);
- if (!Succeeded) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- finalizeFunction();
- return false;
- }
- report_fatal_error("Unable to lower arguments");
+ if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ MF->getFunction()->getSubprogram(),
+ &MF->getFunction()->getEntryBlock());
+ R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
}
// And translate the function!
for (const BasicBlock &BB: F) {
- MachineBasicBlock &MBB = getOrCreateBB(BB);
+ MachineBasicBlock &MBB = getMBB(BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder.setMBB(MBB);
for (const Instruction &Inst: BB) {
- Succeeded &= translate(Inst);
- if (!Succeeded) {
- if (TPC->isGlobalISelAbortEnabled())
- reportTranslationError(Inst, "unable to translate instruction");
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- break;
- }
- }
- }
-
- if (Succeeded) {
- finishPendingPhis();
-
- // Now that the MachineFrameInfo has been configured, no further changes to
- // the reserved registers are possible.
- MRI->freezeReservedRegs(*MF);
-
- // Merge the argument lowering and constants block with its single
- // successor, the LLVM-IR entry block. We want the basic block to
- // be maximal.
- assert(EntryBB->succ_size() == 1 &&
- "Custom BB used for lowering should have only one successor");
- // Get the successor of the current entry block.
- MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
- assert(NewEntryBB.pred_size() == 1 &&
- "LLVM-IR entry block has a predecessor!?");
- // Move all the instruction from the current entry block to the
- // new entry block.
- NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
- EntryBB->end());
-
- // Update the live-in information for the new entry block.
- for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
- NewEntryBB.addLiveIn(LiveIn);
- NewEntryBB.sortUniqueLiveIns();
+ if (translate(Inst))
+ continue;
- // Get rid of the now empty basic block.
- EntryBB->removeSuccessor(&NewEntryBB);
- MF->remove(EntryBB);
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
- assert(&MF->front() == &NewEntryBB &&
- "New entry wasn't next in the list of basic block!");
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ Inst.getDebugLoc(), &BB);
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst)
+ << ": '" << InstStr.str() << "'";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
}
- finalizeFunction();
+ finishPendingPhis();
+
+ // Now that the MachineFrameInfo has been configured, no further changes to
+ // the reserved registers are possible.
+ MRI->freezeReservedRegs(*MF);
+
+ // Merge the argument lowering and constants block with its single
+ // successor, the LLVM-IR entry block. We want the basic block to
+ // be maximal.
+ assert(EntryBB->succ_size() == 1 &&
+ "Custom BB used for lowering should have only one successor");
+ // Get the successor of the current entry block.
+ MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
+ assert(NewEntryBB.pred_size() == 1 &&
+ "LLVM-IR entry block has a predecessor!?");
+ // Move all the instructions from the current entry block to the
+ // new entry block.
+ NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
+ EntryBB->end());
+
+ // Update the live-in information for the new entry block.
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
+ NewEntryBB.addLiveIn(LiveIn);
+ NewEntryBB.sortUniqueLiveIns();
+
+ // Get rid of the now empty basic block.
+ EntryBB->removeSuccessor(&NewEntryBB);
+ MF->remove(EntryBB);
+ MF->DeleteMachineBasicBlock(EntryBB);
+
+ assert(&MF->front() == &NewEntryBB &&
+ "New entry wasn't next in the list of basic block!");
return false;
}
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 1d205cd6c9c8b..26454c1ef00f9 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,11 +12,15 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -44,17 +48,14 @@ void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-static void reportSelectionError(const MachineInstr *MI, const Twine &Message) {
- const MachineFunction &MF = *MI->getParent()->getParent();
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << Message << ":\nIn function: " << MF.getName() << '\n';
- if (MI)
- Err << *MI << '\n';
- report_fatal_error(Err.str());
-}
-
bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // No matter what happens, whether we successfully select the function or not,
+ // nothing is going to use the vreg types after us. Make sure they disappear.
+ auto ClearVRegTypesOnReturn =
+ make_scope_exit([&]() { MRI.getVRegToType().clear(); });
+
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
@@ -66,11 +67,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
assert(ISel && "Cannot work without InstructionSelector");
+ // An optimization remark emitter. Used to report failures.
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
// FIXME: freezeReservedRegs is now done in IRTranslator, but there are many
// other MF/MFI fields we need to initialize.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
#ifndef NDEBUG
// Check that our input is fully legal: we require the function to have the
// Legalized property, so it should be.
@@ -80,17 +82,19 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// that it has the same layering problem, but we only use inline methods so
// end up not needing to link against the GlobalISel library.
if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo())
- for (const MachineBasicBlock &MBB : MF)
- for (const MachineInstr &MI : MBB)
- if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
- reportSelectionError(&MI, "Instruction is not legal");
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "instruction is not legal", MI);
+ return false;
+ }
#endif
// FIXME: We could introduce new blocks and will need to fix the outer loop.
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
- bool Failed = false;
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
continue;
@@ -115,14 +119,19 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Selecting: \n " << MI);
+ // We could have folded this instruction away already, making it dead.
+ // If so, erase it.
+ if (isTriviallyDead(MI, MRI)) {
+ DEBUG(dbgs() << "Is dead; erasing.\n");
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ continue;
+ }
+
if (!ISel->select(MI)) {
- if (TPC.isGlobalISelAbortEnabled())
- // FIXME: It would be nice to dump all inserted instructions. It's
- // not
- // obvious how, esp. considering select() can insert after MI.
- reportSelectionError(&MI, "Cannot select");
- Failed = true;
- break;
+ // FIXME: It would be nice to dump all inserted instructions. It's
+ // not obvious how, esp. considering select() can insert after MI.
+ reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
+ return false;
}
// Dump the range of instructions that MI expanded into.
@@ -142,33 +151,36 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
for (auto &VRegToType : MRI.getVRegToType()) {
unsigned VReg = VRegToType.first;
auto *RC = MRI.getRegClassOrNull(VReg);
- auto *MI = MRI.def_instr_begin(VReg) == MRI.def_instr_end()
- ? nullptr
- : &*MRI.def_instr_begin(VReg);
- if (!RC) {
- if (TPC.isGlobalISelAbortEnabled())
- reportSelectionError(MI, "VReg as no regclass after selection");
- Failed = true;
- break;
- }
+ MachineInstr *MI = nullptr;
+ if (!MRI.def_empty(VReg))
+ MI = &*MRI.def_instr_begin(VReg);
+ else if (!MRI.use_empty(VReg))
+ MI = &*MRI.use_instr_begin(VReg);
+
+ if (MI && !RC) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "VReg has no regclass after selection", *MI);
+ return false;
+ } else if (!RC)
+ continue;
if (VRegToType.second.isValid() &&
VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
- if (TPC.isGlobalISelAbortEnabled())
- reportSelectionError(
- MI, "VReg has explicit size different from class size");
- Failed = true;
- break;
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "VReg has explicit size different from class size",
+ *MI);
+ return false;
}
}
- MRI.getVRegToType().clear();
-
- if (!TPC.isGlobalISelAbortEnabled() && (Failed || MF.size() != NumBlocks)) {
- MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ if (MF.size() != NumBlocks) {
+ MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure",
+ MF.getFunction()->getSubprogram(),
+ /*MBB=*/nullptr);
+ R << "inserting blocks is not supported yet";
+ reportGISelFailure(MF, TPC, MORE, R);
return false;
}
- assert(MF.size() == NumBlocks && "Inserting blocks is not supported yet");
// FIXME: Should we accurately track changes?
return true;
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5c34da0dc5579..fb9d01ef8542a 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -14,6 +14,8 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -55,6 +57,45 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
// constrainOperandRegClass does that for us.
MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
Reg, OpI));
+
+ // Tie uses to defs as indicated in MCInstrDesc.
+ if (MO.isUse()) {
+ int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ I.tieOperands(DefIdx, OpI);
+ }
}
return true;
}
+
+Optional<int64_t>
+InstructionSelector::getConstantVRegVal(unsigned VReg,
+ const MachineRegisterInfo &MRI) const {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return None;
+
+ if (MI->getOperand(1).isImm())
+ return MI->getOperand(1).getImm();
+
+ if (MI->getOperand(1).isCImm() &&
+ MI->getOperand(1).getCImm()->getBitWidth() <= 64)
+ return MI->getOperand(1).getCImm()->getSExtValue();
+
+ return None;
+}
+
+bool InstructionSelector::isOperandImmEqual(
+ const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const {
+
+ if (MO.getReg())
+ if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
+ return *VRegVal == Value;
+ return false;
+}
+
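+// An instruction is "obviously" safe to fold if it cannot access memory, has
+// no unmodeled side effects and carries no implicit operands.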
+bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const {
+ return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
+ MI.implicit_operands().begin() == MI.implicit_operands().end();
+}
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index e86356880e99f..657ddb3079195 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -92,10 +94,7 @@ bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
"unexpected physical register in G_SEQUENCE");
// Finally we can replace the uses.
- for (auto &Use : MRI.use_operands(ExtractReg)) {
- Changed = true;
- Use.setReg(OrigReg);
- }
+ MRI.replaceRegWith(ExtractReg, OrigReg);
}
if (AllDefsReplaced) {
@@ -114,6 +113,36 @@ bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
return Changed;
}
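+// Fold away a G_UNMERGE_VALUES whose source is a matching G_MERGE_VALUES:
+// each unmerge result is replaced by the corresponding merge operand, and the
+// merge itself is erased once its result is unused.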
+bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII) {
+ if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ return false;
+
+ unsigned NumDefs = MI.getNumOperands() - 1;
+ unsigned SrcReg = MI.getOperand(NumDefs).getReg();
+ MachineInstr &MergeI = *MRI.def_instr_begin(SrcReg);
+ if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
+ return false;
+
+ if (MergeI.getNumOperands() - 1 != NumDefs)
+ return false;
+
+ // FIXME: is a COPY appropriate if the types mismatch? We know both registers
+ // are allocatable by now.
+ if (MRI.getType(MI.getOperand(0).getReg()) !=
+ MRI.getType(MergeI.getOperand(1).getReg()))
+ return false;
+
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
+ MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
+ MergeI.getOperand(Idx + 1).getReg());
+
+ MI.eraseFromParent();
+ if (MRI.use_empty(MergeI.getOperand(0).getReg()))
+ MergeI.eraseFromParent();
+ return true;
+}
+
bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
@@ -122,7 +151,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
init(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
- const LegalizerInfo &LegalizerInfo = *MF.getSubtarget().getLegalizerInfo();
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
LegalizerHelper Helper(MF);
// FIXME: an instruction may need more than one pass before it is legal. For
@@ -142,27 +171,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// and are assumed to be legal.
if (!isPreISelGenericOpcode(MI->getOpcode()))
continue;
-
- auto Res = Helper.legalizeInstr(*MI, LegalizerInfo);
-
- // Error out if we couldn't legalize this instruction. We may want to fall
- // back to DAG ISel instead in the future.
- if (Res == LegalizerHelper::UnableToLegalize) {
- if (!TPC.isGlobalISelAbortEnabled()) {
- MF.getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return false;
+ SmallVector<MachineInstr *, 4> WorkList;
+ Helper.MIRBuilder.recordInsertions(
+ [&](MachineInstr *MI) { WorkList.push_back(MI); });
+ WorkList.push_back(&*MI);
+
+ LegalizerHelper::LegalizeResult Res;
+ unsigned Idx = 0;
+ do {
+ Res = Helper.legalizeInstrStep(*WorkList[Idx]);
+ // Error out if we couldn't legalize this instruction. We may want to fall
+ // back to DAG ISel instead in the future.
+ if (Res == LegalizerHelper::UnableToLegalize) {
+ Helper.MIRBuilder.stopRecordingInsertions();
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction", *WorkList[Idx]);
+ return false;
+ }
- std::string Msg;
- raw_string_ostream OS(Msg);
- OS << "unable to legalize instruction: ";
- MI->print(OS);
- report_fatal_error(OS.str());
- }
-
- Changed |= Res == LegalizerHelper::Legalized;
- }
+ Changed |= Res == LegalizerHelper::Legalized;
+ ++Idx;
+ } while (Idx < WorkList.size());
+ Helper.MIRBuilder.stopRecordingInsertions();
+ }
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -173,6 +208,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
NextMI = std::next(MI);
Changed |= combineExtracts(*MI, MRI, TII);
+ Changed |= combineMerges(*MI, MRI, TII);
}
}
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index eb25b6ca268f7..20358f7ee6c2e 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -29,14 +29,13 @@
using namespace llvm;
LegalizerHelper::LegalizerHelper(MachineFunction &MF)
- : MRI(MF.getRegInfo()) {
+ : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
MIRBuilder.setMF(MF);
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
- const LegalizerInfo &LegalizerInfo) {
- auto Action = LegalizerInfo.getAction(MI, MRI);
+LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
+ auto Action = LI.getAction(MI, MRI);
switch (std::get<0>(Action)) {
case LegalizerInfo::Legal:
return AlreadyLegal;
@@ -50,46 +49,32 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
return lower(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::FewerElements:
return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::Custom:
+ return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
+ : UnableToLegalize;
default:
return UnableToLegalize;
}
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::legalizeInstr(MachineInstr &MI,
- const LegalizerInfo &LegalizerInfo) {
- SmallVector<MachineInstr *, 4> WorkList;
- MIRBuilder.recordInsertions(
- [&](MachineInstr *MI) { WorkList.push_back(MI); });
- WorkList.push_back(&MI);
-
- bool Changed = false;
- LegalizeResult Res;
- unsigned Idx = 0;
- do {
- Res = legalizeInstrStep(*WorkList[Idx], LegalizerInfo);
- if (Res == UnableToLegalize) {
- MIRBuilder.stopRecordingInsertions();
- return UnableToLegalize;
- }
- Changed |= Res == Legalized;
- ++Idx;
- } while (Idx < WorkList.size());
-
- MIRBuilder.stopRecordingInsertions();
-
- return Changed ? Legalized : AlreadyLegal;
-}
-
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
SmallVectorImpl<unsigned> &VRegs) {
- unsigned Size = Ty.getSizeInBits();
- SmallVector<uint64_t, 4> Indexes;
- for (int i = 0; i < NumParts; ++i) {
+ for (int i = 0; i < NumParts; ++i)
VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- Indexes.push_back(i * Size);
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
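+// Map a generic floating-point opcode and scalar size to the corresponding
+// runtime library call; only 32- and 64-bit scalars are expected here.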
+static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
+ switch (Opcode) {
+ case TargetOpcode::G_FADD:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
+ case TargetOpcode::G_FREM:
+ return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
+ case TargetOpcode::G_FPOW:
+ return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
}
- MIRBuilder.buildExtract(VRegs, Indexes, Reg);
+ llvm_unreachable("Unknown libcall function");
}
LegalizerHelper::LegalizeResult
@@ -101,17 +86,19 @@ LegalizerHelper::libcall(MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- const char *Name =
- TLI.getLibcallName(Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32);
-
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ const char *Name = TLI.getLibcallName(Libcall);
+ MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
CLI.lowerCall(
- MIRBuilder, MachineOperand::CreateES(Name),
- {MI.getOperand(0).getReg(), Ty},
+ MIRBuilder, TLI.getLibcallCallingConv(Libcall),
+ MachineOperand::CreateES(Name), {MI.getOperand(0).getReg(), Ty},
{{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}});
MI.eraseFromParent();
return Legalized;
@@ -125,19 +112,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// FIXME: Don't know how to handle secondary types yet.
if (TypeIdx != 0)
return UnableToLegalize;
+
+ MIRBuilder.setInstr(MI);
+
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_ADD: {
// Expand in terms of carry-setting/consuming G_ADDE instructions.
- unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
NarrowTy.getSizeInBits();
- MIRBuilder.setInstr(MI);
-
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
@@ -152,11 +138,138 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Src2Regs[i], CarryIn);
DstRegs.push_back(DstReg);
- Indexes.push_back(i * NarrowSize);
CarryIn = CarryOut;
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_INSERT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+
+ SmallVector<unsigned, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ unsigned OpReg = MI.getOperand(2).getReg();
+ int64_t OpStart = MI.getOperand(3).getImm();
+ int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstStart = i * NarrowSize;
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister, forward the original.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is defined by this insert, forward the new
+ // value.
+ DstRegs.push_back(OpReg);
+ continue;
+ }
+
+ // The insert only partially covers this part of the destination. Compute
+ // which slice of OpReg lands here (ExtractOffset, SegSize) and where that
+ // slice is placed within this narrow part (InsertOffset).
+ int64_t ExtractOffset, InsertOffset, SegSize;
+ if (OpStart < DstStart) {
+ InsertOffset = 0;
+ ExtractOffset = DstStart - OpStart;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
+ } else {
+ InsertOffset = OpStart - DstStart;
+ ExtractOffset = 0;
+ SegSize =
+ std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+ }
+
+ unsigned SegReg = OpReg;
+ if (ExtractOffset != 0 || SegSize != OpSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
+ }
+
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+ DstRegs.push_back(DstReg);
+ }
+
+ assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_LOAD: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ LLT NarrowPtrTy = LLT::pointer(
+ MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize);
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ unsigned SrcReg = MRI.createGenericVirtualRegister(NarrowPtrTy);
+ unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64));
+
+ MIRBuilder.buildConstant(Offset, i * NarrowSize / 8);
+ MIRBuilder.buildGEP(SrcReg, MI.getOperand(1).getReg(), Offset);
+ // TODO: This is conservatively correct, but we probably want to split the
+ // memory operands in the future.
+ MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin());
+
+ DstRegs.push_back(DstReg);
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ LLT NarrowPtrTy = LLT::pointer(
+ MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize);
+
+ SmallVector<unsigned, 2> SrcRegs;
+ extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowPtrTy);
+ unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIRBuilder.buildConstant(Offset, i * NarrowSize / 8);
+ MIRBuilder.buildGEP(DstReg, MI.getOperand(1).getReg(), Offset);
+ // TODO: This is conservatively correct, but we probably want to split the
+ // memory operands in the future.
+ MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin());
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+
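+ // Split the constant into NarrowSize-bit chunks, least-significant chunk
+ // first, and reassemble the original value with G_MERGE_VALUES.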
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ ConstantInt *CI =
+ ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
+ MIRBuilder.buildConstant(DstReg, *CI);
+ DstRegs.push_back(DstReg);
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -175,7 +288,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
- case TargetOpcode::G_SUB: {
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SHL: {
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
@@ -195,10 +309,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV: {
- unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV
- ? TargetOpcode::G_SEXT
- : TargetOpcode::G_ZEXT;
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR: {
+ unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV ||
+ MI.getOpcode() == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse(
@@ -218,6 +335,85 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SELECT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ // Perform operation at larger width (any extension is fine here, high bits
+ // don't affect the result) and then truncate the result back to the
+ // original type.
+ unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy);
+ unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(2).getReg());
+ MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(3).getReg());
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(TargetOpcode::G_SELECT)
+ .addDef(DstExt)
+ .addReg(MI.getOperand(1).getReg())
+ .addUse(Src1Ext)
+ .addUse(Src2Ext);
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(DstExt)
+ .addUse(MI.getOperand(1).getReg());
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ unsigned Src = MI.getOperand(1).getReg();
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+
+ if (MI.getOpcode() == TargetOpcode::G_SITOFP) {
+ MIRBuilder.buildSExt(SrcExt, Src);
+ } else {
+ assert(MI.getOpcode() == TargetOpcode::G_UITOFP && "Unexpected conv op");
+ MIRBuilder.buildZExt(SrcExt, Src);
+ }
+
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(MI.getOperand(0).getReg())
+ .addUse(SrcExt);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_INSERT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned Src = MI.getOperand(1).getReg();
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(SrcExt, Src);
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ auto MIB = MIRBuilder.buildInsert(DstExt, SrcExt, MI.getOperand(2).getReg(),
+ MI.getOperand(3).getImm());
+ for (unsigned OpNum = 4; OpNum < MI.getNumOperands(); OpNum += 2) {
+ MIB.addReg(MI.getOperand(OpNum).getReg());
+ MIB.addImm(MI.getOperand(OpNum + 1).getImm());
+ }
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_LOAD: {
assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
WideTy.getSizeInBits() &&
@@ -231,12 +427,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_STORE: {
- assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
- WideTy.getSizeInBits() &&
- "illegal to increase number of bytes modified by a store");
+ if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
+ WideTy != LLT::scalar(8))
+ return UnableToLegalize;
+
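+ // Only 1-bit stores are widened here (to 8 bits). Extend the value according
+ // to how the target represents booleans in memory: zero-extend for zero/one,
+ // sign-extend for zero/negative-one, any-extend otherwise.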
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ auto Content = TLI.getBooleanContents(false, false);
+
+ unsigned ExtOp = TargetOpcode::G_ANYEXT;
+ if (Content == TargetLoweringBase::ZeroOrOneBooleanContent)
+ ExtOp = TargetOpcode::G_ZEXT;
+ else if (Content == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
+ ExtOp = TargetOpcode::G_SEXT;
+ else
+ ExtOp = TargetOpcode::G_ANYEXT;
unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildAnyExt(SrcExt, MI.getOperand(0).getReg());
+ MIRBuilder.buildInstr(ExtOp).addDef(SrcExt).addUse(
+ MI.getOperand(0).getReg());
MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(),
**MI.memoperands_begin());
MI.eraseFromParent();
@@ -315,6 +523,83 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO: {
+ // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
+ // result.
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned Overflow = MI.getOperand(1).getReg();
+ unsigned LHS = MI.getOperand(2).getReg();
+ unsigned RHS = MI.getOperand(3).getReg();
+
+ MIRBuilder.buildMul(Res, LHS, RHS);
+
+ unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
+ ? TargetOpcode::G_SMULH
+ : TargetOpcode::G_UMULH;
+
+ unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(Opcode)
+ .addDef(HiPart)
+ .addUse(LHS)
+ .addUse(RHS);
+
+ unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildConstant(Zero, 0);
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FNEG: {
+ // TODO: Handle vector types once we are able to
+ // represent them.
+ if (Ty.isVector())
+ return UnableToLegalize;
+ unsigned Res = MI.getOperand(0).getReg();
+ Type *ZeroTy;
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ ZeroTy = Type::getHalfTy(Ctx);
+ break;
+ case 32:
+ ZeroTy = Type::getFloatTy(Ctx);
+ break;
+ case 64:
+ ZeroTy = Type::getDoubleTy(Ctx);
+ break;
+ default:
+ llvm_unreachable("unexpected floating-point type");
+ }
+ ConstantFP &ZeroForNegation =
+ *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
+ unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildFConstant(Zero, ZeroForNegation);
+ MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
+ .addDef(Res)
+ .addUse(Zero)
+ .addUse(MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FSUB: {
+ // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
+ // First, check if G_FNEG is marked as Lower. If so, we may
+ // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
+ if (LI.getAction({G_FNEG, Ty}).first == LegalizerInfo::Lower)
+ return UnableToLegalize;
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned LHS = MI.getOperand(1).getReg();
+ unsigned RHS = MI.getOperand(2).getReg();
+ unsigned Neg = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD)
+ .addDef(Res)
+ .addUse(LHS)
+ .addUse(Neg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
@@ -335,7 +620,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
MIRBuilder.setInstr(MI);
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
@@ -343,10 +627,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
DstRegs.push_back(DstReg);
- Indexes.push_back(i * NarrowSize);
}
- MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index e49662075ed5e..eaf4056e47eaf 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -41,6 +41,8 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
+ DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_FNEG] = Lower;
}
void LegalizerInfo::computeTables() {
@@ -71,28 +73,36 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// These *have* to be implemented for now, they're the fundamental basis of
// how everything else is transformed.
- // Nothing is going to go well with types that aren't a power of 2 yet, so
- // don't even try because we might make things worse.
- if (!isPowerOf2_64(Aspect.Type.getSizeInBits()))
- return std::make_pair(Unsupported, LLT());
-
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
- Aspect.Opcode == TargetOpcode::G_EXTRACT)
+ Aspect.Opcode == TargetOpcode::G_EXTRACT ||
+ Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
+ Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
+ LLT Ty = Aspect.Type;
LegalizeAction Action = findInActions(Aspect);
+ // LegalizerHelper is not able to handle non-power-of-2 types right now, so do
+ // not try to legalize them unless they are marked as Legal or Custom.
+ // FIXME: This is a temporary hack until the general non-power-of-2
+ // legalization works.
+ if (!isPowerOf2_64(Ty.getSizeInBits()) &&
+ !(Action == Legal || Action == Custom))
+ return std::make_pair(Unsupported, LLT());
+
if (Action != NotFound)
return findLegalAction(Aspect, Action);
unsigned Opcode = Aspect.Opcode;
- LLT Ty = Aspect.Type;
if (!Ty.isVector()) {
auto DefaultAction = DefaultActions.find(Aspect.Opcode);
if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal)
return std::make_pair(Legal, Ty);
+ if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower)
+ return std::make_pair(Lower, Ty);
+
if (DefaultAction == DefaultActions.end() ||
DefaultAction->second != NarrowScalar)
return std::make_pair(Unsupported, LLT());
@@ -160,6 +170,7 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
case Legal:
case Lower:
case Libcall:
+ case Custom:
return Aspect.Type;
case NarrowScalar: {
return findLegalType(Aspect,
@@ -180,3 +191,9 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
}
}
}
+
+bool LegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ return false;
+}
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index c04f6e4ae897a..8d1a263395a0e 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -54,7 +55,7 @@ void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
void MachineIRBuilder::recordInsertions(
std::function<void(MachineInstr *)> Inserted) {
- InsertedInstr = Inserted;
+ InsertedInstr = std::move(Inserted);
}
void MachineIRBuilder::stopRecordingInsertions() {
@@ -82,6 +83,70 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildDirectDbgValue(
+ unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addReg(Reg, RegState::Debug)
+ .addReg(0, RegState::Debug)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIndirectDbgValue(
+ unsigned Reg, unsigned Offset, const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addReg(Reg, RegState::Debug)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
+ unsigned Offset,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstr(TargetOpcode::DBG_VALUE);
+ if (auto *CI = dyn_cast<ConstantInt>(&C)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getZExtValue());
+ } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) {
+ MIB.addFPImm(CFP);
+ } else {
+ // Insert %noreg if we didn't find a usable constant and had to drop it.
+ MIB.addReg(0U);
+ }
+
+ return MIB.addImm(Offset).addMetadata(Variable).addMetadata(Expr);
+}
+
MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
assert(MRI->getType(Res).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_FRAME_INDEX)
@@ -126,6 +191,17 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+ uint32_t NumBits) {
+ assert(MRI->getType(Res).isPointer() &&
+ MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_PTR_MASK)
+ .addDef(Res)
+ .addUse(Op0)
+ .addImm(NumBits);
+}
+
MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
unsigned Op1) {
assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
@@ -152,10 +228,27 @@ MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0,
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_AND)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+ return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
+}
+
MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
}
@@ -262,34 +355,56 @@ MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<unsigned> Results,
- ArrayRef<uint64_t> Indices,
- unsigned Src) {
-#ifndef NDEBUG
- assert(Results.size() == Indices.size() && "inconsistent number of regs");
- assert(!Results.empty() && "invalid trivial extract");
- assert(std::is_sorted(Indices.begin(), Indices.end()) &&
- "extract offsets must be in ascending order");
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
+ unsigned Op) {
+ unsigned Opcode = TargetOpcode::COPY;
+ if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_ZEXT;
+ else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_TRUNC;
- assert(MRI->getType(Src).isValid() && "invalid operand type");
- for (auto Res : Results)
- assert(MRI->getType(Res).isValid() && "invalid operand type");
-#endif
+ return buildInstr(Opcode).addDef(Res).addUse(Op);
+}
- auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
- for (auto Res : Results)
- MIB.addDef(Res);
- MIB.addUse(Src);
+MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
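+ // Pick the cast opcode from the operand kinds: pointer-to-scalar uses
+ // G_PTRTOINT, scalar-to-pointer uses G_INTTOPTR, identical types get a plain
+ // COPY, and everything else is a G_BITCAST.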
+ LLT SrcTy = MRI->getType(Src);
+ LLT DstTy = MRI->getType(Dst);
+ if (SrcTy == DstTy)
+ return buildCopy(Dst, Src);
+
+ unsigned Opcode;
+ if (SrcTy.isPointer() && DstTy.isScalar())
+ Opcode = TargetOpcode::G_PTRTOINT;
+ else if (DstTy.isPointer() && SrcTy.isScalar())
+ Opcode = TargetOpcode::G_INTTOPTR;
+ else {
+ assert(!SrcTy.isPointer() && !DstTy.isPointer() && "no G_ADDRCAST yet");
+ Opcode = TargetOpcode::G_BITCAST;
+ }
- for (auto Idx : Indices)
- MIB.addImm(Idx);
+ return buildInstr(Opcode).addDef(Dst).addUse(Src);
+}
- getMBB().insert(getInsertPt(), MIB);
- if (InsertedInstr)
- InsertedInstr(MIB);
+MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+ uint64_t Index) {
+#ifndef NDEBUG
+ assert(MRI->getType(Src).isValid() && "invalid operand type");
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+ assert(Index + MRI->getType(Res).getSizeInBits() <=
+ MRI->getType(Src).getSizeInBits() &&
+ "extracting off end of register");
+#endif
- return MIB;
+ if (MRI->getType(Res).getSizeInBits() == MRI->getType(Src).getSizeInBits()) {
+ assert(Index == 0 && "extraction past the end of a register");
+ return buildCast(Res, Src);
+ }
+
+ return buildInstr(TargetOpcode::G_EXTRACT)
+ .addDef(Res)
+ .addUse(Src)
+ .addImm(Index);
}
MachineInstrBuilder
@@ -316,6 +431,64 @@ MachineIRBuilder::buildSequence(unsigned Res,
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
+ return buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Res);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
+ ArrayRef<unsigned> Ops) {
+
+#ifndef NDEBUG
+ assert(!Ops.empty() && "invalid trivial sequence");
+ LLT Ty = MRI->getType(Ops[0]);
+ for (auto Reg : Ops)
+ assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Ops.size() * MRI->getType(Ops[0]).getSizeInBits() ==
+ MRI->getType(Res).getSizeInBits() &&
+ "input operands do not cover output register");
+#endif
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
+ MIB.addDef(Res);
+ for (unsigned i = 0; i < Ops.size(); ++i)
+ MIB.addUse(Ops[i]);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+ unsigned Op) {
+
+#ifndef NDEBUG
+ assert(!Res.empty() && "invalid trivial sequence");
+ LLT Ty = MRI->getType(Res[0]);
+ for (auto Reg : Res)
+ assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Res.size() * MRI->getType(Res[0]).getSizeInBits() ==
+ MRI->getType(Op).getSizeInBits() &&
+ "input operands do not cover output register");
+#endif
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+ for (unsigned i = 0; i < Res.size(); ++i)
+ MIB.addDef(Res[i]);
+ MIB.addUse(Op);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
+ unsigned Op, unsigned Index) {
+ if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) {
+ assert(Index == 0 && "insertion past the end of a register");
+ return buildCast(Res, Op);
+ }
+
+ return buildInstr(TargetOpcode::G_INSERT)
+ .addDef(Res)
+ .addUse(Src)
+ .addUse(Op)
+ .addImm(Index);
+}
+
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
unsigned Res,
bool HasSideEffects) {
@@ -395,9 +568,10 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
if (ResTy.isScalar() || ResTy.isPointer())
assert(MRI->getType(Tst).isScalar() && "type mismatch");
else
- assert(MRI->getType(Tst).isVector() &&
- MRI->getType(Tst).getNumElements() ==
- MRI->getType(Op0).getNumElements() &&
+ assert((MRI->getType(Tst).isScalar() ||
+ (MRI->getType(Tst).isVector() &&
+ MRI->getType(Tst).getNumElements() ==
+ MRI->getType(Op0).getNumElements())) &&
"type mismatch");
#endif
@@ -408,6 +582,46 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,
+ unsigned Val,
+ unsigned Elt,
+ unsigned Idx) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ LLT ValTy = MRI->getType(Val);
+ LLT EltTy = MRI->getType(Elt);
+ LLT IdxTy = MRI->getType(Idx);
+ assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
+ assert(EltTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
+ assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
+ assert(ResTy.getElementType() == EltTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT)
+ .addDef(Res)
+ .addUse(Val)
+ .addUse(Elt)
+ .addUse(Idx);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
+ unsigned Val,
+ unsigned Idx) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ LLT ValTy = MRI->getType(Val);
+ LLT IdxTy = MRI->getType(Idx);
+ assert(ValTy.isVector() && "invalid operand type");
+ assert(ResTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
+ assert(ValTy.getElementType() == ResTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT)
+ .addDef(Res)
+ .addUse(Val)
+ .addUse(Idx);
+}
+
void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
bool IsExtend) {
#ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index cc026ef27296a..f935390a8d1bd 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -71,6 +72,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
+ MORE = make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -585,18 +587,12 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
// LegalizerInfo as it's currently in the separate GlobalISel library.
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF.getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return false;
- }
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << "Instruction is not legal: " << MI << '\n';
- report_fatal_error(Err.str());
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "instruction is not legal", MI);
+ return false;
}
}
}
@@ -622,9 +618,8 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
if (!assignInstr(MI)) {
- if (TPC->isGlobalISelAbortEnabled())
- report_fatal_error("Unable to map instruction");
- MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "unable to map instruction", MI);
return false;
}
}
@@ -968,10 +963,12 @@ bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
LocalFreq == Cost.LocalFreq;
}
-void RegBankSelect::MappingCost::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegBankSelect::MappingCost::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegBankSelect::MappingCost::print(raw_ostream &OS) const {
if (*this == ImpossibleCost()) {
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 49d676f11da6a..940957d021524 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -19,10 +19,11 @@ using namespace llvm;
const unsigned RegisterBank::InvalidID = UINT_MAX;
-RegisterBank::RegisterBank(unsigned ID, const char *Name, unsigned Size,
- const uint32_t *CoveredClasses)
+RegisterBank::RegisterBank(
+ unsigned ID, const char *Name, unsigned Size,
+ const uint32_t *CoveredClasses, unsigned NumRegClasses)
: ID(ID), Name(Name), Size(Size) {
- ContainedRegClasses.resize(200);
+ ContainedRegClasses.resize(NumRegClasses);
ContainedRegClasses.setBitsInMask(CoveredClasses);
}
@@ -75,9 +76,11 @@ bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
return &OtherRB == this;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
print(dbgs(), /* IsForDebug */ true, TRI);
}
+#endif
void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
const TargetRegisterInfo *TRI) const {
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index da5ab0b9fb7b6..b2df2f1596769 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -63,13 +63,6 @@ RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
#endif // NDEBUG
}
-RegisterBankInfo::~RegisterBankInfo() {
- for (auto It : MapOfPartialMappings)
- delete It.second;
- for (auto It : MapOfValueMappings)
- delete It.second;
-}
-
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
@@ -133,15 +126,26 @@ const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
return &RC;
}
+/// Check whether or not \p MI should be treated like a copy
+/// for the mappings.
+/// Copy-like instructions are special for mapping because
+/// they don't have actual register constraints. Moreover,
+/// they sometimes have register classes assigned and we can
+/// just use that instead of failing to provide a generic mapping.
+static bool isCopyLike(const MachineInstr &MI) {
+ return MI.isCopy() || MI.isPHI() ||
+ MI.getOpcode() == TargetOpcode::REG_SEQUENCE;
+}
+
RegisterBankInfo::InstructionMapping
RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// For copies we want to walk over the operands and try to find one
// that has a register bank since the instruction itself will not get
// us any constraint.
- bool isCopyLike = MI.isCopy() || MI.isPHI();
+ bool IsCopyLike = isCopyLike(MI);
// For copy like instruction, only the mapping of the definition
// is important. The rest is not constrained.
- unsigned NumOperandsForMapping = isCopyLike ? 1 : MI.getNumOperands();
+ unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();
RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
/*OperandsMapping*/ nullptr,
@@ -175,7 +179,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// For copy-like instruction, we want to reuse the register bank
// that is already set on Reg, if any, since those instructions do
// not have any constraints.
- const RegisterBank *CurRegBank = isCopyLike ? AltRegBank : nullptr;
+ const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr;
if (!CurRegBank) {
// If this is a target specific instruction, we can deduce
// the register bank from the encoding constraints.
@@ -184,7 +188,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// All our attempts failed, give up.
CompleteMapping = false;
- if (!isCopyLike)
+ if (!IsCopyLike)
// MI does not carry enough information to guess the mapping.
return InstructionMapping();
continue;
@@ -192,7 +196,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
}
const ValueMapping *ValMapping =
&getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
- if (isCopyLike) {
+ if (IsCopyLike) {
OperandsMapping[0] = ValMapping;
CompleteMapping = true;
break;
@@ -200,7 +204,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
OperandsMapping[OpIdx] = ValMapping;
}
- if (isCopyLike && !CompleteMapping)
+ if (IsCopyLike && !CompleteMapping)
// No way to deduce the type from what we have.
return InstructionMapping();
@@ -234,8 +238,8 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
++NumPartialMappingsCreated;
- const PartialMapping *&PartMapping = MapOfPartialMappings[Hash];
- PartMapping = new PartialMapping{StartIdx, Length, RegBank};
+ auto &PartMapping = MapOfPartialMappings[Hash];
+ PartMapping = llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank);
return *PartMapping;
}
@@ -268,8 +272,8 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
++NumValueMappingsCreated;
- const ValueMapping *&ValMapping = MapOfValueMappings[Hash];
- ValMapping = new ValueMapping{BreakDown, NumBreakDowns};
+ auto &ValMapping = MapOfValueMappings[Hash];
+ ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
return *ValMapping;
}
@@ -282,9 +286,9 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// The addresses of the value mapping are unique.
// Therefore, we can use them directly to hash the operand mapping.
hash_code Hash = hash_combine_range(Begin, End);
- const auto &It = MapOfOperandsMappings.find(Hash);
- if (It != MapOfOperandsMappings.end())
- return It->second;
+ auto &Res = MapOfOperandsMappings[Hash];
+ if (Res)
+ return Res.get();
++NumOperandsMappingsCreated;
@@ -293,8 +297,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// mapping, because we use the pointer of the ValueMapping
// to hash and we expect them to uniquely identify an instance
// of value mapping.
- ValueMapping *&Res = MapOfOperandsMappings[Hash];
- Res = new ValueMapping[std::distance(Begin, End)];
+ Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End));
unsigned Idx = 0;
for (Iterator It = Begin; It != End; ++It, ++Idx) {
const ValueMapping *ValMap = *It;
@@ -302,7 +305,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
continue;
Res[Idx] = *ValMap;
}
- return Res;
+ return Res.get();
}
const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
@@ -349,6 +352,7 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
DEBUG(dbgs() << "Applying default-like mapping\n");
for (unsigned OpIdx = 0,
EndIdx = OpdMapper.getInstrMapping().getNumOperands();
@@ -359,6 +363,13 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
DEBUG(dbgs() << " is not a register, nothing to be done\n");
continue;
}
+ if (!MO.getReg()) {
+ DEBUG(dbgs() << " is %%noreg, nothing to be done\n");
+ continue;
+ }
+ assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
+ 0 &&
+ "Invalid mapping");
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
1 &&
"This mapping is too complex for this function");
@@ -368,9 +379,25 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
- DEBUG(dbgs() << " changed, replace " << MO.getReg());
- MO.setReg(*NewRegs.begin());
- DEBUG(dbgs() << " with " << MO.getReg());
+ unsigned OrigReg = MO.getReg();
+ unsigned NewReg = *NewRegs.begin();
+ DEBUG(dbgs() << " changed, replace " << PrintReg(OrigReg, nullptr));
+ MO.setReg(NewReg);
+ DEBUG(dbgs() << " with " << PrintReg(NewReg, nullptr));
+
+ // The OperandsMapper creates plain scalars; we may have to fix that.
+ // Check whether the types match and, if not, fix the new register's type.
+ LLT OrigTy = MRI.getType(OrigReg);
+ LLT NewTy = MRI.getType(NewReg);
+ if (OrigTy != NewTy) {
+ assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() &&
+ "Types with difference size cannot be handled by the default "
+ "mapping");
+ DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
+ << OrigTy);
+ MRI.setType(NewReg, OrigTy);
+ }
+ DEBUG(dbgs() << '\n');
}
}
@@ -400,10 +427,12 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
//------------------------------------------------------------------------------
// Helper classes implementation.
//------------------------------------------------------------------------------
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
bool RegisterBankInfo::PartialMapping::verify() const {
assert(RegBank && "Register bank not set");
@@ -451,10 +480,12 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
return true;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
OS << "#BreakDown: " << NumBreakDowns << " ";
@@ -472,8 +503,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
// Check that all the register operands are properly mapped.
// Check the constructor invariant.
// For PHI, we only care about mapping the definition.
- assert(NumOperands ==
- ((MI.isCopy() || MI.isPHI()) ? 1 : MI.getNumOperands()) &&
+ assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) &&
"NumOperands must match, see constructor");
assert(MI.getParent() && MI.getParent()->getParent() &&
"MI must be connected to a MachineFunction");
@@ -503,10 +533,12 @@ bool RegisterBankInfo::InstructionMapping::verify(
return true;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const {
OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: ";
@@ -576,6 +608,11 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
for (unsigned &NewVReg : NewVRegsForOpIdx) {
assert(PartMap != ValMapping.end() && "Out-of-bound access");
assert(NewVReg == 0 && "Register has already been created");
+ // The new registers are always bound to a scalar of the right size.
+ // The actual type has to be set when the target does the mapping
+ // of the instruction.
+ // The rationale is that this generic code cannot guess how the
+ // target plans to split the input type.
NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length));
MRI.setRegBank(NewVReg, *PartMap->RegBank);
++PartMap;
@@ -619,10 +656,12 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
return Res;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const {
print(dbgs(), true);
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
bool ForDebug) const {
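A hypothetical walk-through of the size-preserving type fix-up in applyDefaultMapping above (the concrete types are made up for illustration):
    // OrigReg was created as <2 x s32>; the OperandsMapper handed back a plain
    // s64 NewReg of the same bit width, so the guard fires and restores the type:
    //   OrigTy = LLT::vector(2, 32);  NewTy = LLT::scalar(64);
    //   MRI.setType(NewReg, OrigTy);  // NewReg is <2 x s32> again for later users.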
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index e50091833c26d..606a59680a3d4 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -11,10 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -43,3 +46,50 @@ unsigned llvm::constrainOperandRegClass(
return Reg;
}
+
+bool llvm::isTriviallyDead(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ // If we can move an instruction, we can remove it. Otherwise, it has
+ // a side-effect of some sort.
+ bool SawStore = false;
+ if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore))
+ return false;
+
+ // Instructions without side-effects are dead iff they only define dead vregs.
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI.use_nodbg_empty(Reg))
+ return false;
+ }
+ return true;
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (TPC.isGlobalISelAbortEnabled())
+ report_fatal_error(R.getMsg());
+ else
+ MORE.emit(R);
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ const char *PassName, StringRef Msg,
+ const MachineInstr &MI) {
+ MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ",
+ MI.getDebugLoc(), MI.getParent());
+ R << Msg << ": " << ore::MNV("Inst", MI);
+ reportGISelFailure(MF, TPC, MORE, R);
+}
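For context, a sketch of how a GlobalISel pass is expected to call the new helper; "my-gisel-pass" and selectInstr are placeholders here, while the real call sites are visible in the RegBankSelect hunk above:
    if (!selectInstr(MI)) {
      reportGISelFailure(MF, *TPC, *MORE, "my-gisel-pass",
                         "unable to select instruction", MI);
      return false; // FailedISel has already been set by the helper.
    }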
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index b9f3d86eabd84..37fe41582333d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -588,19 +588,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
return TExit && TExit == FalseBBI.BB;
}
-/// Shrink the provided inclusive range by one instruction.
-/// If the range was one instruction (\p It == \p Begin), It is not modified,
-/// but \p Empty is set to true.
-static inline void shrinkInclusiveRange(
- MachineBasicBlock::iterator &Begin,
- MachineBasicBlock::iterator &It,
- bool &Empty) {
- if (It == Begin)
- Empty = true;
- else
- It--;
-}
-
/// Count duplicated instructions and move the iterators to show where they
/// are.
/// @param TIB True Iterator Begin
@@ -633,10 +620,8 @@ bool IfConverter::CountDuplicatedInstructions(
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
TIB = skipDebugInstructionsForward(TIB, TIE);
- if(TIB == TIE)
- break;
FIB = skipDebugInstructionsForward(FIB, FIE);
- if(FIB == FIE)
+ if (TIB == TIE || FIB == FIE)
break;
if (!TIB->isIdenticalTo(*FIB))
break;
@@ -656,58 +641,42 @@ bool IfConverter::CountDuplicatedInstructions(
if (TIB == TIE || FIB == FIE)
return true;
// Now, in preparation for counting duplicate instructions at the ends of the
- // blocks, move the end iterators up past any branch instructions.
- --TIE;
- --FIE;
-
- // After this point TIB and TIE define an inclusive range, which means that
- // TIB == TIE is true when there is one more instruction to consider, not at
- // the end. Because we may not be able to go before TIB, we need a flag to
- // indicate a completely empty range.
- bool TEmpty = false, FEmpty = false;
-
- // Upon exit TIE and FIE will both point at the last non-shared instruction.
- // They need to be moved forward to point past the last non-shared
- // instruction if the range they delimit is non-empty.
- auto IncrementEndIteratorsOnExit = make_scope_exit([&]() {
- if (!TEmpty)
- ++TIE;
- if (!FEmpty)
- ++FIE;
- });
+ // blocks, switch to reverse_iterators. Note that getReverse() returns an
+ // iterator that points to the same instruction, unlike std::reverse_iterator.
+ // We have to do our own shifting so that we get the same range.
+ MachineBasicBlock::reverse_iterator RTIE = std::next(TIE.getReverse());
+ MachineBasicBlock::reverse_iterator RFIE = std::next(FIE.getReverse());
+ const MachineBasicBlock::reverse_iterator RTIB = std::next(TIB.getReverse());
+ const MachineBasicBlock::reverse_iterator RFIB = std::next(FIB.getReverse());
if (!TBB.succ_empty() || !FBB.succ_empty()) {
if (SkipUnconditionalBranches) {
- while (!TEmpty && TIE->isUnconditionalBranch())
- shrinkInclusiveRange(TIB, TIE, TEmpty);
- while (!FEmpty && FIE->isUnconditionalBranch())
- shrinkInclusiveRange(FIB, FIE, FEmpty);
+ while (RTIE != RTIB && RTIE->isUnconditionalBranch())
+ ++RTIE;
+ while (RFIE != RFIB && RFIE->isUnconditionalBranch())
+ ++RFIE;
}
}
- // If Dups1 includes all of a block, then don't count duplicate
- // instructions at the end of the blocks.
- if (TEmpty || FEmpty)
- return true;
-
// Count duplicate instructions at the ends of the blocks.
- while (!TEmpty && !FEmpty) {
+ while (RTIE != RTIB && RFIE != RFIB) {
// Skip dbg_value instructions. These do not count.
- TIE = skipDebugInstructionsBackward(TIE, TIB);
- FIE = skipDebugInstructionsBackward(FIE, FIB);
- TEmpty = TIE == TIB && TIE->isDebugValue();
- FEmpty = FIE == FIB && FIE->isDebugValue();
- if (TEmpty || FEmpty)
+ // Note that these are reverse iterators, so advancing them "forward" walks
+ // backwards over the block.
+ RTIE = skipDebugInstructionsForward(RTIE, RTIB);
+ RFIE = skipDebugInstructionsForward(RFIE, RFIB);
+ if (RTIE == RTIB || RFIE == RFIB)
break;
- if (!TIE->isIdenticalTo(*FIE))
+ if (!RTIE->isIdenticalTo(*RFIE))
break;
// We have to verify that any branch instructions are the same, and then we
// don't count them toward the # of duplicate instructions.
- if (!TIE->isBranch())
+ if (!RTIE->isBranch())
++Dups2;
- shrinkInclusiveRange(TIB, TIE, TEmpty);
- shrinkInclusiveRange(FIB, FIE, FEmpty);
+ ++RTIE;
+ ++RFIE;
}
+ TIE = std::next(RTIE.getReverse());
+ FIE = std::next(RFIE.getReverse());
return true;
}
@@ -741,25 +710,21 @@ bool IfConverter::RescanInstructions(
static void verifySameBranchInstructions(
MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
- MachineBasicBlock::iterator B1 = MBB1->begin();
- MachineBasicBlock::iterator B2 = MBB2->begin();
- MachineBasicBlock::iterator E1 = std::prev(MBB1->end());
- MachineBasicBlock::iterator E2 = std::prev(MBB2->end());
- bool Empty1 = false, Empty2 = false;
- while (!Empty1 && !Empty2) {
- E1 = skipDebugInstructionsBackward(E1, B1);
- E2 = skipDebugInstructionsBackward(E2, B2);
- Empty1 = E1 == B1 && E1->isDebugValue();
- Empty2 = E2 == B2 && E2->isDebugValue();
-
- if (Empty1 && Empty2)
+ const MachineBasicBlock::reverse_iterator B1 = MBB1->rend();
+ const MachineBasicBlock::reverse_iterator B2 = MBB2->rend();
+ MachineBasicBlock::reverse_iterator E1 = MBB1->rbegin();
+ MachineBasicBlock::reverse_iterator E2 = MBB2->rbegin();
+ while (E1 != B1 && E2 != B2) {
+ skipDebugInstructionsForward(E1, B1);
+ skipDebugInstructionsForward(E2, B2);
+ if (E1 == B1 && E2 == B2)
break;
- if (Empty1) {
+ if (E1 == B1) {
assert(!E2->isBranch() && "Branch mis-match, one block is empty.");
break;
}
- if (Empty2) {
+ if (E2 == B2) {
assert(!E1->isBranch() && "Branch mis-match, one block is empty.");
break;
}
@@ -769,8 +734,8 @@ static void verifySameBranchInstructions(
"Branch mis-match, branch instructions don't match.");
else
break;
- shrinkInclusiveRange(B1, E1, Empty1);
- shrinkInclusiveRange(B2, E2, Empty2);
+ ++E1;
+ ++E2;
}
}
#endif
@@ -2183,7 +2148,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// unknown probabilities into known ones.
// FIXME: This usage is too tricky and in the future we would like to
// eliminate all unknown probabilities in MBB.
- ToBBI.BB->normalizeSuccProbs();
+ if (ToBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(),
FromMBB.succ_end());
@@ -2263,7 +2229,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// Normalize the probabilities of ToBBI.BB's successors with all adjustment
// we've done above.
- ToBBI.BB->normalizeSuccProbs();
+ if (ToBBI.IsBrAnalyzable && FromBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
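A small illustration (not part of the patch) of the getReverse() shifting described in the comments above:
    // Assume MI points at some instruction X that is not the first in its block.
    MachineBasicBlock::iterator It = MI->getIterator();
    MachineBasicBlock::reverse_iterator R = It.getReverse(); // also refers to X
    MachineBasicBlock::reverse_iterator S = std::next(R);    // refers to the
        // instruction just before X -- the std::reverse_iterator convention --
        // which is why RTIB/RTIE/RFIB/RFIE above are formed with std::next().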
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 9588dfb720586..920c2a372a9b8 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -22,6 +22,7 @@
// With the help of a runtime that understands the .fault_maps section,
// faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
// a page fault.
+// Store and LoadStore are also supported.
//
//===----------------------------------------------------------------------===//
@@ -29,6 +30,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -151,25 +153,44 @@ class ImplicitNullChecks : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
AliasAnalysis *AA = nullptr;
MachineModuleInfo *MMI = nullptr;
+ MachineFrameInfo *MFI = nullptr;
bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
SmallVectorImpl<NullCheck> &NullCheckList);
- MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
- MachineBasicBlock *HandlerMBB);
+ MachineInstr *insertFaultingInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB);
void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
- /// Is \p MI a memory operation that can be used to implicitly null check the
- /// value in \p PointerReg? \p PrevInsts is the set of instruction seen since
+ enum AliasResult {
+ AR_NoAlias,
+ AR_MayAlias,
+ AR_WillAliasEverything
+ };
+ /// Returns AR_NoAlias if the memory operation \p MI does not alias with
+ /// \p PrevMI, AR_MayAlias if they may alias, and AR_WillAliasEverything if
+ /// they may alias and any further memory operation may alias with \p PrevMI.
+ AliasResult areMemoryOpsAliased(MachineInstr &MI, MachineInstr *PrevMI);
+
+ enum SuitabilityResult {
+ SR_Suitable,
+ SR_Unsuitable,
+ SR_Impossible
+ };
+ /// Return SR_Suitable if \p MI is a memory operation that can be used to
+ /// implicitly null check the value in \p PointerReg, SR_Unsuitable if
+ /// \p MI cannot be used for the null check, and SR_Impossible if there is no
+ /// point in continuing the lookup because no further instruction will be
+ /// usable either. \p PrevInsts is the set of instructions seen since
/// the explicit null check on \p PointerReg.
- bool isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
- ArrayRef<MachineInstr *> PrevInsts);
+ SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts);
/// Return true if \p FaultingMI can be hoisted from after the
/// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
/// non-null value if we also need to (and legally can) hoist a dependency.
- bool canHoistLoadInst(MachineInstr *FaultingMI, unsigned PointerReg,
- ArrayRef<MachineInstr *> InstsSeenSoFar,
- MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+ bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
public:
static char ID;
@@ -193,7 +214,7 @@ public:
}
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
- if (MI->isCall() || MI->mayStore() || MI->hasUnmodeledSideEffects())
+ if (MI->isCall() || MI->hasUnmodeledSideEffects())
return false;
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
@@ -248,7 +269,7 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
unsigned RegB = MOB.getReg();
- if (TRI->regsOverlap(RegA, RegB))
+ if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef()))
return false;
}
}
@@ -260,6 +281,7 @@ bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getRegInfo().getTargetRegisterInfo();
MMI = &MF.getMMI();
+ MFI = &MF.getFrameInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SmallVector<NullCheck, 16> NullCheckList;
@@ -283,36 +305,91 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
return false;
}
-bool ImplicitNullChecks::isSuitableMemoryOp(
- MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) {
+ImplicitNullChecks::AliasResult
+ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
+ MachineInstr *PrevMI) {
+ // If it is not a memory access, skip the check.
+ if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
+ return AR_NoAlias;
+ // Two loads can be reordered safely, so treat Load-Load as no alias.
+ if (!(MI.mayStore() || PrevMI->mayStore()))
+ return AR_NoAlias;
+ // We lost the memory info, so conservatively assume they alias. If it was a
+ // store, there is no point in continuing because we cannot check against it further.
+ if (MI.memoperands_empty())
+ return MI.mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+ if (PrevMI->memoperands_empty())
+ return PrevMI->mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+
+ for (MachineMemOperand *MMO1 : MI.memoperands()) {
+ // MMO1 should have a Value because it comes from the operation we'd like
+ // to use as the implicit null check.
+ assert(MMO1->getValue() && "MMO1 should have a Value!");
+ for (MachineMemOperand *MMO2 : PrevMI->memoperands()) {
+ if (const PseudoSourceValue *PSV = MMO2->getPseudoValue()) {
+ if (PSV->mayAlias(MFI))
+ return AR_MayAlias;
+ continue;
+ }
+ llvm::AliasResult AAResult = AA->alias(
+ MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MMO2->getAAInfo()));
+ if (AAResult != NoAlias)
+ return AR_MayAlias;
+ }
+ }
+ return AR_NoAlias;
+}
+
+ImplicitNullChecks::SuitabilityResult
+ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
unsigned BaseReg;
if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
BaseReg != PointerReg)
- return false;
-
- // We want the load to be issued at a sane offset from PointerReg, so that
- // if PointerReg is null then the load reliably page faults.
- if (!(MI.mayLoad() && !MI.isPredicable() && Offset < PageSize))
- return false;
-
- // Finally, we need to make sure that the load instruction actually is
- // loading from PointerReg, and there isn't some re-definition of PointerReg
- // between the compare and the load.
+ return SR_Unsuitable;
+
+ // We want the mem access to be issued at a sane offset from PointerReg,
+ // so that if PointerReg is null then the access reliably page faults.
+ if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() &&
+ Offset < PageSize))
+ return SR_Unsuitable;
+
+ // Finally, we need to make sure that the access instruction really does
+ // access memory through PointerReg, and that there isn't some re-definition
+ // of PointerReg between the compare and the memory access.
+ // If PointerReg has already been redefined, there is no point in continuing
+ // the lookup, since this condition will fail for every further instruction.
+ SuitabilityResult Suitable = SR_Suitable;
for (auto *PrevMI : PrevInsts)
- for (auto &PrevMO : PrevMI->operands())
- if (PrevMO.isReg() && PrevMO.getReg() &&
+ for (auto &PrevMO : PrevMI->operands()) {
+ if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() &&
TRI->regsOverlap(PrevMO.getReg(), PointerReg))
- return false;
-
- return true;
+ return SR_Impossible;
+
+ // Check whether the current memory access aliases with a previous one.
+ // If we have already found that it aliases, there is no need to check again,
+ // but we keep checking the base pointer since that can still yield SR_Impossible.
+ if (Suitable == SR_Suitable) {
+ AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
+ if (AR == AR_WillAliasEverything)
+ return SR_Impossible;
+ if (AR == AR_MayAlias)
+ Suitable = SR_Unsuitable;
+ }
+ }
+ return Suitable;
}
-bool ImplicitNullChecks::canHoistLoadInst(
- MachineInstr *FaultingMI, unsigned PointerReg,
- ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc,
- MachineInstr *&Dependence) {
+bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
+ unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc,
+ MachineInstr *&Dependence) {
auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
if (!DepResult.CanReorder)
return false;
@@ -359,7 +436,8 @@ bool ImplicitNullChecks::canHoistLoadInst(
// The Dependency can't be re-defining the base register -- then we won't
// get the memory operation on the address we want. This is already
// checked in \c IsSuitableMemoryOp.
- assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) &&
+ assert(!(DependenceMO.isDef() &&
+ TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) &&
"Should have been checked before!");
}
@@ -481,9 +559,11 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return false;
MachineInstr *Dependence;
- if (isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar) &&
- canHoistLoadInst(&MI, PointerReg, InstsSeenSoFar, NullSucc,
- Dependence)) {
+ SuitabilityResult SR = isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar);
+ if (SR == SR_Impossible)
+ return false;
+ if (SR == SR_Suitable &&
+ canHoistInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, Dependence)) {
NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
NullSucc, Dependence);
return true;
@@ -495,36 +575,42 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return false;
}
-/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
-/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI
-/// (defining the same register), and branches to HandlerMBB if the load
-/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
-MachineInstr *
-ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
- MachineBasicBlock *MBB,
- MachineBasicBlock *HandlerMBB) {
+/// Wrap a machine instruction, MI, into a FAULTING machine instruction.
+/// The FAULTING instruction does the same load/store as MI
+/// (defining the same register), and branches to HandlerMBB if the mem access
+/// faults. The FAULTING instruction is inserted at the end of MBB.
+MachineInstr *ImplicitNullChecks::insertFaultingInstr(
+ MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *HandlerMBB) {
const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
// all targets.
DebugLoc DL;
- unsigned NumDefs = LoadMI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs();
assert(NumDefs <= 1 && "other cases unhandled!");
unsigned DefReg = NoRegister;
if (NumDefs != 0) {
- DefReg = LoadMI->defs().begin()->getReg();
- assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
+ DefReg = MI->defs().begin()->getReg();
+ assert(std::distance(MI->defs().begin(), MI->defs().end()) == 1 &&
"expected exactly one def!");
}
- auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
+ FaultMaps::FaultKind FK;
+ if (MI->mayLoad())
+ FK =
+ MI->mayStore() ? FaultMaps::FaultingLoadStore : FaultMaps::FaultingLoad;
+ else
+ FK = FaultMaps::FaultingStore;
+
+ auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_OP), DefReg)
+ .addImm(FK)
.addMBB(HandlerMBB)
- .addImm(LoadMI->getOpcode());
+ .addImm(MI->getOpcode());
- for (auto &MO : LoadMI->uses())
- MIB.addOperand(MO);
+ for (auto &MO : MI->uses())
+ MIB.add(MO);
- MIB.setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
return MIB;
}
@@ -545,18 +631,18 @@ void ImplicitNullChecks::rewriteNullChecks(
NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI);
}
- // Insert a faulting load where the conditional branch was originally. We
- // check earlier ensures that this bit of code motion is legal. We do not
- // touch the successors list for any basic block since we haven't changed
- // control flow, we've just made it implicit.
- MachineInstr *FaultingLoad = insertFaultingLoad(
+ // Insert a faulting instruction where the conditional branch was
+ // originally. The check performed earlier ensures that this bit of code motion
+ // is legal. We do not touch the successors list for any basic block
+ // since we haven't changed control flow, we've just made it implicit.
+ MachineInstr *FaultingInstr = insertFaultingInstr(
NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc());
// Now the values defined by MemOperation, if any, are live-in of
// the block of MemOperation.
- // The original load operation may define implicit-defs alongside
- // the loaded value.
+ // The original operation may define implicit-defs alongside
+ // the value.
MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
- for (const MachineOperand &MO : FaultingLoad->operands()) {
+ for (const MachineOperand &MO : FaultingInstr->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
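To illustrate the newly supported store case (hypothetical x86-style assembly; the file's header comment only shows the load form):
    //   Before null-check folding:            After:
    //     testq %r10, %r10                      faulting_op [store], throw_npe,
    //     je    throw_npe                           movl %esi, (%r10)
    //     movl  %esi, (%r10)
    //   If %r10 is null, the store page-faults and, with the help of the
    //   .fault_maps section, the runtime transfers control to throw_npe.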
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 3d81184f774a3..a1cb0a0695bfa 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -558,7 +558,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
// We take the DebugLoc from MI, since OrigMI may be attributed to a
- // different source location.
+ // different source location.
auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
NewMI->setDebugLoc(MI.getDebugLoc());
@@ -686,7 +686,8 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
return true;
}
-#if !defined(NDEBUG)
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
// Dump the range of instructions from B to E with their slot indexes.
static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
MachineBasicBlock::iterator E,
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index afd24067ace76..c6cc909e25d38 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -115,21 +115,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::memmove:
M.getOrInsertFunction("memmove",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::memset:
M.getOrInsertFunction("memset",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt32Ty(M.getContext()),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::sqrt:
EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
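For reference, the shape of the updated calls; the argument annotations below are added here and are not in the patch (the first type is the return type, the remaining ones are parameter types, and the old trailing nullptr sentinel is gone):
    M.getOrInsertFunction("memmove",
                          Type::getInt8PtrTy(Context),  // return type (i8*)
                          Type::getInt8PtrTy(Context),  // dest
                          Type::getInt8PtrTy(Context),  // src
                          DL.getIntPtrType(Context));   // size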
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index 86d3624a9d6e0..07ea9dcaea7ae 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG MIRParser GlobalISel
type = Library
name = CodeGen
parent = Libraries
-required_libraries = Analysis BitReader BitWriter Core MC Scalar Support Target TransformUtils
+required_libraries = Analysis BitReader BitWriter Core MC ProfileData Scalar Support Target TransformUtils
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 26794e28020eb..7b1706f0f4ba9 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -42,8 +42,8 @@ static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
-static cl::opt<bool>
- EnableGlobalISel("global-isel", cl::Hidden, cl::init(false),
+static cl::opt<cl::boolOrDefault>
+ EnableGlobalISel("global-isel", cl::Hidden,
cl::desc("Enable the \"global\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
@@ -85,7 +85,7 @@ void LLVMTargetMachine::initAsmInfo() {
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
StringRef DataLayoutString,
const Triple &TT, StringRef CPU,
- StringRef FS, TargetOptions Options,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
@@ -149,7 +149,9 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
TM->setFastISel(true);
// Ask the target for an isel.
- if (LLVM_UNLIKELY(EnableGlobalISel)) {
+ // Enable GlobalISel if the target wants to, but allow that to be overridden.
+ if (EnableGlobalISel == cl::BOU_TRUE || (EnableGlobalISel == cl::BOU_UNSET &&
+ PassConfig->isGlobalISelEnabled())) {
if (PassConfig->addIRTranslator())
return nullptr;
@@ -172,11 +174,12 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
// Pass to reset the MachineFunction if the ISel failed.
PM.add(createResetMachineFunctionPass(
- PassConfig->reportDiagnosticWhenGlobalISelFallback()));
+ PassConfig->reportDiagnosticWhenGlobalISelFallback(),
+ PassConfig->isGlobalISelAbortEnabled()));
// Provide a fallback path when we do not want to abort on
// not-yet-supported input.
- if (LLVM_UNLIKELY(!PassConfig->isGlobalISelAbortEnabled()) &&
+ if (!PassConfig->isGlobalISelAbortEnabled() &&
PassConfig->addInstSelector())
return nullptr;
diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
new file mode 100644
index 0000000000000..996d40ca6e1ee
--- /dev/null
+++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -0,0 +1,97 @@
+///===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency --===//
+///
+/// The LLVM Compiler Infrastructure
+///
+/// This file is distributed under the University of Illinois Open Source
+/// License. See LICENSE.TXT for details.
+///
+///===---------------------------------------------------------------------===//
+/// \file
+/// This is an alternative analysis pass to MachineBlockFrequencyInfo. The
+/// difference is that with this pass the block frequencies are not computed
+/// when the analysis pass is executed but rather when the BFI result is
+/// explicitly requested by the analysis client.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lazy-machine-block-freq"
+
+INITIALIZE_PASS_BEGIN(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+
+char LazyMachineBlockFrequencyInfoPass::ID = 0;
+
+LazyMachineBlockFrequencyInfoPass::LazyMachineBlockFrequencyInfoPass()
+ : MachineFunctionPass(ID) {
+ initializeLazyMachineBlockFrequencyInfoPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void LazyMachineBlockFrequencyInfoPass::print(raw_ostream &OS,
+ const Module *M) const {
+ getBFI().print(OS, M);
+}
+
+void LazyMachineBlockFrequencyInfoPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LazyMachineBlockFrequencyInfoPass::releaseMemory() {
+ OwnedMBFI.reset();
+ OwnedMLI.reset();
+ OwnedMDT.reset();
+}
+
+MachineBlockFrequencyInfo &
+LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
+ auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (MBFI) {
+ DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");
+ return *MBFI;
+ }
+
+ auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n");
+ DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");
+
+ if (!MLI) {
+ DEBUG(dbgs() << "Building LoopInfo on the fly\n");
+ // First create a dominator tree.
+ DEBUG(if (MDT) dbgs() << "DominatorTree is available\n");
+
+ if (!MDT) {
+ DEBUG(dbgs() << "Building DominatorTree on the fly\n");
+ OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT->getBase().recalculate(*MF);
+ MDT = OwnedMDT.get();
+ }
+
+ // Generate LoopInfo from it.
+ OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI->getBase().analyze(MDT->getBase());
+ MLI = OwnedMLI.get();
+ }
+
+ OwnedMBFI = make_unique<MachineBlockFrequencyInfo>();
+ OwnedMBFI->calculate(*MF, MBPI, *MLI);
+ return *OwnedMBFI.get();
+}
+
+bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction(
+ MachineFunction &F) {
+ MF = &F;
+ return false;
+}
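A sketch of the intended client-side usage, assuming getBFI() (already used in print() above) is the public lazy accessor; MyPass is a placeholder:
    void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
      AU.setPreservesAll();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool MyPass::runOnMachineFunction(MachineFunction &MF) {
      // The block frequencies are only computed if this line actually runs.
      MachineBlockFrequencyInfo &MBFI =
          getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
      (void)MBFI.getEntryFreq();
      return false;
    }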
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 834ed5f06c945..275d84e2c185f 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -14,14 +14,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <string>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "lexicalscopes"
@@ -38,6 +47,10 @@ void LexicalScopes::reset() {
/// initialize - Scan machine function and construct lexical scope nest.
void LexicalScopes::initialize(const MachineFunction &Fn) {
+ // Don't attempt any lexical scope creation for a NoDebug compile unit.
+ if (Fn.getFunction()->getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return;
reset();
MF = &Fn;
SmallVector<InsnRange, 4> MIRanges;
@@ -54,7 +67,6 @@ void LexicalScopes::initialize(const MachineFunction &Fn) {
void LexicalScopes::extractLexicalScopes(
SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
-
// Scan each instruction and create scopes. First build working set of scopes.
for (const auto &MBB : *MF) {
const MachineInstr *RangeBeginMI = nullptr;
@@ -127,6 +139,10 @@ LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) {
LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
const DILocation *IA) {
if (IA) {
+ // Skip scopes inlined from a NoDebug compile unit.
+ if (Scope->getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return getOrCreateLexicalScope(IA);
// Create an abstract scope for inlined function.
getOrCreateAbstractScope(Scope);
// Create an inlined scope for inlined function.
@@ -181,10 +197,9 @@ LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
else
Parent = getOrCreateLexicalScope(InlinedAt);
- I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
- std::forward_as_tuple(P),
- std::forward_as_tuple(Parent, Scope,
- InlinedAt, false))
+ I = InlinedLexicalScopeMap
+ .emplace(std::piecewise_construct, std::forward_as_tuple(P),
+ std::forward_as_tuple(Parent, Scope, InlinedAt, false))
.first;
return &I->second;
}
@@ -241,7 +256,6 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
void LexicalScopes::assignInstructionRanges(
SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
-
LexicalScope *PrevLexicalScope = nullptr;
for (const auto &R : MIRanges) {
LexicalScope *S = MI2ScopeMap.lookup(R.first);
@@ -299,9 +313,8 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
return Result;
}
-/// dump - Print data structures.
-void LexicalScope::dump(unsigned Indent) const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LexicalScope::dump(unsigned Indent) const {
raw_ostream &err = dbgs();
err.indent(Indent);
err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
@@ -316,5 +329,5 @@ void LexicalScope::dump(unsigned Indent) const {
for (unsigned i = 0, e = Children.size(); i != e; ++i)
if (Children[i] != this)
Children[i]->dump(Indent + 2);
-#endif
}
+#endif
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index c945376560f72..f956974b1aafe 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -24,13 +24,16 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -61,6 +64,7 @@ class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
LexicalScopes LS;
/// Keeps track of lexical scopes associated with a user value's source
@@ -127,11 +131,13 @@ private:
if (int RegNo = isDbgValueDescribedByReg(MI)) {
Kind = RegisterKind;
Loc.RegisterLoc.RegNo = RegNo;
- uint64_t Offset =
+ int64_t Offset =
MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0;
// We don't support offsets larger than 4GiB here. They are
// slated to be replaced with DIExpressions anyway.
- if (Offset >= (1ULL << 32))
+ // With indirect debug values used for spill locations, Offset
+ // can be negative.
+ if (Offset == INT64_MIN || std::abs(Offset) >= (1LL << 32))
Kind = InvalidKind;
else
Loc.RegisterLoc.Offset = Offset;
@@ -150,7 +156,9 @@ private:
/// dominates MBB.
bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
- void dump() const { MI.dump(); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { MI.dump(); }
+#endif
bool operator==(const VarLoc &Other) const {
return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
@@ -167,6 +175,11 @@ private:
typedef UniqueVector<VarLoc> VarLocMap;
typedef SparseBitVector<> VarLocSet;
typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB;
+ struct SpillDebugPair {
+ MachineInstr *SpillInst;
+ MachineInstr *DebugInst;
+ };
+ typedef SmallVector<SpillDebugPair, 4> SpillMap;
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
@@ -216,14 +229,21 @@ private:
}
};
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+ int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg);
+
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs);
+ void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, SpillMap &Spills);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
const VarLocMap &VarLocIDs);
bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs);
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, SpillMap &Spills,
+ bool transferSpills);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
@@ -282,6 +302,7 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
+#ifndef NDEBUG
void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
const VarLocInMBB &V,
const VarLocMap &VarLocIDs,
@@ -300,6 +321,22 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
}
Out << "\n";
}
+#endif
+
+/// Given a spill instruction, extract the register and offset used to
+/// address the spill location in a target independent way.
+int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
+ unsigned &Reg) {
+ assert(MI.hasOneMemOperand() &&
+ "Spill instruction does not have exactly one memory operand?");
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ assert(PVal->kind() == PseudoSourceValue::FixedStack &&
+ "Inconsistent memory operand in spill instruction");
+ int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ const MachineBasicBlock *MBB = MI.getParent();
+ return TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+}
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
@@ -336,8 +373,12 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
SparseBitVector<> KillSet;
for (const MachineOperand &MO : MI.operands()) {
+ // Determine whether the operand is a register def. Assume that call
+ // instructions never clobber SP, because some backends (e.g., AArch64)
+ // never list SP in the regmask.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
- TRI->isPhysicalRegister(MO.getReg())) {
+ TRI->isPhysicalRegister(MO.getReg()) &&
+ !(MI.isCall() && MO.getReg() == SP)) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
for (unsigned ID : OpenRanges.getVarLocs())
@@ -358,6 +399,91 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
OpenRanges.erase(KillSet, VarLocIDs);
}
+/// Decide if @MI is a spill instruction and return true if it is. We use 2
+/// criteria to make this decision:
+/// - Is this instruction a store to a spill slot?
+/// - Is there a register operand that is both used and killed?
+/// TODO: Store optimization can fold spills into other stores (including
+/// other spills). We do not handle this yet (more than one memory operand).
+bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+ int FI;
+ const MachineMemOperand *MMO;
+
+ // TODO: Handle multiple stores folded into one.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ // To identify a spill instruction, use the same criteria as in AsmPrinter.
+ if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
+ TII->hasStoreToStackSlot(MI, MMO, FI)) &&
+ FrameInfo.isSpillSlotObjectIndex(FI)))
+ return false;
+
+ // In a spill instruction generated by the InlineSpiller the spilled register
+ // has its kill flag set. Return false if we don't find such a register.
+ Reg = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isUse() && MO.isKill()) {
+ Reg = MO.getReg();
+ break;
+ }
+ }
+ return Reg != 0;
+}
+
+/// A spilled register may indicate that we have to end the current range of
+/// a variable and create a new one for the spill location.
+/// We don't want to insert any instructions in transfer(), so we just create
+/// the DBG_VALUE without inserting it and keep track of it in @Spills.
+/// It will be inserted into the BB when we're done iterating over the
+/// instructions.
+void LiveDebugValues::transferSpillInst(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ SpillMap &Spills) {
+ unsigned Reg;
+ MachineFunction *MF = MI.getParent()->getParent();
+ if (!isSpillInstruction(MI, MF, Reg))
+ return;
+
+ // Check if the register is the location of a debug value.
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ if (VarLocIDs[ID].isDescribedByReg() == Reg) {
+ DEBUG(dbgs() << "Spilling Register " << PrintReg(Reg, TRI) << '('
+ << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+
+ // Create a DBG_VALUE instruction to describe the Var in its spilled
+ // location, but don't insert it yet to avoid invalidating the
+ // iterator in our caller.
+ unsigned SpillBase;
+ int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
+ const MachineInstr *DMI = &VarLocIDs[ID].MI;
+ MachineInstr *SpDMI =
+ BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, 0,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ SpDMI->getOperand(1).setImm(SpillOffset);
+ DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
+ SpDMI->print(dbgs(), false, TII));
+
+ // The newly created DBG_VALUE instruction SpDMI must be inserted after
+ // MI. Keep track of the pairing.
+ SpillDebugPair MIP = {&MI, SpDMI};
+ Spills.push_back(MIP);
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(VarLocIDs[ID].Var);
+
+ // Add the VarLoc to OpenRanges.
+ VarLoc VL(*SpDMI, LS);
+ unsigned SpillLocID = VarLocIDs.insert(VL);
+ OpenRanges.insert(SpillLocID, VL.Var);
+ return;
+ }
+ }
+}
+
/// Terminate all open ranges at the end of the current basic block.
bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
OpenRangesSet &OpenRanges,
@@ -383,10 +509,13 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
/// This routine creates OpenRanges and OutLocs.
bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs) {
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
+ SpillMap &Spills, bool transferSpills) {
bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs);
+ if (transferSpills)
+ transferSpillInst(MI, OpenRanges, VarLocIDs, Spills);
Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
return Changed;
}
@@ -475,10 +604,11 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool OLChanged = false;
bool MBBJoined = false;
- VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
- VarLocInMBB OutLocs; // Ranges that exist beyond bb.
- VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ SpillMap Spills; // DBG_VALUEs associated with spills.
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
@@ -490,9 +620,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
Pending;
// Initialize every mbb with OutLocs.
+ // We are not looking at any spill instructions during the initial pass
+ // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
+ // instructions for spills of registers that are known to be user variables
+ // within the BB in which the spill occurs.
for (auto &MBB : MF)
for (auto &MI : MBB)
- transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+ transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
+ /*transferSpills=*/false);
DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization",
dbgs()));
@@ -524,8 +659,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
if (MBBJoined) {
MBBJoined = false;
Changed = true;
+ // Now that we have started to extend ranges across BBs we need to
+ // examine spill instructions to see whether they spill registers that
+ // correspond to user variables.
for (auto &MI : *MBB)
- OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+ OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
+ /*transferSpills=*/true);
+
+ // Add any DBG_VALUE instructions necessitated by spills.
+ for (auto &SP : Spills)
+ MBB->insertAfter(MachineBasicBlock::iterator(*SP.SpillInst),
+ SP.DebugInst);
+ Spills.clear();
DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after propagating", dbgs()));
@@ -559,6 +704,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 0934d8cfeaa1f..bcf7c8e99c7ff 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -944,7 +944,7 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
IsIndirect, Loc.getReg(), offset, Variable, Expression);
else
BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(Loc)
+ .add(Loc)
.addImm(offset)
.addMetadata(Variable)
.addMetadata(Expression);
@@ -1005,7 +1005,7 @@ bool LiveDebugVariables::doInitialization(Module &M) {
return Pass::doInitialization(M);
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->print(dbgs());
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 623af492fcd4f..9ef9f238fdcea 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -863,6 +863,37 @@ void LiveInterval::clearSubRanges() {
SubRanges = nullptr;
}
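+/// Ensure every lane in \p LaneMask is covered by a subrange whose lanes all
+/// lie within \p LaneMask, splitting existing subranges or creating new ones
+/// as needed, and call \p Apply on each such subrange.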
+void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
+ LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
+
+ LaneBitmask ToApply = LaneMask;
+ for (SubRange &SR : subranges()) {
+ LaneBitmask SRMask = SR.LaneMask;
+ LaneBitmask Matching = SRMask & LaneMask;
+ if (Matching.none())
+ continue;
+
+ SubRange *MatchingRange;
+ if (SRMask == Matching) {
+ // The subrange fits (it does not cover bits outside \p LaneMask).
+ MatchingRange = &SR;
+ } else {
+ // We have to split the subrange into a matching and non-matching part.
+ // Reduce the lane mask of the existing subrange to the non-matching part.
+ SR.LaneMask = SRMask & ~Matching;
+ // Create a new subrange for the matching part
+ MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+ }
+ Apply(*MatchingRange);
+ ToApply &= ~Matching;
+ }
+ // Create a new subrange if there are uncovered bits left.
+ if (ToApply.any()) {
+ SubRange *NewRange = createSubRange(Allocator, ToApply);
+ Apply(*NewRange);
+ }
+}
+
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
for (const Segment &S : segments)
@@ -1032,6 +1063,7 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
// When they exist, Spills.back().start <= LastStart,
// and WriteI[-1].start <= LastStart.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveRangeUpdater::print(raw_ostream &OS) const {
if (!isDirty()) {
if (LR)
@@ -1058,6 +1090,7 @@ void LiveRangeUpdater::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void LiveRangeUpdater::dump() const {
print(errs());
}
+#endif
// Determine if A and B should be coalesced.
static inline bool coalescable(const LiveRange::Segment &A,
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 70d34838b2374..3f5b8e19d1f0c 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the LiveInterval analysis pass which is used
-// by the Linear Scan Register allocator. This pass linearizes the
-// basic blocks of the function in DFS order and computes live intervals for
-// each virtual and physical register.
+/// \file This file implements the LiveInterval analysis pass which is used
+/// by the Linear Scan Register allocator. This pass linearizes the
+/// basic blocks of the function in DFS order and computes live intervals for
+/// each virtual and physical register.
//
//===----------------------------------------------------------------------===//
@@ -96,16 +96,14 @@ void LiveIntervals::releaseMemory() {
RegMaskBits.clear();
RegMaskBlocks.clear();
- for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
- delete RegUnitRanges[i];
+ for (LiveRange *LR : RegUnitRanges)
+ delete LR;
RegUnitRanges.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
}
-/// runOnMachineFunction - calculates LiveIntervals
-///
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &MF->getRegInfo();
@@ -135,14 +133,13 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
return true;
}
-/// print - Implement the dump method.
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
// Dump the regunits.
- for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
- if (LiveRange *LR = RegUnitRanges[i])
- OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n';
+ for (unsigned Unit = 0, UnitE = RegUnitRanges.size(); Unit != UnitE; ++Unit)
+ if (LiveRange *LR = RegUnitRanges[Unit])
+ OS << PrintRegUnit(Unit, TRI) << ' ' << *LR << '\n';
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
@@ -152,8 +149,8 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
}
OS << "RegMasks:";
- for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
- OS << ' ' << RegMaskSlots[i];
+ for (SlotIndex Idx : RegMaskSlots)
+ OS << ' ' << Idx;
OS << '\n';
printInstrs(OS);
@@ -165,7 +162,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void LiveIntervals::dumpInstrs() const {
+LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
#endif
@@ -177,8 +174,7 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
}
-/// computeVirtRegInterval - Compute the live interval of a virtual register,
-/// based on defs and uses.
+/// Compute the live interval of a virtual register, based on defs and uses.
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
@@ -200,7 +196,7 @@ void LiveIntervals::computeRegMasks() {
RegMaskBlocks.resize(MF->getNumBlockIDs());
// Find all instructions with regmask operands.
- for (MachineBasicBlock &MBB : *MF) {
+ for (const MachineBasicBlock &MBB : *MF) {
std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()];
RMB.first = RegMaskSlots.size();
@@ -210,7 +206,7 @@ void LiveIntervals::computeRegMasks() {
RegMaskBits.push_back(Mask);
}
- for (MachineInstr &MI : MBB) {
+ for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
@@ -245,9 +241,9 @@ void LiveIntervals::computeRegMasks() {
// interference.
//
-/// computeRegUnitInterval - Compute the live range of a register unit, based
-/// on the uses and defs of aliasing registers. The range should be empty,
-/// or contain only dead phi-defs from ABI blocks.
+/// Compute the live range of a register unit, based on the uses and defs of
+/// aliasing registers. The range should be empty, or contain only dead
+/// phi-defs from ABI blocks.
void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
@@ -257,22 +253,30 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
// may share super-registers. That's OK because createDeadDefs() is
// idempotent. It is very rare for a register unit to have multiple roots, so
// uniquing super-registers is probably not worthwhile.
- for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
- for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
- Supers.isValid(); ++Supers) {
- if (!MRI->reg_empty(*Supers))
- LRCalc->createDeadDefs(LR, *Supers);
+ bool IsReserved = true;
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
+ Super.isValid(); ++Super) {
+ unsigned Reg = *Super;
+ if (!MRI->reg_empty(Reg))
+ LRCalc->createDeadDefs(LR, Reg);
+ // A register unit is considered reserved if all its roots and all their
+ // super registers are reserved.
+ if (!MRI->isReserved(Reg))
+ IsReserved = false;
}
}
// Now extend LR to reach all uses.
// Ignore uses of reserved registers. We only track defs of those.
- for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
- for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
- Supers.isValid(); ++Supers) {
- unsigned Reg = *Supers;
- if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
- LRCalc->extendToUses(LR, Reg);
+ if (!IsReserved) {
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
+ Super.isValid(); ++Super) {
+ unsigned Reg = *Super;
+ if (!MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LR, Reg);
+ }
}
}
@@ -281,11 +285,9 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
LR.flushSegmentSet();
}
-
-/// computeLiveInRegUnits - Precompute the live ranges of any register units
-/// that are live-in to an ABI block somewhere. Register values can appear
-/// without a corresponding def when entering the entry block or a landing pad.
-///
+/// Precompute the live ranges of any register units that are live-in to an ABI
+/// block somewhere. Register values can appear without a corresponding def when
+/// entering the entry block or a landing pad.
void LiveIntervals::computeLiveInRegUnits() {
RegUnitRanges.resize(TRI->getNumRegUnits());
DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
@@ -294,18 +296,15 @@ void LiveIntervals::computeLiveInRegUnits() {
SmallVector<unsigned, 8> NewRanges;
// Check all basic blocks for live-ins.
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- const MachineBasicBlock *MBB = &*MFI;
-
+ for (const MachineBasicBlock &MBB : *MF) {
// We only care about ABI blocks: Entry + landing pads.
- if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty())
+ if ((&MBB != &MF->front() && !MBB.isEHPad()) || MBB.livein_empty())
continue;
// Create phi-defs at Begin for all live-in registers.
- SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
- DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
- for (const auto &LI : MBB->liveins()) {
+ SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB.getNumber());
+ for (const auto &LI : MBB.liveins()) {
for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
LiveRange *LR = RegUnitRanges[Unit];
@@ -324,16 +323,13 @@ void LiveIntervals::computeLiveInRegUnits() {
DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
// Compute the 'normal' part of the ranges.
- for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) {
- unsigned Unit = NewRanges[i];
+ for (unsigned Unit : NewRanges)
computeRegUnitRange(*RegUnitRanges[Unit], Unit);
- }
}
-
static void createSegmentsForValues(LiveRange &LR,
- iterator_range<LiveInterval::vni_iterator> VNIs) {
- for (auto VNI : VNIs) {
+ iterator_range<LiveInterval::vni_iterator> VNIs) {
+ for (VNInfo *VNI : VNIs) {
if (VNI->isUnused())
continue;
SlotIndex Def = VNI->def;
@@ -349,7 +345,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
// Keep track of the PHIs that are in use.
SmallPtrSet<VNInfo*, 8> UsedPHIs;
// Blocks that have already been added to WorkList as live-out.
- SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+ SmallPtrSet<const MachineBasicBlock*, 16> LiveOut;
// Extend intervals to reach all uses in WorkList.
while (!WorkList.empty()) {
@@ -368,7 +364,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
!UsedPHIs.insert(VNI).second)
continue;
// The PHI is live, make sure the predecessors are live-out.
- for (auto &Pred : MBB->predecessors()) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
@@ -384,7 +380,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
- for (auto &Pred : MBB->predecessors()) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
@@ -415,22 +411,20 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
ShrinkToUsesWorkList WorkList;
// Visit all instructions reading li->reg.
- for (MachineRegisterInfo::reg_instr_iterator
- I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end();
- I != E; ) {
- MachineInstr *UseMI = &*(I++);
- if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ unsigned Reg = li->reg;
+ for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) {
+ if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg))
continue;
- SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
LiveQueryResult LRQ = li->Query(Idx);
VNInfo *VNI = LRQ.valueIn();
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
// wrong.
- DEBUG(dbgs() << Idx << '\t' << *UseMI
+ DEBUG(dbgs() << Idx << '\t' << UseMI
<< "Warning: Instr claims to read non-existent value in "
- << *li << '\n');
+ << *li << '\n');
continue;
}
// Special case: An early-clobber tied operand reads and writes the
@@ -458,7 +452,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
bool MayHaveSplitComponents = false;
- for (auto VNI : LI.valnos) {
+ for (VNInfo *VNI : LI.valnos) {
if (VNI->isUnused())
continue;
SlotIndex Def = VNI->def;
@@ -548,7 +542,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
SR.segments.swap(NewLR.segments);
// Remove dead PHI value numbers
- for (auto VNI : SR.valnos) {
+ for (VNInfo *VNI : SR.valnos) {
if (VNI->isUnused())
continue;
const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
@@ -571,8 +565,8 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
ArrayRef<SlotIndex> Undefs) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- LRCalc->extend(LR, Indices[i], /*PhysReg=*/0, Undefs);
+ for (SlotIndex Idx : Indices)
+ LRCalc->extend(LR, Idx, /*PhysReg=*/0, Undefs);
}
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
@@ -601,11 +595,9 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// from each successor.
typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy;
VisitedTy Visited;
- for (MachineBasicBlock::succ_iterator
- SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
- SuccI != SuccE; ++SuccI) {
+ for (MachineBasicBlock *Succ : KillMBB->successors()) {
for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
- I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I = df_ext_begin(Succ, Visited), E = df_ext_end(Succ, Visited);
I != E;) {
MachineBasicBlock *MBB = *I;
@@ -657,9 +649,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Find the regunit intervals for the assigned register. They may overlap
// the virtual register live range, cancelling any kills.
RU.clear();
- for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
- ++Units) {
- const LiveRange &RURange = getRegUnit(*Units);
+ for (MCRegUnitIterator Unit(VRM->getPhys(Reg), TRI); Unit.isValid();
+ ++Unit) {
+ const LiveRange &RURange = getRegUnit(*Unit);
if (RURange.empty())
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
@@ -802,9 +794,8 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
// Conservatively return true instead of scanning huge predecessor lists.
if (PHIMBB->pred_size() > 100)
return true;
- for (MachineBasicBlock::const_pred_iterator
- PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
- if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ for (const MachineBasicBlock *Pred : PHIMBB->predecessors())
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(Pred)))
return true;
}
return false;
@@ -895,7 +886,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
// IntervalUpdate class.
//===----------------------------------------------------------------------===//
-// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+/// Toolkit used by handleMove to trim or extend live intervals.
class LiveIntervals::HMEditor {
private:
LiveIntervals& LIS;
@@ -1241,10 +1232,12 @@ private:
LiveRange::iterator NewIdxIn = NewIdxOut;
assert(NewIdxIn == LR.find(NewIdx.getBaseIndex()));
const SlotIndex SplitPos = NewIdxDef;
+ OldIdxVNI = OldIdxIn->valno;
// Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
+ OldIdxOut->valno->def = OldIdxIn->start;
*OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
- OldIdxIn->valno);
+ OldIdxOut->valno);
// OldIdxIn and OldIdxVNI are now undef and can be overridden.
// We Slide [NewIdxIn, OldIdxIn) down one position.
// |- X0/NewIdxIn -| ... |- Xn-1 -||- Xn/OldIdxIn -||- OldIdxOut -|
@@ -1514,8 +1507,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
}
- for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
- unsigned Reg = OrigRegs[i];
+ for (unsigned Reg : OrigRegs) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
@@ -1524,16 +1516,16 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (!LI.hasAtLeastOneValue())
continue;
- for (LiveInterval::SubRange &S : LI.subranges()) {
+ for (LiveInterval::SubRange &S : LI.subranges())
repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask);
- }
+
repairOldRegInRange(Begin, End, endIdx, LI, Reg);
}
}
void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (LiveRange *LR = getCachedRegUnit(*Units))
+ for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
+ if (LiveRange *LR = getCachedRegUnit(*Unit))
if (VNInfo *VNI = LR->getVNInfoAt(Pos))
LR->removeValNo(VNI);
}
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index fc2f233f6d687..b4aa0dc326a58 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -1,4 +1,4 @@
-//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,19 +13,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/LiveIntervalUnion.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <cstdlib>
using namespace llvm;
#define DEBUG_TYPE "regalloc"
-
// Merge a LiveInterval's segments. Guarantee no overlaps.
void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
if (Range.empty())
@@ -64,7 +64,7 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) {
LiveRange::const_iterator RegEnd = Range.end();
SegmentIter SegPos = Segments.find(RegPos->start);
- for (;;) {
+ while (true) {
assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
SegPos.erase();
if (!SegPos.valid())
@@ -126,25 +126,24 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
CheckedFirstInterference = true;
// Quickly skip interference check for empty sets.
- if (VirtReg->empty() || LiveUnion->empty()) {
+ if (LR->empty() || LiveUnion->empty()) {
SeenAllInterferences = true;
return 0;
}
- // In most cases, the union will start before VirtReg.
- VirtRegI = VirtReg->begin();
+ // In most cases, the union will start before LR.
+ LRI = LR->begin();
LiveUnionI.setMap(LiveUnion->getMap());
- LiveUnionI.find(VirtRegI->start);
+ LiveUnionI.find(LRI->start);
}
- LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveRange::const_iterator LREnd = LR->end();
LiveInterval *RecentReg = nullptr;
while (LiveUnionI.valid()) {
- assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+ assert(LRI != LREnd && "Reached end of LR");
// Check for overlapping interference.
- while (VirtRegI->start < LiveUnionI.stop() &&
- VirtRegI->end > LiveUnionI.start()) {
+ while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) {
// This is an overlap, record the interfering register.
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
@@ -161,20 +160,20 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
}
// The iterators are now not overlapping, LiveUnionI has been advanced
- // beyond VirtRegI.
- assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
+ // beyond LRI.
+ assert(LRI->end <= LiveUnionI.start() && "Expected non-overlap");
// Advance the iterator that ends first.
- VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
- if (VirtRegI == VirtRegEnd)
+ LRI = LR->advanceTo(LRI, LiveUnionI.start());
+ if (LRI == LREnd)
break;
// Detect overlap, handle above.
- if (VirtRegI->start < LiveUnionI.stop())
+ if (LRI->start < LiveUnionI.stop())
continue;
// Still not overlapping. Catch up LiveUnionI.
- LiveUnionI.advanceTo(VirtRegI->start);
+ LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
return InterferingVRegs.size();
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index dcc41c1718a62..9f7d7cf548480 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -120,12 +120,11 @@ void LivePhysRegs::print(raw_ostream &OS) const {
OS << "\n";
}
-/// Dumps the currently live registers to the debug output.
-LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
dbgs() << " " << *this;
-#endif
}
+#endif
bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
unsigned Reg) const {
@@ -161,7 +160,9 @@ void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
const MachineFrameInfo &MFI,
const TargetRegisterInfo &TRI) {
- for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
+ ++CSR)
LiveRegs.addReg(*CSR);
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveRegs.removeReg(Info.getReg());
@@ -180,7 +181,8 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
if (MBB.isReturnBlock()) {
// The return block has no successors whose live-ins we could merge
// below. So instead we add the callee saved registers manually.
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I)
addReg(*I);
} else {
addPristines(*this, MF, MFI, *TRI);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 0128376086281..398066bf8903e 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -75,34 +75,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
LI.createSubRangeFrom(*Alloc, ClassMask, LI);
}
- LaneBitmask Mask = SubMask;
- for (LiveInterval::SubRange &S : LI.subranges()) {
- // A Mask for subregs common to the existing subrange and current def.
- LaneBitmask Common = S.LaneMask & Mask;
- if (Common.none())
- continue;
- LiveInterval::SubRange *CommonRange;
- // A Mask for subregs covered by the subrange but not the current def.
- LaneBitmask RM = S.LaneMask & ~Mask;
- if (RM.any()) {
- // Split the subrange S into two parts: one covered by the current
- // def (CommonRange), and the one not affected by it (updated S).
- S.LaneMask = RM;
- CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
- } else {
- assert(Common == S.LaneMask);
- CommonRange = &S;
- }
+ LI.refineSubRanges(*Alloc, SubMask,
+ [&MO, this](LiveInterval::SubRange &SR) {
if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, *CommonRange, MO);
- Mask &= ~Common;
- }
- // Create a new SubRange for subregs we did not cover yet.
- if (Mask.any()) {
- LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
- if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, *NewRange, MO);
- }
+ createDeadDef(*Indexes, *Alloc, SR, MO);
+ });
}
// Create the def in the main liverange. We do not have to do this if
@@ -289,8 +266,7 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
if (UndefOnEntry[BN])
return false;
- auto MarkDefined =
- [this,BN,&DefOnEntry,&UndefOnEntry] (MachineBasicBlock &B) -> bool {
+ auto MarkDefined = [BN, &DefOnEntry](MachineBasicBlock &B) -> bool {
for (MachineBasicBlock *S : B.successors())
DefOnEntry[S->getNumber()] = true;
DefOnEntry[BN] = true;
@@ -311,7 +287,12 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
return MarkDefined(B);
SlotIndex Begin, End;
std::tie(Begin, End) = Indexes->getMBBRange(&B);
- LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End);
+ // Treat End as not belonging to B.
+ // If LR has a segment S that starts at the next block, i.e. [End, ...),
+ // std::upper_bound will return the segment following S. Instead,
+ // S should be treated as the first segment that does not overlap B.
+ LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(),
+ End.getPrevSlot());
if (UB != LR.begin()) {
LiveRange::Segment &Seg = *std::prev(UB);
if (Seg.end > Begin) {
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 7f1c69c0b4a2a..92cca1a54951e 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -37,6 +37,8 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
LiveInterval &LI = LIS.createEmptyInterval(VReg);
+ if (Parent && !Parent->isSpillable())
+ LI.markNotSpillable();
// Create empty subranges if the OldReg's interval has them. Do not create
// the main range here---it will be constructed later after the subranges
// have been finalized.
@@ -52,6 +54,14 @@ unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
if (VRM) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
+ // FIXME: Getting the interval here actually computes it.
+ // In theory, this may not be what we want, but in practice
+ // the createEmptyIntervalFrom API is used when this is not
+ // the case. Generally speaking we just want to annotate the
+ // LiveInterval when it gets created but we cannot do that at
+ // the moment.
+ if (Parent && !Parent->isSpillable())
+ LIS.getInterval(VReg).markNotSpillable();
return VReg;
}
@@ -442,9 +452,6 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg)
if (VRM)
VRM->grow();
- if (Parent && !Parent->isSpillable())
- LIS.getInterval(VReg).markNotSpillable();
-
NewRegs.push_back(VReg);
}
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 7a51386aa9caa..882de1a3fad96 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -1,4 +1,4 @@
-//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//===- LiveRegMatrix.cpp - Track register interference --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/LiveRegMatrix.h"
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
using namespace llvm;
@@ -36,8 +43,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
"Live Register Matrix", false, false)
-LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
- UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID) {}
void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -169,10 +175,10 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
return Result;
}
-LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
unsigned RegUnit) {
LiveIntervalUnion::Query &Q = Queries[RegUnit];
- Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ Q.init(UserTag, LR, Matrix[RegUnit]);
return Q;
}
@@ -190,9 +196,12 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
return IK_RegUnit;
// Check the matrix for virtual register interference.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- if (query(VirtReg, *Units).checkInterference())
- return IK_VirtReg;
+ bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
+ [&](unsigned Unit, const LiveRange &LR) {
+ return query(LR, Unit).checkInterference();
+ });
+ if (Interference)
+ return IK_VirtReg;
return IK_Free;
}
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
new file mode 100644
index 0000000000000..dff555f49565e
--- /dev/null
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -0,0 +1,126 @@
+//===- LiveRegUnits.cpp - Register Unit Set -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LiveRegUnits set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
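+ // Drop every register unit that has a root register clobbered by RegMask.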
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ Units.reset(U);
+ }
+ }
+}
+
+void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ Units.set(U);
+ }
+ }
+}
+
+void LiveRegUnits::stepBackward(const MachineInstr &MI) {
+ // Remove defined registers and regmask kills from the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ if (!O->isDef())
+ continue;
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ removeReg(Reg);
+ } else if (O->isRegMask())
+ removeRegsNotPreserved(O->getRegMask());
+ }
+
+ // Add uses to the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->readsReg())
+ continue;
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ addReg(Reg);
+ }
+}
+
+void LiveRegUnits::accumulateBackward(const MachineInstr &MI) {
+ // Add defs, uses and regmask clobbers to the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (!O->isDef() && !O->readsReg())
+ continue;
+ addReg(Reg);
+ } else if (O->isRegMask())
+ addRegsInMask(O->getRegMask());
+ }
+}
+
+/// Add live-in registers of basic block \p MBB to \p LiveUnits.
+static void addLiveIns(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins())
+ LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask);
+}
+
+static void addLiveOuts(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addLiveIns(LiveUnits, *Succ);
+}
+
+/// Remove the registers that were actually saved in the prologue (as recorded
+/// in \p MFI's callee-saved info) from \p LiveUnits.
+static void removeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF,
+ const MachineFrameInfo &MFI,
+ const TargetRegisterInfo &TRI) {
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ LiveUnits.removeReg(Info.getReg());
+}
+
+void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ addReg(*I);
+ if (!MBB.isReturnBlock())
+ removeSavedRegs(*this, MF, MFI, *TRI);
+ }
+ ::addLiveOuts(*this, MBB);
+}
+
+void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ addReg(*I);
+ if (&MBB != &MF.front())
+ removeSavedRegs(*this, MF, MFI, *TRI);
+ }
+ ::addLiveIns(*this, MBB);
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 269b990a31493..3568b0294ad9a 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -64,8 +64,8 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
return nullptr;
}
-LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
dbgs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
E = AliveBlocks.end(); I != E; ++I)
@@ -78,8 +78,8 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
dbgs() << "\n #" << i << ": " << *Kills[i];
dbgs() << "\n";
}
-#endif
}
+#endif
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
diff --git a/lib/CodeGen/LowLevelType.cpp b/lib/CodeGen/LowLevelType.cpp
index d74b7306e0f43..c4b9068fa905a 100644
--- a/lib/CodeGen/LowLevelType.cpp
+++ b/lib/CodeGen/LowLevelType.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/LowLevelType.cpp --------------------------===//
+//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,54 +18,21 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-LLT::LLT(Type &Ty, const DataLayout &DL) {
+LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (auto VTy = dyn_cast<VectorType>(&Ty)) {
- SizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
- ElementsOrAddrSpace = VTy->getNumElements();
- Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ auto NumElements = VTy->getNumElements();
+ auto ScalarSizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ if (NumElements == 1)
+ return LLT::scalar(ScalarSizeInBits);
+ return LLT::vector(NumElements, ScalarSizeInBits);
} else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
- Kind = Pointer;
- SizeInBits = DL.getTypeSizeInBits(&Ty);
- ElementsOrAddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty));
} else if (Ty.isSized()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
- Kind = Scalar;
- SizeInBits = DL.getTypeSizeInBits(&Ty);
- ElementsOrAddrSpace = 1;
+ auto SizeInBits = DL.getTypeSizeInBits(&Ty);
assert(SizeInBits != 0 && "invalid zero-sized type");
- } else {
- Kind = Invalid;
- SizeInBits = ElementsOrAddrSpace = 0;
+ return LLT::scalar(SizeInBits);
}
-}
-
-LLT::LLT(MVT VT) {
- if (VT.isVector()) {
- SizeInBits = VT.getVectorElementType().getSizeInBits();
- ElementsOrAddrSpace = VT.getVectorNumElements();
- Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
- } else if (VT.isValid()) {
- // Aggregates are no different from real scalars as far as GlobalISel is
- // concerned.
- Kind = Scalar;
- SizeInBits = VT.getSizeInBits();
- ElementsOrAddrSpace = 1;
- assert(SizeInBits != 0 && "invalid zero-sized type");
- } else {
- Kind = Invalid;
- SizeInBits = ElementsOrAddrSpace = 0;
- }
-}
-
-void LLT::print(raw_ostream &OS) const {
- if (isVector())
- OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">";
- else if (isPointer())
- OS << "p" << getAddressSpace();
- else if (isValid()) {
- assert(isScalar() && "unexpected type");
- OS << "s" << getScalarSizeInBits();
- } else
- llvm_unreachable("trying to print an invalid type");
+ return LLT();
}
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index c8bed0890dd60..cac22af32956e 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -41,8 +41,11 @@
using namespace llvm;
PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
- SourceMgr &SM, const SlotMapping &IRSlots)
- : MF(MF), SM(&SM), IRSlots(IRSlots) {
+ SourceMgr &SM, const SlotMapping &IRSlots,
+ const Name2RegClassMap &Names2RegClasses,
+ const Name2RegBankMap &Names2RegBanks)
+ : MF(MF), SM(&SM), IRSlots(IRSlots), Names2RegClasses(Names2RegClasses),
+ Names2RegBanks(Names2RegBanks) {
}
VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
@@ -139,6 +142,7 @@ public:
bool parseVirtualRegister(VRegInfo *&Info);
bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);
bool parseRegisterFlag(unsigned &Flags);
+ bool parseRegisterClassOrBank(VRegInfo &RegInfo);
bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
bool parseRegisterOperand(MachineOperand &Dest,
@@ -172,6 +176,7 @@ public:
bool parseIntrinsicOperand(MachineOperand &Dest);
bool parsePredicateOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
bool parseMachineOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
@@ -184,6 +189,7 @@ public:
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
private:
@@ -878,6 +884,66 @@ bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {
}
}
+bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
+ if (Token.isNot(MIToken::Identifier) && Token.isNot(MIToken::underscore))
+ return error("expected '_', register class, or register bank name");
+ StringRef::iterator Loc = Token.location();
+ StringRef Name = Token.stringValue();
+
+ // Was it a register class?
+ auto RCNameI = PFS.Names2RegClasses.find(Name);
+ if (RCNameI != PFS.Names2RegClasses.end()) {
+ lex();
+ const TargetRegisterClass &RC = *RCNameI->getValue();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::NORMAL:
+ RegInfo.Kind = VRegInfo::NORMAL;
+ if (RegInfo.Explicit && RegInfo.D.RC != &RC) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ return error(Loc, Twine("conflicting register classes, previously: ") +
+ Twine(TRI.getRegClassName(RegInfo.D.RC)));
+ }
+ RegInfo.D.RC = &RC;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ return error(Loc, "register class specification on generic register");
+ }
+ llvm_unreachable("Unexpected register kind");
+ }
+
+ // Should be a register bank or a generic register.
+ const RegisterBank *RegBank = nullptr;
+ if (Name != "_") {
+ auto RBNameI = PFS.Names2RegBanks.find(Name);
+ if (RBNameI == PFS.Names2RegBanks.end())
+ return error(Loc, "expected '_', register class, or register bank name");
+ RegBank = RBNameI->getValue();
+ }
+
+ lex();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ RegInfo.Kind = RegBank ? VRegInfo::REGBANK : VRegInfo::GENERIC;
+ if (RegInfo.Explicit && RegInfo.D.RegBank != RegBank)
+ return error(Loc, "conflicting generic register banks");
+ RegInfo.D.RegBank = RegBank;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::NORMAL:
+ return error(Loc, "register bank specification on normal register");
+ }
+ llvm_unreachable("Unexpected register kind");
+}
+
bool MIParser::parseRegisterFlag(unsigned &Flags) {
const unsigned OldFlags = Flags;
switch (Token.kind()) {
@@ -1004,6 +1070,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return error("subregister index expects a virtual register");
}
+ if (Token.is(MIToken::colon)) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return error("register class specification expects a virtual register");
+ lex();
+ if (parseRegisterClassOrBank(*RegInfo))
+ return true;
+ }
MachineRegisterInfo &MRI = MF.getRegInfo();
if ((Flags & RegState::Define) == 0) {
if (consumeIfPresent(MIToken::lparen)) {
@@ -1598,6 +1671,35 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask");
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ while (true) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
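+ // A set bit in a register mask marks the register as preserved; bits are
+ // packed 32 registers per uint32_t word.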
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // TODO: Report an error if the same register is used more than once.
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegMask(Mask);
+ return false;
+}
+
bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_liveout));
const auto *TRI = MF.getSubtarget().getRegisterInfo();
@@ -1695,8 +1797,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
Dest = MachineOperand::CreateRegMask(RegMask);
lex();
break;
- }
- LLVM_FALLTHROUGH;
+ } else
+ return parseCustomRegisterMaskOperand(Dest);
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -1969,6 +2071,28 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
return false;
}
+bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
+ Order = AtomicOrdering::NotAtomic;
+ if (Token.isNot(MIToken::Identifier))
+ return false;
+
+ Order = StringSwitch<AtomicOrdering>(Token.stringValue())
+ .Case("unordered", AtomicOrdering::Unordered)
+ .Case("monotonic", AtomicOrdering::Monotonic)
+ .Case("acquire", AtomicOrdering::Acquire)
+ .Case("release", AtomicOrdering::Release)
+ .Case("acq_rel", AtomicOrdering::AcquireRelease)
+ .Case("seq_cst", AtomicOrdering::SequentiallyConsistent)
+ .Default(AtomicOrdering::NotAtomic);
+
+ if (Order != AtomicOrdering::NotAtomic) {
+ lex();
+ return false;
+ }
+
+ return error("expected an atomic scope, ordering or a size integer literal");
+}
+
bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (expectAndConsume(MIToken::lparen))
return true;
@@ -1986,6 +2110,21 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
Flags |= MachineMemOperand::MOStore;
lex();
+ // Optional "singlethread" scope.
+ SynchronizationScope Scope = SynchronizationScope::CrossThread;
+ if (Token.is(MIToken::Identifier) && Token.stringValue() == "singlethread") {
+ Scope = SynchronizationScope::SingleThread;
+ lex();
+ }
+
+ // Up to two atomic orderings (cmpxchg provides guarantees on failure).
+ AtomicOrdering Order, FailureOrder;
+ if (parseOptionalAtomicOrdering(Order))
+ return true;
+
+ if (parseOptionalAtomicOrdering(FailureOrder))
+ return true;
+
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected the size integer literal after memory operation");
uint64_t Size;
@@ -2040,8 +2179,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
}
if (expectAndConsume(MIToken::rparen))
return true;
- Dest =
- MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range);
+ Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range,
+ Scope, Order, FailureOrder);
return false;
}
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
index 93a4d84ba62f6..9b3879cf83772 100644
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ b/lib/CodeGen/MIRParser/MIParser.h
@@ -45,11 +45,16 @@ struct VRegInfo {
unsigned PreferredReg = 0;
};
+typedef StringMap<const TargetRegisterClass*> Name2RegClassMap;
+typedef StringMap<const RegisterBank*> Name2RegBankMap;
+
struct PerFunctionMIParsingState {
BumpPtrAllocator Allocator;
MachineFunction &MF;
SourceMgr *SM;
const SlotMapping &IRSlots;
+ const Name2RegClassMap &Names2RegClasses;
+ const Name2RegBankMap &Names2RegBanks;
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
DenseMap<unsigned, VRegInfo*> VRegInfos;
@@ -59,7 +64,9 @@ struct PerFunctionMIParsingState {
DenseMap<unsigned, unsigned> JumpTableSlots;
PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
- const SlotMapping &IRSlots);
+ const SlotMapping &IRSlots,
+ const Name2RegClassMap &Names2RegClasses,
+ const Name2RegBankMap &Names2RegBanks);
VRegInfo &getVRegInfo(unsigned VReg);
};
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 3dff1147631b3..a2773cccc5dbd 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -55,9 +55,9 @@ class MIRParserImpl {
StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
SlotMapping IRSlots;
/// Maps from register class names to register classes.
- StringMap<const TargetRegisterClass *> Names2RegClasses;
+ Name2RegClassMap Names2RegClasses;
/// Maps from register bank names to register banks.
- StringMap<const RegisterBank *> Names2RegBanks;
+ Name2RegBankMap Names2RegBanks;
public:
MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
@@ -325,11 +325,15 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
return error(Twine("no machine function information for function '") +
MF.getName() + "' in the MIR file");
// TODO: Recreate the machine function.
+ initNames2RegClasses(MF);
+ initNames2RegBanks(MF);
const yaml::MachineFunction &YamlMF = *It->getValue();
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ if (YamlMF.NoVRegs)
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
if (YamlMF.RegBankSelected)
@@ -338,7 +342,8 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
if (YamlMF.Selected)
MF.getProperties().set(MachineFunctionProperties::Property::Selected);
- PerFunctionMIParsingState PFS(MF, SM, IRSlots);
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,
+ Names2RegBanks);
if (parseRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
@@ -362,9 +367,6 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
}
PFS.SM = &SM;
- if (MF.empty())
- return error(Twine("machine function '") + Twine(MF.getName()) +
- "' requires at least one machine basic block in its body");
// Initialize the frame information after creating all the MBBs so that the
// MBB references in the frame information can be resolved.
if (initializeFrameInfo(PFS, YamlMF))
@@ -462,17 +464,19 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
RegInfo.addLiveIn(Reg, VReg);
}
- // Parse the callee saved register mask.
- BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size());
- if (!YamlMF.CalleeSavedRegisters)
- return false;
- for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
- unsigned Reg = 0;
- if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
- return error(Error, RegSource.SourceRange);
- CalleeSavedRegisterMask[Reg] = true;
+ // Parse the callee-saved registers (registers that the callee
+ // must preserve for its caller).
+ if (YamlMF.CalleeSavedRegisters) {
+ SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
+ for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
+ return error(Error, RegSource.SourceRange);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ RegInfo.setCalleeSavedRegs(CalleeSavedRegisters);
}
- RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip());
+
return false;
}
@@ -505,14 +509,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
}
// Compute MachineRegisterInfo::UsedPhysRegMask
- if (!YamlMF.CalleeSavedRegisters) {
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
- }
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
}
}
}
@@ -818,7 +820,6 @@ void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
StringRef Name) {
- initNames2RegClasses(MF);
auto RegClassInfo = Names2RegClasses.find(Name);
if (RegClassInfo == Names2RegClasses.end())
return nullptr;
@@ -827,7 +828,6 @@ const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
StringRef Name) {
- initNames2RegBanks(MF);
auto RegBankInfo = Names2RegBanks.find(Name);
if (RegBankInfo == Names2RegBanks.end())
return nullptr;
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index db87092177cad..6da174a536666 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -175,6 +175,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.NoVRegs = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs);
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
YamlMF.RegBankSelected = MF.getProperties().hasProperty(
@@ -205,6 +207,25 @@ void MIRPrinter::print(const MachineFunction &MF) {
Out << YamlMF;
}
+static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ assert(RegMask && "Can't print an empty register mask");
+ OS << StringRef("CustomRegMask(");
+
+ bool IsRegInRegMaskFound = false;
+ for (int I = 0, E = TRI->getNumRegs(); I < E; I++) {
+ // Check whether the register is asserted in regmask.
+ // Check whether the register's bit is set in the regmask.
+ if (IsRegInRegMaskFound)
+ OS << ',';
+ printReg(I, OS, TRI);
+ IsRegInRegMaskFound = true;
+ }
+ }
+
+ OS << ')';
+}
+
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI) {
@@ -239,20 +260,18 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
printReg(I->second, LiveIn.VirtualRegister, TRI);
MF.LiveIns.push_back(LiveIn);
}
- // The used physical register mask is printed as an inverted callee saved
- // register mask.
- const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
- if (UsedPhysRegMask.none())
- return;
- std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
- for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
- if (!UsedPhysRegMask[I]) {
+
+ // Print the callee-saved registers.
+ if (RegInfo.isUpdatedCSRsInitialized()) {
+ const MCPhysReg *CalleeSavedRegs = RegInfo.getCalleeSavedRegs();
+ std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+ for (const MCPhysReg *I = CalleeSavedRegs; *I; ++I) {
yaml::FlowStringValue Reg;
- printReg(I, Reg, TRI);
+ printReg(*I, Reg, TRI);
CalleeSavedRegisters.push_back(Reg);
}
+ MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
- MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
void MIRPrinter::convert(ModuleSlotTracker &MST,
@@ -860,7 +879,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
if (RegMaskInfo != RegisterMaskIds.end())
OS << StringRef(TRI->getRegMaskNames()[RegMaskInfo->second]).lower();
else
- llvm_unreachable("Can't print this machine register mask yet.");
+ printCustomRegMask(Op.getRegMask(), OS, TRI);
break;
}
case MachineOperand::MO_RegisterLiveOut: {
@@ -906,6 +925,9 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
<< CmpInst::getPredicateName(Pred) << ')';
break;
}
+ case MachineOperand::MO_Placeholder:
+ OS << "<placeholder>";
+ break;
}
}
@@ -926,6 +948,15 @@ void MIPrinter::print(const MachineMemOperand &Op) {
assert(Op.isStore() && "Non load machine operand must be a store");
OS << "store ";
}
+
+ if (Op.getSynchScope() == SynchronizationScope::SingleThread)
+ OS << "singlethread ";
+
+ if (Op.getOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(Op.getOrdering()) << ' ';
+ if (Op.getFailureOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(Op.getFailureOrdering()) << ' ';
+
OS << Op.getSize();
if (const Value *Val = Op.getValue()) {
OS << (Op.isLoad() ? " from " : " into ");
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 3869f976854d0..06112723497b0 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -148,8 +149,11 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition()))
+ while (I != E && (I->isPHI() || I->isPosition() ||
+ TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels
// inside the bundle.
@@ -160,8 +164,11 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
+ while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() ||
+ TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
@@ -225,7 +232,7 @@ StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
else
- return "(null)";
+ return StringRef("", 0);
}
/// Return a hopefully unique identifier for this block.
@@ -417,7 +424,7 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc DL; // FIXME: this is nowhere
+ DebugLoc DL = findBranchDebugLoc();
bool B = TII->analyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -485,7 +492,7 @@ void MachineBasicBlock::updateTerminator() {
// FIXME: This does not seem like a reasonable pattern to support, but it
// has been seen in the wild coming out of degenerate ARM test cases.
TII->removeBranch(*this);
-
+
// Finally update the unconditional successor to be reached via a branch if
// it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
@@ -681,16 +688,16 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
return std::next(I) == MachineFunction::const_iterator(MBB);
}
-bool MachineBasicBlock::canFallThrough() {
+MachineBasicBlock *MachineBasicBlock::getFallThrough() {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == getParent()->end())
- return false;
+ return nullptr;
// If FallthroughBlock isn't a successor, no fallthrough is possible.
if (!isSuccessor(&*Fallthrough))
- return false;
+ return nullptr;
// Analyze the branches, if any, at the end of the block.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -702,25 +709,31 @@ bool MachineBasicBlock::canFallThrough() {
// is possible. The isPredicated check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
// Barrier is predicated and thus no longer an actual control barrier.
- return empty() || !back().isBarrier() || TII->isPredicated(back());
+ return (empty() || !back().isBarrier() || TII->isPredicated(back()))
+ ? &*Fallthrough
+ : nullptr;
}
// If there is no branch, control always falls through.
- if (!TBB) return true;
+ if (!TBB) return &*Fallthrough;
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
if (MachineFunction::iterator(TBB) == Fallthrough ||
MachineFunction::iterator(FBB) == Fallthrough)
- return true;
+ return &*Fallthrough;
// If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
- if (Cond.empty()) return false;
+ if (Cond.empty()) return nullptr;
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
- return FBB == nullptr;
+ return (FBB == nullptr) ? &*Fallthrough : nullptr;
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ return getFallThrough() != nullptr;
}
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
@@ -1144,6 +1157,24 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
return {};
}
+/// Find and return the merged DebugLoc of the branch instructions of the block.
+/// Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findBranchDebugLoc() {
+ DebugLoc DL;
+ auto TI = getFirstTerminator();
+ while (TI != end() && !TI->isBranch())
+ ++TI;
+
+ if (TI != end()) {
+ DL = TI->getDebugLoc();
+ for (++TI ; TI != end() ; ++TI)
+ if (TI->isBranch())
+ DL = DILocation::getMergedLocation(DL, TI->getDebugLoc());
+ }
+ return DL;
+}
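The merging loop above only consults branch instructions among the terminators and folds their locations pairwise. A simplified standalone model of that walk, in which disagreeing locations collapse to an unknown value (the real DILocation::getMergedLocation is more careful about scopes and inlining):

#include <optional>
#include <vector>

// Toy stand-in: a location is just a line number; nullopt means unknown.
using Loc = std::optional<int>;

static Loc mergeLocs(Loc A, Loc B) { return A == B ? A : std::nullopt; }

struct Inst {
  bool IsBranch;
  Loc DL;
};

static Loc findBranchLoc(const std::vector<Inst> &Terminators) {
  Loc DL;
  bool SeenBranch = false;
  for (const Inst &I : Terminators) {
    if (!I.IsBranch)
      continue;
    DL = SeenBranch ? mergeLocs(DL, I.DL) : I.DL;
    SeenBranch = true;
  }
  return DL; // unknown if there was no branch or the branches disagreed
}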
+
/// Return probability of the edge from this block to MBB.
BranchProbability
MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 7d5124d30a04e..9c7367b4c7802 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -28,7 +28,6 @@ using namespace llvm;
#define DEBUG_TYPE "block-freq"
-#ifndef NDEBUG
static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"view-machine-block-freq-propagation-dags", cl::Hidden,
@@ -43,10 +42,37 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
"profile count if available.")));
+// Similar to the option above, but used to control BFI display only after the MBP pass.
+cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
+ "view-block-layout-with-bfi", cl::Hidden,
+ cl::desc(
+ "Pop up a window to show a dag displaying MBP layout and associated "
+ "block frequencies of the CFG."),
+ cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction",
+ "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer",
+ "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValN(GVDT_Count, "count",
+ "display a graph using the real "
+ "profile count if available.")));
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+// Command line option to specify hot frequency threshold.
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc=
extern cl::opt<unsigned> ViewHotFreqPercent;
+static GVDAGType getGVDT() {
+ if (ViewBlockLayoutWithBFI != GVDT_None)
+ return ViewBlockLayoutWithBFI;
+
+ return ViewMachineBlockFreqPropagationDAG;
+}
+
namespace llvm {
template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
@@ -80,12 +106,32 @@ template <>
struct DOTGraphTraits<MachineBlockFrequencyInfo *>
: public MBFIDOTGraphTraitsBase {
explicit DOTGraphTraits(bool isSimple = false)
- : MBFIDOTGraphTraitsBase(isSimple) {}
+ : MBFIDOTGraphTraitsBase(isSimple), CurFunc(nullptr), LayoutOrderMap() {}
+
+ const MachineFunction *CurFunc;
+ DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineBlockFrequencyInfo *Graph) {
- return MBFIDOTGraphTraitsBase::getNodeLabel(
- Node, Graph, ViewMachineBlockFreqPropagationDAG);
+
+ int layout_order = -1;
+ // Attach additional ordering information if 'isSimple' is false.
+ if (!isSimple()) {
+ const MachineFunction *F = Node->getParent();
+ if (!CurFunc || F != CurFunc) {
+ if (CurFunc)
+ LayoutOrderMap.clear();
+
+ CurFunc = F;
+ int O = 0;
+ for (auto MBI = F->begin(); MBI != F->end(); ++MBI, ++O) {
+ LayoutOrderMap[&*MBI] = O;
+ }
+ }
+ layout_order = LayoutOrderMap[Node];
+ }
+ return MBFIDOTGraphTraitsBase::getNodeLabel(Node, Graph, getGVDT(),
+ layout_order);
}
std::string getNodeAttributes(const MachineBasicBlock *Node,
@@ -102,7 +148,6 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
};
} // end namespace llvm
-#endif
INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
"Machine Block Frequency Analysis", true, true)
@@ -127,20 +172,24 @@ void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
- MachineBranchProbabilityInfo &MBPI =
- getAnalysis<MachineBranchProbabilityInfo>();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+void MachineBlockFrequencyInfo::calculate(
+ const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI,
+ const MachineLoopInfo &MLI) {
if (!MBFI)
MBFI.reset(new ImplType);
MBFI->calculate(F, MBPI, MLI);
-#ifndef NDEBUG
if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
F.getName().equals(ViewBlockFreqFuncName))) {
- view();
+ view("MachineBlockFrequencyDAGS." + F.getName());
}
-#endif
+}
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI =
+ getAnalysis<MachineBranchProbabilityInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ calculate(F, MBPI, MLI);
return false;
}
@@ -148,15 +197,9 @@ void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); }
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
-void MachineBlockFrequencyInfo::view() const {
-// This code is only for debugging.
-#ifndef NDEBUG
- ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this),
- "MachineBlockFrequencyDAGs");
-#else
- errs() << "MachineBlockFrequencyInfo::view is only available in debug builds "
- "on systems with Graphviz or gv!\n";
-#endif // NDEBUG
+void MachineBlockFrequencyInfo::view(const Twine &Name, bool isSimple) const {
+ // This code is only for debugging.
+ ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this), Name, isSimple);
}
BlockFrequency
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 40e3840e6b0b3..4cfc128a8c1d6 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -32,14 +32,15 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
@@ -49,6 +50,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <functional>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "block-placement"
@@ -82,19 +85,6 @@ static cl::opt<unsigned> ExitBlockBias(
// Definition:
// - Outlining: placement of a basic block outside the chain or hot path.
-static cl::opt<bool> OutlineOptionalBranches(
- "outline-optional-branches",
- cl::desc("Outlining optional branches will place blocks that are optional "
- "branches, i.e. branches with a common post dominator, outside "
- "the hot path or chain"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<unsigned> OutlineOptionalThreshold(
- "outline-optional-threshold",
- cl::desc("Don't outline optional branches that are a single block with an "
- "instruction count below this threshold"),
- cl::init(4), cl::Hidden);
-
static cl::opt<unsigned> LoopToColdBlockRatio(
"loop-to-cold-block-ratio",
cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
@@ -136,20 +126,47 @@ BranchFoldPlacement("branch-fold-placement",
cl::init(true), cl::Hidden);
// Heuristic for tail duplication.
-static cl::opt<unsigned> TailDuplicatePlacementThreshold(
+static cl::opt<unsigned> TailDupPlacementThreshold(
"tail-dup-placement-threshold",
cl::desc("Instruction cutoff for tail duplication during layout. "
"Tail merging during layout is forced to have a threshold "
"that won't conflict."), cl::init(2),
cl::Hidden);
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDupPlacementPenalty(
+ "tail-dup-placement-penalty",
+ cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. "
+ "Copying can increase fallthrough, but it also increases icache "
+ "pressure. This parameter controls the penalty to account for that. "
+ "Percent as integer."),
+ cl::init(2),
+ cl::Hidden);
+
+// Heuristic for triangle chains.
+static cl::opt<unsigned> TriangleChainCount(
+ "triangle-chain-count",
+ cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
+ "triangle tail duplication heuristic to kick in. 0 to disable."),
+ cl::init(2),
+ cl::Hidden);
+
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi=
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
-typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType;
}
namespace {
@@ -193,12 +210,15 @@ public:
/// \brief Iterator over blocks within the chain.
typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
+ typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator;
/// \brief Beginning of blocks within the chain.
iterator begin() { return Blocks.begin(); }
+ const_iterator begin() const { return Blocks.begin(); }
/// \brief End of blocks within the chain.
iterator end() { return Blocks.end(); }
+ const_iterator end() const { return Blocks.end(); }
bool remove(MachineBasicBlock* BB) {
for(iterator i = begin(); i != end(); ++i) {
@@ -264,12 +284,28 @@ public:
namespace {
class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A typedef for a block filter set.
- typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;
+ typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet;
+
+ /// Pair struct containing a basic block and its tail-duplication profitability.
+ struct BlockAndTailDupResult {
+ MachineBasicBlock *BB;
+ bool ShouldTailDup;
+ };
+
+ /// Struct containing an edge's weight and its source and destination blocks.
+ struct WeightedEdge {
+ BlockFrequency Weight;
+ MachineBasicBlock *Src;
+ MachineBasicBlock *Dest;
+ };
/// \brief work lists of blocks that are ready to be laid out
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+ /// Edges that have already been computed as optimal.
+ DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
+
/// \brief Machine Function
MachineFunction *F;
@@ -294,7 +330,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
const TargetLoweringBase *TLI;
/// \brief A handle to the post dominator tree.
- MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
/// \brief Duplicator used to duplicate tails during placement.
///
@@ -303,10 +339,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// must be done inline.
TailDuplicator TailDup;
- /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
- /// all terminators of the MachineFunction.
- SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
-
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -322,7 +354,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// BlockChain it participates in, if any. We use it to, among other things,
/// allow implicitly defining edges between chains as the existing edges
/// between basic blocks.
- DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+ DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
#ifndef NDEBUG
/// The set of basic blocks that have terminators that cannot be fully
@@ -334,75 +366,107 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Decrease the UnscheduledPredecessors count for all blocks in chain, and
/// if the count goes to 0, add them to the appropriate work list.
- void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter = nullptr);
+ void markChainSuccessors(
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
/// Decrease the UnscheduledPredecessors count for a single block, and
/// if the count goes to 0, add them to the appropriate work list.
void markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
+ const BlockChain &Chain, const MachineBasicBlock *BB,
+ const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter = nullptr);
-
BranchProbability
- collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter,
- SmallVector<MachineBasicBlock *, 4> &Successors);
- bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter,
- BranchProbability SuccProb,
- BranchProbability HotProb);
+ collectViableSuccessors(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors);
+ bool shouldPredBlockBeOutlined(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ BranchProbability SuccProb, BranchProbability HotProb);
bool repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
+ const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt);
- bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain,
- BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- bool &DuplicatedToPred);
- bool
- hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
- BlockChain &SuccChain, BranchProbability SuccProb,
- BranchProbability RealSuccProb, BlockChain &Chain,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *
- selectBestCandidateBlock(BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &WorkList);
- MachineBasicBlock *
- getFirstUnplacedBlock(const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ bool maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToPred);
+ bool hasBetterLayoutPredecessor(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ BlockAndTailDupResult selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
/// \brief Add a basic block to the work list if it is appropriate.
///
/// If the optional parameter BlockFilter is provided, only MBB
/// present in the set will be added to the worklist. If nullptr
/// is provided, no filtering occurs.
- void fillWorkLists(MachineBasicBlock *MBB,
+ void fillWorkLists(const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
- void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
- MachineBasicBlock *findBestLoopTop(MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- BlockFilterSet collectLoopBlockSet(MachineLoop &L);
- void buildLoopChains(MachineLoop &L);
- void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
- const BlockFilterSet &LoopBlockSet);
- void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- void collectMustExecuteBBs();
+ MachineBasicBlock *findBestLoopTop(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
+ void buildLoopChains(const MachineLoop &L);
+ void rotateLoop(
+ BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet);
+ void rotateLoopWithProfile(
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildCFGChains();
void optimizeBranches();
void alignBlocks();
+ /// Returns true if a block should be tail-duplicated to increase fallthrough
+ /// opportunities.
+ bool shouldTailDuplicate(MachineBasicBlock *BB);
+ /// Check the edge frequencies to see if tail duplication will increase
+ /// fallthroughs.
+ bool isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability AdjustedSumProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Check for a trellis layout.
+ bool isTrellis(const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Get the best successor given a trellis layout.
+ BlockAndTailDupResult getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ /// Get the best pair of non-conflicting edges.
+ static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
+ /// Returns true if a block can tail duplicate into all unplaced
+ /// predecessors. Filters based on loop.
+ bool canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Find chains of triangles to tail-duplicate where a global analysis works,
+ /// but a local analysis would not find them.
+ void precomputeTriangleChains();
public:
static char ID; // Pass identification, replacement for typeid
@@ -415,7 +479,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineDominatorTree>();
+ if (TailDupPlacement)
+ AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -429,7 +494,7 @@ INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
@@ -438,7 +503,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
/// \brief Helper to print the name of a MBB.
///
/// Only used by debug logging.
-static std::string getBlockName(MachineBasicBlock *BB) {
+static std::string getBlockName(const MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
OS << "BB#" << BB->getNumber();
@@ -455,7 +520,7 @@ static std::string getBlockName(MachineBasicBlock *BB) {
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the appropriate worklist.
void MachineBlockPlacement::markChainSuccessors(
- BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
@@ -471,8 +536,8 @@ void MachineBlockPlacement::markChainSuccessors(
/// and was duplicated into the chain end, we need to redo markBlockSuccessors
/// for just that block.
void MachineBlockPlacement::markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter) {
+ const BlockChain &Chain, const MachineBasicBlock *MBB,
+ const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {
// Add any successors for which this is the only un-placed in-loop
// predecessor to the worklist as a viable candidate for CFG-neutral
// placement. No subsequent placement of this block will violate the CFG
@@ -504,7 +569,8 @@ void MachineBlockPlacement::markBlockSuccessors(
/// the total branch probability of edges from \p BB to those
/// blocks.
BranchProbability MachineBlockPlacement::collectViableSuccessors(
- MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
SmallVector<MachineBasicBlock *, 4> &Successors) {
// Adjust edge probabilities by excluding edges pointing to blocks that is
// either not in BlockFilter or is already in the current chain. Consider the
@@ -561,46 +627,573 @@ getAdjustedProbability(BranchProbability OrigProb,
return SuccProb;
}
-/// When the option OutlineOptionalBranches is on, this method
-/// checks if the fallthrough candidate block \p Succ (of block
-/// \p BB) also has other unscheduled predecessor blocks which
-/// are also successors of \p BB (forming triangular shape CFG).
-/// If none of such predecessors are small, it returns true.
-/// The caller can choose to select \p Succ as the layout successors
-/// so that \p Succ's predecessors (optional branches) can be
-/// outlined.
-/// FIXME: fold this with more general layout cost analysis.
-bool MachineBlockPlacement::shouldPredBlockBeOutlined(
- MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
- const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
- BranchProbability HotProb) {
- if (!OutlineOptionalBranches)
+/// Check if \p BB has exactly the successors in \p Successors.
+static bool
+hasSameSuccessors(MachineBasicBlock &BB,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Successors) {
+ if (BB.succ_size() != Successors.size())
return false;
- // If we outline optional branches, look whether Succ is unavoidable, i.e.
- // dominates all terminators of the MachineFunction. If it does, other
- // successors must be optional. Don't do this for cold branches.
- if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
- for (MachineBasicBlock *Pred : Succ->predecessors()) {
- // Check whether there is an unplaced optional branch.
- if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
+ // We don't want to count self-loops
+ if (Successors.count(&BB))
+ return false;
+ for (MachineBasicBlock *Succ : BB.successors())
+ if (!Successors.count(Succ))
+ return false;
+ return true;
+}
+
+/// Check if a block should be tail duplicated to increase fallthrough
+/// opportunities.
+/// \p BB Block to check.
+bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
+ // Blocks with single successors don't create additional fallthrough
+ // opportunities. Don't duplicate them. TODO: When conditional exits are
+ // analyzable, allow them to be duplicated.
+ bool IsSimple = TailDup.isSimpleBB(BB);
+
+ if (BB->succ_size() == 1)
+ return false;
+ return TailDup.shouldTailDuplicate(IsSimple, *BB);
+}
+
+/// Compare two BlockFrequency values with a small penalty for \p A.
+/// In order to be conservative, we apply an X% penalty to account for
+/// increased icache pressure and static heuristics. For small frequencies
+/// we use only the numerators to improve accuracy. For simplicity, we assume
+/// the penalty is less than 100%.
+/// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
+static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
+ uint64_t EntryFreq) {
+ BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
+ BlockFrequency Gain = A - B;
+ return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
+}
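With the default 2% TailDupPlacementPenalty, the test above prefers A only when the gain A - B is at least 2% of the function entry frequency. A standalone model of that comparison with plain 64-bit integers (LLVM's BlockFrequency and BranchProbability fixed-point types are elided and the numbers are made up):

#include <cassert>
#include <cstdint>

// Plain-integer model: Gain / (Penalty/100) >= EntryFreq is the same as
// Gain * 100 >= EntryFreq * Penalty.
static bool greaterWithBiasModel(uint64_t A, uint64_t B, uint64_t EntryFreq,
                                 uint64_t PenaltyPercent = 2) {
  if (A < B)
    return false;
  uint64_t Gain = A - B;
  return Gain * 100 >= EntryFreq * PenaltyPercent;
}

int main() {
  // With EntryFreq = 1000 the gain must be at least 20 to beat the penalty.
  assert(greaterWithBiasModel(120, 100, 1000));  // gain 20 -> accepted
  assert(!greaterWithBiasModel(115, 100, 1000)); // gain 15 -> rejected
  return 0;
}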
+
+/// Check the edge frequencies to see if tail duplication will increase
+/// fallthroughs. It only makes sense to call this function when
+/// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
+/// always locally profitable if we would have picked \p Succ without
+/// considering duplication.
+bool MachineBlockPlacement::isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability QProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // We need to do a probability calculation to make sure this is profitable.
+ // First: does succ have a successor that post-dominates? This affects the
+ // calculation. The 2 relevant cases are:
+ // BB BB
+ // | \Qout | \Qout
+ // P| C |P C
+ // = C' = C'
+ // | /Qin | /Qin
+ // | / | /
+ // Succ Succ
+ // / \ | \ V
+ // U/ =V |U \
+ // / \ = D
+ // D E | /
+ // | /
+ // |/
+ // PDom
+ // '=' : Branch taken for that CFG edge
+ // In the second case, placing Succ while duplicating it into C prevents the
+ // fallthrough of Succ into either D or PDom, because they now have C as an
+ // unplaced predecessor.
+
+ // Start by figuring out which case we fall into
+ MachineBasicBlock *PDom = nullptr;
+ SmallVector<MachineBasicBlock *, 4> SuccSuccs;
+ // Only scan the relevant successors
+ auto AdjustedSuccSumProb =
+ collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
+ BranchProbability PProb = MBPI->getEdgeProbability(BB, Succ);
+ auto BBFreq = MBFI->getBlockFreq(BB);
+ auto SuccFreq = MBFI->getBlockFreq(Succ);
+ BlockFrequency P = BBFreq * PProb;
+ BlockFrequency Qout = BBFreq * QProb;
+ uint64_t EntryFreq = MBFI->getEntryFreq();
+ // If there are no more successors, it is profitable to copy, as it strictly
+ // increases fallthrough.
+ if (SuccSuccs.size() == 0)
+ return greaterWithBias(P, Qout, EntryFreq);
+
+ auto BestSuccSucc = BranchProbability::getZero();
+ // Find the PDom or the best Succ if no PDom exists.
+ for (MachineBasicBlock *SuccSucc : SuccSuccs) {
+ auto Prob = MBPI->getEdgeProbability(Succ, SuccSucc);
+ if (Prob > BestSuccSucc)
+ BestSuccSucc = Prob;
+ if (PDom == nullptr)
+ if (MPDT->dominates(SuccSucc, Succ)) {
+ PDom = SuccSucc;
+ break;
+ }
+ }
+ // For the comparisons, we need to know Succ's best incoming edge that isn't
+ // from BB.
+ auto SuccBestPred = BlockFrequency(0);
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ if (SuccPred == Succ || SuccPred == BB
+ || BlockToChain[SuccPred] == &Chain
+ || (BlockFilter && !BlockFilter->count(SuccPred)))
+ continue;
+ auto Freq = MBFI->getBlockFreq(SuccPred)
+ * MBPI->getEdgeProbability(SuccPred, Succ);
+ if (Freq > SuccBestPred)
+ SuccBestPred = Freq;
+ }
+ // Qin is Succ's best unplaced incoming edge that isn't BB
+ BlockFrequency Qin = SuccBestPred;
+ // If it doesn't have a post-dominating successor, here is the calculation:
+ // BB BB
+ // | \Qout | \
+ // P| C | =
+ // = C' | C
+ // | /Qin | |
+ // | / | C' (+Succ)
+ // Succ Succ /|
+ // / \ | \/ |
+ // U/ =V | == |
+ // / \ | / \|
+ // D E D E
+ // '=' : Branch taken for that CFG edge
+ // Cost in the first case is: P + V
+ // For this calculation, we always assume P > Qout. If Qout > P, the
+ // result of this function will be ignored at the caller.
+ // Let F = SuccFreq - Qin
+ // Cost in the second case is: Qout + min(Qin, F) * U + max(Qin, F) * V
+
+ if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
+ BranchProbability UProb = BestSuccSucc;
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency F = SuccFreq - Qin;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency QinU = std::min(Qin, F) * UProb;
+ BlockFrequency BaseCost = P + V;
+ BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
+ return greaterWithBias(BaseCost, DupCost, EntryFreq);
+ }
+ BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency U = SuccFreq * UProb;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency F = SuccFreq - Qin;
+ // If there is a post-dominating successor, here is the calculation:
+ // BB BB BB BB
+ // | \Qout | \ | \Qout | \
+ // |P C | = |P C | =
+ // = C' |P C = C' |P C
+ // | /Qin | | | /Qin | |
+ // | / | C' (+Succ) | / | C' (+Succ)
+ // Succ Succ /| Succ Succ /|
+ // | \ V | \/ | | \ V | \/ |
+ // |U \ |U /\ =? |U = |U /\ |
+ // = D = = =?| | D | = =|
+ // | / |/ D | / |/ D
+ // | / | / | = | /
+ // |/ | / |/ | =
+ // Dom Dom Dom Dom
+ // '=' : Branch taken for that CFG edge
+ // The cost for taken branches in the first case is P + U
+ // Let F = SuccFreq - Qin
+ // The cost in the second case (assuming independence), given the layout:
+ // BB, Succ, (C+Succ), D, Dom or the layout:
+ // BB, Succ, D, Dom, (C+Succ)
+ // is Qout + max(F, Qin) * U + min(F, Qin)
+ // compare P + U vs Qout + P * U + Qin.
+ //
+ // The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
+ //
+ // For the 3rd case, the cost is P + 2 * V
+ // For the 4th case, the cost is Qout + min(Qin, F) * U + max(Qin, F) * V + V
+ // We choose 4 over 3 when (P + V) > Qout + min(Qin, F) * U + max(Qin, F) * V
+ if (UProb > AdjustedSuccSumProb / 2 &&
+ !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
+ Chain, BlockFilter))
+ // Cases 3 & 4
+ return greaterWithBias(
+ (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
+ EntryFreq);
+ // Cases 1 & 2
+ return greaterWithBias((P + U),
+ (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
+ std::max(Qin, F) * UProb),
+ EntryFreq);
+}
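To make the no-post-dominator branch of the comparison concrete: the layout keeps Succ unduplicated when BaseCost = P + V stays ahead of DupCost = Qout + min(Qin, F)*UProb + max(Qin, F)*VProb. Here is the same arithmetic with made-up frequencies, using doubles in place of BlockFrequency:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical values for the CFG sketched in the comments above.
  double P = 60, Qout = 40;        // BB->Succ and BB->other edge frequencies
  double SuccFreq = 90, Qin = 30;  // Succ's frequency, best other incoming edge
  double UProb = 0.7, VProb = 0.3; // Succ's outgoing probabilities
  double F = SuccFreq - Qin;
  double V = SuccFreq * VProb;

  double BaseCost = P + V;                          // 60 + 27 = 87
  double DupCost = Qout + std::min(Qin, F) * UProb  // 40 + 21
                        + std::max(Qin, F) * VProb; //    + 18 = 79
  std::printf("base=%.0f dup=%.0f -> %s\n", BaseCost, DupCost,
              BaseCost > DupCost ? "tail-duplicate" : "keep");
  return 0;
}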
+
+/// Check for a trellis layout. \p BB is the upper part of a trellis if its
+/// successors form the lower part of a trellis. A successor set S forms the
+/// lower part of a trellis if all of the predecessors of S are either in S or
+/// have all of S as successors. We ignore trellises where BB doesn't have 2
+/// successors because for fewer than 2, it's trivial, and for 3 or greater they
+/// are very uncommon and complex to compute optimally. Allowing edges within S
+/// is not strictly a trellis, but the same algorithm works, so we allow it.
+bool MachineBlockPlacement::isTrellis(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // Technically BB could form a trellis with branching factor higher than 2.
+ // But that's extremely uncommon.
+ if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
+ return false;
+
+ SmallPtrSet<const MachineBasicBlock *, 2> Successors(BB->succ_begin(),
+ BB->succ_end());
+ // To avoid reviewing the same predecessors twice.
+ SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
+
+ for (MachineBasicBlock *Succ : ViableSuccs) {
+ int PredCount = 0;
+ for (auto SuccPred : Succ->predecessors()) {
+ // Allow triangle successors, but don't count them.
+ if (Successors.count(SuccPred)) {
+ // Make sure that it is actually a triangle.
+ for (MachineBasicBlock *CheckSucc : SuccPred->successors())
+ if (!Successors.count(CheckSucc))
+ return false;
continue;
- // Check whether the optional branch has exactly one BB.
- if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ }
+ const BlockChain *PredChain = BlockToChain[SuccPred];
+ if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
+ PredChain == &Chain || PredChain == BlockToChain[Succ])
continue;
- // Check whether the optional branch is small.
- if (Pred->size() < OutlineOptionalThreshold)
+ ++PredCount;
+ // Perform the successor check only once.
+ if (!SeenPreds.insert(SuccPred).second)
+ continue;
+ if (!hasSameSuccessors(*SuccPred, Successors))
return false;
}
- return true;
- } else
+ // If one of the successors has only BB as a predecessor, it is not a
+ // trellis.
+ if (PredCount < 1)
+ return false;
+ }
+ return true;
+}
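Stated independently of the pass, a successor set S of BB forms the lower half of a trellis when every predecessor of a block in S is either in S itself or has exactly S as its successor set. A toy standalone model over integer block ids (it omits the chain/filter bookkeeping, the triangle-edge validation, and the per-successor predecessor count of the real check):

#include <map>
#include <set>

// Toy CFG: block id -> successor/predecessor id sets.
using Graph = std::map<int, std::set<int>>;

static bool isTrellisModel(int BB, const Graph &Succs, const Graph &Preds) {
  const std::set<int> &S = Succs.at(BB);
  if (S.size() != 2)
    return false;
  for (int Succ : S)
    for (int Pred : Preds.at(Succ)) {
      if (Pred == BB || S.count(Pred))
        continue;               // BB itself, or an edge inside S
      if (Succs.at(Pred) != S)  // every other predecessor must target all of S
        return false;
    }
  return true;
}

int main() {
  // Blocks 0 and 1 both branch to {2, 3}: a classic 2x2 trellis.
  Graph Succs = {{0, {2, 3}}, {1, {2, 3}}, {2, {}}, {3, {}}};
  Graph Preds = {{0, {}}, {1, {}}, {2, {0, 1}}, {3, {0, 1}}};
  return isTrellisModel(0, Succs, Preds) ? 0 : 1;
}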
+
+/// Pick the highest total weight pair of edges that can both be laid out.
+/// The edges in \p Edges[0] are assumed to have a different destination than
+/// the edges in \p Edges[1]. Simple counting shows that the best pair is either
+/// the individual highest weight edges to the 2 different destinations, or in
+/// case of a conflict, one of them should be replaced with a 2nd best edge.
+std::pair<MachineBlockPlacement::WeightedEdge,
+ MachineBlockPlacement::WeightedEdge>
+MachineBlockPlacement::getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<MachineBlockPlacement::WeightedEdge, 8>>
+ Edges) {
+ // Sort the edges, and then for each successor, find the best incoming
+ // predecessor. If the best incoming predecessors aren't the same,
+ // then that is clearly the best layout. If there is a conflict, one of the
+ // successors will have to fallthrough from the second best predecessor. We
+ // compare which combination is better overall.
+
+ // Sort for highest frequency.
+ auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
+
+ std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp);
+ std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp);
+ auto BestA = Edges[0].begin();
+ auto BestB = Edges[1].begin();
+ // Arrange for the correct answer to be in BestA and BestB
+ // If the 2 best edges don't conflict, the answer is already there.
+ if (BestA->Src == BestB->Src) {
+ // Compare the total fallthrough of (Best + Second Best) for both pairs
+ auto SecondBestA = std::next(BestA);
+ auto SecondBestB = std::next(BestB);
+ BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
+ BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
+ if (BestAScore < BestBScore)
+ BestA = SecondBestA;
+ else
+ BestB = SecondBestB;
+ }
+ // Arrange for the BB edge to be in BestA if it exists.
+ if (BestB->Src == BB)
+ std::swap(BestA, BestB);
+ return std::make_pair(*BestA, *BestB);
+}
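A worked instance of the conflict resolution above, with made-up weights: when the best edge into both destinations shares a source, the pair with the larger best-plus-second-best sum wins and the loser falls back to its second-best edge.

#include <cassert>
#include <iterator>
#include <vector>

struct Edge {
  int Weight;
  int Src; // the destination is implied by which list the edge sits in
};

int main() {
  // Incoming edges for the two trellis successors, already sorted by weight.
  // Both lists are headed by source block A (id 0), so they conflict.
  std::vector<Edge> ToD1 = {{10, 0}, {7, 1}}; // A -> D1, B -> D1
  std::vector<Edge> ToD2 = {{9, 0}, {5, 2}};  // A -> D2, C -> D2
  auto BestA = ToD1.begin(), BestB = ToD2.begin();
  if (BestA->Src == BestB->Src) {
    int ScoreA = BestA->Weight + std::next(BestB)->Weight; // 10 + 5 = 15
    int ScoreB = BestB->Weight + std::next(BestA)->Weight; //  9 + 7 = 16
    if (ScoreA < ScoreB)
      BestA = std::next(BestA); // keep A -> D2, fall back to B -> D1
    else
      BestB = std::next(BestB);
  }
  assert(BestA->Src == 1 && BestB->Src == 0); // chosen pair: B->D1 and A->D2
  return 0;
}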
+
+/// Get the best successor from \p BB based on \p BB being part of a trellis.
+/// We only handle trellises with 2 successors, so the algorithm is
+/// straightforward: Find the best pair of edges that don't conflict. We find
+/// the best incoming edge for each successor in the trellis. If those conflict,
+/// we consider which of them should be replaced with the second best.
+/// Upon return, the chosen successor (if any) is reported in the returned
+/// BlockAndTailDupResult; when the winning edge comes from \p BB, the optimal
+/// edge for the other side of the trellis is recorded in ComputedEdges.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+
+ BlockAndTailDupResult Result = {nullptr, false};
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+
+ // We assume size 2 because it's common. For general n, we would have to do
+ // the Hungarian algorithm, but it's not worth the complexity because more
+ // than 2 successors is fairly uncommon, and a trellis even more so.
+ if (Successors.size() != 2 || ViableSuccs.size() != 2)
+ return Result;
+
+ // Collect the edge frequencies of all edges that form the trellis.
+ SmallVector<WeightedEdge, 8> Edges[2];
+ int SuccIndex = 0;
+ for (auto Succ : ViableSuccs) {
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ // Skip any placed predecessors that are not BB
+ if (SuccPred != BB)
+ if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
+ BlockToChain[SuccPred] == &Chain ||
+ BlockToChain[SuccPred] == BlockToChain[Succ])
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
+ MBPI->getEdgeProbability(SuccPred, Succ);
+ Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
+ }
+ ++SuccIndex;
+ }
+
+ // Pick the best combination of 2 edges from all the edges in the trellis.
+ WeightedEdge BestA, BestB;
+ std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
+
+ if (BestA.Src != BB) {
+ // If we have a trellis, and BB doesn't have the best fallthrough edges,
+ // we shouldn't choose any successor. We've already looked and there's a
+ // better fallthrough edge for all the successors.
+ DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
+ return Result;
+ }
+
+ // Did we pick the triangle edge? If tail-duplication is profitable, do
+ // that instead. Otherwise merge the triangle edge now while we know it is
+ // optimal.
+ if (BestA.Dest == BestB.Src) {
+ // The edges are BB->Succ1->Succ2, and we're looking to see if BB->Succ2
+ // would be better.
+ MachineBasicBlock *Succ1 = BestA.Dest;
+ MachineBasicBlock *Succ2 = BestB.Dest;
+ // Check to see if tail-duplication would be profitable.
+ if (TailDupPlacement && shouldTailDuplicate(Succ2) &&
+ canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
+ isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
+ Chain, BlockFilter)) {
+ DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(Succ2)
+ << ", probability: " << Succ2Prob << " (Tail Duplicate)\n");
+ Result.BB = Succ2;
+ Result.ShouldTailDup = true;
+ return Result;
+ }
+ }
+ // We have already computed the optimal edge for the other side of the
+ // trellis.
+ ComputedEdges[BestB.Src] = { BestB.Dest, false };
+
+ auto TrellisSucc = BestA.Dest;
+ DEBUG(BranchProbability SuccProb = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(TrellisSucc)
+ << ", probability: " << SuccProb << " (Trellis)\n");
+ Result.BB = TrellisSucc;
+ return Result;
+}
+
+/// When the option TailDupPlacement is on, this method checks if the
+/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
+/// into all of its unplaced, unfiltered predecessors that are not BB.
+bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ if (!shouldTailDuplicate(Succ))
return false;
+
+ // For CFG checking.
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Make sure all unplaced and unfiltered predecessors can be
+ // tail-duplicated into.
+ // Skip any blocks that are already placed or not in this loop.
+ if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
+ || BlockToChain[Pred] == &Chain)
+ continue;
+ if (!TailDup.canTailDuplicate(Succ, Pred)) {
+ if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
+ // This will result in a trellis after tail duplication, so we don't
+ // need to copy Succ into this predecessor. In the presence
+ // of a trellis tail duplication can continue to be profitable.
+ // For example:
+ // A A
+ // |\ |\
+ // | \ | \
+ // | C | C+BB
+ // | / | |
+ // |/ | |
+ // BB => BB |
+ // |\ |\/|
+ // | \ |/\|
+ // | D | D
+ // | / | /
+ // |/ |/
+ // Succ Succ
+ //
+ // After BB was duplicated into C, the layout looks like the one on the
+ // right. BB and C now have the same successors. When considering
+ // whether Succ can be duplicated into all its unplaced predecessors, we
+ // ignore C.
+ // We can do this because C already has a profitable fallthrough, namely
+ // D. TODO(iteratee): ignore sufficiently cold predecessors for
+ // duplication and for this test.
+ //
+ // This allows trellises to be laid out in 2 separate chains
+ // (A,B,Succ,...) and later (C,D,...) This is a reasonable heuristic
+ // because it allows the creation of 2 fallthrough paths with links
+ // between them, and we correctly identify the best layout for these
+ // CFGs. We want to extend trellises that the user created in addition
+ // to trellises created by tail-duplication, so we just look for the
+ // CFG.
+ continue;
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Find chains of triangles where we believe it would be profitable to
+/// tail-duplicate them all, but a local analysis would not find them.
+/// There are 3 ways this can be profitable:
+/// 1) The post-dominators marked 50% are actually taken 55% (This shrinks with
+/// longer chains)
+/// 2) The chains are statically correlated. Branch probabilities have a very
+/// U-shaped distribution.
+/// [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805]
+/// If the branches in a chain are likely to be from the same side of the
+/// distribution as their predecessor, but are independent at runtime, this
+/// transformation is profitable. (Because the cost of being wrong is a small
+/// fixed cost, unlike the standard triangle layout where the cost of being
+/// wrong scales with the # of triangles.)
+/// 3) The chains are dynamically correlated: the probability that a previous
+/// branch was taken positively influences whether the next branch will be
+/// taken.
+/// We believe that 2 and 3 are common enough to justify the small margin in 1.
+void MachineBlockPlacement::precomputeTriangleChains() {
+ struct TriangleChain {
+ std::vector<MachineBasicBlock *> Edges;
+ TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
+ : Edges({src, dst}) {}
+
+ void append(MachineBasicBlock *dst) {
+ assert(getKey()->isSuccessor(dst) &&
+ "Attempting to append a block that is not a successor.");
+ Edges.push_back(dst);
+ }
+
+ unsigned count() const { return Edges.size() - 1; }
+
+ MachineBasicBlock *getKey() const {
+ return Edges.back();
+ }
+ };
+
+ if (TriangleChainCount == 0)
+ return;
+
+ DEBUG(dbgs() << "Pre-computing triangle chains.\n");
+ // Map from last block to the chain that contains it. This allows us to extend
+ // chains as we find new triangles.
+ DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
+ for (MachineBasicBlock &BB : *F) {
+ // If BB doesn't have 2 successors, it doesn't start a triangle.
+ if (BB.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PDom = nullptr;
+ for (MachineBasicBlock *Succ : BB.successors()) {
+ if (!MPDT->dominates(Succ, &BB))
+ continue;
+ PDom = Succ;
+ break;
+ }
+ // If BB doesn't have a post-dominating successor, it doesn't form a
+ // triangle.
+ if (PDom == nullptr)
+ continue;
+ // If PDom has a hint that it is low probability, skip this triangle.
+ if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))
+ continue;
+ // If PDom isn't eligible for duplication, this isn't the kind of triangle
+ // we're looking for.
+ if (!shouldTailDuplicate(PDom))
+ continue;
+ bool CanTailDuplicate = true;
+ // If PDom can't be tail-duplicated into its non-BB predecessors, then this
+ // isn't the kind of triangle we're looking for.
+ for (MachineBasicBlock* Pred : PDom->predecessors()) {
+ if (Pred == &BB)
+ continue;
+ if (!TailDup.canTailDuplicate(PDom, Pred)) {
+ CanTailDuplicate = false;
+ break;
+ }
+ }
+ // If we can't tail-duplicate PDom to its predecessors, then skip this
+ // triangle.
+ if (!CanTailDuplicate)
+ continue;
+
+ // Now we have an interesting triangle. Insert it if it's not part of an
+ // existing chain.
+ // Note: This cannot be replaced with a call to insert() or emplace() because
+ // the find key is BB, but the insert/emplace key is PDom.
+ auto Found = TriangleChainMap.find(&BB);
+ // If it is, remove the chain from the map, grow it, and put it back in the
+ // map with the end as the new key.
+ if (Found != TriangleChainMap.end()) {
+ TriangleChain Chain = std::move(Found->second);
+ TriangleChainMap.erase(Found);
+ Chain.append(PDom);
+ TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
+ } else {
+ auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+ }
+ }
+
+ // Iterating over a DenseMap is safe here, because the only thing in the body
+ // of the loop is inserting into another DenseMap (ComputedEdges).
+ // ComputedEdges is never iterated, so this doesn't lead to non-determinism.
+ for (auto &ChainPair : TriangleChainMap) {
+ TriangleChain &Chain = ChainPair.second;
+ // Benchmarking has shown that due to branch correlation duplicating 2 or
+ // more triangles is profitable, despite the calculations assuming
+ // independence.
+ if (Chain.count() < TriangleChainCount)
+ continue;
+ MachineBasicBlock *dst = Chain.Edges.back();
+ Chain.Edges.pop_back();
+ for (MachineBasicBlock *src : reverse(Chain.Edges)) {
+ DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" <<
+ getBlockName(dst) << " as pre-computed based on triangles.\n");
+
+ auto InsertResult = ComputedEdges.insert({src, {dst, true}});
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+
+ dst = src;
+ }
+ }
}
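The rekeying in the loop above (erase under the old tail, append, reinsert under the new tail) is the usual pattern for a map of chains keyed by their last element. A toy standalone version with an ordinary std::map and integer chains:

#include <cassert>
#include <map>
#include <utility>
#include <vector>

int main() {
  // Chains keyed by their last element; extending a chain moves its key.
  std::map<int, std::vector<int>> Chains;
  Chains.emplace(2, std::vector<int>{1, 2}); // chain 1->2, keyed by 2
  // A new link 2->3 is found: erase under the old tail, reinsert under the new.
  auto It = Chains.find(2);
  std::vector<int> Chain = std::move(It->second);
  Chains.erase(It);
  Chain.push_back(3);
  Chains.emplace(Chain.back(), std::move(Chain));
  assert(Chains.count(3) == 1 && Chains.count(2) == 0);
  return 0;
}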
// When profile is not present, return the StaticLikelyProb.
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
- MachineBasicBlock *BB) {
+ const MachineBasicBlock *BB) {
if (!BB->getParent()->getFunction()->getEntryCount())
return BranchProbability(StaticLikelyProb, 100);
if (BB->succ_size() == 2) {
@@ -609,11 +1202,11 @@ static BranchProbability getLayoutSuccessorProbThreshold(
if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
/* See case 1 below for the cost analysis. For BB->Succ to
* be taken with smaller cost, the following needs to hold:
- * Prob(BB->Succ) > 2* Prob(BB->Pred)
- * So the threshold T
- * T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
- * We have T + T/2 = 1, i.e. T = 2/3. Also adding user specified
- * branch bias, we have
+ * Prob(BB->Succ) > 2 * Prob(BB->Pred)
+ * So the threshold T in the calculation below satisfies
+ * (1-T) * Prob(BB->Succ) > T * Prob(BB->Pred),
+ * so T / (1 - T) = 2, yielding T = 2/3.
+ * Also adding user specified branch bias, we have
* T = (2/3)*(ProfileLikelyProb/50)
* = (2*ProfileLikelyProb)/150
*/
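Plugging numbers into the derivation above: at the neutral setting ProfileLikelyProb = 50 the threshold is exactly 2/3, and raising the option scales it proportionally. A quick standalone check with plain doubles (the 50 and 60 values are only illustrative):

#include <cassert>
#include <cmath>

int main() {
  auto Threshold = [](double ProfileLikelyProb) {
    return 2.0 * ProfileLikelyProb / 150.0; // (2/3) * (ProfileLikelyProb / 50)
  };
  assert(std::abs(Threshold(50) - 2.0 / 3.0) < 1e-9); // no extra bias -> 2/3
  assert(Threshold(60) > Threshold(50));              // more bias -> higher T
  return 0;
}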
@@ -625,10 +1218,17 @@ static BranchProbability getLayoutSuccessorProbThreshold(
/// Checks to see if the layout candidate block \p Succ has a better layout
/// predecessor than \c BB. If yes, returns true.
+/// \p SuccProb: The probability adjusted for only remaining blocks.
+/// Only used for logging
+/// \p RealSuccProb: The un-adjusted probability.
+/// \p Chain: The chain that BB belongs to and Succ is being considered for.
+/// \p BlockFilter: if non-null, the set of blocks that make up the loop being
+/// considered
bool MachineBlockPlacement::hasBetterLayoutPredecessor(
- MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
- BranchProbability SuccProb, BranchProbability RealSuccProb,
- BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
// There isn't a better layout when there are no unscheduled predecessors.
if (SuccChain.UnscheduledPredecessors == 0)
@@ -734,11 +1334,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// | Pred----| | S1----
// | | | |
// --(S1 or S2) ---Pred--
+ // |
+ // S2
//
// topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
// + min(freq(Pred->S1), freq(Pred->S2))
// Non-topo-order cost:
- // In the worst case, S2 will not get laid out after Pred.
// non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
// To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
// is 0. Then the non topo layout is better when
@@ -756,13 +1357,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
for (MachineBasicBlock *Pred : Succ->predecessors()) {
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
(BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
+ BlockToChain[Pred] == &Chain ||
+ // This check is redundant except for lookahead. This function is
+ // called for lookahead by isProfitableToTailDup when BB hasn't been
+ // placed yet.
+ (Pred == BB))
continue;
// Do backward checking.
// For all cases above, we need a backward checking to filter out edges that
- // are not 'strongly' biased. With profile data available, the check is
- // mostly redundant for case 2 (when threshold prob is set at 50%) unless S
- // has more than two successors.
+ // are not 'strongly' biased.
// BB Pred
// \ /
// Succ
@@ -798,14 +1401,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
/// breaking CFG structure, but cave and break such structures in the case of
/// very hot successor edges.
///
-/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *
-MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+/// \returns The best successor block found, or null if none are viable, along
+/// with a boolean indicating if tail duplication is necessary.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(StaticLikelyProb, 100);
- MachineBasicBlock *BestSucc = nullptr;
+ BlockAndTailDupResult BestSucc = { nullptr, false };
auto BestProb = BranchProbability::getZero();
SmallVector<MachineBasicBlock *, 4> Successors;
@@ -813,22 +1417,45 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
collectViableSuccessors(BB, Chain, BlockFilter, Successors);
DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
+
+ // If we already precomputed the best successor for BB, return it if it is
+ // still applicable.
+ auto FoundEdge = ComputedEdges.find(BB);
+ if (FoundEdge != ComputedEdges.end()) {
+ MachineBasicBlock *Succ = FoundEdge->second.BB;
+ ComputedEdges.erase(FoundEdge);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
+ SuccChain != &Chain && Succ == *SuccChain->begin())
+ return FoundEdge->second;
+ }
+
+ // If BB is part of a trellis, use the trellis to determine the optimal
+ // fallthrough edges.
+ if (isTrellis(BB, Successors, Chain, BlockFilter))
+ return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
+ BlockFilter);
+
+ // For blocks with CFG violations, we may be able to lay them out anyway with
+ // tail-duplication. We keep this vector so we can perform the probability
+ // calculations the minimum number of times.
+ SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4>
+ DupCandidates;
for (MachineBasicBlock *Succ : Successors) {
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BranchProbability SuccProb =
getAdjustedProbability(RealSuccProb, AdjustedSumProb);
- // This heuristic is off by default.
- if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
- HotProb))
- return Succ;
-
BlockChain &SuccChain = *BlockToChain[Succ];
// Skip the edge \c BB->Succ if block \c Succ has a better layout
// predecessor that yields lower global cost.
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
- Chain, BlockFilter))
+ Chain, BlockFilter)) {
+ // If tail duplication would make Succ profitable, place it.
+ if (TailDupPlacement && shouldTailDuplicate(Succ))
+ DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
continue;
+ }
DEBUG(
dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
@@ -836,17 +1463,48 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
- if (BestSucc && BestProb >= SuccProb) {
+ if (BestSucc.BB && BestProb >= SuccProb) {
DEBUG(dbgs() << " Not the best candidate, continuing\n");
continue;
}
DEBUG(dbgs() << " Setting it as best candidate\n");
- BestSucc = Succ;
+ BestSucc.BB = Succ;
BestProb = SuccProb;
}
- if (BestSucc)
- DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n");
+ // Handle the tail duplication candidates in order of decreasing probability.
+ // Stop at the first one that is profitable. Also stop if they are less
+ // profitable than BestSucc. Position is important because we preserve it and
+ // prefer the first best match. Since we aren't comparing in order, we capture
+ // the position instead.
+ if (DupCandidates.size() != 0) {
+ auto cmp =
+ [](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
+ const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
+ return std::get<0>(a) > std::get<0>(b);
+ };
+ std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
+ }
+ for (auto &Tup : DupCandidates) {
+ BranchProbability DupProb;
+ MachineBasicBlock *Succ;
+ std::tie(DupProb, Succ) = Tup;
+ if (DupProb < BestProb)
+ break;
+ if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
+ && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
+ DEBUG(
+ dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
+ << DupProb
+ << " (Tail Duplicate)\n");
+ BestSucc.BB = Succ;
+ BestSucc.ShouldTailDup = true;
+ break;
+ }
+ }
+
+ if (BestSucc.BB)
+ DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n");
return BestSucc;
}
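The DupCandidates handling above is a sort-then-scan pattern: order the candidates by decreasing probability and stop at the first one that is still at least as likely as the successor chosen without duplication. A minimal standalone sketch of that pattern, with double standing in for BranchProbability and isProfitable() standing in for the pass's profitability checks (both stand-ins are assumptions, not the real types):

#include <algorithm>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

// Stand-in for a (probability, block) pair.
using DupCandidate = std::tuple<double, std::string>;

// Hypothetical profitability check standing in for
// canTailDuplicateUnplacedPreds() && isProfitableToTailDup().
static bool isProfitable(const std::string &Block) {
  return Block != "never";
}

int main() {
  std::vector<DupCandidate> DupCandidates = {
      {0.30, "b"}, {0.55, "a"}, {0.10, "never"}};
  double BestProb = 0.20; // probability of the successor chosen without duplication

  // Sort by decreasing probability; stable_sort preserves the original order
  // of equally probable candidates, mirroring the pass.
  std::stable_sort(DupCandidates.begin(), DupCandidates.end(),
                   [](const DupCandidate &A, const DupCandidate &B) {
                     return std::get<0>(A) > std::get<0>(B);
                   });

  for (const auto &Tup : DupCandidates) {
    double Prob;
    std::string Block;
    std::tie(Prob, Block) = Tup;
    if (Prob < BestProb)
      break; // everything after this is even less likely
    if (isProfitable(Block)) {
      std::cout << "duplicate into " << Block << "\n";
      break; // take the first profitable candidate
    }
  }
  return 0;
}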
@@ -862,7 +1520,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
///
/// \returns The best block found, or null if none are viable.
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
// Once we need to walk the worklist looking for a candidate, cleanup the
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
@@ -948,7 +1606,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::fillWorkLists(
- MachineBasicBlock *MBB,
+ const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter = nullptr) {
BlockChain &Chain = *BlockToChain[MBB];
@@ -970,23 +1628,23 @@ void MachineBlockPlacement::fillWorkLists(
if (Chain.UnscheduledPredecessors != 0)
return;
- MBB = *Chain.begin();
- if (MBB->isEHPad())
- EHPadWorkList.push_back(MBB);
+ MachineBasicBlock *BB = *Chain.begin();
+ if (BB->isEHPad())
+ EHPadWorkList.push_back(BB);
else
- BlockWorkList.push_back(MBB);
+ BlockWorkList.push_back(BB);
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB, BlockChain &Chain,
+ const MachineBasicBlock *HeadBB, BlockChain &Chain,
BlockFilterSet *BlockFilter) {
- assert(BB && "BB must not be null.\n");
- assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
+ assert(HeadBB && "BB must not be null.\n");
+ assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
- MachineBasicBlock *LoopHeaderBB = BB;
+ const MachineBasicBlock *LoopHeaderBB = HeadBB;
markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
- BB = *std::prev(Chain.end());
+ MachineBasicBlock *BB = *std::prev(Chain.end());
for (;;) {
assert(BB && "null block found at end of chain in loop.");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
@@ -995,7 +1653,11 @@ void MachineBlockPlacement::buildChain(
// Look for the best viable successor if there is one to place immediately
// after this block.
- MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+ auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
+ MachineBasicBlock *BestSucc = Result.BB;
+ bool ShouldTailDup = Result.ShouldTailDup;
+ if (TailDupPlacement)
+ ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -1016,7 +1678,7 @@ void MachineBlockPlacement::buildChain(
// Placement may have changed tail duplication opportunities.
// Check for that now.
- if (TailDupPlacement && BestSucc) {
+ if (TailDupPlacement && BestSucc && ShouldTailDup) {
// If the chosen successor was duplicated into all its predecessors,
// don't bother laying it out, just go round the loop again with BB as
// the chain end.
@@ -1052,7 +1714,7 @@ void MachineBlockPlacement::buildChain(
/// unconditional jump (for the backedge) rotating it in front of the loop
/// header is always profitable.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// Placing the latch block before the header may introduce an extra branch
// that skips this block the first time the loop is executed, which we want
@@ -1116,7 +1778,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -1235,7 +1897,7 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
/// branches. For example, if the loop has fallthrough into its header and out
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
- MachineBasicBlock *ExitingBB,
+ const MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet) {
if (!ExitingBB)
return;
@@ -1285,7 +1947,8 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
/// Therefore, the cost for a given rotation is the sum of costs listed above.
/// We select the best rotation with the smallest cost.
void MachineBlockPlacement::rotateLoopWithProfile(
- BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
auto HeaderBB = L.getHeader();
auto HeaderIter = find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
@@ -1422,7 +2085,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
/// When profile data is available, exclude cold blocks from the returned set;
/// otherwise, collect all blocks in the loop.
MachineBlockPlacement::BlockFilterSet
-MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
+MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
BlockFilterSet LoopBlockSet;
// Filter cold blocks off from LoopBlockSet when profile data is available.
@@ -1459,10 +2122,10 @@ MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
/// as much as possible. We can then stitch the chains together in a way which
/// both preserves the topological structure and minimizes taken conditional
/// branches.
-void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
+void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
- for (MachineLoop *InnerLoop : L)
+ for (const MachineLoop *InnerLoop : L)
buildLoopChains(*InnerLoop);
assert(BlockWorkList.empty());
@@ -1499,7 +2162,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
assert(LoopChain.UnscheduledPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
buildChain(LoopTop, LoopChain, &LoopBlockSet);
@@ -1533,7 +2196,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
if (!LoopBlockSet.empty()) {
BadLoop = true;
- for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
@@ -1546,31 +2209,6 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
EHPadWorkList.clear();
}
-/// When OutlineOpitonalBranches is on, this method collects BBs that
-/// dominates all terminator blocks of the function \p F.
-void MachineBlockPlacement::collectMustExecuteBBs() {
- if (OutlineOptionalBranches) {
- // Find the nearest common dominator of all of F's terminators.
- MachineBasicBlock *Terminator = nullptr;
- for (MachineBasicBlock &MBB : *F) {
- if (MBB.succ_size() == 0) {
- if (Terminator == nullptr)
- Terminator = &MBB;
- else
- Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
- }
- }
-
- // MBBs dominating this common dominator are unavoidable.
- UnavoidableBlocks.clear();
- for (MachineBasicBlock &MBB : *F) {
- if (MDT->dominates(&MBB, Terminator)) {
- UnavoidableBlocks.insert(&MBB);
- }
- }
- }
-}
-
void MachineBlockPlacement::buildCFGChains() {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
@@ -1605,9 +2243,6 @@ void MachineBlockPlacement::buildCFGChains() {
}
}
- // Turned on with OutlineOptionalBranches option
- collectMustExecuteBBs();
-
// Build any loop-based chains.
PreferredLoopExit = nullptr;
for (MachineLoop *L : *MLI)
@@ -1839,7 +2474,7 @@ void MachineBlockPlacement::alignBlocks() {
/// @return true if \p BB was removed.
bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
+ const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt) {
bool Removed, DuplicatedToLPred;
@@ -1901,21 +2536,16 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
/// \return - True if the block was duplicated into all preds and removed.
bool MachineBlockPlacement::maybeTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain, BlockFilterSet *BlockFilter,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt,
bool &DuplicatedToLPred) {
-
DuplicatedToLPred = false;
+ if (!shouldTailDuplicate(BB))
+ return false;
+
DEBUG(dbgs() << "Redoing tail duplication for Succ#"
<< BB->getNumber() << "\n");
- bool IsSimple = TailDup.isSimpleBB(BB);
- // Blocks with single successors don't create additional fallthrough
- // opportunities. Don't duplicate them. TODO: When conditional exits are
- // analyzable, allow them to be duplicated.
- if (!IsSimple && BB->succ_size() == 1)
- return false;
- if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
- return false;
+
// This has to be a callback because none of it can be done after
// BB is deleted.
bool Removed = false;
@@ -1967,6 +2597,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+ bool IsSimple = TailDup.isSimpleBB(BB);
TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
&DuplicatedPreds, &RemovalCallbackRef);
@@ -2006,21 +2637,24 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
- MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = nullptr;
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
PreferredLoopExit = nullptr;
+ assert(BlockToChain.empty());
+ assert(ComputedEdges.empty());
+
if (TailDupPlacement) {
- unsigned TailDupSize = TailDuplicatePlacementThreshold;
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ unsigned TailDupSize = TailDupPlacementThreshold;
if (MF.getFunction()->optForSize())
TailDupSize = 1;
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ precomputeTriangleChains();
}
- assert(BlockToChain.empty());
-
buildCFGChains();
// Changing the layout can create new tail merging opportunities.
@@ -2032,7 +2666,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
- unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
+ unsigned TailMergeSize = TailDupPlacementThreshold + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
@@ -2041,8 +2675,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
- // Must redo the dominator tree if blocks were changed.
- MDT->runOnMachineFunction(MF);
+ ComputedEdges.clear();
+ // Must redo the post-dominator tree if blocks were changed.
+ if (MPDT)
+ MPDT->runOnMachineFunction(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
@@ -2052,6 +2688,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
alignBlocks();
BlockToChain.clear();
+ ComputedEdges.clear();
ChainAllocator.DestroyAll();
if (AlignAllBlock)
@@ -2067,6 +2704,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MBI->setAlignment(AlignAllNonFallThruBlocks);
}
}
+ if (ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F->getFunction()->getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MBP." + MF.getName(), false);
+ }
+
// We always return true as we have no way to track whether the final order
// differs from the original order.
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 5beed5f5dd087..50e453e4067cc 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// The machine combiner pass uses machine trace metrics to ensure the combined
-// instructions does not lengthen the critical path or the resource depth.
+// instructions do not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "machine-combiner"
@@ -135,7 +135,9 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(TII); dbgs() << "\n";);
+ DEBUG(dbgs() << "NEW INSTR ";
+ InstrPtr->print(dbgs(), TII);
+ dbgs() << "\n";);
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
@@ -352,6 +354,19 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
+static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
+ SmallVector<MachineInstr *, 16> InsInstrs,
+ SmallVector<MachineInstr *, 16> DelInstrs,
+ MachineTraceMetrics *Traces) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+ ++NumInstCombined;
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
+}
+
/// Substitute a slow code sequence with a faster one by
/// evaluating instruction combining pattern.
/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
@@ -406,7 +421,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
if (!MinInstr)
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
Traces->verifyAnalysis();
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
@@ -426,23 +440,23 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
- if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
- (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
- DelInstrs, InstrIdxForVirtReg, P) &&
- preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
- for (auto *InstrPtr : InsInstrs)
- MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
- InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
-
- Changed = true;
- ++NumInstCombined;
-
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
+ if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
// Eagerly stop after the first pattern fires.
+ Changed = true;
break;
} else {
+ // Calculating the trace metrics may be expensive,
+ // so only do this when necessary.
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg, P) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ // Eagerly stop after the first pattern fires.
+ Changed = true;
+ break;
+ }
// Cleanup instructions of the alternative code sequence. There is no
// use for them.
MachineFunction *MF = MBB->getParent();
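The MachineCombiner restructuring above is the usual cheap-test-first pattern: the size-based substitution checks run unconditionally, and the expensive block trace is only computed when those checks alone cannot justify the rewrite. A small sketch of the same control flow, where the predicate names and the Trace struct are illustrative stand-ins rather than the combiner's API:

#include <iostream>

// Stand-ins for the combiner's checks; the real pass queries instruction
// counts and MachineTraceMetrics instead.
static bool substituteAlways() { return false; }
static bool fewerInstructions(int NewCount, int OldCount) { return NewCount < OldCount; }

struct Trace { int CriticalPath; };

// Hypothetical expensive computation, analogous to MinInstr->getTrace(MBB).
static Trace computeTrace() {
  std::cout << "computing trace (expensive)\n";
  return {42};
}

static bool improvesCriticalPath(const Trace &T) { return T.CriticalPath > 40; }

int main() {
  int NewCount = 3, OldCount = 3;

  if (substituteAlways() || fewerInstructions(NewCount, OldCount)) {
    std::cout << "substitute (cheap check)\n";
  } else {
    // Only now pay for the trace computation.
    Trace T = computeTrace();
    if (improvesCriticalPath(T))
      std::cout << "substitute (trace check)\n";
    else
      std::cout << "keep original sequence\n";
  }
  return 0;
}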
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 5de6dec29fb9d..7312dc5e94bdd 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -291,17 +291,9 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
if (MO.isDef()) {
Defs.push_back(Reg);
- } else {
+ continue;
+ } else if (MO.readsReg())
ReadRegister(Reg);
- }
- // Treat undef use like defs for copy propagation but not for
- // dead copy. We would need to do a liveness check to be sure the copy
- // is dead for undef uses.
- // The backends are allowed to do whatever they want with undef value
- // and we cannot be sure this register will not be rewritten to break
- // some false dependencies for the hardware for instance.
- if (MO.isUndef())
- Defs.push_back(Reg);
}
// The instruction has a register mask operand which means that it clobbers
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 303a6a9263be7..e3a6c51c47ad5 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -49,32 +49,29 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
CriticalEdgesToSplit.clear();
NewBBs.clear();
+ DT.reset(new DominatorTreeBase<MachineBasicBlock>(false));
DT->recalculate(F);
-
return false;
}
MachineDominatorTree::MachineDominatorTree()
: MachineFunctionPass(ID) {
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- DT = new DominatorTreeBase<MachineBasicBlock>(false);
-}
-
-MachineDominatorTree::~MachineDominatorTree() {
- delete DT;
}
void MachineDominatorTree::releaseMemory() {
- DT->releaseMemory();
+ CriticalEdgesToSplit.clear();
+ DT.reset(nullptr);
}
void MachineDominatorTree::verifyAnalysis() const {
- if (VerifyMachineDomInfo)
+ if (DT && VerifyMachineDomInfo)
verifyDomTree();
}
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
- DT->print(OS);
+ if (DT)
+ DT->print(OS);
}
void MachineDominatorTree::applySplitCriticalEdges() const {
@@ -143,15 +140,18 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
}
void MachineDominatorTree::verifyDomTree() const {
+ if (!DT)
+ return;
MachineFunction &F = *getRoot()->getParent();
- MachineDominatorTree OtherDT;
- OtherDT.DT->recalculate(F);
- if (compare(OtherDT)) {
+ DominatorTreeBase<MachineBasicBlock> OtherDT(false);
+ OtherDT.recalculate(F);
+ if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
+ DT->compare(OtherDT)) {
errs() << "MachineDominatorTree is not up to date!\nComputed:\n";
- print(errs(), nullptr);
+ DT->print(errs());
errs() << "\nActual:\n";
- OtherDT.print(errs(), nullptr);
+ OtherDT.print(errs());
abort();
}
}
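Replacing the raw DT pointer with a std::unique_ptr gives the reset-and-rebuild idiom used above: the tree is dropped in releaseMemory(), reallocated on the next run, and every accessor guards against a null tree. A generic sketch of that ownership pattern, where Tree and Analysis are placeholders rather than the MachineDominatorTree API:

#include <iostream>
#include <memory>

// Placeholder for DominatorTreeBase<MachineBasicBlock>.
struct Tree {
  void recalculate(int FunctionId) {
    std::cout << "recalculating tree for function " << FunctionId << "\n";
  }
};

struct Analysis {
  std::unique_ptr<Tree> DT;

  void runOnFunction(int FunctionId) {
    DT.reset(new Tree()); // fresh tree every run
    DT->recalculate(FunctionId);
  }

  void releaseMemory() { DT.reset(); } // frees the tree; no manual delete

  void print() const {
    if (DT) // guard every use, as the pass now does
      std::cout << "tree is available\n";
    else
      std::cout << "no tree\n";
  }
};

int main() {
  Analysis A;
  A.print();
  A.runOnFunction(1);
  A.print();
  A.releaseMemory();
  A.print();
  return 0;
}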
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index c1d5ea96cd17a..c9767a25e908d 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -169,6 +169,7 @@ void MachineFunction::clear() {
InstructionRecycler.clear(Allocator);
OperandRecycler.clear(Allocator);
BasicBlockRecycler.clear(Allocator);
+ VariableDbgInfos.clear();
if (RegInfo) {
RegInfo->~MachineRegisterInfo();
Allocator.Deallocate(RegInfo);
@@ -859,7 +860,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
if (!isCalleeSavedInfoValid())
return BV;
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
+ ++CSR)
BV.set(*CSR);
// Saved CSRs are not pristine.
@@ -956,7 +959,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineFrameInfo::dump(const MachineFunction &MF) const {
+LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const {
print(MF, dbgs());
}
#endif
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 2f2e3b3d8e9f1..c0a8b95ed8a06 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -262,8 +262,21 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getBlockAddress() == Other.getBlockAddress() &&
getOffset() == Other.getOffset();
case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut:
- return getRegMask() == Other.getRegMask();
+ case MachineOperand::MO_RegisterLiveOut: {
+ // Shallow compare of the two RegMasks
+ const uint32_t *RegMask = getRegMask();
+ const uint32_t *OtherRegMask = Other.getRegMask();
+ if (RegMask == OtherRegMask)
+ return true;
+
+ // Calculate the size of the RegMask
+ const MachineFunction *MF = getParent()->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+ // Deep compare of the two RegMasks
+ return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
+ }
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_CFIIndex:
@@ -274,6 +287,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getIntrinsicID() == Other.getIntrinsicID();
case MachineOperand::MO_Predicate:
return getPredicate() == Other.getPredicate();
+ case MachineOperand::MO_Placeholder:
+ return true;
}
llvm_unreachable("Invalid machine operand type");
}
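The deep comparison above relies on register masks being stored as packed 32-bit words, one bit per physical register, so a target with NumRegs registers needs (NumRegs + 31) / 32 words. A small sketch of that size computation and the std::equal word-by-word compare; the 300-register target below is made up for illustration:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical target with 300 physical registers.
  unsigned NumRegs = 300;

  // One bit per register, packed into 32-bit words: ceil(300 / 32) == 10.
  unsigned RegMaskSize = (NumRegs + 31) / 32;
  std::cout << "words per mask: " << RegMaskSize << "\n";

  std::vector<uint32_t> MaskA(RegMaskSize, 0), MaskB(RegMaskSize, 0);
  MaskA[2] |= 1u << 5; // mark register 69 (word 2, bit 5)
  MaskB[2] |= 1u << 5;

  // Same element-wise comparison the patch performs on the two masks.
  bool Identical = std::equal(MaskA.begin(), MaskA.end(), MaskB.begin());
  std::cout << (Identical ? "identical masks\n" : "different masks\n");
  return 0;
}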
@@ -322,6 +337,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
case MachineOperand::MO_Predicate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
+ case MachineOperand::MO_Placeholder:
+ return hash_combine();
}
llvm_unreachable("Invalid machine operand type");
}
@@ -403,6 +420,11 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool Unused;
APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
OS << "half " << APF.convertToFloat();
+ } else if (getFPImm()->getType()->isFP128Ty()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ SmallString<16> Str;
+ getFPImm()->getValueAPF().toString(Str);
+ OS << "quad " << Str;
} else {
OS << getFPImm()->getValueAPF().convertToDouble();
}
@@ -491,7 +513,11 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
<< CmpInst::getPredicateName(Pred) << '>';
+ break;
}
+ case MachineOperand::MO_Placeholder:
+ OS << "<placeholder>";
+ break;
}
if (unsigned TF = getTargetFlags())
OS << "[TF=" << TF << ']';
@@ -1571,6 +1597,65 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return true;
}
+bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
+ bool UseTBAA) {
+ const MachineFunction *MF = getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // If neither instruction stores to memory, they can't alias in any
+ // meaningful way, even if they read from the same address.
+ if (!mayStore() && !Other.mayStore())
+ return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
+ return false;
+
+ if (!AA)
+ return true;
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!hasOneMemOperand() || !Other.hasOneMemOperand())
+ return true;
+
+ MachineMemOperand *MMOa = *memoperands_begin();
+ MachineMemOperand *MMOb = *Other.memoperands_begin();
+
+ if (!MMOa->getValue() || !MMOb->getValue())
+ return true;
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+
+ AliasResult AAResult =
+ AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MMOb->getValue(), Overlapb,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+
+ return (AAResult != NoAlias);
+}
+
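The extents handed to AA above are measured from the smaller of the two MachineMemOperand offsets, so both memory locations can be described from a common starting point on the same underlying value. A worked sketch of that arithmetic with made-up offsets and sizes (the numbers are illustrative only):

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  // Two hypothetical accesses into the same object:
  //   A: 4 bytes at offset 8, B: 8 bytes at offset 12.
  int64_t OffsetA = 8, SizeA = 4;
  int64_t OffsetB = 12, SizeB = 8;

  // Rebase both accesses on the smaller offset, exactly as mayAlias() does.
  int64_t MinOffset = std::min(OffsetA, OffsetB); // 8
  int64_t OverlapA = SizeA + OffsetA - MinOffset; // 4 + 8 - 8  == 4
  int64_t OverlapB = SizeB + OffsetB - MinOffset; // 8 + 12 - 8 == 12

  // OverlapA and OverlapB are the extents passed to AA->alias() as the
  // MemoryLocation sizes for the two operands.
  std::cout << "extent A: " << OverlapA << ", extent B: " << OverlapB << "\n";
  return 0;
}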
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
/// reference is not available. Return false if it is known to have no ordered
@@ -1692,14 +1777,14 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
}
}
-LLVM_DUMP_METHOD void MachineInstr::dump(const TargetInstrInfo *TII) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineInstr::dump() const {
dbgs() << " ";
- print(dbgs(), false /* SkipOpers */, TII);
-#endif
+ print(dbgs());
}
+#endif
-void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc,
const TargetInstrInfo *TII) const {
const Module *M = nullptr;
if (const MachineBasicBlock *MBB = getParent())
@@ -1707,11 +1792,12 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
M = MF->getFunction()->getParent();
ModuleSlotTracker MST(M);
- print(OS, MST, SkipOpers, TII);
+ print(OS, MST, SkipOpers, SkipDebugLoc, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
- bool SkipOpers, const TargetInstrInfo *TII) const {
+ bool SkipOpers, bool SkipDebugLoc,
+ const TargetInstrInfo *TII) const {
// We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
@@ -1987,6 +2073,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (isIndirectDebugValue())
OS << " indirect";
+ } else if (SkipDebugLoc) {
+ return;
} else if (debugLoc && MF) {
if (!HaveSemi)
OS << ";";
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index fdeaf7b711611..a9aa1d954e70e 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -87,6 +87,22 @@ MachineBasicBlock *MachineLoop::findLoopControlBlock() {
return nullptr;
}
+DebugLoc MachineLoop::getStartLoc() const {
+ // Try the pre-header first.
+ if (MachineBasicBlock *PHeadMBB = getLoopPreheader())
+ if (const BasicBlock *PHeadBB = PHeadMBB->getBasicBlock())
+ if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc())
+ return DL;
+
+ // If we have no pre-header or there are no instructions with debug
+ // info in it, try the header.
+ if (MachineBasicBlock *HeadMBB = getHeader())
+ if (const BasicBlock *HeadBB = HeadMBB->getBasicBlock())
+ return HeadBB->getTerminator()->getDebugLoc();
+
+ return DebugLoc();
+}
+
MachineBasicBlock *
MachineLoopInfo::findLoopPreheader(MachineLoop *L,
bool SpeculativePreheader) const {
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 6618857477ed4..2f0f4297ef5c5 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -306,6 +306,10 @@ public:
MMI.deleteMachineFunctionFor(F);
return true;
}
+
+ StringRef getPassName() const override {
+ return "Free MachineFunction";
+ }
};
char FreeMachineFunction::ID;
} // end anonymous namespace
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 22d519e5d88fa..4c81fd91cb829 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -23,6 +23,7 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
+void MachineModuleInfoWasm::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
new file mode 100644
index 0000000000000..6b6b5f2814a90
--- /dev/null
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -0,0 +1,100 @@
+///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
+///
+/// The LLVM Compiler Infrastructure
+///
+/// This file is distributed under the University of Illinois Open Source
+/// License. See LICENSE.TXT for details.
+///
+///===---------------------------------------------------------------------===//
+/// \file
+/// Optimization diagnostic interfaces for machine passes. It's packaged as an
+/// analysis pass so that, by using this service, passes become dependent on MBFI
+/// as well. MBFI is used to compute the "hotness" of the diagnostic message.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
+ StringRef MKey, const MachineInstr &MI)
+ : Argument() {
+ Key = MKey;
+
+ raw_string_ostream OS(Val);
+ MI.print(OS, /*SkipOpers=*/false, /*SkipDebugLoc=*/true);
+}
+
+Optional<uint64_t>
+MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) {
+ if (!MBFI)
+ return None;
+
+ return MBFI->getBlockProfileCount(&MBB);
+}
+
+void MachineOptimizationRemarkEmitter::computeHotness(
+ DiagnosticInfoMIROptimization &Remark) {
+ const MachineBasicBlock *MBB = Remark.getBlock();
+ if (MBB)
+ Remark.setHotness(computeHotness(*MBB));
+}
+
+void MachineOptimizationRemarkEmitter::emit(
+ DiagnosticInfoOptimizationBase &OptDiagCommon) {
+ auto &OptDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon);
+ computeHotness(OptDiag);
+
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
+ if (Out) {
+ auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
+ *Out << P;
+ }
+ // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
+ // from here to clang.
+ if (!OptDiag.isVerbose() || shouldEmitVerbose())
+ Ctx.diagnose(OptDiag);
+}
+
+MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineOptimizationRemarkEmitterPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ MachineBlockFrequencyInfo *MBFI;
+
+ if (MF.getFunction()->getContext().getDiagnosticHotnessRequested())
+ MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+ else
+ MBFI = nullptr;
+
+ ORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ return false;
+}
+
+void MachineOptimizationRemarkEmitterPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+char MachineOptimizationRemarkEmitterPass::ID = 0;
+static const char ore_name[] = "Machine Optimization Remark Emitter";
+#define ORE_NAME "machine-opt-remark-emitter"
+
+INITIALIZE_PASS_BEGIN(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(LazyMachineBlockFrequencyInfoPass)
+INITIALIZE_PASS_END(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ false, true)
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
new file mode 100644
index 0000000000000..581a8ad811497
--- /dev/null
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -0,0 +1,1251 @@
+//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Replaces repeated sequences of instructions with function calls.
+///
+/// This works by placing every instruction from every basic block in a
+/// suffix tree, and repeatedly querying that tree for repeated sequences of
+/// instructions. If a sequence of instructions appears often, then it ought
+/// to be beneficial to pull it out into a function.
+///
+/// This was originally presented at the 2016 LLVM Developers' Meeting in the
+/// talk "Reducing Code Size Using Outlining". For a high-level overview of
+/// how this pass works, the talk is available on YouTube at
+///
+/// https://www.youtube.com/watch?v=yorld-WSOeU
+///
+/// The slides for the talk are available at
+///
+/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
+///
+/// The talk provides an overview of how the outliner finds candidates and
+/// ultimately outlines them. It describes how the main data structure for this
+/// pass, the suffix tree, is queried and purged for candidates. It also gives
+/// a simplified construction algorithm for suffix trees, based on the
+/// algorithm actually used here, Ukkonen's algorithm.
+///
+/// For the original RFC for this pass, please see
+///
+/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
+///
+/// For more information on the suffix tree data structure, please see
+/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+///
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <functional>
+#include <map>
+#include <sstream>
+#include <tuple>
+#include <vector>
+
+#define DEBUG_TYPE "machine-outliner"
+
+using namespace llvm;
+
+STATISTIC(NumOutlined, "Number of candidates outlined");
+STATISTIC(FunctionsCreated, "Number of functions created");
+
+namespace {
+
+/// \brief An individual sequence of instructions to be replaced with a call to
+/// an outlined function.
+struct Candidate {
+
+ /// Set to false if the candidate overlapped with another candidate.
+ bool InCandidateList = true;
+
+ /// The start index of this \p Candidate.
+ size_t StartIdx;
+
+ /// The number of instructions in this \p Candidate.
+ size_t Len;
+
+ /// The index of this \p Candidate's \p OutlinedFunction in the list of
+ /// \p OutlinedFunctions.
+ size_t FunctionIdx;
+
+ /// \brief The number of instructions that would be saved by outlining every
+ /// candidate of this type.
+ ///
+ /// This is a fixed value which is not updated during the candidate pruning
+ /// process. It is only used for deciding which candidate to keep if two
+ /// candidates overlap. The true benefit is stored in the OutlinedFunction
+ /// for some given candidate.
+ unsigned Benefit = 0;
+
+ Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx)
+ : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {}
+
+ Candidate() {}
+
+ /// \brief Used to ensure that \p Candidates are outlined in an order that
+ /// preserves the start and end indices of other \p Candidates.
+ bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; }
+};
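The inverted operator< above means a plain ascending sort orders Candidates by decreasing StartIdx, so candidates nearer the end of the instruction stream are handled first and the start indices of the ones not yet processed stay valid. A tiny sketch of that ordering; the struct below mirrors only the fields needed for the demonstration:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Minimal stand-in for the pass's Candidate: just a start index and length.
struct Candidate {
  size_t StartIdx;
  size_t Len;
  // Inverted comparison: "smaller" means a larger start index.
  bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; }
};

int main() {
  std::vector<Candidate> Candidates = {{4, 3}, {20, 3}, {11, 3}};

  // Ascending sort with the inverted operator< yields descending StartIdx.
  std::sort(Candidates.begin(), Candidates.end());

  for (const Candidate &C : Candidates)
    std::cout << "outline [" << C.StartIdx << ", " << C.StartIdx + C.Len << ")\n";
  // Prints the ranges starting at 20, 11, 4: rewriting the later ranges first
  // leaves the earlier start indices untouched.
  return 0;
}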
+
+/// \brief The information necessary to create an outlined function for some
+/// class of candidate.
+struct OutlinedFunction {
+
+ /// The actual outlined function created.
+ /// This is initialized after we go through and create the actual function.
+ MachineFunction *MF = nullptr;
+
+ /// A number assigned to this function which appears at the end of its name.
+ size_t Name;
+
+ /// The number of candidates for this OutlinedFunction.
+ size_t OccurrenceCount = 0;
+
+ /// \brief The sequence of integers corresponding to the instructions in this
+ /// function.
+ std::vector<unsigned> Sequence;
+
+ /// The number of instructions this function would save.
+ unsigned Benefit = 0;
+
+ /// \brief Set to true if candidates for this outlined function should be
+ /// replaced with tail calls to this OutlinedFunction.
+ bool IsTailCall = false;
+
+ OutlinedFunction(size_t Name, size_t OccurrenceCount,
+ const std::vector<unsigned> &Sequence,
+ unsigned Benefit, bool IsTailCall)
+ : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence),
+ Benefit(Benefit), IsTailCall(IsTailCall)
+ {}
+};
+
+/// Represents an undefined index in the suffix tree.
+const size_t EmptyIdx = -1;
+
+/// A node in a suffix tree which represents a substring or suffix.
+///
+/// Each node has either no children or at least two children, with the root
+/// being an exception in the empty tree.
+///
+/// Children are represented as a map between unsigned integers and nodes. If
+/// a node N has a child M on unsigned integer k, then the mapping represented
+/// by N is a proper prefix of the mapping represented by M. Note that this,
+/// although similar to a trie is somewhat different: each node stores a full
+/// substring of the full mapping rather than a single character state.
+///
+/// Each internal node contains a pointer to the internal node representing
+/// the same string, but with the first character chopped off. This is stored
+/// in \p Link. Each leaf node stores the start index of its respective
+/// suffix in \p SuffixIdx.
+struct SuffixTreeNode {
+
+ /// The children of this node.
+ ///
+ /// A child existing on an unsigned integer implies that from the mapping
+ /// represented by the current node, there is a way to reach another
+ /// mapping by tacking that character on the end of the current string.
+ DenseMap<unsigned, SuffixTreeNode *> Children;
+
+ /// A flag set to false if the node has been pruned from the tree.
+ bool IsInTree = true;
+
+ /// The start index of this node's substring in the main string.
+ size_t StartIdx = EmptyIdx;
+
+ /// The end index of this node's substring in the main string.
+ ///
+ /// Every leaf node must have its \p EndIdx incremented at the end of every
+ /// step in the construction algorithm. To avoid having to update O(N)
+ /// nodes individually at the end of every step, the end index is stored
+ /// as a pointer.
+ size_t *EndIdx = nullptr;
+
+ /// For leaves, the start index of the suffix represented by this node.
+ ///
+ /// For all other nodes, this is ignored.
+ size_t SuffixIdx = EmptyIdx;
+
+ /// \brief For internal nodes, a pointer to the internal node representing
+ /// the same sequence with the first character chopped off.
+ ///
+ /// This has two major purposes in the suffix tree. The first is as a
+ /// shortcut in Ukkonen's construction algorithm. One of the things that
+ /// Ukkonen's algorithm does to achieve linear-time construction is
+ /// keep track of which node the next insert should be at. This makes each
+ /// insert O(1), and there are a total of O(N) inserts. The suffix link
+ /// helps with inserting children of internal nodes.
+ ///
+ /// Say we add a child to an internal node with associated mapping S. The
+ /// next insertion must be at the node representing S minus its first character.
+ /// This is given by the way that we iteratively build the tree in Ukkonen's
+ /// algorithm. The main idea is to look at the suffixes of each prefix in the
+ /// string, starting with the longest suffix of the prefix, and ending with
+ /// the shortest. Therefore, if we keep pointers between such nodes, we can
+ /// move to the next insertion point in O(1) time. If we don't, then we'd
+ /// have to query from the root, which takes O(N) time. This would make the
+ /// construction algorithm O(N^2) rather than O(N).
+ ///
+ /// The suffix link is also used during the tree pruning process to let us
+ /// quickly throw out a bunch of potential overlaps. Say we have a sequence
+ /// S we want to outline. Then each of its suffixes contribute to at least
+ /// one overlapping case. Therefore, we can follow the suffix links
+ /// starting at the node associated with S to the root and "delete" those
+ /// nodes, save for the root. For each candidate, this removes
+ /// O(|candidate|) overlaps from the search space. We don't actually
+ /// completely invalidate these nodes though; doing that is far too
+ /// aggressive. Consider the following pathological string:
+ ///
+ /// 1 2 3 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3
+ ///
+ /// If we, for the sake of example, outlined 1 2 3, then we would throw
+ /// out all instances of 2 3. This isn't desirable. To get around this,
+ /// when we visit a link node, we decrement its occurrence count by the
+ /// number of sequences we outlined in the current step. In the pathological
+ /// example, the 2 3 node would have an occurrence count of 8, while the
+ /// 1 2 3 node would have an occurrence count of 2. Thus, the 2 3 node
+ /// would survive to the next round allowing us to outline the extra
+ /// instances of 2 3.
+ SuffixTreeNode *Link = nullptr;
+
+ /// The parent of this node. Every node except for the root has a parent.
+ SuffixTreeNode *Parent = nullptr;
+
+ /// The number of times this node's string appears in the tree.
+ ///
+ /// This is equal to the number of leaf children of the string. It represents
+ /// the number of suffixes that the node's string is a prefix of.
+ size_t OccurrenceCount = 0;
+
+ /// The length of the string formed by concatenating the edge labels from the
+ /// root to this node.
+ size_t ConcatLen = 0;
+
+ /// Returns true if this node is a leaf.
+ bool isLeaf() const { return SuffixIdx != EmptyIdx; }
+
+ /// Returns true if this node is the root of its owning \p SuffixTree.
+ bool isRoot() const { return StartIdx == EmptyIdx; }
+
+ /// Return the number of elements in the substring associated with this node.
+ size_t size() const {
+
+ // Is it the root? If so, it's the empty string so return 0.
+ if (isRoot())
+ return 0;
+
+ assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
+
+ // Size = the number of elements in the string.
+ // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
+ return *EndIdx - StartIdx + 1;
+ }
+
+ SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link,
+ SuffixTreeNode *Parent)
+ : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
+
+ SuffixTreeNode() {}
+};
+
+/// A data structure for fast substring queries.
+///
+/// Suffix trees represent the suffixes of their input strings in their leaves.
+/// A suffix tree is a type of compressed trie structure where each node
+/// represents an entire substring rather than a single character. Each leaf
+/// of the tree is a suffix.
+///
+/// A suffix tree can be seen as a type of state machine where each state is a
+/// substring of the full string. The tree is structured so that, for a string
+/// of length N, there are exactly N leaves in the tree. This structure allows
+/// us to quickly find repeated substrings of the input string.
+///
+/// In this implementation, a "string" is a vector of unsigned integers.
+/// These integers may result from hashing some data type. A suffix tree can
+/// contain 1 or many strings, which can then be queried as one large string.
+///
+/// The suffix tree is implemented using Ukkonen's algorithm for linear-time
+/// suffix tree construction. Ukkonen's algorithm is explained in more detail
+/// in the paper by Esko Ukkonen, "On-line construction of suffix trees". The
+/// paper is available at
+///
+/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+class SuffixTree {
+private:
+ /// Each element is an integer representing an instruction in the module.
+ ArrayRef<unsigned> Str;
+
+ /// Maintains each node in the tree.
+ SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
+
+ /// The root of the suffix tree.
+ ///
+ /// The root represents the empty string. It is maintained by the
+ /// \p NodeAllocator like every other node in the tree.
+ SuffixTreeNode *Root = nullptr;
+
+ /// Stores each leaf node in the tree.
+ ///
+ /// This is used for finding outlining candidates.
+ std::vector<SuffixTreeNode *> LeafVector;
+
+ /// Maintains the end indices of the internal nodes in the tree.
+ ///
+ /// Each internal node is guaranteed to never have its end index change
+ /// during the construction algorithm; however, leaves must be updated at
+ /// every step. Therefore, we need to store leaf end indices by reference
+ /// to avoid updating O(N) leaves at every step of construction. Thus,
+ /// every internal node must be allocated its own end index.
+ BumpPtrAllocator InternalEndIdxAllocator;
+
+ /// The end index of each leaf in the tree.
+ size_t LeafEndIdx = -1;
+
+ /// \brief Helper struct which keeps track of the next insertion point in
+ /// Ukkonen's algorithm.
+ struct ActiveState {
+ /// The next node to insert at.
+ SuffixTreeNode *Node;
+
+ /// The index of the first character in the substring currently being added.
+ size_t Idx = EmptyIdx;
+
+ /// The length of the substring we have to add at the current step.
+ size_t Len = 0;
+ };
+
+ /// \brief The point the next insertion will take place at in the
+ /// construction algorithm.
+ ActiveState Active;
+
+ /// Allocate a leaf node and add it to the tree.
+ ///
+ /// \param Parent The parent of this node.
+ /// \param StartIdx The start index of this node's associated string.
+ /// \param Edge The label on the edge leaving \p Parent to this node.
+ ///
+ /// \returns A pointer to the allocated leaf node.
+ SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx,
+ unsigned Edge) {
+
+ assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
+
+ SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
+ &LeafEndIdx,
+ nullptr,
+ &Parent);
+ Parent.Children[Edge] = N;
+
+ return N;
+ }
+
+ /// Allocate an internal node and add it to the tree.
+ ///
+ /// \param Parent The parent of this node. Only null when allocating the root.
+ /// \param StartIdx The start index of this node's associated string.
+ /// \param EndIdx The end index of this node's associated string.
+ /// \param Edge The label on the edge leaving \p Parent to this node.
+ ///
+ /// \returns A pointer to the allocated internal node.
+ SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx,
+ size_t EndIdx, unsigned Edge) {
+
+ assert(StartIdx <= EndIdx && "String can't start after it ends!");
+ assert(!(!Parent && StartIdx != EmptyIdx) &&
+ "Non-root internal nodes must have parents!");
+
+ size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx);
+ SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
+ E,
+ Root,
+ Parent);
+ if (Parent)
+ Parent->Children[Edge] = N;
+
+ return N;
+ }
+
+ /// \brief Set the suffix indices of the leaves to the start indices of their
+ /// respective suffixes. Also stores each leaf in \p LeafVector at its
+ /// respective suffix index.
+ ///
+ /// \param[in] CurrNode The node currently being visited.
+ /// \param CurrIdx The current index of the string being visited.
+ void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) {
+
+ bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
+
+ // Store the length of the concatenation of all strings from the root to
+ // this node.
+ if (!CurrNode.isRoot()) {
+ if (CurrNode.ConcatLen == 0)
+ CurrNode.ConcatLen = CurrNode.size();
+
+ if (CurrNode.Parent)
+ CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
+ }
+
+ // Traverse the tree depth-first.
+ for (auto &ChildPair : CurrNode.Children) {
+ assert(ChildPair.second && "Node had a null child!");
+ setSuffixIndices(*ChildPair.second,
+ CurrIdx + ChildPair.second->size());
+ }
+
+ // Is this node a leaf?
+ if (IsLeaf) {
+ // If yes, give it a suffix index and bump its parent's occurrence count.
+ CurrNode.SuffixIdx = Str.size() - CurrIdx;
+ assert(CurrNode.Parent && "CurrNode had no parent!");
+ CurrNode.Parent->OccurrenceCount++;
+
+ // Store the leaf in the leaf vector for pruning later.
+ LeafVector[CurrNode.SuffixIdx] = &CurrNode;
+ }
+ }
+
+ /// \brief Construct the suffix tree for the prefix of the input ending at
+ /// \p EndIdx.
+ ///
+ /// Used to construct the full suffix tree iteratively. At the end of each
+ /// step, the constructed suffix tree is either a valid suffix tree, or a
+ /// suffix tree with implicit suffixes. At the end of the final step, the
+ /// suffix tree is a valid tree.
+ ///
+ /// \param EndIdx The end index of the current prefix in the main string.
+ /// \param SuffixesToAdd The number of suffixes that must be added
+ /// to complete the suffix tree at the current phase.
+ ///
+ /// \returns The number of suffixes that have not been added at the end of
+ /// this step.
+ unsigned extend(size_t EndIdx, size_t SuffixesToAdd) {
+ SuffixTreeNode *NeedsLink = nullptr;
+
+ while (SuffixesToAdd > 0) {
+
+ // Are we waiting to add anything other than just the last character?
+ if (Active.Len == 0) {
+ // If not, then say the active index is the end index.
+ Active.Idx = EndIdx;
+ }
+
+ assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
+
+ // The first character in the current substring we're looking at.
+ unsigned FirstChar = Str[Active.Idx];
+
+ // Have we inserted anything starting with FirstChar at the current node?
+ if (Active.Node->Children.count(FirstChar) == 0) {
+ // If not, then we can just insert a leaf and move to the next step.
+ insertLeaf(*Active.Node, EndIdx, FirstChar);
+
+ // The active node is an internal node, and we visited it, so it must
+ // need a link if it doesn't have one.
+ if (NeedsLink) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+ } else {
+ // There's a match with FirstChar, so look for the point in the tree to
+ // insert a new node.
+ SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
+
+ size_t SubstringLen = NextNode->size();
+
+ // Is the current suffix we're trying to insert longer than the size of
+ // the child we want to move to?
+ if (Active.Len >= SubstringLen) {
+ // If yes, then consume the characters we've seen and move to the next
+ // node.
+ Active.Idx += SubstringLen;
+ Active.Len -= SubstringLen;
+ Active.Node = NextNode;
+ continue;
+ }
+
+ // Otherwise, the suffix we're trying to insert must be contained in the
+ // next node we want to move to.
+ unsigned LastChar = Str[EndIdx];
+
+ // Is the string we're trying to insert a substring of the next node?
+ if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
+ // If yes, then we're done for this step. Remember our insertion point
+ // and move to the next end index. At this point, we have an implicit
+ // suffix tree.
+ if (NeedsLink && !Active.Node->isRoot()) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+
+ Active.Len++;
+ break;
+ }
+
+ // The string we're trying to insert isn't a substring of the next node,
+ // but matches up to a point. Split the node.
+ //
+ // For example, say we ended our search at a node n and we're trying to
+ // insert ABD. Then we'll create a new node s for AB, reduce n to just
+ // representing C, and insert a new leaf node l to represent D. This
+ // allows us to ensure that if n was a leaf, it remains a leaf.
+ //
+ //   | ABC  ---split--->  | AB
+ //   n                    s
+ //                     C / \ D
+ //                      n   l
+
+ // The node s from the diagram
+ SuffixTreeNode *SplitNode =
+ insertInternalNode(Active.Node,
+ NextNode->StartIdx,
+ NextNode->StartIdx + Active.Len - 1,
+ FirstChar);
+
+ // Insert the new node representing the new substring into the tree as
+ // a child of the split node. This is the node l from the diagram.
+ insertLeaf(*SplitNode, EndIdx, LastChar);
+
+ // Make the old node a child of the split node and update its start
+ // index. This is the node n from the diagram.
+ NextNode->StartIdx += Active.Len;
+ NextNode->Parent = SplitNode;
+ SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
+
+ // SplitNode is an internal node, update the suffix link.
+ if (NeedsLink)
+ NeedsLink->Link = SplitNode;
+
+ NeedsLink = SplitNode;
+ }
+
+ // We've added something new to the tree, so there's one less suffix to
+ // add.
+ SuffixesToAdd--;
+
+ if (Active.Node->isRoot()) {
+ if (Active.Len > 0) {
+ Active.Len--;
+ Active.Idx = EndIdx - SuffixesToAdd + 1;
+ }
+ } else {
+ // Start the next phase at the next smallest suffix.
+ Active.Node = Active.Node->Link;
+ }
+ }
+
+ return SuffixesToAdd;
+ }
+
+public:
+
+ /// Find all repeated substrings that satisfy \p BenefitFn.
+ ///
+ /// If a substring appears at least twice, then it must be represented by
+ /// an internal node which appears in at least two suffixes. Each suffix is
+ /// represented by a leaf node. To do this, we visit each internal node in
+ /// the tree, using the leaf children of each internal node. If an internal
+ /// node represents a beneficial substring, then we use each of its leaf
+ /// children to find the locations of its substring.
+ ///
+ /// \param[out] CandidateList Filled with candidates representing each
+ /// beneficial substring.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions, one
+ /// for each type of candidate.
+ /// \param BenefitFn The function to satisfy.
+ ///
+ /// \returns The length of the longest candidate found.
+ size_t findCandidates(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ const std::function<unsigned(SuffixTreeNode &, size_t, unsigned)>
+ &BenefitFn) {
+
+ CandidateList.clear();
+ FunctionList.clear();
+ size_t FnIdx = 0;
+ size_t MaxLen = 0;
+
+ for (SuffixTreeNode* Leaf : LeafVector) {
+ assert(Leaf && "Leaves in LeafVector cannot be null!");
+ if (!Leaf->IsInTree)
+ continue;
+
+ assert(Leaf->Parent && "All leaves must have parents!");
+ SuffixTreeNode &Parent = *(Leaf->Parent);
+
+ // If it doesn't appear enough, or we already outlined from it, skip it.
+ if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
+ continue;
+
+ size_t StringLen = Leaf->ConcatLen - Leaf->size();
+
+ // How many instructions would outlining this string save?
+ unsigned Benefit = BenefitFn(Parent,
+ StringLen, Str[Leaf->SuffixIdx + StringLen - 1]);
+
+ // If it's not beneficial, skip it.
+ if (Benefit < 1)
+ continue;
+
+ if (StringLen > MaxLen)
+ MaxLen = StringLen;
+
+ unsigned OccurrenceCount = 0;
+ for (auto &ChildPair : Parent.Children) {
+ SuffixTreeNode *M = ChildPair.second;
+
+ // Is it a leaf? If so, we have an occurrence of this candidate.
+ if (M && M->IsInTree && M->isLeaf()) {
+ OccurrenceCount++;
+ CandidateList.emplace_back(M->SuffixIdx, StringLen, FnIdx);
+ CandidateList.back().Benefit = Benefit;
+ M->IsInTree = false;
+ }
+ }
+
+ // Save the function for the new candidate sequence.
+ std::vector<unsigned> CandidateSequence;
+ for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
+ CandidateSequence.push_back(Str[i]);
+
+ FunctionList.emplace_back(FnIdx, OccurrenceCount, CandidateSequence,
+ Benefit, false);
+
+ // Move to the next function.
+ FnIdx++;
+ Parent.IsInTree = false;
+ }
+
+ return MaxLen;
+ }
+
+ /// Construct a suffix tree from a sequence of unsigned integers.
+ ///
+ /// \param Str The string to construct the suffix tree for.
+ SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
+ Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
+ Root->IsInTree = true;
+ Active.Node = Root;
+ LeafVector = std::vector<SuffixTreeNode*>(Str.size());
+
+    // Keep track of the number of suffixes we have to add for the current
+    // prefix.
+ size_t SuffixesToAdd = 0;
+ Active.Node = Root;
+
+ // Construct the suffix tree iteratively on each prefix of the string.
+ // PfxEndIdx is the end index of the current prefix.
+ // End is one past the last element in the string.
+ for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
+ SuffixesToAdd++;
+ LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
+ SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
+ }
+
+ // Set the suffix indices of each leaf.
+ assert(Root && "Root node can't be nullptr!");
+ setSuffixIndices(*Root, 0);
+ }
+};
+
+/// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings.
+struct InstructionMapper {
+
+ /// \brief The next available integer to assign to a \p MachineInstr that
+ /// cannot be outlined.
+ ///
+  /// Set to -3 for compatibility with \p DenseMapInfo<unsigned>.
+ unsigned IllegalInstrNumber = -3;
+
+ /// \brief The next available integer to assign to a \p MachineInstr that can
+ /// be outlined.
+ unsigned LegalInstrNumber = 0;
+
+ /// Correspondence from \p MachineInstrs to unsigned integers.
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
+ InstructionIntegerMap;
+
+  /// Correspondence from unsigned integers to \p MachineInstrs.
+ /// Inverse of \p InstructionIntegerMap.
+ DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
+
+ /// The vector of unsigned integers that the module is mapped to.
+ std::vector<unsigned> UnsignedVec;
+
+ /// \brief Stores the location of the instruction associated with the integer
+ /// at index i in \p UnsignedVec for each index i.
+ std::vector<MachineBasicBlock::iterator> InstrList;
+
+ /// \brief Maps \p *It to a legal integer.
+ ///
+ /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
+ /// \p IntegerInstructionMap, and \p LegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
+
+ // Get the integer for this instruction or give it the current
+ // LegalInstrNumber.
+ InstrList.push_back(It);
+ MachineInstr &MI = *It;
+ bool WasInserted;
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
+ ResultIt;
+ std::tie(ResultIt, WasInserted) =
+ InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
+ unsigned MINumber = ResultIt->second;
+
+ // There was an insertion.
+ if (WasInserted) {
+ LegalInstrNumber++;
+ IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
+ }
+
+ UnsignedVec.push_back(MINumber);
+
+ // Make sure we don't overflow or use any integers reserved by the DenseMap.
+ if (LegalInstrNumber >= IllegalInstrNumber)
+ report_fatal_error("Instruction mapping overflow!");
+
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey()
+ && "Tried to assign DenseMap tombstone or empty key to instruction.");
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey()
+ && "Tried to assign DenseMap tombstone or empty key to instruction.");
+
+ return MINumber;
+ }
+
+ /// Maps \p *It to an illegal integer.
+ ///
+ /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned MINumber = IllegalInstrNumber;
+
+ InstrList.push_back(It);
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+
+ assert(LegalInstrNumber < IllegalInstrNumber &&
+ "Instruction mapping overflow!");
+
+ assert(IllegalInstrNumber !=
+ DenseMapInfo<unsigned>::getEmptyKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ assert(IllegalInstrNumber !=
+ DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ return MINumber;
+ }
+
+ /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
+ /// and appends it to \p UnsignedVec and \p InstrList.
+ ///
+ /// Two instructions are assigned the same integer if they are identical.
+  /// If an instruction is deemed unsafe to outline, then it will be assigned a
+  /// unique integer. The resulting mapping is placed into a suffix tree and
+ /// queried for candidates.
+ ///
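+  /// For instance (the numbering here is illustrative only), a block
+  /// containing "add, mul, add, call" where the call is unsafe to outline
+  /// might map to [0, 1, 0, -3]: the two identical adds share the integer 0,
+  /// the mul gets 1, and the call gets the first illegal number (-3 as an
+  /// unsigned).
+  ///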
+ /// \param MBB The \p MachineBasicBlock to be translated into integers.
+ /// \param TRI \p TargetRegisterInfo for the module.
+ /// \param TII \p TargetInstrInfo for the module.
+ void convertToUnsignedVec(MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
+ It++) {
+
+ // Keep track of where this instruction is in the module.
+ switch(TII.getOutliningType(*It)) {
+ case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
+ mapToIllegalUnsigned(It);
+ break;
+
+ case TargetInstrInfo::MachineOutlinerInstrType::Legal:
+ mapToLegalUnsigned(It);
+ break;
+
+ case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
+ break;
+ }
+ }
+
+    // After we're done with every insertion, uniquely terminate this part of
+    // the "string". This makes sure we won't match across basic block or
+    // function boundaries since the "end" is encoded uniquely and thus appears
+    // in no repeated substring.
+ InstrList.push_back(MBB.end());
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+ }
+
+ InstructionMapper() {
+ // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
+ // changed.
+ assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
+ "DenseMapInfo<unsigned>'s empty key isn't -1!");
+ assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
+ "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+ }
+};
+
+/// \brief An interprocedural pass which finds repeated sequences of
+/// instructions and replaces them with calls to functions.
+///
+/// Each instruction is mapped to an unsigned integer and placed in a string.
+/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
+/// is then repeatedly queried for repeated sequences of instructions. Each
+/// non-overlapping repeated sequence is then placed in its own
+/// \p MachineFunction and each instance is then replaced with a call to that
+/// function.
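+///
+/// Sketch of the intended transformation (the instruction names and the
+/// function name are illustrative):
+///
+///   bb0: A B C D           bb0: A call OUTLINED_FUNCTION_0 D
+///   bb1: E B C F   --->    bb1: E call OUTLINED_FUNCTION_0 F
+///                          OUTLINED_FUNCTION_0: B C return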
+struct MachineOutliner : public ModulePass {
+
+ static char ID;
+
+ StringRef getPassName() const override { return "Machine Outliner"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfo>();
+ AU.addPreserved<MachineModuleInfo>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+
+ MachineOutliner() : ModulePass(ID) {
+ initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// \brief Replace the sequences of instructions represented by the
+ /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
+ /// described in \p FunctionList.
+ ///
+ /// \param M The module we are outlining from.
+ /// \param CandidateList A list of candidates to be outlined.
+ /// \param FunctionList A list of functions to be inserted into the module.
+ /// \param Mapper Contains the instruction mappings for the module.
+ bool outline(Module &M, const ArrayRef<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper);
+
+ /// Creates a function for \p OF and inserts it into the module.
+ MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
+ InstructionMapper &Mapper);
+
+ /// Find potential outlining candidates and store them in \p CandidateList.
+ ///
+ /// For each type of potential candidate, also build an \p OutlinedFunction
+ /// struct containing the information to build the function for that
+ /// candidate.
+ ///
+ /// \param[out] CandidateList Filled with outlining candidates for the module.
+ /// \param[out] FunctionList Filled with functions corresponding to each type
+ /// of \p Candidate.
+ /// \param ST The suffix tree for the module.
+ /// \param TII TargetInstrInfo for the module.
+ ///
+ /// \returns The length of the longest candidate found. 0 if there are none.
+ unsigned buildCandidateList(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ SuffixTree &ST,
+ InstructionMapper &Mapper,
+ const TargetInstrInfo &TII);
+
+ /// \brief Remove any overlapping candidates that weren't handled by the
+ /// suffix tree's pruning method.
+ ///
+ /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
+  /// If a short candidate is chosen for outlining, and a longer candidate
+  /// which has that short candidate as a suffix is then chosen, the tree's
+  /// pruning method will not find it. Thus, we need to prune before outlining
+  /// as well.
+ ///
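+  /// As a hypothetical example, a candidate covering indices [5, 7] and a
+  /// candidate covering indices [3, 6] overlap at indices 5 and 6; only the
+  /// one with the larger estimated benefit is kept.
+  ///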
+ /// \param[in,out] CandidateList A list of outlining candidates.
+ /// \param[in,out] FunctionList A list of functions to be outlined.
+ /// \param MaxCandidateLen The length of the longest candidate.
+ /// \param TII TargetInstrInfo for the module.
+ void pruneOverlaps(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ unsigned MaxCandidateLen,
+ const TargetInstrInfo &TII);
+
+ /// Construct a suffix tree on the instructions in \p M and outline repeated
+ /// strings from that tree.
+ bool runOnModule(Module &M) override;
+};
+
+} // Anonymous namespace.
+
+char MachineOutliner::ID = 0;
+
+namespace llvm {
+ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
+}
+
+INITIALIZE_PASS(MachineOutliner, "machine-outliner",
+ "Machine Function Outliner", false, false)
+
+void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ unsigned MaxCandidateLen,
+ const TargetInstrInfo &TII) {
+ // TODO: Experiment with interval trees or other interval-checking structures
+ // to lower the time complexity of this function.
+ // TODO: Can we do better than the simple greedy choice?
+ // Check for overlaps in the range.
+ // This is O(MaxCandidateLen * CandidateList.size()).
+ for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
+ It++) {
+ Candidate &C1 = *It;
+ OutlinedFunction &F1 = FunctionList[C1.FunctionIdx];
+
+ // If we removed this candidate, skip it.
+ if (!C1.InCandidateList)
+ continue;
+
+ // Is it still worth it to outline C1?
+ if (F1.Benefit < 1 || F1.OccurrenceCount < 2) {
+ assert(F1.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F1.OccurrenceCount--;
+ C1.InCandidateList = false;
+ continue;
+ }
+
+ // The minimum start index of any candidate that could overlap with this
+ // one.
+ unsigned FarthestPossibleIdx = 0;
+
+ // Either the index is 0, or it's at most MaxCandidateLen indices away.
+ if (C1.StartIdx > MaxCandidateLen)
+ FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen;
+
+    // Compare against the candidates in the list whose start index is at
+    // least FarthestPossibleIdx, i.e. at most MaxCandidateLen indices away
+    // from C1's start index. There are at most MaxCandidateLen of these.
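+    // For example (illustrative numbers): with MaxCandidateLen == 5 and
+    // C1.StartIdx == 12, FarthestPossibleIdx is 7, and any candidate starting
+    // before index 7 is too far away to overlap C1.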
+ for (auto Sit = It + 1; Sit != Et; Sit++) {
+ Candidate &C2 = *Sit;
+ OutlinedFunction &F2 = FunctionList[C2.FunctionIdx];
+
+ // Is this candidate too far away to overlap?
+ if (C2.StartIdx < FarthestPossibleIdx)
+ break;
+
+ // Did we already remove this candidate in a previous step?
+ if (!C2.InCandidateList)
+ continue;
+
+ // Is the function beneficial to outline?
+ if (F2.OccurrenceCount < 2 || F2.Benefit < 1) {
+ // If not, remove this candidate and move to the next one.
+ assert(F2.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F2.OccurrenceCount--;
+ C2.InCandidateList = false;
+ continue;
+ }
+
+ size_t C2End = C2.StartIdx + C2.Len - 1;
+
+ // Do C1 and C2 overlap?
+ //
+ // Not overlapping:
+ // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
+ //
+      // We sorted our candidate list so C2Start <= C1Start. We know that
+      // C2End > C2Start since each candidate has length >= 2. Therefore, all
+      // we have to check is whether C2End < C1Start: if it is, the candidates
+      // don't overlap.
+ if (C2End < C1.StartIdx)
+ continue;
+
+ // C1 and C2 overlap.
+ // We need to choose the better of the two.
+ //
+ // Approximate this by picking the one which would have saved us the
+ // most instructions before any pruning.
+ if (C1.Benefit >= C2.Benefit) {
+
+ // C1 is better, so remove C2 and update C2's OutlinedFunction to
+ // reflect the removal.
+ assert(F2.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F2.OccurrenceCount--;
+ F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(),
+ F2.OccurrenceCount,
+ F2.IsTailCall
+ );
+
+ C2.InCandidateList = false;
+
+ DEBUG (
+ dbgs() << "- Removed C2. \n";
+ dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount << "\n";
+ dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";
+ );
+
+ } else {
+ // C2 is better, so remove C1 and update C1's OutlinedFunction to
+ // reflect the removal.
+ assert(F1.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F1.OccurrenceCount--;
+ F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(),
+ F1.OccurrenceCount,
+ F1.IsTailCall
+ );
+ C1.InCandidateList = false;
+
+ DEBUG (
+ dbgs() << "- Removed C1. \n";
+ dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount << "\n";
+ dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";
+ );
+
+ // C1 is out, so we don't have to compare it against anyone else.
+ break;
+ }
+ }
+ }
+}
+
+unsigned
+MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ SuffixTree &ST,
+ InstructionMapper &Mapper,
+ const TargetInstrInfo &TII) {
+
+ std::vector<unsigned> CandidateSequence; // Current outlining candidate.
+ size_t MaxCandidateLen = 0; // Length of the longest candidate.
+
+  // Benefit function used when querying the suffix tree for candidates.
+ // This allows us to define more fine-grained types of things to outline in
+ // the target without putting target-specific info in the suffix tree.
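+  // As a rough, illustrative model only (the real numbers come from
+  // TII.getOutliningBenefit): outlining a sequence of length L that occurs O
+  // times replaces O * L instructions with O calls plus the L outlined
+  // instructions, so the saving is on the order of O * L - (O + L).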
+ auto BenefitFn = [&TII, &Mapper](const SuffixTreeNode &Curr,
+ size_t StringLen, unsigned EndVal) {
+
+ // The root represents the empty string.
+ if (Curr.isRoot())
+ return 0u;
+
+ // Is this long enough to outline?
+ // TODO: Let the target decide how "long" a string is in terms of the sizes
+ // of the instructions in the string. For example, if a call instruction
+ // is smaller than a one instruction string, we should outline that string.
+ if (StringLen < 2)
+ return 0u;
+
+ size_t Occurrences = Curr.OccurrenceCount;
+
+ // Anything we want to outline has to appear at least twice.
+ if (Occurrences < 2)
+ return 0u;
+
+ // Check if the last instruction in the sequence is a return.
+ MachineInstr *LastInstr =
+ Mapper.IntegerInstructionMap[EndVal];
+ assert(LastInstr && "Last instruction in sequence was unmapped!");
+
+ // The only way a terminator could be mapped as legal is if it was safe to
+ // tail call.
+ bool IsTailCall = LastInstr->isTerminator();
+ return TII.getOutliningBenefit(StringLen, Occurrences, IsTailCall);
+ };
+
+ MaxCandidateLen = ST.findCandidates(CandidateList, FunctionList, BenefitFn);
+
+ for (auto &OF : FunctionList)
+ OF.IsTailCall = Mapper.
+ IntegerInstructionMap[OF.Sequence.back()]->isTerminator();
+
+  // Sort the candidates in descending order of start index. This will
+  // simplify the outlining process when we have to remove the candidates from
+  // the mapping by allowing us to cut them out without keeping track of an
+  // offset.
+ std::stable_sort(CandidateList.begin(), CandidateList.end());
+
+ return MaxCandidateLen;
+}
+
+MachineFunction *
+MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
+ InstructionMapper &Mapper) {
+
+  // Create the function name. This should be unique. For now, just number the
+  // outlined functions in creation order, giving names like
+  // OUTLINED_FUNCTION_0, OUTLINED_FUNCTION_1, and so on.
+ std::ostringstream NameStream;
+ NameStream << "OUTLINED_FUNCTION" << "_" << OF.Name;
+
+ // Create the function using an IR-level function.
+ LLVMContext &C = M.getContext();
+ Function *F = dyn_cast<Function>(
+ M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
+ assert(F && "Function was null!");
+
+  // NOTE: If this function were given linkonce_odr linkage, we could take
+  // advantage of linker deduplication, which gives us better results when we
+  // outline from linkonce_odr functions.
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
+ IRBuilder<> Builder(EntryBB);
+ Builder.CreateRetVoid();
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineFunction &MF = MMI.getMachineFunction(*F);
+ MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+  // Insert the new basic block into the new function.
+ MF.insert(MF.begin(), &MBB);
+
+ TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall);
+
+ // Copy over the instructions for the function using the integer mappings in
+ // its sequence.
+ for (unsigned Str : OF.Sequence) {
+ MachineInstr *NewMI =
+ MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
+ NewMI->dropMemRefs();
+
+ // Don't keep debug information for outlined instructions.
+ // FIXME: This means outlined functions are currently undebuggable.
+ NewMI->setDebugLoc(DebugLoc());
+ MBB.insert(MBB.end(), NewMI);
+ }
+
+ TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall);
+
+ return &MF;
+}
+
+bool MachineOutliner::outline(Module &M,
+ const ArrayRef<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper) {
+
+ bool OutlinedSomething = false;
+
+ // Replace the candidates with calls to their respective outlined functions.
+ for (const Candidate &C : CandidateList) {
+
+ // Was the candidate removed during pruneOverlaps?
+ if (!C.InCandidateList)
+ continue;
+
+ // If not, then look at its OutlinedFunction.
+ OutlinedFunction &OF = FunctionList[C.FunctionIdx];
+
+ // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
+ if (OF.OccurrenceCount < 2 || OF.Benefit < 1)
+ continue;
+
+ // If not, then outline it.
+ assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
+ MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent();
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx];
+ unsigned EndIdx = C.StartIdx + C.Len - 1;
+
+ assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ assert(EndIt != MBB->end() && "EndIt out of bounds!");
+
+ EndIt++; // Erase needs one past the end index.
+
+ // Does this candidate have a function yet?
+ if (!OF.MF) {
+ OF.MF = createOutlinedFunction(M, OF, Mapper);
+ FunctionsCreated++;
+ }
+
+ MachineFunction *MF = OF.MF;
+ const TargetSubtargetInfo &STI = MF->getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Insert a call to the new function and erase the old sequence.
+ TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall);
+ StartIt = Mapper.InstrList[C.StartIdx];
+ MBB->erase(StartIt, EndIt);
+
+ OutlinedSomething = true;
+
+ // Statistics.
+ NumOutlined++;
+ }
+
+ DEBUG (
+ dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";
+ );
+
+ return OutlinedSomething;
+}
+
+bool MachineOutliner::runOnModule(Module &M) {
+
+ // Is there anything in the module at all?
+ if (M.empty())
+ return false;
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ const TargetSubtargetInfo &STI = MMI.getMachineFunction(*M.begin())
+ .getSubtarget();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+
+ InstructionMapper Mapper;
+
+ // Build instruction mappings for each function in the module.
+ for (Function &F : M) {
+ MachineFunction &MF = MMI.getMachineFunction(F);
+
+ // Is the function empty? Safe to outline from?
+ if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF))
+ continue;
+
+ // If it is, look at each MachineBasicBlock in the function.
+ for (MachineBasicBlock &MBB : MF) {
+
+ // Is there anything in MBB?
+ if (MBB.empty())
+ continue;
+
+ // If yes, map it.
+ Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
+ }
+ }
+
+ // Construct a suffix tree, use it to find candidates, and then outline them.
+ SuffixTree ST(Mapper.UnsignedVec);
+ std::vector<Candidate> CandidateList;
+ std::vector<OutlinedFunction> FunctionList;
+
+ // Find all of the outlining candidates.
+ unsigned MaxCandidateLen =
+ buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
+
+ // Remove candidates that overlap with other candidates.
+ pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII);
+
+ // Outline each of the candidates and return true if something was outlined.
+ return outline(M, CandidateList, FunctionList, Mapper);
+}
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 43a18099d39aa..d06c38cf4ed81 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -552,7 +552,9 @@ public:
os << "\n";
}
- void dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
};
/// This class repesents the scheduled code. The main data structure is a
@@ -593,7 +595,7 @@ private:
/// Virtual register information.
MachineRegisterInfo &MRI;
- DFAPacketizer *Resources;
+ std::unique_ptr<DFAPacketizer> Resources;
public:
SMSchedule(MachineFunction *mf)
@@ -604,13 +606,6 @@ public:
InitiationInterval = 0;
}
- ~SMSchedule() {
- ScheduledInstrs.clear();
- InstrToCycle.clear();
- RegToStageDiff.clear();
- delete Resources;
- }
-
void reset() {
ScheduledInstrs.clear();
InstrToCycle.clear();
@@ -738,7 +733,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
return false;
if (mf.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
!EnableSWPOptSize.getPosition())
return false;
@@ -960,7 +955,7 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
if (Phi.getOperand(i + 1).getMBB() != Loop)
InitVal = Phi.getOperand(i).getReg();
- else if (Phi.getOperand(i + 1).getMBB() == Loop)
+ else
LoopVal = Phi.getOperand(i).getReg();
assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
@@ -2514,7 +2509,7 @@ void SwingSchedulerDAG::generateExistingPhis(
MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
bool IsLast) {
- // Compute the stage number for the inital value of the Phi, which
+ // Compute the stage number for the initial value of the Phi, which
// comes from the prolog. The prolog to use depends on to which kernel/
// epilog that we're adding the Phi.
unsigned PrologStage = 0;
@@ -3480,7 +3475,7 @@ bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
// increment value to determine if the accesses may be loop carried.
if (OffsetS >= OffsetD)
return OffsetS + AccessSizeS > DeltaS;
- else if (OffsetS < OffsetD)
+ else
return OffsetD + AccessSizeD > DeltaD;
return true;
@@ -3980,5 +3975,7 @@ void SMSchedule::print(raw_ostream &os) const {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Utility function used for debugging to print the schedule.
-void SMSchedule::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
+#endif
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index fc32183c7f639..71ad4e6aa7f52 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,10 +1,9 @@
-
#include "llvm/CodeGen/MachineRegionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#define DEBUG_TYPE "region"
+#define DEBUG_TYPE "machine-region-info"
using namespace llvm;
@@ -86,6 +85,9 @@ bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
auto DF = &getAnalysis<MachineDominanceFrontier>();
RI.recalculate(F, DT, PDT, DF);
+
+ DEBUG(RI.dump());
+
return false;
}
@@ -103,9 +105,10 @@ void MachineRegionInfoPass::verifyAnalysis() const {
void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<DominanceFrontierWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
@@ -119,14 +122,15 @@ LLVM_DUMP_METHOD void MachineRegionInfoPass::dump() const {
#endif
char MachineRegionInfoPass::ID = 0;
+char &MachineRegionInfoPassID = MachineRegionInfoPass::ID;
-INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions",
- "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
-INITIALIZE_PASS_END(MachineRegionInfoPass, "regions",
- "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 242cb0b809536..128910f8eb2aa 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- lib/Codegen/MachineRegisterInfo.cpp -------------------------------===//
+//===- lib/Codegen/MachineRegisterInfo.cpp --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,13 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
using namespace llvm;
@@ -28,9 +42,9 @@ static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden,
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
- : MF(MF), TheDelegate(nullptr),
- TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
- EnableSubRegLiveness) {
+ : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
+ EnableSubRegLiveness),
+ IsUpdatedCSRsInitialized(false) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
@@ -444,8 +458,8 @@ LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
return TRC.getLaneMask();
}
-#ifndef NDEBUG
-void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (MachineInstr &I : use_instructions(Reg))
I.dump();
}
@@ -543,3 +557,47 @@ bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
}
return false;
}
+
+void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) {
+
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ assert(Reg && (Reg < TRI->getNumRegs()) &&
+ "Trying to disable an invalid register");
+
+ if (!IsUpdatedCSRsInitialized) {
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+
+ IsUpdatedCSRsInitialized = true;
+ }
+
+  // Remove the register (and its aliases) from the list.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
+ UpdatedCSRs.end());
+}
+
+const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
+ if (IsUpdatedCSRsInitialized)
+ return UpdatedCSRs.data();
+
+ return getTargetRegisterInfo()->getCalleeSavedRegs(MF);
+}
+
+void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
+ if (IsUpdatedCSRsInitialized)
+ UpdatedCSRs.clear();
+
+ for (MCPhysReg Reg : CSRs)
+ UpdatedCSRs.push_back(Reg);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+ IsUpdatedCSRsInitialized = true;
+}
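+
+// A minimal usage sketch, assuming a target that wants to drop a single
+// register from its callee-saved set (the register name below is
+// hypothetical, not taken from any in-tree target):
+//
+//   MachineRegisterInfo &MRI = MF.getRegInfo();
+//   MRI.disableCalleeSavedRegister(MyTarget::R12);
+//   const MCPhysReg *CSRs = MRI.getCalleeSavedRegs(); // zero-terminated list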
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index e06bc517fa916..41e161f71e532 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -12,30 +12,67 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
#define DEBUG_TYPE "misched"
namespace llvm {
+
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
@@ -43,7 +80,8 @@ cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
cl::desc("Print critical path length to stdout"));
-}
+
+} // end namespace llvm
#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
@@ -80,10 +118,6 @@ static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
-// Experimental heuristics
-static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
- cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-
static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
cl::desc("Verify machine instrs before and after machine scheduling"));
@@ -92,14 +126,14 @@ static const unsigned MinSubtreeSize = 8;
// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}
+
void ScheduleDAGMutation::anchor() {}
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
-MachineSchedContext::MachineSchedContext():
- MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
+MachineSchedContext::MachineSchedContext() {
RegClassInfo = new RegisterClassInfo();
}
@@ -108,6 +142,7 @@ MachineSchedContext::~MachineSchedContext() {
}
namespace {
+
/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
public MachineFunctionPass {
@@ -149,7 +184,8 @@ public:
protected:
ScheduleDAGInstrs *createPostMachineScheduler();
};
-} // namespace
+
+} // end anonymous namespace
char MachineScheduler::ID = 0;
@@ -158,6 +194,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
"Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
@@ -211,7 +248,7 @@ static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
- RegisterPassParser<MachineSchedRegistry> >
+ RegisterPassParser<MachineSchedRegistry>>
MachineSchedOpt("misched",
cl::init(&useDefaultMachineSched), cl::Hidden,
cl::desc("Machine instruction scheduler to use"));
@@ -448,7 +485,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for (;I != MBB->begin(); --I) {
+ for (; I != MBB->begin(); --I) {
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
@@ -504,13 +541,14 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
-LLVM_DUMP_METHOD
-void ReadyQueue::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ReadyQueue::dump() {
dbgs() << "Queue " << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
}
+#endif
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
@@ -519,8 +557,7 @@ void ReadyQueue::dump() {
// ===----------------------------------------------------------------------===/
// Provide a vtable anchor.
-ScheduleDAGMI::~ScheduleDAGMI() {
-}
+ScheduleDAGMI::~ScheduleDAGMI() = default;
bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
@@ -825,7 +862,7 @@ void ScheduleDAGMI::placeDebugValues() {
RegionBegin = FirstDbgValue;
}
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
MachineInstr *DbgValue = P.first;
@@ -841,7 +878,7 @@ void ScheduleDAGMI::placeDebugValues() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ScheduleDAGMI::dumpSchedule() const {
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
SU->dump(this);
@@ -1012,7 +1049,7 @@ updateScheduledPressure(const SUnit *SU,
++CritIdx;
if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
- && NewMaxPressure[ID] <= INT16_MAX)
+ && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
}
unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
@@ -1136,6 +1173,12 @@ void ScheduleDAGMILive::schedule() {
dbgs() << " Pressure Diff : ";
getPressureDiff(&SU).dump(*TRI);
}
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
dbgs() << '\n';
}
if (ExitSU.getInstr() != nullptr)
@@ -1396,6 +1439,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads or between neighboring stores.
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
@@ -1403,6 +1447,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
SUnit *SU;
unsigned BaseReg;
int64_t Offset;
+
MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
: SU(su), BaseReg(reg), Offset(ofs) {}
@@ -1439,25 +1484,26 @@ public:
LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
: BaseMemOpClusterMutation(tii, tri, true) {}
};
-} // anonymous
+
+} // end anonymous namespace
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
: nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
: nullptr;
}
-} // namespace llvm
+} // end namespace llvm
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
@@ -1543,80 +1589,11 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
}
//===----------------------------------------------------------------------===//
-// MacroFusion - DAG post-processing to encourage fusion of macro ops.
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// \brief Post-process the DAG to create cluster edges between instructions
-/// that may be fused by the processor into a single operation.
-class MacroFusion : public ScheduleDAGMutation {
- const TargetInstrInfo &TII;
-public:
- MacroFusion(const TargetInstrInfo &TII)
- : TII(TII) {}
-
- void apply(ScheduleDAGInstrs *DAGInstrs) override;
-};
-} // anonymous
-
-namespace llvm {
-
-std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
- return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
-}
-
-} // namespace llvm
-
-/// \brief Callback from DAG postProcessing to create cluster edges to encourage
-/// fused operations.
-void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
- // For now, assume targets can only fuse with the branch.
- SUnit &ExitSU = DAG->ExitSU;
- MachineInstr *Branch = ExitSU.getInstr();
- if (!Branch)
- return;
-
- for (SDep &PredDep : ExitSU.Preds) {
- if (PredDep.isWeak())
- continue;
- SUnit &SU = *PredDep.getSUnit();
- MachineInstr &Pred = *SU.getInstr();
- if (!TII.shouldScheduleAdjacent(Pred, *Branch))
- continue;
-
- // Create a single weak edge from SU to ExitSU. The only effect is to cause
- // bottom-up scheduling to heavily prioritize the clustered SU. There is no
- // need to copy predecessor edges from ExitSU to SU, since top-down
- // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
- // of SU, we could create an artificial edge from the deepest root, but it
- // hasn't been needed yet.
- bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
- (void)Success;
- assert(Success && "No DAG nodes should be reachable from ExitSU");
-
- // Adjust latency of data deps between the nodes.
- for (SDep &PredDep : ExitSU.Preds) {
- if (PredDep.getSUnit() == &SU)
- PredDep.setLatency(0);
- }
- for (SDep &SuccDep : SU.Succs) {
- if (SuccDep.getSUnit() == &ExitSU)
- SuccDep.setLatency(0);
- }
-
- DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
- break;
- }
-}
-
-//===----------------------------------------------------------------------===//
// CopyConstrain - DAG post-processing to encourage copy elimination.
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
@@ -1626,6 +1603,7 @@ class CopyConstrain : public ScheduleDAGMutation {
// RegionEndIdx is the slot index of the last non-debug instruction in the
// scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
SlotIndex RegionEndIdx;
+
public:
CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
@@ -1634,17 +1612,18 @@ public:
protected:
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};
-} // anonymous
+
+} // end anonymous namespace
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- return make_unique<CopyConstrain>(TII, TRI);
+ const TargetRegisterInfo *TRI) {
+ return llvm::make_unique<CopyConstrain>(TII, TRI);
}
-} // namespace llvm
+} // end namespace llvm
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
@@ -1836,7 +1815,7 @@ void SchedBoundary::reset() {
CheckPending = false;
CurrCycle = 0;
CurrMOps = 0;
- MinReadyCycle = UINT_MAX;
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
ExpectedLatency = 0;
DependentLatency = 0;
RetiredMOps = 0;
@@ -1937,12 +1916,22 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
return true;
}
+
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
}
+
+ if (CurrMOps > 0 &&
+ ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
+ DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
+ << (isTop()? "begin" : "end") << " group\n");
+ return true;
+ }
+
if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
for (TargetSchedModel::ProcResIter
@@ -2039,7 +2028,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
/// Move the boundary of scheduled code by one cycle.
void SchedBoundary::bumpCycle(unsigned NextCycle) {
if (SchedModel->getMicroOpBufferSize() == 0) {
- assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
if (MinReadyCycle > NextCycle)
NextCycle = MinReadyCycle;
}
@@ -2237,6 +2227,18 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// one cycle. Since we commonly reach the max MOps here, opportunistically
// bump the cycle to avoid uselessly checking everything in the readyQ.
CurrMOps += IncMOps;
+
+ // Bump the cycle count for issue group constraints.
+  // This must be done after NextCycle has been adjusted for all other stalls.
+  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
+  // CurrCycle to X.
+ if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
+ DEBUG(dbgs() << " Bump cycle to "
+ << (isTop() ? "end" : "begin") << " group\n");
+ bumpCycle(++NextCycle);
+ }
+
while (CurrMOps >= SchedModel->getIssueWidth()) {
DEBUG(dbgs() << " *** Max MOps " << CurrMOps
<< " at cycle " << CurrCycle << '\n');
@@ -2250,7 +2252,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
void SchedBoundary::releasePending() {
// If the available queue is empty, it is safe to reset MinReadyCycle.
if (Available.empty())
- MinReadyCycle = UINT_MAX;
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
@@ -2323,10 +2325,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
return nullptr;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
-void SchedBoundary::dumpScheduledState() {
+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() {
unsigned ResFactor;
unsigned ResCount;
if (ZoneCritResIdx) {
@@ -2666,11 +2668,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
}
void GenericScheduler::dumpPolicy() {
+ // Cannot completely remove virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "GenericScheduler RegionPolicy: "
<< " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
<< " OnlyTopDown=" << RegionPolicy.OnlyTopDown
<< " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
<< "\n";
+#endif
}
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
@@ -2724,7 +2729,7 @@ void GenericScheduler::registerRoots() {
errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
}
- if (EnableCyclicPath) {
+ if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
checkAcyclicLatency();
}
@@ -3106,7 +3111,6 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
}
void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
-
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
++InsertPos;
@@ -3154,7 +3158,8 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
- ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -3195,7 +3200,6 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
}
}
-
void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
@@ -3302,7 +3306,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
}
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
+ return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
/*RemoveKillFlags=*/true);
}
@@ -3311,14 +3315,14 @@ ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
- const SchedDFSResult *DFSResult;
- const BitVector *ScheduledTrees;
+ const SchedDFSResult *DFSResult = nullptr;
+ const BitVector *ScheduledTrees = nullptr;
bool MaximizeILP;
- ILPOrder(bool MaxILP)
- : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
+ ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
/// \brief Apply a less-than relation on node priority.
///
@@ -3347,12 +3351,13 @@ struct ILPOrder {
/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
- ScheduleDAGMILive *DAG;
+ ScheduleDAGMILive *DAG = nullptr;
ILPOrder Cmp;
std::vector<SUnit*> ReadyQ;
+
public:
- ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
+ ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
void initialize(ScheduleDAGMI *dag) override {
assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
@@ -3405,14 +3410,16 @@ public:
std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
}
};
-} // namespace
+
+} // end anonymous namespace
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
}
+
static MachineSchedRegistry ILPMaxRegistry(
"ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
@@ -3424,6 +3431,7 @@ static MachineSchedRegistry ILPMinRegistry(
#ifndef NDEBUG
namespace {
+
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
@@ -3444,11 +3452,12 @@ class InstructionShuffler : public MachineSchedStrategy {
// Using a less-than relation (SUnitOrder<false>) for the TopQ priority
// gives nodes with a higher number higher priority causing the latest
// instructions to be scheduled first.
- PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
TopQ;
// When scheduling bottom-up, use greater-than as the queue priority.
- PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
BottomQ;
+
public:
InstructionShuffler(bool alternate, bool topdown)
: IsAlternating(alternate), IsTopDown(topdown) {}
@@ -3492,15 +3501,18 @@ public:
BottomQ.push(SU);
}
};
-} // namespace
+
+} // end anonymous namespace
static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
bool Alternate = !ForceTopDown && !ForceBottomUp;
bool TopDown = !ForceBottomUp;
assert((TopDown || !ForceTopDown) &&
"-misched-topdown incompatible with -misched-bottomup");
- return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
+ return new ScheduleDAGMILive(
+ C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
}
+
static MachineSchedRegistry ShufflerRegistry(
"shuffle", "Shuffle machine instructions alternating directions",
createInstructionShuffler);
@@ -3518,8 +3530,7 @@ template<> struct GraphTraits<
template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
-
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const ScheduleDAG *G) {
return G->MF.getName();
@@ -3576,7 +3587,8 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
return Str;
}
};
-} // namespace llvm
+
+} // end namespace llvm
#endif // NDEBUG
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index ef7e525e8165d..998a9645e68bf 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1,4 +1,4 @@
-//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,21 +7,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -37,9 +51,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics,
"machine-trace-metrics", "Machine Trace Metrics", false, true)
-MachineTraceMetrics::MachineTraceMetrics()
- : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
- MRI(nullptr), Loops(nullptr) {
+MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}
@@ -137,7 +149,6 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
-
//===----------------------------------------------------------------------===//
// Ensemble utility functions
//===----------------------------------------------------------------------===//
@@ -151,7 +162,7 @@ MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
}
// Virtual destructor serves as an anchor.
-MachineTraceMetrics::Ensemble::~Ensemble() {}
+MachineTraceMetrics::Ensemble::~Ensemble() = default;
const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
@@ -297,6 +308,7 @@ static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {
+
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
const char *getName() const override { return "MinInstr"; }
const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
@@ -306,7 +318,8 @@ public:
MinInstrCountEnsemble(MachineTraceMetrics *mtm)
: MachineTraceMetrics::Ensemble(mtm) {}
};
-}
+
+} // end anonymous namespace
// Select the preferred predecessor for MBB.
const MachineBasicBlock*
@@ -409,25 +422,30 @@ void MachineTraceMetrics::verifyAnalysis() const {
// revisit blocks.
namespace {
+
struct LoopBounds {
MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
SmallPtrSet<const MachineBasicBlock*, 8> Visited;
const MachineLoopInfo *Loops;
- bool Downward;
+ bool Downward = false;
+
LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
- const MachineLoopInfo *loops)
- : Blocks(blocks), Loops(loops), Downward(false) {}
+ const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};
-}
+
+} // end anonymous namespace
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {
+
template<>
class po_iterator_storage<LoopBounds, true> {
LoopBounds &LB;
+
public:
po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+
void finishPostorder(const MachineBasicBlock*) {}
bool insertEdge(Optional<const MachineBasicBlock *> From,
@@ -452,7 +470,8 @@ public:
return LB.Visited.insert(To).second;
}
};
-}
+
+} // end namespace llvm
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
@@ -603,6 +622,7 @@ void MachineTraceMetrics::Ensemble::verify() const {
// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
namespace {
+
struct DataDep {
const MachineInstr *DefMI;
unsigned DefOp;
@@ -622,7 +642,8 @@ struct DataDep {
assert((++DefI).atEnd() && "Register has multiple defs");
}
};
-}
+
+} // end anonymous namespace
// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
@@ -678,17 +699,19 @@ static void getPHIDeps(const MachineInstr &UseMI,
// direction instructions are scanned, it could be the operand that defined the
// regunit, or the highest operand to read the regunit.
namespace {
+
struct LiveRegUnit {
unsigned RegUnit;
- unsigned Cycle;
- const MachineInstr *MI;
- unsigned Op;
+ unsigned Cycle = 0;
+ const MachineInstr *MI = nullptr;
+ unsigned Op = 0;
unsigned getSparseSetIndex() const { return RegUnit; }
- LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
+ LiveRegUnit(unsigned RU) : RegUnit(RU) {}
};
-}
+
+} // end anonymous namespace
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
@@ -922,7 +945,6 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
return Height;
}
-
typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
// Push the height of DefMI upwards if required to match UseMI.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index a98139f9e5af3..d392c044bd716 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -260,8 +260,8 @@ namespace {
static char ID; // Pass ID, replacement for typeid
const std::string Banner;
- MachineVerifierPass(const std::string &banner = nullptr)
- : MachineFunctionPass(ID), Banner(banner) {
+ MachineVerifierPass(std::string banner = std::string())
+ : MachineFunctionPass(ID), Banner(std::move(banner)) {
initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
}
@@ -528,7 +528,8 @@ void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
regsReserved = MRI->getReservedRegs();
- markReachable(&MF->front());
+ if (!MF->empty())
+ markReachable(&MF->front());
// Build a set of the basic blocks in the function.
FunctionBlocks.clear();
@@ -548,7 +549,8 @@ void MachineVerifier::visitMachineFunctionBefore() {
// Check that the register use lists are sane.
MRI->verifyUseLists();
- verifyStackFrame();
+ if (!MF->empty())
+ verifyStackFrame();
}
// Does iterator point to a and b as the first two elements?
@@ -572,7 +574,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
for (const auto &LI : MBB->liveins()) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB->getIterator() != MBB->getParent()->begin()) {
- report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
}
}
}
@@ -908,6 +910,14 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
+ // Generic loads and stores must have a single MachineMemOperand
+ // describing that access.
+ if ((MI->getOpcode() == TargetOpcode::G_LOAD ||
+ MI->getOpcode() == TargetOpcode::G_STORE) &&
+ !MI->hasOneMemOperand())
+ report("Generic instruction accessing memory must have one mem operand",
+ MI);
+
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
@@ -2047,23 +2057,14 @@ void MachineVerifier::verifyStackFrame() {
// Update stack state by checking contents of MBB.
for (const auto &I : *MBB) {
if (I.getOpcode() == FrameSetupOpcode) {
- // The first operand of a FrameOpcode should be i32.
- int Size = I.getOperand(0).getImm();
- assert(Size >= 0 &&
- "Value should be non-negative in FrameSetup and FrameDestroy.\n");
-
if (BBState.ExitIsSetup)
report("FrameSetup is after another FrameSetup", &I);
- BBState.ExitValue -= Size;
+ BBState.ExitValue -= TII->getFrameSize(I);
BBState.ExitIsSetup = true;
}
if (I.getOpcode() == FrameDestroyOpcode) {
- // The first operand of a FrameOpcode should be i32.
- int Size = I.getOperand(0).getImm();
- assert(Size >= 0 &&
- "Value should be non-negative in FrameSetup and FrameDestroy.\n");
-
+ int Size = TII->getFrameSize(I);
if (!BBState.ExitIsSetup)
report("FrameDestroy is not after a FrameSetup", &I);
int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
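
For the verifyStackFrame() hunk above, a stripped-down model of the FrameSetup/FrameDestroy balance check may help. The Instr struct and opcode names below are invented stand-ins; in the real pass the size comes from TII->getFrameSize():

#include <cstdio>
#include <vector>

enum Opcode { FrameSetup, FrameDestroy, Other };

struct Instr {
  Opcode Op;
  int FrameSize; // stand-in for TII->getFrameSize(I)
};

int main() {
  std::vector<Instr> Block = {
      {FrameSetup, 16}, {Other, 0}, {FrameDestroy, 16}};

  int ExitValue = 0;     // SP adjustment accumulated so far
  bool ExitIsSetup = false;

  for (const Instr &I : Block) {
    if (I.Op == FrameSetup) {
      if (ExitIsSetup)
        std::puts("error: FrameSetup is after another FrameSetup");
      ExitValue -= I.FrameSize;
      ExitIsSetup = true;
    } else if (I.Op == FrameDestroy) {
      if (!ExitIsSetup)
        std::puts("error: FrameDestroy is not after a FrameSetup");
      ExitValue += I.FrameSize;
      ExitIsSetup = false;
    }
  }
  std::printf("net SP adjustment: %d\n", ExitValue); // 0 when balanced
}
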
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index ad9166f1ed23b..00e72971a01e8 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -75,7 +75,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
.addImm(FirstActualI->getOpcode());
for (auto &MO : FirstActualI->operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
FirstActualI->eraseFromParent();
MF.ensureAlignment(4);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 6081916a6a829..61dccdde8f1dc 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -253,7 +253,7 @@ void SchedulePostRATDList::exitRegion() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
-void SchedulePostRATDList::dumpSchedule() const {
+LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
SU->dump(this);
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 5fca7fa5536b5..1354009794cb4 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -265,11 +265,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
std::vector<MachineBasicBlock::iterator> FrameSDOps;
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
- if (I->getOpcode() == FrameSetupOpcode ||
- I->getOpcode() == FrameDestroyOpcode) {
- assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
- " instructions should have a single immediate argument!");
- unsigned Size = I->getOperand(0).getImm();
+ if (TII.isFrameInstr(*I)) {
+ unsigned Size = TII.getFrameSize(*I);
if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
AdjustsStack = true;
FrameSDOps.push_back(I);
@@ -336,7 +333,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
return;
const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+ const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1049,8 +1046,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
- unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
if (RS && FrameIndexEliminationScavenging)
RS->enterBasicBlock(*BB);
@@ -1059,11 +1054,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
- if (I->getOpcode() == FrameSetupOpcode ||
- I->getOpcode() == FrameDestroyOpcode) {
- InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+ if (TII.isFrameInstr(*I)) {
+ InsideCallSequence = TII.isFrameSetup(*I);
SPAdj += TII.getSPAdjust(*I);
-
I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
continue;
}
@@ -1237,4 +1230,6 @@ doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) {
++I;
}
}
+
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
}
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 804a4c3dad669..b29e62bf1aa3c 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -29,7 +29,10 @@ PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
PseudoSourceValue::~PseudoSourceValue() {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
- O << PSVNames[Kind];
+ if (Kind < TargetCustom)
+ O << PSVNames[Kind];
+ else
+ O << "TargetCustom" << Kind;
}
bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
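
The printCustom() fix guards the name-table lookup so kinds at or beyond TargetCustom no longer index past PSVNames. A small sketch of that pattern, with invented kind names and table contents:

#include <iostream>
#include <string>

enum PSVKind { Stack, GOT, JumpTable, ConstantPool, TargetCustom };
static const char *PSVNames[] = {"Stack", "GOT", "JumpTable", "ConstantPool"};

std::string printKind(int Kind) {
  if (Kind < TargetCustom)
    return PSVNames[Kind];            // safe: only named kinds index the table
  return "TargetCustom" + std::to_string(Kind);
}

int main() {
  std::cout << printKind(GOT) << '\n';              // "GOT"
  std::cout << printKind(TargetCustom + 3) << '\n'; // "TargetCustom7"
}
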
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index a558e371ad4c6..a87fed3a687e1 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -176,8 +176,6 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
Q.collectInterferingVRegs();
- if (Q.seenUnspillableVReg())
- return false;
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index c47cfb1b986fb..06500289c971a 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -29,8 +29,10 @@
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
@@ -125,6 +127,7 @@ class RAGreedy : public MachineFunctionPass,
MachineBlockFrequencyInfo *MBFI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
+ MachineOptimizationRemarkEmitter *ORE;
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
LiveDebugVariables *DebugVars;
@@ -419,6 +422,20 @@ private:
void collectHintInfo(unsigned, HintsInfo &);
bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
+
+ /// Compute and report the number of spills and reloads for a loop.
+ void reportNumberOfSpillsReloads(MachineLoop *L, unsigned &Reloads,
+ unsigned &FoldedReloads, unsigned &Spills,
+ unsigned &FoldedSpills);
+
+ /// Report the number of spills and reloads for each loop.
+ void reportNumberOfSpillsReloads() {
+ for (MachineLoop *L : *Loops) {
+ unsigned Reloads, FoldedReloads, Spills, FoldedSpills;
+ reportNumberOfSpillsReloads(L, Reloads, FoldedReloads, Spills,
+ FoldedSpills);
+ }
+ }
};
} // end anonymous namespace
@@ -439,6 +456,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)
@@ -490,6 +508,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveRegMatrix>();
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -679,7 +698,7 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
MCRegUnitIterator Units(PhysReg, TRI);
for (; Units.isValid(); ++Units) {
// Instantiate a "subquery", not to be confused with the Queries array.
- LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
+ LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
if (subQ.checkInterference())
break;
}
@@ -830,7 +849,11 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
SmallVector<LiveInterval*, 8> Intfs;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
+ // We usually have the interfering VRegs cached, so collectInterferingVRegs()
+ // should be fast. We may need to recalculate when different physregs
+ // overlap the same register unit, since different SubRanges may then have
+ // been queried against it.
+ Q.collectInterferingVRegs();
ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
@@ -2611,6 +2634,69 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return 0;
}
+void RAGreedy::reportNumberOfSpillsReloads(MachineLoop *L, unsigned &Reloads,
+ unsigned &FoldedReloads,
+ unsigned &Spills,
+ unsigned &FoldedSpills) {
+ Reloads = 0;
+ FoldedReloads = 0;
+ Spills = 0;
+ FoldedSpills = 0;
+
+ // Sum up the spills and reloads in subloops.
+ for (MachineLoop *SubLoop : *L) {
+ unsigned SubReloads;
+ unsigned SubFoldedReloads;
+ unsigned SubSpills;
+ unsigned SubFoldedSpills;
+
+ reportNumberOfSpillsReloads(SubLoop, SubReloads, SubFoldedReloads,
+ SubSpills, SubFoldedSpills);
+ Reloads += SubReloads;
+ FoldedReloads += SubFoldedReloads;
+ Spills += SubSpills;
+ FoldedSpills += SubFoldedSpills;
+ }
+
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ int FI;
+
+ for (MachineBasicBlock *MBB : L->getBlocks())
+ // Handle blocks that were not included in subloops.
+ if (Loops->getLoopFor(MBB) == L)
+ for (MachineInstr &MI : *MBB) {
+ const MachineMemOperand *MMO;
+
+ if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
+ ++Reloads;
+ else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++FoldedReloads;
+ else if (TII->isStoreToStackSlot(MI, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++Spills;
+ else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++FoldedSpills;
+ }
+
+ if (Reloads || FoldedReloads || Spills || FoldedSpills) {
+ using namespace ore;
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
+ L->getStartLoc(), L->getHeader());
+ if (Spills)
+ R << NV("NumSpills", Spills) << " spills ";
+ if (FoldedSpills)
+ R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
+ if (Reloads)
+ R << NV("NumReloads", Reloads) << " reloads ";
+ if (FoldedReloads)
+ R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ ORE->emit(R << "generated in loop");
+ }
+}
+
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: " << mf.getName() << '\n');
@@ -2633,6 +2719,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
Indexes = &getAnalysis<SlotIndexes>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
Bundles = &getAnalysis<EdgeBundles>();
@@ -2658,6 +2745,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
allocatePhysRegs();
tryHintsRecoloring();
postOptimization();
+ reportNumberOfSpillsReloads();
releaseMemory();
return true;
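
The spill/reload reporting added to RAGreedy above walks the loop tree bottom-up: each loop's totals are its subloops' totals plus the counts from blocks whose innermost loop it is. A toy sketch of that aggregation, with invented Loop/Block structs in place of MachineLoop/MachineBasicBlock:

#include <cstdio>
#include <vector>

struct Block { unsigned Spills, Reloads; };
struct Loop {
  std::vector<Loop> SubLoops;
  std::vector<Block> OwnBlocks; // blocks whose innermost loop is this one
};

static void countSpillsReloads(const Loop &L, unsigned &Spills,
                               unsigned &Reloads) {
  Spills = Reloads = 0;
  for (const Loop &Sub : L.SubLoops) {
    unsigned S, R;
    countSpillsReloads(Sub, S, R); // subloop totals roll up into the parent
    Spills += S;
    Reloads += R;
  }
  for (const Block &B : L.OwnBlocks) {
    Spills += B.Spills;
    Reloads += B.Reloads;
  }
}

int main() {
  Loop Inner{{}, {{1, 2}}};
  Loop Outer{{Inner}, {{0, 1}}};
  unsigned S, R;
  countSpillsReloads(Outer, S, R);
  std::printf("%u spills, %u reloads\n", S, R); // prints "1 spills, 3 reloads"
}
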
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 101b30bf3b657..3b5964eef55e4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -1,4 +1,4 @@
-//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,34 +29,61 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
#include "Spiller.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Solution.h"
+#include "llvm/CodeGen/PBQPRAConstraint.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
#include <limits>
+#include <map>
#include <memory>
#include <queue>
#include <set>
#include <sstream>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -86,7 +113,6 @@ namespace {
/// Programming problems.
class RegAllocPBQP : public MachineFunctionPass {
public:
-
static char ID;
/// Construct a PBQP register allocator.
@@ -113,7 +139,6 @@ public:
}
private:
-
typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
typedef std::vector<const LiveInterval*> Node2LIMap;
typedef std::vector<unsigned> AllowedSet;
@@ -187,7 +212,6 @@ public:
/// @brief Add interference edges between overlapping vregs.
class Interference : public PBQPRAConstraint {
private:
-
typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
@@ -276,7 +300,6 @@ private:
}
public:
-
void apply(PBQPRAGraph &G) override {
// The following is loosely based on the linear scan algorithm introduced in
// "Linear Scan Register Allocation" by Poletto and Sarkar. This version
@@ -363,7 +386,6 @@ public:
}
private:
-
// Create an Interference edge and add it to the graph, unless it is
// a null matrix, meaning the nodes' allowed registers do not have any
// interference. This case occurs frequently between integer and floating
@@ -372,7 +394,6 @@ private:
bool createInterferenceEdge(PBQPRAGraph &G,
PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
IMatrixCache &C) {
-
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getSubtarget().getRegisterInfo();
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
@@ -409,7 +430,6 @@ private:
}
};
-
class Coalescing : public PBQPRAConstraint {
public:
void apply(PBQPRAGraph &G) override {
@@ -421,7 +441,6 @@ public:
// gives the Ok.
for (const auto &MBB : MF) {
for (const auto &MI : MBB) {
-
// Skip not-coalescable or already coalesced copies.
if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
continue;
@@ -479,7 +498,6 @@ public:
}
private:
-
void addVirtRegCoalesce(
PBQPRAGraph::RawMatrix &CostMat,
const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
@@ -496,14 +514,15 @@ private:
}
}
}
-
};
-} // End anonymous namespace.
+} // end anonymous namespace
// Out-of-line destructor/anchor for PBQPRAConstraint.
-PBQPRAConstraint::~PBQPRAConstraint() {}
+PBQPRAConstraint::~PBQPRAConstraint() = default;
+
void PBQPRAConstraint::anchor() {}
+
void PBQPRAConstraintList::anchor() {}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
@@ -554,7 +573,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
const MachineFunction &MF) {
- const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSR[i] != 0; ++i)
if (TRI.regsOverlap(reg, CSR[i]))
return true;
@@ -777,7 +796,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// If there are non-empty intervals allocate them using pbqp.
if (!VRegsToAlloc.empty()) {
-
const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
llvm::make_unique<PBQPRAConstraintList>();
@@ -840,7 +858,8 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
});
}
-void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
for (auto NId : nodeIds()) {
const Vector &Costs = getNodeCosts(NId);
assert(Costs.getLength() != 0 && "Empty vector in graph.");
@@ -861,7 +880,10 @@ void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
}
}
-LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const {
+ dump(dbgs());
+}
+#endif
void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
OS << "graph {\n";
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index ece44c28e9ede..855aa37ff3c36 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -103,9 +103,27 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Clobbered Registers: ");
- for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
- if (MRI->isPhysRegModified(PReg, true))
- RegMask[PReg / 32] &= ~(1u << PReg % 32);
+ const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
+ auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
+ RegMask[Reg / 32] &= ~(1u << Reg % 32);
+ };
+ // Scan all the physical registers. When a register is defined in the current
+ // function, mark it and all the aliasing registers as defined in the regmask.
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ // If a register is in the UsedPhysRegsMask set, then mark it as defined.
+ // All its aliases will also be in the set, so we can skip marking
+ // the aliases as defined here.
+ if (UsedPhysRegsMask.test(PReg)) {
+ SetRegAsDefined(PReg);
+ continue;
+ }
+ // If a register is defined by an instruction, mark it as defined together
+ // with all its aliases.
+ if (!MRI->def_empty(PReg)) {
+ for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
+ SetRegAsDefined(*AI);
+ }
+ }
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
const uint32_t *CallPreservedMask =
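
The RegMask convention in the hunk above is that a set bit means the register is preserved and a cleared bit means it was defined (clobbered) by the function. A self-contained sketch of the SetRegAsDefined helper on a plain uint32_t mask, with an invented register count:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumRegs = 100;
  // Start with "everything preserved": all bits set.
  std::vector<uint32_t> RegMask((NumRegs + 31) / 32, ~0u);

  auto SetRegAsDefined = [&RegMask](unsigned Reg) {
    RegMask[Reg / 32] &= ~(1u << Reg % 32); // clear the bit: Reg is clobbered
  };

  SetRegAsDefined(5);
  SetRegAsDefined(40);

  auto IsPreserved = [&RegMask](unsigned Reg) {
    return (RegMask[Reg / 32] >> (Reg % 32)) & 1u;
  };
  std::printf("reg 5 preserved: %u, reg 6 preserved: %u\n",
              IsPreserved(5), IsPreserved(6)); // 0, 1
}
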
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 178fa18ac5a60..82a3bd9a0bd17 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,12 +14,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -29,8 +39,7 @@ static cl::opt<unsigned>
StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
cl::desc("Limit all regclasses to N registers"));
-RegisterClassInfo::RegisterClassInfo()
- : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {}
+RegisterClassInfo::RegisterClassInfo() = default;
void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
bool Update = false;
@@ -48,18 +57,20 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
// Does this MF have different CSRs?
assert(TRI && "no register info set");
- const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
- if (Update || CSR != CalleeSaved) {
- // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+
+ // Get the callee saved registers.
+ const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
+ if (Update || CSR != CalleeSavedRegs) {
+ // Build a CSRAlias map. Every CSR alias saves the last
// overlapping CSR.
- CSRNum.clear();
- CSRNum.resize(TRI->getNumRegs(), 0);
- for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ CalleeSavedAliases.resize(TRI->getNumRegs(), 0);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ CalleeSavedAliases[*AI] = *I;
+
Update = true;
}
- CalleeSaved = CSR;
+ CalleeSavedRegs = CSR;
// Different reserved registers?
const BitVector &RR = MF->getRegInfo().getReservedRegs();
@@ -103,7 +114,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CSRNum[PhysReg])
+ if (CalleeSavedAliases[PhysReg])
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
@@ -114,7 +125,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
}
}
RCI.NumRegs = N + CSRAlias.size();
- assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+ assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
// CSR aliases go after the volatile registers, preserve the target's order.
for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
@@ -156,9 +167,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
const TargetRegisterClass *RC = nullptr;
unsigned NumRCUnits = 0;
- for (TargetRegisterInfo::regclass_iterator
- RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) {
- const int *PSetID = TRI->getRegClassPressureSets(*RI);
+ for (const TargetRegisterClass *C : TRI->regclasses()) {
+ const int *PSetID = TRI->getRegClassPressureSets(C);
for (; *PSetID != -1; ++PSetID) {
if ((unsigned)*PSetID == Idx)
break;
@@ -168,9 +178,9 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
// Found a register class that counts against this pressure set.
// For efficiency, only compute the set order for the largest set.
- unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit;
+ unsigned NUnits = TRI->getRegClassWeight(C).WeightLimit;
if (!RC || NUnits > NumRCUnits) {
- RC = *RI;
+ RC = C;
NumRCUnits = NUnits;
}
}
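
The CalleeSavedAliases array built above records, for every register that overlaps a callee-saved register, the last overlapping CSR, so compute() only needs a cheap lookup. A rough sketch with an invented alias table standing in for MCRegAliasIterator (register 0 means "no CSR alias", as in the real map):

#include <cstdio>
#include <vector>

int main() {
  const unsigned NumRegs = 8;
  // Invented alias sets: register -> registers it overlaps (including itself).
  std::vector<std::vector<unsigned>> Aliases = {
      {0}, {1, 2}, {2, 1}, {3}, {4}, {5, 6}, {6, 5}, {7}};
  std::vector<unsigned> CSR = {2, 6}; // invented callee-saved registers

  std::vector<unsigned> CalleeSavedAliases(NumRegs, 0); // 0 = no CSR alias
  for (unsigned Reg : CSR)
    for (unsigned Alias : Aliases[Reg])
      CalleeSavedAliases[Alias] = Reg; // last overlapping CSR wins

  for (unsigned R = 0; R < NumRegs; ++R)
    if (CalleeSavedAliases[R])
      std::printf("reg %u aliases CSR %u\n", R, CalleeSavedAliases[R]);
}
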
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 4bb3c229afc58..bf44ee8453b61 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -189,6 +190,9 @@ namespace {
/// This returns true if an interval was modified.
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+ /// We found a copy which can be moved to its less frequent predecessor.
+ bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
+
/// If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerialize the definition.
bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
@@ -811,42 +815,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
- LaneBitmask AMask = SA.LaneMask;
- for (LiveInterval::SubRange &SB : IntB.subranges()) {
- LaneBitmask BMask = SB.LaneMask;
- LaneBitmask Common = BMask & AMask;
- if (Common.none())
- continue;
-
- DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask)
- << " into " << PrintLaneMask(Common) << '\n');
- LaneBitmask BRest = BMask & ~AMask;
- LiveInterval::SubRange *CommonRange;
- if (BRest.any()) {
- SB.LaneMask = BRest;
- DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
- << '\n');
- // Duplicate SubRange for newly merged common stuff.
- CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
- } else {
- // We van reuse the L SubRange.
- SB.LaneMask = Common;
- CommonRange = &SB;
- }
- LiveRange RangeCopy(SB, Allocator);
-
- VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx);
- assert(BSubValNo->def == CopyIdx);
- BSubValNo->def = ASubValNo->def;
- addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
- AMask &= ~BMask;
- }
- if (AMask.any()) {
- DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
- LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
- VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
- addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
- }
+ IntB.refineSubRanges(Allocator, SA.LaneMask,
+ [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
+ VNInfo *BSubValNo = SR.empty()
+ ? SR.getNextValue(CopyIdx, Allocator)
+ : SR.getVNInfoAt(CopyIdx);
+ assert(BSubValNo != nullptr);
+ addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ });
}
}
@@ -861,6 +837,184 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return true;
}
+/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
+/// predecessor of BB2, and if B is not redefined on the way from A = B
+/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
+/// execution goes through the path from BB0 to BB2. We may move B = A
+/// to the predecessor without such reversed copy.
+/// So we will transform the program from:
+///   BB0:
+///      A = B;    BB1:
+///       ...         ...
+///        /     \      /
+///             BB2:
+///               ...
+///               B = A;
+///
+/// to:
+///
+///   BB0:          BB1:
+///      A = B;        ...
+///       ...          B = A;
+///        /     \      /
+///             BB2:
+///               ...
+///
+/// A special case is when BB0 and BB2 are the same BB which is the only
+/// BB in a loop:
+///   BB1:
+///        ...
+///   BB0/BB2: ----
+///        B = A;   |
+///        ...      |
+///        A = B;   |
+///          |-------
+///          |
+/// We may hoist B = A from BB0/BB2 to BB1.
+///
+/// The major preconditions for correctness to remove such partial
+/// redundancy include:
+/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
+/// the PHI is defined by the reversed copy A = B in BB0.
+/// 2. No B is referenced from the start of BB2 to B = A.
+/// 3. No B is defined from A = B to the end of BB0.
+/// 4. BB1 has only one successor.
+///
+/// 2 and 4 implicitly ensure B is not live at the end of BB1.
+/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
+/// colder place, which not only prevents endless loops, but also makes sure
+/// the movement of the copy is beneficial.
+bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
+ MachineInstr &CopyMI) {
+ assert(!CP.isPhys());
+ if (!CopyMI.isFullCopy())
+ return false;
+
+ MachineBasicBlock &MBB = *CopyMI.getParent();
+ if (MBB.isEHPad())
+ return false;
+
+ if (MBB.pred_size() != 2)
+ return false;
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // A is defined by PHI at the entry of MBB.
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
+ assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+ if (!AValNo->isPHIDef())
+ return false;
+
+ // No B is referenced before CopyMI in MBB.
+ if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
+ return false;
+
+ // MBB has two predecessors: one contains A = B so no copy will be inserted
+ // for it. The other one will have a copy moved from MBB.
+ bool FoundReverseCopy = false;
+ MachineBasicBlock *CopyLeftBB = nullptr;
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
+ if (!DefMI || !DefMI->isFullCopy()) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // Check DefMI is a reverse copy and it is in BB Pred.
+ if (DefMI->getOperand(0).getReg() != IntA.reg ||
+ DefMI->getOperand(1).getReg() != IntB.reg ||
+ DefMI->getParent() != Pred) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // If there is any other def of B after DefMI and before the end of Pred,
+ // we need to keep the copy of B = A at the end of Pred if we remove
+ // B = A from MBB.
+ bool ValB_Changed = false;
+ for (auto VNI : IntB.valnos) {
+ if (VNI->isUnused())
+ continue;
+ if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
+ ValB_Changed = true;
+ break;
+ }
+ }
+ if (ValB_Changed) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ FoundReverseCopy = true;
+ }
+
+ // If no reverse copy is found in predecessors, nothing to do.
+ if (!FoundReverseCopy)
+ return false;
+
+ // If CopyLeftBB is nullptr, every predecessor of MBB contains a reverse
+ // copy, so CopyMI can be removed trivially once IntA/IntB are updated.
+ // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
+ // update IntA/IntB.
+ //
+ // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
+ // MBB is hotter than CopyLeftBB.
+ if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
+ return false;
+
+ // Now ok to move copy.
+ if (CopyLeftBB) {
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
+ << CopyLeftBB->getNumber() << '\t' << CopyMI);
+
+ // Insert new copy to CopyLeftBB.
+ auto InsPos = CopyLeftBB->getFirstTerminator();
+ MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IntB.reg)
+ .addReg(IntA.reg);
+ SlotIndex NewCopyIdx =
+ LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
+ IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+ for (LiveInterval::SubRange &SR : IntB.subranges())
+ SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+ } else {
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
+ << MBB.getNumber() << '\t' << CopyMI);
+ }
+
+ // Remove CopyMI.
+ // Note: it is fine to remove the copy before updating the live-ranges.
+ // While updating the live-ranges, we only look at slot indices and
+ // never go back to the instruction.
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI.eraseFromParent();
+
+ // Update the liveness.
+ SmallVector<SlotIndex, 8> EndPoints;
+ VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
+ LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
+ &EndPoints);
+ BValNo->markUnused();
+ // Extend IntB to the EndPoints of its original live interval.
+ LIS->extendToIndices(IntB, EndPoints);
+
+ // Now, do the same for its subranges.
+ for (LiveInterval::SubRange &SR : IntB.subranges()) {
+ EndPoints.clear();
+ VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
+ assert(BValNo && "All sublanes should be live");
+ LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
+ BValNo->markUnused();
+ LIS->extendToIndices(SR, EndPoints);
+ }
+
+ // Finally, update the live-range of IntA.
+ shrinkToUses(&IntA);
+ return true;
+}
+
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
@@ -1290,7 +1444,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// If SrcReg wasn't read, it may still be the case that DstReg is live-in
// because SrcReg is a sub-register.
- if (DstInt && !Reads && SubIdx)
+ if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
// Replace SrcReg with DstReg in all UseMI operands.
@@ -1486,6 +1640,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
}
+ // Try to see if we can partially eliminate the copy by moving it to a
+ // predecessor of its block.
+ if (!CP.isPartial() && !CP.isPhys())
+ if (removePartialRedundancy(CP, *CopyMI))
+ return true;
+
// Otherwise, we are unable to join the intervals.
DEBUG(dbgs() << "\tInterference!\n");
Again = true; // May be possible to coalesce later.
@@ -1583,6 +1743,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
return false;
}
}
+
+ // We must also check for overlaps with regmask clobbers.
+ BitVector RegMaskUsable;
+ if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
+ !RegMaskUsable.test(DstReg)) {
+ DEBUG(dbgs() << "\t\tRegMask interference\n");
+ return false;
+ }
}
// Skip any value computations, we are not adding new values to the
@@ -1636,14 +1804,6 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}
-
- // We must also check for clobbers caused by regmasks.
- for (const auto &MO : MI->operands()) {
- if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
- DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
- return false;
- }
- }
}
}
@@ -2738,39 +2898,16 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
LaneBitmask LaneMask,
CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- for (LiveInterval::SubRange &R : LI.subranges()) {
- LaneBitmask RMask = R.LaneMask;
- // LaneMask of subregisters common to subrange R and ToMerge.
- LaneBitmask Common = RMask & LaneMask;
- // There is nothing to do without common subregs.
- if (Common.none())
- continue;
-
- DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
- << PrintLaneMask(Common) << '\n');
- // LaneMask of subregisters contained in the R range but not in ToMerge,
- // they have to split into their own subrange.
- LaneBitmask LRest = RMask & ~LaneMask;
- LiveInterval::SubRange *CommonRange;
- if (LRest.any()) {
- R.LaneMask = LRest;
- DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
- // Duplicate SubRange for newly merged common stuff.
- CommonRange = LI.createSubRangeFrom(Allocator, Common, R);
+ LI.refineSubRanges(Allocator, LaneMask,
+ [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
+ if (SR.empty()) {
+ SR.assign(ToMerge, Allocator);
} else {
- // Reuse the existing range.
- R.LaneMask = Common;
- CommonRange = &R;
+ // joinSubRegRanges() destroys the merged range, so we need a copy.
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
}
- LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
- LaneMask &= ~RMask;
- }
-
- if (LaneMask.any()) {
- DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
- LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
- }
+ });
}
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
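
removePartialRedundancy() only fires on a narrow CFG shape: the copy's block has exactly two predecessors, at least one of them ends in the reverse copy A = B, and the remaining predecessor must have a single successor so the copy is only ever hoisted into a colder block. A toy model of just that shape test, with an invented Block struct (the real pass also checks PHI defs and liveness of B, which this sketch omits):

#include <cstdio>
#include <vector>

struct Block {
  bool EndsInReverseCopy;
  unsigned NumSuccessors;
};

// Returns whether the copy may be removed/hoisted; CopyLeftBB receives the
// predecessor that would get the hoisted copy (nullptr if every predecessor
// already has the reverse copy and the original copy can simply be deleted).
static bool canRemovePartialRedundancy(const std::vector<Block *> &Preds,
                                       Block *&CopyLeftBB) {
  CopyLeftBB = nullptr;
  if (Preds.size() != 2)
    return false;
  bool FoundReverseCopy = false;
  for (Block *Pred : Preds) {
    if (Pred->EndsInReverseCopy)
      FoundReverseCopy = true;
    else
      CopyLeftBB = Pred; // this predecessor would receive the hoisted copy
  }
  if (!FoundReverseCopy)
    return false;
  // Only hoist into a colder block: CopyLeftBB must have a single successor.
  return !CopyLeftBB || CopyLeftBB->NumSuccessors == 1;
}

int main() {
  Block BB0{true, 2}, BB1{false, 1};
  std::vector<Block *> Preds = {&BB0, &BB1};
  Block *Target = nullptr;
  bool OK = canRemovePartialRedundancy(Preds, Target);
  std::printf("applicable: %d, hoist into BB1: %d\n", OK, Target == &BB1);
}
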
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index fc84aebb14d7e..c726edc88b41c 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,13 +12,37 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -52,6 +76,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
const TargetRegisterInfo *TRI) {
@@ -97,6 +122,7 @@ void RegPressureTracker::dump() const {
P.dump(TRI);
}
+LLVM_DUMP_METHOD
void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
const char *sep = "";
for (const PressureChange &Change : *this) {
@@ -108,6 +134,7 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
}
dbgs() << '\n';
}
+#endif
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
LaneBitmask PreviousMask,
@@ -264,7 +291,6 @@ bool RegPressureTracker::isBottomClosed() const {
MachineBasicBlock::const_iterator());
}
-
SlotIndex RegPressureTracker::getCurrSlot() const {
MachineBasicBlock::const_iterator IdxPos =
skipDebugInstructionsForward(CurrPos, MBB->end());
@@ -328,7 +354,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
unsigned RegUnit) {
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end())
@@ -340,7 +366,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end()) {
@@ -352,7 +378,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
unsigned RegUnit) {
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end()) {
@@ -366,7 +392,7 @@ static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I != RegUnits.end()) {
@@ -423,6 +449,8 @@ namespace {
///
/// FIXME: always ignore tied opers
class RegisterOperandsCollector {
+ friend class llvm::RegisterOperands;
+
RegisterOperands &RegOpers;
const TargetRegisterInfo &TRI;
const MachineRegisterInfo &MRI;
@@ -517,11 +545,9 @@ class RegisterOperandsCollector {
addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
-
- friend class llvm::RegisterOperands;
};
-} // namespace
+} // end anonymous namespace
void RegisterOperands::collect(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
@@ -674,7 +700,7 @@ void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
assert(Pair.LaneMask.any());
unsigned RegUnit = Pair.RegUnit;
- auto I = find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
+ auto I = llvm::find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
return Other.RegUnit == RegUnit;
});
LaneBitmask PrevMask;
@@ -772,9 +798,10 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
if (!TrackLaneMasks) {
addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
} else {
- auto I = find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
- return Other.RegUnit == Reg;
- });
+ auto I =
+ llvm::find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
bool IsRedef = I != LiveUses->end();
if (IsRedef) {
// ignore re-defs here...
@@ -1154,7 +1181,7 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
- if (CritInc > 0 && CritInc <= INT16_MAX) {
+ if (CritInc > 0 && CritInc <= std::numeric_limits<int16_t>::max()) {
Delta.CriticalMax = PressureChange(PSetID);
Delta.CriticalMax.setUnitInc(CritInc);
}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index fdf741fd58f72..6392136fa2909 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//===- RegisterScavenging.cpp - Machine register scavenging ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,28 +15,32 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <string>
+
using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
- for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
- LaneBitmask UnitMask = (*RUI).second;
- if (UnitMask.none() || (LaneMask & UnitMask).any())
- RegUnitsAvailable.reset((*RUI).first);
- }
+ LiveUnits.addRegMasked(Reg, LaneMask);
}
void RegScavenger::init(MachineBasicBlock &MBB) {
@@ -44,6 +48,7 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+ LiveUnits.init(*TRI);
assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
@@ -51,7 +56,6 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
// Self-initialize.
if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();
- RegUnitsAvailable.resize(NumRegUnits);
KillRegUnits.resize(NumRegUnits);
DefRegUnits.resize(NumRegUnits);
TmpRegUnits.resize(NumRegUnits);
@@ -64,32 +68,17 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
I->Restore = nullptr;
}
- // All register units start out unused.
- RegUnitsAvailable.set();
-
- // Pristine CSRs are not available.
- BitVector PR = MF.getFrameInfo().getPristineRegs(MF);
- for (int I = PR.find_first(); I>0; I = PR.find_next(I))
- setRegUsed(I);
-
Tracking = false;
}
-void RegScavenger::setLiveInsUsed(const MachineBasicBlock &MBB) {
- for (const auto &LI : MBB.liveins())
- setRegUsed(LI.PhysReg, LI.LaneMask);
-}
-
void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
init(MBB);
- setLiveInsUsed(MBB);
+ LiveUnits.addLiveIns(MBB);
}
void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
init(MBB);
- // Merge live-ins of successors to get live-outs.
- for (const MachineBasicBlock *Succ : MBB.successors())
- setLiveInsUsed(*Succ);
+ LiveUnits.addLiveOuts(MBB);
// Move internal iterator at the last instruction of the block.
if (MBB.begin() != MBB.end()) {
@@ -263,36 +252,7 @@ void RegScavenger::backward() {
assert(Tracking && "Must be tracking to determine kills and defs");
const MachineInstr &MI = *MBBI;
- // Defined or clobbered registers are available now.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isRegMask()) {
- for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd;
- ++RU) {
- for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
- if (MO.clobbersPhysReg(*RURI)) {
- RegUnitsAvailable.set(RU);
- break;
- }
- }
- }
- } else if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- isReserved(Reg))
- continue;
- addRegUnits(RegUnitsAvailable, Reg);
- }
- }
- // Mark read registers as unavailable.
- for (const MachineOperand &MO : MI.uses()) {
- if (MO.isReg() && MO.readsReg()) {
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- isReserved(Reg))
- continue;
- removeRegUnits(RegUnitsAvailable, Reg);
- }
- }
+ LiveUnits.stepBackward(MI);
if (MBBI == MBB->begin()) {
MBBI = MachineBasicBlock::iterator(nullptr);
@@ -302,12 +262,9 @@ void RegScavenger::backward() {
}
bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
- if (includeReserved && isReserved(Reg))
- return true;
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- if (!RegUnitsAvailable.test(*RUI))
- return true;
- return false;
+ if (isReserved(Reg))
+ return includeReserved;
+ return !LiveUnits.available(Reg);
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
@@ -441,7 +398,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned NeedSize = RC->getSize();
unsigned NeedAlign = RC->getAlignment();
- unsigned SI = Scavenged.size(), Diff = UINT_MAX;
+ unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
for (unsigned I = 0; I < Scavenged.size(); ++I) {
if (Scavenged[I].Reg != 0)
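
After the switch to LiveRegUnits, isRegUsed() reports a reserved register as used only when includeReserved is true, and every other register as used exactly when it is live. A tiny model of that contract, with plain std::set standing in for the LLVM tracking classes:

#include <cstdio>
#include <set>

struct Scavenger {
  std::set<unsigned> Reserved;
  std::set<unsigned> Live; // registers the LiveRegUnits tracker considers live

  bool isRegUsed(unsigned Reg, bool IncludeReserved = true) const {
    if (Reserved.count(Reg))
      return IncludeReserved;
    return Live.count(Reg) != 0;
  }
};

int main() {
  Scavenger RS{{7}, {3}};
  std::printf("%d %d %d\n",
              RS.isRegUsed(3),                            // 1: live
              RS.isRegUsed(7, /*IncludeReserved=*/false), // 0: reserved, excluded
              RS.isRegUsed(5));                           // 0: neither reserved nor live
}
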
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index 451964199ba5a..3e259927ac5cb 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -30,17 +30,23 @@ namespace {
/// Tells whether or not this pass should emit a fallback
/// diagnostic when it resets a function.
bool EmitFallbackDiag;
+ /// Whether we should abort immediately instead of resetting the function.
+ bool AbortOnFailedISel;
public:
static char ID; // Pass identification, replacement for typeid
- ResetMachineFunction(bool EmitFallbackDiag = false)
- : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag) {}
+ ResetMachineFunction(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false)
+ : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag),
+ AbortOnFailedISel(AbortOnFailedISel) {}
StringRef getPassName() const override { return "ResetMachineFunction"; }
bool runOnMachineFunction(MachineFunction &MF) override {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel)) {
+ if (AbortOnFailedISel)
+ report_fatal_error("Instruction selection failed");
DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n');
++NumFunctionsReset;
MF.reset();
@@ -62,6 +68,7 @@ INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
"reset machine function if ISel failed", false, false)
MachineFunctionPass *
-llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false) {
- return new ResetMachineFunction(EmitFallbackDiag);
+llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false) {
+ return new ResetMachineFunction(EmitFallbackDiag, AbortOnFailedISel);
}
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index 2b82df293c148..fa68411284e77 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -451,7 +451,7 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
IRBuilder<> IRBFail(CheckTerm);
// FIXME: respect -fsanitize-trap / -ftrap-function here?
Constant *StackChkFail = F.getParent()->getOrInsertFunction(
- "__stack_chk_fail", IRB.getVoidTy(), nullptr);
+ "__stack_chk_fail", IRB.getVoidTy());
IRBFail.CreateCall(StackChkFail, {});
}
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 7fbeaddb38e8e..09289f947dc96 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -236,6 +236,7 @@ void StackColoring::calculateLiveIntervals() {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void StackColoring::dumpAllocas() {
dbgs() << "Allocas:\n";
for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
@@ -262,6 +263,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {
dbgs() << " " << AllocaNo << ": " << Range << "\n";
}
}
+#endif
void StackColoring::run() {
DEBUG(dumpAllocas());
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 427d95268c745..dc72ac0732588 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -1,4 +1,4 @@
-//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,22 +7,32 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements the ScheduleDAG class, which is a base class used by
-// scheduling implementation classes.
+/// \file Implements the ScheduleDAG class, which is a base class used by
+/// scheduling implementation classes.
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -33,58 +43,52 @@ static cl::opt<bool> StressSchedOpt(
cl::desc("Stress test instruction scheduling"));
#endif
-void SchedulingPriorityQueue::anchor() { }
+void SchedulingPriorityQueue::anchor() {}
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
- MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
+ MRI(mf.getRegInfo()) {
#ifndef NDEBUG
StressSched = StressSchedOpt;
#endif
}
-ScheduleDAG::~ScheduleDAG() {}
+ScheduleDAG::~ScheduleDAG() = default;
-/// Clear the DAG state (e.g. between scheduling regions).
void ScheduleDAG::clearDAG() {
SUnits.clear();
EntrySU = SUnit();
ExitSU = SUnit();
}
-/// getInstrDesc helper to handle SDNodes.
const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
if (!Node || !Node->isMachineOpcode()) return nullptr;
return &TII->get(Node->getMachineOpcode());
}
-/// addPred - This adds the specified edge as a pred of the current node if
-/// not already. It also adds the current node as a successor of the
-/// specified node.
bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this dependence, don't add a redundant one.
- for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I) {
+ for (SDep &PredDep : Preds) {
// Zero-latency weak edges may be added purely for heuristic ordering. Don't
// add them if another kind of edge already exists.
- if (!Required && I->getSUnit() == D.getSUnit())
+ if (!Required && PredDep.getSUnit() == D.getSUnit())
return false;
- if (I->overlaps(D)) {
- // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
- if (I->getLatency() < D.getLatency()) {
- SUnit *PredSU = I->getSUnit();
+ if (PredDep.overlaps(D)) {
+ // Extend the latency if needed. Equivalent to
+ // removePred(PredDep) + addPred(D).
+ if (PredDep.getLatency() < D.getLatency()) {
+ SUnit *PredSU = PredDep.getSUnit();
// Find the corresponding successor in N.
- SDep ForwardD = *I;
+ SDep ForwardD = PredDep;
ForwardD.setSUnit(this);
- for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(),
- EE = PredSU->Succs.end(); II != EE; ++II) {
- if (*II == ForwardD) {
- II->setLatency(D.getLatency());
+ for (SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep == ForwardD) {
+ SuccDep.setLatency(D.getLatency());
break;
}
}
- I->setLatency(D.getLatency());
+ PredDep.setLatency(D.getLatency());
}
return false;
}
@@ -95,8 +99,10 @@ bool SUnit::addPred(const SDep &D, bool Required) {
SUnit *N = D.getSUnit();
// Update the bookkeeping.
if (D.getKind() == SDep::Data) {
- assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
- assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ assert(NumPreds < std::numeric_limits<unsigned>::max() &&
+ "NumPreds will overflow!");
+ assert(N->NumSuccs < std::numeric_limits<unsigned>::max() &&
+ "NumSuccs will overflow!");
++NumPreds;
++N->NumSuccs;
}
@@ -105,7 +111,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
++WeakPredsLeft;
}
else {
- assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ assert(NumPredsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumPredsLeft will overflow!");
++NumPredsLeft;
}
}
@@ -114,7 +121,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
++N->WeakSuccsLeft;
}
else {
- assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
++N->NumSuccsLeft;
}
}
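
The asserts above switch from UINT_MAX to std::numeric_limits, matching the move from <climits> to <limits> in the include list. The guard in isolation, as a minimal sketch:

#include <cassert>
#include <limits>

// Same check as the old "Counter < UINT_MAX", expressed without <climits>.
void bumpCounter(unsigned &Counter) {
  assert(Counter < std::numeric_limits<unsigned>::max() &&
         "Counter will overflow!");
  ++Counter;
}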
@@ -127,51 +135,46 @@ bool SUnit::addPred(const SDep &D, bool Required) {
return true;
}
-/// removePred - This removes the specified edge as a pred of the current
-/// node if it exists. It also removes the current node as a successor of
-/// the specified node.
void SUnit::removePred(const SDep &D) {
// Find the matching predecessor.
- for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I)
- if (*I == D) {
- // Find the corresponding successor in N.
- SDep P = D;
- P.setSUnit(this);
- SUnit *N = D.getSUnit();
- SmallVectorImpl<SDep>::iterator Succ = find(N->Succs, P);
- assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
- N->Succs.erase(Succ);
- Preds.erase(I);
- // Update the bookkeeping.
- if (P.getKind() == SDep::Data) {
- assert(NumPreds > 0 && "NumPreds will underflow!");
- assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
- --NumPreds;
- --N->NumSuccs;
- }
- if (!N->isScheduled) {
- if (D.isWeak())
- --WeakPredsLeft;
- else {
- assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
- --NumPredsLeft;
- }
- }
- if (!isScheduled) {
- if (D.isWeak())
- --N->WeakSuccsLeft;
- else {
- assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
- --N->NumSuccsLeft;
- }
- }
- if (P.getLatency() != 0) {
- this->setDepthDirty();
- N->setHeightDirty();
- }
- return;
+ SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D);
+ if (I == Preds.end())
+ return;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
}
+ }
+ if (!isScheduled) {
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
}
void SUnit::setDepthDirty() {
@@ -181,9 +184,8 @@ void SUnit::setDepthDirty() {
do {
SUnit *SU = WorkList.pop_back_val();
SU->isDepthCurrent = false;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(),
- E = SU->Succs.end(); I != E; ++I) {
- SUnit *SuccSU = I->getSUnit();
+ for (SDep &SuccDep : SU->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
if (SuccSU->isDepthCurrent)
WorkList.push_back(SuccSU);
}
@@ -197,18 +199,14 @@ void SUnit::setHeightDirty() {
do {
SUnit *SU = WorkList.pop_back_val();
SU->isHeightCurrent = false;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),
- E = SU->Preds.end(); I != E; ++I) {
- SUnit *PredSU = I->getSUnit();
+ for (SDep &PredDep : SU->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
if (PredSU->isHeightCurrent)
WorkList.push_back(PredSU);
}
} while (!WorkList.empty());
}
-/// setDepthToAtLeast - Update this node's successors to reflect the
-/// fact that this node's depth just increased.
-///
void SUnit::setDepthToAtLeast(unsigned NewDepth) {
if (NewDepth <= getDepth())
return;
@@ -217,9 +215,6 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth) {
isDepthCurrent = true;
}
-/// setHeightToAtLeast - Update this node's predecessors to reflect the
-/// fact that this node's height just increased.
-///
void SUnit::setHeightToAtLeast(unsigned NewHeight) {
if (NewHeight <= getHeight())
return;
@@ -228,8 +223,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight) {
isHeightCurrent = true;
}
-/// ComputeDepth - Calculate the maximal path from the node to the exit.
-///
+/// Calculates the maximal path from the node to the exit.
void SUnit::ComputeDepth() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
@@ -238,12 +232,11 @@ void SUnit::ComputeDepth() {
bool Done = true;
unsigned MaxPredDepth = 0;
- for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
- E = Cur->Preds.end(); I != E; ++I) {
- SUnit *PredSU = I->getSUnit();
+ for (const SDep &PredDep : Cur->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
- PredSU->Depth + I->getLatency());
+ PredSU->Depth + PredDep.getLatency());
else {
Done = false;
WorkList.push_back(PredSU);
@@ -261,8 +254,7 @@ void SUnit::ComputeDepth() {
} while (!WorkList.empty());
}
-/// ComputeHeight - Calculate the maximal path from the node to the entry.
-///
+/// Calculates the maximal path from the node to the entry.
void SUnit::ComputeHeight() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
@@ -271,12 +263,11 @@ void SUnit::ComputeHeight() {
bool Done = true;
unsigned MaxSuccHeight = 0;
- for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
- E = Cur->Succs.end(); I != E; ++I) {
- SUnit *SuccSU = I->getSUnit();
+ for (const SDep &SuccDep : Cur->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
- SuccSU->Height + I->getLatency());
+ SuccSU->Height + SuccDep.getLatency());
else {
Done = false;
WorkList.push_back(SuccSU);
@@ -310,6 +301,7 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
if (this == &DAG->ExitSU)
OS << "ExitSU";
@@ -319,15 +311,13 @@ void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
OS << "SU(" << NodeNum << ")";
}
-/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
-/// a group of nodes flagged together.
-void SUnit::dump(const ScheduleDAG *G) const {
+LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const {
print(dbgs(), G);
dbgs() << ": ";
G->dumpNode(this);
}
-void SUnit::dumpAll(const ScheduleDAG *G) const {
+LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
dump(G);
dbgs() << " # preds left : " << NumPredsLeft << "\n";
@@ -343,41 +333,39 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (Preds.size() != 0) {
dbgs() << " Predecessors:\n";
- for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I) {
+ for (const SDep &SuccDep : Preds) {
dbgs() << " ";
- switch (I->getKind()) {
+ switch (SuccDep.getKind()) {
case SDep::Data: dbgs() << "data "; break;
case SDep::Anti: dbgs() << "anti "; break;
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ord "; break;
}
- I->getSUnit()->print(dbgs(), G);
- if (I->isArtificial())
+ SuccDep.getSUnit()->print(dbgs(), G);
+ if (SuccDep.isArtificial())
dbgs() << " *";
- dbgs() << ": Latency=" << I->getLatency();
- if (I->isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << ": Latency=" << SuccDep.getLatency();
+ if (SuccDep.isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
dbgs() << "\n";
}
}
if (Succs.size() != 0) {
dbgs() << " Successors:\n";
- for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
- I != E; ++I) {
+ for (const SDep &SuccDep : Succs) {
dbgs() << " ";
- switch (I->getKind()) {
+ switch (SuccDep.getKind()) {
case SDep::Data: dbgs() << "data "; break;
case SDep::Anti: dbgs() << "anti "; break;
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ord "; break;
}
- I->getSUnit()->print(dbgs(), G);
- if (I->isArtificial())
+ SuccDep.getSUnit()->print(dbgs(), G);
+ if (SuccDep.isArtificial())
dbgs() << " *";
- dbgs() << ": Latency=" << I->getLatency();
- if (I->isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << ": Latency=" << SuccDep.getLatency();
+ if (SuccDep.isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
dbgs() << "\n";
}
}
@@ -385,47 +373,44 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
#endif
#ifndef NDEBUG
-/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
-/// their state is consistent. Return the number of scheduled nodes.
-///
unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
bool AnyNotSched = false;
unsigned DeadNodes = 0;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- if (!SUnits[i].isScheduled) {
- if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ for (const SUnit &SUnit : SUnits) {
+ if (!SUnit.isScheduled) {
+ if (SUnit.NumPreds == 0 && SUnit.NumSuccs == 0) {
++DeadNodes;
continue;
}
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has not been scheduled!\n";
AnyNotSched = true;
}
- if (SUnits[i].isScheduled &&
- (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
- unsigned(INT_MAX)) {
+ if (SUnit.isScheduled &&
+ (isBottomUp ? SUnit.getHeight() : SUnit.getDepth()) >
+ unsigned(std::numeric_limits<int>::max())) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
}
if (isBottomUp) {
- if (SUnits[i].NumSuccsLeft != 0) {
+ if (SUnit.NumSuccsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has successors left!\n";
AnyNotSched = true;
}
} else {
- if (SUnits[i].NumPredsLeft != 0) {
+ if (SUnit.NumPredsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has predecessors left!\n";
AnyNotSched = true;
}
@@ -436,36 +421,33 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
}
#endif
-/// InitDAGTopologicalSorting - create the initial topological
-/// ordering from the DAG to be scheduled.
-///
-/// The idea of the algorithm is taken from
-/// "Online algorithms for managing the topological order of
-/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
-/// This is the MNR algorithm, which was first introduced by
-/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
-/// "Maintaining a topological order under edge insertions".
-///
-/// Short description of the algorithm:
-///
-/// Topological ordering, ord, of a DAG maps each node to a topological
-/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
-///
-/// This means that if there is a path from the node X to the node Z,
-/// then ord(X) < ord(Z).
-///
-/// This property can be used to check for reachability of nodes:
-/// if Z is reachable from X, then an insertion of the edge Z->X would
-/// create a cycle.
-///
-/// The algorithm first computes a topological ordering for the DAG by
-/// initializing the Index2Node and Node2Index arrays and then tries to keep
-/// the ordering up-to-date after edge insertions by reordering the DAG.
-///
-/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
-/// the nodes reachable from Y, and then shifts them using Shift to lie
-/// immediately after X in Index2Node.
void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ // The idea of the algorithm is taken from
+ // "Online algorithms for managing the topological order of
+ // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+ // This is the MNR algorithm, which was first introduced by
+ // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+ // "Maintaining a topological order under edge insertions".
+ //
+ // Short description of the algorithm:
+ //
+ // Topological ordering, ord, of a DAG maps each node to a topological
+ // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+ //
+ // This means that if there is a path from the node X to the node Z,
+ // then ord(X) < ord(Z).
+ //
+ // This property can be used to check for reachability of nodes:
+ // if Z is reachable from X, then an insertion of the edge Z->X would
+ // create a cycle.
+ //
+ // The algorithm first computes a topological ordering for the DAG by
+ // initializing the Index2Node and Node2Index arrays and then tries to keep
+ // the ordering up-to-date after edge insertions by reordering the DAG.
+ //
+ // On insertion of the edge X->Y, the algorithm first marks by calling DFS
+ // the nodes reachable from Y, and then shifts them using Shift to lie
+ // immediately after X in Index2Node.
unsigned DAGSize = SUnits.size();
std::vector<SUnit*> WorkList;
WorkList.reserve(DAGSize);
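
The invariant spelled out in the comment block above, ord(X) < ord(Y) for every edge X->Y, is what makes the cheap cycle pre-check possible: an inserted edge Z->X can only close a cycle when ord(X) < ord(Z). A toy illustration over plain adjacency data (not the SUnit structures):

#include <vector>

struct ToyOrder {
  std::vector<int> Ord; // topological index per node

  // Adding edge Z->X is trivially safe when X already comes after Z: with
  // ord(X) > ord(Z) there can be no path X..Z, hence no cycle. Only the
  // remaining case needs the DFS/Shift machinery described above.
  bool needsReachabilityCheck(int Z, int X) const { return Ord[X] < Ord[Z]; }
};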
@@ -476,18 +458,17 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
// Initialize the data structures.
if (ExitSU)
WorkList.push_back(ExitSU);
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- int NodeNum = SU->NodeNum;
- unsigned Degree = SU->Succs.size();
+ for (SUnit &SU : SUnits) {
+ int NodeNum = SU.NodeNum;
+ unsigned Degree = SU.Succs.size();
// Temporarily use the Node2Index array as scratch space for degree counts.
Node2Index[NodeNum] = Degree;
// Is it a node without dependencies?
if (Degree == 0) {
- assert(SU->Succs.empty() && "SUnit should have no successors");
+ assert(SU.Succs.empty() && "SUnit should have no successors");
// Collect leaf nodes.
- WorkList.push_back(SU);
+ WorkList.push_back(&SU);
}
}
@@ -497,9 +478,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
WorkList.pop_back();
if (SU->NodeNum < DAGSize)
Allocate(SU->NodeNum, --Id);
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- SUnit *SU = I->getSUnit();
+ for (const SDep &PredDep : SU->Preds) {
+ SUnit *SU = PredDep.getSUnit();
if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
// If all dependencies of the node are processed already,
// then the node can be computed now.
@@ -511,19 +491,15 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
#ifndef NDEBUG
// Check correctness of the ordering
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ for (SUnit &SU : SUnits) {
+ for (const SDep &PD : SU.Preds) {
+ assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
"Wrong topological sorting");
}
}
#endif
}
-/// AddPred - Updates the topological ordering to accommodate an edge
-/// to be added from SUnit X to SUnit Y.
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
int UpperBound, LowerBound;
LowerBound = Node2Index[Y->NodeNum];
@@ -540,16 +516,10 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
}
}
-/// RemovePred - Updates the topological ordering to accommodate an
-/// an edge to be removed from the specified node N from the predecessors
-/// of the current node M.
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
// InitDAGTopologicalSorting();
}
-/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
-/// all nodes affected by the edge insertion. These nodes will later get new
-/// topological indexes by means of the Shift method.
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
bool &HasLoop) {
std::vector<const SUnit*> WorkList;
@@ -560,8 +530,9 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
SU = WorkList.back();
WorkList.pop_back();
Visited.set(SU->NodeNum);
- for (int I = SU->Succs.size()-1; I >= 0; --I) {
- unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ for (const SDep &SuccDep
+ : make_range(SU->Succs.rbegin(), SU->Succs.rend())) {
+ unsigned s = SuccDep.getSUnit()->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (s >= Node2Index.size())
continue;
@@ -571,14 +542,93 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
}
// Visit successors if not already and in affected region.
if (!Visited.test(s) && Node2Index[s] < UpperBound) {
- WorkList.push_back(SU->Succs[I].getSUnit());
+ WorkList.push_back(SuccDep.getSUnit());
}
}
} while (!WorkList.empty());
}
-/// Shift - Renumber the nodes so that the topological ordering is
-/// preserved.
+std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
+ const SUnit &TargetSU,
+ bool &Success) {
+ std::vector<const SUnit*> WorkList;
+ int LowerBound = Node2Index[StartSU.NodeNum];
+ int UpperBound = Node2Index[TargetSU.NodeNum];
+ bool Found = false;
+ BitVector VisitedBack;
+ std::vector<int> Nodes;
+
+ if (LowerBound > UpperBound) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.reserve(SUnits.size());
+ Visited.reset();
+
+ // Starting from StartSU, visit all successors up
+ // to UpperBound.
+ WorkList.push_back(&StartSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ const SUnit *Succ = SU->Succs[I].getSUnit();
+ unsigned s = Succ->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (Succ->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ Found = true;
+ continue;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ Visited.set(s);
+ WorkList.push_back(Succ);
+ }
+ }
+ } while (!WorkList.empty());
+
+ if (!Found) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.clear();
+ VisitedBack.resize(SUnits.size());
+ Found = false;
+
+ // Starting from TargetSU, visit all predecessors up
+ // to LowerBound. SUs that are visited by the two
+ // passes are added to Nodes.
+ WorkList.push_back(&TargetSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (int I = SU->Preds.size()-1; I >= 0; --I) {
+ const SUnit *Pred = SU->Preds[I].getSUnit();
+ unsigned s = Pred->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
+ if (Pred->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == LowerBound) {
+ Found = true;
+ continue;
+ }
+ if (!VisitedBack.test(s) && Visited.test(s)) {
+ VisitedBack.set(s);
+ WorkList.push_back(Pred);
+ Nodes.push_back(s);
+ }
+ }
+ } while (!WorkList.empty());
+
+ assert(Found && "Error in SUnit Graph!");
+ Success = true;
+ return Nodes;
+}
+
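The new GetSubGraph intersects two searches: nodes reachable forward from StartSU and backward from TargetSU form the subgraph lying on paths between them. A compact sketch of the same idea over plain adjacency lists; the topological-index bounding and BitVector details of the real code are deliberately omitted.

#include <vector>

static void reach(int From, const std::vector<std::vector<int>> &Adj,
                  std::vector<bool> &Seen) {
  std::vector<int> Work{From};
  while (!Work.empty()) {
    int Cur = Work.back();
    Work.pop_back();
    if (Seen[Cur])
      continue;
    Seen[Cur] = true;
    for (int Next : Adj[Cur])
      Work.push_back(Next);
  }
}

// A node belongs to the subgraph iff it is reachable forward from Start and
// backward from Target; Succ and Pred are the forward/backward edge lists.
std::vector<int> subGraphBetween(int Start, int Target,
                                 const std::vector<std::vector<int>> &Succ,
                                 const std::vector<std::vector<int>> &Pred) {
  std::vector<bool> Fwd(Succ.size(), false), Bwd(Succ.size(), false);
  reach(Start, Succ, Fwd);
  reach(Target, Pred, Bwd);
  std::vector<int> Nodes;
  for (int I = 0, E = (int)Succ.size(); I != E; ++I)
    if (Fwd[I] && Bwd[I])
      Nodes.push_back(I);
  return Nodes;
}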
void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
int UpperBound) {
std::vector<int> L;
@@ -598,28 +648,23 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
}
- for (unsigned j = 0; j < L.size(); ++j) {
- Allocate(L[j], i - shift);
+ for (unsigned LI : L) {
+ Allocate(LI, i - shift);
i = i + 1;
}
}
-
-/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
-/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
// Is SU reachable from TargetSU via successor edges?
if (IsReachable(SU, TargetSU))
return true;
- for (SUnit::pred_iterator
- I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
- if (I->isAssignedRegDep() &&
- IsReachable(SU, I->getSUnit()))
+ for (const SDep &PredDep : TargetSU->Preds)
+ if (PredDep.isAssignedRegDep() &&
+ IsReachable(SU, PredDep.getSUnit()))
return true;
return false;
}
-/// IsReachable - Checks if SU is reachable from TargetSU.
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
// If insertion of the edge SU->TargetSU would create a cycle
@@ -637,7 +682,6 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
return HasLoop;
}
-/// Allocate - assign the topological index to the node n.
void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
Node2Index[n] = index;
Index2Node[index] = n;
@@ -647,4 +691,4 @@ ScheduleDAGTopologicalSort::
ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
: SUnits(sunits), ExitSU(exitsu) {}
-ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 611c5a71bd5a2..18823b74c47fe 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements the ScheduleDAGInstrs class, which implements re-scheduling
-// of MachineInstrs.
+/// \file This implements the ScheduleDAGInstrs class, which implements
+/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//
@@ -101,8 +101,8 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
SchedModel.init(ST.getSchedModel(), &ST, TII);
}
-/// getUnderlyingObjectFromInt - This is the function that does the work of
-/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+/// This is the function that does the work of looking through basic
+/// ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
do {
if (const Operator *U = dyn_cast<Operator>(V)) {
@@ -129,8 +129,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} while (1);
}
-/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
-/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+/// This is a wrapper around GetUnderlyingObjects and adds support for basic
+/// ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
SmallVectorImpl<Value *> &Objects,
const DataLayout &DL) {
@@ -158,9 +158,8 @@ static void getUnderlyingObjects(const Value *V,
} while (!Working.empty());
}
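A source-level picture of the ptrtoint+arithmetic+inttoptr sequences the helpers above look through; the detour through an integer does not change which allocation the result points into. This sketch is illustrative only and uses no LLVM APIs.

#include <cstdint>

int *offsetViaInteger(int *Base, std::uintptr_t Bytes) {
  std::uintptr_t Raw = reinterpret_cast<std::uintptr_t>(Base); // ptrtoint
  Raw += Bytes;                                                // arithmetic
  return reinterpret_cast<int *>(Raw);                         // inttoptr
}
// The underlying object of the returned pointer is still whatever Base
// pointed into, which is what getUnderlyingObjectFromInt recovers.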
-/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
-/// information and it can be tracked to a normal reference to a known
-/// object, return the Value for that object.
+/// If this machine instr has memory reference information and it can be tracked
+/// to a normal reference to a known object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo &MFI,
UnderlyingObjectsVector &Objects,
@@ -216,10 +215,6 @@ void ScheduleDAGInstrs::finishBlock() {
BB = nullptr;
}
-/// Initialize the DAG and common scheduler state for the current scheduling
-/// region. This does not actually create the DAG, only clears it. The
-/// scheduling driver may call BuildSchedGraph multiple times per scheduling
-/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
@@ -230,20 +225,10 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
NumRegionInstrs = regioninstrs;
}
-/// Close the current scheduling region. Don't clear any state in case the
-/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
// Nothing to do.
}
-/// addSchedBarrierDeps - Add dependencies from instructions in the current
-/// list of instructions being scheduled to scheduling barrier by adding
-/// the exit SU to the register defs and use list. This is because we want to
-/// make sure instructions which define registers that are either used by
-/// the terminator or are live-out are properly scheduled. This is
-/// especially important when the definition latency of the return value(s)
-/// are too high to be hidden by the branch or when the liveout registers
-/// used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
ExitSU.setInstr(ExitMI);
@@ -271,7 +256,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
}
}
-/// MO is an operand of SU's instruction that defines a physical register. Add
+/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
@@ -313,9 +298,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
-/// this SUnit to following instructions in the same scheduling region that
-/// depend the physical register referenced at OperIdx.
+/// \brief Adds register dependencies (data, anti, and output) from this SUnit
+/// to following instructions in the same scheduling region that depend on the
+/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
@@ -406,9 +391,9 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
return TRI->getSubRegIndexLaneMask(SubReg);
}
-/// addVRegDefDeps - Add register output and data dependencies from this SUnit
-/// to instructions that occur later in the same scheduling region if they read
-/// from or write to the virtual register defined at OperIdx.
+/// Adds register output and data dependencies from this SUnit to instructions
+/// that occur later in the same scheduling region if they read from or write to
+/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
@@ -515,10 +500,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
-/// addVRegUseDeps - Add a register data dependency if the instruction that
-/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
-/// register antidependency from this SUnit to instructions that occur later in
-/// the same scheduling region if they write the virtual register.
+/// \brief Adds a register data dependency if the instruction that defines the
+/// virtual register used at OperIdx is mapped to an SUnit. Add a register
+/// antidependency from this SUnit to instructions that occur later in the same
+/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
@@ -545,87 +530,25 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// Return true if MI is an instruction we are unable to reason about
+/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}
-/// This returns true if the two MIs need a chain edge between them.
-/// This is called on normal stores and loads.
-static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- const DataLayout &DL, MachineInstr *MIa,
- MachineInstr *MIb) {
- const MachineFunction *MF = MIa->getParent()->getParent();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
- assert ((MIa->mayStore() || MIb->mayStore()) &&
- "Dependency checked between two loads");
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA))
- return false;
-
- // To this point analysis is generic. From here on we do need AA.
- if (!AA)
- return true;
-
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
- return true;
-
- MachineMemOperand *MMOa = *MIa->memoperands_begin();
- MachineMemOperand *MMOb = *MIb->memoperands_begin();
-
- if (!MMOa->getValue() || !MMOb->getValue())
- return true;
-
- // The following interface to AA is fashioned after DAGCombiner::isAlias
- // and operates with MachineMemOperand offset with some important
- // assumptions:
- // - LLVM fundamentally assumes flat address spaces.
- // - MachineOperand offset can *only* result from legalization and
- // cannot affect queries other than the trivial case of overlap
- // checking.
- // - These offsets never wrap and never step outside
- // of allocated objects.
- // - There should never be any negative offsets here.
- //
- // FIXME: Modify API to hide this math from "user"
- // FIXME: Even before we go to AA we can reason locally about some
- // memory objects. It can save compile time, and possibly catch some
- // corner cases not currently covered.
-
- assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
- assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
-
- int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
- int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
- int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
-
- AliasResult AAResult =
- AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
-
- return (AAResult != NoAlias);
-}
-
-/// Check whether two objects need a chain edge and add it if needed.
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
unsigned Latency) {
- if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(),
- SUb->getInstr())) {
+ if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
SDep Dep(SUa, SDep::MayAliasMem);
Dep.setLatency(Latency);
SUb->addPred(Dep);
}
}
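
The deleted helper's hand-rolled AA query is now a single MachineInstr::mayAlias call, but the overlap-extent arithmetic it used is worth keeping in view. A standalone sketch of that computation, on plain structs rather than MachineMemOperands:

#include <algorithm>
#include <cstdint>

struct MemRef {
  int64_t Offset; // offset from the common underlying value
  int64_t Size;   // access size in bytes
};

// Widen both accesses so they are measured from the smaller offset; these are
// the Overlapa/Overlapb extents the removed code handed to AliasAnalysis.
void overlapExtents(const MemRef &A, const MemRef &B, int64_t &ExtentA,
                    int64_t &ExtentB) {
  int64_t MinOffset = std::min(A.Offset, B.Offset);
  ExtentA = A.Size + A.Offset - MinOffset;
  ExtentB = B.Size + B.Offset - MinOffset;
}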
-/// Create an SUnit for each real instruction, numbered in top-down topological
-/// order. The instruction order A < B, implies that no edge exists from B to A.
+/// \brief Creates an SUnit for each real instruction, numbered in top-down
+/// topological order. The instruction order A < B implies that no edge exists
+/// from B to A.
///
/// Map each real instruction to its SUnit.
///
@@ -682,14 +605,13 @@ void ScheduleDAGInstrs::initSUnits() {
}
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
-
/// Current total number of SUs in map.
unsigned NumNodes;
/// 1 for loads, 0 for stores. (see comment in SUList)
unsigned TrueMemOrderLatency;
-public:
+public:
Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
/// To keep NumNodes up to date, insert() is used instead of
@@ -697,8 +619,8 @@ public:
ValueType &operator[](const SUList &Key) {
llvm_unreachable("Don't use. Use insert() instead."); };
- /// Add SU to the SUList of V. If Map grows huge, reduce its size
- /// by calling reduce().
+ /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
+ /// reduce().
void inline insert(SUnit *SU, ValueType V) {
MapVector::operator[](V).push_back(SU);
NumNodes++;
@@ -723,7 +645,7 @@ public:
unsigned inline size() const { return NumNodes; }
- /// Count the number of SUs in this map after a reduction.
+ /// Counts the number of SUs in this map after a reduction.
void reComputeSize(void) {
NumNodes = 0;
for (auto &I : *this)
@@ -797,9 +719,6 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
map.reComputeSize();
}
-/// If RegPressure is non-null, compute register pressure as a side effect. The
-/// DAG builder is an efficient place to do it because it already visits
-/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs,
@@ -1088,10 +1007,6 @@ void ScheduleDAGInstrs::Value2SUsMap::dump() {
}
}
-/// Reduce maps in FIFO order, by N SUs. This is better than turning
-/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
-/// it avoids unnecessary edges between seen SUs above the new
-/// BarrierChain, and those below it.
void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
Value2SUsMap &loads, unsigned N) {
DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
@@ -1142,7 +1057,6 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
loads.dump());
}
-/// \brief Initialize register live-range state for updating kills.
void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
// Start with no live registers.
LiveRegs.reset();
@@ -1178,32 +1092,35 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
return;
} else
- (--End)->clearRegisterKills(Reg, TRI);
+ (--End)->clearRegisterKills(Reg, TRI);
}
}
-bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
+void ScheduleDAGInstrs::toggleKillFlag(MachineInstr &MI, MachineOperand &MO) {
+ if (MO.isDebug())
+ return;
+
// Setting kill flag...
if (!MO.isKill()) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
- return false;
+ toggleBundleKillFlag(&MI, MO.getReg(), true, TRI);
+ return;
}
// If MO itself is live, clear the kill flag...
if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
- return false;
+ toggleBundleKillFlag(&MI, MO.getReg(), false, TRI);
+ return;
}
// If any subreg of MO is live, then create an imp-def for that
// subreg and keep MO marked as killed.
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
+ toggleBundleKillFlag(&MI, MO.getReg(), false, TRI);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
- MachineInstrBuilder MIB(MF, MI);
+ MachineInstrBuilder MIB(MF, &MI);
for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
if (LiveRegs.test(*SubRegs)) {
MIB.addReg(*SubRegs, RegState::ImplicitDefine);
@@ -1213,13 +1130,12 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
if(AllDead) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
+ toggleBundleKillFlag(&MI, MO.getReg(), true, TRI);
}
- return false;
}
-// FIXME: Reuse the LivePhysRegs utility for this.
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
+ // FIXME: Reuse the LivePhysRegs utility for this.
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
LiveRegs.resize(TRI->getNumRegs());
@@ -1289,7 +1205,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
- toggleKillFlag(&MI, MO);
+ toggleKillFlag(MI, MO);
DEBUG(MI.dump());
DEBUG({
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
@@ -1319,6 +1235,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
SU->getInstr()->dump();
#endif
@@ -1347,7 +1264,7 @@ std::string ScheduleDAGInstrs::getDAGName() const {
//===----------------------------------------------------------------------===//
namespace llvm {
-/// \brief Internal state used to compute SchedDFSResult.
+/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
SchedDFSResult &R;
@@ -1358,8 +1275,8 @@ class SchedDFSImpl {
struct RootData {
unsigned NodeID;
- unsigned ParentNodeID; // Parent node (member of the parent subtree).
- unsigned SubInstrCount; // Instr count in this tree only, not children.
+ unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
+ unsigned SubInstrCount; ///< Instr count in this tree only, not children.
RootData(unsigned id): NodeID(id),
ParentNodeID(SchedDFSResult::InvalidSubtreeID),
@@ -1375,7 +1292,7 @@ public:
RootSet.setUniverse(R.DFSNodeData.size());
}
- /// Return true if this node been visited by the DFS traversal.
+  /// Returns true if this node has been visited by the DFS traversal.
///
/// During visitPostorderNode the Node's SubtreeID is assigned to the Node
/// ID. Later, SubtreeID is updated but remains valid.
@@ -1384,7 +1301,7 @@ public:
!= SchedDFSResult::InvalidSubtreeID;
}
- /// Initialize this node's instruction count. We don't need to flag the node
+ /// Initializes this node's instruction count. We don't need to flag the node
/// visited until visitPostorder because the DAG cannot have cycles.
void visitPreorder(const SUnit *SU) {
R.DFSNodeData[SU->NodeNum].InstrCount =
@@ -1433,8 +1350,8 @@ public:
RootSet[SU->NodeNum] = RData;
}
- /// Called once for each tree edge after calling visitPostOrderNode on the
- /// predecessor. Increment the parent node's instruction count and
+ /// \brief Called once for each tree edge after calling visitPostOrderNode on
+ /// the predecessor. Increment the parent node's instruction count and
/// preemptively join this subtree to its parent's if it is small enough.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
R.DFSNodeData[Succ->NodeNum].InstrCount
@@ -1442,13 +1359,13 @@ public:
joinPredSubtree(PredDep, Succ);
}
- /// Add a connection for cross edges.
+ /// Adds a connection for cross edges.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
}
- /// Set each node's subtree ID to the representative ID and record connections
- /// between trees.
+ /// Sets each node's subtree ID to the representative ID and record
+ /// connections between trees.
void finalize() {
SubtreeClasses.compress();
R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
@@ -1484,8 +1401,8 @@ public:
}
protected:
- /// Join the predecessor subtree with the successor that is its DFS
- /// parent. Apply some heuristics before joining.
+ /// Joins the predecessor subtree with the successor that is its DFS parent.
+ /// Applies some heuristics before joining.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
bool CheckLimit = true) {
assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
@@ -1531,10 +1448,10 @@ protected:
} while (FromTree != SchedDFSResult::InvalidSubtreeID);
}
};
-} // namespace llvm
+} // end namespace llvm
namespace {
-/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
public:
@@ -1569,7 +1486,7 @@ static bool hasDataSucc(const SUnit *SU) {
return false;
}
-/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
if (!IsBottomUp)
@@ -1626,8 +1543,8 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
}
}
-LLVM_DUMP_METHOD
-void ILPValue::print(raw_ostream &OS) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
OS << InstrCount << " / " << Length << " = ";
if (!Length)
OS << "BADILP";
@@ -1635,8 +1552,7 @@ void ILPValue::print(raw_ostream &OS) const {
OS << format("%g", ((double)InstrCount / Length));
}
-LLVM_DUMP_METHOD
-void ILPValue::dump() const {
+LLVM_DUMP_METHOD void ILPValue::dump() const {
dbgs() << *this << '\n';
}
@@ -1648,4 +1564,5 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
return OS;
}
-} // namespace llvm
+} // end namespace llvm
+#endif
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 83bc1ba7beb94..b3d83d5313aff 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,4 +1,4 @@
-//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,11 +15,13 @@
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <cassert>
using namespace llvm;
@@ -29,8 +31,7 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
const InstrItineraryData *II, const ScheduleDAG *SchedDAG,
const char *ParentDebugType)
: ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
- DAG(SchedDAG), IssueWidth(0), IssueCount(0) {
-
+ DAG(SchedDAG) {
// Determine the maximum depth of any itinerary. This determines the depth of
// the scoreboard. We always make the scoreboard at least 1 cycle deep to
// avoid dealing with the boundary condition.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2c7bffe76503f..4d468551ae24e 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -53,10 +53,6 @@ STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
- CombinerAA("combiner-alias-analysis", cl::Hidden,
- cl::desc("Enable DAG combiner alias-analysis heuristics"));
-
- static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
@@ -133,6 +129,9 @@ namespace {
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
void AddToWorklist(SDNode *N) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted Node added to Worklist");
+
// Skip handle nodes as they can't usefully be combined and confuse the
// zero-use deletion strategy.
if (N->getOpcode() == ISD::HANDLENODE)
@@ -177,6 +176,7 @@ namespace {
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
+ unsigned MaximumLegalStoreInBits;
/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
@@ -232,9 +232,12 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
+ SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitUADDO(SDNode *N);
SDValue visitSUBC(SDNode *N);
+ SDValue visitUSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
@@ -259,6 +262,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
@@ -274,6 +278,7 @@ namespace {
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitAssertZext(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
@@ -336,6 +341,7 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
@@ -344,6 +350,8 @@ namespace {
bool NotExtCompare = false);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
+ SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
@@ -377,6 +385,7 @@ namespace {
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchLoadCombine(SDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -384,9 +393,9 @@ namespace {
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
- SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
- SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx);
+ SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask, SDValue VecIn1,
+ SDValue VecIn2, unsigned LeftIdx);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -416,15 +425,12 @@ namespace {
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
};
/// This is a helper function for visitMUL to check the profitability
@@ -435,12 +441,6 @@ namespace {
SDValue &AddNode,
SDValue &ConstNode);
- /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
- /// constant build_vector of the stored constant values in Stores.
- SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
- ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains,
- EVT Ty) const;
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
@@ -451,34 +451,35 @@ namespace {
EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
bool &NarrowLoad);
+ /// Helper function for MergeConsecutiveStores which merges the
+ /// component store chains.
+ SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
- /// \return number of stores that were merged into a merged store (always
- /// a prefix of \p StoreNode).
- bool MergeStoresOfConstantsOrVecElts(
- SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
- /// Loads that may alias with those stores are placed in AliasLoadNodes.
- void getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+ void getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl<MemOpLink> &StoreNodes);
/// Helper function for MergeConsecutiveStores. Checks if
/// Candidate stores have indirect dependency through their
/// operands. \return True if safe to merge
bool checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return number of stores that were merged into a merged store (the
/// affected nodes are stored as a prefix in \p StoreNodes).
- bool MergeConsecutiveStores(StoreSDNode *N,
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ bool MergeConsecutiveStores(StoreSDNode *N);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -493,6 +494,13 @@ namespace {
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
+
+ MaximumLegalStoreInBits = 0;
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits();
}
/// Runs the dag combiner on all nodes in the work list
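
The new constructor code caches the widest bit width among the target's legal simple types; for example, on a target where i64 and v4i32 are both legal, MaximumLegalStoreInBits ends up as 128. The scan reduced to its essentials, as a sketch rather than the MVT iteration itself:

#include <algorithm>
#include <vector>

unsigned widestLegalStoreBits(const std::vector<unsigned> &LegalTypeSizes) {
  unsigned MaxBits = 0;
  for (unsigned Bits : LegalTypeSizes) // stands in for MVT::all_valuetypes()
    MaxBits = std::max(MaxBits, Bits);
  return MaxBits;
}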
@@ -607,10 +615,16 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
switch (Op.getOpcode()) {
default: return false;
- case ISD::ConstantFP:
- // Don't invert constant FP values after legalize. The negated constant
- // isn't necessarily legal.
- return LegalOperations ? 0 : 1;
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ EVT VT = Op.getValueType();
+ return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+ }
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
@@ -629,7 +643,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+ if (!Options->NoSignedZerosFPMath &&
+ !Op.getNode()->getFlags()->hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -1079,37 +1094,36 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
- if (!NN0.getNode())
- return SDValue();
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
- SDValue NN1;
- if (N0 == N1)
- NN1 = NN0;
- else {
- NN1 = PromoteOperand(N1, PVT, Replace1);
- if (!NN1.getNode())
- return SDValue();
- }
+ SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+ SDLoc DL(Op);
- AddToWorklist(NN0.getNode());
- if (NN1.getNode())
- AddToWorklist(NN1.getNode());
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
- if (Replace0)
+  // Now replace instances of N0 and N1
+ if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
+ NN0.getOpcode() != ISD::DELETED_NODE) {
+ AddToWorklist(NN0.getNode());
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
- if (Replace1)
+ }
+
+ if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
+ NN1.getOpcode() != ISD::DELETED_NODE) {
+ AddToWorklist(NN1.getNode());
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+ }
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, NN0, NN1));
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
}
return SDValue();
}
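
The promotion path now re-checks node handles after the replacement steps, since ReplaceLoadWithPromotedLoad can delete nodes out from under it. The guard reduced to its core, with illustrative names and the SelectionDAG types elided:

struct Node {
  bool Deleted = false; // stands in for getOpcode() == ISD::DELETED_NODE
};

// Only hand a node back to the combiner if the replacement machinery did not
// already recycle it; otherwise report "no combine" so the caller moves on.
Node *resultIfStillAlive(Node *Result) {
  if (Result && !Result->Deleted)
    return Result;
  return nullptr;
}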
@@ -1137,26 +1151,32 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace = false;
SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
- N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
- N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = ZExtPromoteOperand(N0, PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
+
if (!N0.getNode())
return SDValue();
+ SDLoc DL(Op);
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
+
AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
}
return SDValue();
}
@@ -1361,8 +1381,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
- SDValue OpV = RV;
- DAG.ReplaceAllUsesWith(N, &OpV);
+ DAG.ReplaceAllUsesWith(N, &RV);
}
// Push the new node and any users onto the worklist
@@ -1389,7 +1408,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::UADDO: return visitUADDO(N);
case ISD::SUBC: return visitSUBC(N);
+ case ISD::USUBO: return visitUSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
@@ -1415,6 +1436,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
@@ -1430,6 +1452,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::AssertZext: return visitAssertZext(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1574,7 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
- SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
@@ -1618,6 +1641,86 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
+ // Remove Nodes that are chained to another node in the list. Do so
+ // by walking up chains breadth-first, stopping when we've seen
+ // another operand. In general we must climb to the EntryNode, but we can exit
+ // early if we find all remaining work is associated with just one operand as
+ // no further pruning is possible.
+
+ // List of nodes to search through and original Ops from which they originate.
+ SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
+ SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
+ SmallPtrSet<SDNode *, 16> SeenChains;
+ bool DidPruneOps = false;
+
+ unsigned NumLeftToConsider = 0;
+ for (const SDValue &Op : Ops) {
+ Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
+ OpWorkCount.push_back(1);
+ }
+
+ auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
+ // If this is an Op, we can remove the op from the list. Re-mark any
+ // search associated with it as coming from the current OpNumber.
+ if (SeenOps.count(Op) != 0) {
+ Changed = true;
+ DidPruneOps = true;
+ unsigned OrigOpNumber = 0;
+ while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
+ OrigOpNumber++;
+ assert((OrigOpNumber != Ops.size()) &&
+ "expected to find TokenFactor Operand");
+ // Re-mark worklist from OrigOpNumber to OpNumber
+ for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
+ if (Worklist[i].second == OrigOpNumber) {
+ Worklist[i].second = OpNumber;
+ }
+ }
+ OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
+ OpWorkCount[OrigOpNumber] = 0;
+ NumLeftToConsider--;
+ }
+ // Add if it's a new chain
+ if (SeenChains.insert(Op).second) {
+ OpWorkCount[OpNumber]++;
+ Worklist.push_back(std::make_pair(Op, OpNumber));
+ }
+ };
+
+ for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
+ // We need to consider at least 2 Ops to prune.
+ if (NumLeftToConsider <= 1)
+ break;
+ auto CurNode = Worklist[i].first;
+ auto CurOpNumber = Worklist[i].second;
+ assert((OpWorkCount[CurOpNumber] > 0) &&
+ "Node should not appear in worklist");
+ switch (CurNode->getOpcode()) {
+ case ISD::EntryToken:
+ // Hitting EntryToken is the only way for the search to terminate without
+ // hitting another operand's search. Prevent us from marking this operand
+ // considered.
+ NumLeftToConsider++;
+ break;
+ case ISD::TokenFactor:
+ for (const SDValue &Op : CurNode->op_values())
+ AddToWorklist(i, Op.getNode(), CurOpNumber);
+ break;
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
+ break;
+ default:
+ if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
+ AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
+ break;
+ }
+ OpWorkCount[CurOpNumber]--;
+ if (OpWorkCount[CurOpNumber] == 0)
+ NumLeftToConsider--;
+ }
+
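A toy, simplified sketch of the pruning walk above (ignoring the early-exit bookkeeping and the memory-node special case); node indices and predecessor lists are made up for illustration:

  #include <cassert>
  #include <cstddef>
  #include <set>
  #include <vector>

  // Node I's chain predecessors are Preds[I]. An operand is redundant if it is
  // reachable by walking predecessors starting from any operand in the list.
  std::vector<int> pruneOps(const std::vector<std::vector<int>> &Preds,
                            const std::vector<int> &Ops) {
    std::set<int> Seen;
    std::vector<int> Work(Ops.begin(), Ops.end());
    for (size_t I = 0; I < Work.size(); ++I)
      for (int P : Preds[Work[I]])
        if (Seen.insert(P).second)
          Work.push_back(P);
    std::vector<int> Pruned;
    for (int Op : Ops)
      if (!Seen.count(Op))
        Pruned.push_back(Op);
    return Pruned;
  }

  int main() {
    // Chain 0 <- 1 <- 2, and node 3 is independent; a token factor of
    // {1, 2, 3} only needs {2, 3}, since 1 is already below 2.
    std::vector<std::vector<int>> Preds = {{}, {0}, {1}, {}};
    assert((pruneOps(Preds, {1, 2, 3}) == std::vector<int>{2, 3}));
    return 0;
  }
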
SDValue Result;
// If we've changed things around then replace token factor.
@@ -1626,15 +1729,22 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
- // New and improved token factor.
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ if (DidPruneOps) {
+ SmallVector<SDValue, 8> PrunedOps;
+ // Drop operands that were reached while walking up another operand's chain.
+ for (const SDValue &Op : Ops) {
+ if (SeenChains.count(Op.getNode()) == 0)
+ PrunedOps.push_back(Op);
+ }
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+ } else {
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ }
}
- // Add users to worklist if AA is enabled, since it may introduce
- // a lot of new chained token factors while removing memory deps.
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- return CombineTo(N, Result, UseAA /*add to worklist*/);
+ // Add users to worklist, since we may introduce a lot of new
+ // chained token factors while removing memory deps.
+ return CombineTo(N, Result, true /*add to worklist*/);
}
return Result;
@@ -1664,6 +1774,60 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
+ auto BinOpcode = BO->getOpcode();
+ assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
+ BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
+ BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
+ BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
+ BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
+ BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
+ BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
+ BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
+ BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
+ "Unexpected binary operator");
+
+ // Bail out if any constants are opaque because we can't constant fold those.
+ SDValue C1 = BO->getOperand(1);
+ if (!isConstantOrConstantVector(C1, true) &&
+ !isConstantFPBuildVectorOrConstantFP(C1))
+ return SDValue();
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ // TODO: Handle ISD::SELECT_CC.
+ SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+ return SDValue();
+
+ SDValue CT = Sel.getOperand(1);
+ if (!isConstantOrConstantVector(CT, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CT))
+ return SDValue();
+
+ SDValue CF = Sel.getOperand(2);
+ if (!isConstantOrConstantVector(CF, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CF))
+ return SDValue();
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
+ EVT VT = Sel.getValueType();
+ SDLoc DL(Sel);
+ SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
+ assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
+ isConstantFPBuildVectorOrConstantFP(NewCT)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
+ assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
+ isConstantFPBuildVectorOrConstantFP(NewCF)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+}
+
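As a quick illustration of the arithmetic this fold relies on, here is a minimal standalone C++ check of the select-of-constants rewrite (constants chosen arbitrarily; this is not LLVM code):

  #include <cassert>

  // add (select Cond, CT, CF), C1  ==  select Cond, CT + C1, CF + C1
  int addOfSelect(bool Cond, int CT, int CF, int C1) {
    return (Cond ? CT : CF) + C1;        // original form
  }
  int selectOfSums(bool Cond, int CT, int CF, int C1) {
    return Cond ? (CT + C1) : (CF + C1); // folded form
  }
  int main() {
    for (bool Cond : {false, true})
      assert(addOfSelect(Cond, 7, -2, 5) == selectOfSums(Cond, 7, -2, 5));
    return 0;
  }
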
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1712,6 +1876,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
return RADD;
@@ -1774,6 +1941,19 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ if (SDValue Combined = visitADDLike(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLike(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N0.getValueType();
+ SDLoc DL(LocReference);
+
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
@@ -1781,12 +1961,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, DL, VT, N1,
- DAG.getNode(ISD::SHL, DL, VT,
- N0.getOperand(0).getOperand(1),
- N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
@@ -1797,7 +1971,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// and similar xforms where the inner op is either ~0 or 0.
if (NumSignBits == DestBits &&
isOneConstantOrOneSplatConstant(N1->getOperand(1)))
- return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
}
// add (sext i1), X -> sub X, (zext i1)
@@ -1825,39 +1999,61 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// canonicalize constant to RHS.
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
- return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ DL, MVT::Glue));
- // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
+ return SDValue();
+}
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
- return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
- }
+SDValue DAGCombiner::visitUADDO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
+
+ // fold (uaddo x, 0) -> x + no carry out
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
return SDValue();
}
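A minimal standalone sketch of the carry semantics behind these uaddo folds, assuming a GCC/Clang-style __builtin_add_overflow (illustrative only):

  #include <cassert>
  #include <climits>

  int main() {
    unsigned Sum;
    // uaddo x, 0 -> x with a known-zero carry: adding zero never carries.
    assert(!__builtin_add_overflow(42u, 0u, &Sum) && Sum == 42u);
    // When overflow is provably impossible, uaddo degenerates to a plain add
    // with a known-zero carry (here both operands are at most UINT_MAX / 2).
    assert(!__builtin_add_overflow(UINT_MAX / 2, UINT_MAX / 2, &Sum));
    return 0;
  }
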
@@ -1920,6 +2116,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
N1.getNode());
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (sub x, c) -> (add x, -c)
@@ -2066,6 +2265,38 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into a SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // fold (usubo x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // fold (usubo x, 0) -> x + no borrow
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (isAllOnesConstant(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getConstant(0, DL, CarryVT));
+
+ return SDValue();
+}
+
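The (usubo -1, x) canonicalization rests on the identity -1 - x == ~x with no borrow; a small standalone check, again assuming GCC/Clang builtins:

  #include <cassert>
  #include <climits>

  int main() {
    for (unsigned X : {0u, 1u, 42u, UINT_MAX}) {
      unsigned Diff;
      // usubo(-1, x): unsigned subtract with borrow detection.
      bool Borrow = __builtin_sub_overflow(UINT_MAX, X, &Diff);
      assert(!Borrow && Diff == ~X); // -1 - x == ~x, never borrows
    }
    return 0;
  }
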
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2131,6 +2362,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue()) {
SDLoc DL(N);
@@ -2297,6 +2532,23 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
return combined;
}
+static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+ return DAG.getUNDEF(VT);
+
+ // undef / X -> 0
+ // undef % X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2319,8 +2571,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
@@ -2372,7 +2629,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -2384,13 +2641,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2414,6 +2664,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -2444,7 +2700,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv x, c) -> alternate
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -2456,13 +2712,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2482,32 +2731,35 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
- // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
@@ -2536,13 +2788,6 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem.getValue(1);
- // undef % X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X % undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2932,95 +3177,139 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
+SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL) {
+ SDValue LL, LR, RL, RR, N0CC, N1CC;
+ if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
+ !isSetCCEquivalent(N1, RL, RR, N1CC))
+ return SDValue();
+
+ assert(N0.getValueType() == N1.getValueType() &&
+ "Unexpected operand types for bitwise logic op");
+ assert(LL.getValueType() == LR.getValueType() &&
+ RL.getValueType() == RR.getValueType() &&
+ "Unexpected operand types for setcc");
+
+ // If we're here post-legalization or the logic op type is not i1, the logic
+ // op type must match a setcc result type. Also, all folds require new
+ // operations on the left and right operands, so those types must match.
+ EVT VT = N0.getValueType();
+ EVT OpVT = LL.getValueType();
+ if (LegalOperations || VT != MVT::i1)
+ if (VT != getSetCCResultType(OpVT))
+ return SDValue();
+ if (OpVT != RL.getValueType())
+ return SDValue();
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
+ bool IsInteger = OpVT.isInteger();
+ if (LR == RR && CC0 == CC1 && IsInteger) {
+ bool IsZero = isNullConstantOrNullSplatConstant(LR);
+ bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
+
+ // All bits clear?
+ bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
+ // All sign bits clear?
+ bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
+ // Any bits set?
+ bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
+ // Any sign bits set?
+ bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
+
+ // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
+ // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
+ // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
+ // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
+ if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(Or.getNode());
+ return DAG.getSetCC(DL, VT, Or, LR, CC1);
+ }
+
+ // All bits set?
+ bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
+ // All sign bits set?
+ bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
+ // Any bits clear?
+ bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
+ // Any sign bits clear?
+ bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
+
+ // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
+ // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
+ // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
+ // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
+ if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(And.getNode());
+ return DAG.getSetCC(DL, VT, And, LR, CC1);
+ }
+ }
+
+ // TODO: What is the 'or' equivalent of this fold?
+ // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
+ if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+ ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDValue One = DAG.getConstant(1, DL, OpVT);
+ SDValue Two = DAG.getConstant(2, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
+ AddToWorklist(Add.getNode());
+ return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
+ }
+
+ // Try more general transforms if the predicates match and the only user of
+ // the compares is the 'and' or 'or'.
+ if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+ // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
+ if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
+ SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
+ SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
+ SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, Or, Zero, CC1);
+ }
+ }
+
+ // Canonicalize equivalent operands to LL == RL.
+ if (LL == RR && LR == RL) {
+ CC1 = ISD::getSetCCSwappedOperands(CC1);
+ std::swap(RL, RR);
+ }
+
+ // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ if (LL == RL && LR == RR) {
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
+ : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ if (NewCC != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, OpVT))))
+ return DAG.getSetCC(DL, VT, LL, LR, NewCC);
+ }
+
+ return SDValue();
+}
+
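Two of the identities above can be sanity-checked with a small standalone C++ snippet (the test values are arbitrary):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int64_t I = -5; I <= 5; ++I) {
      uint32_t X = (uint32_t)I, Y = (uint32_t)(I + 3);
      // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
      assert(((X == 0) && (Y == 0)) == ((X | Y) == 0));
      // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
      assert(((X != 0) && (X != UINT32_MAX)) == (X + 1u >= 2u));
    }
    return 0;
  }
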
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
-SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
- SDNode *LocReference) {
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
// fold (and x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(LocReference), VT);
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- if (isAllOnesConstant(LR)) {
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (Op1 == ISD::SETGT) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
- (isAllOnesConstant(LR) && isNullConstant(RR)))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDLoc DL(N0);
- SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
- LL, DAG.getConstant(1, DL,
- LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, DL, LL.getValueType()),
- ISD::SETUGE);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
+ return V;
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64) {
@@ -3037,13 +3326,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDLoc DL(N0);
+ SDLoc DL0(N0);
SDValue NewAdd =
- DAG.getNode(ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL0, VT,
N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
- return SDValue(LocReference, 0);
+ return SDValue(N, 0);
}
}
}
@@ -3068,7 +3357,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
- if (APIntOps::isMask(AndMask) &&
+ if (AndMask.isMask() &&
// Required bits must not span the two halves of the integer and
// must fit in the half size type.
(ShiftBits + MaskBits <= Size / 2) &&
@@ -3108,7 +3397,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
bool &NarrowLoad) {
uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
- if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
return false;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -3191,6 +3480,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate and
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
@@ -3299,6 +3592,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
+
+ // Fold the AND away. NewLoad may get replaced immediately.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
if (Load->getExtensionType() == ISD::EXTLOAD) {
NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
Load->getValueType(0), SDLoc(Load),
@@ -3316,10 +3613,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- // Fold the AND away, taking care not to fold to the old load node if we
- // replaced it.
- CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
-
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3723,65 +4016,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
-SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
// fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.isUndef() || N1.isUndef())) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
- SDLoc(LocReference), VT);
- }
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
+ return V;
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
@@ -3802,7 +4046,6 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
- SDLoc DL(LocReference);
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getConstant(LHSMask | RHSMask, DL, VT));
}
@@ -3818,7 +4061,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
- return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
}
return SDValue();
@@ -3847,14 +4090,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(
- APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
- N0.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(
- APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
- N1.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
@@ -3867,7 +4106,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
// Ensure both shuffles have a zero input.
- if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
+ if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
@@ -3939,6 +4178,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
@@ -3956,7 +4199,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
- // iff (c1 & c2) == 0.
+ // iff (c1 & c2) != 0.
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
@@ -3978,6 +4221,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
+ if (SDValue Load = MatchLoadCombine(N))
+ return Load;
+
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
@@ -4190,8 +4436,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
- SDValue Mask = DAG.getConstant(AllBits, DL, VT);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
if (LHSMask.getNode()) {
APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
@@ -4349,6 +4594,299 @@ struct BaseIndexOffset {
};
} // namespace
+namespace {
+/// Represents known origin of an individual byte in load combine pattern. The
+/// value of the byte is either constant zero or comes from memory.
+struct ByteProvider {
+ // For constant zero providers Load is set to nullptr. For memory providers
+ // Load represents the node which loads the byte from memory.
+ // ByteOffset is the offset of the byte in the value produced by the load.
+ LoadSDNode *Load;
+ unsigned ByteOffset;
+
+ ByteProvider() : Load(nullptr), ByteOffset(0) {}
+
+ static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
+ return ByteProvider(Load, ByteOffset);
+ }
+ static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+
+ bool isConstantZero() const { return !Load; }
+ bool isMemory() const { return Load; }
+
+ bool operator==(const ByteProvider &Other) const {
+ return Other.Load == Load && Other.ByteOffset == ByteOffset;
+ }
+
+private:
+ ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
+ : Load(Load), ByteOffset(ByteOffset) {}
+};
+
+/// Recursively traverses the expression calculating the origin of the requested
+/// byte of the given value. Returns None if the provider can't be calculated.
+///
+/// For every value except the root of the expression, this verifies that the
+/// value has exactly one use and returns None otherwise. This way, if the origin
+/// of the byte is returned, it's guaranteed that the values which contribute to
+/// the byte are not used outside of this expression.
+///
+/// Because the parts of the expression are not allowed to have more than one
+/// use this function iterates over trees, not DAGs. So it never visits the same
+/// node more than once.
+const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
+ unsigned Depth,
+ bool Root = false) {
+ // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
+ if (Depth == 10)
+ return None;
+
+ if (!Root && !Op.hasOneUse())
+ return None;
+
+ assert(Op.getValueType().isScalarInteger() && "can't handle other types");
+ unsigned BitWidth = Op.getValueSizeInBits();
+ if (BitWidth % 8 != 0)
+ return None;
+ unsigned ByteWidth = BitWidth / 8;
+ assert(Index < ByteWidth && "invalid index requested");
+ (void) ByteWidth;
+
+ switch (Op.getOpcode()) {
+ case ISD::OR: {
+ auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+ if (!LHS)
+ return None;
+ auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+ if (!RHS)
+ return None;
+
+ if (LHS->isConstantZero())
+ return RHS;
+ if (RHS->isConstantZero())
+ return LHS;
+ return None;
+ }
+ case ISD::SHL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return None;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+ if (BitShift % 8 != 0)
+ return None;
+ uint64_t ByteShift = BitShift / 8;
+
+ return Index < ByteShift
+ ? ByteProvider::getConstantZero()
+ : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
+ Depth + 1);
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return Op.getOpcode() == ISD::ZERO_EXTEND
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1);
+ }
+ case ISD::BSWAP:
+ return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
+ Depth + 1);
+ case ISD::LOAD: {
+ auto L = cast<LoadSDNode>(Op.getNode());
+ if (L->isVolatile() || L->isIndexed())
+ return None;
+
+ unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return L->getExtensionType() == ISD::ZEXTLOAD
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return ByteProvider::getMemory(L, Index);
+ }
+ }
+
+ return None;
+}
+} // namespace
+
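The SHL case above can be checked standalone: byte i of (x << 8*k) is constant zero for i < k and byte (i - k) of x otherwise (illustrative values only):

  #include <cassert>
  #include <cstdint>

  // Byte I of (X << 8*K): constant zero for I < K, otherwise byte (I - K) of X.
  static uint8_t byteOf(uint32_t V, unsigned I) { return (V >> (8 * I)) & 0xff; }

  int main() {
    uint32_t X = 0xAABBCCDDu;
    for (unsigned K = 0; K < 4; ++K)
      for (unsigned I = 0; I < 4; ++I)
        assert(byteOf(X << (8 * K), I) == (I < K ? 0 : byteOf(X, I - K)));
    return 0;
  }
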
+/// Match a pattern where a wide type scalar value is loaded by several narrow
+/// loads and combined by shifts and ors. Fold it into a single load or a load
+/// and a BSWAP if the target supports it.
+///
+/// Assuming little endian target:
+/// i8 *a = ...
+/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+/// =>
+/// i32 val = *((i32)a)
+///
+/// i8 *a = ...
+/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+/// =>
+/// i32 val = BSWAP(*((i32)a))
+///
+/// TODO: This rule matches complex patterns with OR node roots and doesn't
+/// interact well with the worklist mechanism. When a part of the pattern is
+/// updated (e.g. one of the loads) its direct users are put into the worklist,
+/// but the root node of the pattern which triggers the load combine is not
+/// necessarily a direct user of the changed node. For example, once the address
+/// of t28 load is reassociated load combine won't be triggered:
+/// t25: i32 = add t4, Constant:i32<2>
+/// t26: i64 = sign_extend t25
+/// t27: i64 = add t2, t26
+/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
+/// t29: i32 = zero_extend t28
+/// t32: i32 = shl t29, Constant:i8<8>
+/// t33: i32 = or t23, t32
+/// As a possible fix visitLoad can check if the load can be a part of a load
+/// combine pattern and add corresponding OR roots to the worklist.
+SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR &&
+ "Can only match load combining against OR nodes");
+
+ // Handles simple types only
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ unsigned ByteWidth = VT.getSizeInBits() / 8;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Before legalization we can introduce illegal loads that are too wide; they
+ // will later be split into legal-sized loads. This enables us to combine i64
+ // load-by-i8 patterns into a couple of i32 loads on 32-bit targets.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+ std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
+ unsigned BW, unsigned i) { return i; };
+ std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
+ unsigned BW, unsigned i) { return BW - i - 1; };
+
+ bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
+ auto MemoryByteOffset = [&] (ByteProvider P) {
+ assert(P.isMemory() && "Must be a memory byte provider");
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+ assert(LoadBitWidth % 8 == 0 &&
+ "can only analyze providers for individual bytes not bit");
+ unsigned LoadByteWidth = LoadBitWidth / 8;
+ return IsBigEndianTarget
+ ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ };
+
+ Optional<BaseIndexOffset> Base;
+ SDValue Chain;
+
+ SmallSet<LoadSDNode *, 8> Loads;
+ Optional<ByteProvider> FirstByteProvider;
+ int64_t FirstOffset = INT64_MAX;
+
+ // Check if all the bytes of the OR we are looking at are loaded from the same
+ // base address. Collect bytes offsets from Base address in ByteOffsets.
+ SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
+ if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ return SDValue();
+
+ LoadSDNode *L = P->Load;
+ assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+ "Must be enforced by calculateByteProvider");
+ assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
+
+ // All loads must share the same chain
+ SDValue LChain = L->getChain();
+ if (!Chain)
+ Chain = LChain;
+ else if (Chain != LChain)
+ return SDValue();
+
+ // Loads must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr))
+ return SDValue();
+
+ // Calculate the offset of the current byte from the base address
+ int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
+ ByteOffsets[i] = ByteOffsetFromBase;
+
+ // Remember the first byte load
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstByteProvider = P;
+ FirstOffset = ByteOffsetFromBase;
+ }
+
+ Loads.insert(L);
+ }
+ assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
+ "memory, so there must be at least one load which produces the value");
+ assert(Base && "Base address of the accessed memory location must be set");
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+
+ // Check if the bytes of the OR we are looking at match with either big or
+ // little endian value load
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
+ if (!BigEndian && !LittleEndian)
+ return SDValue();
+ }
+ assert((BigEndian != LittleEndian) && "should be either or");
+ assert(FirstByteProvider && "must be set");
+
+ // Ensure that the first byte is loaded from zero offset of the first load.
+ // So the combined value can be loaded from the first load address.
+ if (MemoryByteOffset(*FirstByteProvider) != 0)
+ return SDValue();
+ LoadSDNode *FirstLoad = FirstByteProvider->Load;
+
+ // The node we are looking at matches with the pattern, check if we can
+ // replace it with a single load and bswap if needed.
+
+ // If the load needs byte swap check if the target supports it
+ bool NeedsBswap = IsBigEndianTarget != BigEndian;
+
+ // Before legalization we can introduce illegal bswaps which will later be
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // load and byte shuffling instead of several loads and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a load of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ VT, FirstLoad->getAddressSpace(),
+ FirstLoad->getAlignment(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ SDValue NewLoad =
+ DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+
+ // Transfer chain users from old loads to the new load.
+ for (LoadSDNode *L : Loads)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+
+ return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+}
+
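A compilable version of the per-byte OR pattern described above; on a little-endian target the combine turns the function body into a single i32 load, and into a load plus bswap on a big-endian one (sketch only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // The per-byte OR pattern MatchLoadCombine recognizes (little-endian order).
  uint32_t load_le32(const uint8_t *A) {
    return (uint32_t)A[0] | ((uint32_t)A[1] << 8) | ((uint32_t)A[2] << 16) |
           ((uint32_t)A[3] << 24);
  }

  int main() {
    const uint8_t Buf[4] = {0xDD, 0xCC, 0xBB, 0xAA};
    assert(load_le32(Buf) == 0xAABBCCDDu); // little-endian interpretation
    return 0;
  }
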
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4386,6 +4924,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate xor
if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
return RXOR;
@@ -4403,9 +4945,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
- return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
+ return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
- return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
+ return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
}
}
@@ -4470,6 +5012,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), DL, VT));
}
}
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ }
+
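The identity used just above is the classic branchless absolute value; a minimal check, assuming an arithmetic right shift for signed values as on mainstream compilers:

  #include <cassert>
  #include <cstdint>
  #include <cstdlib>

  // Y = sra(X, 31); (X + Y) ^ Y == |X| in two's complement.
  int32_t absViaXor(int32_t X) {
    int32_t Y = X >> 31; // 0 or -1 (sign mask), assuming arithmetic shift
    // Do the add in unsigned arithmetic to avoid signed-overflow UB.
    return (int32_t)(((uint32_t)X + (uint32_t)Y) ^ (uint32_t)Y);
  }

  int main() {
    for (int32_t X : {0, 1, -1, 7, -7, 123456, -123456})
      assert(absViaXor(X) == std::abs(X));
    return 0;
  }
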
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -4673,6 +5226,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl undef, x) -> 0
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -4808,9 +5365,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
isConstantOrConstantVector(N1, /* No Opaques */ true)) {
- unsigned BitSize = VT.getScalarSizeInBits();
SDLoc DL(N);
- SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+ SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
@@ -4877,6 +5433,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
@@ -5024,6 +5584,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -5074,9 +5638,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
- APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
SDValue Mask =
- DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
AddToWorklist(Mask.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
@@ -5202,6 +5765,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+ // fold (abs x) -> x iff not-negative
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5217,7 +5796,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ // fold (bitreverse c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
@@ -5311,7 +5894,6 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
-// TODO: We should handle other cases of selecting between {-1,0,1} here.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5320,6 +5902,67 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
+ if (!VT.isInteger())
+ return SDValue();
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N1);
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C1 || !C2)
+ return SDValue();
+
+ // Only do this before legalization to avoid conflicting with target-specific
+ // transforms in the other direction (create a select from a zext/sext). There
+ // is also a target-independent combine here in DAGCombiner in the other
+ // direction for (select Cond, -1, 0) when the condition is not i1.
+ if (CondVT == MVT::i1 && !LegalOperations) {
+ if (C1->isNullValue() && C2->isOne()) {
+ // select Cond, 0, 1 --> zext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isNullValue() && C2->isAllOnesValue()) {
+ // select Cond, 0, -1 --> sext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isOne() && C2->isNullValue()) {
+ // select Cond, 1, 0 --> zext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+ if (C1->isAllOnesValue() && C2->isNullValue()) {
+ // select Cond, -1, 0 --> sext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add. Use a target hook because some targets may prefer to
+ // transform in the other direction.
+ if (TLI.convertSelectOfConstantsToMath()) {
+ if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ }
+
+ return SDValue();
+ }
+
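A few of these select-of-constants rewrites, checked on concrete values (C1 = 7 is arbitrary; zext and sext of an i1 correspond to (int)Cond and -(int)Cond below):

  #include <cassert>

  int main() {
    for (bool Cond : {false, true}) {
      // select Cond, 0, 1 --> zext (!Cond)
      assert((Cond ? 0 : 1) == (int)!Cond);
      // select Cond, C1, C1-1 --> add (zext Cond), C1-1
      assert((Cond ? 7 : 6) == 6 + (int)Cond);
      // select Cond, C1, C1+1 --> add (sext Cond), C1+1
      assert((Cond ? 7 : 8) == 8 + -(int)Cond);
    }
    return 0;
  }
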
// fold (select Cond, 0, 1) -> (xor Cond, 1)
// We can't do this reliably if integer based booleans have different contents
// to floating point based booleans. This is because we can't tell whether we
@@ -5329,15 +5972,14 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
- if (VT.isInteger() &&
- (CondVT == MVT::i1 || (CondVT.isInteger() &&
- TLI.getBooleanContents(false, true) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(false, false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
- isNullConstant(N1) && isOneConstant(N2)) {
- SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
- DAG.getConstant(1, DL, CondVT));
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(false, true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isNullValue() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
return NotCond;
return DAG.getZExtOrTrunc(NotCond, DL, VT);
@@ -5847,7 +6489,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
ISD::NON_EXTLOAD, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MLD->isExpandingLoad());
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -5921,34 +6563,6 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // If the VSELECT result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (N0.getOpcode() == ISD::SETCC) {
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
- std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
- Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
-
- // Add the new VSELECT nodes to the work list in case they need to be split
- // again.
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
- }
-
// Fold (vselect (build_vector all_ones), N1, N2) -> N1
if (ISD::isBuildVectorAllOnes(N0.getNode()))
return N1;
@@ -6258,6 +6872,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+ // Simplify the TokenFactor.
+ AddToWorklist(NewChain.getNode());
+
CombineTo(N, NewValue);
// Replace uses of the original load (before extension)
@@ -6273,6 +6890,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
LegalOperations))
@@ -6281,8 +6899,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
- N0.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
@@ -6314,12 +6931,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
}
// fold (sext (truncate x)) -> (sextinreg x).
@@ -6329,7 +6946,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
DAG.getValueType(N0.getValueType()));
}
}
@@ -6349,16 +6966,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6376,8 +6991,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), MemVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
@@ -6411,7 +7025,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
@@ -6419,24 +7032,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
- ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
if (N0.getOpcode() == ISD::SETCC) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ EVT N00VT = N0.getOperand(0).getValueType();
+
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(N0VT) ==
+ TLI.getBooleanContents(N00VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
- EVT SVT = getSetCCResultType(N0VT);
+ EVT SVT = getSetCCResultType(N00VT);
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
@@ -6444,19 +7060,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
// If the desired elements are smaller or larger than the source
- // elements we can use a matching integer vector type and then
- // truncate/sign extend
- EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
}
}
@@ -6465,36 +7077,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// getBooleanContents().
unsigned SetCCWidth = N0.getScalarValueSizeInBits();
- SDLoc DL(N);
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
- // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
+ // bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
- SDValue ExtTrueVal =
- (SetCCWidth == 1)
- ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
- DL, VT)
- : TLI.getConstTrueVal(DAG, VT, DL);
-
- if (SDValue SCC = SimplifySelectCC(
- DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
- DAG.getConstant(0, DL, VT),
- cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
+ : TLI.getConstTrueVal(DAG, VT, DL);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ if (SDValue SCC =
+ SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
if (!VT.isVector()) {
- EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
- SDLoc DL(N);
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- SDValue SetCC =
- DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
- return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
- DAG.getConstant(0, DL, VT));
+ EVT SetCCVT = getSetCCResultType(N00VT);
+ // Don't do this transform for i1 because there's a select transform
+ // that would reverse it.
+ // TODO: Should we avoid this transform entirely without a target hook,
+ // given that a sext is likely cheaper than a select?
+ if (SetCCVT.getScalarSizeInBits() != 1 &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
}
}
}
@@ -6502,7 +7108,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
return SDValue();
}
@@ -6677,13 +7283,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ZERO_EXTEND);
+ CombineTo(N, ExtLoad);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6991,9 +7598,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAssertZext(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+
+ // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
+ if (N0.getOpcode() == ISD::AssertZext &&
+ EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ return N0;
+
+ return SDValue();
+}
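The nested AssertZext fold amounts to the observation that asserting the same zero high bits twice adds no information; a rough scalar analogue (illustrative sketch only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;
  uint32_t Once  = X & 0xFFu;           // model of "bits above i8 are zero"
  uint32_t Twice = (X & 0xFFu) & 0xFFu; // asserting the same width again is a no-op
  assert(Once == Twice);
  return 0;
}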
+
/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used. If so, return the simpler operand,
/// otherwise return a null SDValue.
+///
+/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
+/// simplify nodes with multiple uses more aggressively.)
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
@@ -7029,6 +7652,14 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
+ break;
+ case ISD::AND: {
+ // X & -1 -> X (ignoring bits which aren't demanded).
+ ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
+ if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
+ return V.getOperand(0);
+ break;
+ }
}
return SDValue();
}
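The new ISD::AND case is the usual demanded-bits identity: if the AND constant covers every demanded bit, the AND is invisible to that user. A small sketch of the arithmetic (hypothetical values, illustration only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Mask = 0x00FFu; // bits actually demanded by the user
  uint32_t C    = 0x0FFFu; // AND constant; (C & Mask) == Mask holds
  uint32_t X    = 0xABCDu;
  // The AND changes none of the demanded bits, so X & C can be replaced by X.
  assert(((X & C) & Mask) == (X & Mask));
  return 0;
}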
@@ -7244,6 +7875,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
+ // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
+ if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
+ return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+ }
+
// fold (sext_in_reg (zext x)) -> (sext x)
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
@@ -7254,7 +7895,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
- if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
// fold operands of sext_in_reg based on knowledge that the top bits are not
@@ -7496,6 +8137,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
VT.getSizeInBits())))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
+
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -7517,6 +8159,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
}
+
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
@@ -7582,6 +8225,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+ // When the adde's carry is not used.
+ if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() &&
+ !N0.getNode()->hasAnyUseOfValue(1) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) {
+ SDLoc SL(N);
+ auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue),
+ X, Y, N0.getOperand(2));
+ }
+
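The (trunc (adde ...)) fold depends on truncation distributing over addition when the wide carry-out is unused; a rough model in plain C++ (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
  for (uint32_t CarryIn : {0u, 1u}) {
    // Truncating after the wide add matches adding the truncated operands;
    // this is only safe because the wide carry-out is never consumed.
    uint8_t WideThenTrunc = (uint8_t)(X + Y + CarryIn);
    uint8_t TruncThenAdd  = (uint8_t)((uint8_t)X + (uint8_t)Y + CarryIn);
    assert(WideThenTrunc == TruncThenAdd);
  }
  return 0;
}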
return SDValue();
}
@@ -7672,6 +8327,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize, since afterward the target may be depending
// on the bitconvert.
@@ -8040,6 +8698,11 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getBuildVector(VT, DL, Ops);
}
+static bool isContractable(SDNode *N) {
+ SDNodeFlags F = cast<BinaryWithFlagsSDNode>(N)->Flags;
+ return F.hasAllowContract() || F.hasUnsafeAlgebra();
+}
+
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
@@ -8048,24 +8711,27 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- ;
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8073,35 +8739,39 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+ // Check whether the node is an FMUL and contractable, either due to the
+ // global flags or its own SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
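For context, the contraction being gated here trades two separately rounded operations for a single fused multiply-add; a tiny sketch of the two shapes (results may differ in the last bit, which is exactly why the contractable/fast-math checks above exist):

#include <cmath>
#include <cstdio>

double separate(double X, double Y, double Z) { return X * Y + Z; }      // fmul then fadd, two roundings
double fused(double X, double Y, double Z) { return std::fma(X, Y, Z); } // one rounding

int main() {
  double X = 1.0 / 3.0, Y = 3.0, Z = -1.0;
  std::printf("separate=%.17g fused=%.17g\n", separate(X, Y, Z), fused(X, Y, Z));
  return 0;
}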
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && N0.getOpcode() == ISD::FMUL &&
- N1.getOpcode() == ISD::FMUL) {
+ if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
if (N0.getNode()->use_size() > N1.getNode()->use_size())
std::swap(N0, N1);
}
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse())) {
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8113,7 +8783,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
@@ -8154,7 +8824,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -8169,7 +8839,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1);
@@ -8195,7 +8865,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1);
@@ -8208,7 +8878,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
- if (N120.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N120))
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0);
@@ -8224,7 +8894,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
- if (N102.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N102))
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0);
@@ -8244,23 +8914,26 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
-
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the subtraction is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8268,9 +8941,16 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+ // Check whether the node is an FMUL and contractable, either due to the
+ // global flags or its own SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
+
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));
@@ -8278,16 +8958,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse()))
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
@@ -8297,12 +8975,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8316,7 +8994,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8336,7 +9014,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8358,7 +9036,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8378,10 +9056,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma x, y (fma u, v, (fneg z)))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
+ if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
+ N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8395,9 +9072,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma (fneg y), z, (fma (fneg u), v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8410,14 +9086,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8440,7 +9116,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8461,7 +9137,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N1.getOperand(2).getOperand(0);
- if (N120.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N120)) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8488,7 +9164,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N100 = N1.getOperand(0).getOperand(0);
SDValue N101 = N1.getOperand(0).getOperand(1);
SDValue N102 = N1.getOperand(0).getOperand(2);
- if (N102.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N102)) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8624,6 +9300,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
@@ -8637,7 +9316,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
// fold (fadd A, 0) -> A
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
if (N1C->isZero())
@@ -8771,13 +9450,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
// (fsub 0, B) -> -B
if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -8850,6 +9532,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
@@ -9104,6 +9789,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
@@ -9207,6 +9895,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
&cast<BinaryWithFlagsSDNode>(N)->Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
return SDValue();
}
@@ -10361,7 +11052,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
-
+ AddUsersToWorklist(Chain.getNode());
if (N->use_empty())
deleteAndRecombine(N);
@@ -10414,7 +11105,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, Chain.getOperand(1), Chain);
+ return CombineTo(N, PrevST->getOperand(1), Chain);
}
}
@@ -10432,14 +11123,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
}
}
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && LD->isUnindexed()) {
+ if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -11021,6 +11705,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
ArgChains);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddToWorklist(Chain.getNode());
return true;
}
@@ -11414,18 +12099,24 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
-SDValue DAGCombiner::getMergedConstantVectorStore(
- SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
- SmallVector<SDValue, 8> BuildVector;
+SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores) {
+ SmallVector<SDValue, 8> Chains;
+ SmallPtrSet<const SDNode *, 8> Visited;
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ for (unsigned i = 0; i < NumStores; ++i) {
+ Visited.insert(StoreNodes[i].MemNode);
+ }
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
- StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
- Chains.push_back(St->getChain());
- BuildVector.push_back(St->getValue());
+ // Don't include a store's chain if that chain is produced by another of
+ // the stores being merged.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+ Chains.push_back(StoreNodes[i].MemNode->getChain());
}
- return DAG.getBuildVector(Ty, SL, BuildVector);
+ assert(Chains.size() > 0 && "Merged stores must have at least one chain");
+ return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
}
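A toy model of the chain-collection logic in getMergeStoreChains, using hypothetical simplified types (Node and collectMergeChains are made up for illustration): only chains that are not produced by one of the merged stores become operands of the new TokenFactor.

#include <set>
#include <vector>

struct Node { Node *Chain = nullptr; }; // stand-in for an SDNode with one chain operand

std::vector<Node *> collectMergeChains(const std::vector<Node *> &Stores) {
  std::set<const Node *> Merged(Stores.begin(), Stores.end());
  std::vector<Node *> Chains;
  for (Node *St : Stores)
    if (Merged.count(St->Chain) == 0) // skip chains fed by another merged store
      Chains.push_back(St->Chain);
  return Chains;
}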
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -11436,22 +12127,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned LatestNodeUsed = 0;
-
- for (unsigned i=0; i < NumStores; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
- }
-
- SmallVector<SDValue, 8> Chains;
// The latest Node in the DAG.
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
@@ -11467,7 +12144,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
+ SmallVector<SDValue, 8> BuildVector;
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ SDValue Val = St->getValue();
+ if (MemVT.getScalarType().isInteger())
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
+ Val = DAG.getConstant(
+ (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
+ SDLoc(CFP), MemVT);
+ BuildVector.push_back(Val);
+ }
+ StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
@@ -11477,7 +12165,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
- Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
@@ -11497,7 +12184,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
for (unsigned i = 0; i < NumStores; ++i) {
unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- Chains.push_back(St->getChain());
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
@@ -11515,54 +12201,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- assert(!Chains.empty());
-
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA) {
- // Replace all merged stores with the new store.
- for (unsigned i = 0; i < NumStores; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumStores; ++i) {
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
- }
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
- StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
+ AddToWorklist(NewChain.getNode());
return true;
}
-void DAGCombiner::getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
+void DAGCombiner::getStoreMergeCandidates(
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
@@ -11572,104 +12231,71 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
if (BasePtr.Base.isUndef())
return;
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- EVT MemVT = St->getMemoryVT();
- unsigned Seq = 0;
- StoreSDNode *Index = St;
-
-
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-
- if (UseAA) {
- // Look at other users of the same chain. Stores on the same chain do not
- // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
- // to be on the same chain, so don't bother looking at adjacent chains.
-
- SDValue Chain = St->getChain();
- for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
- if (I.getOperandNo() != 0)
- continue;
-
- if (OtherST->isVolatile() || OtherST->isIndexed())
- continue;
-
- if (OtherST->getMemoryVT() != MemVT)
- continue;
-
- BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
-
- if (Ptr.equalBaseIndex(BasePtr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
- }
- }
-
- return;
- }
-
- while (Index) {
- // If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
- break;
-
- // Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
-
- // Check that the base pointer is the same as the original one.
- if (!Ptr.equalBaseIndex(BasePtr))
- break;
-
- // The memory operands must not be volatile.
- if (Index->isVolatile() || Index->isIndexed())
- break;
-
- // No truncation.
- if (Index->isTruncatingStore())
- break;
-
- // The stored memory type must be the same.
- if (Index->getMemoryVT() != MemVT)
- break;
-
- // We do not allow under-aligned stores in order to prevent
- // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
- // be irrelevant here; what MATTERS is that we not move memory
- // operations that potentially overlap past each-other.
- if (Index->getAlignment() < MemVT.getStoreSize())
- break;
-
- // We found a potential memory operand to merge.
- StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
-
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
- SDNode *NextInChain = Index->getChain().getNode();
- while (1) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- if (Ldn->isVolatile()) {
- Index = nullptr;
- break;
+ bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+ bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
+ isa<ConstantFPSDNode>(St->getValue());
+ bool IsExtractVecSrc =
+ (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
+ if (Other->isVolatile() || Other->isIndexed())
+ return false;
+ // We can merge constant floats to equivalent integers
+ if (Other->getMemoryVT() != MemVT)
+ if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsLoadSrc)
+ if (!isa<LoadSDNode>(Other->getValue()))
+ return false;
+ if (IsConstantSrc)
+ if (!(isa<ConstantSDNode>(Other->getValue()) ||
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsExtractVecSrc)
+ if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+ return false;
+ Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+ return (Ptr.equalBaseIndex(BasePtr));
+ };
+ // We are looking for a root node that is an ancestor of all mergeable
+ // stores. We search up through a load, to our root, and then down
+ // through all children. For instance, we will find Store{1,2,3} if
+ // St is Store1, Store2, or Store3, where the root is not a load
+ // (which is always true for nonvolatile ops). TODO: Expand
+ // the search to find all valid candidates through multiple layers of loads.
+ //
+ // Root
+ // |-------|-------|
+ // Load Load Store3
+ // | |
+ // Store1 Store2
+ //
+ // FIXME: We should be able to climb and
+ // descend TokenFactors to find candidates as well.
+
+ SDNode *RootNode = (St->getChain()).getNode();
+
+ if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ RootNode = Ldn->getChain().getNode();
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
+ if (I2.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
+ BaseIndexOffset Ptr;
+ if (CandidateMatch(OtherST, Ptr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ }
+ } else
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ BaseIndexOffset Ptr;
+ if (CandidateMatch(OtherST, Ptr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
}
-
- // Save the load node for later. Continue the scan.
- AliasLoadNodes.push_back(Ldn);
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }
- }
}
// We need to check that merging these stores does not cause a loop
@@ -11678,31 +12304,34 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
// search ops of store candidates
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
SDNode *n = StoreNodes[i].MemNode;
// Potential loops may happen only through non-chain operands
for (unsigned j = 1; j < n->getNumOperands(); ++j)
Worklist.push_back(n->getOperand(j).getNode());
}
// search through DAG. We can stop early if we find a storenode
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
return false;
}
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None)
return false;
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+
+ if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
+
bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
Attribute::NoImplicitFloat);
@@ -11731,145 +12360,137 @@ bool DAGCombiner::MergeConsecutiveStores(
if (MemVT.isVector() && IsLoadSrc)
return false;
- // Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 0);
- if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
- return false;
-
- // Save the LoadSDNodes that we find in the chain.
- // We need to make sure that these nodes do not interfere with
- // any of the store nodes.
- SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
- getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+ SmallVector<MemOpLink, 8> StoreNodes;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // only do dependence check in AA case
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
- return false;
-
// Sort the memory operands according to their distance from the
- // base pointer. As a secondary criteria: make sure stores coming
- // later in the code come first in the list. This is important for
- // the non-UseAA case, because we're merging stores into the FINAL
- // store along a chain which potentially contains aliasing stores.
- // Thus, if there are multiple stores to the same address, the last
- // one can be considered for merging but not the others.
+ // base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase ||
- (LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum < RHS.SequenceNum);
- });
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.
- unsigned LastConsecutiveStore = 0;
+ unsigned NumConsecutiveStores = 0;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
- for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
- // Check that the addresses are consecutive starting from the second
- // element in the list of stores.
- if (i > 0) {
- int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- }
-
- // Check if this store interferes with any of the loads that we found.
- // If we find a load that alias with this store. Stop the sequence.
- if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
- return isAlias(Ldn, StoreNodes[i].MemNode);
- }))
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
-
- // Mark this node as useful.
- LastConsecutiveStore = i;
+ NumConsecutiveStores = i + 1;
}
+ if (NumConsecutiveStores < 2)
+ return false;
+
+ // Check that we can merge these candidates without causing a cycle
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumConsecutiveStores))
+ return false;
+
// The node with the lowest store address.
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
- unsigned LastLegalType = 0;
- unsigned LastLegalVectorType = 0;
- bool NonZero = false;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
- NonZero |= !C->isNullValue();
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
- NonZero |= !C->getConstantFPValue()->isNullValue();
- } else {
- // Non-constant.
- break;
- }
+ bool RV = false;
+ while (NumConsecutiveStores > 1) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C =
+ dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non-constant.
+ break;
+ }
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast) {
- LastLegalType = i+1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
IsFast) {
LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
+ LastLegalType = i + 1;
+ }
}
- }
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
- FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
- if (TLI.isTypeLegal(Ty) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast)
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
+ if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
}
- }
- // Check if we found a legal integer type to store.
- if (LastLegalType == 0 && LastLegalVectorType == 0)
- return false;
+ // Check if we found a legal integer type that creates a meaningful merge.
+ if (LastLegalType < 2 && LastLegalVectorType < 2)
+ break;
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
- true, UseVector);
+ bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ if (!Merged)
+ break;
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ RV = true;
+ NumConsecutiveStores -= NumElem;
+ }
+ return RV;
}
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecSrc) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 0;
bool IsVec = MemVT.isVector();
- for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
unsigned StoreValOpcode = St->getValue().getOpcode();
// This restriction could be loosened.
@@ -11909,7 +12530,7 @@ bool DAGCombiner::MergeConsecutiveStores(
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
if (!Ld) break;
@@ -11942,7 +12563,7 @@ bool DAGCombiner::MergeConsecutiveStores(
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
}
if (LoadNodes.size() < 2)
@@ -11954,7 +12575,9 @@ bool DAGCombiner::MergeConsecutiveStores(
if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
St->getAlignment() >= RequiredAlignment)
return false;
-
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
@@ -12023,31 +12646,12 @@ bool DAGCombiner::MergeConsecutiveStores(
// We add +1 here because the LastXXX variables refer to location while
// the NumElem refers to array/index size.
- unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2)
return false;
- // Collect the chains from all merged stores.
- SmallVector<SDValue, 8> MergeStoreChains;
- MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
-
- // The latest Node in the DAG.
- unsigned LatestNodeUsed = 0;
- for (unsigned i=1; i<NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
-
- MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
- }
-
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
-
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
@@ -12067,8 +12671,9 @@ bool DAGCombiner::MergeConsecutiveStores(
FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign);
- SDValue NewStoreChain =
- DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+
+ AddToWorklist(NewStoreChain.getNode());
SDValue NewStore =
DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
@@ -12081,25 +12686,9 @@ bool DAGCombiner::MergeConsecutiveStores(
SDValue(NewLoad.getNode(), 1));
}
- if (UseAA) {
- // Replace the all stores with the new store.
- for (unsigned i = 0; i < NumElem; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem; ++i) {
- // Remove all Store nodes.
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
- }
-
- StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
+ // Replace all of the stores with the new store.
+ for (unsigned i = 0; i < NumElem; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
return true;
}
@@ -12256,19 +12845,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && ST->isUnindexed()) {
- // FIXME: We should do this even without AA enabled. AA will just allow
- // FindBetterChain to work in more situations. The problem with this is that
- // any combine that expects memory operations to be on consecutive chains
- // first needs to be updated to look for users of the same chain.
-
+ if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
if (findBetterNeighborChains(ST)) {
@@ -12302,8 +12879,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SimplifyDemandedBits(
Value,
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits())))
+ ST->getMemoryVT().getScalarSizeInBits()))) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+ // with another node (N is deleted). SimplifyDemandedBits will add Value's
+ // node back to the worklist if necessary, but we also need to re-visit
+ // the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
return SDValue(N, 0);
+ }
}
// If this is a load followed by a store to the same location, then the store
@@ -12347,15 +12931,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
- bool Changed = MergeConsecutiveStores(ST, StoreNodes);
+ bool Changed = MergeConsecutiveStores(ST);
if (!Changed) break;
-
- if (any_of(StoreNodes,
- [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
- // ST has been merged and no longer exists.
+ // Return N; the merge only uses CombineTo, so no worklist clean-up
+ // is necessary.
+ if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
return SDValue(N, 0);
- }
}
}
@@ -12364,7 +12945,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Make sure to do this only after attempting to merge stores in order to
// avoid changing the types of some subset of stores due to visit order,
// preventing their merging.
- if (isa<ConstantFPSDNode>(Value)) {
+ if (isa<ConstantFPSDNode>(ST->getValue())) {
if (SDValue NewSt = replaceStoreOfFPConstant(ST))
return NewSt;
}
@@ -12493,10 +13074,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
- // If we can't generate a legal BUILD_VECTOR, exit
- if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return SDValue();
-
// Check that we know which element is being inserted
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
@@ -12523,6 +13100,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
@@ -12544,11 +13125,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// All the operands of BUILD_VECTOR must have the same type;
// we enforce that here.
EVT OpVT = Ops[0].getValueType();
- if (InVal.getValueType() != OpVT)
- InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
- Ops[Elt] = InVal;
+ Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
}
// Return the new vector
@@ -12568,6 +13145,11 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
+ ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
+ ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ return SDValue();
+
Align = NewAlign;
SDValue NewPtr = OriginalLoad->getBasePtr();
@@ -12639,6 +13221,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
EVT NVT = N->getValueType(0);
+ if (InVec.isUndef())
+ return DAG.getUNDEF(NVT);
+
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
@@ -13022,7 +13607,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
return DAG.getNode(Opcode, DL, VT, BV);
}
-SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
+SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx) {
@@ -13300,6 +13885,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
+ // Check if we can express BUILD VECTOR via subvector extract.
+ if (!LegalTypes && (N->getNumOperands() > 1)) {
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> uint64_t {
+ if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
+ (Op0.getOperand(0) == Op.getOperand(0)))
+ if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return CNode->getZExtValue();
+ return -1;
+ };
+
+ int Offset = checkElem(Op0);
+ for (unsigned i = 0; i < N->getNumOperands(); ++i) {
+ if (Offset + i != checkElem(N->getOperand(i))) {
+ Offset = -1;
+ break;
+ }
+ }
+
+ if ((Offset == 0) &&
+ (Op0.getOperand(0).getValueType() == N->getValueType(0)))
+ return Op0.getOperand(0);
+ if ((Offset != -1) &&
+ ((Offset % N->getValueType(0).getVectorNumElements()) ==
+ 0)) // IDX must be multiple of output size.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
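
For reference, the new build_vector fold above only fires when every operand extracts consecutive elements from a single source vector, starting at a piece-aligned Offset. A minimal standalone sketch of that index check (hypothetical helper; plain integers stand in for the SelectionDAG nodes):

    #include <cstdint>
    #include <vector>

    // Elems[i] holds the extract index feeding build_vector operand i, or -1
    // if operand i is not an extract from the common source vector.
    // Returns the starting offset when the extracts are consecutive, else -1.
    static int64_t matchConsecutiveExtracts(const std::vector<int64_t> &Elems) {
      if (Elems.empty() || Elems[0] < 0)
        return -1;
      int64_t Offset = Elems[0];
      for (size_t i = 0; i < Elems.size(); ++i)
        if (Elems[i] != Offset + static_cast<int64_t>(i))
          return -1;
      return Offset; // The combine additionally requires Offset % NumOutputElts == 0.
    }

    // e.g. {4, 5, 6, 7} -> 4, so a 4-element build_vector can become an
    // extract_subvector at index 4; {4, 6, 5, 7} -> -1, no fold.
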
@@ -13491,8 +14105,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
- VT.getSizeInBits() / SclTy.getSizeInBits());
+ unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
+ if (VNTNumElms < 2)
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
@@ -13611,15 +14228,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
- if (V->getOpcode() == ISD::CONCAT_VECTORS) {
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same
- // type.
- if (V->getOperand(0).getValueType() != NVT)
- return SDValue();
+ // Extract from UNDEF is UNDEF.
+ if (V.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same
+ // type.
+ if (V->getOpcode() == ISD::CONCAT_VECTORS &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ V->getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
@@ -13633,19 +14254,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
- // being extracted are of same type, and are half size of larger vectors.
- EVT BigVT = V->getOperand(0).getValueType();
+ // being extracted are of same size.
EVT SmallVT = V->getOperand(1).getValueType();
- if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ if (!NVT.bitsEq(SmallVT))
return SDValue();
- // Only handle cases where both indexes are constants with the same type.
+ // Only handle cases where both indexes are constants.
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx && ExtIdx &&
- InsIdx->getValueType(0).getSizeInBits() <= 64 &&
- ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ if (InsIdx && ExtIdx) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
@@ -13892,6 +14510,113 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
+// Match shuffles that can be converted to any_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
+// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
+SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue N0 = SVN->getOperand(0);
+
+ // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
+ auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
+ // power-of-2 extensions as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ if (!isAnyExtend(Scale))
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+ return DAG.getBitcast(VT,
+ DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+ }
+
+ return SDValue();
+}
+
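
As a plain C++ restatement of the isAnyExtend predicate above (illustrative only; std::vector<int> stands in for the shuffle mask): every defined mask element at a position that is a multiple of Scale must read source element i/Scale, and every other position must be undef.

    #include <vector>

    static bool looksLikeAnyExtendInReg(const std::vector<int> &Mask,
                                        unsigned Scale) {
      for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
        if (Mask[i] < 0) // undef lane
          continue;
        if ((i % Scale) == 0 && Mask[i] == static_cast<int>(i / Scale))
          continue;
        return false;
      }
      return true;
    }

    // looksLikeAnyExtendInReg({0, -1, 1, -1}, 2) == true, matching the
    // v4i32 <0,u,1,u> -> v2i64 any_extend_vector_inreg example above.
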
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source node itself was a '*_extend_vector_inreg' node then we should
+// then be able to remove it.
+SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SDValue N0 = SVN->getOperand(0);
+ while (N0.getOpcode() == ISD::BITCAST)
+ N0 = N0.getOperand(0);
+
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+
+ // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
+ // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+ // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+ auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // At the moment we just handle the case where we've truncated back to the
+ // same size as before the extension.
+ // TODO: handle more extension/truncation cases as cases arise.
+ if (EltSizeInBits != ExtSrcSizeInBits)
+ return SDValue();
+
+ // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
+ // power-of-2 truncations as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
+ if (isTruncate(Scale))
+ return DAG.getBitcast(VT, N00);
+
+ return SDValue();
+}
+
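
The matching truncate pattern can be sketched the same way (again a standalone illustration with plain containers): defined mask elements must read lane i*Scale, i.e. the low part of each wide source element on a little-endian target.

    #include <vector>

    static bool looksLikeTruncateInReg(const std::vector<int> &Mask,
                                       unsigned Scale) {
      const unsigned NumElts = Mask.size();
      for (unsigned i = 0; i != NumElts; ++i) {
        if (Mask[i] < 0) // undef lane
          continue;
        if (i * Scale < NumElts && Mask[i] == static_cast<int>(i * Scale))
          continue;
        return false;
      }
      return true;
    }

    // looksLikeTruncateInReg({0, 2, -1, -1}, 2) == true, matching the
    // v4i32 truncate of v2i64 example in the comment above.
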
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -13996,6 +14721,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
return S;
+ // Match shuffles that can be converted to any_vector_extend_in_reg.
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ return V;
+
+ // Combine "truncate_vector_in_reg" style shuffles.
+ if (SDValue V = combineTruncationShuffle(SVN, DAG))
+ return V;
+
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.isUndef() ||
@@ -14253,6 +14986,16 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // If inserting an UNDEF, just return the original vector.
+ if (N1.isUndef())
+ return N0;
+
+ // If this is an insert of an extracted vector into an undef vector, we can
+ // just use the input to the extract.
+ if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
+
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
@@ -14262,26 +15005,39 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
- if (N0.getValueType() != N1.getValueType())
+ if (!isa<ConstantSDNode>(N2))
return SDValue();
+ unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Canonicalize insert_subvector dag nodes.
+ // Example:
+ // (insert_subvector (insert_subvector A, Idx0), Idx1)
+ // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+ N1.getValueType() == N0.getOperand(1).getValueType() &&
+ isa<ConstantSDNode>(N0.getOperand(2))) {
+ unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
+ if (InsIdx < OtherIdx) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+ N0.getOperand(0), N1, N2);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+ VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+ }
+ }
+
// If the input vector is a concatenation, and the insert replaces
- // one of the halves, we can optimize into a single concat_vectors.
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
- N2.getOpcode() == ISD::Constant) {
- APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+ // one of the pieces, we can optimize into a single concat_vectors.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
+ N0.getOperand(0).getValueType() == N1.getValueType()) {
+ unsigned Factor = N1.getValueType().getVectorNumElements();
- // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors Z, Y)
- if (InsIdx == 0)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
- N0.getOperand(1));
+ SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
+ Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
- // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors X, Z)
- if (InsIdx == VT.getVectorNumElements() / 2)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
- N1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
return SDValue();
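
The concat_vectors rewrite above replaces exactly one operand of the concat: with Factor elements per piece, an insert at constant element index InsIdx overwrites piece InsIdx / Factor. A tiny sketch of that index arithmetic (hypothetical helper; assumes the index is a multiple of the piece size, which holds for the patterns this fold targets):

    #include <cassert>
    #include <cstdint>

    static unsigned pieceReplacedByInsert(uint64_t InsIdx, unsigned Factor) {
      assert(Factor != 0 && InsIdx % Factor == 0 && "expected piece-aligned index");
      return static_cast<unsigned>(InsIdx / Factor);
    }

    // e.g. inserting a 4-element subvector at element index 8 into a concat of
    // four 4-element pieces rewrites piece 2: pieceReplacedByInsert(8, 4) == 2.
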
@@ -15257,7 +16013,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
if (Base.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
Base = Base.getOperand(0);
- Offset += C->getZExtValue();
+ Offset += C->getSExtValue();
}
}
@@ -15454,6 +16210,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
++Depth;
break;
+ case ISD::CopyFromReg:
+ // Forward past CopyFromReg.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ break;
+
default:
// For all other instructions we will just have to take what we can get.
Aliases.push_back(Chain);
@@ -15482,6 +16244,18 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// This function tries to collect a bunch of potentially interesting
+// nodes to improve the chains of, all at once. This might seem
+// redundant, as this function gets called when visiting every store
+// node, so why not let the work be done on each store as it's visited?
+//
+// I believe this is mainly important because MergeConsecutiveStores
+// is unable to deal with merging stores of different sizes, so unless
+// we improve the chains of all the potential candidates up-front
+// before running MergeConsecutiveStores, it might only see some of
+// the nodes that will eventually be candidates, and then not be able
+// to go from a partially-merged state to the desired final
+// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
@@ -15517,10 +16291,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (!Ptr.equalBaseIndex(BasePtr))
break;
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
+ // Walk up the chain to find the next store node, ignoring any
+ // intermediate loads. Any other kind of node will halt the loop.
SDNode *NextInChain = Index->getChain().getNode();
while (true) {
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
@@ -15539,9 +16311,14 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
Index = nullptr;
break;
}
- }
+ } // end while
}
+ // At this point, ChainedStores lists all of the Store nodes
+ // reachable by iterating up through chain nodes matching the above
+ // conditions. For each such store identified, try to find an
+ // earlier chain to attach the store to which won't violate the
+ // required ordering.
bool MadeChangeToSt = false;
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e2f33bb433bac..0584ab9f60d1b 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,4 +1,4 @@
-//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,35 +39,76 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -78,21 +119,6 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
"target-specific selector");
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
-void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS,
- unsigned AttrIdx) {
- IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
- IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
- IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
- IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
- IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
- IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
- IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
- IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
- IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
- IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(AttrIdx);
-}
-
/// Set the current block to which generated machine instructions will be
/// appended, and clear the local CSE map.
void FastISel::startNewBlock() {
@@ -231,17 +257,13 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
// Try to emit the constant by using an integer constant with a cast.
const APFloat &Flt = CF->getValueAPF();
EVT IntVT = TLI.getPointerTy(DL);
-
- uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
+ APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false);
bool isExact;
- (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
if (isExact) {
- APInt IntVal(IntBitWidth, x);
-
unsigned IntegerReg =
- getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
if (IntegerReg != 0)
Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
/*Kill=*/false);
@@ -646,7 +668,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::STACKMAP));
for (auto const &MO : Ops)
- MIB.addOperand(MO);
+ MIB.add(MO);
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
@@ -672,10 +694,8 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
Args.reserve(NumArgs);
// Populate the argument list.
- // Attributes for args start at offset 1, after the return attribute.
ImmutableCallSite CS(CI);
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
- ArgI != ArgE; ++ArgI) {
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
Value *V = CI->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
@@ -683,7 +703,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, AttrI);
+ Entry.setAttributes(&CS, ArgIdx);
Args.push_back(Entry);
}
@@ -826,7 +846,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
TII.get(TargetOpcode::PATCHPOINT));
for (auto &MO : Ops)
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
@@ -841,9 +861,9 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
return true;
}
-/// Returns an AttributeSet representing the attributes applied to the return
+/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
-static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
@@ -852,8 +872,8 @@ static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
- return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
- Attrs);
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
}
bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
@@ -885,9 +905,10 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI + 1);
+ Entry.setAttributes(&CS, ArgI);
Args.push_back(Entry);
}
+ TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
CallLoweringInfo CLI;
CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
@@ -1021,7 +1042,7 @@ bool FastISel::lowerCall(const CallInst *CI) {
Entry.Ty = V->getType();
// Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Entry.setAttributes(&CS, i - CS.arg_begin());
Args.push_back(Entry);
}
@@ -1149,7 +1170,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op)
+ .add(*Op)
.addImm(0)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
@@ -1362,7 +1383,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (const auto *Call = dyn_cast<CallInst>(I)) {
const Function *F = Call->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
// As a special case, don't handle calls to builtin library functions that
// may be translated directly to target instructions.
@@ -1665,7 +1686,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
SkipTargetIndependentISel(SkipTargetIndependentISel) {}
-FastISel::~FastISel() {}
+FastISel::~FastISel() = default;
bool FastISel::fastLowerArguments() { return false; }
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4a9042cfb3f44..e85d1951e3aed 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -235,7 +235,6 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
- unsigned NumResults = CountResults(Node);
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
MIB.addReg(VRBase, RegState::Define);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b0028252836ab..fc7cd020fe2e3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1192,8 +1192,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
- // we will make the index dependent on the load).
- if (SDNode::hasPredecessorHelper(ST, Visited, Worklist))
+ // we will make the index dependent on the load). Also, the store might be
+ // dependent on the extractelement and introduce a cycle when creating
+ // the load.
+ if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
+ ST->hasPredecessor(Op.getNode()))
continue;
StackPtr = ST->getBasePtr();
@@ -1909,8 +1912,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1935,9 +1938,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
InChain = TCChain;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setTailCall(isTailCall)
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1960,8 +1967,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1970,9 +1977,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1994,8 +2004,8 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2004,9 +2014,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2081,8 +2094,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
@@ -2090,8 +2103,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = FIPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2099,9 +2112,12 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2185,24 +2201,24 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
// Pass the argument.
Entry.Node = Node->getOperand(0);
Entry.Ty = RetTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
// Pass the return address of sin.
SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = SinPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
// Also pass the return address of the cos.
SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = CosPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2210,9 +2226,9 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args));
+ CLI.setDebugLoc(dl).setChain(InChain).setLibCallee(
+ TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
+ std::move(Args));
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2529,12 +2545,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
for (unsigned J = 0; J != Sz; J += 8) {
- MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J));
- MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J));
- MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J));
- MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J));
- MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J));
- MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J));
+ MaskHi4 = MaskHi4 | (0xF0ull << J);
+ MaskLo4 = MaskLo4 | (0x0Full << J);
+ MaskHi2 = MaskHi2 | (0xCCull << J);
+ MaskLo2 = MaskLo2 | (0x33ull << J);
+ MaskHi1 = MaskHi1 | (0xAAull << J);
+ MaskLo1 = MaskLo1 | (0x55ull << J);
}
// BSWAP if the type is wider than a single byte.
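
The three mask pairs built above are the masks for the classic in-register bit reversal: swap nibbles, then bit pairs, then adjacent bits within every byte, with the BSWAP mentioned in the comment above reversing byte order for wider types. A scalar byte-sized analogue, purely for illustration:

    #include <cstdint>

    static uint8_t reverseByte(uint8_t B) {
      B = static_cast<uint8_t>(((B & 0xF0) >> 4) | ((B & 0x0F) << 4)); // nibbles
      B = static_cast<uint8_t>(((B & 0xCC) >> 2) | ((B & 0x33) << 2)); // bit pairs
      B = static_cast<uint8_t>(((B & 0xAA) >> 1) | ((B & 0x55) << 1)); // single bits
      return B;
    }

    // reverseByte(0x01) == 0x80, reverseByte(0xB4) == 0x2D.
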
@@ -3091,7 +3107,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
- Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ Tmp1 = DAG.getBuildVector(VT, dl, Ops);
// We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
@@ -3790,8 +3806,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
VT.getScalarType(), Ex, Sh));
}
- SDValue Result =
- DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars);
+
+ SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -3830,10 +3846,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ .setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -3870,10 +3887,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ "abort", TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -4424,8 +4441,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
NewOps.push_back(Elt);
}
- SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps);
-
+ SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 72b56d84d9452..6f2b1b94ce465 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -459,7 +459,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
- SoftenFloatResult(Op.getNode(), 0);
+ AddToWorklist(Op.getNode());
}
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
@@ -472,8 +472,6 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
}
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
- Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
@@ -1054,15 +1052,15 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- assert(NVT.getSizeInBits() == integerPartWidth &&
+ assert(NVT.getSizeInBits() == 64 &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
SDLoc dl(N);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(integerPartWidth, C.getRawData()[1])),
+ APInt(64, C.getRawData()[1])),
dl, NVT);
Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(integerPartWidth, C.getRawData()[0])),
+ APInt(64, C.getRawData()[0])),
dl, NVT);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index dc436ce045142..85068e890756b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -690,7 +690,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
case TargetLowering::TypePromoteInteger:
Res = GetPromotedInteger(InOp);
break;
- case TargetLowering::TypeSplitVector:
+ case TargetLowering::TypeSplitVector: {
EVT InVT = InOp.getValueType();
assert(InVT.isVector() && "Cannot split scalar types");
unsigned NumElts = InVT.getVectorNumElements();
@@ -709,6 +709,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
+ case TargetLowering::TypeWidenVector: {
+ SDValue WideInOp = GetWidenedVector(InOp);
+
+ // Truncate widened InOp.
+ unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getValueType(0).getScalarType(), NumElem);
+ SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
+
+ // Zero extend so that the elements are of the same type as those of NVT
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
+ NumElem);
+ SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
+
+ // Extract the low NVT subvector.
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
+ }
+ }
// Truncate to NVT instead of VT
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
@@ -1089,6 +1109,10 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
SDValue Cond = N->getOperand(0);
EVT OpTy = N->getOperand(1).getValueType();
+ if (N->getOpcode() == ISD::VSELECT)
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
// Promote all the way up to the canonical SetCC type.
EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
Cond = PromoteTargetBoolean(Cond, OpVT);
@@ -2586,24 +2610,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = true;
- Entry.isZExt = false;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
Args.push_back(Entry);
}
// Also pass the address of the overflow check.
Entry.Node = Temp;
Entry.Ty = PtrTy->getPointerTo();
- Entry.isSExt = true;
- Entry.isZExt = false;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
- .setSExtResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
+ .setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index cf19d75676cda..0a2b680e1c66e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -199,8 +199,7 @@ bool DAGTypeLegalizer::run() {
// non-leaves.
for (SDNode &Node : DAG.allnodes()) {
if (Node.getNumOperands() == 0) {
- Node.setNodeId(ReadyToProcess);
- Worklist.push_back(&Node);
+ AddToWorklist(&Node);
} else {
Node.setNodeId(Unanalyzed);
}
@@ -331,6 +330,12 @@ ScanOperands:
// to the worklist etc.
if (NeedsReanalyzing) {
assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+
+ // Remove any result values from SoftenedFloats as N will be revisited
+ // again.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i)
+ SoftenedFloats.erase(SDValue(N, i));
+
N->setNodeId(NewNode);
// Recompute the NodeId and correct processed operands, adding the node to
// the worklist if ready.
@@ -749,6 +754,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
// new uses of From due to CSE. If this happens, replace the new uses of
// From with To.
} while (!From.use_empty());
+
+ SoftenedFloats.erase(From);
}
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
@@ -1077,8 +1084,8 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1087,9 +1094,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ec55662d75c0b..80c939700518f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -191,6 +191,11 @@ private:
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
+ void AddToWorklist(SDNode *N) {
+ N->setNodeId(ReadyToProcess);
+ Worklist.push_back(N);
+ }
+
//===--------------------------------------------------------------------===//
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
@@ -597,6 +602,7 @@ private:
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
+ SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
@@ -672,6 +678,7 @@ private:
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
@@ -713,6 +720,7 @@ private:
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
@@ -782,6 +790,13 @@ private:
/// By default, the vector will be widened with undefined values.
SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
+ /// Return a mask of vector type MaskVT to replace InMask. Also adjust
+ /// MaskVT to ToMaskVT if needed with vector extension or truncation.
+ SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
+
+ /// Get the target mask VT, and widen if needed.
+ EVT getSETCCWidenedResultTy(SDValue SetCC);
+
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 3682c32460c66..c02b8960b36cb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -512,8 +512,24 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Split the inputs.
+ SDValue Lo, Hi, LL, LH, RL, RH;
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+ return std::make_pair(Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
GetSplitOp(N->getOperand(1), LL, LH);
@@ -522,9 +538,16 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC)
+ std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG);
// Check if there are already splitted versions of the vector available and
// use those instead of splitting the mask operand again.
- if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
+ else if (getTypeAction(Cond.getValueType()) ==
+ TargetLowering::TypeSplitVector)
GetSplitVector(Cond, CL, CH);
else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index d4fa20f35274c..5f167f8de1cfc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,7 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandFSUB(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
@@ -621,8 +622,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), Vals);
+ Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
} else {
SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
@@ -692,6 +692,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandUINT_TO_FLOAT(Op);
case ISD::FNEG:
return ExpandFNEG(Op);
+ case ISD::FSUB:
+ return ExpandFSUB(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
case ISD::BITREVERSE:
@@ -720,8 +722,6 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
assert(VT.isVector() && !Mask.getValueType().isVector()
&& Op1.getValueType() == Op2.getValueType() && "Invalid type");
- unsigned NumElem = VT.getVectorNumElements();
-
// If we can't even use the basic vector operations of
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
@@ -745,8 +745,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all-one or all zero.
- SmallVector<SDValue, 8> Ops(NumElem, Mask);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops);
+ Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
@@ -1025,6 +1024,18 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
+ // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
+ // we can defer this to operation legalization where it will be lowered as
+ // a+(-b).
+ EVT VT = Op.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FADD, VT))
+ return Op; // Defer to LegalizeDAG
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
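
The new ExpandFSUB hook only defers when both FNEG and FADD are legal or custom for the vector type; otherwise the node is unrolled. The rewrite it defers to is the IEEE identity the comment states, shown here as a trivial scalar sketch:

    // For IEEE floating point, a - b is defined as a + (-b), so lowering FSUB
    // as FADD(a, FNEG(b)) introduces no rounding differences.
    static inline float fsubViaAdd(float a, float b) { return a + (-b); }
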
SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
@@ -1102,7 +1113,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
(EltVT.getSizeInBits()), dl, EltVT),
DAG.getConstant(0, dl, EltVT));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6906f67ebacb6..78fddb5ce8f58 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,11 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ R = ScalarizeVecRes_VecInregOp(N);
+ break;
case ISD::ANY_EXTEND:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -97,6 +102,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
+ case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
@@ -257,6 +263,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
LHS, DAG.getValueType(ExtVT));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+
+ EVT OpVT = Op.getValueType();
+ EVT OpEltVT = OpVT.getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ switch (N->getOpcode()) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
+ }
+
+ llvm_unreachable("Illegal extend_vector_inreg opcode");
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
@@ -486,7 +520,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
Ops[i] = GetScalarizedVector(N->getOperand(i));
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
+ return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
}
/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
@@ -637,6 +671,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
+ case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -781,10 +816,10 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
- Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps);
+ Lo = DAG.getBuildVector(LoVT, dl, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
- Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps);
+ Hi = DAG.getBuildVector(HiVT, dl, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
@@ -928,7 +963,12 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
SDValue InLo, InHi;
- GetSplitVector(N0, InLo, InHi);
+
+ if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N0, InLo, InHi);
+ else
+ std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
+
EVT InLoVT = InLo.getValueType();
unsigned InNumElements = InLoVT.getVectorNumElements();
@@ -1372,7 +1412,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps);
+ Output = DAG.getBuildVector(NewVT, dl, SVOps);
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT);
@@ -1466,8 +1506,15 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::FTRUNC:
+ case ISD::FCANONICALIZE:
Res = SplitVecOp_UnaryOp(N);
break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = SplitVecOp_ExtVecInRegOp(N);
+ break;
}
}
@@ -1615,7 +1662,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps);
+ Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
}
// Store the vector to the stack.
@@ -1629,6 +1676,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
MachinePointerInfo(), EltVT);
}
+SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
+ SDValue Lo, Hi;
+
+ // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so
+ // splitting the result has the same effect as splitting the input operand.
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
unsigned OpNo) {
EVT LoVT, HiVT;
@@ -1881,7 +1938,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
+ return DAG.getBuildVector(N->getValueType(0), DL, Elts);
}
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
@@ -2323,6 +2380,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
+ if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
+ // If the input and result vector types are of the same width, extend
+ // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
+ // accept fewer elements in the result than in the input.
+ if (Opcode == ISD::SIGN_EXTEND)
+ return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ if (Opcode == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ }
}
if (TLI.isTypeLegal(InWidenVT)) {
@@ -2375,7 +2441,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
@@ -2430,7 +2496,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
while (Ops.size() != WidenNumElts)
Ops.push_back(DAG.getUNDEF(WidenSVT));
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
@@ -2593,7 +2659,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps);
+ return DAG.getBuildVector(WidenVT, dl, NewOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
@@ -2663,7 +2729,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -2704,7 +2770,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
@@ -2814,6 +2880,212 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
+// Return true if this is a node that could have two SETCCs as operands.
+static inline bool isLogicalMaskOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return true;
+ }
+ return false;
+}
+
+// This is used just for the assert in convertMask(). Check that this is
+// either a SETCC or a SETCC already handled by convertMask().
+#ifndef NDEBUG
+static inline bool isSETCCorConvertedSETCC(SDValue N) {
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
+ for (unsigned i = 1; i < N->getNumOperands(); ++i)
+ if (!N->getOperand(i)->isUndef())
+ return false;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::SIGN_EXTEND)
+ N = N.getOperand(0);
+
+ return (N.getOpcode() == ISD::SETCC);
+}
+#endif
+
+// Rebuild InMask as a mask of vector type MaskVT, then extend or truncate it
+// to ToMaskVT if needed, and return the result.
+SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
+ EVT ToMaskVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+
+ // Currently only a SETCC or an AND/OR/XOR with two SETCCs is handled.
+ unsigned InMaskOpc = InMask->getOpcode();
+ assert((InMaskOpc == ISD::SETCC ||
+ (isLogicalMaskOp(InMaskOpc) &&
+ isSETCCorConvertedSETCC(InMask->getOperand(0)) &&
+ isSETCCorConvertedSETCC(InMask->getOperand(1)))) &&
+ "Unexpected mask argument.");
+
+ // Make a new Mask node, with a legal result VT.
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
+ Ops.push_back(InMask->getOperand(i));
+ SDValue Mask = DAG.getNode(InMaskOpc, SDLoc(InMask), MaskVT, Ops);
+
+ // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
+ // extend or truncate is needed.
+ unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
+ unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
+ if (MaskScalarBits < ToMaskScalBits) {
+ EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
+ } else if (MaskScalarBits > ToMaskScalBits) {
+ EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
+ }
+
+ assert(Mask->getValueType(0).getScalarSizeInBits() ==
+ ToMaskVT.getScalarSizeInBits() &&
+ "Mask should have the right element size by now.");
+
+ // Adjust Mask to the right number of elements.
+ unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
+ if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
+ Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
+ ZeroIdx);
+ } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
+ unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
+ EVT SubVT = Mask->getValueType(0);
+ SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
+ SubConcatOps[0] = Mask;
+ for (unsigned i = 1; i < NumSubVecs; ++i)
+ SubConcatOps[i] = DAG.getUNDEF(SubVT);
+ Mask =
+ DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
+ }
+
+ assert((Mask->getValueType(0) == ToMaskVT) &&
+ "A mask of ToMaskVT should have been produced by now.");
+
+ return Mask;
+}
+
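The reason sign-extension and truncation are safe here is that mask lanes are encoded as all-ones or all-zeros, and that encoding survives both operations. A minimal standalone sketch with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  int8_t trueLane = -1;                              // all-ones "true" lane
  int32_t widened = static_cast<int32_t>(trueLane);  // sign-extend to i32
  int8_t narrowed = static_cast<int8_t>(widened);    // truncate back to i8
  assert(widened == -1 && narrowed == -1);
  int8_t falseLane = 0;                              // all-zeros "false" lane
  assert(static_cast<int32_t>(falseLane) == 0);
  return 0;
}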
+// Get the target mask VT, and widen if needed.
+EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
+ assert(SetCC->getOpcode() == ISD::SETCC);
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
+ return MaskVT;
+}
+
+// This method tries to handle VSELECT and its mask by legalizing operands
+// (which may require widening) and if needed adjusting the mask vector type
+// to match that of the VSELECT. Without it, many cases end up with
+// scalarization of the SETCC, with many unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Cond = N->getOperand(0);
+
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ return SDValue();
+
+ // If this is a split VSELECT that was already handled, do
+ // nothing.
+ if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+ return SDValue();
+
+ EVT VSelVT = N->getValueType(0);
+ // Only handle vector types which are a power of 2.
+ if (!isPowerOf2_64(VSelVT.getSizeInBits()))
+ return SDValue();
+
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = VSelVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = EVT::getVectorVT(Ctx, FinalVT.getVectorElementType(),
+ FinalVT.getVectorNumElements() / 2);
+ if (FinalVT.getVectorNumElements() == 1)
+ return SDValue();
+
+ // If there is support for an i1 vector mask, don't touch.
+ if (Cond.getOpcode() == ISD::SETCC) {
+ EVT SetCCOpVT = Cond->getOperand(0).getValueType();
+ while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
+ SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
+ EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
+ if (SetCCResVT.getScalarSizeInBits() == 1)
+ return SDValue();
+ }
+
+ // Get the VT and operands for VSELECT, and widen if needed.
+ SDValue VSelOp1 = N->getOperand(1);
+ SDValue VSelOp2 = N->getOperand(2);
+ if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
+ VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
+ VSelOp1 = GetWidenedVector(VSelOp1);
+ VSelOp2 = GetWidenedVector(VSelOp2);
+ }
+
+ // The mask of the VSELECT should have integer elements.
+ EVT ToMaskVT = VSelVT;
+ if (!ToMaskVT.getScalarType().isInteger())
+ ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
+
+ SDValue Mask;
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT MaskVT = getSETCCWidenedResultTy(Cond);
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else if (isLogicalMaskOp(Cond->getOpcode()) &&
+ Cond->getOperand(0).getOpcode() == ISD::SETCC &&
+ Cond->getOperand(1).getOpcode() == ISD::SETCC) {
+ // Cond is (AND/OR/XOR (SETCC, SETCC))
+ SDValue SETCC0 = Cond->getOperand(0);
+ SDValue SETCC1 = Cond->getOperand(1);
+ EVT VT0 = getSETCCWidenedResultTy(SETCC0);
+ EVT VT1 = getSETCCWidenedResultTy(SETCC1);
+ unsigned ScalarBits0 = VT0.getScalarSizeInBits();
+ unsigned ScalarBits1 = VT1.getScalarSizeInBits();
+ unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
+ EVT MaskVT;
+ // If the two SETCCs have different VTs, either extend/truncate one of
+ // them to the other "towards" ToMaskVT, or truncate one and extend the
+ // other to ToMaskVT.
+ if (ScalarBits0 != ScalarBits1) {
+ EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
+ EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
+ if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
+ MaskVT = WideVT;
+ else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
+ MaskVT = NarrowVT;
+ else
+ MaskVT = ToMaskVT;
+ } else
+ // If the two SETCCs have the same VT, don't change it.
+ MaskVT = VT0;
+
+ // Make new SETCCs and logical nodes.
+ SETCC0 = convertMask(SETCC0, VT0, MaskVT);
+ SETCC1 = convertMask(SETCC1, VT1, MaskVT);
+ Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);
+
+ // Convert the logical op for VSELECT if needed.
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else
+ return SDValue();
+
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -2821,6 +3093,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
CondEltVT, WidenNumElts);
@@ -3093,7 +3368,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
@@ -3144,7 +3419,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -3565,10 +3840,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
for (; i != WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
-
void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
@@ -3737,5 +4011,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
Ops[Idx] = FillVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+ return DAG.getBuildVector(NVT, dl, Ops);
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index ded8e68fcbce5..a1d70ab6f036f 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -57,10 +57,8 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
RegPressure.resize(NumRC);
std::fill(RegLimit.begin(), RegLimit.end(), 0);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end();
- I != E; ++I)
- RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
ParallelLiveRanges = 0;
HorizontalVerticalBalance = 0;
@@ -364,16 +362,11 @@ int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
return RegBalance;
if (RawPressure) {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+ for (const TargetRegisterClass *RC : TRI->regclasses())
RegBalance += rawRegPressureDelta(SU, RC->getID());
- }
}
else {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
if ((RegPressure[RC->getID()] +
rawRegPressureDelta(SU, RC->getID()) > 0) &&
(RegPressure[RC->getID()] +
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3549ccd9e345b..e923e30e50377 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -422,11 +422,9 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
}
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
if (N->isMachineOpcode()) {
- if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
++NestLevel;
- } else if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameSetupOpcode()) {
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
if (NestLevel == 0)
return false;
--NestLevel;
@@ -480,12 +478,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
}
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
if (N->isMachineOpcode()) {
- if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
++NestLevel;
MaxNest = std::max(MaxNest, NestLevel);
- } else if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameSetupOpcode()) {
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
assert(NestLevel != 0);
--NestLevel;
if (NestLevel == 0)
@@ -550,7 +546,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
if (!LiveRegDefs[CallResource])
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
if (Node->isMachineOpcode() &&
- Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
unsigned NestLevel = 0;
unsigned MaxNest = 0;
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
@@ -755,7 +751,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
@@ -826,7 +822,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
++NumLiveRegs;
LiveRegDefs[CallResource] = SU;
LiveRegGens[CallResource] = CallSeqEndForStart[SU];
@@ -839,7 +835,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
@@ -1305,7 +1301,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// If we're in the middle of scheduling a call, don't begin scheduling
// another call. Also, don't allow any physical registers to be live across
// the call.
- if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) ||
+ (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
// Check the special calling-sequence resource.
unsigned CallResource = TRI->getNumRegs();
if (LiveRegDefs[CallResource]) {
@@ -1659,9 +1656,8 @@ public:
RegPressure.resize(NumRC);
std::fill(RegLimit.begin(), RegLimit.end(), 0);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
}
}
@@ -1788,7 +1784,7 @@ public:
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void dump(ScheduleDAG *DAG) const override {
+ LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
// Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue;
SF DumpPicker = Picker;
@@ -1924,19 +1920,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
// Register Pressure Tracking
//===----------------------------------------------------------------------===//
-void RegReductionPQBase::dumpRegPressure() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
unsigned Id = RC->getID();
unsigned RP = RegPressure[Id];
if (!RP) continue;
DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
<< RegLimit[Id] << '\n');
}
-#endif
}
+#endif
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
if (!TLI)
@@ -2092,7 +2086,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
RegPressure[RCId] -= Cost;
}
}
- dumpRegPressure();
+ DEBUG(dumpRegPressure());
}
void RegReductionPQBase::unscheduledNode(SUnit *SU) {
@@ -2172,7 +2166,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
}
}
- dumpRegPressure();
+ DEBUG(dumpRegPressure());
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3be622f8c179a..3c8526ebb7029 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -650,6 +650,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+ // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
@@ -704,8 +705,8 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (!N->getHasDebugValue())
return;
- // Opportunistically insert immediate dbg_value uses, i.e. those with source
- // order number right after the N.
+ // Opportunistically insert immediate dbg_value uses, i.e. those with the same
+ // source order number as N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
@@ -713,7 +714,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (DVs[i]->isInvalidated())
continue;
unsigned DVOrder = DVs[i]->getOrder();
- if (!Order || DVOrder == ++Order) {
+ if (!Order || DVOrder == Order) {
MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
if (DbgMI) {
Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -835,8 +836,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
- Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap);
+ Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e225ba8703b77..003ea5030bfce 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -289,28 +289,28 @@ static int isSignedOp(ISD::CondCode Opcode) {
}
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool isInteger) {
- if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed integer setcc with an unsigned integer setcc.
return ISD::SETCC_INVALID;
unsigned Op = Op1 | Op2; // Combine all of the condition bits.
- // If the N and U bits get set then the resultant comparison DOES suddenly
- // care about orderedness, and is true when ordered.
+ // If the N and U bits get set, then the resultant comparison DOES suddenly
+ // care about orderedness, and it is true when ordered.
if (Op > ISD::SETTRUE2)
Op &= ~16; // Clear the U bit if the N bit is set.
// Canonicalize illegal integer setcc's.
- if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
Op = ISD::SETNE;
return ISD::CondCode(Op);
}
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool isInteger) {
- if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed setcc with an unsigned setcc.
return ISD::SETCC_INVALID;
@@ -318,7 +318,7 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
// Canonicalize illegal integer setcc's.
- if (isInteger) {
+ if (IsInteger) {
switch (Result) {
default: break;
case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
@@ -871,11 +871,13 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf) {
- MF = &mf;
+void SelectionDAG::init(MachineFunction &NewMF,
+ OptimizationRemarkEmitter &NewORE) {
+ MF = &NewMF;
+ ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
- Context = &mf.getFunction()->getContext();
+ Context = &MF->getFunction()->getContext();
}
SelectionDAG::~SelectionDAG() {
@@ -1994,8 +1996,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
/// us to only collect the known bits that are shared by the requested vector
/// elements.
-/// TODO: We only support DemandedElts on a few opcodes so far, the remainder
-/// should be added when they become necessary.
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
APInt &KnownOne, const APInt &DemandedElts,
unsigned Depth) const {
@@ -2251,10 +2251,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero2.countLeadingOnes(),
BitWidth) - BitWidth;
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero.clearAllBits();
+ KnownZero.setLowBits(std::min(TrailZ, BitWidth));
+ KnownZero.setHighBits(std::min(LeadZ, BitWidth));
break;
}
case ISD::UDIV: {
@@ -2272,7 +2271,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero.setHighBits(LeadZ);
break;
}
case ISD::SELECT:
@@ -2297,10 +2296,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
- case ISD::SADDO:
- case ISD::UADDO:
- case ISD::SSUBO:
- case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
@@ -2312,14 +2307,14 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBitsFrom(1);
break;
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBitsFrom(1);
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
@@ -2328,7 +2323,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = KnownZero << *ShAmt;
KnownOne = KnownOne << *ShAmt;
// Low bits are known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue());
+ KnownZero.setLowBits(ShAmt->getZExtValue());
}
break;
case ISD::SRL:
@@ -2338,8 +2333,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = KnownZero.lshr(*ShAmt);
KnownOne = KnownOne.lshr(*ShAmt);
// High bits are known zero.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
- KnownZero |= HighBits;
+ KnownZero.setHighBits(ShAmt->getZExtValue());
}
break;
case ISD::SRA:
@@ -2350,13 +2344,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne.lshr(*ShAmt);
// If we know the value of the sign bit, then we know it is copied across
// the high bits by the shift amount.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
APInt SignBit = APInt::getSignBit(BitWidth);
SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask.
if (KnownZero.intersects(SignBit)) {
- KnownZero |= HighBits; // New bits are known zero.
+ KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
} else if (KnownOne.intersects(SignBit)) {
- KnownOne |= HighBits; // New bits are known one.
+ KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
}
}
break;
@@ -2401,9 +2394,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
- unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- KnownOne.clearAllBits();
+ KnownZero.setBitsFrom(Log2_32(BitWidth)+1);
break;
}
case ISD::LOAD: {
@@ -2412,26 +2403,39 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ KnownZero.setBitsFrom(MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
if (LD->getExtensionType() == ISD::NON_EXTLOAD)
computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
}
break;
}
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarSizeInBits();
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,
+ DemandedElts.zext(InVT.getVectorNumElements()),
+ Depth + 1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero.setBitsFrom(InBits);
+ break;
+ }
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
Depth + 1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
- KnownZero |= NewBits;
+ KnownZero.setBitsFrom(InBits);
break;
}
+ // TODO ISD::SIGN_EXTEND_VECTOR_INREG
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
@@ -2478,10 +2482,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::FGETSIGN:
// All bits are zero except the low bit.
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBitsFrom(1);
break;
-
- case ISD::SUB: {
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SUB:
+ case ISD::SUBC: {
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
// We know that the top bits of C-X are clear if X contains less bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
@@ -2499,13 +2514,40 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if ((KnownZero2 & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ KnownZero.setHighBits(NLZ2);
}
}
}
- LLVM_FALLTHROUGH;
+
+ // If low bits are known to be zero in both operands, then we know they are
+ // going to be 0 in the result. Both addition and complement operations
+ // preserve the low zero bits.
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
+ if (KnownZeroLow == 0)
+ break;
+
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ KnownZeroLow = std::min(KnownZeroLow,
+ KnownZero2.countTrailingOnes());
+ KnownZero.setBits(0, KnownZeroLow);
+ break;
}
+ case ISD::UADDO:
+ case ISD::SADDO:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ISD::ADD:
+ case ISD::ADDC:
case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
@@ -2526,19 +2568,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZeroLow = std::min(KnownZeroLow,
KnownZero2.countTrailingOnes());
- if (Opcode == ISD::ADD) {
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
- if (KnownZeroHigh > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
+ if (Opcode == ISD::ADDE) {
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear.
+ // We then return to the caller that the low bit is unknown but that
+ // other bits are known zero.
+ if (KnownZeroLow >= 2)
+ KnownZero.setBits(1, KnownZeroLow);
break;
}
- // With ADDE, a carry bit may be added in, so we can only use this
- // information if we know (at least) that the low two bits are clear. We
- // then return to the caller that the low bit is unknown but that other bits
- // are known zero.
- if (KnownZeroLow >= 2) // ADDE
- KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow);
+ KnownZero.setLowBits(KnownZeroLow);
+ if (KnownZeroHigh > 1)
+ KnownZero.setHighBits(KnownZeroHigh - 1);
break;
}
case ISD::SREM:
@@ -2591,7 +2633,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ KnownZero.clearAllBits();
+ KnownZero.setHighBits(Leaders);
break;
}
case ISD::EXTRACT_ELEMENT: {
@@ -2673,6 +2716,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
break;
}
+ case ISD::BITREVERSE: {
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero2.reverseBits();
+ KnownOne = KnownOne2.reverseBits();
+ break;
+ }
case ISD::BSWAP: {
computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
Depth + 1);
@@ -2680,12 +2730,62 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne2.byteSwap();
break;
}
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
+ case ISD::ABS: {
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+
+ // If the source's MSB is known zero, ABS is a no-op; keep the known bits.
+ if (KnownZero2[BitWidth - 1]) {
+ KnownZero = KnownZero2;
+ KnownOne = KnownOne2;
+ break;
+ }
+
+ // We can only prove that the absolute value's MSB is zero if some bit
+ // other than the sign bit is known one (otherwise it could be INT_MIN).
+ KnownOne2.clearBit(BitWidth - 1);
+ if (KnownOne2.getBoolValue()) {
+ KnownZero = APInt::getSignBit(BitWidth);
+ break;
+ }
+ break;
+ }
+ case ISD::UMIN: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+
+ // UMIN - the result will have at least the maximum of the inputs' known
+ // leading zero bits.
+ unsigned LeadZero = KnownZero.countLeadingOnes();
+ LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes());
+
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ KnownZero.setHighBits(LeadZero);
+ break;
+ }
case ISD::UMAX: {
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
Depth + 1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+
+ // UMAX - the result will have at least the maximum of the inputs' known
+ // leading one bits.
+ unsigned LeadOne = KnownOne.countLeadingOnes();
+ LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes());
+
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ KnownOne.setHighBits(LeadOne);
+ break;
+ }
+ case ISD::SMIN:
+ case ISD::SMAX: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
// If we don't know any bits, early out.
if (!KnownOne && !KnownZero)
break;
@@ -2699,7 +2799,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
// The low bits are known zero if the pointer is aligned.
- KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ KnownZero.setLowBits(Log2_32(Align));
break;
}
break;
@@ -2712,13 +2812,48 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+ TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, DemandedElts,
+ *this, Depth);
break;
}
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
+SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
+ SDValue N1) const {
+ // X + 0 never overflows
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ APInt N1Zero, N1One;
+ computeKnownBits(N1, N1Zero, N1One);
+ if (N1Zero.getBoolValue()) {
+ APInt N0Zero, N0One;
+ computeKnownBits(N0, N0Zero, N0One);
+
+ bool overflow;
+ (~N0Zero).uadd_ov(~N1Zero, overflow);
+ if (!overflow)
+ return OFK_Never;
+ }
+
+ // mulhi + 1 never overflows
+ if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
+ (~N1Zero & 0x01) == ~N1Zero)
+ return OFK_Never;
+
+ if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
+ APInt N0Zero, N0One;
+ computeKnownBits(N0, N0Zero, N0One);
+
+ if ((~N0Zero & 0x01) == ~N0Zero)
+ return OFK_Never;
+ }
+
+ return OFK_Sometime;
+}
+
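The OFK_Never test above asks whether the largest values consistent with the known-zero bits can be added without wrapping. A minimal standalone sketch with hand-written 8-bit masks (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t n0KnownZero = 0xF0; // high nibble of N0 known to be zero
  uint8_t n1KnownZero = 0xF0; // high nibble of N1 known to be zero
  unsigned maxN0 = static_cast<uint8_t>(~n0KnownZero); // largest possible N0
  unsigned maxN1 = static_cast<uint8_t>(~n1KnownZero); // largest possible N1
  // 0x0F + 0x0F = 0x1E still fits in 8 bits, so N0 + N1 can never overflow.
  assert(maxN0 + maxN1 <= 0xFFu);
  return 0;
}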
bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarSizeInBits();
@@ -2745,7 +2880,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
- if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) {
+ if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
return false;
@@ -2764,6 +2899,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ComputeNumSignBits(Op, DemandedElts, Depth);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
unsigned Tmp, Tmp2;
@@ -2772,6 +2916,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
if (Depth == 6)
return 1; // Limit search depth.
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
switch (Op.getOpcode()) {
default: break;
case ISD::AssertSext:
@@ -2786,7 +2933,28 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return Val.getNumSignBits();
}
+ case ISD::BUILD_VECTOR:
+ Tmp = VTBits;
+ for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
+
+ // BUILD_VECTOR can implicitly truncate its sources, so we must handle this.
+ if (SrcOp.getValueSizeInBits() != VTBits) {
+ assert(SrcOp.getValueSizeInBits() > VTBits &&
+ "Expected BUILD_VECTOR implicit truncation");
+ unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
+ Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ }
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ return Tmp;
+
case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
@@ -2799,7 +2967,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return std::max(Tmp, Tmp2);
case ISD::SRA:
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt ShiftVal = C->getAPIntValue();
@@ -2887,6 +3055,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
}
break;
case ISD::ADD:
+ case ISD::ADDC:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
@@ -2961,19 +3130,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
// result. Otherwise it gives either negative or > bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+
+ ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element.
+ unsigned EltIdx = CEltNo->getZExtValue();
+
+ // If we demand the inserted element then get its sign bits.
+ Tmp = UINT_MAX;
+ if (DemandedElts[EltIdx])
+ Tmp = ComputeNumSignBits(InVal, Depth + 1);
+
+ // If we demand the source vector then get its sign bits, and determine
+ // the minimum.
+ APInt VectorElts = DemandedElts;
+ VectorElts.clearBit(EltIdx);
+ if (!!VectorElts) {
+ Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ Tmp = ComputeNumSignBits(InVec, Depth + 1);
+ Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
case ISD::EXTRACT_VECTOR_ELT: {
- // At the moment we keep this simple and skip tracking the specific
- // element. This way we get the lowest common denominator for all elements
- // of the vector.
- // TODO: get information for given vector element
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
// If BitWidth > EltBitWidth the value is anyext:ed, and we do not know
// anything about sign bits. But if the sizes match we can derive knowledge
// about sign bits from the vector operand.
- if (BitWidth == EltBitWidth)
- return ComputeNumSignBits(Op.getOperand(0), Depth+1);
- break;
+ if (BitWidth != EltBitWidth)
+ break;
+
+ // If we know the element index, just demand that vector element, else for
+ // an unknown element index, ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
case ISD::EXTRACT_SUBVECTOR:
return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3008,14 +3221,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
- unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
- if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ unsigned NumBits =
+ TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
- computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0
@@ -3054,6 +3269,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
if (getTarget().Options.NoNaNsFPMath)
return true;
+ if (const BinaryWithFlagsSDNode *BF = dyn_cast<BinaryWithFlagsSDNode>(Op))
+ return BF->Flags.hasNoNaNs();
+
// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->getValueAPF().isNaN();
@@ -3206,6 +3424,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
@@ -3220,6 +3444,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
+ case ISD::FP16_TO_FP: {
+ bool Ignored;
+ APFloat FPV(APFloat::IEEEhalf(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
}
}
@@ -3261,17 +3496,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
- integerPart x[2];
bool ignored;
- static_assert(integerPartWidth >= 64, "APFloat parts too small!");
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
// FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
- Opcode==ISD::FP_TO_SINT,
- APFloat::rmTowardZero, &ignored);
- if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
- APInt api(VT.getSizeInBits(), x);
- return getConstant(api, DL, VT);
+ return getConstant(IntVal, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
@@ -3281,6 +3513,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
+ case ISD::FP_TO_FP16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(APFloat::IEEEhalf(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt(), DL, VT);
+ }
}
}
@@ -3303,6 +3543,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::ABS:
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -3420,6 +3662,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::ABS:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid ABS!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid BSWAP!");
@@ -3569,6 +3817,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
GA->getOffset() + uint64_t(Offset));
}
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+ switch (Opcode) {
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ // If a divisor is zero/undef or any element of a divisor vector is
+ // zero/undef, the whole op is undef.
+ assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+ SDValue Divisor = Ops[1];
+ if (Divisor.isUndef() || isNullConstant(Divisor))
+ return true;
+
+ return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+ any_of(Divisor->op_values(),
+ [](SDValue V) { return V.isUndef() || isNullConstant(V); });
+ // TODO: Handle signed overflow.
+ }
+ // TODO: Handle oversized shifts.
+ default:
+ return false;
+ }
+}
+
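A minimal standalone mirror of the divisor rule, assuming nothing about the DAG: an empty optional stands in for an undef lane, and any zero or undef lane makes the whole division undefined (C++17, illustrative only):

#include <cassert>
#include <optional>
#include <vector>

static bool divisorMakesResultUndef(
    const std::vector<std::optional<int>> &divisorLanes) {
  for (const auto &lane : divisorLanes)
    if (!lane || *lane == 0)
      return true;
  return false;
}

int main() {
  assert(divisorMakesResultUndef({4, 0, 2}));         // a zero lane
  assert(divisorMakesResultUndef({4, std::nullopt})); // an undef lane
  assert(!divisorMakesResultUndef({4, 2}));
  return 0;
}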
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, SDNode *Cst1,
SDNode *Cst2) {
@@ -3578,6 +3850,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+ return getUNDEF(VT);
+
// Handle the case of two scalars.
if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
@@ -3645,6 +3920,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, Ops))
+ return getUNDEF(VT);
+
// We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
if (!VT.isVector())
return SDValue();
@@ -3676,7 +3954,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// Find legal integer scalar type for constant promotion and
// ensure that its scalar size is at least as large as source.
EVT LegalSVT = VT.getScalarType();
- if (LegalSVT.isInteger()) {
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
if (LegalSVT.bitsLT(VT.getScalarType()))
return SDValue();
@@ -3910,35 +4188,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
- auto SignExtendInReg = [&](APInt Val) {
+ auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
unsigned FromBits = EVT.getScalarSizeInBits();
Val <<= Val.getBitWidth() - FromBits;
Val = Val.ashr(Val.getBitWidth() - FromBits);
- return getConstant(Val, DL, VT.getScalarType());
+ return getConstant(Val, DL, ConstantVT);
};
if (N1C) {
const APInt &Val = N1C->getAPIntValue();
- return SignExtendInReg(Val);
+ return SignExtendInReg(Val, VT);
}
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
SmallVector<SDValue, 8> Ops;
+ llvm::EVT OpVT = N1.getOperand(0).getValueType();
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
if (Op.isUndef()) {
- Ops.push_back(getUNDEF(VT.getScalarType()));
+ Ops.push_back(getUNDEF(OpVT));
continue;
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- APInt Val = C->getAPIntValue();
- Val = Val.zextOrTrunc(VT.getScalarSizeInBits());
- Ops.push_back(SignExtendInReg(Val));
- continue;
- }
- break;
+ ConstantSDNode *C = cast<ConstantSDNode>(Op);
+ APInt Val = C->getAPIntValue();
+ Ops.push_back(SignExtendInReg(Val, OpVT));
}
- if (Ops.size() == VT.getVectorNumElements())
- return getBuildVector(VT, DL, Ops);
+ return getBuildVector(VT, DL, Ops);
}
break;
}
@@ -4040,6 +4314,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
+ }
+
// EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
// during shuffle legalization.
if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
@@ -4943,11 +5230,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -5004,11 +5291,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -5066,11 +5353,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -7049,6 +7336,21 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {
return Seen;
}
+/// Return true if the only users of N are contained in Nodes.
+bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (llvm::any_of(Nodes,
+ [&User](const SDNode *Node) { return User == Node; }))
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
/// isOperand - Return true if this node is an operand of N.
///
bool SDValue::isOperandOf(const SDNode *N) const {
@@ -7070,21 +7372,39 @@ bool SDNode::isOperandOf(const SDNode *N) const {
/// side-effecting instructions on any chain path. In practice, this looks
/// through token factors and non-volatile loads. In order to remain efficient,
/// this only looks a couple of nodes in, it does not do an exhaustive search.
+///
+/// Note that we only need to examine chains when we're searching for
+/// side-effects; SelectionDAG requires that all side-effects are represented
+/// by chains, even if another operand would force a specific ordering. This
+/// constraint is necessary to allow transformations like splitting loads.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
- unsigned Depth) const {
+ unsigned Depth) const {
if (*this == Dest) return true;
// Don't search too deeply, we just want to be able to see through
// TokenFactor's etc.
if (Depth == 0) return false;
- // If this is a token factor, all inputs to the TF happen in parallel. If any
- // of the operands of the TF does not reach dest, then we cannot do the xform.
+ // If this is a token factor, all inputs to the TF happen in parallel.
if (getOpcode() == ISD::TokenFactor) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
- return false;
- return true;
+ // First, try a shallow search.
+ if (is_contained((*this)->ops(), Dest)) {
+ // We found the chain we want as an operand of this TokenFactor.
+ // Essentially, we reach the chain without side-effects if we could
+ // serialize the TokenFactor into a simple chain of operations with
+ // Dest as the last operation. This is automatically true if the
+ // chain has one use: there are no other ordering constraints.
+ // If the chain has more than one use, we give up: some other
+ // use of Dest might force a side-effect between Dest and the current
+ // node.
+ if (Dest.hasOneUse())
+ return true;
+ }
+ // Next, try a deep search: check whether every operand of the TokenFactor
+ // reaches Dest.
+ return all_of((*this)->ops(), [=](SDValue Op) {
+ return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
+ });
}
// Loads don't have side effects, look through them.
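// [Editor's sketch, not part of the patch] A standalone model of the shallow
// TokenFactor check added above, with plain C++ stand-ins for SDNode/SDValue
// (Node, Ops and NumUses are illustrative types and fields, not LLVM's):
#include <algorithm>
#include <vector>
struct Node { std::vector<const Node *> Ops; unsigned NumUses = 0; };
// Dest is reachable "for free" if it is a direct operand of the TokenFactor
// and has no other users, so the TokenFactor could be serialized with Dest
// as its last operation.
static bool shallowReaches(const Node &TokenFactor, const Node &Dest) {
  return Dest.NumUses == 1 &&
         std::find(TokenFactor.Ops.begin(), TokenFactor.Ops.end(), &Dest) !=
             TokenFactor.Ops.end();
}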
@@ -7102,11 +7422,6 @@ bool SDNode::hasPredecessor(const SDNode *N) const {
return hasPredecessorHelper(N, Visited, Worklist);
}
-uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
- assert(Num < NumOperands && "Invalid child # of SDNode!");
- return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
-}
-
const SDNodeFlags *SDNode::getFlags() const {
if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
return &FlagsNode->Flags;
@@ -7377,13 +7692,13 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
unsigned BitPos = j * EltBitSize;
if (OpVal.isUndef())
- SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ SplatUndef.setBits(BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
- zextOrTrunc(sz) << BitPos;
+ SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize),
+ BitPos);
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
- SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
- else
+ SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
+ else
return false;
}
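// [Editor's sketch, not part of the patch] The isConstantSplat change above
// replaces the manual zext/shift/or sequence with APInt::setBits and
// APInt::insertBits. Minimal illustration of the two calls, assuming LLVM's
// ADT headers are available (buildSplatBits is a hypothetical helper):
#include "llvm/ADT/APInt.h"
static void buildSplatBits(llvm::APInt &SplatValue, llvm::APInt &SplatUndef,
                           const llvm::APInt &EltVal, unsigned EltBitSize,
                           unsigned BitPos, bool IsUndef) {
  if (IsUndef)
    SplatUndef.setBits(BitPos, BitPos + EltBitSize);  // mark the whole element
  else
    SplatValue.insertBits(EltVal.zextOrTrunc(EltBitSize), BitPos); // splice in
}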
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 996c95bd5f07b..8708f58f1e632 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -84,10 +84,6 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
-static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
- cl::desc("Enable fast-math-flags for DAG nodes"));
-
/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
@@ -634,10 +630,6 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
}
}
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
@@ -739,10 +731,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
@@ -796,9 +784,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list. This adds the code marker and includes the number of
-/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
@@ -850,12 +835,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
LPadToCallSiteMap.clear();
}
-/// clear - Clear out the current SelectionDAG and the associated
-/// state and prepare this SelectionDAGBuilder object to be used
-/// for a new block. This doesn't clear out information about
-/// additional blocks that are needed to complete switch lowering
-/// or PHI node updating; that information is cleared out as it is
-/// consumed.
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
@@ -867,21 +846,10 @@ void SelectionDAGBuilder::clear() {
StatepointLowering.clear();
}
-/// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is separated from the clear so that debug
-/// information that is dangling in a basic block can be properly
-/// resolved in a different basic block. This allows the
-/// SelectionDAG to resolve dangling debug information attached
-/// to PHI nodes.
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
-/// getRoot - Return the current virtual root of the Selection DAG,
-/// flushing any PendingLoad items. This must be done before emitting
-/// a store or any other node that may need to be ordered after any
-/// prior load instructions.
-///
SDValue SelectionDAGBuilder::getRoot() {
if (PendingLoads.empty())
return DAG.getRoot();
@@ -901,10 +869,6 @@ SDValue SelectionDAGBuilder::getRoot() {
return Root;
}
-/// getControlRoot - Similar to getRoot, but instead of flushing all the
-/// PendingLoad items, flush all the PendingExports items. It is necessary
-/// to do this before emitting a terminator instruction.
-///
SDValue SelectionDAGBuilder::getControlRoot() {
SDValue Root = DAG.getRoot();
@@ -937,7 +901,9 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
- ++SDNodeOrder;
+ // Increase the SDNodeOrder if dealing with a non-debug instruction.
+ if (!isa<DbgInfoIntrinsic>(I))
+ ++SDNodeOrder;
CurInst = &I;
@@ -1403,16 +1369,16 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasAttribute(
+ AttributeList::ReturnIndex, Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1582,7 +1548,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
- BranchProbability FProb) {
+ BranchProbability FProb,
+ bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1596,10 +1563,14 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
- Condition = getICmpCondCode(IC->getPredicate());
+ ICmpInst::Predicate Pred =
+ InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
- Condition = getFCmpCondCode(FC->getPredicate());
+ FCmpInst::Predicate Pred =
+ InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
@@ -1612,7 +1583,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
}
// Create a CaseBlock record representing this branch.
- CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+ CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
}
@@ -1625,16 +1597,44 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
- BranchProbability FProb) {
- // If this node is not part of the or/and tree, emit it as a branch.
+ BranchProbability FProb,
+ bool InvertCond) {
+  // Skip over a 'not' operator rather than emitting it as part of the tree,
+  // and remember to invert the opcode and operands at the next level.
+ if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
+ const Value *CondOp = BinaryOperator::getNotArgument(Cond);
+ if (InBlock(CondOp, CurBB->getBasicBlock())) {
+ FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+ }
+
const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ unsigned BOpc = 0;
+ if (BOp) {
+ BOpc = BOp->getOpcode();
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOpc != Opc || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
- TProb, FProb);
+ TProb, FProb, InvertCond);
return;
}
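// [Editor's sketch, not part of the patch] The InvertCond handling added above
// is De Morgan's law applied while walking the tree: an inverted 'or' is
// traversed as an 'and' (and vice versa), and leaf compares get their inverse
// predicate. Illustration of the opcode flip only, assuming llvm/IR headers
// (effectiveOpcode is a hypothetical helper):
#include "llvm/IR/Instruction.h"
static unsigned effectiveOpcode(unsigned Opc, bool InvertCond) {
  if (!InvertCond)
    return Opc;
  if (Opc == llvm::Instruction::And) return llvm::Instruction::Or;
  if (Opc == llvm::Instruction::Or)  return llvm::Instruction::And;
  return Opc; // anything else is left as-is and handled as a leaf
}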
@@ -1669,14 +1669,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1702,14 +1702,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
}
}
@@ -1793,7 +1793,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
- getEdgeProbability(BrMBB, Succ1MBB));
+ getEdgeProbability(BrMBB, Succ1MBB),
+ /*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -2027,7 +2028,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
Entry.Node = StackSlot;
Entry.Ty = FnTy->getParamType(0);
if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
- Entry.isInReg = true;
+ Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2581,13 +2582,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
Flags.setVectorReduction(vec_redux);
- if (EnableFMFInDAG) {
- Flags.setAllowReciprocal(FMF.allowReciprocal());
- Flags.setNoInfs(FMF.noInfs());
- Flags.setNoNaNs(FMF.noNaNs());
- Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
- }
+ Flags.setAllowReciprocal(FMF.allowReciprocal());
+ Flags.setAllowContract(FMF.allowContract());
+ Flags.setNoInfs(FMF.noInfs());
+ Flags.setNoNaNs(FMF.noNaNs());
+ Flags.setNoSignedZeros(FMF.noSignedZeros());
+ Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+
SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, &Flags);
setValue(&I, BinNodeValue);
@@ -2914,7 +2915,7 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
- // is not what we are looking for. Only regcognize a bitcast of a genuine
+ // is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
@@ -3067,14 +3068,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
- // two subvectors and do the shuffle. The analysis is done by calculating
- // the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts),
- static_cast<int>(SrcNumElts)};
- int MaxRange[2] = {-1, -1};
-
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ // two subvectors and do the shuffle.
+ int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
+ bool CanExtract = true;
+ for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
@@ -3083,41 +3080,28 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
Input = 1;
Idx -= SrcNumElts;
}
- if (Idx > MaxRange[Input])
- MaxRange[Input] = Idx;
- if (Idx < MinRange[Input])
- MinRange[Input] = Idx;
- }
-
- // Check if the access is smaller than the vector size and can we find
- // a reasonable extract index.
- int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
- // Extract.
- int StartIdx[2]; // StartIdx to extract from
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
- RangeUse[Input] = 0; // Unused
- StartIdx[Input] = 0;
- continue;
- }
- // Find a good start index that is a multiple of the mask length. Then
- // see if the rest of the elements are in range.
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ // If all the indices come from the same MaskNumElts sized portion of
+ // the sources we can use extract. Also make sure the extract wouldn't
+ // extract past the end of the source.
+ int NewStartIdx = alignDown(Idx, MaskNumElts);
+ if (NewStartIdx + MaskNumElts > SrcNumElts ||
+ (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+ CanExtract = false;
+ // Make sure we always update StartIdx as we use it to track if all
+ // elements are undef.
+ StartIdx[Input] = NewStartIdx;
}
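// [Editor's sketch, not part of the patch] The per-input rule implemented by
// the loop above, as a standalone helper (windowOk is hypothetical; assumes
// Idx >= 0, since undef lanes are skipped before this check): every mask index
// must land in the same MaskNumElts-wide window of its source, and that window
// must not run past the end of the source vector.
static bool windowOk(int Idx, int MaskNumElts, int SrcNumElts, int &StartIdx) {
  int NewStart = (Idx / MaskNumElts) * MaskNumElts; // alignDown(Idx, MaskNumElts)
  if (NewStart + MaskNumElts > SrcNumElts)
    return false;                                   // would read past the end
  if (StartIdx >= 0 && StartIdx != NewStart)
    return false;                                   // mixes two windows
  StartIdx = NewStart;
  return true;
}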
- if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+ if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
- if (RangeUse[Input] == 0)
+ if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(
@@ -3128,16 +3112,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
// Calculate new mask.
- SmallVector<int, 8> MappedOps;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
- if (Idx >= 0) {
- if (Idx < (int)SrcNumElts)
- Idx -= StartIdx[0];
- else
- Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
- }
- MappedOps.push_back(Idx);
+ SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+ for (int &Idx : MappedOps) {
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ else if (Idx >= 0)
+ Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
@@ -3151,8 +3131,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<SDValue,8> Ops;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
@@ -3281,7 +3260,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
- // In an inbouds GEP with an offset that is nonnegative even when
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
@@ -4752,7 +4731,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
else
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op)
+ .add(*Op)
.addImm(Offset)
.addMetadata(Variable)
.addMetadata(Expr));
@@ -4764,7 +4743,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr, int64_t Offset,
- DebugLoc dl,
+ const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
SDDbgValue *SDV;
auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
@@ -4794,9 +4773,9 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
# define setjmp_undefined_for_msvc
#endif
-/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
-/// we want to emit this as a call to a named external function, return the name
-/// otherwise lower it and return null.
+/// Lower the call to the specified intrinsic function. If we want to emit this
+/// as a call to a named external function, return the name. Otherwise, lower it
+/// and return null.
const char *
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -4929,14 +4908,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl)
- .setChain(getRoot())
- .setCallee(TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(
- TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
DAG.setRoot(CallResult.second);
@@ -5301,6 +5278,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return nullptr;
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ visitConstrainedFPIntrinsic(I, Intrinsic);
+ return nullptr;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5537,7 +5521,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes()
- .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
+ .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
@@ -5548,7 +5532,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TargetLowering::ArgListTy Args;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
@@ -5749,6 +5733,46 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ SDLoc sdl = getCurSDLoc();
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::experimental_constrained_fadd:
+ Opcode = ISD::STRICT_FADD;
+ break;
+ case Intrinsic::experimental_constrained_fsub:
+ Opcode = ISD::STRICT_FSUB;
+ break;
+ case Intrinsic::experimental_constrained_fmul:
+ Opcode = ISD::STRICT_FMUL;
+ break;
+ case Intrinsic::experimental_constrained_fdiv:
+ Opcode = ISD::STRICT_FDIV;
+ break;
+ case Intrinsic::experimental_constrained_frem:
+ Opcode = ISD::STRICT_FREM;
+ break;
+ }
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain = getRoot();
+ SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)) };
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+ ValueVTs.push_back(MVT::Other); // Out chain
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops);
+
+ assert(Result.getNode()->getNumValues() == 2);
+ SDValue OutChain = Result.getValue(1);
+ DAG.setRoot(OutChain);
+ SDValue FPResult = Result.getValue(0);
+ setValue(&I, FPResult);
+}
+
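// [Editor's sketch, not part of the patch] The constrained intrinsics are
// lowered to STRICT_* nodes that carry a chain, so the FP operation cannot be
// reordered past other side-effecting work. Consuming the two results built
// above (value 0 = FP result, value 1 = out chain); useStrictResult is a
// hypothetical helper, assuming the usual SelectionDAG headers:
#include "llvm/CodeGen/SelectionDAG.h"
static void useStrictResult(llvm::SelectionDAG &DAG, llvm::SDValue Strict) {
  llvm::SDValue FPResult = Strict.getValue(0); // the arithmetic value
  llvm::SDValue OutChain = Strict.getValue(1); // orders later strict FP ops
  DAG.setRoot(OutChain);                       // keep the chain threaded
}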
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
@@ -5827,7 +5851,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Type *RetTy = CS.getType();
TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
const Value *SwiftErrorVal = nullptr;
@@ -5843,6 +5866,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
+ TargetLowering::ArgListEntry Entry;
const Value *V = *i;
// Skip empty types
@@ -5852,11 +5876,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- // Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Entry.setAttributes(&CS, i - CS.arg_begin());
// Use swifterror virtual register as input to the call.
- if (Entry.isSwiftError && TLI.supportSwiftError()) {
+ if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
@@ -5869,7 +5892,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If we have an explicit sret argument that is an Instruction, (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
- if (Entry.isSRet && isa<Instruction>(V))
+ if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
@@ -5912,8 +5935,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
}
}
-/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// Return true if it only matters that the value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
for (const User *U : V->users()) {
if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -5928,13 +5950,17 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
- Type *LoadTy,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
+ Type *LoadTy =
+ Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
+ if (LoadVT.isVector())
+ LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
+
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
@@ -5967,8 +5993,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
return LoadVal;
}
-/// processIntegerCallValue - Record the value for an instruction that
-/// produces an integer result, converting the type where necessary.
+/// Record the value for an instruction that produces an integer result,
+/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
@@ -5981,20 +6007,13 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
setValue(&I, Value);
}
-/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a memcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
- if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
- !I.getArgOperand(2)->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
-
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
@@ -6005,11 +6024,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
- std::pair<SDValue, SDValue> Res =
- TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
- getValue(LHS), getValue(RHS), getValue(Size),
- MachinePointerInfo(LHS),
- MachinePointerInfo(RHS));
+ std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
+ DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
+ getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
@@ -6018,88 +6035,79 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
- if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
- bool ActuallyDoIt = true;
- MVT LoadVT;
- Type *LoadTy;
- switch (CSize->getZExtValue()) {
- default:
- LoadVT = MVT::Other;
- LoadTy = nullptr;
- ActuallyDoIt = false;
- break;
- case 2:
- LoadVT = MVT::i16;
- LoadTy = Type::getInt16Ty(CSize->getContext());
- break;
- case 4:
- LoadVT = MVT::i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- break;
- case 8:
- LoadVT = MVT::i64;
- LoadTy = Type::getInt64Ty(CSize->getContext());
- break;
- /*
- case 16:
- LoadVT = MVT::v4i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- LoadTy = VectorType::get(LoadTy, 4);
- break;
- */
- }
-
- // This turns into unaligned loads. We only do this if the target natively
- // supports the MVT we'll be loading or if it is small enough (<= 4) that
- // we'll only produce a small number of byte loads.
+ if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I))
+ return false;
- // Require that we can find a legal MVT, and only do this if the target
- // supports unaligned loads of that type. Expanding into byte loads would
- // bloat the code.
+ // If the target has a fast compare for the given size, it will return a
+ // preferred load type for that size. Require that the load VT is legal and
+ // that the target supports unaligned loads of that type. Otherwise, return
+ // INVALID.
+ auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (ActuallyDoIt && CSize->getZExtValue() > 4) {
- unsigned DstAS = LHS->getType()->getPointerAddressSpace();
- unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ MVT LVT = TLI.hasFastEqualityCompare(NumBits);
+ if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
- if (!TLI.isTypeLegal(LoadVT) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
- ActuallyDoIt = false;
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ if (!TLI.isTypeLegal(LVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
+ LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
- if (ActuallyDoIt) {
- SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
- SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+ return LVT;
+ };
- SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
- ISD::SETNE);
- processIntegerCallValue(I, Res, false);
- return true;
- }
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+ MVT LoadVT;
+ unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+ switch (NumBitsToCompare) {
+ default:
+ return false;
+ case 16:
+ LoadVT = MVT::i16;
+ break;
+ case 32:
+ LoadVT = MVT::i32;
+ break;
+ case 64:
+ case 128:
+ case 256:
+ LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
+ break;
}
+ if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return false;
- return false;
+ SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
+ SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
+
+ // Bitcast to a wide integer type if the loads are vectors.
+ if (LoadVT.isVector()) {
+ EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
+ LoadL = DAG.getBitcast(CmpVT, LoadL);
+ LoadR = DAG.getBitcast(CmpVT, LoadR);
+ }
+
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
+ processIntegerCallValue(I, Cmp, false);
+ return true;
}
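// [Editor's sketch, not part of the patch] At the source level, the fast path
// above turns a size-4 memcmp used only for equality into one wide compare.
// Standalone C++ equivalent (the real lowering emits possibly-unaligned loads
// and an ISD::SETNE; memcpy keeps this sketch well-defined for any alignment):
#include <cstdint>
#include <cstring>
static bool differ4(const void *LHS, const void *RHS) {
  uint32_t L, R;
  std::memcpy(&L, LHS, 4);
  std::memcpy(&R, RHS, 4);
  return L != R; // memcmp(LHS, RHS, 4) != 0
}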
-/// visitMemChrCall -- See if we can lower a memchr call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a memchr call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
- // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
- if (!Src->getType()->isPointerTy() ||
- !Char->getType()->isIntegerTy() ||
- !Length->getType()->isIntegerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6115,15 +6123,12 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
return false;
}
-///
-/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to
-/// to adjust the dst pointer by the size of the copied memory.
+/// See if we can lower a mempcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
-
- // Verify argument count: void *mempcpy(void *, const void *, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
@@ -6158,19 +6163,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
return true;
}
-/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
-/// optimized form. If so, return true and lower it, otherwise return false
-/// and it will be lowered like a normal call.
+/// See if we can lower a strcpy call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
- // Verify that the prototype makes sense. char *strcpy(char *, char *)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6187,19 +6186,13 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
return false;
}
-/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a strcmp call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int strcmp(void*,void*)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6216,17 +6209,13 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
return false;
}
-/// visitStrLenCall -- See if we can lower a strlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strlen(char *)
- if (I.getNumArgOperands() != 1)
- return false;
-
const Value *Arg0 = I.getArgOperand(0);
- if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6241,19 +6230,13 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
return false;
}
-/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strnlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strnlen(char *, size_t)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6269,16 +6252,15 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
return false;
}
-/// visitUnaryFloatCall - If a call instruction is a unary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a unary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it, otherwise return
+/// false and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a unary floating-point call.
- if (I.getNumArgOperands() != 1 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp = getValue(I.getArgOperand(0));
@@ -6286,17 +6268,15 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
return true;
}
-/// visitBinaryFloatCall - If a call instruction is a binary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a binary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a binary floating-point call.
- if (I.getNumArgOperands() != 2 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- I.getType() != I.getArgOperand(1)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp0 = getValue(I.getArgOperand(0));
@@ -6336,20 +6316,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
// some reason.
- LibFunc::Func Func;
+ LibFunc Func;
if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
- LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- if (I.getNumArgOperands() == 2 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType() &&
- I.onlyReadsMemory()) {
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ // We already checked this call's prototype; verify it doesn't modify
+ // errno.
+ if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
@@ -6357,122 +6335,122 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
break;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
- case LibFunc::sqrt_finite:
- case LibFunc::sqrtf_finite:
- case LibFunc::sqrtl_finite:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
if (visitMemCmpCall(I))
return;
break;
- case LibFunc::mempcpy:
+ case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
- case LibFunc::memchr:
+ case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
- case LibFunc::strcpy:
+ case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
- case LibFunc::stpcpy:
+ case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
- case LibFunc::strlen:
+ case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
- case LibFunc::strnlen:
+ case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
@@ -7361,7 +7339,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = CS->getOperand(ArgI);
@@ -7370,7 +7348,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, AttrI);
+ Entry.setAttributes(&CS, ArgIdx);
Args.push_back(Entry);
}
@@ -7631,9 +7609,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
-/// Returns an AttributeSet representing the attributes applied to the return
+/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
-static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
@@ -7642,8 +7620,8 @@ static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
- return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
- Attrs);
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -7683,15 +7661,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isInReg = false;
- Entry.isSRet = true;
- Entry.isNest = false;
- Entry.isByVal = false;
- Entry.isReturned = false;
- Entry.isSwiftSelf = false;
- Entry.isSwiftError = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsInReg = false;
+ Entry.IsSRet = true;
+ Entry.IsNest = false;
+ Entry.IsByVal = false;
+ Entry.IsReturned = false;
+ Entry.IsSwiftSelf = false;
+ Entry.IsSwiftError = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -7724,7 +7702,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- if (Args[i].isSwiftError) {
+ if (Args[i].IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
@@ -7741,7 +7719,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
Type *FinalType = Args[i].Ty;
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg);
@@ -7754,11 +7732,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- if (Args[i].isZExt)
+ if (Args[i].IsZExt)
Flags.setZExt();
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
Flags.setSExt();
- if (Args[i].isInReg) {
+ if (Args[i].IsInReg) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (CLI.CallConv == CallingConv::X86_VectorCall &&
@@ -7771,15 +7749,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Set InReg Flag
Flags.setInReg();
}
- if (Args[i].isSRet)
+ if (Args[i].IsSRet)
Flags.setSRet();
- if (Args[i].isSwiftSelf)
+ if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
- if (Args[i].isSwiftError)
+ if (Args[i].IsSwiftError)
Flags.setSwiftError();
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
Flags.setByVal();
- if (Args[i].isInAlloca) {
+ if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
@@ -7788,7 +7766,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
- if (Args[i].isByVal || Args[i].isInAlloca) {
+ if (Args[i].IsByVal || Args[i].IsInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
@@ -7801,7 +7779,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
FrameAlign = getByValTypeAlignment(ElementTy, DL);
Flags.setByValAlign(FrameAlign);
}
- if (Args[i].isNest)
+ if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
@@ -7812,13 +7790,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
- else if (Args[i].isZExt)
+ else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors for now
- if (Args[i].isReturned && !Op.getValueType().isVector()) {
+ if (Args[i].IsReturned && !Op.getValueType().isVector()) {
assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
"unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
@@ -7832,9 +7810,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
- (ExtendKind != ISD::ANY_EXTEND &&
- CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
- Flags.setReturned();
+ (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
+ CLI.RetZExt == Args[i].IsZExt))
+ Flags.setReturned();
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
@@ -8010,6 +7988,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
+typedef DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>
+ ArgCopyElisionMapTy;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+ FunctionLoweringInfo *FuncInfo,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+ // Record the state of every static alloca used in the entry block. Argument
+ // allocas are all used in the entry block, so we need approximately as many
+ // entries as we have arguments.
+ enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+ SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+ unsigned NumArgs = FuncInfo->Fn->arg_size();
+ StaticAllocas.reserve(NumArgs * 2);
+
+ auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+ if (!V)
+ return nullptr;
+ V = V->stripPointerCasts();
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+ return nullptr;
+ auto Iter = StaticAllocas.insert({AI, Unknown});
+ return &Iter.first->second;
+ };
+
+ // Look for stores of arguments to static allocas. Look through bitcasts and
+ // GEPs to handle type coercions, as long as the alloca is fully initialized
+ // by the store. Any non-store use of an alloca escapes it and any subsequent
+ // unanalyzed store might write it.
+ // FIXME: Handle structs initialized with multiple stores.
+ for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+ // Look for stores, and handle non-store uses conservatively.
+ const auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI) {
+ // We will look through cast uses, so ignore them completely.
+ if (I.isCast())
+ continue;
+ // Ignore debug info intrinsics, they don't escape or store to allocas.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ // This is an unknown instruction. Assume it escapes or writes to all
+ // static alloca operands.
+ for (const Use &U : I.operands()) {
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+ *Info = StaticAllocaInfo::Clobbered;
+ }
+ continue;
+ }
+
+ // If the stored value is a static alloca, mark it as escaped.
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+ *Info = StaticAllocaInfo::Clobbered;
+
+ // Check if the destination is a static alloca.
+ const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+ StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+ if (!Info)
+ continue;
+ const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+ // Skip allocas that have been initialized or clobbered.
+ if (*Info != StaticAllocaInfo::Unknown)
+ continue;
+
+ // Check if the stored value is an argument, and that this store fully
+ // initializes the alloca. Don't elide copies from the same argument twice.
+ const Value *Val = SI->getValueOperand()->stripPointerCasts();
+ const auto *Arg = dyn_cast<Argument>(Val);
+ if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ Arg->getType()->isEmptyTy() ||
+ DL.getTypeStoreSize(Arg->getType()) !=
+ DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ ArgCopyElisionCandidates.count(Arg)) {
+ *Info = StaticAllocaInfo::Clobbered;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+
+ // Mark this alloca and store for argument copy elision.
+ *Info = StaticAllocaInfo::Elidable;
+ ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+ // Stop scanning if we've seen all arguments. This will happen early in -O0
+ // builds, which is useful, because -O0 builds have large entry blocks and
+ // many allocas.
+ if (ArgCopyElisionCandidates.size() == NumArgs)
+ break;
+ }
+}
+
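As a concrete illustration of the pattern this scan records: at -O0, frontends typically spill every parameter into its own alloca with a single store, and for a parameter that arrives on the stack that store is exactly the elidable copy. A minimal sketch, with the IR in the comments shown for illustration only (whether a given parameter is passed on the stack depends on the target calling convention):

// Built at -O0, each parameter gets the usual alloca/store pair, e.g. for 'g':
//   %g.addr = alloca i32, align 4
//   store i32 %g, i32* %g.addr, align 4
// On an ABI where 'g' arrives on the stack (the seventh integer argument on
// x86-64 SysV, for example), that store copies the incoming argument slot into
// a fresh local slot -- the argument/alloca/store triple recorded above.
int seventh(int a, int b, int c, int d, int e, int f, int g) {
  return g;
}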
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+ FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+ SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+ SDValue ArgVal, bool &ArgHasUses) {
+ // Check if this is a load from a fixed stack object.
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ if (!LNode)
+ return;
+ auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+ if (!FINode)
+ return;
+
+ // Check that the fixed stack object is the right size and alignment.
+ // Look at the alignment that the user wrote on the alloca instead of looking
+ // at the stack object.
+ auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+ assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+ const AllocaInst *AI = ArgCopyIter->second.first;
+ int FixedIndex = FINode->getIndex();
+ int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int OldIndex = AllocaIndex;
+ MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+ DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
+ return;
+ }
+ unsigned RequiredAlignment = AI->getAlignment();
+ if (!RequiredAlignment) {
+ RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ AI->getAllocatedType());
+ }
+ if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ return;
+ }
+
+ // Perform the elision. Delete the old stack object and replace its only use
+ // in the variable info map. Mark the stack object as mutable.
+ DEBUG({
+ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+ << " Replacing frame index " << OldIndex << " with " << FixedIndex
+ << '\n';
+ });
+ MFI.RemoveStackObject(OldIndex);
+ MFI.setIsImmutableObjectIndex(FixedIndex, false);
+ AllocaIndex = FixedIndex;
+ ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+ Chains.push_back(ArgVal.getValue(1));
+
+ // Avoid emitting code for the store implementing the copy.
+ const StoreInst *SI = ArgCopyIter->second.second;
+ ElidedArgCopyInstrs.insert(SI);
+
+ // Check for uses of the argument again so that we can avoid exporting ArgVal
+ // if it isn't used by anything other than the store.
+ for (const Value *U : Arg.users()) {
+ if (U != SI) {
+ ArgHasUses = true;
+ break;
+ }
+ }
+}
+
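The elision can only fire when the target reports the argument's incoming location as a load from a fixed stack object, which is what the CopyElisionCandidate flag set below asks for. A rough sketch of that target-side contract, written as a hypothetical helper (assuming the usual SelectionDAG and TargetLowering headers) rather than any in-tree target's code:

// Hypothetical helper: lower one stack-passed formal argument so that
// tryToElideArgumentCopy can find the FrameIndexSDNode through the load's
// base pointer.
static SDValue lowerStackArgForElision(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue Chain, const CCValAssign &VA,
                                       MVT ValVT, const TargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // An initially immutable fixed object describing where the caller put the
  // argument.
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()));
  // A plain load keeps the frame index visible to the elision code, which may
  // later mark the object mutable and reuse it as the alloca's storage.
  return DAG.getLoad(ValVT, dl, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
}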
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
@@ -8032,15 +8177,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Ins.push_back(RetArg);
}
+ // Look for stores of arguments to static allocas. Mark such arguments with a
+ // flag to ask the target to give us the memory location of that argument if
+ // available.
+ ArgCopyElisionMapTy ArgCopyElisionCandidates;
+ findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
// Set up the incoming argument description vector.
- unsigned Idx = 1;
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I, ++Idx) {
+ unsigned Idx = 0;
+ for (const Argument &Arg : F.args()) {
+ ++Idx;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
- bool isArgValueUsed = !I->use_empty();
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
- Type *FinalType = I->getType();
+ Type *FinalType = Arg.getType();
if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
FinalType = cast<PointerType>(FinalType)->getElementType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
@@ -8060,7 +8211,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
- isa<StructType>(I->getType())) {
+ isa<StructType>(Arg.getType())) {
// The first value of a structure is marked
if (0 == Value)
Flags.setHvaStart();
@@ -8092,7 +8243,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
- PointerType *Ty = cast<PointerType>(I->getType());
+ PointerType *Ty = cast<PointerType>(Arg.getType());
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
@@ -8109,6 +8260,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
+ if (ArgCopyElisionCandidates.count(&Arg))
+ Flags.setCopyElisionCandidate();
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
@@ -8155,7 +8308,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Set up the argument values.
unsigned i = 0;
- Idx = 1;
+ Idx = 0;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
@@ -8181,25 +8334,39 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
++i;
}
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
- ++I, ++Idx) {
+ SmallVector<SDValue, 4> Chains;
+ DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+ for (const Argument &Arg : F.args()) {
+ ++Idx;
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ continue;
+
+ bool ArgHasUses = !Arg.use_empty();
+
+ // Elide the copying store if the target loaded this argument from a
+ // suitable fixed stack object.
+ if (Ins[i].Flags.isCopyElisionCandidate()) {
+ tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+ InVals[i], ArgHasUses);
+ }
// If this argument is unused then remember its value. It is used to generate
// debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
- if (I->use_empty() && NumValues && !isSwiftErrorArg) {
- SDB->setUnusedArgValue(&*I, InVals[i]);
+ if (!ArgHasUses && !isSwiftErrorArg) {
+ SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -8210,16 +8377,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
- if (!I->use_empty() || isSwiftErrorArg) {
+ if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
- ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
- NumParts, PartVT, VT,
- nullptr, AssertOp));
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, nullptr, AssertOp));
}
i += NumParts;
@@ -8232,18 +8398,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
- SDB->setValue(&*I, Res);
+ SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Update the SwiftErrorVRegDefMap.
@@ -8263,18 +8429,36 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// uses with vregs.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- FuncInfo->ValueMap[&*I] = Reg;
+ FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
- if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
- FuncInfo->InitializeRegForValue(&*I);
- SDB->CopyToExportRegsIfNeeded(&*I);
+ if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&Arg);
+ SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
+ if (!Chains.empty()) {
+ Chains.push_back(NewRoot);
+ NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ }
+
+ DAG.setRoot(NewRoot);
+
assert(i == InVals.size() && "Argument register count mismatch!");
+ // If any argument copy elisions occurred and we have debug info, update the
+ // stale frame indices used in the dbg.declare variable info table.
+ MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
+ if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (I != ArgCopyElisionFrameIndexMap.end())
+ VI.Slot = I->second;
+ }
+ }
+
// Finally, if the target has anything special to do, allow it to do so.
EmitFunctionEntryCode();
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index abde8a89befce..c6acc09b66028 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -616,33 +616,27 @@ public:
void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
const TargetLibraryInfo *li);
- /// clear - Clear out the current SelectionDAG and the associated
- /// state and prepare this SelectionDAGBuilder object to be used
- /// for a new block. This doesn't clear out information about
- /// additional blocks that are needed to complete switch lowering
- /// or PHI node updating; that information is cleared out as it is
- /// consumed.
+ /// Clear out the current SelectionDAG and the associated state and prepare
+ /// this SelectionDAGBuilder object to be used for a new block. This doesn't
+ /// clear out information about additional blocks that are needed to complete
+ /// switch lowering or PHI node updating; that information is cleared out as
+ /// it is consumed.
void clear();
- /// clearDanglingDebugInfo - Clear the dangling debug information
- /// map. This function is separated from the clear so that debug
- /// information that is dangling in a basic block can be properly
- /// resolved in a different basic block. This allows the
- /// SelectionDAG to resolve dangling debug information attached
- /// to PHI nodes.
+ /// Clear the dangling debug information map. This function is separated from
+ /// the clear so that debug information that is dangling in a basic block can
+ /// be properly resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached to PHI nodes.
void clearDanglingDebugInfo();
- /// getRoot - Return the current virtual root of the Selection DAG,
- /// flushing any PendingLoad items. This must be done before emitting
- /// a store or any other node that may need to be ordered after any
- /// prior load instructions.
- ///
+ /// Return the current virtual root of the Selection DAG, flushing any
+ /// PendingLoad items. This must be done before emitting a store or any other
+ /// node that may need to be ordered after any prior load instructions.
SDValue getRoot();
- /// getControlRoot - Similar to getRoot, but instead of flushing all the
- /// PendingLoad items, flush all the PendingExports items. It is necessary
- /// to do this before emitting a terminator instruction.
- ///
+ /// Similar to getRoot, but instead of flushing all the PendingLoad items,
+ /// flush all the PendingExports items. It is necessary to do this before
+ /// emitting a terminator instruction.
SDValue getControlRoot();
SDLoc getCurSDLoc() const {
@@ -688,12 +682,13 @@ public:
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc, BranchProbability TW,
- BranchProbability FW);
+ BranchProbability FW, bool InvertCond);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- BranchProbability TW, BranchProbability FW);
+ BranchProbability TW, BranchProbability FW,
+ bool InvertCond);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
@@ -900,6 +895,7 @@ private:
void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+ void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
@@ -944,8 +940,8 @@ private:
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
- DIExpression *Expr, int64_t Offset, DebugLoc dl,
- unsigned DbgSDNodeOrder);
+ DIExpression *Expr, int64_t Offset,
+ const DebugLoc &dl, unsigned DbgSDNodeOrder);
};
/// RegsForValue - This struct represents the registers (physical or virtual)
@@ -958,26 +954,23 @@ private:
/// type.
///
struct RegsForValue {
- /// ValueVTs - The value types of the values, which may not be legal, and
+ /// The value types of the values, which may not be legal, and
/// may need be promoted or synthesized from one or more registers.
- ///
SmallVector<EVT, 4> ValueVTs;
- /// RegVTs - The value types of the registers. This is the same size as
- /// ValueVTs and it records, for each value, what the type of the assigned
- /// register or registers are. (Individual values are never synthesized
- /// from more than one type of register.)
+ /// The value types of the registers. This is the same size as ValueVTs and it
+ /// records, for each value, what the type of the assigned register or
+ /// registers are. (Individual values are never synthesized from more than one
+ /// type of register.)
///
/// With virtual registers, the contents of RegVTs is redundant with TLI's
/// getRegisterType member function, however when with physical registers
/// it is necessary to have a separate record of the types.
- ///
SmallVector<MVT, 4> RegVTs;
- /// Regs - This list holds the registers assigned to the values.
+ /// This list holds the registers assigned to the values.
/// Each legal or promoted value requires one register, and each
/// expanded value requires multiple registers.
- ///
SmallVector<unsigned, 4> Regs;
RegsForValue();
@@ -987,33 +980,33 @@ struct RegsForValue {
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty);
- /// append - Add the specified values to this one.
+ /// Add the specified values to this one.
void append(const RegsForValue &RHS) {
ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
Regs.append(RHS.Regs.begin(), RHS.Regs.end());
}
- /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
- /// this value and returns the result as a ValueVTs value. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
+ /// Emit a series of CopyFromReg nodes that copies from this value and returns
+ /// the result as a ValueVTs value. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is NULL, no
+ /// flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V = nullptr) const;
- /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified
- /// value into the registers specified by this object. This uses Chain/Flag
- /// as the input and updates them for the output Chain/Flag. If the Flag
- /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
- /// in printing better diagnostic messages on error.
+ /// Emit a series of CopyToReg nodes that copies the specified value into the
+ /// registers specified by this object. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no
+ /// flag is used. If V is not nullptr, then it is used in printing better
+ /// diagnostic messages on error.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
- /// AddInlineAsmOperands - Add this value to the specified inlineasm node
- /// operand list. This adds the code marker, matching input operand index
- /// (if applicable), and includes the number of values added into it.
+ /// Add this value to the specified inlineasm node operand list. This adds the
+ /// code marker, matching input operand index (if applicable), and includes
+ /// the number of values added into it.
void AddInlineAsmOperands(unsigned Kind, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 0faaad8a21b76..488c60a28ffbc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -300,6 +300,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
@@ -366,11 +367,13 @@ static Printable PrintNodeId(const SDNode &Node) {
});
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
-void SDNode::dump(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
}
+#endif
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
@@ -416,7 +419,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
+ CSDN->getValueAPF().bitcastToAPInt().print(OS, false);
OS << ")>";
}
} else if (const GlobalAddressSDNode *GADN =
@@ -566,6 +569,7 @@ static bool shouldPrintInline(const SDNode &Node) {
return Node.getNumOperands() == 0;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (const SDValue &Op : N->op_values()) {
if (shouldPrintInline(*Op.getNode()))
@@ -592,6 +596,7 @@ LLVM_DUMP_METHOD void SelectionDAG::dump() const {
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
dbgs() << "\n\n";
}
+#endif
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
OS << PrintNodeId(*this) << ": ";
@@ -618,6 +623,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
@@ -646,15 +652,16 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
DumpNodesr(OS, Op.getNode(), indent+2, G, once);
}
-void SDNode::dumpr() const {
+LLVM_DUMP_METHOD void SDNode::dumpr() const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, nullptr, once);
}
-void SDNode::dumpr(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dumpr(const SelectionDAG *G) const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, G, once);
}
+#endif
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
@@ -688,14 +695,17 @@ void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
printrWithDepth(OS, G, 10);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
printrWithDepth(dbgs(), G, depth);
}
-void SDNode::dumprFull(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
dumprWithDepth(G, 10);
}
+#endif
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
printr(OS, G);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 64e6c221229b2..e21204dbb9661 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,40 +11,65 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -59,6 +84,13 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -73,104 +105,6 @@ STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
STATISTIC(NumFastIselFailLowerArguments,
"Number of entry blocks where fast isel failed to lower arguments");
-#ifndef NDEBUG
-static cl::opt<bool>
-EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
- cl::desc("Enable extra verbose messages in the \"fast\" "
- "instruction selector"));
-
- // Terminators
-STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
-STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
-STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
-STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
-STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
-STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
-STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
-
- // Standard binary operators...
-STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
-STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
-STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
-STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
-STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
-STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
-STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
-STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
-STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
-STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
-STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
-STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
-
- // Logical operators...
-STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
-STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
-STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
-
- // Memory instructions...
-STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
-STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
-STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
-STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
-STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM");
-STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence");
-STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
-
- // Convert instructions...
-STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
-STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
-STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
-STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
-STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
-STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
-STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
-STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
-STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
-STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
-STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
-STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
-
- // Other instructions...
-STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
-STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
-STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
-STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
-STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
-STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
-STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
-STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
-STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
-STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
-STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
-STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
-STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
-STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
-STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
-
-// Intrinsic instructions...
-STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
-STATISTIC(NumFastIselFailSAddWithOverflow,
- "Fast isel fails on sadd.with.overflow");
-STATISTIC(NumFastIselFailUAddWithOverflow,
- "Fast isel fails on uadd.with.overflow");
-STATISTIC(NumFastIselFailSSubWithOverflow,
- "Fast isel fails on ssub.with.overflow");
-STATISTIC(NumFastIselFailUSubWithOverflow,
- "Fast isel fails on usub.with.overflow");
-STATISTIC(NumFastIselFailSMulWithOverflow,
- "Fast isel fails on smul.with.overflow");
-STATISTIC(NumFastIselFailUMulWithOverflow,
- "Fast isel fails on umul.with.overflow");
-STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
-STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
-STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
-STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
-#endif
-
-static cl::opt<bool>
-EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
- cl::desc("Enable verbose messages in the \"fast\" "
- "instruction selector"));
static cl::opt<int> EnableFastISelAbort(
"fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction selection "
@@ -179,6 +113,11 @@ static cl::opt<int> EnableFastISelAbort(
"abort for argument lowering, and 3 will never fallback "
"to SelectionDAG."));
+static cl::opt<bool> EnableFastISelFallbackReport(
+ "fast-isel-report-on-fallback", cl::Hidden,
+ cl::desc("Emit a diagnostic when \"fast\" instruction selection "
+ "falls back to SelectionDAG."));
+
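Note that this flag only controls the per-function DiagnosticInfoISelFallback diagnostic emitted after selection; the individual misses are reported separately through the optimization-remark machinery under the pass name "sdagisel" (see reportFastISelFailure below). A plausible way to surface both from llc would be something like:

  llc -O0 -fast-isel -fast-isel-report-on-fallback -pass-remarks-missed=sdagisel input.ll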
static cl::opt<bool>
UseMBPI("use-mbpi",
cl::desc("use Machine Branch Probability Info"),
@@ -238,7 +177,7 @@ MachinePassRegistry RegisterScheduler::Registry;
///
//===---------------------------------------------------------------------===//
static cl::opt<RegisterScheduler::FunctionPassCtor, false,
- RegisterPassParser<RegisterScheduler> >
+ RegisterPassParser<RegisterScheduler>>
ISHeuristic("pre-RA-sched",
cl::init(&createDefaultScheduler), cl::Hidden,
cl::desc("Instruction schedulers available (before register"
@@ -249,6 +188,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target",
createDefaultScheduler);
namespace llvm {
+
//===--------------------------------------------------------------------===//
/// \brief This class is used by SelectionDAGISel to temporarily override
/// the optimization level on a per-function basis.
@@ -318,6 +258,7 @@ namespace llvm {
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
}
+
} // end namespace llvm
// EmitInstrWithCustomInserter - This method should be implemented by targets
@@ -431,8 +372,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineFunctionProperties::Property::Selected))
return false;
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
- "-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort > 0 requires -fast-isel");
@@ -457,12 +396,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+ ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
- CurDAG->init(*MF);
+ CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -502,6 +442,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TLI->initializeSplitCSR(EntryMBB);
SelectAllBasicBlocks(Fn);
+ if (FastISelFailed && EnableFastISelFallbackReport) {
+ DiagnosticInfoISelFallback DiagFallback(Fn);
+ Fn.getContext().diagnose(DiagFallback);
+ }
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
@@ -628,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
unsigned To = I->second;
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
- for (;;) {
+ while (true) {
DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E) break;
To = J->second;
@@ -666,13 +610,30 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
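+/// Describe a fast-isel miss via the optimization remark emitter: append the
+/// function name to \p R when there is no debug location to anchor it (or when
+/// we are about to abort), and raise a fatal error instead of a remark when
+/// \p ShouldAbort is set.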
+static void reportFastISelFailure(MachineFunction &MF,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R,
+ bool ShouldAbort) {
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || ShouldAbort)
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (ShouldAbort)
+ report_fatal_error(R.getMsg());
+
+ ORE.emit(R);
+}
+
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block.
- for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
- SDB->visit(*I);
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+ if (!ElidedArgCopyInstrs.count(&*I))
+ SDB->visit(*I);
+ }
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
@@ -731,6 +692,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+
+ // Pre-type legalization allow creation of any node types.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
#ifndef NDEBUG
MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
FilterDAGBasicBlockName ==
@@ -777,6 +742,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
+ // Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
if (Changed) {
@@ -802,12 +768,18 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
+ DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
+ DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -907,10 +879,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
namespace {
+
/// ISelUpdater - helper class to handle updates of the instruction selection
/// graph.
class ISelUpdater : public SelectionDAG::DAGUpdateListener {
SelectionDAG::allnodes_iterator &ISelPosition;
+
public:
ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
: SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
@@ -923,8 +897,53 @@ public:
++ISelPosition;
}
};
+
} // end anonymous namespace
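+/// Return true if \p Node is one of the STRICT_* pseudo-opcodes that carry
+/// explicit FP-environment semantics, and set \p NewOpc to the ordinary FP
+/// opcode the node should be mutated into before instruction selection.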
+static bool isStrictFPOp(SDNode *Node, unsigned &NewOpc) {
+ unsigned OrigOpc = Node->getOpcode();
+ switch (OrigOpc) {
+ case ISD::STRICT_FADD: NewOpc = ISD::FADD; return true;
+ case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; return true;
+ case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; return true;
+ case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; return true;
+ case ISD::STRICT_FREM: NewOpc = ISD::FREM; return true;
+ default: return false;
+ }
+}
+
+SDNode* SelectionDAGISel::MutateStrictFPToFP(SDNode *Node, unsigned NewOpc) {
+ assert(((Node->getOpcode() == ISD::STRICT_FADD && NewOpc == ISD::FADD) ||
+ (Node->getOpcode() == ISD::STRICT_FSUB && NewOpc == ISD::FSUB) ||
+ (Node->getOpcode() == ISD::STRICT_FMUL && NewOpc == ISD::FMUL) ||
+ (Node->getOpcode() == ISD::STRICT_FDIV && NewOpc == ISD::FDIV) ||
+ (Node->getOpcode() == ISD::STRICT_FREM && NewOpc == ISD::FREM)) &&
+ "Unexpected StrictFP opcode!");
+
+ // We're taking this node out of the chain, so we need to re-link things.
+ SDValue InputChain = Node->getOperand(0);
+ SDValue OutputChain = SDValue(Node, 1);
+ CurDAG->ReplaceAllUsesOfValueWith(OutputChain, InputChain);
+
+ SDVTList VTs = CurDAG->getVTList(Node->getOperand(1).getValueType());
+ SDValue Ops[2] = { Node->getOperand(1), Node->getOperand(2) };
+ SDNode *Res = CurDAG->MorphNodeTo(Node, NewOpc, VTs, Ops);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ } else {
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+ CurDAG->RemoveDeadNode(Node);
+ }
+
+ return Res;
+}
+
void SelectionDAGISel::DoInstructionSelection() {
DEBUG(dbgs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
@@ -960,7 +979,23 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->use_empty())
continue;
+ // When we are using non-default rounding modes or FP exception behavior,
+ // FP operations are represented by StrictFP pseudo-operations. They
+ // need to be simplified here so that the target-specific instruction
+ // selectors know how to handle them.
+ //
+ // If the current node is a strict FP pseudo-op, the isStrictFPOp()
+ // function will provide the corresponding normal FP opcode to which the
+ // node should be mutated.
+ unsigned NormalFPOpc = ISD::UNDEF;
+ bool IsStrictFPOp = isStrictFPOp(Node, NormalFPOpc);
+ if (IsStrictFPOp)
+ Node = MutateStrictFPToFP(Node, NormalFPOpc);
+
Select(Node);
+
+ // FIXME: Add code here to attach an implicit def and use of
+ // target-specific FP environment registers.
}
CurDAG->setRoot(Dummy.getValue());
@@ -1046,116 +1081,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
-#ifndef NDEBUG
-// Collect per Instruction statistics for fast-isel misses. Only those
-// instructions that cause the bail are accounted for. It does not account for
-// instructions higher in the block. Thus, summing the per instructions stats
-// will not add up to what is reported by NumFastIselFailures.
-static void collectFailStats(const Instruction *I) {
- switch (I->getOpcode()) {
- default: assert (0 && "<Invalid operator> ");
-
- // Terminators
- case Instruction::Ret: NumFastIselFailRet++; return;
- case Instruction::Br: NumFastIselFailBr++; return;
- case Instruction::Switch: NumFastIselFailSwitch++; return;
- case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
- case Instruction::Invoke: NumFastIselFailInvoke++; return;
- case Instruction::Resume: NumFastIselFailResume++; return;
- case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
-
- // Standard binary operators...
- case Instruction::Add: NumFastIselFailAdd++; return;
- case Instruction::FAdd: NumFastIselFailFAdd++; return;
- case Instruction::Sub: NumFastIselFailSub++; return;
- case Instruction::FSub: NumFastIselFailFSub++; return;
- case Instruction::Mul: NumFastIselFailMul++; return;
- case Instruction::FMul: NumFastIselFailFMul++; return;
- case Instruction::UDiv: NumFastIselFailUDiv++; return;
- case Instruction::SDiv: NumFastIselFailSDiv++; return;
- case Instruction::FDiv: NumFastIselFailFDiv++; return;
- case Instruction::URem: NumFastIselFailURem++; return;
- case Instruction::SRem: NumFastIselFailSRem++; return;
- case Instruction::FRem: NumFastIselFailFRem++; return;
-
- // Logical operators...
- case Instruction::And: NumFastIselFailAnd++; return;
- case Instruction::Or: NumFastIselFailOr++; return;
- case Instruction::Xor: NumFastIselFailXor++; return;
-
- // Memory instructions...
- case Instruction::Alloca: NumFastIselFailAlloca++; return;
- case Instruction::Load: NumFastIselFailLoad++; return;
- case Instruction::Store: NumFastIselFailStore++; return;
- case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
- case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
- case Instruction::Fence: NumFastIselFailFence++; return;
- case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
-
- // Convert instructions...
- case Instruction::Trunc: NumFastIselFailTrunc++; return;
- case Instruction::ZExt: NumFastIselFailZExt++; return;
- case Instruction::SExt: NumFastIselFailSExt++; return;
- case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
- case Instruction::FPExt: NumFastIselFailFPExt++; return;
- case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
- case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
- case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
- case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
- case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
- case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
- case Instruction::BitCast: NumFastIselFailBitCast++; return;
-
- // Other instructions...
- case Instruction::ICmp: NumFastIselFailICmp++; return;
- case Instruction::FCmp: NumFastIselFailFCmp++; return;
- case Instruction::PHI: NumFastIselFailPHI++; return;
- case Instruction::Select: NumFastIselFailSelect++; return;
- case Instruction::Call: {
- if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- NumFastIselFailIntrinsicCall++; return;
- case Intrinsic::sadd_with_overflow:
- NumFastIselFailSAddWithOverflow++; return;
- case Intrinsic::uadd_with_overflow:
- NumFastIselFailUAddWithOverflow++; return;
- case Intrinsic::ssub_with_overflow:
- NumFastIselFailSSubWithOverflow++; return;
- case Intrinsic::usub_with_overflow:
- NumFastIselFailUSubWithOverflow++; return;
- case Intrinsic::smul_with_overflow:
- NumFastIselFailSMulWithOverflow++; return;
- case Intrinsic::umul_with_overflow:
- NumFastIselFailUMulWithOverflow++; return;
- case Intrinsic::frameaddress:
- NumFastIselFailFrameaddress++; return;
- case Intrinsic::sqrt:
- NumFastIselFailSqrt++; return;
- case Intrinsic::experimental_stackmap:
- NumFastIselFailStackMap++; return;
- case Intrinsic::experimental_patchpoint_void: // fall-through
- case Intrinsic::experimental_patchpoint_i64:
- NumFastIselFailPatchPoint++; return;
- }
- }
- NumFastIselFailCall++;
- return;
- }
- case Instruction::Shl: NumFastIselFailShl++; return;
- case Instruction::LShr: NumFastIselFailLShr++; return;
- case Instruction::AShr: NumFastIselFailAShr++; return;
- case Instruction::VAArg: NumFastIselFailVAArg++; return;
- case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
- case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
- case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
- case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
- case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
- case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
- }
-}
-#endif // NDEBUG
-
/// Set up SwiftErrorVals by going through the function. If the function has
/// swifterror argument, it will be the first entry.
static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
@@ -1190,9 +1115,9 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
}
static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
+ FastISel *FastIS,
const TargetLowering *TLI,
const TargetInstrInfo *TII,
- const BasicBlock *LLVMBB,
SelectionDAGBuilder *SDB) {
if (!TLI->supportSwiftError())
return;
@@ -1202,22 +1127,27 @@ static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
if (FuncInfo->SwiftErrorVals.empty())
return;
- if (pred_begin(LLVMBB) == pred_end(LLVMBB)) {
- auto &DL = FuncInfo->MF->getDataLayout();
- auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
- // We will always generate a copy from the argument. It is always used at
- // least by the 'return' of the swifterror.
- if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
- continue;
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- // Assign Undef to Vreg. We construct MI directly to make sure it works
- // with FastISel.
- BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
- SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
- VReg);
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
- }
+ assert(FuncInfo->MBB == &*FuncInfo->MF->begin() &&
+ "expected to insert into entry block");
+ auto &DL = FuncInfo->MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
+ continue;
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
+ SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ VReg);
+
+ // Keep FastIS informed about the value we just inserted.
+ if (FastIS)
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
}
}
@@ -1340,6 +1270,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel)
@@ -1347,12 +1278,53 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
setupSwiftErrorVals(Fn, TLI, FuncInfo);
- // Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
- for (ReversePostOrderTraversal<const Function*>::rpo_iterator
- I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- const BasicBlock *LLVMBB = *I;
+ // Lower arguments up front. An RPO iteration always visits the entry block
+ // first.
+ assert(*RPOT.begin() == &Fn.getEntryBlock());
+ ++NumEntryBlocks;
+
+ // Set up FuncInfo for ISel. Entry blocks never have PHIs.
+ FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
+ FuncInfo->InsertPt = FuncInfo->MBB->begin();
+
+ if (!FastIS) {
+ LowerArguments(Fn);
+ } else {
+ // See if fast isel can lower the arguments.
+ FastIS->startNewBlock();
+ if (!FastIS->lowerArguments()) {
+ FastISelFailed = true;
+ // Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
+
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Fn.getSubprogram(),
+ &Fn.getEntryBlock());
+ R << "FastISel didn't lower all arguments: "
+ << ore::NV("Prototype", Fn.getType());
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(nullptr);
+ }
+ createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+
+ // Iterate over all basic blocks in the function.
+ for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
@@ -1384,8 +1356,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
if (!FuncInfo->MBB)
continue; // Some blocks like catchpads have no code or MBB.
- FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
- createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB);
+
+ // Insert new instructions after any phi or argument setup code.
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
@@ -1396,35 +1369,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
- FastIS->startNewBlock();
-
- // Emit code for any incoming arguments. This must happen before
- // beginning FastISel on the entry block.
- if (LLVMBB == &Fn.getEntryBlock()) {
- ++NumEntryBlocks;
-
- // Lower any arguments needed in this block if this is the entry block.
- if (!FastIS->lowerArguments()) {
- // Fast isel failed to lower these arguments
- ++NumFastIselFailLowerArguments;
- if (EnableFastISelAbort > 1)
- report_fatal_error("FastISel didn't lower all arguments");
-
- // Use SelectionDAG argument lowering
- LowerArguments(Fn);
- CurDAG->setRoot(SDB->getControlRoot());
- SDB->clear();
- CodeGenAndEmitDAG();
- }
-
- // If we inserted any instructions at the beginning, make a note of
- // where they are, so we can be sure to emit subsequent instructions
- // after them.
- if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
- FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
- else
- FastIS->setLastLocalValue(nullptr);
- }
+ if (LLVMBB != &Fn.getEntryBlock())
+ FastIS->startNewBlock();
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
@@ -1432,7 +1378,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
+ ElidedArgCopyInstrs.count(Inst)) {
--NumFastIselRemaining;
continue;
}
@@ -1443,6 +1390,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->selectInstruction(Inst)) {
+ FastISelFailed = true;
--NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
@@ -1465,22 +1413,22 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
-#ifndef NDEBUG
- if (EnableFastISelVerbose2)
- collectFailStats(Inst);
-#endif
-
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
+ R << "FastISel missed call";
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
- dbgs() << "FastISel missed call: ";
- Inst->dump();
+ R << ": " << InstStr.str();
}
- if (EnableFastISelAbort > 2)
- // FastISel selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- report_fatal_error("FastISel didn't select the entire block");
+
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2);
if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
!Inst->use_empty()) {
@@ -1509,35 +1457,35 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
bool ShouldAbort = EnableFastISelAbort;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
- if (isa<TerminatorInst>(Inst)) {
- // Use a different message for terminator misses.
- dbgs() << "FastISel missed terminator: ";
- // Don't abort unless for terminator unless the level is really high
- ShouldAbort = (EnableFastISelAbort > 2);
- } else {
- dbgs() << "FastISel miss: ";
- }
- Inst->dump();
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
+ R << "FastISel missed terminator";
+ // Don't abort for terminator unless the level is really high
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
+ R << "FastISel missed";
+ }
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+ R << ": " << InstStr.str();
}
- if (ShouldAbort)
- // FastISel selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- report_fatal_error("FastISel didn't select the entire block");
+
+ reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
NumFastIselFailures += NumFastIselRemaining;
break;
}
FastIS->recomputeInsertPt();
- } else {
- // Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock()) {
- ++NumEntryBlocks;
- LowerArguments(Fn);
- }
}
+
if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
bool FunctionBasedInstrumentation =
TLI->getSSPStackGuardCheck(*Fn.getParent());
@@ -1556,10 +1504,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// block.
bool HadTailCall;
SelectBasicBlock(Begin, BI, HadTailCall);
+
+ // But if FastISel was run, we already selected some of the block.
+ // If we emitted a tail-call, we need to delete any previously emitted
+ // instruction that follows it.
+ if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+ FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
+ ElidedArgCopyInstrs.clear();
}
propagateSwiftErrorVRegs(FuncInfo);
@@ -2177,7 +2132,6 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
IgnoreChains = false;
}
-
SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
}
@@ -2554,7 +2508,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
@@ -2564,9 +2518,9 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// CheckChildSame - Implements OP_CheckChildXSame.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
- unsigned ChildNo) {
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes,
+ unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
@@ -2688,7 +2642,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result,
const SelectionDAGISel &SDISel,
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
switch (Table[Index++]) {
default:
Result = false;
@@ -2756,6 +2710,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
}
namespace {
+
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
@@ -2785,6 +2740,7 @@ class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
SDNode **NodeToMatch;
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
SmallVectorImpl<MatchScope> &MatchScopes;
+
public:
MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
@@ -2816,6 +2772,7 @@ public:
J.setNode(E);
}
};
+
} // end anonymous namespace
void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
@@ -2921,7 +2878,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// with an OPC_SwitchOpcode instruction. Populate the table now, since this
// is the first time we're selecting an instruction.
unsigned Idx = 1;
- while (1) {
+ while (true) {
// Get the size of this case.
unsigned CaseSize = MatcherTable[Idx++];
if (CaseSize & 128)
@@ -2942,7 +2899,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
- while (1) {
+ while (true) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
unsigned CurrentOpcodeIndex = MatcherIndex;
@@ -2957,7 +2914,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// immediately fail, don't even bother pushing a scope for them.
unsigned FailIndex;
- while (1) {
+ while (true) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
@@ -3118,7 +3075,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
- while (1) {
+ while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
@@ -3149,7 +3106,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MVT CurNodeVT = N.getSimpleValueType();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
- while (1) {
+ while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
@@ -3215,7 +3172,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// a single use.
bool HasMultipleUses = false;
for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
- if (!NodeStack[i].hasOneUse()) {
+ if (!NodeStack[i].getNode()->hasOneUse()) {
HasMultipleUses = true;
break;
}
@@ -3381,6 +3338,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
continue;
}
+ case OPC_Coverage: {
+ // This is emitted right before MorphNode/EmitNode, so it should be safe
+ // to assume that this node has been selected.
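+ // The 16-bit pattern index is stored as two bytes, low byte first.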
+ unsigned index = MatcherTable[MatcherIndex++];
+ index |= (MatcherTable[MatcherIndex++] << 8);
+ dbgs() << "COVERED: " << getPatternForIndex(index) << "\n";
+ dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n";
+ continue;
+ }
case OPC_EmitNode: case OPC_MorphNodeTo:
case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
@@ -3473,7 +3439,6 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
nullptr));
}
-
} else {
assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
"NodeToMatch was removed partway through selection");
@@ -3610,7 +3575,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// find a case to check.
DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
++NumDAGIselRetries;
- while (1) {
+ while (true) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
return;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 690f0d2c80821..2756e276c6a91 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -55,14 +55,15 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CallerAttrs = F->getAttributes();
- if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
- .removeAttribute(Attribute::NoAlias).hasAttributes())
+ AttributeList CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias)
+ .hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
return false;
// Check if the only use is a function return node.
@@ -96,19 +97,20 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
/// \brief Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
-void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
- unsigned AttrIdx) {
- isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
- isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
- isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
- isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
- isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
- isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
- isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
- isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
- isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
- isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(AttrIdx);
+void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned ArgIdx) {
+ IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
+ IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
+ IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
+ IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
+ IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
+ IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
+ IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
+ // FIXME: getParamAlignment is off by one from argument index.
+ Alignment = CS->getParamAlignment(ArgIdx + 1);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -125,8 +127,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
for (SDValue Op : Ops) {
Entry.Node = Op;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
- Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
Args.push_back(Entry);
}
@@ -138,10 +140,13 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
- .setSExtResult(signExtend).setZExtResult(!signExtend);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setNoReturn(doesNotReturn)
+ .setDiscardResult(!isReturnValueUsed)
+ .setSExtResult(signExtend)
+ .setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
@@ -334,34 +339,35 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// Check to see if the specified operand of the specified instruction is a
-/// constant integer. If so, check to see if there are any bits set in the
-/// constant that are not demanded. If so, shrink the constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
- const APInt &Demanded) {
- SDLoc dl(Op);
+/// If the specified instruction has a constant integer operand and there are
+/// bits set in that constant that are not demanded, then clear those bits and
+/// return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(
+ SDValue Op, const APInt &Demanded) {
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
- switch (Op.getOpcode()) {
- default: break;
+ switch (Opcode) {
+ default:
+ break;
case ISD::XOR:
case ISD::AND:
case ISD::OR: {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!C) return false;
+ auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Op1C)
+ return false;
- if (Op.getOpcode() == ISD::XOR &&
- (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ // If this is a 'not' op, don't touch it because that's a canonical form.
+ const APInt &C = Op1C->getAPIntValue();
+ if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue())
return false;
- // if we can expand it to have all bits set, do it
- if (C->getAPIntValue().intersects(~Demanded)) {
+ if (C.intersects(~Demanded)) {
EVT VT = Op.getValueType();
- SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
- DAG.getConstant(Demanded &
- C->getAPIntValue(),
- dl, VT));
- return CombineTo(Op, New);
+ SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ return CombineTo(Op, NewOp);
}
break;
@@ -470,6 +476,21 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
return true;
}
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+ DAGCombinerInfo &DCI) const {
+
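+ // Convenience wrapper: create a TargetLoweringOpt for the current combine
+ // stage, simplify the demanded bits, and commit any change back through
+ // the DAG combiner.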
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ APInt KnownZero, KnownOne;
+
+ bool Simplified = SimplifyDemandedBits(Op, DemandedMask, KnownZero, KnownOne,
+ TLO);
+ if (Simplified)
+ DCI.CommitTargetLoweringOpt(TLO);
+ return Simplified;
+}
+
/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -711,8 +732,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- KnownZero = KnownZeroOut;
- KnownOne = KnownOneOut;
+ KnownZero = std::move(KnownZeroOut);
+ KnownOne = std::move(KnownOneOut);
break;
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
@@ -750,6 +771,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth &&
+ getBooleanContents(Op.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return TLO.CombineTo(Op, Op0);
+
+ // TODO: Should we check for other forms of sign-bit comparisons?
+ // Examples: X <= -1, X >= 0
+ }
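+ // With 0/1 booleans, every bit above the low bit is known to be zero.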
+ if (getBooleanContents(Op0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero.setBitsFrom(1);
+ break;
+ }
case ISD::SHL:
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();
@@ -834,7 +882,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero <<= SA->getZExtValue();
KnownOne <<= SA->getZExtValue();
// low bits known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ KnownZero.setLowBits(SA->getZExtValue());
}
break;
case ISD::SRL:
@@ -853,7 +901,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
- InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ InDemandedMask.setLowBits(ShAmt);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
@@ -884,8 +932,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- KnownZero |= HighBits; // High bits known zero.
+ KnownZero.setHighBits(ShAmt); // High bits known zero.
}
break;
case ISD::SRA:
@@ -911,7 +958,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
- InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
@@ -1075,7 +1122,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
- APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1);
APInt NewBits = ~InMask & NewMask;
// If none of the top bits are demanded, convert this into an any_extend.
@@ -1191,7 +1238,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero |= ~InMask & NewMask;
+ KnownZero |= ~InMask;
break;
}
case ISD::BITCAST:
@@ -1281,6 +1328,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
@@ -1295,6 +1343,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &,
const SelectionDAG &,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
@@ -2050,6 +2099,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETO || Cond == ISD::SETUO)
return DAG.getSetCC(dl, VT, N0, N0, Cond);
+ // setcc (fneg x), C -> setcc swap(pred) x, -C
+ if (N0.getOpcode() == ISD::FNEG) {
+ ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
+ SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
+ }
+ }
+
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
@@ -2470,10 +2529,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
- E = RI->regclass_end(); RCI != E; ++RCI) {
- const TargetRegisterClass *RC = *RCI;
-
+ for (const TargetRegisterClass *RC : RI->regclasses()) {
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
if (!isLegalRC(RC))
@@ -2933,7 +2989,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -3808,7 +3864,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
- CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
+ CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 209bbe54ea236..ab578df4069d5 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -64,6 +64,7 @@ public:
private:
bool setupEntryBlockAndCallSites(Function &F);
+ bool undoSwiftErrorSelect(Function &F);
void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);
void lowerIncomingArguments(Function &F);
@@ -174,8 +175,8 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// because the value needs to be added to the global context list.
auto &DL = F.getParent()->getDataLayout();
unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
- FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
- &EntryBB->front());
+ FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(),
+ nullptr, Align, "fn_context", &EntryBB->front());
// Fill in the function context structure.
for (LandingPadInst *LPI : LPads) {
@@ -458,14 +459,33 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
return true;
}
+bool SjLjEHPrepare::undoSwiftErrorSelect(Function &F) {
+ // We have inserted dummy copies 'select true, arg, undef' in the entry block
+ // for arguments to simplify this pass.
+ // swifterror arguments cannot be used in this way. Undo the select for the
+ // swifterror argument.
+ for (auto &AI : F.args()) {
+ if (AI.isSwiftError()) {
+ assert(AI.hasOneUse() && "Must have converted the argument to a select");
+ auto *Select = dyn_cast<SelectInst>(AI.use_begin()->getUser());
+ assert(Select && "There must be single select user");
+ auto *OrigSwiftError = cast<Argument>(Select->getTrueValue());
+ Select->replaceAllUsesWith(OrigSwiftError);
+ Select->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+}
+
bool SjLjEHPrepare::runOnFunction(Function &F) {
Module &M = *F.getParent();
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), nullptr);
+ PointerType::getUnqual(FunctionContextTy));
UnregisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), nullptr);
+ PointerType::getUnqual(FunctionContextTy));
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
@@ -476,5 +496,7 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
bool Res = setupEntryBlockAndCallSites(F);
+ if (Res)
+ Res |= undoSwiftErrorSelect(F);
return Res;
}
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index dba103e9bfb15..bc2a1d09056bd 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -103,6 +103,48 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
return false;
}
+void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI) {
+ assert(!MI.isBundledWithPred() &&
+ "Use removeSingleMachineInstrFromMaps() instread");
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+}
+
+void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+
+ // When removing the first instruction of a bundle, update the mapping to
+ // point at the next instruction.
+ if (MI.isBundledWithSucc()) {
+ // Only the first instruction of a bundle should have an index assigned.
+ assert(!MI.isBundledWithPred() && "Should have first bundle isntruction");
+
+ MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator());
+ MachineInstr &NextMI = *Next;
+ MIEntry.setInstr(&NextMI);
+ mi2iMap.insert(std::make_pair(&NextMI, MIIndex));
+ return;
+ } else {
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+ }
+}
+
void SlotIndexes::renumberIndexes() {
// Renumber updates the index of every element of the index list.
DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 1c6a84e539440..3a50aaa69985d 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -487,12 +488,126 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
+SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ bool FirstCopy = !Def.isValid();
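+ // The first copy of the sequence defines the register (the remaining lanes
+ // are undef); later copies are bundled with it and read the partially
+ // defined register (internal read).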
+ MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+ .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
+ | getInternalReadRegState(!FirstCopy), SubIdx)
+ .addReg(FromReg, 0, SubIdx);
+
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ if (FirstCopy) {
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ } else {
+ CopyMI->bundleWithPred();
+ }
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
+ DestLI.refineSubRanges(Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange& SR) {
+ SR.createDeadDef(Def, Allocator);
+ });
+ return Def;
+}
+
+SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+ LaneBitmask LaneMask, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
+ // The full vreg is copied.
+ MachineInstr *CopyMI =
+ BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ }
+
+ // Only a subset of lanes needs to be copied. The following is a simple
+ // heuristic to construct a sequence of COPYs. We could add a
+ // target-specific callback if this turns out to be suboptimal.
+ LiveInterval &DestLI = LIS.getInterval(Edit->get(RegIdx));
+
+ // First pass: Try to find a perfectly matching subregister index. If none
+ // exists, find the one covering the most lanemask bits.
+ SmallVector<unsigned, 8> PossibleIndexes;
+ unsigned BestIdx = 0;
+ unsigned BestCover = 0;
+ const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
+ assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
+ for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
+ // Is this index even compatible with the given class?
+ if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
+ continue;
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LaneMask) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // The index must not cover any lanes outside \p LaneMask.
+ if ((SubRegMask & ~LaneMask).any())
+ continue;
+
+ unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+ PossibleIndexes.push_back(Idx);
+ if (PopCount > BestCover) {
+ BestCover = PopCount;
+ BestIdx = Idx;
+ }
+ }
+
+ // Abort if we cannot possibly implement the COPY with the given indexes.
+ if (BestIdx == 0)
+ report_fatal_error("Impossible to implement partial COPY");
+
+ SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
+ BestIdx, DestLI, Late, SlotIndex());
+
+ // Greedy heuristic: Keep iterating, choosing the subreg index that covers
+ // the most remaining lanes each time.
+ LaneBitmask LanesLeft =
+ LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
+ while (LanesLeft.any()) {
+ unsigned BestIdx = 0;
+ int BestCover = INT_MIN;
+ for (unsigned Idx : PossibleIndexes) {
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LanesLeft) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // Try to cover as many of the remaining lanes as possible while covering
+ // as few of the already-covered lanes as possible.
+ int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
+ - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+ if (Cover > BestCover) {
+ BestCover = Cover;
+ BestIdx = Idx;
+ }
+ }
+
+ if (BestIdx == 0)
+ report_fatal_error("Impossible to implement partial COPY");
+
+ buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
+ DestLI, Late, Def);
+ LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
+ }
+
+ return Def;
+}
+
VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
VNInfo *ParentVNI,
SlotIndex UseIdx,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- MachineInstr *CopyMI = nullptr;
SlotIndex Def;
LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
@@ -505,24 +620,29 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+ unsigned Reg = LI->reg;
bool DidRemat = false;
if (OrigVNI) {
LiveRangeEdit::Remat RM(ParentVNI);
RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
++NumRemats;
DidRemat = true;
}
}
if (!DidRemat) {
- // Can't remat, just insert a copy from parent.
- CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
- .addReg(Edit->getReg());
- Def = LIS.getSlotIndexes()
- ->insertMachineInstrInMaps(*CopyMI, Late)
- .getRegSlot();
+ LaneBitmask LaneMask;
+ if (LI->hasSubRanges()) {
+ LaneMask = LaneBitmask::getNone();
+ for (LiveInterval::SubRange &S : LI->subranges())
+ LaneMask |= S.LaneMask;
+ } else {
+ LaneMask = LaneBitmask::getAll();
+ }
+
++NumCopies;
+ Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
}
// Define the value in Reg.
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index a75738aaf4467..9d409e924a3d8 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -405,6 +405,17 @@ private:
/// deleteRematVictims - Delete defs that are dead after rematerializing.
void deleteRematVictims();
+ /// Add a copy instruction copying \p FromReg to \p ToReg before
+ /// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
+ /// necessary to construct a sequence of copies to cover it exactly.
+ SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ bool Late, unsigned RegIdx);
+
+ SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy);
+
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 89c4b574f17f4..f51d959a089aa 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -385,14 +384,13 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-#ifndef NDEBUG
-
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
const BitVector &BV) const {
- DEBUG(dbgs() << tag << " : { ");
+ dbgs() << tag << " : { ";
for (unsigned I = 0, E = BV.size(); I != E; ++I)
- DEBUG(dbgs() << BV.test(I) << " ");
- DEBUG(dbgs() << "}\n");
+ dbgs() << BV.test(I) << " ";
+ dbgs() << "}\n";
}
LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
@@ -408,20 +406,19 @@ LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
LLVM_DUMP_METHOD void StackColoring::dump() const {
for (MachineBasicBlock *MBB : depth_first(MF)) {
- DEBUG(dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
- << MBB->getName() << "]\n");
- DEBUG(dumpBB(MBB));
+ dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
+ << MBB->getName() << "]\n";
+ dumpBB(MBB);
}
}
LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
- DEBUG(dbgs() << "Interval[" << I << "]:\n");
- DEBUG(Intervals[I]->dump());
+ dbgs() << "Interval[" << I << "]:\n";
+ Intervals[I]->dump();
}
}
-
-#endif // not NDEBUG
+#endif
static inline int getStartOrEndSlot(const MachineInstr &MI)
{
@@ -570,9 +567,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
// Step 2: compute begin/end sets for each block
- // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
- // deterministic numbering, and because we'll need a post-order iteration
- // later for solving the liveness dataflow problem.
+ // NOTE: We use a depth-first iteration to ensure that we obtain a
+ // deterministic numbering.
for (MachineBasicBlock *MBB : depth_first(MF)) {
// Assign a serial number to this basic block.
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 9b7dd400fc92e..1a8ec5bff3229 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -1,4 +1,4 @@
-//===---------------------------- StackMaps.cpp ---------------------------===//
+//===- StackMaps.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,23 +7,34 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <iterator>
+#include <utility>
using namespace llvm;
@@ -276,7 +287,8 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
}
LiveOuts.erase(
- remove_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }),
+ llvm::remove_if(LiveOuts,
+ [](const LiveOutReg &LO) { return LO.Reg == 0; }),
LiveOuts.end());
return LiveOuts;
@@ -286,7 +298,6 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult) {
-
MCContext &OutContext = AP.OutStreamer->getContext();
MCSymbol *MILabel = OutContext.createTempSymbol();
AP.OutStreamer->EmitLabel(MILabel);
@@ -378,6 +389,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
}
#endif
}
+
void StackMaps::recordStatepoint(const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index c2c010a29d44b..a8aafe78748dc 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -1,4 +1,4 @@
-//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//===- StackProtector.cpp - Stack Protector Insertion ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,30 +14,38 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <cstdlib>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "stack-protector"
@@ -51,7 +59,7 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
char StackProtector::ID = 0;
INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors",
- false, true)
+ false, true)
FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
return new StackProtector(TM);
@@ -222,7 +230,16 @@ bool StackProtector::RequiresStackProtector() {
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
+ // We are constructing the OptimizationRemarkEmitter on the fly rather than
+ // using the analysis pass to avoid building DominatorTree and LoopInfo which
+ // are not available this late in the IR pipeline.
+ OptimizationRemarkEmitter ORE(F);
+
if (F->hasFnAttribute(Attribute::StackProtectReq)) {
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a function attribute or command-line switch");
NeedsProtector = true;
Strong = true; // Use the same heuristic as strong to determine SSPLayout
} else if (F->hasFnAttribute(Attribute::StackProtectStrong))
@@ -236,20 +253,29 @@ bool StackProtector::RequiresStackProtector() {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (AI->isArrayAllocation()) {
+ OptimizationRemark Remark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
+ &I);
+ Remark
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a call to alloca or use of a variable length array";
if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ ORE.emit(Remark);
NeedsProtector = true;
} else if (Strong) {
// Require protectors for all alloca calls in strong mode.
Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
+ ORE.emit(Remark);
NeedsProtector = true;
}
} else {
// A call to alloca with a variable size requires protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ ORE.emit(Remark);
NeedsProtector = true;
}
continue;
@@ -259,6 +285,11 @@ bool StackProtector::RequiresStackProtector() {
if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
: SSPLK_SmallArray));
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a stack allocated buffer or struct containing a "
+ "buffer");
NeedsProtector = true;
continue;
}
@@ -266,6 +297,11 @@ bool StackProtector::RequiresStackProtector() {
if (Strong && HasAddressTaken(AI)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+ ORE.emit(
+ OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to the address of a local variable being taken");
NeedsProtector = true;
}
}
@@ -448,13 +484,13 @@ BasicBlock *StackProtector::CreateFailBB() {
Constant *StackChkFail =
M->getOrInsertFunction("__stack_smash_handler",
Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context), nullptr);
+ Type::getInt8PtrTy(Context));
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
Constant *StackChkFail =
- M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
- nullptr);
+ M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
+
B.CreateCall(StackChkFail, {});
}
B.CreateUnreachable();
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index 7709236bbaa83..d2414200e9d57 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -725,6 +725,7 @@ bool TailDuplicator::duplicateSimpleBB(
if (PredTBB == NextBB && PredFBB == nullptr)
PredTBB = nullptr;
+ auto DL = PredBB->findBranchDebugLoc();
TII->removeBranch(*PredBB);
if (!PredBB->isSuccessor(NewTarget))
@@ -735,7 +736,7 @@ bool TailDuplicator::duplicateSimpleBB(
}
if (PredTBB)
- TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+ TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);
TDBBs.push_back(PredBB);
}
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f082add8c7dd5..e5def6752e071 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -73,7 +73,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
return;
// Get the callee saved register list...
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
// Early exit if there are no callee saved registers.
if (!CSRegs || CSRegs[0] == 0)
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 01f91b96b58a7..711144a347430 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -470,7 +470,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
// No need to fold return, the meta data, and function arguments
for (unsigned i = 0; i < StartIdx; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
MachineOperand &MO = MI.getOperand(i);
@@ -490,7 +490,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MIB.addImm(SpillOffset);
}
else
- MIB.addOperand(MO);
+ MIB.add(MO);
}
return NewMI;
}
@@ -941,12 +941,10 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
- if (MI.getOpcode() != FrameSetupOpcode &&
- MI.getOpcode() != FrameDestroyOpcode)
+ if (!isFrameInstr(MI))
return 0;
- int SPAdj = MI.getOperand(0).getImm();
- SPAdj = TFI->alignSPAdjust(SPAdj);
+ int SPAdj = TFI->alignSPAdjust(getFrameSize(MI));
if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) ||
(StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode))
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 003311b157fc2..27630a3055cb3 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -838,7 +838,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
HasExtractBitsInsn = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
- MaskAndBranchFoldingIsLegal = false;
EnableExtLdPromotion = false;
HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
@@ -851,7 +850,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MinFunctionAlignment = 0;
PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
- GatherAllAliasesMaxDepth = 6;
+ GatherAllAliasesMaxDepth = 18;
MinStackArgumentAlignment = 1;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
@@ -901,6 +900,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
+ setOperationAction(ISD::ABS, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -1227,7 +1227,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// Copy operands before the frame-index.
for (unsigned i = 0; i < OperIdx; ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
// Add frame index operands recognized by stackmaps.cpp
if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
// indirect-mem-ref tag, size, #FI, offset.
@@ -1237,18 +1237,18 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
MIB.addImm(StackMaps::IndirectMemRefOp);
MIB.addImm(MFI.getObjectSize(FI));
- MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.add(MI->getOperand(OperIdx));
MIB.addImm(0);
} else {
// direct-mem-ref tag, #FI, offset.
// Used by patchpoint, and direct alloca arguments to statepoints
MIB.addImm(StackMaps::DirectMemRefOp);
- MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.add(MI->getOperand(OperIdx));
MIB.addImm(0);
}
// Copy the operands after the frame index.
for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
// Inherit previous memory operands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -1589,7 +1589,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
+void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI, const DataLayout &DL) {
SmallVector<EVT, 4> ValueVTs;
@@ -1601,9 +1601,9 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1621,13 +1621,13 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
Flags.setSExt();
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
@@ -1818,7 +1818,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
- StackPtrTy->getPointerTo(0), nullptr);
+ StackPtrTy->getPointerTo(0));
return IRB.CreateCall(Fn);
}
@@ -1918,11 +1918,7 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
/// override the target defaults.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
const Function *F = MF.getFunction();
- StringRef RecipAttrName = "reciprocal-estimates";
- if (!F->hasFnAttribute(RecipAttrName))
- return StringRef();
-
- return F->getFnAttribute(RecipAttrName).getValueAsString();
+ return F->getFnAttribute("reciprocal-estimates").getValueAsString();
}
/// Construct a string for the given reciprocal operation of the given type.
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index eb2a28f574a54..34892680aceb3 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,36 +12,52 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <string>
+
using namespace llvm;
using namespace dwarf;
@@ -53,10 +69,10 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
- if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
+ if ((Encoding & 0x80) == DW_EH_PE_indirect)
return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
TM.getSymbol(GV)->getName());
- if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
+ if ((Encoding & 0x70) == DW_EH_PE_absptr)
return TM.getSymbol(GV);
report_fatal_error("We do not support this DWARF encoding yet!");
}
@@ -86,8 +102,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
-
- if (Encoding & dwarf::DW_EH_PE_indirect) {
+ if (Encoding & DW_EH_PE_indirect) {
MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM);
@@ -102,7 +117,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
return TargetLoweringObjectFile::
getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ Encoding & ~DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
@@ -117,8 +132,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
// section(".eh_frame") gcc will produce:
//
// .section .eh_frame,"a",@progbits
-
- if (Name == getInstrProfCoverageSectionName(false))
+
+ if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF,
+ /*AddSegmentInfo=*/false))
return SectionKind::getMetadata();
if (Name.empty() || Name[0] != '.') return K;
@@ -149,7 +165,6 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
return K;
}
-
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
// Use SHT_NOTE for section whose name starts with ".note" to allow
// emitting ELF notes from C variable declaration.
@@ -211,6 +226,20 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
return C;
}
+static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
+ const TargetMachine &TM) {
+ MDNode *MD = GO->getMetadata(LLVMContext::MD_associated);
+ if (!MD)
+ return nullptr;
+
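+ // The !associated metadata is expected to wrap the associated global as
+ // ValueAsMetadata; anything else is rejected below.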
+ auto *VM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+ if (!VM)
+ report_fatal_error("MD_associated operand is not ValueAsMetadata");
+
+ GlobalObject *OtherGO = dyn_cast<GlobalObject>(VM->getValue());
+ return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
+}
+
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
@@ -224,9 +253,23 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
Group = C->getName();
Flags |= ELF::SHF_GROUP;
}
- return getContext().getELFSection(SectionName,
- getELFSectionType(SectionName, Kind), Flags,
- /*EntrySize=*/0, Group);
+
+ // A section can have at most one associated section. Put each global with
+ // MD_associated in a unique section.
+ unsigned UniqueID = MCContext::GenericSectionID;
+ const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
+ if (AssociatedSymbol) {
+ UniqueID = NextUniqueID++;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+
+ MCSectionELF *Section = getContext().getELFSection(
+ SectionName, getELFSectionType(SectionName, Kind), Flags,
+ /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);
+ // Make sure that we did not get some other section with incompatible sh_link.
+ // This should not be possible due to UniqueID code above.
+ assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ return Section;
}
/// Return the section prefix name used by options FunctionsSections and
@@ -248,11 +291,10 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-static MCSectionELF *
-selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM, bool EmitUniqueSection,
- unsigned Flags, unsigned *NextUniqueID) {
+static MCSectionELF *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
+ unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
unsigned EntrySize = 0;
if (Kind.isMergeableCString()) {
if (Kind.isMergeable2ByteCString()) {
@@ -319,7 +361,7 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
if (Kind.isExecuteOnly())
UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group, UniqueID);
+ EntrySize, Group, UniqueID, AssociatedSymbol);
}
MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
@@ -337,8 +379,17 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
}
EmitUniqueSection |= GO->hasComdat();
- return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
- EmitUniqueSection, Flags, &NextUniqueID);
+ const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
+ if (AssociatedSymbol) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+
+ MCSectionELF *Section = selectELFSectionForGlobal(
+ getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags,
+ &NextUniqueID, AssociatedSymbol);
+ assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ return Section;
}
MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -351,8 +402,9 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
return ReadOnlySection;
return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
- getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC,
- &NextUniqueID);
+ getMangler(), TM, EmitUniqueSection,
+ ELF::SHF_ALLOC, &NextUniqueID,
+ /* AssociatedSymbol */ nullptr);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -723,7 +775,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
return TargetLoweringObjectFile::
getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ Encoding & ~DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
@@ -1122,33 +1174,110 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
raw_ostream &OS, const GlobalValue *GV) const {
- if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
- return;
+ emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler());
+}
- const Triple &TT = getTargetTriple();
+//===----------------------------------------------------------------------===//
+// Wasm
+//===----------------------------------------------------------------------===//
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << " /EXPORT:";
- else
- OS << " -export:";
-
- if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
- std::string Flag;
- raw_string_ostream FlagOS(Flag);
- getMangler().getNameWithPrefix(FlagOS, GV, false);
- FlagOS.flush();
- if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
- OS << Flag.substr(1);
- else
- OS << Flag;
- } else {
- getMangler().getNameWithPrefix(OS, GV, false);
+static const Comdat *getWasmComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("Wasm COMDATs only support SelectionKind::Any, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
+MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ llvm_unreachable("getExplicitSectionGlobal not yet implemented");
+ return nullptr;
+}
+
+static MCSectionWasm *
+selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
+ StringRef Group = "";
+ if (getWasmComdat(GO))
+ llvm_unreachable("comdat not yet supported for wasm");
+
+ bool UniqueSectionNames = TM.getUniqueSectionNames();
+ SmallString<128> Name = getSectionPrefixForGlobal(Kind);
+
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ const auto &OptionalPrefix = F->getSectionPrefix();
+ if (OptionalPrefix)
+ Name += *OptionalPrefix;
}
- if (!GV->getValueType()->isFunctionTy()) {
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << ",DATA";
- else
- OS << ",data";
+ if (EmitUniqueSection && UniqueSectionNames) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, true);
+ }
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniqueSection && !UniqueSectionNames) {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
}
+ return Ctx.getWasmSection(Name, /*Type=*/0, Flags,
+ Group, UniqueID);
+}
+
+MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+
+ if (Kind.isCommon())
+ report_fatal_error("mergable sections not supported yet on wasm");
+
+ // If we have -ffunction-sections or -fdata-sections then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ EmitUniqueSection |= GO->hasComdat();
+
+ return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
+ EmitUniqueSection, /*Flags=*/0,
+ &NextUniqueID);
+}
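
For illustration, a self-contained sketch (not from this patch; the inputs are made up) of the section-name scheme selectWasmSectionForGlobal composes when unique sections are requested:

    #include <iostream>
    #include <string>

    // Mirrors the naming above: a kind prefix, optionally followed by the
    // mangled symbol name when -ffunction-sections/-fdata-sections is in effect.
    static std::string sectionNameFor(const std::string &Prefix,
                                      const std::string &Symbol,
                                      bool EmitUniqueSection,
                                      bool UniqueSectionNames) {
      std::string Name = Prefix;
      if (EmitUniqueSection && UniqueSectionNames)
        Name += "." + Symbol;
      return Name;
    }

    int main() {
      std::cout << sectionNameFor(".text", "foo", true, true) << "\n";  // .text.foo
      std::cout << sectionNameFor(".data", "bar", false, true) << "\n"; // .data
    }
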
+
+bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+}
+
+const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ // We may only use a PLT-relative relocation to refer to unnamed_addr
+ // functions.
+ if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
+ return nullptr;
+
+ // Basic sanity checks.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
+ RHS->isThreadLocal())
+ return nullptr;
+
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(TM.getSymbol(LHS), MCSymbolRefExpr::VK_None,
+ getContext()),
+ MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
+}
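
A hedged MC-level sketch of the kind of expression the function above returns; Ctx, Streamer, and the symbol names are placeholders rather than anything in this patch:

    // Emit a 4-byte label difference "target - base", the same Sub expression
    // shape that lowerRelativeReference() builds for its caller.
    MCSymbol *Target = Ctx.getOrCreateSymbol("target");
    MCSymbol *Base = Ctx.getOrCreateSymbol("base");
    const MCExpr *Diff = MCBinaryExpr::createSub(
        MCSymbolRefExpr::create(Target, Ctx),
        MCSymbolRefExpr::create(Base, Ctx), Ctx);
    Streamer.EmitValue(Diff, /*Size=*/4);
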
+
+void
+TargetLoweringObjectFileWasm::InitializeWasm() {
+ // TODO: Initialize StaticCtorSection and StaticDtorSection.
}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index b6da8e0aa60db..c20d5ab814f82 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -34,14 +34,6 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
return false;
}
-/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
-/// is specified on the command line. When this flag is off(default), the
-/// code generator is not allowed to generate mad (multiply add) if the
-/// result is "less precise" than doing those operations individually.
-bool TargetOptions::LessPreciseFPMAD() const {
- return UnsafeFPMath || LessPreciseFPMADOption;
-}
-
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool TargetOptions::HonorSignDependentRoundingFPMath() const {
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index e7ea2b4563f96..150195f5f85bc 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -92,6 +92,9 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(false),
cl::ZeroOrMore);
+static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
+ cl::Hidden,
+ cl::desc("Enable machine outliner"));
static cl::opt<std::string>
PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
@@ -261,7 +264,8 @@ TargetPassConfig::~TargetPassConfig() {
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
: ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true) {
+ DisableVerify(false), EnableTailMerge(true),
+ RequireCodeGenSCCOrder(false) {
Impl = new PassConfigImpl();
@@ -279,6 +283,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
TM->Options.PrintMachineCode = true;
+
+ if (TM->Options.EnableIPRA)
+ setRequiresCodeGenSCCOrder();
}
CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
@@ -531,7 +538,7 @@ void TargetPassConfig::addISelPrepare() {
addPreISel();
// Force codegen to run according to the callgraph.
- if (TM->Options.EnableIPRA)
+ if (requiresCodeGenSCCOrder())
addPass(new DummyCGSCCPass);
// Add both the safe stack and the stack protection passes: each of them will
@@ -668,9 +675,15 @@ void TargetPassConfig::addMachinePasses() {
addPass(&StackMapLivenessID, false);
addPass(&LiveDebugValuesID, false);
+ // Insert before XRay Instrumentation.
+ addPass(&FEntryInserterID, false);
+
addPass(&XRayInstrumentationID, false);
addPass(&PatchableFunctionID, false);
+ if (EnableMachineOutliner)
+ PM->add(createMachineOutlinerPass());
+
AddingMachinePasses = false;
}
@@ -704,6 +717,10 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&MachineLICMID, false);
addPass(&MachineCSEID, false);
+
+ // Coalesce basic blocks with the same branch condition
+ addPass(&BranchCoalescingID);
+
addPass(&MachineSinkingID);
addPass(&PeepholeOptimizerID);
@@ -730,7 +747,7 @@ MachinePassRegistry RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
-LLVM_DEFINE_ONCE_FLAG(InitializeDefaultRegisterAllocatorFlag);
+static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
static RegisterRegAlloc
defaultRegAlloc("default",
@@ -903,6 +920,11 @@ void TargetPassConfig::addBlockPlacement() {
//===---------------------------------------------------------------------===//
/// GlobalISel Configuration
//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::isGlobalISelEnabled() const {
+ return false;
+}
+
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
return EnableGlobalISelAbort == 1;
}
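
A hedged aside on the once_flag change above: a plain llvm::once_flag is normally paired with llvm::call_once. A minimal sketch of that idiom, where the initializer body is hypothetical:

    #include "llvm/Support/Threading.h"

    static llvm::once_flag InitFlag;

    static void initializeOnce() {
      // Hypothetical one-time initialization body.
    }

    void ensureInitialized() {
      // llvm::call_once runs initializeOnce() exactly once, even if
      // ensureInitialized() is entered concurrently from several threads.
      llvm::call_once(InitFlag, initializeOnce);
    }
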
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index cd50c5b6571dc..66cdad278e8da 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -155,8 +155,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
// Pick the most sub register class of the right type that contains
// this physreg.
const TargetRegisterClass* BestRC = nullptr;
- for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
- const TargetRegisterClass* RC = *I;
+ for (const TargetRegisterClass* RC : regclasses()) {
if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
(!BestRC || BestRC->hasSubClass(RC)))
BestRC = RC;
@@ -185,10 +184,9 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
if (SubClass)
getAllocatableSetForRC(MF, SubClass, Allocatable);
} else {
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- if ((*I)->isAllocatable())
- getAllocatableSetForRC(MF, *I, Allocatable);
+ for (const TargetRegisterClass *C : regclasses())
+ if (C->isAllocatable())
+ getAllocatableSetForRC(MF, C, Allocatable);
}
// Mask out the reserved registers
@@ -415,9 +413,9 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void
-TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
- const TargetRegisterInfo *TRI) {
+LLVM_DUMP_METHOD
+void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
+ const TargetRegisterInfo *TRI) {
dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
}
#endif
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 83e52d3353548..0df34ce43112a 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,22 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -37,13 +47,14 @@ bool TargetSchedModel::hasInstrItineraries() const {
static unsigned gcd(unsigned Dividend, unsigned Divisor) {
// Dividend and Divisor will be naturally swapped as needed.
- while(Divisor) {
+ while (Divisor) {
unsigned Rem = Dividend % Divisor;
Dividend = Divisor;
Divisor = Rem;
};
return Dividend;
}
+
static unsigned lcm(unsigned A, unsigned B) {
unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
assert((LCM >= A && LCM >= B) && "LCM overflow");
@@ -73,6 +84,29 @@ void TargetSchedModel::init(const MCSchedModel &sm,
}
}
+/// Returns true only if the instruction is specified as single-issue.
+bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->BeginGroup;
+ }
+ return false;
+}
+
+bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->EndGroup;
+ }
+ return false;
+}
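
A hedged sketch of how a scheduling strategy might consume the two new queries; the helper and its GroupEmpty flag are hypothetical and assume the usual CodeGen headers:

    // Returns false when MI cannot legally join the issue group currently
    // being filled. A symmetric check with mustEndGroup() would close the
    // group after issuing MI.
    static bool fitsInCurrentGroup(const TargetSchedModel &Model,
                                   const MachineInstr *MI, bool GroupEmpty) {
      // An instruction that must begin a group may only start an empty group.
      if (Model.mustBeginGroup(MI, nullptr) && !GroupEmpty)
        return false;
      return true;
    }
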
+
unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC) const {
if (hasInstrItineraries()) {
@@ -100,7 +134,6 @@ static unsigned capLatency(int Cycles) {
/// evaluation of predicates that depend on instruction operands or flags.
const MCSchedClassDesc *TargetSchedModel::
resolveSchedClass(const MachineInstr *MI) const {
-
// Get the definition's scheduling class descriptor from this machine model.
unsigned SchedClass = MI->getDesc().getSchedClass();
const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
@@ -244,7 +277,11 @@ unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
if (SCDesc->isValid() && !SCDesc->isVariant())
return computeInstrLatency(*SCDesc);
- llvm_unreachable("No MI sched latency");
+ if (SCDesc->isValid()) {
+ assert(!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()");
+ return computeInstrLatency(*SCDesc);
+ }
+ return 0;
}
unsigned
@@ -298,3 +335,68 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
}
return 0;
}
+
+static Optional<double>
+getRTroughputFromItineraries(unsigned schedClass,
+ const InstrItineraryData *IID) {
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const InstrStage *IS = IID->beginStage(schedClass),
+ *E = IID->endStage(schedClass);
+ IS != E; ++IS) {
+ unsigned Cycles = IS->getCycles();
+ if (!Cycles)
+ continue;
+ Throughput =
+ std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles);
+ }
+ // We need the reciprocal throughput, so return the inverse of the computed value.
+ return 1 / Throughput;
+}
+
+static Optional<double>
+getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
+ const TargetSubtargetInfo *STI,
+ const MCSchedModel &SchedModel) {
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
+ *WEnd = STI->getWriteProcResEnd(SCDesc);
+ WPR != WEnd; ++WPR) {
+ unsigned Cycles = WPR->Cycles;
+ if (!Cycles)
+ return Optional<double>();
+
+ unsigned NumUnits =
+ SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
+ Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles);
+ }
+ // We need the reciprocal throughput, so return the inverse of the computed value.
+ return 1 / Throughput;
+}
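
A worked example of the computation above, with made-up numbers: a write that keeps a resource with 2 units busy for 4 cycles sustains 2/4 = 0.5 instructions per cycle, so the reciprocal throughput reported is 2.0 cycles per instruction. A standalone sketch:

    #include <algorithm>
    #include <iostream>
    #include <limits>

    int main() {
      double Throughput = std::numeric_limits<double>::infinity();
      unsigned NumUnits = 2, Cycles = 4;                          // illustrative only
      Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles); // 0.5 instr/cycle
      std::cout << 1 / Throughput << "\n";                        // prints 2
    }
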
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
+ if (hasInstrItineraries())
+ return getRTroughputFromItineraries(MI->getDesc().getSchedClass(),
+ getInstrItineraries());
+ if (hasInstrSchedModel())
+ return getRTroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
+ SchedModel);
+ return Optional<double>();
+}
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
+ unsigned SchedClass = TII->get(Opcode).getSchedClass();
+ if (hasInstrItineraries())
+ return getRTroughputFromItineraries(SchedClass, getInstrItineraries());
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (SCDesc->isValid() && !SCDesc->isVariant())
+ return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
+ }
+ return Optional<double>();
+}
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index c74707d95b9e7..0a444e0fff07e 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -52,3 +55,46 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
bool TargetSubtargetInfo::useAA() const {
return false;
}
+
+static std::string createSchedInfoStr(unsigned Latency,
+ Optional<double> RThroughput) {
+ static const char *SchedPrefix = " sched: [";
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ if (Latency > 0 && RThroughput.hasValue())
+ CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue())
+ << "]";
+ else if (Latency > 0)
+ CS << SchedPrefix << Latency << ":?]";
+ else if (RThroughput.hasValue())
+ CS << SchedPrefix << "?:" << RThroughput.getValue() << "]";
+ CS.flush();
+ return Comment;
+}
+
+/// Returns a string representation of the scheduler comment.
+std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
+ if (MI.isPseudo() || MI.isTerminator())
+ return std::string();
+ // We don't cache TSchedModel because it depends on TargetInstrInfo,
+ // which may change during compilation.
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ unsigned Latency = TSchedModel.computeInstrLatency(&MI);
+ Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI);
+ return createSchedInfoStr(Latency, RThroughput);
+}
+
+/// Returns a string representation of the scheduler comment.
+std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
+ // We don't cache TSchedModel because it depends on TargetInstrInfo,
+ // which may change during compilation.
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ if (!TSchedModel.hasInstrSchedModel())
+ return std::string();
+ unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
+ Optional<double> RThroughput =
+ TSchedModel.computeInstrRThroughput(MCI.getOpcode());
+ return createSchedInfoStr(Latency, RThroughput);
+}
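
The comment strings produced above look like " sched: [4:0.50]" (latency 4, reciprocal throughput 0.50). A self-contained sketch of the three formatting cases, with illustrative numbers:

    #include <cstdio>

    int main() {
      unsigned Latency = 4;
      double RThroughput = 0.5;
      std::printf(" sched: [%u:%2.2f]\n", Latency, RThroughput); // both known
      std::printf(" sched: [%u:?]\n", Latency);                  // throughput unknown
      std::printf(" sched: [?:%g]\n", RThroughput);              // latency unknown
    }
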
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0f1b2ed994b7e..75359fe3c0ea6 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -905,7 +905,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
++End;
}
- // Check if the reschedule will not break depedencies.
+ // Check if the reschedule will not break dependencies.
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
@@ -1785,7 +1785,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
TII->get(TargetOpcode::COPY))
.addReg(DstReg, RegState::Define, SubIdx)
- .addOperand(UseMO);
+ .add(UseMO);
// The first def needs an <undef> flag because there is no live register
// before it.
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 0d506d6466598..c8946010e9d15 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -167,6 +167,7 @@ class VirtRegRewriter : public MachineFunctionPass {
bool readsUndefSubreg(const MachineOperand &MO) const;
void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
+ void expandCopyBundle(MachineInstr &MI) const;
public:
static char ID;
@@ -367,11 +368,41 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
}
if (Indexes)
- Indexes->removeMachineInstrFromMaps(MI);
- MI.eraseFromParent();
+ Indexes->removeSingleMachineInstrFromMaps(MI);
+ MI.eraseFromBundle();
DEBUG(dbgs() << " deleted.\n");
}
+/// The live range splitting logic sometimes produces bundles of copies when
+/// subregisters are involved. Expand these into a sequence of copy instructions
+/// after processing the last copy in the bundle. This does not update
+/// LiveIntervals, which are no longer needed for these instructions.
+void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
+ if (!MI.isCopy())
+ return;
+
+ if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+ // Only do this when the complete bundle is made out of COPYs.
+ MachineBasicBlock &MBB = *MI.getParent();
+ for (MachineBasicBlock::reverse_instr_iterator I =
+ std::next(MI.getReverseIterator()), E = MBB.instr_rend();
+ I != E && I->isBundledWithSucc(); ++I) {
+ if (!I->isCopy())
+ return;
+ }
+
+ for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
+ I->isBundledWithPred(); ) {
+ MachineInstr &MI = *I;
+ ++I;
+
+ MI.unbundleFromPred();
+ if (Indexes)
+ Indexes->insertMachineInstrInMaps(MI);
+ }
+ }
+}
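
A hedged sketch of the bundle-flag check the function above keys on (MI is a hypothetical instruction; the helper is not part of this patch):

    // An instruction is the last of a bundle when it is bundled with its
    // predecessor but not with its successor; expandCopyBundle() above only
    // fires in that position.
    static bool isLastInBundle(const MachineInstr &MI) {
      return MI.isBundledWithPred() && !MI.isBundledWithSucc();
    }
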
+
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
@@ -431,12 +462,14 @@ void VirtRegRewriter::rewrite() {
}
}
- // The <def,undef> flag only makes sense for sub-register defs, and
- // we are substituting a full physreg. An <imp-use,kill> operand
- // from the SuperKills list will represent the partial read of the
- // super-register.
- if (MO.isDef())
+ // The <def,undef> and <def,internal> flags only make sense for
+ // sub-register defs, and we are substituting a full physreg. An
+ // <imp-use,kill> operand from the SuperKills list will represent the
+ // partial read of the super-register.
+ if (MO.isDef()) {
MO.setIsUndef(false);
+ MO.setIsInternalRead(false);
+ }
// PhysReg operands cannot have subregister indexes.
PhysReg = TRI->getSubReg(PhysReg, SubReg);
@@ -461,6 +494,8 @@ void VirtRegRewriter::rewrite() {
DEBUG(dbgs() << "> " << *MI);
+ expandCopyBundle(*MI);
+
// We can remove identity copies right now.
handleIdentityCopy(*MI);
}
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 568720c66e551..ae07e8b2fa032 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -86,6 +86,7 @@ private:
// All fields are reset by runOnFunction.
EHPersonality Personality = EHPersonality::Unknown;
+ const DataLayout *DL = nullptr;
DenseMap<BasicBlock *, ColorVector> BlockColors;
MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
};
@@ -111,6 +112,7 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
if (!isFuncletEHPersonality(Personality))
return false;
+ DL = &Fn.getParent()->getDataLayout();
return prepareExplicitEH(Fn);
}
@@ -1070,7 +1072,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
if (!isa<TerminatorInst>(EHPad)) {
// If the EHPad isn't a terminator, then we can insert a load in this block
// that will dominate all uses.
- SpillSlot = new AllocaInst(PN->getType(), nullptr,
+ SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(PN->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
@@ -1157,7 +1159,7 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
Function &F) {
// Lazily create the spill slot.
if (!SpillSlot)
- SpillSlot = new AllocaInst(V->getType(), nullptr,
+ SpillSlot = new AllocaInst(V->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(V->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
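
Both hunks above switch to the AllocaInst constructor that takes an explicit address space. A minimal sketch of that form, assuming a Function &F in scope; the value type and slot name are placeholders:

    const DataLayout &DL = F.getParent()->getDataLayout();
    AllocaInst *Slot =
        new AllocaInst(Type::getInt32Ty(F.getContext()), DL.getAllocaAddrSpace(),
                       /*ArraySize=*/nullptr, "spill.slot",
                       &F.getEntryBlock().front());
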
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 63bd762eeb2b8..7d2848bdc13b1 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -81,7 +81,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
.addImm(T.getOpcode());
for (auto &MO : T.operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
Terminators.push_back(&T);
}
}
@@ -157,6 +157,11 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::arm:
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
+ case Triple::ArchType::ppc64le:
+ case Triple::ArchType::mips:
+ case Triple::ArchType::mipsel:
+ case Triple::ArchType::mips64:
+ case Triple::ArchType::mips64el:
// For the architectures which don't have a single return instruction
prependRetWithPatchableExit(MF, TII);
break;